Coverage for src/thunderlab/convertdata.py: 89%

"""Command line script for converting, downsampling, renaming and merging data files.

```sh
convertdata -o test.wav test.raw
```
converts 'test.raw' to 'test.wav'.

The script reads all input files with `dataloader.load_data()`,
combines the audio and marker data, and writes them along with the
metadata to an output file using `datawriter.write_data()`. Thus, all
formats supported by these functions and the installed python audio
modules are supported.

Run
```sh
convertdata -l
```
for a list of supported output file formats and
```sh
convertdata -f wav -l
```
for a list of supported encodings for a given output format.

Running
```sh
convertdata --help
```
prints
```text
usage: convertdata [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] [-U [THRESH]]
                   [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH]
                   [file ...]

Convert, downsample, rename, and merge data files.

positional arguments:
  file          one or more input files to be combined into a single output file

options:
  -h, --help    show this help message and exit
  --version     show program's version number and exit
  -v            print debug output
  -l            list supported file formats and encodings
  -f FORMAT     audio format of output file
  -e ENCODING   audio encoding of output file
  -s SCALE      scale the data by factor SCALE
  -u [THRESH]   unwrap clipped data with threshold (default is 0.5) and divide by two
  -U [THRESH]   unwrap clipped data with threshold (default is 0.5) and clip
  -d FAC        downsample by integer factor
  -c CHANNELS   comma and dash separated list of channels to be saved (first channel is 0)
  -a KEY=VALUE  add key-value pairs to metadata. Keys can have section names separated by "."
  -r KEY        remove keys from metadata. Keys can have section names separated by "."
  -n NUM        merge NUM input files into one output file
  -o OUTPATH    path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their
                values from the input file

version 1.12.0 by Benda-Lab (2020-2024)
```

"""

62import os

63import sys

64import argparse

65import numpy as np

66from .version import __version__, __year__

67from audioio import add_metadata, remove_metadata, cleanup_metadata

68from audioio import bext_history_str, add_history

69from audioio.audioconverter import add_arguments, parse_channels

70from audioio.audioconverter import make_outfile, format_outfile

71from audioio.audioconverter import modify_data

72from .dataloader import load_data, DataLoader, markers

73from .datawriter import available_formats, available_encodings

74from .datawriter import format_from_extension, write_data

def check_format(format):
    """
    Tell whether a requested data file format can be written.

    A missing or unknown format is reported on the console together
    with a hint on how to list the available formats.

    Parameters
    ----------
    format: string
        Name of the file format to be checked. It is converted to
        upper case before it is looked up in `available_formats()`.

    Returns
    -------
    valid: bool
        True if the format is listed by `available_formats()`,
        False if it is empty, None, or not listed.
    """
    # An empty or missing format is invalid without consulting the writer.
    is_known = bool(format) and format.upper() in available_formats()
    if is_known:
        return True
    print(f'! invalid data file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False

101

102

def list_formats_encodings(data_format):
    """ Print the available file formats or the encodings of one format.

    Parameters
    ----------
    data_format: None or str
        If empty or None, the available file formats are printed.
        Otherwise the encodings available for this file format are
        printed. If `check_format()` rejects the format, the program
        exits with status -1.
    """
    if data_format:
        # Encodings can only be listed for a format the writer knows.
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        entries = available_encodings(data_format)
    else:
        print('available file formats:')
        entries = available_formats()
    for entry in entries:
        print(f' {entry}')

121

122

def main(*cargs):
    """Command line script for converting, downsampling, renaming and
    merging data files.

    The input files are processed in groups of `args.nmerge` files
    (a single group containing all files if it is zero). The files
    of each group are concatenated, modified according to the command
    line options, and written to a single output file together with
    the metadata of the first file of the group and the markers of
    all files of the group.

    The program exits with status -1 if no input file is given, if
    the output format is invalid, if an input file would be converted
    to itself, if an input file is not found, if files to be merged
    differ in sampling rate or number of channels, or if the output
    file cannot be written because of missing permissions.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If empty, `argparse` is left to read the arguments itself.

    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert, downsample, rename, and merge data files.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        # parse_args(None) makes argparse fall back to sys.argv[1:]
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f the first positional argument is taken as the format:
        if args.data_format is None and len(args.file) > 0:
            args.data_format = args.file[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.file) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    nmerge = args.nmerge
    if nmerge == 0:
        # merge all input files into a single output file:
        nmerge = len(args.file)

    for i0 in range(0, len(args.file), nmerge):
        infile = args.file[i0]
        outfile, data_format = make_outfile(args.outpath, infile,
                                            args.data_format,
                                            nmerge < len(args.file),
                                            format_from_extension)
        if not check_format(data_format):
            sys.exit(-1)
        if os.path.realpath(infile) == os.path.realpath(outfile):
            print(f'! cannot convert "{infile}" to itself !')
            sys.exit(-1)
        # The encoding is kept in a local variable for each group of
        # files. Storing the encoding of the first input file in
        # `args.encoding` would make all following groups inherit it
        # instead of falling back to their own encoding.
        encoding = args.encoding
        # read in data:
        pre_history = None
        try:
            with DataLoader(infile) as sf:
                data = sf[:,:]
                rate = sf.rate
                unit = sf.unit
                amax = sf.ampl_max
                md = sf.metadata()
                locs, labels = sf.markers()
                pre_history = bext_history_str(sf.encoding,
                                               sf.rate,
                                               sf.channels,
                                               sf.filepath)
                if sf.encoding is not None and encoding is None:
                    # no encoding requested: keep the one of the input file
                    encoding = sf.encoding
        except FileNotFoundError:
            print(f'file "{infile}" not found!')
            sys.exit(-1)
        if args.verbose > 1:
            print(f'loaded data file "{infile}"')
        # append the remaining files of this group; a separate loop
        # variable keeps `infile` pointing to the first file of the group:
        for xfile in args.file[i0+1:i0+nmerge]:
            try:
                xdata, xrate, xunit, xamax = load_data(xfile)
            except FileNotFoundError:
                print(f'file "{xfile}" not found!')
                sys.exit(-1)
            if abs(rate - xrate) > 1:
                print('! cannot merge files with different sampling rates !')
                print(f' file "{args.file[i0]}" has {rate:.0f}Hz')
                print(f' file "{xfile}" has {xrate:.0f}Hz')
                sys.exit(-1)
            if xdata.shape[1] != data.shape[1]:
                print('! cannot merge files with different numbers of channels !')
                print(f' file "{args.file[i0]}" has {data.shape[1]} channels')
                print(f' file "{xfile}" has {xdata.shape[1]} channels')
                sys.exit(-1)
            if xamax > amax:
                amax = xamax
            # number of frames merged so far, needed for shifting markers:
            offset = len(data)
            data = np.vstack((data, xdata))
            xlocs, xlabels = markers(xfile)
            if len(xlocs) > 0:
                # Markers are relative to the start of their own file
                # and need to be moved behind the data merged so far.
                # NOTE(review): assumes the first column of `locs` holds
                # the marker position in frames, as documented for
                # audioio's markers() - confirm.
                xlocs = np.array(xlocs)
                xlocs[:, 0] += offset
            locs = np.vstack((locs, xlocs))
            labels = np.vstack((labels, xlabels))
            if args.verbose > 1:
                print(f'loaded data file "{xfile}"')
        # NOTE(review): `locs` is passed on unchanged when the data are
        # downsampled via `args.decimate` - confirm whether marker
        # positions need to be rescaled as well.
        data, rate = modify_data(data, rate, md, channels, args.scale,
                                 args.unwrap_clip, args.unwrap, amax, unit,
                                 args.decimate)
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
            cleanup_metadata(md)
        outfile = format_outfile(outfile, md)
        # history:
        hkey = 'CodingHistory'
        if 'BEXT' in md:
            hkey = 'BEXT.' + hkey
        history = bext_history_str(encoding, rate,
                                   data.shape[1], outfile)
        add_history(md, history, hkey, pre_history)
        # write out data:
        try:
            write_data(outfile, data, rate, amax, unit,
                       md, locs, labels,
                       format=data_format, encoding=encoding)
        except PermissionError:
            print(f'failed to write "{outfile}": permission denied!')
            sys.exit(-1)
        # message:
        if args.verbose > 1:
            print(f'wrote "{outfile}"')
        elif args.verbose:
            print(f'converted data file "{infile}" to "{outfile}"')

242

243

if __name__ == '__main__':
    # Hand over the command line arguments without the program name.
    cli_args = sys.argv[1:]
    main(*cli_args)