Coverage for src/audioio/audioconverter.py: 94%

186 statements  

« prev     ^ index     » next       coverage.py v7.6.3, created at 2024-10-15 07:29 +0000

1"""Command line script for converting, downsampling, renaming and merging audio files. 

2 

3```sh 

4audioconverter -o test.wav test.mp3 

5``` 

6converts 'test.mp3' to 'test.wav'. 

7 

8The script reads all input files with `audioio.audioloader.load_audio()`, 

9combines the audio and marker data and writes them along with the 

10metadata to an output file using `audioio.audiowriter.write_audio()`. 

11 

12Thus, all formats supported by these functions and the installed 

13python audio modules are available. This implies that MP3 files can be 

14read via the [audioread](https://github.com/beetbox/audioread) module, 

15and they can be written via [pydub](https://github.com/jiaaro/pydub). 

16Many other input and output file formats are supported by the [sndfile 

17library](http://www.mega-nerd.com/libsndfile/), provided the 

18[SoundFile](http://pysoundfile.readthedocs.org) or 

19[wavefile](https://github.com/vokimon/python-wavefile) python packages 

20are [installed](https://bendalab.github.io/audioio/installation). 

21 

22Metadata and markers are preserved if possible. 

23 

24Run 

25```sh 

26audioconverter -l 

27``` 

28for a list of supported output file formats and 

29```sh 

30audioconverter -f wav -l 

31``` 

32for a list of supported encodings for a given output format (`-f` option).

33 

34Running 

35```sh 

36audioconverter --help 

37``` 

38prints 

39```text 

40usage: audioconverter [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] [-U [THRESH]] 

41 [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH] 

42 [file ...] 

43 

44Convert audio file formats. 

45 

46positional arguments: 

47 file one or more input files to be combined into a single output file 

48 

49options: 

50 -h, --help show this help message and exit 

51 --version show program's version number and exit 

52 -v print debug output 

53 -l list supported file formats and encodings 

54 -f FORMAT audio format of output file 

55 -e ENCODING audio encoding of output file 

56 -s SCALE scale the data by factor SCALE 

57 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and divide by 

58 two 

59 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and clip 

60 -d FAC downsample by integer factor 

61 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0) 

62 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "." 

63 -r KEY remove keys from metadata. Keys can have section names separated by "." 

64 -n NUM merge NUM input files into one output file 

65 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their 

66 values from the input file 

67 

68version 2.0.0 by Benda-Lab (2020-2024) 

69``` 

70 

71""" 

72 

73import os 

74import sys 

75import argparse 

76import numpy as np 

77from scipy.signal import decimate 

78from .version import __version__, __year__ 

79from .audioloader import load_audio, markers, AudioLoader 

80from .audiometadata import flatten_metadata, unflatten_metadata 

81from .audiometadata import add_metadata, remove_metadata, cleanup_metadata 

82from .audiometadata import bext_history_str, add_history 

83from .audiometadata import update_gain, add_unwrap 

84from .audiotools import unwrap 

85from .audiowriter import available_formats, available_encodings 

86from .audiowriter import format_from_extension, write_audio 

87 

88 

def add_arguments(parser):
    """ Register the command line options of the converter on a parser.

    The options are added in the order in which they are listed in the
    help message.

    Parameters
    ----------
    parser: argparse.ArgumentParser
        The parser receiving the arguments.
    """
    add = parser.add_argument
    add('--version', action='version', version=__version__)
    add('-v', dest='verbose', action='count', default=0,
        help='print debug output')
    add('-l', action='store_true', dest='list_formats',
        help='list supported file formats and encodings')
    add('-f', type=str, default=None, dest='data_format', metavar='FORMAT',
        help='audio format of output file')
    add('-e', type=str, default=None, dest='encoding',
        help='audio encoding of output file')
    add('-s', type=float, default=1, dest='scale',
        help='scale the data by factor SCALE')
    # NOTE(review): for -u and -U the value used when the option is given
    # without THRESH is const=1.5, whereas the help text announces a
    # default of 0.5 - confirm which one is intended.
    add('-u', nargs='?', type=float, default=0, const=1.5,
        dest='unwrap', metavar='THRESH',
        help='unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and divide by two')
    add('-U', nargs='?', type=float, default=0, const=1.5,
        dest='unwrap_clip', metavar='THRESH',
        help='unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and clip')
    add('-d', type=int, default=1, dest='decimate', metavar='FAC',
        help='downsample by integer factor')
    add('-c', type=str, default='', dest='channels',
        help='comma and dash separated list of channels to be saved (first channel is 0)')
    # -a and -r may be given several times, each occurrence is appended:
    add('-a', action='append', type=str, default=[],
        dest='md_list', metavar='KEY=VALUE',
        help='add key-value pairs to metadata. Keys can have section names separated by "."')
    add('-r', action='append', type=str, default=[],
        dest='remove_keys', metavar='KEY',
        help='remove keys from metadata. Keys can have section names separated by "."')
    add('-n', type=int, default=0, dest='nmerge', metavar='NUM',
        help='merge NUM input files into one output file')
    add('-o', type=str, default=None, dest='outpath',
        help='path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their values from the input file')
    add('file', type=str, nargs='*',
        help='one or more input files to be combined into a single output file')

131 

132 

def parse_channels(cstr):
    """ Turn a channel selection string into a list of channel indices.

    Parameters
    ----------
    cstr: str
        Comma separated items. Each item is either a single channel
        or a range `first-last` that includes both end points.
        Empty items are skipped.

    Returns
    -------
    channels: list of int
        The selected channels in the order they were specified.

    Raises
    ------
    ValueError
        If an item cannot be converted to integers.
    """
    channels = []
    for item in cstr.split(','):
        item = item.strip()
        if not item:
            continue
        bounds = [b.strip() for b in item.split('-')]
        if len(bounds) != 2:
            # single channel (or malformed item, on which int() raises):
            channels.append(int(item))
            continue
        first = int(bounds[0])
        last = int(bounds[1])
        channels.extend(range(first, last + 1))
    return channels

157 

158 

def check_format(format):
    """
    Tell whether an audio format is among the available formats.

    An empty or missing format, or a format whose upper-case name is
    not in the list returned by `available_formats()`, is reported on
    the console together with a hint on how to list the available formats.

    Parameters
    ----------
    format: string
        Audio format to be checked.

    Returns
    -------
    valid: bool
        True if the requested audio format is available.
    """
    if format and format.upper() in available_formats():
        return True
    print(f'! invalid audio file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False

183 

184 

def list_formats_encodings(data_format):
    """ Print the available file formats or the encodings of one format.

    Terminates the program via `sys.exit(-1)` if `data_format` is given
    but rejected by `check_format()`.

    Parameters
    ----------
    data_format: None or str
        If provided, print the encodings available for this format.
        Otherwise, print the available audio file formats.
    """
    if data_format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        for encoding in available_encodings(data_format):
            print(f' {encoding}')
        return
    print('available file formats:')
    for fmt in available_formats():
        print(f' {fmt}')

204 

205 

def make_outfile(outpath, infile, data_format, blocks, format_from_ext):
    """ Make name for output file.

    If several output files are produced (`blocks` is True) and
    `outpath` does not exist and `format_from_ext()` does not deduce a
    format from it, then `outpath` is created as a directory,
    including missing parent directories.

    If no `outpath` is given or if it is a directory, the name of the
    output file is derived from `infile` with its extension replaced
    by the lower-case `data_format`. Without a `data_format` an error
    message is printed and the program terminates via `sys.exit(-1)`.

    Otherwise `outpath` is the output file. Its extension is replaced
    by `data_format` if specified, or else the format is deduced from
    the extension.

    Parameters
    ----------
    outpath: None or str
        Requested output path.
    infile: str
        Name of the input file.
    data_format: None or str
        Requested output file format.
    blocks: bool
        If True, produce outputfile for group of input files.
    format_from_ext: function
        Function that inspects a filename for its extension and
        deduces a file format from it.

    Returns
    -------
    outfile: str
        Name of output file.
    data_format: str
        Format of output file. This is whatever `format_from_ext()`
        returned if no format was requested.
    """
    if blocks and outpath and \
       format_from_ext(outpath) is None and \
       not os.path.exists(outpath):
        # makedirs() rather than mkdir(): a nested output path must
        # not fail because of a missing parent directory.
        os.makedirs(outpath, exist_ok=True)
    if not outpath or os.path.isdir(outpath):
        # no explicit file name: derive it from the input file.
        outfile = infile
        if outpath:
            outfile = os.path.join(outpath, outfile)
        if not data_format:
            print('! need to specify an audio format via -f or a file extension !')
            sys.exit(-1)
        outfile = os.path.splitext(outfile)[0] + os.extsep + data_format.lower()
    else:
        outfile = outpath
        if data_format:
            # an explicitly requested format overrides the extension:
            outfile = os.path.splitext(outfile)[0] + os.extsep + data_format.lower()
        else:
            data_format = format_from_ext(outfile)
    return outfile, data_format

249 

250 

def modify_data(data, rate, metadata, channels, scale,
                unwrap_clip, unwrap_thresh, ampl_max, unit, decimate_fac):
    """ Apply the requested modifications to the data and log them in the metadata.

    The steps are applied in this order: channel selection, scaling,
    unwrapping (either with clipping or with downscaling, clipping
    takes precedence), and decimation. Scaling, clipping and
    downscaling operate in place on the data array.

    Parameters
    ----------
    data: 2-D array of float
        The data to be written into the output file.
    rate: float
        Sampling rate of the data in Hertz.
    metadata: nested dict
        Metadata. Passed on to `update_gain()` and `add_unwrap()`
        for the modifications that were applied.
    channels: list of int
        List of channels to be selected from the data.
        An empty list keeps all channels.
    scale: float
        Scaling factor to be applied to the data.
    unwrap_clip: float
        If larger than 0.001, unwrap the data using this as a threshold
        relative to `ampl_max`, and clip the data at +-`ampl_max`.
    unwrap_thresh: float
        If larger than 0.001 (and `unwrap_clip` is not), unwrap the
        data using this as a threshold relative to `ampl_max`, and
        downscale the data by a factor of two.
    ampl_max: float
        Maximum amplitude of the input range.
    unit: str
        Unit of the input range.
    decimate_fac: int
        Downsample the data by this factor if it is larger than one.

    Returns
    -------
    data: 2-D array of float
        The modified data.
    rate: float
        Sampling rate of the returned data in Hertz, i.e. the input
        rate divided by `decimate_fac` if the data were decimated.
    """
    # restrict to the requested channels:
    if len(channels) > 0:
        data = data[:, channels]

    # apply scaling factor and keep track of it in the metadata:
    if scale != 1:
        data *= scale
        update_gain(metadata, 1/scale)

    # repair wrapped data, the clipping variant wins over downscaling:
    do_clip = unwrap_clip > 1e-3
    do_halve = not do_clip and unwrap_thresh > 1e-3
    if do_clip:
        unwrap(data, unwrap_clip, ampl_max)
        too_large = data > +ampl_max
        data[too_large] = +ampl_max
        too_small = data < -ampl_max
        data[too_small] = -ampl_max
        add_unwrap(metadata, unwrap_clip*ampl_max, ampl_max, unit)
    if do_halve:
        unwrap(data, unwrap_thresh, ampl_max)
        data *= 0.5
        update_gain(metadata, 0.5)
        add_unwrap(metadata, unwrap_thresh*ampl_max, 0.0, unit)

    # downsample along the time axis:
    if decimate_fac > 1:
        data = decimate(data, decimate_fac, axis=0)
        rate = rate / decimate_fac
    return data, rate

307 

308 

def format_outfile(outfile, metadata):
    """ Fill metadata values into the name of the output file.

    The name is only processed if the metadata are not empty and the
    name contains both an opening and a closing curly brace. Then the
    flattened metadata are passed as keyword arguments to
    `outfile.format()`. In values that are neither int nor float,
    spaces are replaced by underscores.

    Parameters
    ----------
    outfile: str
        Name of output file. May contain metadata keys enclosed in curly braces.
    metadata: nested dict
        Metadata.

    Returns
    -------
    outfile: str
        Name of output file with metadata keys replaced by their values.
    """
    if len(metadata) == 0 or '{' not in outfile or '}' not in outfile:
        return outfile
    flat = flatten_metadata(metadata)
    values = {}
    for key in flat:
        value = flat[key]
        if not isinstance(value, (int, float)):
            value = value.replace(' ', '_')
        values[key] = value
    return outfile.format(**values)

329 

330 

def main(*cargs):
    """
    Command line script for converting, downsampling, renaming and merging audio files.

    The input files are processed in groups of `-n NUM` files (all
    files if not specified). The files of a group are concatenated,
    modified as requested, and written to a single output file.
    On errors a message is printed and the program terminates via
    `sys.exit(-1)`.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If empty, the arguments are taken from `sys.argv` by argparse.
    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert audio file formats.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f the first positional argument is taken as the format:
        if args.data_format is None and len(args.file) > 0:
            args.data_format = args.file[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.file) == 0 or len(args.file[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    nmerge = args.nmerge
    if nmerge == 0:
        nmerge = len(args.file)

    for i0 in range(0, len(args.file), nmerge):
        infile = args.file[i0]
        outfile, data_format = make_outfile(args.outpath, infile,
                                            args.data_format,
                                            nmerge < len(args.file),
                                            format_from_extension)
        if not check_format(data_format):
            sys.exit(-1)
        if os.path.realpath(infile) == os.path.realpath(outfile):
            print(f'! cannot convert "{infile}" to itself !')
            sys.exit(-1)
        # The encoding is determined for each group of input files
        # separately. Storing an inherited encoding in args.encoding
        # would force the encoding of the first input file onto all
        # subsequent output files.
        encoding = args.encoding
        # read in audio:
        pre_history = None
        try:
            with AudioLoader(infile) as sf:
                data = sf[:,:]
                rate = sf.rate
                md = sf.metadata()
                locs, labels = sf.markers()
                pre_history = bext_history_str(sf.encoding,
                                               sf.rate,
                                               sf.channels,
                                               sf.filepath)
                # inherit the encoding of the input file if none was requested:
                if sf.encoding is not None and encoding is None:
                    encoding = sf.encoding
        except FileNotFoundError:
            print(f'file "{infile}" not found!')
            sys.exit(-1)
        if args.verbose > 1:
            print(f'loaded audio file "{infile}"')
        # append the remaining files of this group:
        for infile in args.file[i0+1:i0+nmerge]:
            try:
                xdata, xrate = load_audio(infile)
            except FileNotFoundError:
                print(f'file "{infile}" not found!')
                sys.exit(-1)
            if abs(rate - xrate) > 1:
                print('! cannot merge files with different sampling rates !')
                print(f' file "{args.file[i0]}" has {rate:.0f}Hz')
                print(f' file "{infile}" has {xrate:.0f}Hz')
                sys.exit(-1)
            if xdata.shape[1] != data.shape[1]:
                print('! cannot merge files with different numbers of channels !')
                print(f' file "{args.file[i0]}" has {data.shape[1]} channels')
                print(f' file "{infile}" has {xdata.shape[1]} channels')
                sys.exit(-1)
            data = np.vstack((data, xdata))
            # NOTE(review): the markers of the appended file are stacked
            # as they are loaded. They do not appear to be shifted by
            # the length of the preceding data, nor adapted to a later
            # decimation - confirm that this is intended.
            xlocs, xlabels = markers(infile)
            locs = np.vstack((locs, xlocs))
            labels = np.vstack((labels, xlabels))
            if args.verbose > 1:
                print(f'loaded audio file "{infile}"')
        data, rate = modify_data(data, rate, md, channels, args.scale,
                                 args.unwrap_clip, args.unwrap, 1.0,
                                 '', args.decimate)
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
        cleanup_metadata(md)
        outfile = format_outfile(outfile, md)
        # history:
        hkey = 'CodingHistory'
        if 'BEXT' in md:
            hkey = 'BEXT.' + hkey
        history = bext_history_str(encoding, rate,
                                   data.shape[1], outfile)
        add_history(md, history, hkey, pre_history)
        # write out audio:
        try:
            write_audio(outfile, data, rate, md, locs, labels,
                        format=data_format, encoding=encoding)
        except PermissionError:
            print(f'failed to write "{outfile}": permission denied!')
            sys.exit(-1)
        # message:
        if args.verbose > 1:
            print(f'wrote "{outfile}"')
        elif args.verbose:
            # NOTE(review): after merging, infile is the last file of the group.
            print(f'converted audio file "{infile}" to "{outfile}"')

445 

# Run the converter when this module is executed as a script:
# the command line arguments without the program name go to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(*cli_args)