Coverage for src / audioio / audioconverter.py: 87%

205 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-17 21:34 +0000

1"""Command line script for converting, downsampling, renaming and merging audio files. 

2 

3```sh 

4audioconverter -o test.wav test.mp3 

5``` 

6converts 'test.mp3' to 'test.wav'. 

7 

8The script reads all input files with `audioio.audioloader.load_audio()`, 

9combines the audio and marker data and writes them along with the 

10metadata to an output file using `audioio.audiowriter.write_audio()`. 

11 

12Thus, all formats supported by these functions and the installed 

13python audio modules are available. This implies that MP3 files can be 

14read via the [audioread](https://github.com/beetbox/audioread) module, 

15and they can be written via [pydub](https://github.com/jiaaro/pydub). 

16Many other input and output file formats are supported by the [sndfile 

17library](http://www.mega-nerd.com/libsndfile/), provided the 

18[SoundFile](http://pysoundfile.readthedocs.org) or 

19[wavefile](https://github.com/vokimon/python-wavefile) python packages 

20are [installed](https://bendalab.github.io/audioio/installation). 

21 

22Metadata and markers are preserved if possible. 

23 

24Run 

25```sh 

26audioconverter -l 

27``` 

28for a list of supported output file formats and 

29```sh 

30audioconverter -f wav -l 

31``` 

32for a list of supported encodings for a given output format (`-f` option). 

33 

34Running 

35```sh 

36audioconverter --help 

37``` 

38prints 

39```text 

40usage: audioconverter [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] 

41 [-U [THRESH]] [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] 

42 [-o OUTPATH] [-i KWARGS] 

43 [files ...] 

44 

45Convert audio file formats. 

46 

47positional arguments: 

48 files one or more input files to be combined into a single output file 

49 

50options: 

51 -h, --help show this help message and exit 

52 --version show program's version number and exit 

53 -v print debug output 

54 -l list supported file formats and encodings 

55 -f FORMAT audio format of output file 

56 -e ENCODING audio encoding of output file 

57 -s SCALE scale the data by factor SCALE 

58 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 

59 0.5) and divide by two 

60 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 

61 0.5) and clip 

62 -d FAC downsample by integer factor 

63 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0) 

64 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "." 

65 -r KEY remove keys from metadata. Keys can have section names separated by "." 

66 -n NUM merge NUM input files into one output file 

67 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be 

68 replaced by their values from the input file 

69 -i KWARGS key-word arguments for the data loader function 

70 

71version 2.6.0 by Benda-Lab (2020-2025) 

72``` 

73 

74## Functions 

75 

76- `add_arguments()`: add command line arguments to parser. 

77- `parse_channels()`: parse channel selection string. 

78- `parse_load_kwargs()`: parse additional arguments for loading data. 

79- `check_format()`: check whether requested audio format is valid and supported. 

80- `list_formats_encodings()`: list available formats or encodings. 

81- `make_outfile()`: make name for output file. 

82- `modify_data()`: modify audio data and add modifications to metadata. 

83- `format_outfile()`: put metadata values into name of output file. 

84- `main()`: command line script for converting, downsampling, renaming and merging audio files. 

85""" 

86 

87import os 

88import sys 

89import argparse 

90import numpy as np 

91 

92from pathlib import Path 

93from scipy.signal import decimate 

94 

95from .version import __version__, __year__ 

96from .audioloader import load_audio, markers, AudioLoader 

97from .audiometadata import flatten_metadata, unflatten_metadata 

98from .audiometadata import add_metadata, remove_metadata, cleanup_metadata 

99from .audiometadata import bext_history_str, add_history 

100from .audiometadata import update_gain, add_unwrap 

101from .audiotools import unwrap 

102from .audiowriter import available_formats, available_encodings 

103from .audiowriter import format_from_extension, write_audio 

104 

105 

def add_arguments(parser):
    """Add command line arguments to parser.

    Registers all options of the audioconverter script on `parser`.
    The parsed values end up in the attributes `verbose`, `list_formats`,
    `data_format`, `encoding`, `scale`, `unwrap`, `unwrap_clip`,
    `decimate`, `channels`, `md_list`, `remove_keys`, `nmerge`,
    `outpath`, `load_kwargs` and `files` of the resulting namespace.

    Parameters
    ----------
    parser: argparse.ArgumentParser
        The parser.
    """
    parser.add_argument('--version', action='version', version=__version__)
    parser.add_argument('-v', action='count', dest='verbose', default=0,
                        help='print debug output')
    parser.add_argument('-l', dest='list_formats', action='store_true',
                        help='list supported file formats and encodings')
    parser.add_argument('-f', dest='data_format', default=None, type=str,
                        metavar='FORMAT', help='audio format of output file')
    parser.add_argument('-e', dest='encoding', default=None, type=str,
                        help='audio encoding of output file')
    parser.add_argument('-s', dest='scale', default=1, type=float,
                        help='scale the data by factor SCALE')
    # With nargs='?' the value of `const` is used when the option is
    # given without THRESH, so the help text has to quote `const`.
    # A default of 0 (option absent) disables unwrapping.
    parser.add_argument('-u', dest='unwrap', default=0, type=float,
                        metavar='THRESH', const=1.5, nargs='?',
                        help='unwrap clipped data with threshold relative to maximum of input range (default is 1.5) and divide by two')
    parser.add_argument('-U', dest='unwrap_clip', default=0, type=float,
                        metavar='THRESH', const=1.5, nargs='?',
                        help='unwrap clipped data with threshold relative to maximum of input range (default is 1.5) and clip')
    parser.add_argument('-d', dest='decimate', default=1, type=int,
                        metavar='FAC',
                        help='downsample by integer factor')
    parser.add_argument('-c', dest='channels', default='', type=str,
                        help='comma and dash separated list of channels to be saved (first channel is 0)')
    parser.add_argument('-a', dest='md_list', action='append', default=[],
                        type=str, metavar='KEY=VALUE',
                        help='add key-value pairs to metadata. Keys can have section names separated by "."')
    parser.add_argument('-r', dest='remove_keys', action='append', default=[],
                        type=str, metavar='KEY',
                        help='remove keys from metadata. Keys can have section names separated by "."')
    parser.add_argument('-n', dest='nmerge', default=0, type=int, metavar='NUM',
                        help='merge NUM input files into one output file')
    parser.add_argument('-o', dest='outpath', default=None, type=str,
                        help='path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their values from the input file')
    parser.add_argument('-i', dest='load_kwargs', default=[],
                        action='append', metavar='KWARGS',
                        help='key-word arguments for the data loader function')
    parser.add_argument('files', nargs='*', type=str,
                        help='one or more input files to be combined into a single output file')

151 

152 

def parse_channels(cstr):
    """Translate a channel selection string into a list of channel indices.

    Parameters
    ----------
    cstr: str
        Comma separated items. Each item is either a single channel
        number or two numbers joined by a dash, denoting an inclusive
        range of channels. Empty items are skipped.

    Returns
    -------
    channels: list of int
        The selected channels in the order they were specified.

    Raises
    ------
    ValueError
        If an item cannot be converted to integer numbers.
    """
    selected = []
    for token in cstr.split(','):
        token = token.strip()
        if not token:
            continue
        bounds = [part.strip() for part in token.split('-')]
        if len(bounds) != 2:
            # single channel (anything else fails in int()):
            selected.append(int(token))
            continue
        # inclusive range first-last:
        first, last = map(int, bounds)
        selected.extend(range(first, last + 1))
    return selected

177 

178 

def parse_load_kwargs(load_strs):
    """Turn `key:value` strings into key-word arguments for loading data.

    Parameters
    ----------
    load_strs: list of str
        Strings with comma separated `key:value` pairs as returned
        by args.load_kwargs from `add_arguments()`.
        Items that do not consist of exactly one key and one value
        separated by a colon are ignored.

    Returns
    -------
    load_kwargs: dict
        Key-word arguments for `load_audio()` and related functions.
        Value strings containing integer or floating point numbers
        are converted to `int` and `float`, respectively.
        All other values are kept as strings.
    """
    parsed = {}
    items = (item for arg in load_strs for item in arg.split(','))
    for item in items:
        parts = item.split(':')
        if len(parts) != 2:
            continue
        name, text = (part.strip() for part in parts)
        # try the number types first, fall back to the plain string:
        for convert in (int, float):
            try:
                parsed[name] = convert(text)
                break
            except ValueError:
                pass
        else:
            parsed[name] = text
    return parsed

211 

212 

def check_format(format):
    """Tell whether the requested audio format is supported.

    An empty or missing format, or one whose upper-case name is not
    contained in `available_formats()`, is reported as invalid by
    printing an error message and a usage hint on console.

    Parameters
    ----------
    format: string
        Audio format to be checked.

    Returns
    -------
    valid: bool
        True if the requested audio format is valid.
    """
    if format and format.upper() in available_formats():
        return True
    print(f'! invalid audio file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False

236 

237 

def list_formats_encodings(data_format):
    """Print available audio file formats or the encodings of one format.

    Exits the program with status -1 if `data_format` is given
    but rejected by `check_format()`.

    Parameters
    ----------
    data_format: None or str
        If provided, list encodings for this data format.
        Otherwise, list available audio file formats.
    """
    if data_format:
        # encodings can only be listed for a supported format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        for encoding in available_encodings(data_format):
            print(f' {encoding}')
        return
    print('available file formats:')
    for fmt in available_formats():
        print(f' {fmt}')

257 

258 

def make_outfile(outpath, infile, data_format, blocks, format_from_ext):
    """Compose the path and the format of the output file.

    If `outpath` is missing or an existing directory, the output file
    is named after the input file (placed inside that directory) and
    gets the lower-case `data_format` as its extension. In this case
    a `data_format` is required, otherwise the program exits with
    status -1. If `outpath` is anything else, it is taken as the
    output file itself: its extension is replaced by `data_format` if
    given, otherwise the format is deduced from the file name.

    Parameters
    ----------
    outpath: None or str
        Requested output path.
    infile: Path
        Path of the input file.
    data_format: None or str
        Requested output file format.
    blocks: bool
        If True, produce outputfile for group of input files.
    format_from_ext: function
        Function that inspects a filename for its extension and
        deduces a file format from it.

    Returns
    -------
    outfile: Path
        Path of output file.
    data_format: str
        Format of output file.
    """
    here = Path()
    target = here if outpath is None else Path(outpath)
    # A not yet existing output path without a known extension is
    # created as a directory when several output files are written.
    # NOTE(review): this is only reached with a falsy data_format, and
    # the directory branch below then exits with an error - confirm
    # that creating the directory first is intended.
    make_dir = (blocks and not data_format
                and format_from_ext(target) is None
                and not target.exists())
    if make_dir:
        target.mkdir()
    if target != here and not target.is_dir():
        # outpath names the output file itself:
        if data_format:
            return target.with_suffix('.' + data_format.lower()), data_format
        return target, format_from_ext(target)
    # no output path or a directory: derive name from input file:
    outfile = infile if target == here else target / infile
    if not data_format:
        print('! need to specify an audio format via -f or a file extension !')
        sys.exit(-1)
    return outfile.with_suffix('.' + data_format.lower()), data_format

304 

305 

def modify_data(data, rate, metadata, channels, scale,
                unwrap_clip, unwrap_thresh, ampl_max, unit, decimate_fac):
    """Select channels, scale, unwrap and downsample audio data.

    The steps are applied in this order. Scaling and unwrapping
    operate in place on the data array and are recorded in `metadata`.

    Parameters
    ----------
    data: 2-D array of float
        The data to be written into the output file.
    rate: float
        Sampling rate of the data in Hertz.
    metadata: nested dict
        Metadata. Modified in place.
    channels: list of int
        List of channels to be selected from the data.
        An empty list keeps all channels.
    scale: float
        Scaling factor to be applied to the data.
    unwrap_clip: float
        If larger than zero, unwrap the data using this as a threshold
        relative to `ampl_max`, and clip the data at +-`ampl_max`.
    unwrap_thresh: float
        If larger than zero, unwrap the data using this as a threshold
        relative to `ampl_max`, and downscale the data by a factor of two.
        Also update the gain in the metadata.
        Only used if `unwrap_clip` is not applied.
    ampl_max: float
        Maximum amplitude of the input range.
    unit: str
        Unit of the input range.
    decimate_fac: int
        Downsample the data by this factor.

    Returns
    -------
    data: 2-D array of float
        The modified data.
    rate: float
        Sampling rate of the modified data, i.e. `rate` divided by
        `decimate_fac` if the data were downsampled.
    """
    # channel selection:
    if len(channels) >= 1:
        data = data[:, channels]

    # scaling, with the inverse factor going into the gain:
    if scale != 1:
        data *= scale
        gain_fac = 1/scale
        gain_updated = update_gain(metadata, gain_fac)
        if not gain_updated:
            metadata['gain'] = gain_fac

    # unwrapping, either with clipping or with halving the amplitudes:
    if unwrap_clip > 1e-3:
        unwrap(data, unwrap_clip, ampl_max)
        too_large = data > +ampl_max
        data[too_large] = +ampl_max
        too_small = data < -ampl_max
        data[too_small] = -ampl_max
        add_unwrap(metadata, unwrap_clip*ampl_max, ampl_max, unit)
    elif unwrap_thresh > 1e-3:
        unwrap(data, unwrap_thresh, ampl_max)
        data *= 0.5
        update_gain(metadata, 0.5)
        add_unwrap(metadata, unwrap_thresh*ampl_max, 0.0, unit)

    # downsampling along the time axis:
    if decimate_fac > 1:
        data = decimate(data, decimate_fac, axis=0)
        rate /= decimate_fac

    return data, rate

363 

364 

def format_outfile(outfile, metadata):
    """Put metadata values into name of output file.

    The stem of `outfile` is used as a format string and the
    flattened metadata are passed to it as key-word arguments.
    Nothing is done if the metadata are empty or the stem does
    not contain both an opening and a closing curly brace.

    Parameters
    ----------
    outfile: Path
        Path of output file. May contain metadata keys enclosed in curly braces.
    metadata: nested dict
        Metadata.

    Returns
    -------
    outfile: Path
        Output path with formatted name.

    Raises
    ------
    KeyError
        If the stem references a key not contained in the metadata.
    """
    if len(metadata) > 0 and '{' in outfile.stem and '}' in outfile.stem:
        fmd = flatten_metadata(metadata)
        # Spaces in string values are replaced by underscores.
        # All other values (numbers, None, lists, ...) are handed to
        # the format string unchanged, so they cannot fail on a
        # missing `replace()` method.
        fmd = {k: (v.replace(' ', '_') if isinstance(v, str) else v)
               for k, v in fmd.items()}
        outfile = outfile.with_stem(outfile.stem.format(**fmd))
    return outfile

385 

386 

def main(*cargs):
    """Command line script for converting, downsampling, renaming and merging audio files.

    Parses the command line, loads all input files with a single
    `AudioLoader`, and writes the data in groups of `-n NUM` input
    files (default: all of them) to output files. Each output file
    gets its own copy of the metadata, so that gain, unwrap and
    history entries of one output file do not end up in the next one.

    On errors a message is printed and the program exits via
    `sys.exit(-1)`.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
    """
    # needed only here and not imported at module level:
    import glob
    from copy import deepcopy

    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert audio file formats.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        # let argparse read sys.argv:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f the first positional argument is taken as format:
        if args.data_format is None and len(args.files) > 0:
            args.data_format = args.files[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.files) == 0 or len(args.files[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    # expand wildcard patterns:
    files = []
    if os.name == 'nt':
        for fn in args.files:
            files.extend(glob.glob(fn))
        # patterns that match nothing leave the list empty:
        if len(files) == 0:
            print(f'file "{args.files[0]}" not found!')
            sys.exit(-1)
    else:
        files = args.files

    nmerge = args.nmerge
    if nmerge == 0:
        nmerge = len(files)

    # kwargs for audio loader:
    load_kwargs = parse_load_kwargs(args.load_kwargs)

    # read in audio:
    try:
        data = AudioLoader(files, verbose=args.verbose - 1,
                           **load_kwargs)
    except FileNotFoundError:
        print(f'file "{files[0]}" not found!')
        sys.exit(-1)
    # make sure the loader is closed on every way out, including sys.exit():
    try:
        if len(data.file_paths) < len(files):
            print(f'file "{files[len(data.file_paths)]}" does not continue file "{data.file_paths[-1]}"!')
            sys.exit(-1)
        md = data.metadata()
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
        cleanup_metadata(md)
        locs, labels = data.markers()
        pre_history = bext_history_str(data.encoding,
                                       data.rate,
                                       data.channels,
                                       os.fsdecode(data.filepath))
        if args.verbose > 1:
            print(f'loaded audio file "{data.filepath}"')

        if data.encoding is not None and args.encoding is None:
            args.encoding = data.encoding
        for i0 in range(0, len(data.file_paths), nmerge):
            infile = data.file_paths[i0]
            outfile, data_format = make_outfile(args.outpath, infile,
                                                args.data_format,
                                                nmerge < len(files),
                                                format_from_extension)
            if not check_format(data_format):
                sys.exit(-1)
            if infile.resolve() == outfile.resolve():
                print(f'! cannot convert "{infile}" to itself !')
                sys.exit(-1)

            # range of frames covered by this group of input files:
            if len(data.file_paths) > 1:
                i1 = i0 + nmerge - 1
                if i1 >= len(data.end_indices):
                    i1 = len(data.end_indices) - 1
                si = data.start_indices[i0]
                ei = data.end_indices[i1]
            else:
                si = 0
                ei = data.frames
            # modify_data() and add_history() change the metadata in
            # place; work on a copy so each output file starts from
            # the metadata of the input:
            wmd = deepcopy(md)
            wdata, wrate = modify_data(data[si:ei], data.rate,
                                       wmd, channels, args.scale,
                                       args.unwrap_clip, args.unwrap, 1.0,
                                       '', args.decimate)
            # markers within this range, relative to its start:
            mask = (locs[:, 0] >= si) & (locs[:, 0] < ei)
            wlocs = locs[mask, :]
            if len(wlocs) > 0:
                wlocs[:, 0] -= si
            wlabels = labels[mask, :]
            outfile = format_outfile(outfile, wmd)
            # history:
            hkey = 'CodingHistory'
            if 'BEXT' in wmd:
                hkey = 'BEXT.' + hkey
            # describe the written data: their rate and channel count
            # may differ from the input after -d and -c:
            history = bext_history_str(args.encoding, wrate,
                                       wdata.shape[1], os.fsdecode(outfile))
            add_history(wmd, history, hkey, pre_history)
            # write out audio:
            try:
                write_audio(outfile, wdata, wrate, wmd, wlocs, wlabels,
                            format=data_format, encoding=args.encoding)
            except PermissionError:
                print(f'failed to write "{outfile}": permission denied!')
                sys.exit(-1)
            # message:
            if args.verbose > 1:
                print(f'wrote "{outfile}"')
            elif args.verbose:
                print(f'converted audio file "{infile}" to "{outfile}"')
    finally:
        data.close()

507 

508 

if __name__ == '__main__':
    # run as a script: hand the command line arguments (without the
    # program name) to main()
    cli_args = sys.argv[1:]
    main(*cli_args)