Coverage for src / audioio / audioconverter.py: 87%
205 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-17 21:34 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-17 21:34 +0000
1"""Command line script for converting, downsampling, renaming and merging audio files.
3```sh
4audioconverter -o test.wav test.mp3
5```
6converts 'test.mp3' to 'test.wav'.
8The script reads all input files with `audioio.audioloader.load_audio()`,
9combines the audio and marker data and writes them along with the
10metadata to an output file using `audioio.audiowriter.write_audio()`.
12Thus, all formats supported by these functions and the installed
13python audio modules are available. This implies that MP3 files can be
14read via the [audioread](https://github.com/beetbox/audioread) module,
15and they can be written via [pydub](https://github.com/jiaaro/pydub).
16Many other input and output file formats are supported by the [sndfile
17library](http://www.mega-nerd.com/libsndfile/), provided the
18[SoundFile](http://pysoundfile.readthedocs.org) or
19[wavefile](https://github.com/vokimon/python-wavefile) python packages
20are [installed](https://bendalab.github.io/audioio/installation).
22Metadata and markers are preserved if possible.
24Run
25```sh
26audioconverter -l
27```
28for a list of supported output file formats and
29```sh
30audioconverter -f wav -l
31```
32for a list of supported encodings for a given output format (`-f` option).
34Running
35```sh
36audioconverter --help
37```
38prints
39```text
40usage: audioconverter [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]]
41 [-U [THRESH]] [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM]
42 [-o OUTPATH] [-i KWARGS]
43 [files ...]
45Convert audio file formats.
47positional arguments:
48 files one or more input files to be combined into a single output file
50options:
51 -h, --help show this help message and exit
52 --version show program's version number and exit
53 -v print debug output
54 -l list supported file formats and encodings
55 -f FORMAT audio format of output file
56 -e ENCODING audio encoding of output file
57 -s SCALE scale the data by factor SCALE
58 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is
59 0.5) and divide by two
60 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is
61 0.5) and clip
62 -d FAC downsample by integer factor
63 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0)
64 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "."
65 -r KEY remove keys from metadata. Keys can have section names separated by "."
66 -n NUM merge NUM input files into one output file
67 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be
68 replaced by their values from the input file
69 -i KWARGS key-word arguments for the data loader function
71version 2.6.0 by Benda-Lab (2020-2025)
72```
74## Functions
76- `add_arguments()`: add command line arguments to parser.
77- `parse_channels()`: parse channel selection string.
78- `parse_load_kwargs()`: parse additional arguments for loading data.
79- `check_format()`: check whether requested audio format is valid and supported.
80- `list_formats_encodings()`: list available formats or encodings.
81- `make_outfile()`: make name for output file.
82- `modify_data()`: modify audio data and add modifications to metadata.
83- `format_outfile()`: put metadata values into name of output file.
84- `main()`: command line script for converting, downsampling, renaming and merging audio files.
85"""
87import os
88import sys
89import argparse
90import numpy as np
92from pathlib import Path
93from scipy.signal import decimate
95from .version import __version__, __year__
96from .audioloader import load_audio, markers, AudioLoader
97from .audiometadata import flatten_metadata, unflatten_metadata
98from .audiometadata import add_metadata, remove_metadata, cleanup_metadata
99from .audiometadata import bext_history_str, add_history
100from .audiometadata import update_gain, add_unwrap
101from .audiotools import unwrap
102from .audiowriter import available_formats, available_encodings
103from .audiowriter import format_from_extension, write_audio
def add_arguments(parser):
    """Register all command line options of the converter on a parser.

    Parameters
    ----------
    parser: argparse.ArgumentParser
        The parser that receives the options.
    """
    # Shared first part of the help texts of the two unwrap options.
    # NOTE(review): the help text announces a default of 0.5 whereas
    # `const` below is 1.5 - confirm which of the two is intended.
    unwrap_help = ('unwrap clipped data with threshold relative to '
                   'maximum of input range (default is 0.5)')

    # One entry per option, in the order they appear in the usage message:
    option_specs = (
        ('--version', dict(action='version', version=__version__)),
        ('-v', dict(action='count', dest='verbose', default=0,
                    help='print debug output')),
        ('-l', dict(dest='list_formats', action='store_true',
                    help='list supported file formats and encodings')),
        ('-f', dict(dest='data_format', default=None, type=str,
                    metavar='FORMAT',
                    help='audio format of output file')),
        ('-e', dict(dest='encoding', default=None, type=str,
                    help='audio encoding of output file')),
        ('-s', dict(dest='scale', default=1, type=float,
                    help='scale the data by factor SCALE')),
        ('-u', dict(dest='unwrap', default=0, type=float,
                    metavar='THRESH', const=1.5, nargs='?',
                    help=unwrap_help + ' and divide by two')),
        ('-U', dict(dest='unwrap_clip', default=0, type=float,
                    metavar='THRESH', const=1.5, nargs='?',
                    help=unwrap_help + ' and clip')),
        ('-d', dict(dest='decimate', default=1, type=int,
                    metavar='FAC',
                    help='downsample by integer factor')),
        ('-c', dict(dest='channels', default='', type=str,
                    help='comma and dash separated list of channels to be saved (first channel is 0)')),
        ('-a', dict(dest='md_list', action='append', default=[],
                    type=str, metavar='KEY=VALUE',
                    help='add key-value pairs to metadata. Keys can have section names separated by "."')),
        ('-r', dict(dest='remove_keys', action='append', default=[],
                    type=str, metavar='KEY',
                    help='remove keys from metadata. Keys can have section names separated by "."')),
        ('-n', dict(dest='nmerge', default=0, type=int, metavar='NUM',
                    help='merge NUM input files into one output file')),
        ('-o', dict(dest='outpath', default=None, type=str,
                    help='path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their values from the input file')),
        ('-i', dict(dest='load_kwargs', default=[],
                    action='append', metavar='KWARGS',
                    help='key-word arguments for the data loader function')),
        ('files', dict(nargs='*', type=str,
                       help='one or more input files to be combined into a single output file')),
    )
    for name, settings in option_specs:
        parser.add_argument(name, **settings)
def parse_channels(cstr):
    """Turn a channel selection string into a list of channel indices.

    Parameters
    ----------
    cstr: str
        Comma separated entries. Each entry is either a single channel
        or an inclusive range of channels written as `first-last`.
        Empty entries are ignored.

    Returns
    -------
    channels: list of int
        The selected channels in the order they were specified.
    """
    selected = []
    for token in cstr.split(','):
        token = token.strip()
        if not token:
            continue
        bounds = [part.strip() for part in token.split('-')]
        if len(bounds) != 2:
            # not a `first-last` range, must be a single channel:
            selected.append(int(token))
            continue
        first, last = (int(bound) for bound in bounds)
        selected += range(first, last + 1)
    return selected
def parse_load_kwargs(load_strs):
    """Collect key-word arguments for the data loader from strings.

    Parameters
    ----------
    load_strs: list of str
        Strings with comma separated `key:value` pairs as returned
        by args.load_kwargs from `add_arguments()`.
        Items that do not consist of exactly one key and one value
        are skipped.

    Returns
    -------
    load_kwargs: dict
        Key-word arguments for `load_audio()` and related functions.
        Value strings containing integer or floating point numbers
        are converted to `int` and `float`, respectively.
        All other values are kept as stripped strings.
    """
    def _to_number(text):
        # try the strictest conversion first, fall back to the string:
        for cast in (int, float):
            try:
                return cast(text)
            except ValueError:
                pass
        return text

    pairs = (item.split(':')
             for arg in load_strs
             for item in arg.split(','))
    return {pair[0].strip(): _to_number(pair[1].strip())
            for pair in pairs if len(pair) == 2}
def check_format(format):
    """Tell whether an audio format can be used for writing.

    A missing format or a format whose upper-case name is not listed
    by `available_formats()` is reported on the console together with
    a hint on how to list the available formats.

    Parameters
    ----------
    format: string
        Audio format to be checked.

    Returns
    -------
    valid: bool
        True if the requested audio format is valid.
    """
    if format and format.upper() in available_formats():
        return True
    print(f'! invalid audio file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False
def list_formats_encodings(data_format):
    """Print the available file formats or the encodings of a format.

    Exits the program with status -1 if `data_format` is given
    but `check_format()` rejects it.

    Parameters
    ----------
    data_format: None or str
        If provided, list encodings for this data format.
        Otherwise, list available audio file formats.
    """
    if data_format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        entries = available_encodings(data_format)
    else:
        print('available file formats:')
        entries = available_formats()
    for entry in entries:
        print(f' {entry}')
def make_outfile(outpath, infile, data_format, blocks, format_from_ext):
    """Derive path and format of the output file.

    If `outpath` is empty or a directory, the output file gets the
    name of the input file with the extension replaced by the
    lower-case `data_format`. In that case `data_format` is required,
    otherwise the program exits with status -1.
    If `outpath` is a file name, it is used as is and its extension
    is replaced by `data_format`, if provided. Without `data_format`
    the format is deduced from the extension of `outpath`.

    Parameters
    ----------
    outpath: None or str
        Requested output path.
    infile: Path
        Path of the input file.
    data_format: None or str
        Requested output file format.
    blocks: bool
        If True, produce outputfile for group of input files.
    format_from_ext: function
        Function that inspects a filename for its extension and
        deduces a file format from it.

    Returns
    -------
    outfile: Path
        Path of output file.
    data_format: str
        Format of output file.
    """
    here = Path()
    target = Path(outpath) if outpath is not None else Path('')

    # A not yet existing path without known extension is taken as
    # the directory for a group of output files.
    # NOTE(review): this is only reached without `data_format`, which
    # makes the directory branch below exit - confirm this is intended.
    if blocks and not data_format and \
       format_from_ext(target) is None and \
       not target.exists():
        target.mkdir()

    if target != here and not target.is_dir():
        # explicit name of the output file:
        if data_format:
            return target.with_suffix('.' + data_format.lower()), data_format
        return target, format_from_ext(target)

    # name of the output file follows the one of the input file:
    named = infile if target == here else target / infile
    if not data_format:
        print('! need to specify an audio format via -f or a file extension !')
        sys.exit(-1)
    return named.with_suffix('.' + data_format.lower()), data_format
def modify_data(data, rate, metadata, channels, scale,
                unwrap_clip, unwrap_thresh, ampl_max, unit, decimate_fac):
    """Apply the requested modifications to the data and log them in the metadata.

    The steps are applied in this order: channel selection, scaling,
    unwrapping (with clipping or with downscaling), and decimation.
    Scaling, unwrapping and clipping operate in place on `data`.

    Parameters
    ----------
    data: 2-D array of float
        The data to be written into the output file.
    rate: float
        Sampling rate of the data in Hertz.
    metadata: nested dict
        Metadata. Modified in place.
    channels: list of int
        List of channels to be selected from the data.
        If empty, all channels are kept.
    scale: float
        Scaling factor to be applied to the data.
        A factor of one leaves data and metadata untouched.
    unwrap_clip: float
        If larger than 0.001, unwrap the data using this as a threshold
        relative to `ampl_max`, and clip the data at +-`ampl_max`.
    unwrap_thresh: float
        If larger than 0.001 and `unwrap_clip` is not, unwrap the data
        using this as a threshold relative to `ampl_max`, and
        downscale the data by a factor of two.
        Also update the gain in the metadata.
    ampl_max: float
        Maximum amplitude of the input range.
    unit: str
        Unit of the input range.
    decimate_fac: int
        Downsample the data by this factor, if larger than one.

    Returns
    -------
    data: 2-D array of float
        The modified data.
    rate: float
        Sampling rate of the modified data in Hertz.
    """
    min_thresh = 1e-3

    # select channels:
    if len(channels) > 0:
        data = data[:, channels]

    # scale data:
    if scale != 1:
        data *= scale
        gain_fac = 1/scale
        if not update_gain(metadata, gain_fac):
            metadata['gain'] = gain_fac

    # fix data, clipping takes precedence over downscaling:
    clipping = unwrap_clip > min_thresh
    thresh = unwrap_clip if clipping else unwrap_thresh
    if thresh > min_thresh:
        unwrap(data, thresh, ampl_max)
        if clipping:
            data[data > +ampl_max] = +ampl_max
            data[data < -ampl_max] = -ampl_max
            clip_level = ampl_max
        else:
            data *= 0.5
            update_gain(metadata, 0.5)
            clip_level = 0.0
        add_unwrap(metadata, thresh*ampl_max, clip_level, unit)

    # decimate:
    if decimate_fac > 1:
        data = decimate(data, decimate_fac, axis=0)
        rate /= decimate_fac

    return data, rate
def format_outfile(outfile, metadata):
    """Put metadata values into name of output file.

    Only the stem of `outfile` is formatted, and only if the metadata
    are not empty and the stem contains both an opening and a closing
    curly brace. Spaces in non-numeric values are replaced by
    underscores.

    Parameters
    ----------
    outfile: Path
        Path of output file. May contain metadata keys enclosed in curly braces.
    metadata: nested dict
        Metadata.

    Returns
    -------
    outfile: Path
        Output path with formatted name.

    Raises
    ------
    KeyError
        If a key enclosed in curly braces is not contained
        in the flattened metadata.
    """
    stem = outfile.stem
    if len(metadata) == 0 or '{' not in stem or '}' not in stem:
        return outfile
    values = {}
    for key, value in flatten_metadata(metadata).items():
        if not isinstance(value, (int, float)):
            # Convert to string first, so that values that are neither
            # strings nor python numbers do not fail on `replace()`:
            value = str(value).replace(' ', '_')
        values[key] = value
    return outfile.with_stem(stem.format(**values))
def main(*cargs):
    """Command line script for converting, downsampling, renaming and merging audio files.

    All input files are opened together by a single `AudioLoader`.
    Groups of `-n NUM` consecutive input files (by default all of
    them) are then modified according to the command line options
    and written to one output file each. The program exits with
    status -1 on invalid arguments, missing input files, input files
    that do not continue each other, unsupported output formats, or
    output files that cannot be written.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If empty, `argparse` reads the arguments from `sys.argv`.
    """
    # Imported locally, because the module does not import them:
    import copy
    import glob

    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert audio file formats.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # with -l a single positional argument is taken as the format:
        if args.data_format is None and len(args.files) > 0:
            args.data_format = args.files[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.files) == 0 or len(args.files[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    # expand wildcard patterns:
    if os.name == 'nt':
        # The windows shell does not expand wildcards. Sort the matches
        # of each pattern to get a reproducible order for merging.
        files = []
        for fn in args.files:
            files.extend(sorted(glob.glob(fn)))
        if len(files) == 0:
            print('! no input files match the specified patterns !')
            sys.exit(-1)
    else:
        files = args.files

    nmerge = args.nmerge
    if nmerge == 0:
        nmerge = len(files)

    # kwargs for audio loader:
    load_kwargs = parse_load_kwargs(args.load_kwargs)

    # read in audio:
    try:
        data = AudioLoader(files, verbose=args.verbose - 1,
                           **load_kwargs)
    except FileNotFoundError:
        print(f'file "{files[0]}" not found!')
        sys.exit(-1)
    # make sure the loader is closed, even if we exit early:
    try:
        if len(data.file_paths) < len(files):
            print(f'file "{files[len(data.file_paths)]}" does not continue file "{data.file_paths[-1]}"!')
            sys.exit(-1)
        md = data.metadata()
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
        cleanup_metadata(md)
        locs, labels = data.markers()
        pre_history = bext_history_str(data.encoding,
                                       data.rate,
                                       data.channels,
                                       os.fsdecode(data.filepath))
        if args.verbose > 1:
            print(f'loaded audio file "{data.filepath}"')

        if data.encoding is not None and args.encoding is None:
            args.encoding = data.encoding
        for i0 in range(0, len(data.file_paths), nmerge):
            infile = data.file_paths[i0]
            outfile, data_format = make_outfile(args.outpath, infile,
                                                args.data_format,
                                                nmerge < len(files),
                                                format_from_extension)
            if not check_format(data_format):
                sys.exit(-1)
            if infile.resolve() == outfile.resolve():
                print(f'! cannot convert "{infile}" to itself !')
                sys.exit(-1)

            # range of frames covered by this group of input files:
            if len(data.file_paths) > 1:
                i1 = i0 + nmerge - 1
                if i1 >= len(data.end_indices):
                    i1 = len(data.end_indices) - 1
                si = data.start_indices[i0]
                ei = data.end_indices[i1]
            else:
                si = 0
                ei = data.frames
            # Each output file gets its own copy of the metadata.
            # Otherwise gain, unwrap and history entries of previously
            # written files would accumulate in the following ones.
            wmd = copy.deepcopy(md)
            wdata, wrate = modify_data(data[si:ei], data.rate,
                                       wmd, channels, args.scale,
                                       args.unwrap_clip, args.unwrap, 1.0,
                                       '', args.decimate)
            # markers within the frame range, relative to its start:
            mask = (locs[:, 0] >= si) & (locs[:, 0] < ei)
            wlocs = locs[mask, :]
            if len(wlocs) > 0:
                wlocs[:, 0] -= si
            wlabels = labels[mask, :]
            outfile = format_outfile(outfile, wmd)
            # history:
            hkey = 'CodingHistory'
            if 'BEXT' in wmd:
                hkey = 'BEXT.' + hkey
            # report the number of channels that are actually written,
            # which differs from the input if channels were selected:
            history = bext_history_str(args.encoding, wrate,
                                       wdata.shape[1], os.fsdecode(outfile))
            add_history(wmd, history, hkey, pre_history)
            # write out audio:
            try:
                write_audio(outfile, wdata, wrate, wmd, wlocs, wlabels,
                            format=data_format, encoding=args.encoding)
            except PermissionError:
                print(f'failed to write "{outfile}": permission denied!')
                sys.exit(-1)
            # message:
            if args.verbose > 1:
                print(f'wrote "{outfile}"')
            elif args.verbose:
                print(f'converted audio file "{infile}" to "{outfile}"')
    finally:
        data.close()
# Run the converter with the command line arguments
# when this module is executed as a script:
if __name__ == '__main__':
    main(*sys.argv[1:])