Coverage for src/audioio/audioconverter.py: 94%
186 statements
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-15 07:29 +0000
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-15 07:29 +0000
1"""Command line script for converting, downsampling, renaming and merging audio files.
3```sh
4audioconverter -o test.wav test.mp3
5```
6converts 'test.mp3' to 'test.wav'.
8The script reads all input files with `audioio.audioloader.load_audio()`,
9combines the audio and marker data and writes them along with the
10metadata to an output file using `audioio.audiowriter.write_audio()`.
12Thus, all formats supported by these functions and the installed
13python audio modules are available. This implies that MP3 files can be
14read via the [audioread](https://github.com/beetbox/audioread) module,
15and they can be written via [pydub](https://github.com/jiaaro/pydub).
16Many other input and output file formats are supported by the [sndfile
17library](http://www.mega-nerd.com/libsndfile/), provided the
18[SoundFile](http://pysoundfile.readthedocs.org) or
19[wavefile](https://github.com/vokimon/python-wavefile) python packages
20are [installed](https://bendalab.github.io/audioio/installation).
22Metadata and markers are preserved if possible.
24Run
25```sh
26audioconverter -l
27```
28for a list of supported output file formats and
29```sh
30audioconverter -f wav -l
31```
32for a list of supported encodings for a given output format (`-f` option).
34Running
35```sh
36audioconverter --help
37```
38prints
39```text
40usage: audioconverter [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] [-U [THRESH]]
41 [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH]
42 [file ...]
44Convert audio file formats.
46positional arguments:
47 file one or more input files to be combined into a single output file
49options:
50 -h, --help show this help message and exit
51 --version show program's version number and exit
52 -v print debug output
53 -l list supported file formats and encodings
54 -f FORMAT audio format of output file
55 -e ENCODING audio encoding of output file
56 -s SCALE scale the data by factor SCALE
57 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and divide by
58 two
59 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and clip
60 -d FAC downsample by integer factor
61 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0)
62 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "."
63 -r KEY remove keys from metadata. Keys can have section names separated by "."
64 -n NUM merge NUM input files into one output file
65 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their
66 values from the input file
68version 2.0.0 by Benda-Lab (2020-2024)
69```
71"""
73import os
74import sys
75import argparse
76import numpy as np
77from scipy.signal import decimate
78from .version import __version__, __year__
79from .audioloader import load_audio, markers, AudioLoader
80from .audiometadata import flatten_metadata, unflatten_metadata
81from .audiometadata import add_metadata, remove_metadata, cleanup_metadata
82from .audiometadata import bext_history_str, add_history
83from .audiometadata import update_gain, add_unwrap
84from .audiotools import unwrap
85from .audiowriter import available_formats, available_encodings
86from .audiowriter import format_from_extension, write_audio
def add_arguments(parser):
    """ Add command line arguments to parser.

    The `-u` and `-U` options take an optional threshold. If the option
    is given without a value, argparse stores the `const` value of 1.5;
    if the option is absent, the default of 0 is stored, which disables
    unwrapping.

    Parameters
    ----------
    parser: argparse.ArgumentParser
        The parser.
    """
    parser.add_argument('--version', action='version', version=__version__)
    parser.add_argument('-v', action='count', dest='verbose', default=0,
                        help='print debug output')
    parser.add_argument('-l', dest='list_formats', action='store_true',
                        help='list supported file formats and encodings')
    parser.add_argument('-f', dest='data_format', default=None, type=str,
                        metavar='FORMAT', help='audio format of output file')
    parser.add_argument('-e', dest='encoding', default=None, type=str,
                        help='audio encoding of output file')
    parser.add_argument('-s', dest='scale', default=1, type=float,
                        help='scale the data by factor SCALE')
    # the help text must state the value of `const`, which is what is
    # used when the option is given without an explicit threshold:
    parser.add_argument('-u', dest='unwrap', default=0, type=float,
                        metavar='THRESH', const=1.5, nargs='?',
                        help='unwrap clipped data with threshold relative to maximum of input range (default is 1.5) and divide by two')
    parser.add_argument('-U', dest='unwrap_clip', default=0, type=float,
                        metavar='THRESH', const=1.5, nargs='?',
                        help='unwrap clipped data with threshold relative to maximum of input range (default is 1.5) and clip')
    parser.add_argument('-d', dest='decimate', default=1, type=int,
                        metavar='FAC',
                        help='downsample by integer factor')
    parser.add_argument('-c', dest='channels', default='', type=str,
                        help='comma and dash separated list of channels to be saved (first channel is 0)')
    parser.add_argument('-a', dest='md_list', action='append', default=[],
                        type=str, metavar='KEY=VALUE',
                        help='add key-value pairs to metadata. Keys can have section names separated by "."')
    parser.add_argument('-r', dest='remove_keys', action='append', default=[],
                        type=str, metavar='KEY',
                        help='remove keys from metadata. Keys can have section names separated by "."')
    parser.add_argument('-n', dest='nmerge', default=0, type=int, metavar='NUM',
                        help='merge NUM input files into one output file')
    parser.add_argument('-o', dest='outpath', default=None, type=str,
                        help='path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their values from the input file')
    parser.add_argument('file', nargs='*', type=str,
                        help='one or more input files to be combined into a single output file')
def parse_channels(cstr):
    """ Parse channel selection string.

    Empty items (e.g. from ",,") are skipped. An item with exactly one
    dash is expanded to the inclusive range between the two numbers;
    a range whose end is smaller than its start expands to nothing.

    Parameters
    ----------
    cstr: str
        String with comma separated channels and dash separated channel ranges.

    Returns
    -------
    channels: list of int
        List of selected channels.

    Raises
    ------
    ValueError
        If an item cannot be interpreted as a channel number or range.
    """
    channels = []
    for item in cstr.split(','):
        item = item.strip()
        if not item:
            continue
        bounds = [s.strip() for s in item.split('-')]
        try:
            if len(bounds) == 2:
                first, last = int(bounds[0]), int(bounds[1])
                channels.extend(range(first, last + 1))
            else:
                channels.append(int(item))
        except ValueError:
            # name the offending item instead of the bare int() message:
            raise ValueError(f'invalid channel specification "{item}"') from None
    return channels
def check_format(format):
    """
    Report whether an audio format is known to the audio writer.

    The upper-case version of the format is looked up in
    `available_formats()`. For an empty or unknown format a hint on
    how to list the available formats is printed to the console.

    Parameters
    ----------
    format: string
        Audio format to be checked.

    Returns
    -------
    valid: bool
        True if the requested audio format is valid.
    """
    if format and format.upper() in available_formats():
        return True
    # not usable: tell the user how to find the valid formats
    hint_lines = (f'! invalid audio file format "{format}"!',
                  'run',
                  f'> {__file__} -l',
                  'for a list of available formats')
    for line in hint_lines:
        print(line)
    return False
def list_formats_encodings(data_format):
    """ Print available formats or the encodings of one format.

    Exits the program with status -1 if a data format is given
    that does not pass `check_format()`.

    Parameters
    ----------
    data_format: None or str
        If provided, list encodings for this data format.
        Otherwise, list available audio file formats.
    """
    if not data_format:
        # no format requested: show all formats
        print('available file formats:')
        for name in available_formats():
            print(f' {name}')
        return
    if not check_format(data_format):
        sys.exit(-1)
    print(f'available encodings for {data_format} file format:')
    for name in available_encodings(data_format):
        print(f' {name}')
def make_outfile(outpath, infile, data_format, blocks, format_from_ext):
    """ Make name for output file.

    If `outpath` is empty or an existing directory, the output file is
    named after the input file (placed into `outpath` if given) with
    the extension of the requested `data_format`. Otherwise `outpath`
    is taken as the name of the output file; its extension is replaced
    by `data_format` if given, or else the format is deduced from the
    extension.

    When several output files are produced (`blocks` is True) and
    `outpath` neither exists nor has an extension known to
    `format_from_ext`, it is created as a directory, including any
    missing parent directories.

    Exits the program with status -1 if the output file is named after
    the input file and no `data_format` was specified.

    Parameters
    ----------
    outpath: None or str
        Requested output path.
    infile: str
        Name of the input file.
    data_format: None or str
        Requested output file format.
    blocks: bool
        If True, produce outputfile for group of input files.
    format_from_ext: function
        Function that inspects a filename for its extension and
        deduces a file format from it.

    Returns
    -------
    outfile: str
        Name of output file.
    data_format: str
        Format of output file.
    """
    if blocks and outpath and \
       format_from_ext(outpath) is None and \
       not os.path.exists(outpath):
        # makedirs also creates missing parents, where mkdir would fail:
        os.makedirs(outpath, exist_ok=True)
    if not outpath or os.path.isdir(outpath):
        # name the output file after the input file:
        outfile = os.path.join(outpath, infile) if outpath else infile
        if not data_format:
            print('! need to specify an audio format via -f or a file extension !')
            sys.exit(-1)
    else:
        outfile = outpath
    if data_format:
        # the requested format determines the file extension:
        outfile = os.path.splitext(outfile)[0] + os.extsep + data_format.lower()
    else:
        data_format = format_from_ext(outfile)
    return outfile, data_format
def modify_data(data, rate, metadata, channels, scale,
                unwrap_clip, unwrap_thresh, ampl_max, unit, decimate_fac):
    """ Select, scale, unwrap and downsample audio data.

    The steps are applied in this order: channel selection, scaling,
    unwrapping (either with clipping or with halving, clipping takes
    precedence), and decimation. Scaling and unwrapping are recorded
    in the metadata via `update_gain()` and `add_unwrap()`.

    Parameters
    ----------
    data: 2-D array of float
        The data to be written into the output file.
    rate: float
        Sampling rate of the data in Hertz.
    metadata: nested dict
        Metadata.
    channels: list of int
        List of channels to be selected from the data.
        If empty, all channels are kept.
    scale: float
        Scaling factor to be applied to the data.
    unwrap_clip: float
        If larger than zero, unwrap the data using this as a threshold
        relative to `ampl_max`, and clip the data at +-`ampl_max`.
    unwrap_thresh: float
        If larger than zero, unwrap the data using this as a threshold
        relative to `ampl_max`, and downscale the data by a factor of two.
        Also update the gain in the metadata.
    ampl_max: float
        Maximum amplitude of the input range.
    unit: str
        Unit of the input range.
    decimate_fac: int
        Downsample the data by this factor.

    Returns
    -------
    data: 2-D array of float
        The modified data.
    rate: float
        Sampling rate of the modified data in Hertz,
        i.e. `rate` divided by `decimate_fac` if the data were decimated.
    """
    # thresholds at or below this value disable unwrapping:
    min_thresh = 1e-3
    do_clip = unwrap_clip > min_thresh
    do_halve = not do_clip and unwrap_thresh > min_thresh

    # select channels:
    if len(channels) > 0:
        data = data[:, channels]

    # scale data:
    if scale != 1:
        data *= scale
        update_gain(metadata, 1/scale)

    # fix data:
    if do_clip:
        unwrap(data, unwrap_clip, ampl_max)
        above = data > +ampl_max
        data[above] = +ampl_max
        below = data < -ampl_max
        data[below] = -ampl_max
        add_unwrap(metadata, unwrap_clip*ampl_max, ampl_max, unit)
    elif do_halve:
        unwrap(data, unwrap_thresh, ampl_max)
        data *= 0.5
        update_gain(metadata, 0.5)
        add_unwrap(metadata, unwrap_thresh*ampl_max, 0.0, unit)

    # decimate:
    if decimate_fac > 1:
        data = decimate(data, decimate_fac, axis=0)
        rate /= decimate_fac

    return data, rate
def format_outfile(outfile, metadata):
    """ Put metadata values into name of output file.

    Placeholders are only substituted if `metadata` is not empty and
    `outfile` contains both an opening and a closing curly brace.
    The flattened metadata are then passed to `str.format()`.
    Values of type int or float are passed on unchanged, so that
    numeric format specifications like `{key:.0f}` work. All other
    values are converted to strings with spaces replaced by
    underscores.

    Parameters
    ----------
    outfile: str
        Name of output file. May contain metadata keys enclosed in curly braces.
    metadata: nested dict
        Metadata.

    Returns
    -------
    outfile: str
        Name of output file.

    Raises
    ------
    KeyError
        If `outfile` references a key that is not in the flattened metadata.
    """
    if metadata and '{' in outfile and '}' in outfile:
        fmd = flatten_metadata(metadata)
        # str() guards against values without a replace() method
        # (e.g. None or lists), which would otherwise raise here
        # even if their keys are not used in the file name:
        fmd = {k: (v if isinstance(v, (int, float))
                   else str(v).replace(' ', '_'))
               for k, v in fmd.items()}
        outfile = outfile.format(**fmd)
    return outfile
def main(*cargs):
    """
    Command line script for converting, downsampling, renaming and merging audio files.

    The input files are processed in groups of `-n NUM` files (all
    files in a single group if `-n` is not given or not positive).
    The files of each group are concatenated, modified according to
    the command line options, and written to one output file together
    with the metadata of the first file of the group and the markers
    of all files of the group.

    On errors (missing input, invalid format, file not found,
    mismatching sampling rates or channel counts, permission denied)
    a message is printed and the program exits with status -1.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert audio file formats.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        if args.data_format is None and len(args.file) > 0:
            args.data_format = args.file[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.file) == 0 or len(args.file[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    nmerge = args.nmerge
    # a non-positive value means: merge all files into one output file
    # (a negative step would make the range below silently empty):
    if nmerge <= 0:
        nmerge = len(args.file)

    for i0 in range(0, len(args.file), nmerge):
        infile = args.file[i0]
        outfile, data_format = make_outfile(args.outpath, infile,
                                            args.data_format,
                                            nmerge < len(args.file),
                                            format_from_extension)
        if not check_format(data_format):
            sys.exit(-1)
        if os.path.realpath(infile) == os.path.realpath(outfile):
            print(f'! cannot convert "{infile}" to itself !')
            sys.exit(-1)
        # read in audio:
        pre_history = None
        # the encoding is determined for each group of files separately,
        # so that the encoding of a previous group does not leak into
        # this one:
        encoding = args.encoding
        try:
            with AudioLoader(infile) as sf:
                data = sf[:,:]
                rate = sf.rate
                md = sf.metadata()
                locs, labels = sf.markers()
                pre_history = bext_history_str(sf.encoding,
                                               sf.rate,
                                               sf.channels,
                                               sf.filepath)
                if sf.encoding is not None and encoding is None:
                    encoding = sf.encoding
        except FileNotFoundError:
            print(f'file "{infile}" not found!')
            sys.exit(-1)
        if args.verbose > 1:
            print(f'loaded audio file "{infile}"')
        for infile in args.file[i0+1:i0+nmerge]:
            try:
                xdata, xrate = load_audio(infile)
            except FileNotFoundError:
                print(f'file "{infile}" not found!')
                sys.exit(-1)
            if abs(rate - xrate) > 1:
                print('! cannot merge files with different sampling rates !')
                print(f' file "{args.file[i0]}" has {rate:.0f}Hz')
                print(f' file "{infile}" has {xrate:.0f}Hz')
                sys.exit(-1)
            if xdata.shape[1] != data.shape[1]:
                print('! cannot merge files with different numbers of channels !')
                print(f' file "{args.file[i0]}" has {data.shape[1]} channels')
                print(f' file "{infile}" has {xdata.shape[1]} channels')
                sys.exit(-1)
            # number of frames collected so far, needed for the markers:
            offset = len(data)
            data = np.vstack((data, xdata))
            xlocs, xlabels = markers(infile)
            # markers refer to positions within their own file, so they
            # need to be shifted by the frames of the preceding files.
            # NOTE(review): assumes the first column of locs holds the
            # marker positions in frames, as documented for audioio
            # markers - confirm.
            xlocs = np.array(xlocs)
            if xlocs.ndim == 2 and len(xlocs) > 0:
                xlocs[:, 0] += offset
            locs = np.vstack((locs, xlocs))
            labels = np.vstack((labels, xlabels))
            if args.verbose > 1:
                print(f'loaded audio file "{infile}"')
        data, rate = modify_data(data, rate, md, channels, args.scale,
                                 args.unwrap_clip, args.unwrap, 1.0,
                                 '', args.decimate)
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
            cleanup_metadata(md)
        outfile = format_outfile(outfile, md)
        # history:
        hkey = 'CodingHistory'
        if 'BEXT' in md:
            hkey = 'BEXT.' + hkey
        history = bext_history_str(encoding, rate,
                                   data.shape[1], outfile)
        add_history(md, history, hkey, pre_history)
        # write out audio:
        try:
            write_audio(outfile, data, rate, md, locs, labels,
                        format=data_format, encoding=encoding)
        except PermissionError:
            print(f'failed to write "{outfile}": permission denied!')
            sys.exit(-1)
        # message:
        if args.verbose > 1:
            print(f'wrote "{outfile}"')
        elif args.verbose:
            print(f'converted audio file "{infile}" to "{outfile}"')
# run the converter only when executed as a script, not on import:
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(*cli_args)