Coverage for src/audioio/audioconverter.py: 94%

1"""Command line script for converting, downsampling, renaming and merging audio files.

3```sh

4audioconverter -o test.wav test.mp3

5```

6converts 'test.mp3' to 'test.wav'.

8The script reads all input files with `audioio.audioloader.load_audio()`,

9combines the audio and marker data and writes them along with the

10metadata to an output file using `audioio.audiowriter.write_audio()`.

12Thus, all formats supported by these functions and the installed

13python audio modules are available. This implies that MP3 files can be

14read via the [audioread](https://github.com/beetbox/audioread) module,

15and they can be written via [pydub](https://github.com/jiaaro/pydub).

16Many other input and output file formats are supported by the [sndfile

17library](http://www.mega-nerd.com/libsndfile/), provided the

18[SoundFile](http://pysoundfile.readthedocs.org) or

19[wavefile](https://github.com/vokimon/python-wavefile) python packages

20are [installed](https://bendalab.github.io/audioio/installation).

22Metadata and markers are preserved if possible.

24Run

25```sh

26audioconverter -l

27```

28for a list of supported output file formats and

29```sh

30audioconverter -f wav -l

31```

32for a list of supported encodings for a given output format (`-f` option).

34Running

35```sh

36audioconverter --help

37```

38prints

39```text

40usage: audioconverter [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] [-U [THRESH]]

41 [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH]

42 [file ...]

44Convert audio file formats.

46positional arguments:

47 file one or more input files to be combined into a single output file

49options:

50 -h, --help show this help message and exit

51 --version show program's version number and exit

52 -v print debug output

53 -l list supported file formats and encodings

54 -f FORMAT audio format of output file

55 -e ENCODING audio encoding of output file

56 -s SCALE scale the data by factor SCALE

57 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and divide by

58 two

59 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and clip

60 -d FAC downsample by integer factor

61 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0)

62 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "."

63 -r KEY remove keys from metadata. Keys can have section names separated by "."

64 -n NUM merge NUM input files into one output file

65 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their

66 values from the input file

68version 2.0.0 by Benda-Lab (2020-2024)

69```

71"""

73import os

74import sys

75import argparse

76import numpy as np

77from scipy.signal import decimate

78from .version import __version__, __year__

79from .audioloader import load_audio, markers, AudioLoader

80from .audiometadata import flatten_metadata, unflatten_metadata

81from .audiometadata import add_metadata, remove_metadata, cleanup_metadata

82from .audiometadata import bext_history_str, add_history

83from .audiometadata import update_gain, add_unwrap

84from .audiotools import unwrap

85from .audiowriter import available_formats, available_encodings

86from .audiowriter import format_from_extension, write_audio

def add_arguments(parser):
    """ Register all audioconverter command line options on a parser.

    The options are registered in a fixed order, which is also the
    order in which they show up in the generated help text.

    Parameters
    ----------
    parser: argparse.ArgumentParser
        The parser that receives the options.
    """
    # NOTE(review): `-u` and `-U` use const=1.5 when given without a
    # value, but their help texts announce a default of 0.5 - confirm
    # which of the two is intended.
    option_table = (
        (('--version',),
         dict(action='version', version=__version__)),
        (('-v',),
         dict(action='count', dest='verbose', default=0,
              help='print debug output')),
        (('-l',),
         dict(dest='list_formats', action='store_true',
              help='list supported file formats and encodings')),
        (('-f',),
         dict(dest='data_format', default=None, type=str,
              metavar='FORMAT', help='audio format of output file')),
        (('-e',),
         dict(dest='encoding', default=None, type=str,
              help='audio encoding of output file')),
        (('-s',),
         dict(dest='scale', default=1, type=float,
              help='scale the data by factor SCALE')),
        (('-u',),
         dict(dest='unwrap', default=0, type=float,
              metavar='THRESH', const=1.5, nargs='?',
              help='unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and divide by two')),
        (('-U',),
         dict(dest='unwrap_clip', default=0, type=float,
              metavar='THRESH', const=1.5, nargs='?',
              help='unwrap clipped data with threshold relative to maximum of input range (default is 0.5) and clip')),
        (('-d',),
         dict(dest='decimate', default=1, type=int,
              metavar='FAC',
              help='downsample by integer factor')),
        (('-c',),
         dict(dest='channels', default='', type=str,
              help='comma and dash separated list of channels to be saved (first channel is 0)')),
        (('-a',),
         dict(dest='md_list', action='append', default=[],
              type=str, metavar='KEY=VALUE',
              help='add key-value pairs to metadata. Keys can have section names separated by "."')),
        (('-r',),
         dict(dest='remove_keys', action='append', default=[],
              type=str, metavar='KEY',
              help='remove keys from metadata. Keys can have section names separated by "."')),
        (('-n',),
         dict(dest='nmerge', default=0, type=int, metavar='NUM',
              help='merge NUM input files into one output file')),
        (('-o',),
         dict(dest='outpath', default=None, type=str,
              help='path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their values from the input file')),
        (('file',),
         dict(nargs='*', type=str,
              help='one or more input files to be combined into a single output file')),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

131

132

def parse_channels(cstr):
    """ Turn a channel selection string into a list of channel indices.

    Parameters
    ----------
    cstr: str
        Comma separated items. Each item is either a single channel
        number or an inclusive range written as `first-last`.
        Empty items are ignored.

    Returns
    -------
    channels: list of int
        The selected channels in the order given in `cstr`.

    Raises
    ------
    ValueError
        If an item is neither an integer nor a range of two integers.
    """
    selected = []
    for item in cstr.split(','):
        item = item.strip()
        if not item:
            # skip empty entries like in '1,,2' or in an empty string
            continue
        bounds = [part.strip() for part in item.split('-')]
        if len(bounds) != 2:
            # single channel (anything else fails in int())
            selected.append(int(item))
            continue
        first = int(bounds[0])
        last = int(bounds[1])
        # ranges include their last channel
        selected.extend(range(first, last + 1))
    return selected

157

158

def check_format(format):
    """
    Tell whether the requested audio format can be written.

    A format is accepted if it is non-empty and its upper-case form
    is contained in `available_formats()`. Otherwise a hint on how to
    list the available formats is printed on the console.

    Parameters
    ----------
    format: string
        Audio format to be checked.

    Returns
    -------
    valid: bool
        True if the requested audio format is valid.
    """
    if format and format.upper() in available_formats():
        return True
    # unknown or missing format: tell the user how to find valid ones
    print(f'! invalid audio file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False

183

184

def list_formats_encodings(data_format):
    """ Print the available file formats or the encodings of one format.

    Parameters
    ----------
    data_format: None or str
        If provided, print the encodings available for this format.
        The program exits with status -1 if the format is not valid.
        If not provided, print all available audio file formats.
    """
    if data_format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        for encoding in available_encodings(data_format):
            print(f' {encoding}')
        return
    print('available file formats:')
    for fmt in available_formats():
        print(f' {fmt}')

204

205

def make_outfile(outpath, infile, data_format, blocks, format_from_ext):
    """ Make name for output file.

    If `outpath` is missing or an existing directory, the output file
    is named after the input file (placed inside `outpath` if given)
    with its extension replaced by the lower-case `data_format`.
    Otherwise `outpath` itself is the output file; its extension is
    replaced if `data_format` is given, else the format is deduced
    from the extension of `outpath`.

    Parameters
    ----------
    outpath: None or str
        Requested output path.
    infile: str
        Name of the input file.
    data_format: None or str
        Requested output file format.
    blocks: bool
        If True, produce outputfile for group of input files.
        In this case an `outpath` without a known file extension that
        does not exist yet is created as a directory, including any
        missing parent directories.
    format_from_ext: function
        Function that inspects a filename for its extension and
        deduces a file format from it.

    Returns
    -------
    outfile: str
        Name of output file.
    data_format: str
        Format of output file.

    Raises
    ------
    SystemExit
        If the output file is named after the input file
        and no `data_format` was specified.
    """
    if blocks and outpath and \
       format_from_ext(outpath) is None and \
       not os.path.exists(outpath):
        # os.makedirs() also creates missing parent directories,
        # a plain os.mkdir() would fail on nested output paths:
        os.makedirs(outpath, exist_ok=True)
    if not outpath or os.path.isdir(outpath):
        # name the output file after the input file:
        outfile = infile
        if outpath:
            outfile = os.path.join(outpath, outfile)
        if not data_format:
            print('! need to specify an audio format via -f or a file extension !')
            sys.exit(-1)
        outfile = os.path.splitext(outfile)[0] + os.extsep + data_format.lower()
    else:
        # outpath is the name of the output file:
        outfile = outpath
        if data_format:
            outfile = os.path.splitext(outfile)[0] + os.extsep + data_format.lower()
        else:
            data_format = format_from_ext(outfile)
    return outfile, data_format

249

250

def modify_data(data, rate, metadata, channels, scale,
                unwrap_clip, unwrap_thresh, ampl_max, unit, decimate_fac):
    """ Apply the requested modifications to the data and log them in the metadata.

    The steps are applied in this order: channel selection, scaling,
    unwrapping (either with clipping or with halving), decimation.
    Scaling, unwrapping and clipping operate in place on the data
    array they are applied to.

    Parameters
    ----------
    data: 2-D array of float
        The data to be written into the output file.
    rate: float
        Sampling rate of the data in Hertz.
    metadata: nested dict
        Metadata. Passed on to `update_gain()` and `add_unwrap()`
        whenever the data are scaled or unwrapped.
    channels: list of int
        List of channels to be selected from the data.
        An empty list keeps all channels.
    scale: float
        Scaling factor to be applied to the data.
    unwrap_clip: float
        If larger than 1e-3, unwrap the data using this as a threshold
        relative to `ampl_max`, and clip the data at +-`ampl_max`.
        Takes precedence over `unwrap_thresh`.
    unwrap_thresh: float
        If larger than 1e-3, unwrap the data using this as a threshold
        relative to `ampl_max`, and downscale the data by a factor of two.
        Also update the gain in the metadata.
    ampl_max: float
        Maximum amplitude of the input range.
    unit: str
        Unit of the input range.
    decimate_fac: int
        Downsample the data by this factor if it is larger than one.

    Returns
    -------
    data: 2-D array of float
        The modified data.
    rate: float
        Sampling rate of the modified data in Hertz,
        i.e. `rate` divided by `decimate_fac` if the data were decimated.
    """
    # keep requested channels only:
    if len(channels) > 0:
        data = data[:, channels]

    # apply scaling factor:
    # NOTE(review): here the inverse of the applied factor is passed to
    # update_gain(), whereas the unwrap branch below passes the applied
    # factor (0.5) itself - confirm which convention update_gain() expects.
    if scale != 1:
        data *= scale
        update_gain(metadata, 1/scale)

    # repair wrapped data:
    if unwrap_clip > 1e-3:
        # unwrap and limit to the input range:
        unwrap(data, unwrap_clip, ampl_max)
        data[data > +ampl_max] = +ampl_max
        data[data < -ampl_max] = -ampl_max
        add_unwrap(metadata, unwrap_clip*ampl_max, ampl_max, unit)
    elif unwrap_thresh > 1e-3:
        # unwrap and halve the data:
        unwrap(data, unwrap_thresh, ampl_max)
        data *= 0.5
        update_gain(metadata, 0.5)
        add_unwrap(metadata, unwrap_thresh*ampl_max, 0.0, unit)

    # downsample along the time axis:
    if decimate_fac > 1:
        return decimate(data, decimate_fac, axis=0), rate / decimate_fac
    return data, rate

307

308

def format_outfile(outfile, metadata):
    """ Put metadata values into name of output file.

    The name is only processed if there are metadata and if the name
    contains both an opening and a closing curly brace. Then the
    metadata are flattened by `flatten_metadata()` and inserted via
    `str.format()`. Spaces in string values are replaced by
    underscores; values of any other type are inserted unchanged.

    Parameters
    ----------
    outfile: str
        Name of output file. May contain metadata keys enclosed in curly braces.
    metadata: nested dict
        Metadata.

    Returns
    -------
    outfile: str
        Name of output file.

    Raises
    ------
    KeyError
        If `outfile` references a key that is not in the flattened metadata.
    """
    if len(metadata) > 0 and '{' in outfile and '}' in outfile:
        fmd = flatten_metadata(metadata)
        # Only strings can contain spaces. Leaving all other values
        # untouched keeps numeric format specs working and avoids an
        # AttributeError on values without a replace() method:
        fmd = {k: (v.replace(' ', '_') if isinstance(v, str) else v)
               for k, v in fmd.items()}
        outfile = outfile.format(**fmd)
    return outfile

329

330

def main(*cargs):
    """
    Command line script for converting, downsampling, renaming and merging audio files.

    The input files are processed in groups of `-n NUM` files (all
    files form a single group if `-n` is not given or not positive).
    The files of each group are concatenated, modified as requested,
    and written together with metadata and markers of the group into
    one output file.

    On errors (missing input files, invalid output format, mismatching
    sampling rates or channel counts, missing write permission) a
    message is printed and the program exits with status -1.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If empty, the arguments are taken from `sys.argv`.
    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert audio file formats.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f the first positional argument is taken as the format:
        if args.data_format is None and len(args.file) > 0:
            args.data_format = args.file[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.file) == 0 or len(args.file[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    # a non-positive group size means: merge all input files
    # (a negative step would make the range below silently empty):
    nmerge = args.nmerge
    if nmerge <= 0:
        nmerge = len(args.file)

    for i0 in range(0, len(args.file), nmerge):
        infile = args.file[i0]
        outfile, data_format = make_outfile(args.outpath, infile,
                                            args.data_format,
                                            nmerge < len(args.file),
                                            format_from_extension)
        if not check_format(data_format):
            sys.exit(-1)
        if os.path.realpath(infile) == os.path.realpath(outfile):
            print(f'! cannot convert "{infile}" to itself !')
            sys.exit(-1)
        # The encoding is resolved per group. Storing it in args would
        # force the encoding of the first input file onto all
        # following groups:
        encoding = args.encoding
        # read in audio:
        pre_history = None
        try:
            with AudioLoader(infile) as sf:
                data = sf[:,:]
                rate = sf.rate
                md = sf.metadata()
                locs, labels = sf.markers()
                pre_history = bext_history_str(sf.encoding,
                                               sf.rate,
                                               sf.channels,
                                               sf.filepath)
                if sf.encoding is not None and encoding is None:
                    encoding = sf.encoding
        except FileNotFoundError:
            print(f'file "{infile}" not found!')
            sys.exit(-1)
        if args.verbose > 1:
            print(f'loaded audio file "{infile}"')
        # append the remaining files of the group:
        for xfile in args.file[i0+1:i0+nmerge]:
            try:
                xdata, xrate = load_audio(xfile)
            except FileNotFoundError:
                print(f'file "{xfile}" not found!')
                sys.exit(-1)
            if abs(rate - xrate) > 1:
                print('! cannot merge files with different sampling rates !')
                print(f' file "{args.file[i0]}" has {rate:.0f}Hz')
                print(f' file "{xfile}" has {xrate:.0f}Hz')
                sys.exit(-1)
            if xdata.shape[1] != data.shape[1]:
                print('! cannot merge files with different numbers of channels !')
                print(f' file "{args.file[i0]}" has {data.shape[1]} channels')
                print(f' file "{xfile}" has {xdata.shape[1]} channels')
                sys.exit(-1)
            # number of frames in front of the appended file:
            offset = data.shape[0]
            data = np.vstack((data, xdata))
            xlocs, xlabels = markers(xfile)
            # Markers are relative to the start of their own file and
            # need to be shifted to their position in the merged data.
            # NOTE(review): assumes the first column of locs holds the
            # marker position in frames - confirm against markers().
            xlocs = np.array(xlocs)
            if xlocs.ndim == 2 and xlocs.size > 0:
                xlocs[:, 0] += offset
            locs = np.vstack((locs, xlocs))
            labels = np.vstack((labels, xlabels))
            if args.verbose > 1:
                print(f'loaded audio file "{xfile}"')
        data, rate = modify_data(data, rate, md, channels, args.scale,
                                 args.unwrap_clip, args.unwrap, 1.0,
                                 '', args.decimate)
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
            cleanup_metadata(md)
        outfile = format_outfile(outfile, md)
        # history:
        hkey = 'CodingHistory'
        if 'BEXT' in md:
            hkey = 'BEXT.' + hkey
        history = bext_history_str(encoding, rate,
                                   data.shape[1], outfile)
        add_history(md, history, hkey, pre_history)
        # write out audio:
        try:
            write_audio(outfile, data, rate, md, locs, labels,
                        format=data_format, encoding=encoding)
        except PermissionError:
            print(f'failed to write "{outfile}": permission denied!')
            sys.exit(-1)
        # message:
        if args.verbose > 1:
            print(f'wrote "{outfile}"')
        elif args.verbose:
            # infile is the first file of the group:
            print(f'converted audio file "{infile}" to "{outfile}"')

444

445

if __name__ == '__main__':
    # run the converter on the arguments following the script name
    cli_arguments = sys.argv[1:]
    main(*cli_arguments)