Coverage for src / thunderlab / convertdata.py: 88%
115 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-10 21:21 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-10 21:21 +0000
1"""Command line script for converting, downsampling, renaming and merging data files.
3```sh
4convertdata -o test.wav test.raw
5```
6converts 'test.raw' to 'test.wav'.
8The script reads all input files with `dataloader.DataLoader()`, and
9writes them along with the metadata to an output file using
10`datawriter.write_data()`. Thus, all formats supported by these
11functions and the installed python audio modules are supported.
13Run
14```sh
15convertdata -l
16```
17for a list of supported output file formats and
18```sh
19convertdata -f wav -l
20```
21for a list of supported encodings for a given output format.
23Running
24```sh
25convertdata --help
26```
27prints
28```text
29usage: convertdata [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]]
30 [-U [THRESH]] [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH]
31 [-i KWARGS]
32 [files ...]
34Convert, downsample, rename, and merge data files.
36positional arguments:
37 files one or more input files to be combined into a single output file
39options:
40 -h, --help show this help message and exit
41 --version show program's version number and exit
42 -v print debug output
43 -l list supported file formats and encodings
44 -f FORMAT audio format of output file
45 -e ENCODING audio encoding of output file
46 -s SCALE scale the data by factor SCALE
47 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is
48 0.5) and divide by two
49 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is
50 0.5) and clip
51 -d FAC downsample by integer factor
52 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0)
53 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "."
54 -r KEY remove keys from metadata. Keys can have section names separated by "."
55 -n NUM merge NUM input files into one output file
56 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be
57 replaced by their values from the input file
58 -i KWARGS key-word arguments for the data loader function
60version 1.6.0 by Benda-Lab (2020-2025)
61```
63"""
65import os
66import sys
67import argparse
68import numpy as np
70from pathlib import Path
72from audioio import add_metadata, remove_metadata, cleanup_metadata
73from audioio import bext_history_str, add_history
74from audioio.audioconverter import add_arguments, parse_channels, parse_load_kwargs
75from audioio.audioconverter import make_outfile, format_outfile
76from audioio.audioconverter import modify_data
78from .dataloader import load_data, DataLoader, markers
79from .datawriter import available_formats, available_encodings
80from .datawriter import format_from_extension, write_data
81from .version import __version__, __year__
def check_format(format):
    """Tell whether a requested output file format is supported.

    A hint on how to list the available formats is printed to the
    console when the format is rejected.

    Parameters
    ----------
    format: string
        Name of the file format to be checked. Compared in upper case
        against the names returned by `available_formats()`.

    Returns
    -------
    valid: bool
        True if `format` is non-empty and among the available formats,
        False otherwise.
    """
    # an empty or missing format is rejected without querying the writers:
    supported = bool(format) and format.upper() in available_formats()
    if supported:
        return True
    print(f'! invalid data file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False
def list_formats_encodings(data_format):
    """Print the supported file formats, or the encodings of one format.

    Parameters
    ----------
    data_format: None or str
        If empty or None, all available file formats are listed.
        Otherwise the encodings available for this file format are
        listed; the program exits with status -1 if the format does
        not pass `check_format()`.
    """
    if data_format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        entries = available_encodings(data_format)
    else:
        print('available file formats:')
        entries = available_formats()
    for entry in entries:
        print(f' {entry}')
def main(*cargs):
    """Command line script for converting, downsampling, renaming and
    merging data files.

    Parses the command line, loads all input files through a single
    `DataLoader`, and writes them in groups of `nmerge` files to
    output files using `write_data()`. Metadata and markers are
    carried over; a coding-history entry is added for each output file.

    The function calls `sys.exit(-1)` on invalid formats, missing
    input files, non-contiguous input files, an output file identical
    to its input file, or a permission error when writing.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If empty, the arguments are taken from `sys.argv` by argparse.
    """
    # command line arguments:
    parser = argparse.ArgumentParser(
        add_help=True,
        description='Convert, downsample, rename, and merge data files.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f, the first positional argument names the format:
        if args.data_format is None and len(args.files) > 0:
            args.data_format = args.files[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.files) == 0 or len(args.files[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    # expand wildcard patterns (the Windows shell does not do this):
    if os.name == 'nt':
        import glob  # only needed here; not imported at module level
        files = []
        for fn in args.files:
            files.extend(glob.glob(fn))
        if len(files) == 0:
            print('! no input files match the specified patterns !')
            sys.exit(-1)
    else:
        files = args.files

    # number of input files per output file, counted on the expanded
    # file list, since this is what the loader actually opens:
    nmerge = args.nmerge
    if nmerge == 0:
        nmerge = len(files)

    # kwargs for audio loader:
    load_kwargs = parse_load_kwargs(args.load_kwargs)

    # read in data:
    try:
        data = DataLoader(files, verbose=args.verbose - 1,
                          **load_kwargs)
    except FileNotFoundError as err:
        # the exception carries the offending path if raised by the OS;
        # otherwise fall back to the first input file:
        missing = err.filename if err.filename is not None else files[0]
        print(f'file "{missing}" not found!')
        sys.exit(-1)

    # make sure the loader is closed on every exit path, including sys.exit():
    try:
        if len(data.file_paths) < len(files):
            print(f'file "{files[len(data.file_paths)]}" does not continue file "{data.file_paths[-1]}"!')
            sys.exit(-1)
        md = data.metadata()
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
            cleanup_metadata(md)
        locs, labels = data.markers()
        pre_history = bext_history_str(data.encoding,
                                       data.rate,
                                       data.channels,
                                       os.fsdecode(data.filepath))
        if args.verbose > 1:
            print(f'loaded data file "{data.filepath}"')

        # keep the encoding of the input unless one was requested:
        if data.encoding is not None and args.encoding is None:
            args.encoding = data.encoding
        # NOTE(review): md is shared by all output files, so changes made
        # to it for one output file are still present for the next one
        # -- confirm that this is intended when nmerge < number of files.
        for i0 in range(0, len(files), nmerge):
            infile = data.file_paths[i0]
            outfile, data_format = make_outfile(args.outpath, infile,
                                                args.data_format,
                                                nmerge < len(files),
                                                format_from_extension)
            if not check_format(data_format):
                sys.exit(-1)
            if infile.resolve() == outfile.resolve():
                print(f'! cannot convert "{infile}" to itself !')
                sys.exit(-1)
            # range of frames covered by this group of input files:
            if len(data.file_paths) > 1:
                i1 = i0 + nmerge - 1
                if i1 >= len(data.end_indices):
                    i1 = len(data.end_indices) - 1
                si = data.start_indices[i0]
                ei = data.end_indices[i1]
            else:
                si = 0
                ei = data.frames
            wdata, wrate = modify_data(data[si:ei], data.rate,
                                       md, channels, args.scale,
                                       args.unwrap_clip, args.unwrap,
                                       data.ampl_max, data.unit,
                                       args.decimate)
            # markers within the frame range, relative to its start:
            mask = (locs[:, 0] >= si) & (locs[:, 0] < ei)
            wlocs = locs[mask, :]
            if len(wlocs) > 0:
                wlocs[:, 0] -= si
            wlabels = labels[mask, :]
            outfile = format_outfile(outfile, md)
            # history:
            hkey = 'CodingHistory'
            if 'BEXT' in md:
                hkey = 'BEXT.' + hkey
            # NOTE(review): data.shape[1] is the channel count of the input;
            # if channels were selected, wdata may have fewer -- confirm.
            history = bext_history_str(args.encoding, wrate,
                                       data.shape[1], os.fsdecode(outfile))
            add_history(md, history, hkey, pre_history)
            # write out data:
            try:
                write_data(outfile, wdata, wrate, data.ampl_max, data.unit,
                           md, wlocs, wlabels,
                           format=data_format, encoding=args.encoding)
            except PermissionError:
                print(f'failed to write "{outfile}": permission denied!')
                sys.exit(-1)
            # message:
            if args.verbose > 1:
                print(f'wrote "{outfile}"')
            elif args.verbose:
                print(f'converted data file "{infile}" to "{outfile}"')
    finally:
        data.close()
# Run the converter on the command line arguments when executed as a script.
if __name__ == '__main__':
    main(*sys.argv[1:])