Coverage for src/thunderlab/convertdata.py: 89%
120 statements
« prev ^ index » next coverage.py v7.6.2, created at 2024-10-09 16:02 +0000
« prev ^ index » next coverage.py v7.6.2, created at 2024-10-09 16:02 +0000
1"""Command line script for converting, downsampling, renaming and merging data files.
3```sh
4convertdata -o test.wav test.raw
5```
6converts 'test.raw' to 'test.wav'.
8The script reads all input files with `dataloader.load_data()`,
9combines the audio and marker data, and writes them along with the
10metadata to an output file using `datawriter.write_data()`. Thus, all
11formats supported by these functions and the installed python audio
12modules are supported.
14Run
15```sh
16convertdata -l
17```
18for a list of supported output file formats and
19```sh
20convertdata -f wav -l
21```
22for a list of supported encodings for a given output format.
24Running
25```sh
26convertdata --help
27```
28prints
29```text
30usage: convertdata [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] [-U [THRESH]]
31 [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH]
32 [file ...]
34Convert, downsample, rename, and merge data files.
36positional arguments:
37 file one or more input files to be combined into a single output file
39options:
40 -h, --help show this help message and exit
41 --version show program's version number and exit
42 -v print debug output
43 -l list supported file formats and encodings
44 -f FORMAT audio format of output file
45 -e ENCODING audio encoding of output file
46 -s SCALE scale the data by factor SCALE
47 -u [THRESH] unwrap clipped data with threshold (default is 0.5) and divide by two
48 -U [THRESH] unwrap clipped data with threshold (default is 0.5) and clip
49 -d FAC downsample by integer factor
50 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0)
51 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "."
52 -r KEY remove keys from metadata. Keys can have section names separated by "."
53 -n NUM merge NUM input files into one output file
54 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their
55 values from the input file
57version 1.12.0 by Benda-Lab (2020-2024)
58```
60"""
62import os
63import sys
64import argparse
65import numpy as np
66from .version import __version__, __year__
67from audioio import add_metadata, remove_metadata, cleanup_metadata
68from audioio import bext_history_str, add_history
69from audioio.audioconverter import add_arguments, parse_channels
70from audioio.audioconverter import make_outfile, format_outfile
71from audioio.audioconverter import modify_data
72from .dataloader import load_data, DataLoader, markers
73from .datawriter import available_formats, available_encodings
74from .datawriter import format_from_extension, write_data
def check_format(format):
    """Report whether an output data file format is valid and supported.

    For an empty or unsupported format a short hint on how to list
    the available formats is printed on the console.

    Parameters
    ----------
    format: string
        Name of the data file format to be checked.
        It is compared in upper case against `available_formats()`.

    Returns
    -------
    valid: bool
        True if `format` is non-empty and available, False otherwise.
    """
    supported = bool(format) and format.upper() in available_formats()
    if supported:
        return True
    # tell the user what went wrong and how to find valid formats:
    hint = (f'! invalid data file format "{format}"!',
            'run',
            f'> {__file__} -l',
            'for a list of available formats')
    for line in hint:
        print(line)
    return False
def list_formats_encodings(data_format):
    """Print the available file formats or the encodings of one format.

    Parameters
    ----------
    data_format: None or str
        If None or empty, all available file formats are printed.
        Otherwise the encodings available for this format are printed.
        If the format is not supported, the program exits with
        status -1 after `check_format()` printed its error message.
    """
    if data_format:
        # a specific format was requested: validate before listing encodings
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        entries = available_encodings(data_format)
    else:
        print('available file formats:')
        entries = available_formats()
    for entry in entries:
        print(f' {entry}')
def main(*cargs):
    """Command line script for converting, downsampling, renaming and
    merging data files.

    The input files are processed in groups of `args.nmerge` files
    (a single group containing all files if it is zero). The files of
    each group are concatenated, modified according to the command
    line options, and written together with metadata and markers to
    one output file.

    The program exits with status -1 if no input file was specified,
    an input file was not found, the output format is not supported,
    the output file equals the input file, the files to be merged
    differ in sampling rate or number of channels, or the output file
    could not be written because of missing permissions.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If none are given, argparse falls back to `sys.argv[1:]`.
    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert, downsample, rename, and merge data files.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        # parse_args(None) makes argparse read sys.argv[1:] itself
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        if args.data_format is None and len(args.file) > 0:
            # without -f, take the first positional argument as the format
            args.data_format = args.file[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.file) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    nmerge = args.nmerge
    if nmerge == 0:
        # merge all input files into a single output file
        nmerge = len(args.file)

    for i0 in range(0, len(args.file), nmerge):
        infile = args.file[i0]
        outfile, data_format = make_outfile(args.outpath, infile,
                                            args.data_format,
                                            nmerge < len(args.file),
                                            format_from_extension)
        if not check_format(data_format):
            sys.exit(-1)
        if os.path.realpath(infile) == os.path.realpath(outfile):
            print(f'! cannot convert "{infile}" to itself !')
            sys.exit(-1)
        # Encoding for this output file. Kept local so that an encoding
        # inherited from one input file does not leak into the output
        # files of subsequent groups.
        encoding = args.encoding
        # read in data:
        pre_history = None
        try:
            with DataLoader(infile) as sf:
                data = sf[:,:]
                rate = sf.rate
                unit = sf.unit
                amax = sf.ampl_max
                md = sf.metadata()
                locs, labels = sf.markers()
                pre_history = bext_history_str(sf.encoding,
                                               sf.rate,
                                               sf.channels,
                                               sf.filepath)
                if sf.encoding is not None and encoding is None:
                    # no encoding requested: keep the one of the input file
                    encoding = sf.encoding
        except FileNotFoundError:
            print(f'file "{infile}" not found!')
            sys.exit(-1)
        if args.verbose > 1:
            print(f'loaded data file "{infile}"')
        # append the remaining files of this group:
        for xfile in args.file[i0+1:i0+nmerge]:
            try:
                xdata, xrate, xunit, xamax = load_data(xfile)
            except FileNotFoundError:
                print(f'file "{xfile}" not found!')
                sys.exit(-1)
            if abs(rate - xrate) > 1:
                print('! cannot merge files with different sampling rates !')
                print(f' file "{args.file[i0]}" has {rate:.0f}Hz')
                print(f' file "{xfile}" has {xrate:.0f}Hz')
                sys.exit(-1)
            if xdata.shape[1] != data.shape[1]:
                print('! cannot merge files with different numbers of channels !')
                print(f' file "{args.file[i0]}" has {data.shape[1]} channels')
                print(f' file "{xfile}" has {xdata.shape[1]} channels')
                sys.exit(-1)
            if xamax > amax:
                amax = xamax
            # number of frames preceding the appended file in the merged data:
            offset = len(data)
            data = np.vstack((data, xdata))
            xlocs, xlabels = markers(xfile)
            if len(xlocs) > 0:
                # Marker positions are relative to the start of their own
                # file; shift them so that they refer to the merged data.
                # NOTE(review): assumes the first column of the marker
                # array holds the position, as documented for audioio
                # markers() - confirm for thunderlab.dataloader.markers().
                xlocs = np.array(xlocs)
                xlocs[:, 0] += offset
            locs = np.vstack((locs, xlocs))
            labels = np.vstack((labels, xlabels))
            if args.verbose > 1:
                print(f'loaded data file "{xfile}"')
        data, rate = modify_data(data, rate, md, channels, args.scale,
                                 args.unwrap_clip, args.unwrap, amax, unit,
                                 args.decimate)
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
        cleanup_metadata(md)
        outfile = format_outfile(outfile, md)
        # history:
        hkey = 'CodingHistory'
        if 'BEXT' in md:
            hkey = 'BEXT.' + hkey
        history = bext_history_str(encoding, rate,
                                   data.shape[1], outfile)
        add_history(md, history, hkey, pre_history)
        # write out data:
        try:
            write_data(outfile, data, rate, amax, unit,
                       md, locs, labels,
                       format=data_format, encoding=encoding)
        except PermissionError:
            print(f'failed to write "{outfile}": permission denied!')
            sys.exit(-1)
        # message:
        if args.verbose > 1:
            print(f'wrote "{outfile}"')
        elif args.verbose:
            # infile is the first file of the group, from which the
            # name of the output file was derived
            print(f'converted data file "{infile}" to "{outfile}"')
# run the converter with the command line arguments when executed as a script:
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(*cli_args)