Coverage for src / thunderlab / convertdata.py: 88%

115 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-10 21:21 +0000

1"""Command line script for converting, downsampling, renaming and merging data files. 

2 

3```sh 

4convertdata -o test.wav test.raw 

5``` 

6converts 'test.raw' to 'test.wav'. 

7 

8The script reads all input files with `dataloader.DataLoader()`, and 

9writes them along with the metadata to an output file using 

10`datawriter.write_data()`. Thus, all formats supported by these 

11functions and the installed python audio modules are supported. 

12 

13Run 

14```sh 

15convertdata -l 

16``` 

17for a list of supported output file formats and 

18```sh 

19convertdata -f wav -l 

20``` 

21for a list of supported encodings for a given output format. 

22 

23Running 

24```sh 

25convertdata --help 

26``` 

27prints 

28```text 

29usage: convertdata [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] 

30 [-U [THRESH]] [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH] 

31 [-i KWARGS] 

32 [files ...] 

33 

34Convert, downsample, rename, and merge data files. 

35 

36positional arguments: 

37 files one or more input files to be combined into a single output file 

38 

39options: 

40 -h, --help show this help message and exit 

41 --version show program's version number and exit 

42 -v print debug output 

43 -l list supported file formats and encodings 

44 -f FORMAT audio format of output file 

45 -e ENCODING audio encoding of output file 

46 -s SCALE scale the data by factor SCALE 

47 -u [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 

48 0.5) and divide by two 

49 -U [THRESH] unwrap clipped data with threshold relative to maximum of input range (default is 

50 0.5) and clip 

51 -d FAC downsample by integer factor 

52 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0) 

53 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "." 

54 -r KEY remove keys from metadata. Keys can have section names separated by "." 

55 -n NUM merge NUM input files into one output file 

56 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be 

57 replaced by their values from the input file 

58 -i KWARGS key-word arguments for the data loader function 

59 

60version 1.6.0 by Benda-Lab (2020-2025) 

61``` 

62 

63""" 

64 

65import os 

66import sys 

67import argparse 

68import numpy as np 

69 

70from pathlib import Path 

71 

72from audioio import add_metadata, remove_metadata, cleanup_metadata 

73from audioio import bext_history_str, add_history 

74from audioio.audioconverter import add_arguments, parse_channels, parse_load_kwargs 

75from audioio.audioconverter import make_outfile, format_outfile 

76from audioio.audioconverter import modify_data 

77 

78from .dataloader import load_data, DataLoader, markers 

79from .datawriter import available_formats, available_encodings 

80from .datawriter import format_from_extension, write_data 

81from .version import __version__, __year__ 

82 

83 

def check_format(format):
    """Tell whether a data file format can be used for writing.

    The upper-cased format name is looked up in `available_formats()`.
    For an empty or unknown format, a short hint on how to list the
    available formats is printed on the console.

    Parameters
    ----------
    format: string
        Name of the file format to be checked. May be empty or None,
        which is reported as invalid.

    Returns
    -------
    valid: bool
        True if the format is listed by `available_formats()`,
        False otherwise.
    """
    # Guard clause for the good case; an empty/None format never
    # reaches the lookup.
    if format and format.upper() in available_formats():
        return True
    print(f'! invalid data file format "{format}"!')
    print('run')
    print(f'> {__file__} -l')
    print('for a list of available formats')
    return False

108 

109 

def list_formats_encodings(data_format):
    """Print the available file formats, or the encodings of one format.

    Parameters
    ----------
    data_format: None or str
        If empty or None, the names returned by `available_formats()`
        are printed. Otherwise the format is validated with
        `check_format()` (the program exits with status -1 if it is
        invalid) and the encodings returned by
        `available_encodings()` for this format are printed.
    """
    if data_format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        entries = available_encodings(data_format)
    else:
        print('available file formats:')
        entries = available_formats()
    # One entry per line below the header printed above.
    for entry in entries:
        print(f' {entry}')

128 

129 

def main(*cargs):
    """Command line script for converting, downsampling, renaming and
    merging data files.

    All input files are opened together by a single `DataLoader`.
    They are then written in groups of `nmerge` files to output files
    via `write_data()`, together with metadata, markers and an updated
    coding history.

    On errors (no input file, input file not found, files that do not
    continue each other, invalid output format, output equals input,
    missing write permission) a message is printed and the program
    exits with status -1.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If no arguments are passed, argparse reads `sys.argv[1:]`.

    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert, downsample, rename, and merge data files.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f, the first positional argument selects the format:
        if args.data_format is None and len(args.files) > 0:
            args.data_format = args.files[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.files) == 0 or len(args.files[0]) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    # expand wildcard patterns:
    if os.name == 'nt':
        # The module does not import glob at top level, import it here.
        import glob
        files = []
        for fn in args.files:
            matches = glob.glob(fn)
            # Keep a pattern without any match as it is, so that a
            # missing file is reported by the loader below instead of
            # being dropped silently.
            files.extend(matches if matches else [fn])
    else:
        files = args.files

    # Count the expanded files, not the raw arguments: on Windows a
    # single wildcard argument can stand for many input files.
    nmerge = args.nmerge
    if nmerge == 0:
        nmerge = len(files)

    # kwargs for audio loader:
    load_kwargs = parse_load_kwargs(args.load_kwargs)

    # read in data:
    try:
        data = DataLoader(files, verbose=args.verbose - 1,
                          **load_kwargs)
    except FileNotFoundError as err:
        # Prefer the path stored in the exception; fall back to the
        # first input file if the exception does not provide one.
        missing = err.filename if err.filename is not None else files[0]
        print(f'file "{missing}" not found!')
        sys.exit(-1)

    # From here on the loader is open: close it on every way out,
    # including the sys.exit() calls below.
    try:
        if len(data.file_paths) < len(files):
            print(f'file "{files[len(data.file_paths)]}" does not continue file "{data.file_paths[-1]}"!')
            sys.exit(-1)
        md = data.metadata()
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
            cleanup_metadata(md)
        locs, labels = data.markers()
        pre_history = bext_history_str(data.encoding,
                                       data.rate,
                                       data.channels,
                                       os.fsdecode(data.filepath))
        if args.verbose > 1:
            print(f'loaded data file "{data.filepath}"')

        # default to the encoding of the input data:
        if data.encoding is not None and args.encoding is None:
            args.encoding = data.encoding
        for i0 in range(0, len(files), nmerge):
            infile = data.file_paths[i0]
            # The fourth argument is True when more than one output
            # file is written.
            outfile, data_format = make_outfile(args.outpath, infile,
                                                args.data_format,
                                                nmerge < len(files),
                                                format_from_extension)
            if not check_format(data_format):
                sys.exit(-1)
            if infile.resolve() == outfile.resolve():
                print(f'! cannot convert "{infile}" to itself !')
                sys.exit(-1)

            # range of frames covered by this group of input files:
            if len(data.file_paths) > 1:
                i1 = i0 + nmerge - 1
                if i1 >= len(data.end_indices):
                    # last group may contain fewer than nmerge files
                    i1 = len(data.end_indices) - 1
                si = data.start_indices[i0]
                ei = data.end_indices[i1]
            else:
                si = 0
                ei = data.frames
            wdata, wrate = modify_data(data[si:ei], data.rate,
                                       md, channels, args.scale,
                                       args.unwrap_clip, args.unwrap,
                                       data.ampl_max, data.unit,
                                       args.decimate)
            # markers within this range, relative to its first frame:
            mask = (locs[:, 0] >= si) & (locs[:, 0] < ei)
            wlocs = locs[mask, :]
            if len(wlocs) > 0:
                wlocs[:, 0] -= si
            wlabels = labels[mask, :]
            outfile = format_outfile(outfile, md)
            # history:
            hkey = 'CodingHistory'
            if 'BEXT' in md:
                hkey = 'BEXT.' + hkey
            # NOTE(review): data.shape[1] is the channel count of the
            # loaded data; if -c selects a subset, wdata may have fewer
            # channels - confirm which count the history should state.
            history = bext_history_str(args.encoding, wrate,
                                       data.shape[1], os.fsdecode(outfile))
            add_history(md, history, hkey, pre_history)
            # write out data:
            try:
                write_data(outfile, wdata, wrate, data.ampl_max, data.unit,
                           md, wlocs, wlabels,
                           format=data_format, encoding=args.encoding)
            except PermissionError:
                print(f'failed to write "{outfile}": permission denied!')
                sys.exit(-1)
            # message:
            if args.verbose > 1:
                print(f'wrote "{outfile}"')
            elif args.verbose:
                print(f'converted data file "{infile}" to "{outfile}"')
    finally:
        data.close()

254 

255 

if __name__ == '__main__':
    # Run as a script: hand every argument after the program name to main().
    command_line = sys.argv[1:]
    main(*command_line)