Coverage for src/thunderlab/convertdata.py: 89%

120 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-11-29 17:59 +0000

1"""Command line script for converting, downsampling, renaming and merging data files. 

2 

3```sh 

4convertdata -o test.wav test.raw 

5``` 

6converts 'test.raw' to 'test.wav'. 

7 

8The script reads all input files with `dataloader.load_data()`, 

9combines the audio and marker data, and writes them along with the 

10metadata to an output file using `datawriter.write_data()`. Thus, all 

11formats supported by these functions and the installed python audio 

12modules are supported. 

13 

14Run 

15```sh 

16convertdata -l 

17``` 

18for a list of supported output file formats and 

19```sh 

20convertdata -f wav -l 

21``` 

22for a list of supported encodings for a given output format. 

23 

24Running 

25```sh 

26convertdata --help 

27``` 

28prints 

29```text 

30usage: convertdata [-h] [--version] [-v] [-l] [-f FORMAT] [-e ENCODING] [-s SCALE] [-u [THRESH]] [-U [THRESH]] 

31 [-d FAC] [-c CHANNELS] [-a KEY=VALUE] [-r KEY] [-n NUM] [-o OUTPATH] 

32 [file ...] 

33 

34Convert, downsample, rename, and merge data files. 

35 

36positional arguments: 

37 file one or more input files to be combined into a single output file 

38 

39options: 

40 -h, --help show this help message and exit 

41 --version show program's version number and exit 

42 -v print debug output 

43 -l list supported file formats and encodings 

44 -f FORMAT audio format of output file 

45 -e ENCODING audio encoding of output file 

46 -s SCALE scale the data by factor SCALE 

47 -u [THRESH] unwrap clipped data with threshold (default is 0.5) and divide by two 

48 -U [THRESH] unwrap clipped data with threshold (default is 0.5) and clip 

49 -d FAC downsample by integer factor 

50 -c CHANNELS comma and dash separated list of channels to be saved (first channel is 0) 

51 -a KEY=VALUE add key-value pairs to metadata. Keys can have section names separated by "." 

52 -r KEY remove keys from metadata. Keys can have section names separated by "." 

53 -n NUM merge NUM input files into one output file 

54 -o OUTPATH path or filename of output file. Metadata keys enclosed in curly braces will be replaced by their 

55 values from the input file 

56 

57version 1.12.0 by Benda-Lab (2020-2024) 

58``` 

59 

60""" 

61 

62import os 

63import sys 

64import argparse 

65import numpy as np 

66from .version import __version__, __year__ 

67from audioio import add_metadata, remove_metadata, cleanup_metadata 

68from audioio import bext_history_str, add_history 

69from audioio.audioconverter import add_arguments, parse_channels 

70from audioio.audioconverter import make_outfile, format_outfile 

71from audioio.audioconverter import modify_data 

72from .dataloader import load_data, DataLoader, markers 

73from .datawriter import available_formats, available_encodings 

74from .datawriter import format_from_extension, write_data 

75 

76 

def check_format(format):
    """
    Tell whether the requested data file format can be written.

    A hint on how to list the available formats is printed on the
    console if the format is missing or unknown.

    Parameters
    ----------
    format: string
        Name of the requested format. It is compared in upper case
        against the names returned by `available_formats()`.

    Returns
    -------
    valid: bool
        True if the requested format is supported, False otherwise.
    """
    # an empty or missing format is never looked up:
    if format and format.upper() in available_formats():
        return True
    hint = (f'! invalid data file format "{format}"!',
            'run',
            f'> {__file__} -l',
            'for a list of available formats')
    for line in hint:
        print(line)
    return False

101 

102 

def list_formats_encodings(data_format):
    """ Print supported file formats, or the encodings of one format.

    Parameters
    ----------
    data_format: None or str
        If None or empty, all available file formats are printed.
        Otherwise the encodings available for this format are printed.
        If the format does not pass `check_format()`, the program is
        terminated via `sys.exit(-1)`.
    """
    if data_format:
        if not check_format(data_format):
            sys.exit(-1)
        print(f'available encodings for {data_format} file format:')
        entries = available_encodings(data_format)
    else:
        print('available file formats:')
        entries = available_formats()
    for entry in entries:
        print(f' {entry}')

121 

122 

def main(*cargs):
    """Command line script for converting, downsampling, renaming and
    merging data files.

    Input files are processed in groups of `nmerge` files. The data and
    markers of each group are concatenated, modified according to the
    command line options, and written to a single output file together
    with the metadata of the first file of the group.

    The program is terminated via `sys.exit(-1)` on invalid formats,
    missing input files, mismatching sampling rates or channel counts,
    and if the output file cannot be written.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as returned by sys.argv[1:].
        If no arguments are passed, `None` is handed to
        `parser.parse_args()`, which then reads `sys.argv[1:]`.

    """
    # command line arguments:
    parser = argparse.ArgumentParser(add_help=True,
        description='Convert, downsample, rename, and merge data files.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    add_arguments(parser)
    if len(cargs) == 0:
        cargs = None
    args = parser.parse_args(cargs)

    channels = parse_channels(args.channels)

    if args.list_formats:
        # without -f, the first positional argument is taken as the
        # format whose encodings should be listed:
        if args.data_format is None and len(args.file) > 0:
            args.data_format = args.file[0]
        list_formats_encodings(args.data_format)
        return

    if len(args.file) == 0:
        print('! need to specify at least one input file !')
        sys.exit(-1)

    # nmerge of zero means: merge all input files into one output file
    nmerge = args.nmerge
    if nmerge == 0:
        nmerge = len(args.file)

    for i0 in range(0, len(args.file), nmerge):
        infile = args.file[i0]
        outfile, data_format = make_outfile(args.outpath, infile,
                                            args.data_format,
                                            nmerge < len(args.file),
                                            format_from_extension)
        if not check_format(data_format):
            sys.exit(-1)
        if os.path.realpath(infile) == os.path.realpath(outfile):
            print(f'! cannot convert "{infile}" to itself !')
            sys.exit(-1)
        # The encoding is determined per output file. Do not store it
        # in args, otherwise the encoding of the first input file would
        # be applied to all following output files:
        encoding = args.encoding
        # read in data:
        pre_history = None
        try:
            with DataLoader(infile) as sf:
                data = sf[:,:]
                rate = sf.rate
                unit = sf.unit
                amax = sf.ampl_max
                md = sf.metadata()
                locs, labels = sf.markers()
                pre_history = bext_history_str(sf.encoding,
                                               sf.rate,
                                               sf.channels,
                                               sf.filepath)
                if sf.encoding is not None and encoding is None:
                    encoding = sf.encoding
        except FileNotFoundError:
            print(f'file "{infile}" not found!')
            sys.exit(-1)
        if args.verbose > 1:
            print(f'loaded data file "{infile}"')
        # append the remaining files of this group
        # (own loop variable, infile keeps naming the first file):
        for xfile in args.file[i0+1:i0+nmerge]:
            try:
                xdata, xrate, xunit, xamax = load_data(xfile)
            except FileNotFoundError:
                print(f'file "{xfile}" not found!')
                sys.exit(-1)
            if abs(rate - xrate) > 1:
                print('! cannot merge files with different sampling rates !')
                print(f' file "{infile}" has {rate:.0f}Hz')
                print(f' file "{xfile}" has {xrate:.0f}Hz')
                sys.exit(-1)
            if xdata.shape[1] != data.shape[1]:
                print('! cannot merge files with different numbers of channels !')
                print(f' file "{infile}" has {data.shape[1]} channels')
                print(f' file "{xfile}" has {xdata.shape[1]} channels')
                sys.exit(-1)
            if xamax > amax:
                amax = xamax
            # number of frames merged so far, i.e. where xdata will start:
            offset = len(data)
            data = np.vstack((data, xdata))
            xlocs, xlabels = markers(xfile)
            if len(xlocs) > 0:
                # Marker positions are relative to the start of their
                # own file and need to be shifted to the merged data.
                # NOTE(review): assumes the first column of locs holds
                # the marker positions in frames - confirm against the
                # markers() documentation.
                xlocs = np.array(xlocs)     # do not modify the original
                xlocs[:, 0] += offset
            locs = np.vstack((locs, xlocs))
            labels = np.vstack((labels, xlabels))
            if args.verbose > 1:
                print(f'loaded data file "{xfile}"')
        data, rate = modify_data(data, rate, md, channels, args.scale,
                                 args.unwrap_clip, args.unwrap, amax, unit,
                                 args.decimate)
        add_metadata(md, args.md_list, '.')
        if len(args.remove_keys) > 0:
            remove_metadata(md, args.remove_keys, '.')
            cleanup_metadata(md)
        outfile = format_outfile(outfile, md)
        # history:
        hkey = 'CodingHistory'
        if 'BEXT' in md:
            hkey = 'BEXT.' + hkey
        history = bext_history_str(encoding, rate,
                                   data.shape[1], outfile)
        add_history(md, history, hkey, pre_history)
        # write out data:
        try:
            write_data(outfile, data, rate, amax, unit,
                       md, locs, labels,
                       format=data_format, encoding=encoding)
        except PermissionError:
            print(f'failed to write "{outfile}": permission denied!')
            sys.exit(-1)
        # message:
        if args.verbose > 1:
            print(f'wrote "{outfile}"')
        elif args.verbose:
            print(f'converted data file "{infile}" to "{outfile}"')

242 

243 

if __name__ == '__main__':
    # executed as a script: hand the command line arguments
    # (without the program name) over to main()
    cli_args = sys.argv[1:]
    main(*cli_args)