Coverage for src / audioio / fixtimestamps.py: 79%

160 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-10 20:35 +0000

1"""Fix time stamps. 

2 

3Change time stamps in the metadata (of wave files) and file names 

4*without rewriting* the entire file. This is useful in case the 

5real-time clock of a recorder failed. 

6 

7## Command line script 

8 

9Let's assume you have a continuous recording spread over the following 

10four files each covering 3 minutes of the recording: 

11```txt 

12logger-20190101T000015.wav 

13logger-20190101T000315.wav 

14logger-20190101T000615.wav 

15logger-20190101T000915.wav 

16``` 

17However, the recording was actually started at 2025-06-09T10:42:17. 

18Obviously, the real-time clock failed, since all times in the file name 

19and the time stamps in the metadata start in the year 2019. 

20 

21To fix this, run 

22```sh 

23> fixtimestamps -s '2025-06-09T10:42:17' logger-2019*.wav 

24``` 

25 

26Then the files are renamed: 

27```txt 

28logger-20190101T000015.wav -> logger-20250609T104217.wav 

29logger-20190101T000315.wav -> logger-20250609T104517.wav 

30logger-20190101T000615.wav -> logger-20250609T104817.wav 

31logger-20190101T000915.wav -> logger-20250609T105117.wav 

32``` 

33and the time stamps in the metadata are set accordingly. 

34 

35Supported date formats are "YYYY-MM-DD" or "YYYYMMDD". 

36Supported time formats are "HH:MM:SS" or "HHMMSS". 

37 

38Adding the `-n` flag runs the script in dry mode, i.e. it just reports 

39what it would do without modifying the audio files: 

40 

41```sh 

42> fixtimestamps -n -s 20250701T173420 *.wav 

43``` 

44 

45Alternatively, the script can be run from within the audioio source 

46tree as: 

47``` 

48python -m src.audioio.fixtimestamps -s 20250701T173420 *.wav 

49``` 

50 

51Running 

52```sh 

53fixtimestamps --help 

54``` 

55prints 

56```text 

57usage: fixtimestamps [-h] [--version] -s STARTTIME [-n] files [files ...] 

58 

59Fix time stamps. 

60 

61positional arguments: 

62 files audio files 

63 

64options: 

65 -h, --help show this help message and exit 

66 --version show program's version number and exit 

67 -s STARTTIME new start time of the first file 

68 -n do not modify the files, just report what would be done. 

69 

70version 2.5.0 by Benda-Lab (2020-2025) 

71``` 

72 

73## Functions 

74 

75- `parse_datetime()`: parse string for a date and a time. 

76- `replace_datetime()`: replace in a string date and time. 

77- `write_riff_datetime()`: modify time stamps in the metadata of a RIFF/WAVE file. 

78 

79""" 

80 

81 

82import re 

83import os 

84import sys 

85import glob 

86import argparse 

87import datetime as dt 

88 

89from pathlib import Path 

90 

91from .version import __version__, __year__ 

92from .riffmetadata import read_riff_header, read_chunk_tags, read_format_chunk 

93from .riffmetadata import read_info_chunks, read_bext_chunk, read_ixml_chunk, read_guano_chunk 

94from .riffmetadata import write_info_chunk, write_bext_chunk, write_ixml_chunk, write_guano_chunk 

95from .audiometadata import get_datetime, set_starttime 

96 

97 

def _find_first_valid(pattern, text, convert):
    """Find the first match of `pattern` in `text` that `convert` accepts.

    Parameters
    ----------
    pattern: str
        Regular expression describing a candidate.
    text: str
        String to be searched.
    convert: callable
        Called with the matched text. A `ValueError` marks the
        candidate as invalid and the search continues behind its start.

    Returns
    -------
    value: object or None
        Result of `convert` for the first valid candidate, None if
        there is none.
    end: int
        Index in `text` right behind the valid candidate, 0 if there
        is none.
    """
    regex = re.compile(pattern)
    m = regex.search(text)
    while m is not None:
        try:
            return convert(m[0]), m.end()
        except ValueError:
            # The regex is more permissive than the calendar/clock
            # (e.g. month 13 or hour 25): try the next candidate,
            # allowing it to overlap with the rejected one.
            m = regex.search(text, m.start() + 1)
    return None, 0


def parse_datetime(string):
    """Parse string for a date and a time.

    Date and time can appear anywhere in the string,
    but time needs to appear after a date.
    Supported date formats are "YYYY-MM-DD" or "YYYYMMDD".
    Supported time formats are "HH:MM:SS" or "HHMMSS".

    Digit sequences that look like a date or a time but are not valid
    (for example month 13 or hour 25) are skipped instead of raising
    an exception.

    Parameters
    ----------
    string: str
        String to be parsed.

    Returns
    -------
    dtime: datetime or None
        The date and time parsed from the string.
        A missing date is set to 0001-01-01, a missing time to 00:00:00.
        None if neither a date nor a time was found.
    """
    def compact_date(digits):
        # Insert the separators by hand, since date.fromisoformat()
        # accepts "YYYYMMDD" only from Python 3.11 on.
        return dt.date.fromisoformat(f'{digits[0:4]}-{digits[4:6]}-{digits[6:8]}')

    def compact_time(digits):
        return dt.time.fromisoformat(f'{digits[0:2]}:{digits[2:4]}:{digits[4:6]}')

    # The dashed date format takes precedence over the compact one:
    date, time_pos = _find_first_valid('[123][0-9][0-9][0-9]-[01][0-9]-[0123][0-9]',
                                       string, dt.date.fromisoformat)
    if date is None:
        date, time_pos = _find_first_valid('[123][0-9][0-9][0-9][01][0-9][0123][0-9]',
                                           string, compact_date)

    # A time is only looked for behind the date (time_pos is 0 without a date):
    remainder = string[time_pos:]
    time, _ = _find_first_valid('[012][0-9]:[0-5][0-9]:[0-5][0-9]',
                                remainder, dt.time.fromisoformat)
    if time is None:
        time, _ = _find_first_valid('[012][0-9][0-5][0-9][0-5][0-9]',
                                    remainder, compact_time)

    if date is None and time is None:
        return None
    if date is None:
        date = dt.date(1, 1, 1)
    if time is None:
        time = dt.time(0, 0, 0)
    return dt.datetime.combine(date, time)

148 

149 

def replace_datetime(string, date_time):
    """Replace in a string date and time.

    Date and time can appear anywhere in the string,
    but time needs to appear after a date.
    Supported date formats are "YYYY-MM-DD" or "YYYYMMDD".
    Supported time formats are "HH:MM:SS" or "HHMMSS".

    Only the first date and the first time following it are replaced,
    i.e. the ones `parse_datetime()` would report. Other digit
    sequences in the string are left untouched. The time is written
    with a resolution of seconds; fractions of a second of `date_time`
    are dropped.

    Parameters
    ----------
    string: str
        String in which date and time are replaced.
    date_time: datetime or None
        Date and time to write into the string.
        If None, `string` is returned unchanged.

    Returns
    -------
    new_string: str
        The `string` with date and time replaced by `date_time`.
    """
    if date_time is None:
        return string

    date_str = date_time.date().isoformat()
    # timespec='seconds' keeps microseconds out of the result; they
    # would not fit the "HH:MM:SS"/"HHMMSS" formats being replaced:
    time_str = date_time.time().isoformat(timespec='seconds')

    # Replace the first date. The dashed format takes precedence.
    # `head` collects everything up to and including the new date,
    # `tail` is the part in which a time is looked for.
    head = ''
    tail = string
    date_formats = (('[123][0-9][0-9][0-9]-[01][0-9]-[0123][0-9]', date_str),
                    ('[123][0-9][0-9][0-9][01][0-9][0123][0-9]',
                     date_str.replace('-', '')))
    for pattern, replacement in date_formats:
        m = re.search(pattern, string)
        if m is not None:
            head = string[:m.start()] + replacement
            tail = string[m.end():]
            break

    # Replace the first time behind the date:
    time_formats = (('[012][0-9]:[0-5][0-9]:[0-5][0-9]', time_str),
                    ('[012][0-9][0-5][0-9][0-5][0-9]',
                     time_str.replace(':', '')))
    for pattern, replacement in time_formats:
        m = re.search(pattern, tail)
        if m is not None:
            tail = tail[:m.start()] + replacement + tail[m.end():]
            break

    return head + tail

199 

200 

def write_riff_datetime(path, start_time, file_time=None, no_mod=False):
    """Modify time stamps in the metadata of a RIFF/WAVE file in place.

    Each of the chunks LIST-INFO, BEXT, IXML and GUAN reported by
    `read_chunk_tags()` is read, its time stamp is compared to
    `file_time`, and, unless `no_mod` is set, the chunk is written
    back if `set_starttime()` returns a true value for it.

    Parameters
    ----------
    path: str
        Path to a wave file.
    start_time: datetime
        Date and time to which all time stamps should be set.
    file_time: None or date_time
        If provided check whether the time stamp in the metadata
        matches. If they differ by more than one second a
        `ValueError` is raised.
    no_mod: bool
        Do not modify the files, just report what would be done.

    Returns
    -------
    duration: timedelta
        Total duration of the audio data in the file, computed from
        the size of the DATA chunk and the format chunk.
    orig_time: date_time or None
        The original time stamp found in the metadata. If several
        chunks provide one, the one of the last such chunk is returned.
        None if no chunk contained a time stamp.

    Raises
    ------
    ValueError:
        The file is not a valid RIFF/WAVE file, misses the FMT or
        DATA chunk, or a time stamp in the metadata does not match
        the expected `file_time`.
    """
    store_empty = False

    def check_starttime(orig_time, file_time, path):
        # Tolerate deviations of up to one second.
        if file_time is not None and orig_time is not None and \
           abs(orig_time - file_time) > dt.timedelta(seconds=1):
            raise ValueError(f'"{path}" start time is {orig_time} but should be {file_time} for a continuous recording.')

    def read_metadata(sf, chunk):
        # Metadata of a supported chunk in the form expected by
        # get_datetime() and set_starttime(), None for all other chunks.
        if chunk == 'LIST-INFO':
            return {'INFO': read_info_chunks(sf, store_empty)}
        if chunk == 'BEXT':
            return {'BEXT': read_bext_chunk(sf, store_empty)}
        if chunk == 'IXML':
            return {'IXML': read_ixml_chunk(sf, store_empty)}
        if chunk == 'GUAN':
            return {'GUANO': read_guano_chunk(sf)}
        return None

    def write_metadata(sf, chunk, md, size):
        # The writers have differing signatures; only the INFO
        # writer is passed the chunk size from the tags.
        if chunk == 'LIST-INFO':
            write_info_chunk(sf, md, size)
        elif chunk == 'BEXT':
            write_bext_chunk(sf, md)
        elif chunk == 'IXML':
            write_ixml_chunk(sf, md)
        elif chunk == 'GUAN':
            write_guano_chunk(sf, md['GUANO'])

    orig_time = None
    with open(path, 'r+b') as sf:
        try:
            read_riff_header(sf)
        except ValueError as err:
            raise ValueError(f'"{path}" is not a valid RIFF/WAVE file, time stamps cannot be modified.') from err
        tags = read_chunk_tags(sf)

        # Duration of the recording from format and size of the data.
        # NOTE(review): tags[name] is used as (position, size) of the
        # chunk; reading starts 4 bytes and writing 8 bytes before
        # that position - confirm against riffmetadata.
        if 'FMT ' not in tags:
            raise ValueError(f'missing FMT chunk in "{path}".')
        sf.seek(tags['FMT '][0] - 4, os.SEEK_SET)
        channels, rate, bits = read_format_chunk(sf)
        bts = 1 + (bits - 1) // 8    # bytes per sample, rounded up
        if 'DATA' not in tags:
            raise ValueError(f'missing DATA chunk in "{path}".')
        dsize = tags['DATA'][1]
        duration = dt.timedelta(seconds=(dsize//bts//channels)/rate)

        for chunk in tags:
            sf.seek(tags[chunk][0] - 4, os.SEEK_SET)
            md = read_metadata(sf, chunk)
            if md is None:
                continue
            chunk_time = get_datetime(md)
            check_starttime(chunk_time, file_time, path)
            if chunk_time is not None:
                # A chunk without time stamp must not discard the
                # time stamp found in a previous chunk.
                orig_time = chunk_time
            if not no_mod and set_starttime(md, start_time):
                sf.seek(tags[chunk][0] - 8, os.SEEK_SET)
                write_metadata(sf, chunk, md, tags[chunk][1])
    return duration, orig_time

284 

285 

def demo(start_time, file_pathes, no_mod=False):
    """Modify time stamps of audio files.

    The files are treated as a continuous recording: the first file
    is set to `start_time`, each following file to the start time of
    its predecessor plus the predecessor's duration. Time stamps in
    the metadata are changed by `write_riff_datetime()`, and files
    with a date or time in their name are renamed accordingly.
    For each file a line reporting the change is printed.

    Parameters
    ----------
    start_time: str
        Time stamp of the first file in a format understood by
        `datetime.fromisoformat()`.
    file_pathes: list of str
        Paths of audio files.
    no_mod: bool
        Do not modify the files, just report what would be done.

    Raises
    ------
    ValueError:
        A file contains no time in its metadata or name, or any
        error raised by `write_riff_datetime()`.
    FileExistsError:
        Renaming a file would overwrite another existing file.
    """
    file_time = None
    start_time = dt.datetime.fromisoformat(start_time)
    for fp in file_pathes:
        old_path = Path(fp)
        name_time = parse_datetime(old_path.stem)
        new_path = None
        if name_time is not None:
            new_path = old_path.with_stem(replace_datetime(old_path.stem,
                                                           start_time))
            # os.rename() silently replaces an existing file on Unix.
            # For small time shifts the new name may be the name of a
            # later file of the series, which would then be lost.
            # Check before the metadata of this file are touched.
            # Not checked in dry mode, where files renamed earlier
            # in a real run are still in place.
            if not no_mod and new_path != old_path and new_path.exists():
                raise FileExistsError(f'cannot rename "{fp}" to "{new_path}": file already exists.')

        duration, orig_time = write_riff_datetime(fp, start_time,
                                                  file_time, no_mod)
        if orig_time is None:
            orig_time = name_time
        if file_time is None:
            file_time = orig_time
        if orig_time is None:
            raise ValueError(f'"{fp}" does not contain any time in its metadata or name.')

        if new_path is not None:
            if not no_mod:
                os.rename(fp, new_path)
            print(f'{fp} -> {new_path}')
        else:
            print(f'{fp}: {orig_time} -> {start_time}')

        # Expected start of the next file of the continuous recording:
        start_time += duration
        file_time += duration

320 

321 

def main(*cargs):
    """Parse the command line and hand it over to `demo()`.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as provided by sys.argv[1:].
        Without any arguments, argparse falls back to sys.argv itself.
    """
    # set up the command line interface:
    parser = argparse.ArgumentParser(
        add_help=True,
        description='Fix time stamps.',
        epilog=f'version {__version__} by Benda-Lab (2020-{__year__})')
    parser.add_argument('--version', action='version', version=__version__)
    parser.add_argument('-s', dest='starttime', default=None, type=str,
                        required=True,
                        help='new start time of the first file')
    parser.add_argument('-n', dest='nomod', action='store_true',
                        help='do not modify the files, just report what would be done.')
    parser.add_argument('files', type=str, nargs='+',
                        help='audio files')
    args = parser.parse_args(cargs if len(cargs) > 0 else None)

    # wildcard patterns are expanded here only if os.name is 'nt':
    if os.name == 'nt':
        files = [path for pattern in args.files
                 for path in glob.glob(pattern)]
    else:
        files = args.files

    demo(args.starttime, files, args.nomod)

354 

355 

if __name__ == "__main__":
    # Run as a script: pass the command line without the program name.
    cli_args = sys.argv[1:]
    main(*cli_args)