Coverage for src / thunderlab / datawriter.py: 87%

515 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-10 21:21 +0000

1"""Writing numpy arrays of floats to data files. 

2 

3- `write_data()`: write data into a file. 

4- `available_formats()`: supported data and audio file formats. 

5- `available_encodings()`: encodings of a data file format. 

6- `format_from_extension()`: deduce data file format from file extension. 

7- `recode_array()`: recode array of floats. 

8- `insert_container_metadata()`: insert flattened metadata to data dictionary for a container file format. 

9""" 

10 

11import sys 

12import datetime as dt 

13 

14from pathlib import Path 

15from copy import deepcopy 

16from audioio import find_key, add_metadata, move_metadata 

17from audioio import get_datetime, default_gain_keys 

18 

# Registry of optional dependencies. Each try/except below attempts one
# import and records in `data_modules` whether it succeeded, so that the
# formats_*() and write_*() functions can check availability at run time.
data_modules = {}
"""Dictionary with availability of various modules needed for writing data.
Keys are the module names, values are booleans.
"""

# pickle: bound as `pickle`.
try:
    import pickle
    data_modules['pickle'] = True
except ImportError:
    data_modules['pickle'] = False

# numpy: bound as `np`.
try:
    import numpy as np
    data_modules['numpy'] = True
except ImportError:
    data_modules['numpy'] = False

# scipy.io: bound as `sio`; note that the registry key is 'scipy'.
try:
    import scipy.io as sio
    data_modules['scipy'] = True
except ImportError:
    data_modules['scipy'] = False

# audioio: binds `aw` (audiowriter), `am` (audiometadata) and two helpers.
# NOTE(review): the import section at the top of this file already imports
# names from audioio unconditionally, so a missing audioio would fail there
# before this guard is reached - confirm whether that is intended.
try:
    import audioio.audiowriter as aw
    import audioio.audiometadata as am
    from audioio import write_metadata_text, flatten_metadata
    data_modules['audioio'] = True
except ImportError:
    data_modules['audioio'] = False

49 

50 

def format_from_extension(filepath):
    """Deduce data file format from file extension.

    Parameters
    ----------
    filepath: str or Path or None
        Path and name of the data file.

    Returns
    -------
    format: str or None
        Data format deduced from the file extension.
        None if `filepath` is None or has no extension.
        'RAW' for the extension 'scandat' (case insensitive).
        Otherwise whatever `audioio.audiowriter.format_from_extension()`
        returns, or the upper-case extension if audioio is not available.
    """
    if filepath is None:
        return None
    filepath = Path(filepath)
    suffix = filepath.suffix
    # drop the leading dot of the suffix:
    if suffix.startswith('.'):
        suffix = suffix[1:]
    if not suffix:
        return None
    fmt = suffix.upper()
    if fmt == 'SCANDAT':
        return 'RAW'
    return aw.format_from_extension(filepath) if data_modules['audioio'] else fmt

80 

81 

def recode_array(data, amax, encoding):
    """Recode array of floats.

    For the integer encodings the data are scaled such that `amax`
    maps onto the full integer range and are clipped symmetrically to
    `+/-(2**(bits-1) - 1)`. For the floating point encodings the data
    are only cast to the requested precision.

    Parameters
    ----------
    data: array of floats
        Data array with values ranging between `-amax` and `+amax`.
    amax: float
        Maximum amplitude of data range.
    encoding: str
        Encoding, one of PCM_16, PCM_32, PCM_64, FLOAT or DOUBLE.

    Returns
    -------
    buffer: array
        The data recoded according to `encoding`.
        For an unknown `encoding`, `data` is returned unchanged.
    """
    encodings = {'PCM_16': (2, 'i2'),
                 'PCM_32': (4, 'i4'),
                 'PCM_64': (8, 'i8'),
                 'FLOAT': (4, 'f'),
                 'DOUBLE': (8, 'd')}

    if encoding not in encodings:
        return data
    sampwidth, dtype = encodings[encoding]
    data = np.asarray(data)
    if dtype[0] != 'i':
        return data.astype(dtype, copy=False)
    factor = 2**(sampwidth*8 - 1)
    scaled = np.round(data/amax*factor)
    # Values slightly below +amax also round up to `factor`, which does
    # not fit into the integer type and would wrap around to the most
    # negative value. Therefore test the rounded values, not the data:
    too_high = scaled >= float(factor)
    too_low = data <= -amax
    # neutralize out-of-range values before the cast to avoid overflow:
    scaled[too_high | too_low] = 0
    buffer = scaled.astype(dtype)
    buffer[too_high] = factor - 1
    buffer[too_low] = -(factor - 1)
    return buffer

118 

119 

def formats_relacs():
    """File formats offered by the relacs writer.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'RELACS'.
    """
    formats = ['RELACS']
    return formats

129 

130 

def encodings_relacs(format=None):
    """Encodings of the relacs file format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'RELACS'.

    Returns
    -------
    encodings: list of str
        ['FLOAT'] if `format` is 'RELACS' (case insensitive),
        an empty list otherwise.
    """
    fmt = format if format else 'RELACS'
    return ['FLOAT'] if fmt.upper() == 'RELACS' else []

150 

151 

def write_relacs(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data as relacs raw files.

    Each channel is written as 32-bit floats into its own
    `trace-<n>.raw` file. In addition a `stimuli.dat` file describing
    the traces, three event files, and, if `metadata` is not empty,
    an `info.dat` file are written into the folder.

    Parameters
    ----------
    filepath: str or Path
        Full path of folder where to write relacs files.
        The folder is created if it does not exist
        (its parent folder needs to exist).
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Not used by this function.
    unit: str
        Unit of the data. Defaults to 'V'.
    metadata: nested dict
        Additional metadata saved into `info.dat`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not used by this function.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not used by this function.
    format: str or None
        File format, only None or 'RELACS' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: Path
        The actual folder used for writing the data.

    Raises
    ------
    ValueError
        File format or encoding not supported.
    """
    if format is None:
        format = 'RELACS'
    if format.upper() != 'RELACS':
        raise ValueError(f'file format {format} not supported by relacs file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by relacs file format')
    filepath = Path(filepath)
    if not filepath.exists():
        filepath.mkdir()
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # write data, one file per channel:
    for c in range(nchannels):
        trace = data[:, c] if data.ndim > 1 else data
        with open(filepath / f'trace-{c+1}.raw', 'wb') as df:
            df.write(trace.astype(np.float32).tobytes())
    if unit is None:
        unit = 'V'
    # write data format:
    with open(filepath / 'stimuli.dat', 'w') as df:
        df.write('# analog input traces:\n')
        for c in range(nchannels):
            df.write(f'# identifier{c+1} : V-{c+1}\n')
            # the file name needs to match the trace files written above:
            df.write(f'# data file{c+1} : trace-{c+1}.raw\n')
            df.write(f'# sample interval{c+1} : {1000.0/rate:.4f}ms\n')
            df.write(f'# sampling rate{c+1} : {rate:.2f}Hz\n')
            df.write(f'# unit{c+1} : {unit}\n')
        df.write('# event lists:\n')
        df.write('# event file1: stimulus-events.dat\n')
        df.write('# event file2: restart-events.dat\n')
        df.write('# event file3: recording-events.dat\n')
    # write empty event files:
    for events in ['Recording', 'Restart', 'Stimulus']:
        with open(filepath / f'{events.lower()}-events.dat', 'w') as df:
            df.write(f'# events: {events}\n\n')
            df.write('#Key\n')
            if events == 'Stimulus':
                df.write('# t duration\n')
                df.write('# sec s\n')
                df.write('# 1 2\n')
            else:
                df.write('# t\n')
                df.write('# sec\n')
                df.write('# 1\n')
                if events == 'Recording':
                    # the recording starts at time zero:
                    df.write(' 0.0\n')
    # write metadata:
    if metadata:
        write_metadata_text(filepath / 'info.dat',
                            metadata, prefix='# ')
    return filepath

248 

249 

def formats_fishgrid():
    """File formats offered by the fishgrid writer.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'FISHGRID'.
    """
    formats = ['FISHGRID']
    return formats

259 

260 

def encodings_fishgrid(format=None):
    """Encodings of the fishgrid file format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'FISHGRID'.

    Returns
    -------
    encodings: list of str
        ['FLOAT'] if `format` is 'FISHGRID' (case insensitive),
        an empty list otherwise.
    """
    fmt = format if format else 'FISHGRID'
    return ['FLOAT'] if fmt.upper() == 'FISHGRID' else []

280 

281 

def write_fishgrid(filepath, data, rate, amax=1.0, unit=None,
                   metadata=None, locs=None, labels=None, format=None,
                   encoding=None):
    """Write data as fishgrid raw files.

    Writes three files into the folder `filepath`: the data as 32-bit
    floats in `traces-grid1.raw`, the metadata in `fishgrid.cfg`, and
    the markers in `timestamps.dat`.

    Parameters
    ----------
    filepath: str or Path
        Full path of the folder where to write fishgrid files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored as `AIMaxVolt` in the metadata.
    unit: str
        Unit of the data. Defaults to 'mV'.
    metadata: nested dict
        Additional metadata saved into the `fishgrid.cfg`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format, only None or 'FISHGRID' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: Path
        The actual folder used for writing the data.

    Raises
    ------
    ValueError
        File format or encoding not supported.
    """
    # Helper writing a single marker entry into the open file `df`.
    # The date and time of the marker are computed from `starttime`
    # plus `index/rate` seconds.
    # NOTE(review): all callers below pass an `index` that is already
    # multiplied by the number of channels - confirm that `index/rate`
    # then still yields the intended time.
    def write_timestamp(df, count, index, span, rate, starttime,
                        label, comment):
        datetime = starttime + dt.timedelta(seconds=index/rate)
        df.write(f' Num: {count}\n')
        df.write(f' Index1: {index}\n')
        #df.write(f' Index2: 0\n')
        #df.write(f' Index3: 0\n')
        #df.write(f' Index4: 0\n')
        if span > 0:
            df.write(f' Span1: {span}\n')
        df.write(f' Date: {datetime.date().isoformat()}\n')
        df.write(f' Time: {datetime.time().isoformat(timespec="seconds")}\n')
        if label:
            df.write(f' Label: {label}\n')
        df.write(f'Comment: {comment}\n')
        df.write('\n')

    # check format and encoding:
    if format is None:
        format = 'FISHGRID'
    if format.upper() != 'FISHGRID':
        raise ValueError(f'file format {format} not supported by fishgrid file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by fishgrid file format')
    filepath = Path(filepath)
    if not filepath.exists():
        filepath.mkdir()
    # write data (all channels into a single file):
    with open(filepath / 'traces-grid1.raw', 'wb') as df:
        df.write(data.astype(np.float32).tobytes())
    # write metadata:
    if unit is None:
        unit = 'mV'
    cfgfile = filepath / 'fishgrid.cfg'
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # arrange the channels on a close to square grid:
    ncols = int(np.ceil(np.sqrt(nchannels)))
    nrows = int(np.ceil(nchannels/ncols))
    if metadata is None:
        metadata = {}
    if 'FishGrid' in metadata:
        # metadata already have a FishGrid section:
        # copy all sections (dict values) and collect the remaining
        # top-level key-value pairs in rmd.
        md = {}
        rmd = {}
        for k in metadata:
            if isinstance(metadata[k], dict):
                md[k] = deepcopy(metadata[k])
            else:
                rmd[k] = metadata[k]
        if len(rmd) > 0:
            # put the remaining key-value pairs into 'FishGrid.Recording'.
            # presumably find_key() returns the dict and the key where
            # that section is or should be stored - confirm against audioio.
            m, k = find_key(md, 'FishGrid.Recording')
            if k in m:
                m[k].update(rmd)
            else:
                m[k] = rmd
    else:
        # build the FishGrid sections from a copy of the metadata;
        # entries are moved out of smd into the respective sections.
        smd = deepcopy(metadata)
        gm = dict(Used1='true', Columns1=f'{ncols}', Rows1=f'{nrows}')
        hm = {'DAQ board': dict()}
        if not move_metadata(smd, hm, 'Amplifier'):
            # no Amplifier section was moved, collect individual keys.
            # Note: `am` is a local dict here, within this function it
            # does not refer to the module-level audioio.audiometadata alias.
            am = {}
            move_metadata(smd, am, ['Amplifier.Name', 'AmplName'], 'AmplName')
            move_metadata(smd, am, ['Amplifier.Model', 'AmplModel'], 'AmplModel')
            move_metadata(smd, am, 'Amplifier.Type')
            move_metadata(smd, am, 'Gain')
            move_metadata(smd, am, 'HighpassCutoff')
            move_metadata(smd, am, 'LowpassCutoff')
            if len(am) > 0:
                hm['Amplifier'] = am
        md = dict(FishGrid={'Grid 1': gm, 'Hardware Settings': hm})
        move_metadata(smd, md['FishGrid'], 'Recording')
        # general recording infos (gm is reused for a new dict):
        gm = {}
        starttime = get_datetime(smd, remove=True)
        if not starttime is None:
            gm['StartDate'] = starttime.date().isoformat()
            gm['StartTime'] = starttime.time().isoformat(timespec='seconds')
        move_metadata(smd, gm, 'Location')
        move_metadata(smd, gm, 'Position')
        move_metadata(smd, gm, 'WaterTemperature')
        move_metadata(smd, gm, 'WaterConductivity')
        move_metadata(smd, gm, 'WaterpH')
        move_metadata(smd, gm, 'WaterOxygen')
        move_metadata(smd, gm, 'Temperature')
        move_metadata(smd, gm, 'Humidity')
        move_metadata(smd, gm, 'Pressure')
        move_metadata(smd, gm, 'Comment')
        move_metadata(smd, gm, 'Experimenter')
        if len(gm) > 0:
            if not 'Recording' in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'General': gm})
        # buffer and timing settings:
        bm = {}
        move_metadata(smd, bm, 'DataTime')
        move_metadata(smd, bm, 'DataInterval')
        move_metadata(smd, bm, 'BufferTime')
        move_metadata(smd, bm, 'BufferInterval')
        if len(bm) > 0:
            if not 'Recording' in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'Buffers and timing': bm})
        # whatever is left over goes into its own section:
        if smd:
            md['FishGrid']['Other'] = smd
    # sampling rate (in kHz) and maximum amplitude always come
    # from the function arguments:
    add_metadata(md,
                 [f'FishGrid.Hardware Settings.DAQ board.AISampleRate={0.001*rate:.3f}kHz',
                  f'FishGrid.Hardware Settings.DAQ board.AIMaxVolt={amax:g}{unit}'])
    with open(cfgfile, 'w') as df:
        # each top-level section is introduced by a line starting with '*':
        for k in md:
            df.write(f'*{k}\n')
            write_metadata_text(df, md[k], prefix=' ')
    # write markers:
    filename = filepath / 'timestamps.dat'
    # start time is taken from the original metadata,
    # falling back to the start of the Unix epoch in UTC:
    starttime = get_datetime(metadata, (('DateTimeOriginal',),
                                        ('OriginationDate', 'OriginationTime'),
                                        ('StartDate', 'StartTime'),
                                        ('Location_Time',)),
                             default=dt.datetime.fromtimestamp(0, dt.timezone.utc))
    with open(filename, 'w') as df:
        count = 0
        # first entry marks the begin of the recording:
        write_timestamp(df, count, 0, 0, rate, starttime,
                        '', 'begin of recording')
        count += 1
        if locs is not None:
            for i in range(len(locs)):
                label = ''
                comment = ''
                if labels is not None and len(labels) > i:
                    label = labels[i,0] if labels.ndim > 1 else labels[i]
                    comment = labels[i,1] if labels.ndim > 1 else ''
                index = locs[i,0] if locs.ndim > 1 else locs[i]
                span = locs[i,1] if locs.ndim > 1 else 0
                # indices and spans are written multiplied by
                # the number of channels:
                write_timestamp(df, count, index*nchannels,
                                span*nchannels, rate,
                                starttime, label, comment)
                count += 1
        # last entry marks the end of the recording:
        write_timestamp(df, count, len(data)*nchannels, 0, rate,
                        starttime, '', 'end of recording')
    return filepath

458 

459 

def formats_pickle():
    """Data formats supported by pickle.dump().

    Returns
    -------
    formats: list of str
        ['PKL'] if the pickle module is available,
        an empty list otherwise.
    """
    return ['PKL'] if data_modules['pickle'] else []

472 

473 

def encodings_pickle(format=None):
    """Encodings of the pickle format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'PKL'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'PKL' (case insensitive).
    """
    fmt = format if format else 'PKL'
    if fmt.upper() == 'PKL':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

493 

494 

def write_pickle(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data into python pickle file.

    All items are collected in a single dictionary that is
    dumped into the file.

    Documentation
    -------------
    https://docs.python.org/3/library/pickle.html

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        If the extension does not start with 'p' (case insensitive),
        it is replaced by '.pkl'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data" after recoding according to `encoding`.
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit" if not empty.
    metadata: nested dict
        Additional metadata saved into the pickle.
        Stored under the key "metadata" if not empty.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'PKL' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The pickle module is not available.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['pickle']:
        raise ImportError
    if format is None:
        format = 'PKL'
    if format.upper() != 'PKL':
        raise ValueError(f'file format {format} not supported by pickle file format')
    filepath = Path(filepath)
    suffix = filepath.suffix
    # keep any extension starting with 'p', otherwise enforce '.pkl':
    if not (len(suffix) > 1 and suffix[1].upper() == 'P'):
        filepath = filepath.with_suffix('.pkl')
    encoding = 'DOUBLE' if encoding is None else encoding.upper()
    if encoding not in encodings_pickle(format):
        raise ValueError(f'file encoding {encoding} not supported by pickle file format')
    ddict = {'data': recode_array(data, amax, encoding),
             'rate': rate,
             'amax': amax}
    if unit:
        ddict['unit'] = unit
    if metadata:
        ddict['metadata'] = metadata
    # marker positions and spans:
    if locs is not None and len(locs) > 0:
        locs_2d = locs.ndim != 1
        ddict['positions'] = locs[:,0] if locs_2d else locs
        if locs_2d and locs.shape[1] > 1:
            ddict['spans'] = locs[:,1]
    # marker labels and descriptions:
    if labels is not None and len(labels) > 0:
        labels_2d = labels.ndim != 1
        ddict['labels'] = labels[:,0] if labels_2d else labels
        if labels_2d and labels.shape[1] > 1:
            ddict['descriptions'] = labels[:,1]
    with open(filepath, 'wb') as pf:
        pickle.dump(ddict, pf)
    return filepath

585 

586 

def insert_container_metadata(metadata, data_dict, metadatakey='metadata'):
    """Insert flattened metadata to data dictionary for a container file format.

    The metadata are flattened using '__' as the separator between
    section names and keys. Each flattened key is then prefixed by
    `metadatakey` and '__', and the resulting key-value pairs are
    added to `data_dict`, which is modified in place.

    Parameters
    ----------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    data_dict: dict
        Dictionary of the data items contained in the container to
        which the metadata should be added.
    metadatakey: str or list of str
        Name of the variable holding the metadata.
    """
    flat = flatten_metadata(metadata, True, sep='__')
    # rename the keys in place, preserving their order:
    for key in tuple(flat.keys()):
        value = flat.pop(key)
        flat[metadatakey + '__' + key] = value
    data_dict.update(flat)

604 

605 

def formats_numpy():
    """Data formats supported by numpy.savez().

    Returns
    -------
    formats: list of str
        ['NPZ'] if the numpy module is available,
        an empty list otherwise.
    """
    return ['NPZ'] if data_modules['numpy'] else []

618 

619 

def encodings_numpy(format=None):
    """Encodings of the numpy file format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'NPZ'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'NPZ' (case insensitive).
    """
    fmt = format if format else 'NPZ'
    if fmt.upper() == 'NPZ':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

639 

640 

def write_numpy(filepath, data, rate, amax=1.0, unit=None,
                metadata=None, locs=None, labels=None, format=None,
                encoding=None):
    """Write data into numpy npz file.

    All items are collected in a dictionary whose entries are
    passed as keyword arguments to `numpy.savez()`.

    Documentation
    -------------
    https://numpy.org/doc/stable/reference/generated/numpy.savez.html

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        If the extension does not start with 'n' (case insensitive),
        it is replaced by '.npz'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data" after recoding according to `encoding`.
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit" if not empty.
    metadata: nested dict
        Additional metadata saved into the numpy file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'NPZ' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The numpy module is not available.
    ValueError
        File format or encoding not supported.
    """
    def as_unicode(strings):
        # fixed-width unicode array, just wide enough for the longest string:
        width = max(len(s) for s in strings)
        return strings.astype(dtype=f'U{width}')

    if not data_modules['numpy']:
        raise ImportError
    if format is None:
        format = 'NPZ'
    if format.upper() not in formats_numpy():
        raise ValueError(f'file format {format} not supported by numpy file format')
    filepath = Path(filepath)
    suffix = filepath.suffix
    # keep any extension starting with 'n', otherwise enforce '.npz':
    if not (len(suffix) > 1 and suffix[1].upper() == 'N'):
        filepath = filepath.with_suffix('.npz')
    encoding = 'DOUBLE' if encoding is None else encoding.upper()
    if encoding not in encodings_numpy(format):
        raise ValueError(f'file encoding {encoding} not supported by numpy file format')
    ddict = {'data': recode_array(data, amax, encoding),
             'rate': rate,
             'amax': amax}
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    # marker positions and spans:
    if locs is not None and len(locs) > 0:
        locs_2d = locs.ndim != 1
        ddict['positions'] = locs[:,0] if locs_2d else locs
        if locs_2d and locs.shape[1] > 1:
            ddict['spans'] = locs[:,1]
    # marker labels and descriptions:
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = as_unicode(labels)
        else:
            ddict['labels'] = as_unicode(labels[:,0])
            if labels.shape[1] > 1:
                ddict['descriptions'] = as_unicode(labels[:,1])
    np.savez(filepath, **ddict)
    return filepath

734 

735 

def formats_mat():
    """Data formats supported by scipy.io.savemat().

    Returns
    -------
    formats: list of str
        ['MAT'] if the scipy.io module is available,
        an empty list otherwise.
    """
    return ['MAT'] if data_modules['scipy'] else []

748 

749 

def encodings_mat(format=None):
    """Encodings of the matlab format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'MAT'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'MAT' (case insensitive).
    """
    fmt = format if format else 'MAT'
    if fmt.upper() == 'MAT':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

769 

770 

def write_mat(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into matlab file.

    All items are collected in a dictionary that is
    passed to `scipy.io.savemat()`.

    Documentation
    -------------
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.savemat.html

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        If the extension does not start with 'm' (case insensitive),
        it is replaced by '.mat'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data" after recoding according to `encoding`.
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit" if not empty.
    metadata: nested dict
        Additional metadata saved into the mat file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'MAT' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The scipy.io module is not available.
    ValueError
        File format or encoding not supported.
    """
    def as_unicode(strings):
        # fixed-width unicode array, just wide enough for the longest string:
        width = max(len(s) for s in strings)
        return strings.astype(dtype=f'U{width}')

    if not data_modules['scipy']:
        raise ImportError
    if format is None:
        format = 'MAT'
    if format.upper() not in formats_mat():
        raise ValueError(f'file format {format} not supported by matlab file format')
    filepath = Path(filepath)
    suffix = filepath.suffix
    # keep any extension starting with 'm', otherwise enforce '.mat':
    if not (len(suffix) > 1 and suffix[1].upper() == 'M'):
        filepath = filepath.with_suffix('.mat')
    encoding = 'DOUBLE' if encoding is None else encoding.upper()
    if encoding not in encodings_mat(format):
        raise ValueError(f'file encoding {encoding} not supported by matlab file format')
    ddict = {'data': recode_array(data, amax, encoding),
             'rate': rate,
             'amax': amax}
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    # marker positions and spans:
    if locs is not None and len(locs) > 0:
        locs_2d = locs.ndim != 1
        ddict['positions'] = locs[:,0] if locs_2d else locs
        if locs_2d and locs.shape[1] > 1:
            ddict['spans'] = locs[:,1]
    # marker labels and descriptions:
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = as_unicode(labels)
        else:
            ddict['labels'] = as_unicode(labels[:,0])
            if labels.shape[1] > 1:
                ddict['descriptions'] = as_unicode(labels[:,1])
    sio.savemat(filepath, ddict)
    return filepath

863 

864 

def formats_raw():
    """File formats offered by the raw writer.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'RAW'.
    """
    formats = ['RAW']
    return formats

874 

875 

def encodings_raw(format=None):
    """Encodings supported for raw file formats.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'RAW'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'RAW' (case insensitive).
    """
    fmt = format if format else 'RAW'
    if fmt.upper() == 'RAW':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

895 

896 

def write_raw(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into raw file.

    Writes just the data without sampling rate, metadata and markers.

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        If the extension does not start with 'r' (case insensitive),
        it is replaced by '.raw'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz. Not written to the file.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Used for scaling the data for integer encodings.
    unit: str
        Unit of the data. Not written to the file.
    metadata: nested dict
        Additional metadata. Not written to the file.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not written to the file.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not written to the file.
    format: str or None
        File format, only None or 'RAW' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ValueError
        File format or encoding not supported.
    """
    if format is None:
        format = 'RAW'
    if format.upper() not in formats_raw():
        raise ValueError(f'file format {format} not supported by raw file format')
    filepath = Path(filepath)
    ext = filepath.suffix
    # keep any extension starting with 'r', otherwise enforce '.raw':
    if len(ext) <= 1 or ext[1].upper() != 'R':
        filepath = filepath.with_suffix('.raw')
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_raw(format):
        raise ValueError(f'file encoding {encoding} not supported by raw file format')
    buffer = recode_array(data, amax, encoding)
    with open(filepath, 'wb') as df:
        df.write(buffer.tobytes())
    return filepath

956 

957 

def formats_audioio():
    """Data formats supported by audioio.

    Returns
    -------
    formats: list of str
        The formats reported by `audioio.audiowriter.available_formats()`,
        or an empty list if the audioio module is not available.
    """
    return aw.available_formats() if data_modules['audioio'] else []

970 

971 

def encodings_audio(format):
    """Encodings of any audio format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        The encodings reported by
        `audioio.audiowriter.available_encodings()` for `format`,
        or an empty list if the audioio module is not available.
    """
    return aw.available_encodings(format) if data_modules['audioio'] else []

989 

990 

def write_audioio(filepath, data, rate, amax=1.0, unit=None,
                  metadata=None, locs=None, labels=None, format=None,
                  encoding=None, gainkey=default_gain_keys, sep='.'):
    """Write data into audio file.

    The data are divided by `amax` before they are stored in the
    audio file. If `amax` is None or not finite, the gain found in
    the metadata is used instead.
    After this operation, the data values need to range between -1 and 1,
    in particular if the data are encoded as integers
    (i.e. PCM_16, PCM_32 and PCM_64).
    Note, that this function does not check for this requirement!

    Whenever the data were rescaled (`amax` differs from one), the
    gain is stored in the metadata. A `metadata` dictionary passed
    to this function is modified in place in that case.

    Documentation
    -------------
    https://bendalab.github.io/audioio/

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float or None
        Maximum possible amplitude of the data in `unit`.
        If None or not finite, it is retrieved from the gain in the
        metadata via `audioio.audiometadata.get_gain()`.
    unit: str or None
        Unit of the data. If supplied and a gain is retrieved from the
        metadata it has to match the unit of that gain.
        If the data are rescaled, the gain written to the metadata is
        `amax` followed by this unit (nothing for None, empty,
        or 'a.u.').
    metadata: nested dict or None
        Metadata saved into the audio file.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    gainkey: str or list of str
        Key in the metadata that holds some gain information.
        Used to look up the gain if `amax` is None or not finite,
        and to find the metadata entry that is updated when the data
        were rescaled. If none of the keys (nor 'Gain') is found, the gain
        is added under the first key.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
    sep: str
        String that separates section names in `gainkey`.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The audioio module is not available.
    ValueError
        `unit` does not match gain in metadata.
    """
    if not data_modules['audioio']:
        raise ImportError('the audioio package is required for writing audio files')
    if amax is None or not np.isfinite(amax):
        amax, u = am.get_gain(metadata, gainkey, sep, 1.0, 'a.u.')
        if not unit:
            unit = u
        elif unit != 'a.u.' and u != 'a.u.' and unit != u:
            raise ValueError(f'unit "{unit}" does not match gain unit "{u}" in metadata')
    if amax != 1.0:
        data = data / amax
        if metadata is None:
            metadata = {}
        # no unit at all must not end up as the string "None" in the gain:
        if not unit or unit == 'a.u.':
            unit = ''
        gain = f'{amax:g}{unit}'
        # work on a private copy, so that neither the caller's list nor
        # the shared default `default_gain_keys` is modified:
        if isinstance(gainkey, (list, tuple, np.ndarray)):
            gainkeys = list(gainkey)
        else:
            gainkeys = [gainkey]
        gainkeys.append('Gain')
        for gk in gainkeys:
            m, k = am.find_key(metadata, gk)
            if k in m:
                # update the first existing gain entry:
                m[k] = gain
                break
        else:
            # no gain entry exists yet, add one under the first key:
            if 'INFO' in metadata:
                metadata['INFO'][gainkeys[0]] = gain
            else:
                metadata[gainkeys[0]] = gain
    # forward format and encoding, they were silently ignored before:
    aw.write_audio(filepath, data, rate, metadata, locs, labels,
                   format=format, encoding=encoding)
    return Path(filepath)

1087 

1088 

# Entries: (format name, key into `data_modules` or None, formats function).
data_formats_funcs = (
    ('relacs', None, formats_relacs),
    ('fishgrid', None, formats_fishgrid),
    ('pickle', 'pickle', formats_pickle),
    ('numpy', 'numpy', formats_numpy),
    ('matlab', 'scipy', formats_mat),
    ('raw', None, formats_raw),
    ('audio', 'audioio', formats_audioio),
)
"""List of implemented formats functions.

Each element of the list is a tuple with the format's name, the
module's name in `data_modules` or None, and the formats function.
"""

1103 

1104 

def available_formats():
    """Collect all data and audio file formats that can be written.

    Queries every formats function in `data_formats_funcs` whose
    required module is marked as available in `data_modules`
    (or that does not require a module at all).

    Returns
    -------
    formats: list of str
        Sorted list of the supported file formats as strings,
        without duplicates.
    """
    supported = set()
    for _, module, list_formats in data_formats_funcs:
        if module and not data_modules[module]:
            # the module needed for these formats is not installed
            continue
        supported.update(list_formats())
    return sorted(supported)

1118 

1119 

# Entries: (module name, encodings function).
data_encodings_funcs = (
    ('relacs', encodings_relacs),
    ('fishgrid', encodings_fishgrid),
    ('pickle', encodings_pickle),
    ('numpy', encodings_numpy),
    ('matlab', encodings_mat),
    ('raw', encodings_raw),
    ('audio', encodings_audio),
)
""" List of implemented encodings functions.

Each element of the list is a tuple with the module's name and the encodings function.
"""

1133 

1134 

def available_encodings(format):
    """Encodings supported for a data file format.

    The encodings functions in `data_encodings_funcs` are asked one
    after the other. The first non-empty answer is returned.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty list if none of the encodings functions knows `format`.
    """
    # lazily evaluated, functions after the first hit are not called:
    answers = (list_encodings(format)
               for _, list_encodings in data_encodings_funcs)
    return next((encs for encs in answers if len(encs) > 0), [])

1153 

1154 

data_writer_funcs = dict(
    relacs=write_relacs,
    fishgrid=write_fishgrid,
    pickle=write_pickle,
    numpy=write_numpy,
    matlab=write_mat,
    raw=write_raw,
    audio=write_audioio,
)
"""Dictionary of implemented write functions.

Keys are the format's name and values the corresponding write
function.
"""

1169 

1170 

def write_data(filepath, data, rate, amax=1.0, unit=None,
               metadata=None, locs=None, labels=None, format=None,
               encoding=None, verbose=0, **kwargs):
    """Write data into a file.

    The first entry in `data_formats_funcs` whose module is available
    and that supports the requested format determines the write
    function taken from `data_writer_funcs`.

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        File format is determined from extension.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data.
    metadata: nested dict
        Additional metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    verbose: int
        If >0 report the written file, if >1 also report
        sampling rate, number of channels and frames, and the range.
    kwargs: dict
        Additional, file format specific keyword arguments.
        They are passed on to the write function.

    Returns
    -------
    filepath: str or Path
        The actual file name used for writing the data,
        as returned by the write function of the file format.

    Raises
    ------
    ValueError
        Unspecified file format.
    IOError
        Requested file format not supported.

    Example
    -------
    ```
    import numpy as np
    from thunderlab.datawriter import write_data

    rate = 28000.0
    freq = 800.0
    time = np.arange(0.0, 1.0, 1/rate)      # one second
    data = 2.5*np.sin(2.0*np.pi*freq*time)  # 800Hz sine wave
    md = dict(Artist='underscore_')         # metadata
    write_data('audio/file.npz', data, rate, 2.5, 'mV', md)
    ```
    """
    if not format:
        format = format_from_extension(filepath)
    if not format:
        raise ValueError('unspecified file format')
    for fmt, lib, formats_func in data_formats_funcs:
        if lib and not data_modules[lib]:
            # required module is not installed
            continue
        if format.upper() in formats_func():
            writer_func = data_writer_funcs[fmt]
            filepath = writer_func(filepath, data, rate, amax,
                                   unit, metadata, locs, labels,
                                   format=format, encoding=encoding,
                                   **kwargs)
            if verbose > 0:
                print(f'wrote data to file "{filepath}" using {fmt} format')
                if verbose > 1:
                    # amax may be None and unit may be missing, the
                    # report must not fail after the file was written:
                    amax_str = 'unknown' if amax is None else f'{amax:g}'
                    unit_str = unit if unit else ''
                    print(f' sampling rate: {rate:g}Hz')
                    print(f' channels : {data.shape[1] if len(data.shape) > 1 else 1}')
                    print(f' frames : {len(data)}')
                    print(f' range : {amax_str}{unit_str}')
            return filepath
    raise IOError(f'file format "{format.upper()}" not supported.')

1256 

1257 

def demo(file_path, channels=2, format=None):
    """Demo of the datawriter functions.

    Generates sine waves for each channel and writes them
    with `write_data()` to a file.

    Parameters
    ----------
    file_path: str
        File path of a data file.
    channels: int
        Number of channels to be generated and written.
    format: str or None
        File format to be used.
    """
    print('generate data ...')
    rate = 44100.0
    time = np.arange(0.0, 1.0, 1.0/rate)
    data = np.zeros((len(time), channels))
    # amplitude decreases and frequency increases with channel index:
    for c in range(channels):
        amplitude = 0.1*(channels - c)
        frequency = 440.0 + c*8.0
        data[:, c] = amplitude*np.sin(2.0*np.pi*frequency*time)

    print(f"write_data('{file_path}') ...")
    write_data(file_path, data, rate, 1.0, 'mV', format=format, verbose=2)

    print('done.')

1279 

1280 

def main(*cargs):
    """Call demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Checking thunderlab.datawriter module.')
    parser.add_argument('-c', dest='channels', default=2, type=int,
                        help='number of channels to be written')
    parser.add_argument('-f', dest='format', default=None, type=str,
                        help='file format')
    # nargs='?' makes the file name optional, so that the default is
    # actually used; the parsed value is then a plain string, not a list:
    parser.add_argument('file', nargs='?', default='test.npz', type=str,
                        help='name of data file')
    args = parser.parse_args(cargs)
    demo(args.file, args.channels, args.format)

1300 

1301 

if __name__ == '__main__':
    # run the demo with the arguments given on the command line:
    main(*sys.argv[1:])

1304 

1305 

1306