Coverage for src/thunderlab/datawriter.py: 87%

494 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-11-29 17:59 +0000

1"""Writing numpy arrays of floats to data files. 

2 

3- `write_data()`: write data into a file. 

4- `available_formats()`: supported data and audio file formats. 

5- `available_encodings()`: encodings of a data file format. 

6- `format_from_extension()`: deduce data file format from file extension. 

7- `recode_array()`: recode array of floats. 

8- `insert_container_metadata()`: insert flattened metadata to data dictionary for a container file format. 

9""" 

10 

11import os 

12import sys 

13import datetime as dt 

14from copy import deepcopy 

15from audioio import find_key, add_metadata, move_metadata 

16from audioio import get_datetime, default_gain_keys 

17 

# Registry of optional dependencies.  Each import below is attempted once
# when this module is loaded and the outcome is recorded here, so that the
# formats_*() and write_*() functions can check availability before they
# touch the corresponding module.
data_modules = {}
"""Dictionary with availability of various modules needed for writing data.
Keys are the module names, values are booleans.
"""

# pickle: checked by formats_pickle() and write_pickle().
try:
    import pickle
    data_modules['pickle'] = True
except ImportError:
    data_modules['pickle'] = False

# numpy: checked by formats_numpy() and write_numpy().
try:
    import numpy as np
    data_modules['numpy'] = True
except ImportError:
    data_modules['numpy'] = False

# scipy.io: checked by formats_mat() and write_mat().
try:
    import scipy.io as sio
    data_modules['scipy'] = True
except ImportError:
    data_modules['scipy'] = False

# audioio: checked by format_from_extension(), formats_audioio(),
# encodings_audio() and write_audioio().
try:
    import audioio.audiowriter as aw
    import audioio.audiometadata as am
    from audioio import write_metadata_text, flatten_metadata
    data_modules['audioio'] = True
except ImportError:
    data_modules['audioio'] = False

48 

49 

def format_from_extension(filepath):
    """Deduce data file format from file extension.

    Parameters
    ----------
    filepath: str
        Name of the data file.

    Returns
    -------
    format: str
        Data format deduced from file extension.
        None if `filepath` is empty or has no extension.
        If audioio is available, the result of
        `audioio.audiowriter.format_from_extension()` is returned,
        otherwise the upper-cased extension without the leading dot.
    """
    if not filepath:
        return None
    suffix = os.path.splitext(filepath)[1]
    # splitext() keeps the extension separator, strip it:
    if suffix.startswith('.'):
        suffix = suffix[1:]
    if not suffix:
        return None
    if data_modules['audioio']:
        return aw.format_from_extension(filepath)
    return suffix.upper()

76 

77 

def recode_array(data, amax, encoding):
    """Recode array of floats.

    Integer encodings scale the data by `2**(bits-1)/amax`, round to
    the nearest integer and clip symmetrically to
    `+/-(2**(bits-1) - 1)`. Floating point encodings only cast.

    Parameters
    ----------
    data: array of floats
        Data array with values ranging between -amax and +amax.
    amax: float
        Maximum amplitude of data range.
    encoding: str
        Encoding, one of PCM_16, PCM_32, PCM_64, FLOAT or DOUBLE.

    Returns
    -------
    buffer: array
        The data recoded according to `encoding`.
        For any other `encoding` the `data` are returned unchanged.
    """
    dtypes = {'PCM_16': (2, 'i2'),
              'PCM_32': (4, 'i4'),
              'PCM_64': (8, 'i8'),
              'FLOAT': (4, 'f'),
              'DOUBLE': (8, 'd')}

    if encoding not in dtypes:
        return data
    sampwidth, dtype = dtypes[encoding]
    if dtype[0] != 'i':
        return data.astype(dtype, copy=False)
    factor = 2**(sampwidth*8 - 1)
    scaled = np.round(data/amax*factor)
    # Decide on clipping from the *scaled* values: data slightly below
    # +amax also round up to `factor`, which does not fit into the
    # integer type and would wrap around to a large negative number.
    # Powers of two are exact as floats, so the comparison is safe
    # even for 64 bit.
    too_high = scaled >= float(factor)
    too_low = scaled <= -float(factor)
    # zero the out-of-range samples before casting to avoid overflow:
    buffer = np.where(too_high | too_low, 0.0, scaled).astype(dtype)
    buffer[too_high] = factor - 1
    buffer[too_low] = -(factor - 1)
    return buffer

114 

115 

def formats_relacs():
    """File formats handled by the relacs writer.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
    """
    supported = ['RELACS']
    return supported

125 

126 

def encodings_relacs(format=None):
    """Encodings of the relacs file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string select 'RELACS'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the relacs format.
    """
    fmt = format if format else 'RELACS'
    return ['FLOAT'] if fmt.upper() == 'RELACS' else []

146 

147 

def write_relacs(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data as relacs raw files.

    Creates the folder `filepath` if it does not exist and writes
    one `trace-N.raw` file per channel (32 bit floats), a
    `stimuli.dat` file describing the traces, three event files
    and, if `metadata` are provided, an `info.dat` file.

    Parameters
    ----------
    filepath: str
        Full path of folder where to write relacs files.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Not used by this writer.
    unit: str
        Unit of the data. Defaults to 'V'.
    metadata: nested dict
        Additional metadata saved into `info.dat`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not used by this writer.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not used by this writer.
    format: str or None
        File format, only None or 'RELACS' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data
        (the path of the `stimuli.dat` file).

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'RELACS'
    if format.upper() != 'RELACS':
        raise ValueError(f'file format {format} not supported by relacs file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        # report the offending encoding, not the format:
        raise ValueError(f'file encoding {encoding} not supported by relacs file format')
    if not os.path.exists(filepath):
        os.mkdir(filepath)
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # write data, one raw file per channel:
    for c in range(nchannels):
        trace = data if data.ndim == 1 else data[:,c]
        with open(os.path.join(filepath, f'trace-{c+1}.raw'), 'wb') as df:
            df.write(trace.astype(np.float32).tobytes())
    if unit is None:
        unit = 'V'
    # write data format:
    filename = os.path.join(filepath, 'stimuli.dat')
    with open(filename, 'w') as df:
        df.write('# analog input traces:\n')
        for c in range(nchannels):
            df.write(f'# identifier{c+1} : V-{c+1}\n')
            # must name the raw file written above for this channel:
            df.write(f'# data file{c+1} : trace-{c+1}.raw\n')
            df.write(f'# sample interval{c+1} : {1000.0/rate:.4f}ms\n')
            df.write(f'# sampling rate{c+1} : {rate:.2f}Hz\n')
            df.write(f'# unit{c+1} : {unit}\n')
        df.write('# event lists:\n')
        df.write('# event file1: stimulus-events.dat\n')
        df.write('# event file2: restart-events.dat\n')
        df.write('# event file3: recording-events.dat\n')
    # write empty event files:
    for events in ['Recording', 'Restart', 'Stimulus']:
        with open(os.path.join(filepath, f'{events.lower()}-events.dat'), 'w') as df:
            df.write(f'# events: {events}\n\n')
            df.write('#Key\n')
            if events == 'Stimulus':
                df.write('# t duration\n')
                df.write('# sec s\n')
                df.write('# 1 2\n')
            else:
                df.write('# t\n')
                df.write('# sec\n')
                df.write('# 1\n')
                if events == 'Recording':
                    # the recording starts at time zero:
                    df.write(' 0.0\n')
    # write metadata:
    if metadata:
        write_metadata_text(os.path.join(filepath, 'info.dat'),
                            metadata, prefix='# ')
    return filename

248 

249 

def formats_fishgrid():
    """File formats handled by the fishgrid writer.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
    """
    supported = ['FISHGRID']
    return supported

259 

260 

def encodings_fishgrid(format=None):
    """Encodings of the fishgrid file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string select 'FISHGRID'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the fishgrid format.
    """
    name = (format or 'FISHGRID').upper()
    if name == 'FISHGRID':
        return ['FLOAT']
    return []

280 

281 

def write_fishgrid(filepath, data, rate, amax=1.0, unit=None,
                   metadata=None, locs=None, labels=None, format=None,
                   encoding=None):
    """Write data as fishgrid raw files.

    Creates the folder `filepath` if it does not exist and writes
    the data as 32 bit floats into `traces-grid1.raw`, the
    metadata into `fishgrid.cfg` and the markers into
    `timestamps.dat`.

    Parameters
    ----------
    filepath: str
        Full path of the folder where to write fishgrid files.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data. Defaults to 'mV'.
    metadata: nested dict or None
        Additional metadata saved into the `fishgrid.cfg`.
        None is treated like an empty dictionary.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format, only None or 'FISHGRID' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data
        (the path of the `fishgrid.cfg` file).

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    def write_timestamp(df, count, index, span, rate, starttime,
                        label, comment):
        # Write a single marker entry to the open timestamps file.
        # NOTE(review): callers pass `index` already multiplied by the
        # number of channels, so the time offset index/rate scales
        # with the channel count - confirm that this is intended.
        stamp = starttime + dt.timedelta(seconds=index/rate)
        df.write(f' Num: {count}\n')
        df.write(f' Index1: {index}\n')
        #df.write(f' Index2: 0\n')
        #df.write(f' Index3: 0\n')
        #df.write(f' Index4: 0\n')
        if span > 0:
            df.write(f' Span1: {span}\n')
        df.write(f' Date: {stamp.date().isoformat()}\n')
        df.write(f' Time: {stamp.time().isoformat(timespec="seconds")}\n')
        if label:
            df.write(f' Label: {label}\n')
        df.write(f'Comment: {comment}\n')
        df.write('\n')

    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'FISHGRID'
    if format.upper() != 'FISHGRID':
        raise ValueError(f'file format {format} not supported by fishgrid file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        # report the offending encoding, not the format:
        raise ValueError(f'file encoding {encoding} not supported by fishgrid file format')
    if metadata is None:
        # the default must not crash the membership test below:
        metadata = {}
    if not os.path.exists(filepath):
        os.mkdir(filepath)
    # write data:
    with open(os.path.join(filepath, 'traces-grid1.raw'), 'wb') as df:
        df.write(data.astype(np.float32).tobytes())
    # write metadata:
    if unit is None:
        unit = 'mV'
    cfgfilename = os.path.join(filepath, 'fishgrid.cfg')
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # arrange the channels on an approximately square grid:
    ncols = int(np.ceil(np.sqrt(nchannels)))
    nrows = int(np.ceil(nchannels/ncols))
    if 'FishGrid' in metadata:
        # metadata already have a FishGrid section: keep the sections
        # and collect top-level non-dict entries in FishGrid.Recording.
        md = {}
        rmd = {}
        for k in metadata:
            if isinstance(metadata[k], dict):
                md[k] = deepcopy(metadata[k])
            else:
                rmd[k] = metadata[k]
        if len(rmd) > 0:
            m, k = find_key(md, 'FishGrid.Recording')
            if k in m:
                m[k].update(rmd)
            else:
                m[k] = rmd
    else:
        # build a FishGrid section from generic metadata; entries that
        # are moved out of the copy `smd` do not end up in 'Other'.
        smd = deepcopy(metadata)
        gm = dict(Used1='true', Columns1=f'{ncols}', Rows1=f'{nrows}')
        hm = {'DAQ board': dict()}
        if not move_metadata(smd, hm, 'Amplifier'):
            # named `ampl_md` to not shadow the module alias `am`:
            ampl_md = {}
            move_metadata(smd, ampl_md, ['Amplifier.Name', 'AmplName'], 'AmplName')
            move_metadata(smd, ampl_md, ['Amplifier.Model', 'AmplModel'], 'AmplModel')
            move_metadata(smd, ampl_md, 'Amplifier.Type')
            move_metadata(smd, ampl_md, 'Gain')
            move_metadata(smd, ampl_md, 'HighpassCutoff')
            move_metadata(smd, ampl_md, 'LowpassCutoff')
            if len(ampl_md) > 0:
                hm['Amplifier'] = ampl_md
        md = dict(FishGrid={'Grid 1': gm, 'Hardware Settings': hm})
        move_metadata(smd, md['FishGrid'], 'Recording')
        gm = {}
        starttime = get_datetime(smd, remove=True)
        if starttime is not None:
            gm['StartDate'] = starttime.date().isoformat()
            gm['StartTime'] = starttime.time().isoformat(timespec='seconds')
        for key in ('Location', 'Position', 'WaterTemperature',
                    'WaterConductivity', 'WaterpH', 'WaterOxygen',
                    'Temperature', 'Humidity', 'Pressure', 'Comment',
                    'Experimenter'):
            move_metadata(smd, gm, key)
        if len(gm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'General': gm})
        bm = {}
        for key in ('DataTime', 'DataInterval', 'BufferTime',
                    'BufferInterval'):
            move_metadata(smd, bm, key)
        if len(bm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'Buffers and timing': bm})
        if smd:
            # whatever was not moved above:
            md['FishGrid']['Other'] = smd
    add_metadata(md,
                 [f'FishGrid.Hardware Settings.DAQ board.AISampleRate={0.001*rate:.3f}kHz',
                  f'FishGrid.Hardware Settings.DAQ board.AIMaxVolt={amax:g}{unit}'])
    with open(cfgfilename, 'w') as df:
        for k in md:
            df.write(f'*{k}\n')
            write_metadata_text(df, md[k], prefix=' ')
    # write markers:
    filename = os.path.join(filepath, 'timestamps.dat')
    starttime = get_datetime(metadata, (('DateTimeOriginal',),
                                        ('OriginationDate', 'OriginationTime'),
                                        ('StartDate', 'StartTime'),
                                        ('Location_Time',)),
                             default=dt.datetime.fromtimestamp(0, dt.timezone.utc))
    with open(filename, 'w') as df:
        count = 0
        write_timestamp(df, count, 0, 0, rate, starttime,
                        '', 'begin of recording')
        count += 1
        if locs is not None:
            for i in range(len(locs)):
                label = ''
                comment = ''
                if labels is not None and len(labels) > i:
                    label = labels[i,0] if labels.ndim > 1 else labels[i]
                    comment = labels[i,1] if labels.ndim > 1 else ''
                index = locs[i,0] if locs.ndim > 1 else locs[i]
                span = locs[i,1] if locs.ndim > 1 else 0
                # indices are written in units of single samples:
                write_timestamp(df, count, index*nchannels,
                                span*nchannels, rate,
                                starttime, label, comment)
                count += 1
        write_timestamp(df, count, len(data)*nchannels, 0, rate,
                        starttime, '', 'end of recording')
    return cfgfilename

459 

460 

def formats_pickle():
    """Data formats supported by pickle.dump().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the pickle module is not available.
    """
    return ['PKL'] if data_modules['pickle'] else []

473 

474 

def encodings_pickle(format=None):
    """Encodings of the pickle format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string select 'PKL'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the pickle format.
    """
    fmt = format if format else 'PKL'
    if fmt.upper() == 'PKL':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

494 

495 

def write_pickle(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data into python pickle file.

    A single dictionary holding the recoded data, the sampling
    rate and all further information is pickled into the file.

    Documentation
    -------------
    https://docs.python.org/3/library/pickle.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        If the extension does not start with 'p' or 'P',
        '.pkl' is appended.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the pickle.
        Stored under the key "metadata".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'PKL' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The pickle module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['pickle']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'PKL'
    if format.upper() != 'PKL':
        raise ValueError(f'file format {format} not supported by pickle file format')
    # any extension starting with 'p' is accepted as is:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'P':
        filepath += os.extsep + 'pkl'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_pickle(format):
        # report the offending encoding, not the format:
        raise ValueError(f'file encoding {encoding} not supported by pickle file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        ddict['metadata'] = metadata
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = labels
        else:
            ddict['labels'] = labels[:,0]
            if labels.shape[1] > 1:
                ddict['descriptions'] = labels[:,1]
    with open(filepath, 'wb') as df:
        pickle.dump(ddict, df)
    return filepath

589 

590 

def insert_container_metadata(metadata, data_dict, metadatakey='metadata'):
    """Insert flattened metadata to data dictionary for a container file format.

    The metadata are flattened with '__' as separator, each of the
    resulting keys is prefixed by `metadatakey` followed by '__', and
    the entries are then added to `data_dict` in place.

    Parameters
    ----------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    data_dict: dict
        Dictionary of the data items contained in the container to
        which the metadata should be added.
    metadatakey: str or list of str
        Name of the variable holding the metadata.
    """
    flat = flatten_metadata(metadata, True, sep='__')
    # iterate over a snapshot of the keys, since the dictionary is
    # modified within the loop:
    for key in tuple(flat.keys()):
        value = flat.pop(key)
        flat[metadatakey + '__' + key] = value
    data_dict.update(flat)

608 

609 

def formats_numpy():
    """Data formats supported by numpy.savez().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the numpy module is not available.
    """
    if data_modules['numpy']:
        return ['NPZ']
    return []

622 

623 

def encodings_numpy(format=None):
    """Encodings of the numpy file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string select 'NPZ'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the numpy format.
    """
    is_npz = (format or 'NPZ').upper() == 'NPZ'
    return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE'] if is_npz else []

643 

644 

def write_numpy(filepath, data, rate, amax=1.0, unit=None,
                metadata=None, locs=None, labels=None, format=None,
                encoding=None):
    """Write data into numpy npz file.

    Documentation
    -------------
    https://numpy.org/doc/stable/reference/generated/numpy.savez.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        If the extension does not start with 'n' or 'N',
        '.npz' is appended.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the numpy file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and
        "descriptions".
    format: str or None
        File format, only None or 'NPZ' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The numpy module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['numpy']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'NPZ'
    if format.upper() not in formats_numpy():
        raise ValueError(f'file format {format} not supported by numpy file format')
    # any extension starting with 'n' is accepted as is:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'N':
        filepath += os.extsep + 'npz'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_numpy(format):
        # report the offending encoding, not the format:
        raise ValueError(f'file encoding {encoding} not supported by numpy file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        # convert to fixed-width unicode arrays that are just wide
        # enough for the longest string:
        if labels.ndim == 1:
            maxc = np.max([len(l) for l in labels])
            ddict['labels'] = labels.astype(dtype=f'U{maxc}')
        else:
            maxc = np.max([len(l) for l in labels[:,0]])
            ddict['labels'] = labels[:,0].astype(dtype=f'U{maxc}')
            if labels.shape[1] > 1:
                maxc = np.max([len(l) for l in labels[:,1]])
                ddict['descriptions'] = labels[:,1].astype(dtype=f'U{maxc}')
    np.savez(filepath, **ddict)
    return filepath

741 

742 

def formats_mat():
    """Data formats supported by scipy.io.savemat().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the scipy.io module is not available.
    """
    available = data_modules['scipy']
    return ['MAT'] if available else []

755 

756 

def encodings_mat(format=None):
    """Encodings of the matlab format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string select 'MAT'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the matlab format.
    """
    selected = format if format else 'MAT'
    if selected.upper() == 'MAT':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

776 

777 

def write_mat(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into matlab file.

    Documentation
    -------------
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.savemat.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        If the extension does not start with 'm' or 'M',
        '.mat' is appended.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the mat file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and
        "descriptions".
    format: str or None
        File format, only None or 'MAT' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The scipy.io module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['scipy']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'MAT'
    if format.upper() not in formats_mat():
        raise ValueError(f'file format {format} not supported by matlab file format')
    # any extension starting with 'm' is accepted as is:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'M':
        filepath += os.extsep + 'mat'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_mat(format):
        # report the offending encoding, not the format:
        raise ValueError(f'file encoding {encoding} not supported by matlab file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        # convert to fixed-width unicode arrays that are just wide
        # enough for the longest string:
        if labels.ndim == 1:
            maxc = np.max([len(l) for l in labels])
            ddict['labels'] = labels.astype(dtype=f'U{maxc}')
        else:
            maxc = np.max([len(l) for l in labels[:,0]])
            ddict['labels'] = labels[:,0].astype(dtype=f'U{maxc}')
            if labels.shape[1] > 1:
                maxc = np.max([len(l) for l in labels[:,1]])
                ddict['descriptions'] = labels[:,1].astype(dtype=f'U{maxc}')
    sio.savemat(filepath, ddict)
    return filepath

874 

875 

def formats_audioio():
    """Data formats supported by audioio.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the audioio package is not available.
    """
    if data_modules['audioio']:
        return aw.available_formats()
    return []

888 

889 

def encodings_audio(format):
    """Encodings of any audio format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if the audioio package is not available.
    """
    if data_modules['audioio']:
        return aw.available_encodings(format)
    return []

907 

908 

def write_audioio(filepath, data, rate, amax=1.0, unit=None,
                  metadata=None, locs=None, labels=None, format=None,
                  encoding=None, gainkey=default_gain_keys, sep='.'):
    """Write data into audio file.

    If a gain setting is available in the metadata, then the data are divided
    by the gain before they are stored in the audio file.
    After this operation, the data values need to range between -1 and 1,
    in particular if the data are encoded as integers
    (i.e. PCM_16, PCM_32 and PCM_64).
    Note, that this function does not check for this requirement!

    Documentation
    -------------
    https://bendalab.github.io/audioio/

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        If None or not finite, the gain found in the metadata
        is used instead.
    unit: str
        Unit of the data. If supplied and the gain is taken from the
        metadata, it has to match the unit of the gain.
    metadata: nested dict
        Metadata saved into the audio file. If `amax` differs from one,
        the gain is written back into the metadata, i.e. the passed
        dictionary is modified in place.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        If `amax` is None or not finite, the gain is read from this
        key and the data are divided by it. If `amax` differs from
        one, it is stored under the first of these keys, or 'Gain',
        that exists in the metadata, and otherwise under the first key.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
    sep: str
        String that separates section names in `gainkey`.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The audioio module is not available.
    ValueError
        Invalid `filepath` or `unit` does not match gain in metadata.
    """
    if not data_modules['audioio']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if amax is None or not np.isfinite(amax):
        # no usable amplitude given, fall back on the gain in the metadata:
        amax, u = am.get_gain(metadata, gainkey, sep, 1.0, 'a.u.')
        if not unit:
            unit = u
        elif unit != 'a.u.' and u != 'a.u.' and unit != u:
            raise ValueError(f'unit "{unit}" does not match gain unit "{u}" in metadata')
    if amax != 1.0:
        data = data / amax
        if metadata is None:
            metadata = {}
        # a missing unit must not show up as the text "None":
        if not unit or unit == 'a.u.':
            unit = ''
        # Work on a private copy of the keys: appending to `gainkey`
        # itself would grow the caller's list, or the shared default
        # list, with every call, and fails for tuples.
        if isinstance(gainkey, (list, tuple, np.ndarray)):
            gainkeys = list(gainkey)
        else:
            gainkeys = [gainkey]
        gainkeys.append('Gain')
        for gk in gainkeys:
            m, k = am.find_key(metadata, gk)
            if k in m:
                m[k] = f'{amax:g}{unit}'
                break
        else:
            # no gain entry exists yet, add one under the first key:
            if 'INFO' in metadata:
                metadata['INFO'][gainkeys[0]] = f'{amax:g}{unit}'
            else:
                metadata[gainkeys[0]] = f'{amax:g}{unit}'
    # pass on the requested format and encoding, they were ignored before:
    aw.write_audio(filepath, data, rate, metadata, locs, labels,
                   format=format, encoding=encoding)
    return filepath

1007 

1008 

# Order matters: write_data() hands the data to the first entry whose
# formats function lists the requested format.
data_formats_funcs = (
    ('relacs', None, formats_relacs),
    ('fishgrid', None, formats_fishgrid),
    ('pickle', 'pickle', formats_pickle),
    ('numpy', 'numpy', formats_numpy),
    ('matlab', 'scipy', formats_mat),
    ('audio', 'audioio', formats_audioio),
)
"""Tuple of implemented formats functions.

Each element is a 3-tuple holding the name of the format family, the
key of the required module in `data_modules` (or None if no optional
module is needed), and the function returning the supported formats.
"""

1022 

1023 

def available_formats():
    """Collect all file formats that can be written with the installed modules.

    Returns
    -------
    formats: list of str
        Alphabetically sorted list of supported file formats.
    """
    supported = set()
    for _, module, list_formats in data_formats_funcs:
        # Skip format families whose optional module is not installed.
        if module and not data_modules[module]:
            continue
        supported.update(list_formats())
    return sorted(supported)

1037 

1038 

# Queried in this order by available_encodings(); the first function
# that returns a non-empty list wins.
data_encodings_funcs = (
    ('relacs', encodings_relacs),
    ('fishgrid', encodings_fishgrid),
    ('pickle', encodings_pickle),
    ('numpy', encodings_numpy),
    ('matlab', encodings_mat),
    ('audio', encodings_audio),
)
"""Tuple of implemented encodings functions.

Each element is a 2-tuple holding the name of the format family and
the function returning the encodings available for a file format.
"""

1051 

1052 

def available_encodings(format):
    """Look up the encodings that can be used with a data file format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        Supported encodings as strings. Empty list if none of the
        encodings functions knows the format.
    """
    for _, list_encodings in data_encodings_funcs:
        found = list_encodings(format)
        if len(found) == 0:
            continue
        # The first format family that reports encodings decides.
        return found
    return []

1071 

1072 

data_writer_funcs = dict(
    relacs=write_relacs,
    fishgrid=write_fishgrid,
    pickle=write_pickle,
    numpy=write_numpy,
    matlab=write_mat,
    audio=write_audioio,
)
"""Dictionary of implemented write functions.

Maps the name of each format family (the first element of the entries
in `data_formats_funcs`) to the function that writes this kind of file.
"""

1086 

1087 

def write_data(filepath, data, rate, amax=1.0, unit=None,
               metadata=None, locs=None, labels=None, format=None,
               encoding=None, verbose=0, **kwargs):
    """Write data into a file.

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        File format is determined from extension.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data.
    metadata: nested dict
        Additional metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    verbose: int
        If >0 report the written file, if >1 also report
        sampling rate, number of channels and frames, and, if `amax`
        is not None, the data range.
    kwargs: dict
        Additional, file format specific keyword arguments.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ValueError
        `filepath` is empty string or unspecified format.
    IOError
        Requested file format not supported.

    Example
    -------
    ```
    import numpy as np
    from thunderlab.datawriter import write_data

    rate = 28000.0
    freq = 800.0
    time = np.arange(0.0, 1.0, 1/rate)      # one second
    data = 2.5*np.sin(2.0*np.pi*freq*time)  # 800Hz sine wave
    md = dict(Artist='underscore_')         # metadata
    write_data('audio/file.npz', data, rate, 2.5, 'mV', md)
    ```
    """
    if not filepath:
        raise ValueError('no file specified!')
    if not format:
        format = format_from_extension(filepath)
    if not format:
        raise ValueError('unspecified file format')
    for fmt, lib, formats_func in data_formats_funcs:
        # Format families relying on a missing optional module cannot be used.
        if lib and not data_modules[lib]:
            continue
        if format.upper() in formats_func():
            writer_func = data_writer_funcs[fmt]
            filepath = writer_func(filepath, data, rate, amax,
                                   unit, metadata, locs, labels,
                                   format=format, encoding=encoding,
                                   **kwargs)
            if verbose > 0:
                print(f'wrote data to file "{filepath}" using {fmt} format')
                if verbose > 1:
                    nchannels = data.shape[1] if len(data.shape) > 1 else 1
                    print(f' sampling rate: {rate:g}Hz')
                    print(f' channels : {nchannels}')
                    print(f' frames : {len(data)}')
                    # amax may be None (a writer may then take it from
                    # the metadata); formatting None with ':g' would
                    # raise a TypeError, and a missing unit should not
                    # be printed as 'None'.
                    if amax is not None:
                        print(f' range : {amax:g}{unit if unit else ""}')
            return filepath
    raise IOError(f'file format "{format.upper()}" not supported.')

1175 

1176 

def demo(file_path, channels=2, format=None):
    """Generate one second of sine waves and write them with `write_data()`.

    Each channel holds a sine wave whose frequency increases by 8Hz
    and whose amplitude decreases by 0.1 from channel to channel.

    Parameters
    ----------
    file_path: str
        File path of a data file.
    channels: int
        Number of channels to be generated and written.
    format: str or None
        File format to be used.
    """
    print('generate data ...')
    rate = 44100.0
    time = np.arange(0.0, 1.0, 1.0/rate)
    data = np.zeros((len(time), channels))
    for k in range(channels):
        amplitude = 0.1*(channels - k)
        freq = 440.0 + k*8.0
        data[:, k] = amplitude*np.sin(2.0*np.pi*freq*time)

    print(f"write_data('{file_path}') ...")
    write_data(file_path, data, rate, 1.0, 'mV', format=format, verbose=2)

    print('done.')

1198 

1199 

def main(*cargs):
    """Call demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Checking thunderlab.datawriter module.')
    parser.add_argument('-c', dest='channels', default=2, type=int,
                        help='number of channels to be written')
    parser.add_argument('-f', dest='format', default=None, type=str,
                        help='file format')
    # nargs='?' makes the file name optional so that the default is
    # actually used; with nargs=1 the positional argument is required
    # and the default never takes effect.
    parser.add_argument('file', nargs='?', default='test.npz', type=str,
                        help='name of data file')
    args = parser.parse_args(cargs)
    demo(args.file, args.channels, args.format)

1219 

1220 

# Run the demo when the module is executed as a script.
if __name__ == '__main__':
    main(*sys.argv[1:])

1223 

1224 

1225