Coverage for src/thunderlab/datawriter.py: 87%

1"""Writing numpy arrays of floats to data files.

- `write_data()`: write data into a file.
- `available_formats()`: supported data and audio file formats.
- `available_encodings()`: encodings of a data file format.
- `format_from_extension()`: deduce data file format from file extension.
- `recode_array()`: recode array of floats.
- `insert_container_metadata()`: insert flattened metadata into the data dictionary of a container file format.

9"""

11import os

12import sys

13import datetime as dt

14from copy import deepcopy

15from audioio import find_key, add_metadata, move_metadata

16from audioio import get_datetime, default_gain_keys

data_modules = {}
"""Dictionary with availability of various modules needed for writing data.
Keys are the module names, values are booleans.
"""

# Each optional backend is imported in its own try/except block.  The
# outcome of the import is recorded in `data_modules`, which the
# `formats_*()` and `write_*()` functions of this module consult before
# touching the corresponding module alias.

# pickle: used by write_pickle().
try:
    import pickle
    data_modules['pickle'] = True
except ImportError:
    data_modules['pickle'] = False

# numpy: bound to `np`.
try:
    import numpy as np
    data_modules['numpy'] = True
except ImportError:
    data_modules['numpy'] = False

# scipy.io: bound to `sio`, used by write_mat().
try:
    import scipy.io as sio
    data_modules['scipy'] = True
except ImportError:
    data_modules['scipy'] = False

# audioio: writer bound to `aw`, metadata helpers bound to `am`.
try:
    import audioio.audiowriter as aw
    import audioio.audiometadata as am
    from audioio import write_metadata_text, flatten_metadata
    data_modules['audioio'] = True
except ImportError:
    data_modules['audioio'] = False

def format_from_extension(filepath):
    """Derive the data file format from the extension of a file name.

    Parameters
    ----------
    filepath: str
        Name of the data file.

    Returns
    -------
    format: str or None
        Data format deduced from the file extension.
        None if `filepath` is empty or has no extension.
    """
    if not filepath:
        return None
    suffix = os.path.splitext(filepath)[1]
    # drop the leading dot; a bare trailing dot leaves nothing behind:
    if suffix[:1] == '.':
        suffix = suffix[1:]
    if not suffix:
        return None
    # audioio knows about aliases of audio file extensions:
    if data_modules['audioio']:
        return aw.format_from_extension(filepath)
    return suffix.upper()

def recode_array(data, amax, encoding):
    """Recode array of floats.

    Integer encodings scale the data by `amax` onto the full integer
    range and saturate symmetrically at `+/-(2**(bits-1) - 1)`.
    Floating point encodings only change the data type.

    Parameters
    ----------
    data: array of floats
        Data array with values ranging between -amax and +amax.
    amax: float
        Maximum amplitude of data range.
    encoding: str
        Encoding, one of PCM_16, PCM_32, PCM_64, FLOAT or DOUBLE.

    Returns
    -------
    buffer: array
        The data recoded according to `encoding`.
        For an unknown `encoding` the unchanged `data` are returned.
    """
    # encoding name -> (sample width in bytes, numpy dtype code)
    encodings = {'PCM_16': (2, 'i2'),
                 'PCM_32': (4, 'i4'),
                 'PCM_64': (8, 'i8'),
                 'FLOAT': (4, 'f'),
                 'DOUBLE': (8, 'd')}

    if encoding not in encodings:
        return data
    sampwidth, dtype = encodings[encoding]
    if dtype[0] != 'i':
        return data.astype(dtype, copy=False)
    factor = 2**(sampwidth*8 - 1)
    scaled = np.round(data/amax*factor)
    # Saturate on the *scaled* values: samples slightly below amax may
    # round up to `factor`, which does not fit into the integer type
    # and would wrap around to the most negative value on casting.
    too_high = scaled >= factor - 1
    too_low = scaled <= -(factor - 1)
    # neutralize out-of-range values so that the cast never overflows:
    scaled[too_high | too_low] = 0
    buffer = scaled.astype(dtype)
    buffer[too_high] = factor - 1
    buffer[too_low] = -(factor - 1)
    return buffer

114

115

def formats_relacs():
    """File formats handled by the relacs writer.

    Returns
    -------
    formats: list of str
        A new list holding the single supported format name 'RELACS'.
    """
    supported = ['RELACS']
    return supported

125

126

def encodings_relacs(format=None):
    """Encodings available for the relacs file format.

    Parameters
    ----------
    format: str or None
        The file format (case insensitive).
        None or an empty string is treated as 'RELACS'.

    Returns
    -------
    encodings: list of str
        ['FLOAT'] for the relacs format, an empty list for any other format.
    """
    fmt = format if format else 'RELACS'
    return ['FLOAT'] if fmt.upper() == 'RELACS' else []

146

147

def write_relacs(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data as relacs raw files.

    Creates the folder `filepath` if needed and writes one
    `trace-<n>.raw` file (32-bit floats) per channel, a `stimuli.dat`
    file describing the traces, three event files, and - if metadata
    are given - an `info.dat` file.

    Parameters
    ----------
    filepath: str
        Full path of folder where to write relacs files.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Not used by this writer.
    unit: str
        Unit of the data. Defaults to 'V'.
    metadata: nested dict
        Additional metadata saved into `info.dat`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not used by this writer.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not used by this writer.
    format: str or None
        File format, only None or 'RELACS' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the name of the written `stimuli.dat` file.

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'RELACS'
    if format.upper() != 'RELACS':
        raise ValueError(f'file format {format} not supported by relacs file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        # report the offending encoding, not the file format:
        raise ValueError(f'file encoding {encoding} not supported by relacs file format')
    if not os.path.exists(filepath):
        os.mkdir(filepath)
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # write data, one raw file per channel:
    for c in range(nchannels):
        trace = data if data.ndim == 1 else data[:, c]
        with open(os.path.join(filepath, f'trace-{c+1}.raw'), 'wb') as df:
            df.write(trace.astype(np.float32).tobytes())
    if unit is None:
        unit = 'V'
    # write data format:
    filename = os.path.join(filepath, 'stimuli.dat')
    with open(filename, 'w') as df:
        df.write('# analog input traces:\n')
        for c in range(nchannels):
            df.write(f'# identifier{c+1} : V-{c+1}\n')
            # must match the name of the raw file written above:
            df.write(f'# data file{c+1} : trace-{c+1}.raw\n')
            df.write(f'# sample interval{c+1} : {1000.0/rate:.4f}ms\n')
            df.write(f'# sampling rate{c+1} : {rate:.2f}Hz\n')
            df.write(f'# unit{c+1} : {unit}\n')
        df.write('# event lists:\n')
        df.write('# event file1: stimulus-events.dat\n')
        df.write('# event file2: restart-events.dat\n')
        df.write('# event file3: recording-events.dat\n')
    # write empty event files:
    for events in ['Recording', 'Restart', 'Stimulus']:
        with open(os.path.join(filepath, f'{events.lower()}-events.dat'), 'w') as df:
            df.write(f'# events: {events}\n\n')
            df.write('#Key\n')
            if events == 'Stimulus':
                df.write('# t duration\n')
                df.write('# sec s\n')
                df.write('# 1 2\n')
            else:
                df.write('# t\n')
                df.write('# sec\n')
                df.write('# 1\n')
                if events == 'Recording':
                    # the recording starts at time zero:
                    df.write(' 0.0\n')
    # write metadata:
    if metadata:
        write_metadata_text(os.path.join(filepath, 'info.dat'),
                            metadata, prefix='# ')
    return filename

248

249

def formats_fishgrid():
    """File formats handled by the fishgrid writer.

    Returns
    -------
    formats: list of str
        A new list holding the single supported format name 'FISHGRID'.
    """
    supported = ['FISHGRID']
    return supported

259

260

def encodings_fishgrid(format=None):
    """Encodings available for the fishgrid file format.

    Parameters
    ----------
    format: str or None
        The file format (case insensitive).
        None or an empty string is treated as 'FISHGRID'.

    Returns
    -------
    encodings: list of str
        ['FLOAT'] for the fishgrid format, an empty list for any other format.
    """
    fmt = format if format else 'FISHGRID'
    return ['FLOAT'] if fmt.upper() == 'FISHGRID' else []

280

281

def write_fishgrid(filepath, data, rate, amax=1.0, unit=None,
                   metadata=None, locs=None, labels=None, format=None,
                   encoding=None):
    """Write data as fishgrid raw files.

    Creates the folder `filepath` if needed and writes the data as
    32-bit floats into `traces-grid1.raw`, the metadata into
    `fishgrid.cfg`, and the markers into `timestamps.dat`.

    Parameters
    ----------
    filepath: str
        Full path of the folder where to write fishgrid files.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data. Defaults to 'mV'.
    metadata: nested dict or None
        Additional metadata saved into the `fishgrid.cfg`.
        None is treated like an empty dictionary.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format, only None or 'FISHGRID' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the name of the written `fishgrid.cfg` file.

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    def write_timestamp(df, count, index, span, rate, starttime,
                        label, comment):
        # one marker entry; date and time are derived from the marker index
        stamp = starttime + dt.timedelta(seconds=index/rate)
        df.write(f' Num: {count}\n')
        df.write(f' Index1: {index}\n')
        #df.write(f' Index2: 0\n')
        #df.write(f' Index3: 0\n')
        #df.write(f' Index4: 0\n')
        if span > 0:
            df.write(f' Span1: {span}\n')
        df.write(f' Date: {stamp.date().isoformat()}\n')
        df.write(f' Time: {stamp.time().isoformat(timespec="seconds")}\n')
        if label:
            df.write(f' Label: {label}\n')
        df.write(f'Comment: {comment}\n')
        df.write('\n')

    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'FISHGRID'
    if format.upper() != 'FISHGRID':
        raise ValueError(f'file format {format} not supported by fishgrid file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        # report the offending encoding, not the file format:
        raise ValueError(f'file encoding {encoding} not supported by fishgrid file format')
    if metadata is None:
        # the code below tests membership in and copies the metadata,
        # which fails on the default None
        metadata = {}
    if not os.path.exists(filepath):
        os.mkdir(filepath)
    # write data:
    with open(os.path.join(filepath, 'traces-grid1.raw'), 'wb') as df:
        df.write(data.astype(np.float32).tobytes())
    # write metadata:
    if unit is None:
        unit = 'mV'
    cfgfilename = os.path.join(filepath, 'fishgrid.cfg')
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # arrange the channels on an approximately square grid:
    ncols = int(np.ceil(np.sqrt(nchannels)))
    nrows = int(np.ceil(nchannels/ncols))
    if 'FishGrid' in metadata:
        # metadata already have fishgrid layout: keep the sections and
        # collect all top-level non-section entries in FishGrid.Recording
        md = {}
        rmd = {}
        for k in metadata:
            if isinstance(metadata[k], dict):
                md[k] = deepcopy(metadata[k])
            else:
                rmd[k] = metadata[k]
        if len(rmd) > 0:
            m, k = find_key(md, 'FishGrid.Recording')
            if k in m:
                m[k].update(rmd)
            else:
                m[k] = rmd
    else:
        # build a fishgrid layout from a working copy of the metadata:
        smd = deepcopy(metadata)
        gm = dict(Used1='true', Columns1=f'{ncols}', Rows1=f'{nrows}')
        hm = {'DAQ board': dict()}
        if not move_metadata(smd, hm, 'Amplifier'):
            ampl = {}
            move_metadata(smd, ampl, ['Amplifier.Name', 'AmplName'], 'AmplName')
            move_metadata(smd, ampl, ['Amplifier.Model', 'AmplModel'], 'AmplModel')
            move_metadata(smd, ampl, 'Amplifier.Type')
            move_metadata(smd, ampl, 'Gain')
            move_metadata(smd, ampl, 'HighpassCutoff')
            move_metadata(smd, ampl, 'LowpassCutoff')
            if len(ampl) > 0:
                hm['Amplifier'] = ampl
        md = dict(FishGrid={'Grid 1': gm, 'Hardware Settings': hm})
        move_metadata(smd, md['FishGrid'], 'Recording')
        gm = {}
        starttime = get_datetime(smd, remove=True)
        if starttime is not None:
            gm['StartDate'] = starttime.date().isoformat()
            gm['StartTime'] = starttime.time().isoformat(timespec='seconds')
        for key in ('Location', 'Position', 'WaterTemperature',
                    'WaterConductivity', 'WaterpH', 'WaterOxygen',
                    'Temperature', 'Humidity', 'Pressure', 'Comment',
                    'Experimenter'):
            move_metadata(smd, gm, key)
        if len(gm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'General': gm})
        bm = {}
        for key in ('DataTime', 'DataInterval', 'BufferTime',
                    'BufferInterval'):
            move_metadata(smd, bm, key)
        if len(bm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'Buffers and timing': bm})
        if smd:
            # whatever was not moved ends up in its own section:
            md['FishGrid']['Other'] = smd
    add_metadata(md,
                 [f'FishGrid.Hardware Settings.DAQ board.AISampleRate={0.001*rate:.3f}kHz',
                  f'FishGrid.Hardware Settings.DAQ board.AIMaxVolt={amax:g}{unit}'])
    with open(cfgfilename, 'w') as df:
        for k in md:
            df.write(f'*{k}\n')
            write_metadata_text(df, md[k], prefix=' ')
    # write markers:
    filename = os.path.join(filepath, 'timestamps.dat')
    starttime = get_datetime(metadata, (('DateTimeOriginal',),
                                        ('OriginationDate', 'OriginationTime'),
                                        ('StartDate', 'StartTime'),
                                        ('Location_Time',)),
                             default=dt.datetime.fromtimestamp(0, dt.timezone.utc))
    with open(filename, 'w') as df:
        count = 0
        write_timestamp(df, count, 0, 0, rate, starttime,
                        '', 'begin of recording')
        count += 1
        if locs is not None:
            for i in range(len(locs)):
                label = ''
                comment = ''
                if labels is not None and len(labels) > i:
                    label = labels[i,0] if labels.ndim > 1 else labels[i]
                    comment = labels[i,1] if labels.ndim > 1 else ''
                index = locs[i,0] if locs.ndim > 1 else locs[i]
                span = locs[i,1] if locs.ndim > 1 else 0
                # indices and spans are written multiplied by the
                # number of channels:
                write_timestamp(df, count, index*nchannels,
                                span*nchannels, rate,
                                starttime, label, comment)
                count += 1
        write_timestamp(df, count, len(data)*nchannels, 0, rate,
                        starttime, '', 'end of recording')
    return cfgfilename

459

460

def formats_pickle():
    """File formats that can be written via pickle.dump().

    Returns
    -------
    formats: list of str
        ['PKL'] if the pickle module has been imported,
        otherwise an empty list.
    """
    return ['PKL'] if data_modules['pickle'] else []

473

474

def encodings_pickle(format=None):
    """Encodings available for the pickle file format.

    Parameters
    ----------
    format: str or None
        The file format (case insensitive).
        None or an empty string is treated as 'PKL'.

    Returns
    -------
    encodings: list of str
        The supported encodings for the pickle format,
        an empty list for any other format.
    """
    fmt = format if format else 'PKL'
    if fmt.upper() == 'PKL':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

494

495

def write_pickle(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data into python pickle file.

    All items are collected in a single dictionary that is pickled
    into the file.

    Documentation
    -------------
    https://docs.python.org/3/library/pickle.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        The extension 'pkl' is appended if `filepath` has no
        extension or an extension not starting with 'p'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the pickle.
        Stored under the key "metadata".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'PKL' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The pickle module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['pickle']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'PKL'
    if format.upper() != 'PKL':
        raise ValueError(f'file format {format} not supported by pickle file format')
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'P':
        filepath += os.extsep + 'pkl'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_pickle(format):
        # report the offending encoding, not the file format:
        raise ValueError(f'file encoding {encoding} not supported by pickle file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        ddict['metadata'] = metadata
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = labels
        else:
            ddict['labels'] = labels[:,0]
            if labels.shape[1] > 1:
                ddict['descriptions'] = labels[:,1]
    with open(filepath, 'wb') as df:
        pickle.dump(ddict, df)
    return filepath

589

590

def insert_container_metadata(metadata, data_dict, metadatakey='metadata'):
    """Add flattened metadata to the data dictionary of a container file format.

    The nested `metadata` are flattened using '__' as separator. Each
    flattened key is prefixed with `metadatakey` and '__' before it is
    added to `data_dict`.

    Parameters
    ----------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    data_dict: dict
        Dictionary of the data items contained in the container to
        which the metadata should be added. Modified in place.
    metadatakey: str
        Name of the variable holding the metadata.
    """
    flat = flatten_metadata(metadata, True, sep='__')
    prefix = metadatakey + '__'
    # take a snapshot of the keys, since the dictionary is modified:
    for name in tuple(flat.keys()):
        value = flat.pop(name)
        flat[prefix + name] = value
    data_dict.update(flat)

608

609

def formats_numpy():
    """File formats that can be written via numpy.savez().

    Returns
    -------
    formats: list of str
        ['NPZ'] if the numpy module has been imported,
        otherwise an empty list.
    """
    return ['NPZ'] if data_modules['numpy'] else []

622

623

def encodings_numpy(format=None):
    """Encodings available for the numpy file format.

    Parameters
    ----------
    format: str or None
        The file format (case insensitive).
        None or an empty string is treated as 'NPZ'.

    Returns
    -------
    encodings: list of str
        The supported encodings for the numpy format,
        an empty list for any other format.
    """
    fmt = format if format else 'NPZ'
    if fmt.upper() == 'NPZ':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

643

644

def write_numpy(filepath, data, rate, amax=1.0, unit=None,
                metadata=None, locs=None, labels=None, format=None,
                encoding=None):
    """Write data into numpy npz file.

    Documentation
    -------------
    https://numpy.org/doc/stable/reference/generated/numpy.savez.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        The extension 'npz' is appended if `filepath` has no
        extension or an extension not starting with 'n'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the numpy file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'NPZ' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The numpy module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['numpy']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'NPZ'
    if format.upper() not in formats_numpy():
        raise ValueError(f'file format {format} not supported by numpy file format')
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'N':
        filepath += os.extsep + 'npz'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_numpy(format):
        # report the offending encoding, not the file format:
        raise ValueError(f'file encoding {encoding} not supported by numpy file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        # convert the labels to fixed-width unicode arrays that are
        # just wide enough for the longest string:
        if labels.ndim == 1:
            maxc = np.max([len(l) for l in labels])
            ddict['labels'] = labels.astype(dtype=f'U{maxc}')
        else:
            maxc = np.max([len(l) for l in labels[:,0]])
            ddict['labels'] = labels[:,0].astype(dtype=f'U{maxc}')
            if labels.shape[1] > 1:
                maxc = np.max([len(l) for l in labels[:,1]])
                ddict['descriptions'] = labels[:,1].astype(dtype=f'U{maxc}')
    np.savez(filepath, **ddict)
    return filepath

741

742

def formats_mat():
    """File formats that can be written via scipy.io.savemat().

    Returns
    -------
    formats: list of str
        ['MAT'] if the scipy.io module has been imported,
        otherwise an empty list.
    """
    return ['MAT'] if data_modules['scipy'] else []

755

756

def encodings_mat(format=None):
    """Encodings available for the matlab file format.

    Parameters
    ----------
    format: str or None
        The file format (case insensitive).
        None or an empty string is treated as 'MAT'.

    Returns
    -------
    encodings: list of str
        The supported encodings for the matlab format,
        an empty list for any other format.
    """
    fmt = format if format else 'MAT'
    if fmt.upper() == 'MAT':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []

776

777

def write_mat(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into matlab file.

    Documentation
    -------------
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.savemat.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        The extension 'mat' is appended if `filepath` has no
        extension or an extension not starting with 'm'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the mat file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'MAT' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The scipy.io module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['scipy']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'MAT'
    if format.upper() not in formats_mat():
        raise ValueError(f'file format {format} not supported by matlab file format')
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'M':
        filepath += os.extsep + 'mat'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_mat(format):
        # report the offending encoding, not the file format:
        raise ValueError(f'file encoding {encoding} not supported by matlab file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        # convert the labels to fixed-width unicode arrays that are
        # just wide enough for the longest string:
        if labels.ndim == 1:
            maxc = np.max([len(l) for l in labels])
            ddict['labels'] = labels.astype(dtype=f'U{maxc}')
        else:
            maxc = np.max([len(l) for l in labels[:,0]])
            ddict['labels'] = labels[:,0].astype(dtype=f'U{maxc}')
            if labels.shape[1] > 1:
                maxc = np.max([len(l) for l in labels[:,1]])
                ddict['descriptions'] = labels[:,1].astype(dtype=f'U{maxc}')
    sio.savemat(filepath, ddict)
    return filepath

874

875

def formats_audioio():
    """File formats that can be written via audioio.

    Returns
    -------
    formats: list of str
        The formats reported by the audioio writer module,
        or an empty list if audioio has not been imported.
    """
    if data_modules['audioio']:
        return aw.available_formats()
    return []

888

889

def encodings_audio(format):
    """Encodings available for an audio file format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        The encodings reported by the audioio writer module for `format`,
        or an empty list if audioio has not been imported.
    """
    if data_modules['audioio']:
        return aw.available_encodings(format)
    return []

907

908

def write_audioio(filepath, data, rate, amax=1.0, unit=None,
                  metadata=None, locs=None, labels=None, format=None,
                  encoding=None, gainkey=default_gain_keys, sep='.'):
    """Write data into audio file.

    If a gain setting is available in the metadata, then the data are divided
    by the gain before they are stored in the audio file.
    After this operation, the data values need to range between -1 and 1,
    in particular if the data are encoded as integers
    (i.e. PCM_16, PCM_32 and PCM_64).
    Note, that this function does not check for this requirement!

    Documentation
    -------------
    https://bendalab.github.io/audioio/

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float or None
        Maximum possible amplitude of the data in `unit`.
        If None or not finite, the gain found in the metadata is used.
    unit: str
        Unit of the data. If supplied and a gain is taken from the
        metadata it has to match the unit of the gain.
    metadata: nested dict
        Metadata saved into the audio file. The gain entry found via
        `gainkey` is set to `amax` with `unit`. If no gain entry is
        found, one is added using the first key in `gainkey`.
        Note that a passed-in dictionary is modified in place.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        The data are divided by this gain, if `amax` is not specified.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
        The passed-in list is not modified.
    sep: str
        String that separates section names in `gainkey`.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The audioio module is not available.
    ValueError
        Invalid `filepath` or `unit` does not match gain in metadata.
    """
    if not data_modules['audioio']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if amax is None or not np.isfinite(amax):
        amax, u = am.get_gain(metadata, gainkey, sep, 1.0, 'a.u.')
        if not unit:
            unit = u
        elif unit != 'a.u.' and u != 'a.u.' and unit != u:
            raise ValueError(f'unit "{unit}" does not match gain unit "{u}" in metadata')
    if amax != 1.0:
        data = data / amax
    if metadata is None:
        metadata = {}
    if not unit or unit == 'a.u.':
        # no or arbitrary unit: store the bare number
        # (a unit of None must not end up as the text "None")
        unit = ''
    if not isinstance(gainkey, (list, tuple, np.ndarray)):
        gainkey = [gainkey,]
    # Build a new list instead of appending in place: `gainkey` may be
    # the shared module-level default or the caller's own list (which
    # would grow with every call), or a tuple (which cannot be appended to).
    gainkey = list(gainkey) + ['Gain']
    gain = f'{amax:g}{unit}'
    for gk in gainkey:
        m, k = am.find_key(metadata, gk)
        if k in m:
            m[k] = gain
            break
    else:
        # no gain entry found: add one under the first gain key
        if 'INFO' in metadata:
            metadata['INFO'][gainkey[0]] = gain
        else:
            metadata[gainkey[0]] = gain
    # hand the requested format and encoding on to the writer,
    # they were silently dropped before:
    aw.write_audio(filepath, data, rate, metadata, locs, labels,
                   format=format, encoding=encoding)
    return filepath

1007

1008

# Registry used by available_formats().  Entries whose module name is
# None are always queried, the others only if the respective entry in
# `data_modules` is True.
data_formats_funcs = (
    ('relacs', None, formats_relacs),
    ('fishgrid', None, formats_fishgrid),
    ('pickle', 'pickle', formats_pickle),
    ('numpy', 'numpy', formats_numpy),
    ('matlab', 'scipy', formats_mat),
    ('audio', 'audioio', formats_audioio)
    )
"""List of implemented formats functions.

Each element of the list is a tuple with the format's name, the
module's name in `data_modules` or None, and the formats function.
"""

1022

1023

def available_formats():
    """Collect the file formats of all writers whose modules are installed.

    Returns
    -------
    formats: list of str
        Alphabetically sorted list of the supported data and audio
        file formats, each format listed once.
    """
    collected = set()
    for _, lib, formats_func in data_formats_funcs:
        # skip writers that depend on a module that is not available:
        if lib and not data_modules[lib]:
            continue
        collected.update(formats_func())
    return sorted(collected)

1037

1038

# Registry of the encodings functions.  available_encodings() calls
# them in this order, passing the requested format, and returns the
# first non-empty result.  The first element of each pair is not
# used by available_encodings().
data_encodings_funcs = (
    ('relacs', encodings_relacs),
    ('fishgrid', encodings_fishgrid),
    ('pickle', encodings_pickle),
    ('numpy', encodings_numpy),
    ('matlab', encodings_mat),
    ('audio', encodings_audio)
    )
""" List of implemented encodings functions.

Each element of the list is a tuple with the module's name and the encodings function.
"""

1051

1052

def available_encodings(format):
    """Encodings supported for a data file format.

    The encodings functions registered in `data_encodings_funcs` are
    asked one after the other. The first non-empty answer is returned.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if none of the encodings functions knows the format.
    """
    for _, list_encodings in data_encodings_funcs:
        found = list_encodings(format)
        if len(found) == 0:
            # this format family does not know the format, try the next one:
            continue
        return found
    return []

1071

1072

# Lookup table used by write_data(): the keys are the format names
# from the first column of `data_formats_funcs`.  write_data() calls
# the selected function as
#   writer(filepath, data, rate, amax, unit, metadata, locs, labels,
#          format=format, encoding=encoding, **kwargs)
# and returns whatever the writer returns as the written file path.
data_writer_funcs = {
    'relacs': write_relacs,
    'fishgrid': write_fishgrid,
    'pickle': write_pickle,
    'numpy': write_numpy,
    'matlab': write_mat,
    'audio': write_audioio
    }
"""Dictionary of implemented write functions.

Keys are the format's name and values the corresponding write
function.
"""

1086

1087

def write_data(filepath, data, rate, amax=1.0, unit=None,
               metadata=None, locs=None, labels=None, format=None,
               encoding=None, verbose=0, **kwargs):
    """Write data into a file.

    The format families registered in `data_formats_funcs` are checked
    in order. The write function of the first family that supports
    the requested format and whose required module is available is
    called.

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        If `format` is not specified, the file format is determined
        from the extension.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data.
    metadata: nested dict
        Additional metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    verbose: int
        If >0 report the written file, if >1 also report
        sampling rate, number of channels and frames, and data range.
    kwargs: dict
        Additional, file format specific keyword arguments.

    Returns
    -------
    filepath: str
        On success, the file name as returned by the write function
        of the selected format.

    Raises
    ------
    ValueError
        `filepath` is empty string or unspecified format.
    IOError
        Requested file format not supported.

    Example
    -------
    ```
    import numpy as np
    from thunderlab.datawriter import write_data

    rate = 28000.0
    freq = 800.0
    time = np.arange(0.0, 1.0, 1/rate) # one second
    data = 2.5*np.sin(2.0*np.pi*freq*time) # 800Hz sine wave
    md = dict(Artist='underscore_') # metadata
    write_data('audio/file.npz', data, rate, 2.5, 'mV', md)
    ```
    """
    if not filepath:
        raise ValueError('no file specified!')
    if not format:
        format = format_from_extension(filepath)
    if not format:
        raise ValueError('unspecified file format')
    format_name = format.upper()
    for fmt, lib, formats_func in data_formats_funcs:
        # skip format families whose required module is not installed:
        if lib and not data_modules[lib]:
            continue
        if format_name not in formats_func():
            continue
        writer_func = data_writer_funcs[fmt]
        filepath = writer_func(filepath, data, rate, amax,
                               unit, metadata, locs, labels,
                               format=format, encoding=encoding,
                               **kwargs)
        if verbose > 0:
            print(f'wrote data to file "{filepath}" using {fmt} format')
            if verbose > 1:
                n_channels = data.shape[1] if len(data.shape) > 1 else 1
                print(f' sampling rate: {rate:g}Hz')
                print(f' channels : {n_channels}')
                print(f' frames : {len(data)}')
                # The file is already written at this point, so the
                # report must not fail on an unspecified amax or unit:
                if amax is not None:
                    print(f' range : {amax:g}{unit if unit else ""}')
        return filepath
    raise IOError(f'file format "{format_name}" not supported.')

1175

1176

def demo(file_path, channels=2, format=None):
    """Demo of the datawriter functions.

    Generates one second of sine waves sampled at 44.1kHz and writes
    them with `write_data()`. The frequency increases by 8Hz per
    channel, starting at 440Hz, and the amplitude decreases with the
    channel index.

    Parameters
    ----------
    file_path: str
        File path of a data file.
    channels: int
        Number of channels to be generated and written.
    format: str or None
        File format to be used.
    """
    print('generate data ...')
    rate = 44100.0
    times = np.arange(0.0, 1.0, 1.0/rate)
    data = np.zeros((len(times), channels))
    for channel in range(channels):
        amplitude = 0.1*(channels - channel)
        omega = 2.0*np.pi*(440.0 + channel*8.0)
        data[:, channel] = amplitude*np.sin(omega*times)

    print(f"write_data('{file_path}') ...")
    write_data(file_path, data, rate, 1.0, 'mV', format=format, verbose=2)

    print('done.')

1198

1199

def main(*cargs):
    """Call demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]

    Raises
    ------
    SystemExit
        Raised by argparse on invalid arguments or when help is requested.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Checking thunderlab.datawriter module.')
    parser.add_argument('-c', dest='channels', default=2, type=int,
                        help='number of channels to be written')
    parser.add_argument('-f', dest='format', default=None, type=str,
                        help='file format')
    # nargs='?' makes the positional argument optional, so that the
    # default file name is actually used when no file is given.
    # With nargs=1 argparse always requires the argument and
    # never applies the default.
    parser.add_argument('file', nargs='?', default='test.npz', type=str,
                        help='name of data file')
    args = parser.parse_args(cargs)
    demo(args.file, args.channels, args.format)

1219

1220

# Run the demo when the module is executed as a script.
# The program name in sys.argv[0] is not passed on to main().
if __name__ == "__main__":
    main(*sys.argv[1:])

1223

1224

1225