Coverage for src/thunderlab/datawriter.py: 87%
495 statements
« prev ^ index » next coverage.py v7.6.2, created at 2024-10-09 16:02 +0000
« prev ^ index » next coverage.py v7.6.2, created at 2024-10-09 16:02 +0000
1"""Writing numpy arrays of floats to data files.
3- `write_data()`: write data into a file.
4- `available_formats()`: supported data and audio file formats.
5- `available_encodings()`: encodings of a data file format.
6- `format_from_extension()`: deduce data file format from file extension.
7- `recode_array()`: recode array of floats.
8"""
10import os
11import sys
12import datetime as dt
13from copy import deepcopy
14from audioio import find_key, add_metadata, move_metadata
15from audioio import get_datetime, default_gain_keys
data_modules = {}
"""Dictionary with availability of various modules needed for writing data.
Keys are the module names, values are booleans.
"""

# Probe every optional dependency once at import time and remember
# whether the import succeeded.  The formats_*() and write_*() functions
# of this module look up these flags before they touch the module.

try:
    import pickle
    data_modules['pickle'] = True
except ImportError:
    data_modules['pickle'] = False

try:
    import numpy as np
    data_modules['numpy'] = True
except ImportError:
    data_modules['numpy'] = False

try:
    import scipy.io as sio
    data_modules['scipy'] = True
except ImportError:
    data_modules['scipy'] = False

# audioio provides the audio writers and the metadata helpers
# (write_metadata_text, flatten_metadata) used by several writers.
try:
    import audioio.audiowriter as aw
    import audioio.audiometadata as am
    from audioio import write_metadata_text, flatten_metadata
    data_modules['audioio'] = True
except ImportError:
    data_modules['audioio'] = False
def format_from_extension(filepath):
    """Deduce data file format from file extension.

    Parameters
    ----------
    filepath: str
        Name of the data file.

    Returns
    -------
    format: str
        Data format deduced from file extension.
        None if `filepath` is empty or has no extension.
        If the audioio module is available, the result of
        `audioio.audiowriter.format_from_extension()` is returned,
        otherwise the upper-cased extension without the leading dot.
    """
    if not filepath:
        return None
    suffix = os.path.splitext(filepath)[1]
    # strip the separator; a lone '.' counts as "no extension":
    if suffix[:1] == '.':
        suffix = suffix[1:]
    if not suffix:
        return None
    if data_modules['audioio']:
        return aw.format_from_extension(filepath)
    return suffix.upper()
def recode_array(data, amax, encoding):
    """Recode array of floats.

    For the integer encodings the data are scaled such that `amax`
    corresponds to full scale and are clipped symmetrically to
    plus/minus the largest positive integer of the target type.
    Clipping is done *before* the cast to integers, so values that
    round up to full scale (just below `amax`) or that exceed `amax`
    never wrap around to the opposite sign.

    Parameters
    ----------
    data: array of floats
        Data array with values ranging between -1 and 1
    amax: float
        Maximum amplitude of data range.
    encoding: str
        Encoding, one of PCM_16, PCM_32, PCM_64, FLOAT or DOUBLE.

    Returns
    -------
    buffer: array
        The data recoded according to `encoding`.
        For unknown encodings `data` is returned unchanged.
    """
    encodings = {'PCM_16': (2, 'i2'),
                 'PCM_32': (4, 'i4'),
                 'PCM_64': (8, 'i8'),
                 'FLOAT': (4, 'f'),
                 'DOUBLE': (8, 'd')}

    if encoding not in encodings:
        return data
    sampwidth, dtype = encodings[encoding]
    if dtype[0] != 'i':
        return data.astype(dtype, copy=False)
    factor = 2**(sampwidth*8 - 1)
    limit = factor - 1
    scaled = np.round(data/amax*factor)
    high = scaled >= limit
    low = scaled <= -limit
    # Park saturated samples at zero so that the cast never sees a value
    # outside the integer range, then write the limits as exact integers
    # (necessary for PCM_64, where `limit` is not representable as float).
    scaled[high | low] = 0.0
    buffer = scaled.astype(dtype)
    buffer[high] = limit
    buffer[low] = -limit
    return buffer
def formats_relacs():
    """Data format of the relacs file format.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Always the single entry 'RELACS'.
    """
    formats = ['RELACS']
    return formats
def encodings_relacs(format=None):
    """Encodings of the relacs file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string is taken as 'RELACS'.
        Compared case-insensitively.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the relacs format.
    """
    fmt = format if format else 'RELACS'
    return ['FLOAT'] if fmt.upper() == 'RELACS' else []
def write_relacs(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data as relacs raw files.

    Each channel is written as 32-bit floats into its own
    `trace-<n>.raw` file.  In addition a `stimuli.dat` file describing
    the traces, three event files, and (if `metadata` is given) an
    `info.dat` file are written into the folder `filepath`.

    Parameters
    ----------
    filepath: str
        Full path of folder where to write relacs files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Not used by this writer.
    unit: str
        Unit of the data. Defaults to 'V'.
    metadata: nested dict
        Additional metadata saved into `info.dat`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not used by this writer.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not used by this writer.
    format: str or None
        File format, only None or 'RELACS' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data,
        i.e. the path of the `stimuli.dat` file.

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'RELACS'
    if format.upper() != 'RELACS':
        raise ValueError(f'file format {format} not supported by relacs file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by relacs file format')
    if not os.path.exists(filepath):
        # makedirs also creates missing parent folders:
        os.makedirs(filepath)
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # write data:
    for c in range(nchannels):
        trace = data if data.ndim == 1 else data[:,c]
        with open(os.path.join(filepath, f'trace-{c+1}.raw'), 'wb') as df:
            df.write(trace.astype(np.float32).tobytes())
    if unit is None:
        unit = 'V'
    # write data format:
    filename = os.path.join(filepath, 'stimuli.dat')
    with open(filename, 'w') as df:
        df.write('# analog input traces:\n')
        for c in range(nchannels):
            df.write(f'# identifier{c+1} : V-{c+1}\n')
            # must match the names of the trace files written above:
            df.write(f'# data file{c+1} : trace-{c+1}.raw\n')
            df.write(f'# sample interval{c+1} : {1000.0/rate:.4f}ms\n')
            df.write(f'# sampling rate{c+1} : {rate:.2f}Hz\n')
            df.write(f'# unit{c+1} : {unit}\n')
        df.write('# event lists:\n')
        df.write('# event file1: stimulus-events.dat\n')
        df.write('# event file2: restart-events.dat\n')
        df.write('# event file3: recording-events.dat\n')
    # write empty event files:
    for events in ['Recording', 'Restart', 'Stimulus']:
        with open(os.path.join(filepath, f'{events.lower()}-events.dat'), 'w') as df:
            df.write(f'# events: {events}\n\n')
            df.write('#Key\n')
            if events == 'Stimulus':
                df.write('# t duration\n')
                df.write('# sec s\n')
                df.write('# 1 2\n')
            else:
                df.write('# t\n')
                df.write('# sec\n')
                df.write('# 1\n')
                if events == 'Recording':
                    # the recording starts at time zero:
                    df.write(' 0.0\n')
    # write metadata:
    if metadata:
        write_metadata_text(os.path.join(filepath, 'info.dat'),
                            metadata, prefix='# ')
    return filename
def formats_fishgrid():
    """Data format of the fishgrid file format.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Always the single entry 'FISHGRID'.
    """
    formats = ['FISHGRID']
    return formats
def encodings_fishgrid(format=None):
    """Encodings of the fishgrid file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string is taken as 'FISHGRID'.
        Compared case-insensitively.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the fishgrid format.
    """
    fmt = format if format else 'FISHGRID'
    return ['FLOAT'] if fmt.upper() == 'FISHGRID' else []
def write_fishgrid(filepath, data, rate, amax=1.0, unit=None,
                   metadata=None, locs=None, labels=None, format=None,
                   encoding=None):
    """Write data as fishgrid raw files.

    The data are written as 32-bit floats in C (row-major) order into
    `traces-grid1.raw`, the metadata into `fishgrid.cfg`, and the
    markers into `timestamps.dat`, all inside the folder `filepath`.

    Parameters
    ----------
    filepath: str
        Full path of the folder where to write fishgrid files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored as `AIMaxVolt` in the DAQ board settings.
    unit: str
        Unit of the data. Defaults to 'mV'.
    metadata: nested dict
        Additional metadata saved into the `fishgrid.cfg`.
        None is treated like an empty dictionary.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format, only None or 'FISHGRID' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data,
        i.e. the path of the `fishgrid.cfg` file.

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    def write_timestamp(df, count, index, span, rate, starttime,
                        label, comment):
        # absolute time of the marker:
        stamp = starttime + dt.timedelta(seconds=index/rate)
        df.write(f' Num: {count}\n')
        df.write(f' Index1: {index}\n')
        if span > 0:
            df.write(f' Span1: {span}\n')
        df.write(f' Date: {stamp.date().isoformat()}\n')
        df.write(f' Time: {stamp.time().isoformat("seconds")}\n')
        if label:
            df.write(f' Label: {label}\n')
        df.write(f'Comment: {comment}\n')
        df.write('\n')

    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'FISHGRID'
    if format.upper() != 'FISHGRID':
        raise ValueError(f'file format {format} not supported by fishgrid file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by fishgrid file format')
    if metadata is None:
        # the code below needs a dictionary for membership tests and copies:
        metadata = {}
    if not os.path.exists(filepath):
        # makedirs also creates missing parent folders:
        os.makedirs(filepath)
    # write data:
    with open(os.path.join(filepath, 'traces-grid1.raw'), 'wb') as df:
        df.write(data.astype(np.float32).tobytes())
    # write metadata:
    if unit is None:
        unit = 'mV'
    cfgfilename = os.path.join(filepath, 'fishgrid.cfg')
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # arrange the channels on a grid that is as square as possible:
    ncols = int(np.ceil(np.sqrt(nchannels)))
    nrows = int(np.ceil(nchannels/ncols))
    if 'FishGrid' in metadata:
        # metadata already have fishgrid layout: keep the sections and
        # collect top-level non-dict entries in FishGrid.Recording.
        md = {}
        rmd = {}
        for k in metadata:
            if isinstance(metadata[k], dict):
                md[k] = deepcopy(metadata[k])
            else:
                rmd[k] = metadata[k]
        if len(rmd) > 0:
            m, k = find_key(md, 'FishGrid.Recording')
            if k in m:
                m[k].update(rmd)
            else:
                m[k] = rmd
    else:
        # sort known keys into the fishgrid sections; work on a copy,
        # because move_metadata() takes entries out of its source:
        smd = deepcopy(metadata)
        gm = dict(Used1='true', Columns1=f'{ncols}', Rows1=f'{nrows}')
        hm = {'DAQ board': dict()}
        if not move_metadata(smd, hm, 'Amplifier'):
            ampl = {}
            move_metadata(smd, ampl, ['Amplifier.Name', 'AmplName'], 'AmplName')
            move_metadata(smd, ampl, ['Amplifier.Model', 'AmplModel'], 'AmplModel')
            move_metadata(smd, ampl, 'Amplifier.Type')
            move_metadata(smd, ampl, 'Gain')
            move_metadata(smd, ampl, 'HighpassCutoff')
            move_metadata(smd, ampl, 'LowpassCutoff')
            if len(ampl) > 0:
                hm['Amplifier'] = ampl
        md = dict(FishGrid={'Grid 1': gm, 'Hardware Settings': hm})
        move_metadata(smd, md['FishGrid'], 'Recording')
        gm = {}
        starttime = get_datetime(smd, remove=True)
        if starttime is not None:
            gm['StartDate'] = starttime.date().isoformat()
            gm['StartTime'] = starttime.time().isoformat(timespec='seconds')
        for key in ['Location', 'Position', 'WaterTemperature',
                    'WaterConductivity', 'WaterpH', 'WaterOxygen',
                    'Temperature', 'Humidity', 'Pressure',
                    'Comment', 'Experimenter']:
            move_metadata(smd, gm, key)
        if len(gm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'General': gm})
        bm = {}
        for key in ['DataTime', 'DataInterval',
                    'BufferTime', 'BufferInterval']:
            move_metadata(smd, bm, key)
        if len(bm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'Buffers and timing': bm})
        if smd:
            # whatever is left over:
            md['FishGrid']['Other'] = smd
    add_metadata(md,
                 [f'FishGrid.Hardware Settings.DAQ board.AISampleRate={0.001*rate:.3f}kHz',
                  f'FishGrid.Hardware Settings.DAQ board.AIMaxVolt={amax:g}{unit}'])
    with open(cfgfilename, 'w') as df:
        for k in md:
            df.write(f'*{k}\n')
            write_metadata_text(df, md[k], prefix=' ')
    # write markers:
    filename = os.path.join(filepath, 'timestamps.dat')
    starttime = get_datetime(metadata, (('DateTimeOriginal',),
                                        ('OriginationDate', 'OriginationTime'),
                                        ('StartDate', 'StartTime'),
                                        ('Location_Time',)),
                             default=dt.datetime.fromtimestamp(0, dt.timezone.utc))
    with open(filename, 'w') as df:
        count = 0
        write_timestamp(df, count, 0, 0, rate, starttime,
                        '', 'begin of recording')
        count += 1
        if locs is not None:
            for i in range(len(locs)):
                label = ''
                comment = ''
                if labels is not None and len(labels) > i:
                    label = labels[i,0] if labels.ndim > 1 else labels[i]
                    comment = labels[i,1] if labels.ndim > 1 else ''
                index = locs[i,0] if locs.ndim > 1 else locs[i]
                span = locs[i,1] if locs.ndim > 1 else 0
                # indices and spans are written multiplied by the
                # number of channels:
                write_timestamp(df, count, index*nchannels,
                                span*nchannels, rate,
                                starttime, label, comment)
                count += 1
        write_timestamp(df, count, len(data)*nchannels, 0, rate,
                        starttime, '', 'end of recording')
    return cfgfilename
def formats_pickle():
    """Data formats supported by pickle.dump().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the pickle module is not available.
    """
    return ['PKL'] if data_modules['pickle'] else []
def encodings_pickle(format=None):
    """Encodings of the pickle format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string is taken as 'PKL'.
        Compared case-insensitively.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the pickle format.
    """
    fmt = format if format else 'PKL'
    if fmt.upper() == 'PKL':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_pickle(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data into python pickle file.

    A single dictionary holding the data and all additional
    information is dumped into the file.

    Documentation
    -------------
    https://docs.python.org/3/library/pickle.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        '.pkl' is appended if the extension does not start with 'p'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the pickle.
        Stored under the key "metadata".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'PKL' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The pickle module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['pickle']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'PKL'
    if format.upper() != 'PKL':
        raise ValueError(f'file format {format} not supported by pickle file format')
    # only the first character of the extension is checked:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'P':
        filepath += os.extsep + 'pkl'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_pickle(format):
        raise ValueError(f'file encoding {encoding} not supported by pickle file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        ddict['metadata'] = metadata
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = labels
        else:
            ddict['labels'] = labels[:,0]
            if labels.shape[1] > 1:
                ddict['descriptions'] = labels[:,1]
    with open(filepath, 'wb') as df:
        pickle.dump(ddict, df)
    return filepath
def formats_numpy():
    """Data formats supported by numpy.savez().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the numpy module is not available.
    """
    return ['NPZ'] if data_modules['numpy'] else []
def encodings_numpy(format=None):
    """Encodings of the numpy file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string is taken as 'NPZ'.
        Compared case-insensitively.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the numpy format.
    """
    fmt = format if format else 'NPZ'
    if fmt.upper() == 'NPZ':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_numpy(filepath, data, rate, amax=1.0, unit=None,
                metadata=None, locs=None, labels=None, format=None,
                encoding=None):
    """Write data into numpy npz file.

    Documentation
    -------------
    https://numpy.org/doc/stable/reference/generated/numpy.savez.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        '.npz' is appended if the extension does not start with 'n'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the numpy file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as fixed-width unicode arrays under the keys
        "labels" and "descriptions".
    format: str or None
        File format, only None or 'NPZ' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The numpy module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['numpy']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'NPZ'
    if format.upper() not in formats_numpy():
        raise ValueError(f'file format {format} not supported by numpy file format')
    # only the first character of the extension is checked:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'N':
        filepath += os.extsep + 'npz'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_numpy(format):
        raise ValueError(f'file encoding {encoding} not supported by numpy file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        # npz files hold flat key-array pairs only:
        fmeta = flatten_metadata(metadata, True, sep='__')
        for k in list(fmeta):
            fmeta['metadata__'+k] = fmeta.pop(k)
        ddict.update(fmeta)
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        # convert the strings to fixed-width unicode arrays that are
        # just wide enough for the longest entry:
        if labels.ndim == 1:
            maxc = np.max([len(l) for l in labels])
            ddict['labels'] = labels.astype(dtype=f'U{maxc}')
        else:
            maxc = np.max([len(l) for l in labels[:,0]])
            ddict['labels'] = labels[:,0].astype(dtype=f'U{maxc}')
            if labels.shape[1] > 1:
                maxc = np.max([len(l) for l in labels[:,1]])
                ddict['descriptions'] = labels[:,1].astype(dtype=f'U{maxc}')
    np.savez(filepath, **ddict)
    return filepath
def formats_mat():
    """Data formats supported by scipy.io.savemat().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the scipy module is not available.
    """
    return ['MAT'] if data_modules['scipy'] else []
def encodings_mat(format=None):
    """Encodings of the matlab format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string is taken as 'MAT'.
        Compared case-insensitively.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the matlab format.
    """
    fmt = format if format else 'MAT'
    if fmt.upper() == 'MAT':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_mat(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into matlab file.

    Documentation
    -------------
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.savemat.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        '.mat' is appended if the extension does not start with 'm'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the mat file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as fixed-width unicode arrays under the keys
        "labels" and "descriptions".
    format: str or None
        File format, only None or 'MAT' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The scipy.io module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['scipy']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'MAT'
    if format.upper() not in formats_mat():
        raise ValueError(f'file format {format} not supported by matlab file format')
    # only the first character of the extension is checked:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'M':
        filepath += os.extsep + 'mat'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_mat(format):
        raise ValueError(f'file encoding {encoding} not supported by matlab file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        # store the nested metadata as flat, prefixed variables:
        fmeta = flatten_metadata(metadata, True, sep='__')
        for k in list(fmeta):
            fmeta['metadata__'+k] = fmeta.pop(k)
        ddict.update(fmeta)
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        # convert the strings to fixed-width unicode arrays that are
        # just wide enough for the longest entry:
        if labels.ndim == 1:
            maxc = np.max([len(l) for l in labels])
            ddict['labels'] = labels.astype(dtype=f'U{maxc}')
        else:
            maxc = np.max([len(l) for l in labels[:,0]])
            ddict['labels'] = labels[:,0].astype(dtype=f'U{maxc}')
            if labels.shape[1] > 1:
                maxc = np.max([len(l) for l in labels[:,1]])
                ddict['descriptions'] = labels[:,1].astype(dtype=f'U{maxc}')
    sio.savemat(filepath, ddict)
    return filepath
def formats_audioio():
    """Data formats supported by audioio.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the audioio module is not available.
    """
    if data_modules['audioio']:
        return aw.available_formats()
    return []
def encodings_audio(format):
    """Encodings of any audio format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if the audioio module is not available.
    """
    if data_modules['audioio']:
        return aw.available_encodings(format)
    return []
def write_audioio(filepath, data, rate, amax=1.0, unit=None,
                  metadata=None, locs=None, labels=None, format=None,
                  encoding=None, gainkey=default_gain_keys, sep='.'):
    """Write data into audio file.

    If `amax` differs from one, the data are divided by `amax`
    before they are stored in the audio file, and `amax` together with
    `unit` is stored as gain in the metadata.
    After this operation, the data values need to range between -1 and 1,
    in particular if the data are encoded as integers
    (i.e. PCM_16, PCM_32 and PCM_64).
    Note, that this function does not check for this requirement!

    Documentation
    -------------
    https://bendalab.github.io/audioio/

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        If None or not finite, the gain found in the metadata
        is used instead (defaulting to 1).
    unit: str
        Unit of the data. If `amax` is taken from the metadata and a
        unit is supplied, it has to match the unit of the gain.
        None, an empty string and 'a.u.' result in a gain without unit.
    metadata: nested dict
        Metadata saved into the audio file. If `amax` is not one,
        the gain is written into the first matching key of `gainkey`,
        or, if none is found, added using the first key in `gainkey`
        (to an 'INFO' section if present).
        Note that a supplied dictionary is modified in place.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
        The passed object is never modified.
    sep: str
        String that separates section names in `gainkey`.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The audioio module is not available.
    ValueError
        Invalid `filepath` or `unit` does not match gain in metadata.
    """
    if not data_modules['audioio']:
        raise ImportError
    if not filepath:
        raise ValueError('no file specified!')
    if amax is None or not np.isfinite(amax):
        amax, u = am.get_gain(metadata, gainkey, sep, 1.0, 'a.u.')
        if not unit:
            unit = u
        elif unit != 'a.u.' and u != 'a.u.' and unit != u:
            raise ValueError(f'unit "{unit}" does not match gain unit "{u}" in metadata')
    if amax != 1.0:
        data = data / amax
        if metadata is None:
            metadata = {}
        # no unit at all must not end up as the string 'None':
        if not unit or unit == 'a.u.':
            unit = ''
        # Work on a private list of candidate keys: appending to
        # `gainkey` itself would grow the shared default (or the
        # caller's list) with every call and fail for tuples.
        if isinstance(gainkey, (list, tuple, np.ndarray)):
            gainkeys = list(gainkey)
        else:
            gainkeys = [gainkey]
        gainkeys.append('Gain')
        for gk in gainkeys:
            m, k = am.find_key(metadata, gk)
            if k in m:
                m[k] = f'{amax:g}{unit}'
                break
        else:
            # no gain entry found, add a new one:
            if 'INFO' in metadata:
                metadata['INFO'][gainkeys[0]] = f'{amax:g}{unit}'
            else:
                metadata[gainkeys[0]] = f'{amax:g}{unit}'
    # NOTE(review): `format` and `encoding` are not forwarded to
    # write_audio() - confirm whether this is intended.
    aw.write_audio(filepath, data, rate, metadata, locs, labels)
    return filepath
# Registry that is walked in this order by available_formats() and
# write_data().  A module name of None marks writers without an entry
# in `data_modules`.
data_formats_funcs = (
    ('relacs', None, formats_relacs),
    ('fishgrid', None, formats_fishgrid),
    ('pickle', 'pickle', formats_pickle),
    ('numpy', 'numpy', formats_numpy),
    ('matlab', 'scipy', formats_mat),
    ('audio', 'audioio', formats_audioio)
    )
"""List of implemented formats functions.

Each element of the list is a tuple with the format's name, the
module's name in `data_modules` or None, and the formats function.
"""
def available_formats():
    """Data and audio file formats supported by any of the installed modules.

    Returns
    -------
    formats: list of str
        Alphabetically sorted list of supported file formats as strings,
        without duplicates.
    """
    found = set()
    for _name, module, list_formats in data_formats_funcs:
        # skip writers whose module could not be imported:
        if module and not data_modules[module]:
            continue
        found.update(list_formats())
    return sorted(found)
# Registry that is walked in this order by available_encodings();
# the first function returning a non-empty list wins.
data_encodings_funcs = (
    ('relacs', encodings_relacs),
    ('fishgrid', encodings_fishgrid),
    ('pickle', encodings_pickle),
    ('numpy', encodings_numpy),
    ('matlab', encodings_mat),
    ('audio', encodings_audio)
    )
""" List of implemented encodings functions.

Each element of the list is a tuple with the module's name and the encodings function.
"""
def available_encodings(format):
    """Encodings of a data file format.

    The encodings functions listed in `data_encodings_funcs` are
    queried in order; the first non-empty answer is returned.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if no writer supports `format`.
    """
    answers = (list_encodings(format)
               for _module, list_encodings in data_encodings_funcs)
    return next((encs for encs in answers if len(encs) > 0), [])
# write_data() looks up the writer by the format name it found
# in `data_formats_funcs`, so the keys must match those names.
data_writer_funcs = {
    'relacs': write_relacs,
    'fishgrid': write_fishgrid,
    'pickle': write_pickle,
    'numpy': write_numpy,
    'matlab': write_mat,
    'audio': write_audioio
    }
"""Dictionary of implemented write functions.

Keys are the format's name and values the corresponding write
function.
"""
def write_data(filepath, data, rate, amax=1.0, unit=None,
               metadata=None, locs=None, labels=None, format=None,
               encoding=None, verbose=0, **kwargs):
    """Write data into a file.

    The first writer listed in `data_formats_funcs` whose module is
    available and that supports the requested format is used.

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        File format is determined from extension.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data.
    metadata: nested dict
        Additional metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        Passed on unchanged; if None, the selected writer applies
        its own default.
    verbose: int
        If >0 report the written file, if >1 also print
        some properties of the data.
    kwargs: dict
        Additional, file format specific keyword arguments.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ValueError
        `filepath` is empty string or unspecified format.
    IOError
        Requested file format not supported.

    Example
    -------
    ```
    import numpy as np
    from thunderlab.datawriter import write_data

    rate = 28000.0
    freq = 800.0
    time = np.arange(0.0, 1.0, 1/rate)      # one second
    data = 2.5*np.sin(2.0*np.pi*freq*time)  # 800Hz sine wave
    md = dict(Artist='underscore_')         # metadata
    write_data('audio/file.npz', data, rate, 2.5, 'mV', md)
    ```
    """
    if not filepath:
        raise ValueError('no file specified!')
    if not format:
        format = format_from_extension(filepath)
    if not format:
        raise ValueError('unspecified file format')
    format_name = format.upper()
    for fmt, lib, formats_func in data_formats_funcs:
        if lib and not data_modules[lib]:
            continue
        if format_name in formats_func():
            writer_func = data_writer_funcs[fmt]
            filepath = writer_func(filepath, data, rate, amax,
                                   unit, metadata, locs, labels,
                                   format=format, encoding=encoding,
                                   **kwargs)
            if verbose > 0:
                print(f'wrote data to file "{filepath}" using {fmt} format')
                if verbose > 1:
                    # a missing unit must not be printed as 'None':
                    unit_str = unit if unit else ''
                    print(f' sampling rate: {rate:g}Hz')
                    print(f' channels : {data.shape[1] if len(data.shape) > 1 else 1}')
                    print(f' frames : {len(data)}')
                    print(f' range : {amax:g}{unit_str}')
            return filepath
    raise IOError(f'file format "{format_name}" not supported.')
def demo(file_path, channels=2, format=None):
    """Demo of the datawriter functions.

    Generates one second of sine waves sampled at 44.1kHz, with
    amplitudes decreasing and frequencies increasing (starting at
    440Hz, in steps of 8Hz) from channel to channel, and writes them
    with `write_data()`.

    Parameters
    ----------
    file_path: str
        File path of a data file.
    channels: int
        Number of channels to be generated.
    format: str or None
        File format to be used.
    """
    print('generate data ...')
    rate = 44100.0
    time = np.arange(0.0, 1.0, 1.0/rate)
    data = np.zeros((len(time), channels))
    for c in range(channels):
        amplitude = 0.1*(channels - c)
        freq = 440.0 + c*8.0
        data[:,c] = amplitude*np.sin(2.0*np.pi*freq*time)

    print(f"write_data('{file_path}') ...")
    write_data(file_path, data, rate, 1.0, 'mV', format=format, verbose=2)

    print('done.')
def main(*cargs):
    """Call demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    parser = argparse.ArgumentParser(description=
                                     'Checking thunderlab.datawriter module.')
    parser.add_argument('-c', dest='channels', default=2, type=int,
                        help='number of channels to be written')
    parser.add_argument('-f', dest='format', default=None, type=str,
                        help='file format')
    # nargs='?' makes the positional argument optional, so that the
    # default file name is actually used when no file is given:
    parser.add_argument('file', nargs='?', default='test.npz', type=str,
                        help='name of data file')
    args = parser.parse_args(cargs)
    demo(args.file, args.channels, args.format)
# Run the demo when the module is executed as a script;
# the program name in sys.argv[0] is not passed on.
if __name__ == "__main__":
    main(*sys.argv[1:])