Coverage for src / thunderlab / datawriter.py: 87%
515 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-10 21:21 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-10 21:21 +0000
1"""Writing numpy arrays of floats to data files.
3- `write_data()`: write data into a file.
4- `available_formats()`: supported data and audio file formats.
5- `available_encodings()`: encodings of a data file format.
6- `format_from_extension()`: deduce data file format from file extension.
7- `recode_array()`: recode array of floats.
8- `insert_container_metadata()`: insert flattened metadata to data dictionary for a container file format.
9"""
11import sys
12import datetime as dt
14from pathlib import Path
15from copy import deepcopy
16from audioio import find_key, add_metadata, move_metadata
17from audioio import get_datetime, default_gain_keys
data_modules = {}
"""Dictionary with availability of various modules needed for writing data.
Keys are the module names, values are booleans.
"""

# Probe every optional dependency once at import time.  Each probe
# records in `data_modules` whether the import succeeded, so that the
# writer functions below can check availability before using a module.
try:
    import pickle
except ImportError:
    data_modules['pickle'] = False
else:
    data_modules['pickle'] = True

try:
    import numpy as np
except ImportError:
    data_modules['numpy'] = False
else:
    data_modules['numpy'] = True

try:
    import scipy.io as sio
except ImportError:
    data_modules['scipy'] = False
else:
    data_modules['scipy'] = True

# audioio counts as available only if all three imports succeed:
try:
    import audioio.audiowriter as aw
    import audioio.audiometadata as am
    from audioio import write_metadata_text, flatten_metadata
except ImportError:
    data_modules['audioio'] = False
else:
    data_modules['audioio'] = True
def format_from_extension(filepath):
    """Deduce data file format from file extension.

    Parameters
    ----------
    filepath: str or Path or None
        Path and name of the data file.

    Returns
    -------
    format: str or None
        Data format deduced from the file extension.
        None if `filepath` is None or has no extension.
        The extension 'scandat' (any case) maps to 'RAW'.
        Otherwise the result of `audioio`'s `format_from_extension()`
        if that module is available, else the upper-case extension.
    """
    if filepath is None:
        return None
    filepath = Path(filepath)
    suffix = filepath.suffix
    # drop the leading dot of the suffix:
    name = suffix[1:] if suffix.startswith('.') else suffix
    if not name:
        return None
    name = name.upper()
    if name == 'SCANDAT':
        return 'RAW'
    if not data_modules['audioio']:
        return name
    return aw.format_from_extension(filepath)
def recode_array(data, amax, encoding):
    """Recode array of floats.

    Parameters
    ----------
    data: array of floats
        Data array. For integer encodings the values are scaled
        relative to `amax`.
    amax: float
        Maximum amplitude of data range.
    encoding: str
        Encoding, one of PCM_16, PCM_32, PCM_64, FLOAT or DOUBLE.

    Returns
    -------
    buffer: array
        The data recoded according to `encoding`.
        For an unknown `encoding` the input `data` is returned as is.
    """
    # encoding name -> (bytes per sample, numpy dtype code)
    codecs = {'PCM_16': (2, 'i2'),
              'PCM_32': (4, 'i4'),
              'PCM_64': (8, 'i8'),
              'FLOAT': (4, 'f'),
              'DOUBLE': (8, 'd')}
    spec = codecs.get(encoding)
    if spec is None:
        return data
    nbytes, code = spec
    if code[0] != 'i':
        # floating point encodings only need a dtype conversion:
        return data.astype(code, copy=False)
    full_scale = 2**(nbytes*8 - 1)
    buffer = np.round(data/amax*full_scale).astype(code)
    # samples at or beyond the amplitude range are pinned to the
    # symmetric integer limits:
    buffer[data >= +amax] = full_scale - 1
    buffer[data <= -amax] = -(full_scale - 1)
    return buffer
def formats_relacs():
    """Data format of the relacs file format.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'RELACS'.
    """
    supported = ['RELACS']
    return supported
def encodings_relacs(format=None):
    """Encodings of the relacs file format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'RELACS'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'RELACS' (case insensitive).
    """
    fmt = format if format else 'RELACS'
    return ['FLOAT'] if fmt.upper() == 'RELACS' else []
def write_relacs(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data as relacs raw files.

    Each channel is written as 32-bit floats into its own
    `trace-<n>.raw` file. In addition a `stimuli.dat` file describing
    the traces, three event files, and, if `metadata` is supplied,
    an `info.dat` file are written into the folder.

    Parameters
    ----------
    filepath: str or Path
        Full path of folder where to write relacs files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Not used by this writer.
    unit: str
        Unit of the data. Defaults to 'V'.
    metadata: nested dict
        Additional metadata saved into `info.dat`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not used by this writer.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not used by this writer.
    format: str or None
        File format, only None or 'RELACS' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: Path
        The actual folder used for writing the data.

    Raises
    ------
    ValueError
        File format or encoding not supported.
    """
    if format is None:
        format = 'RELACS'
    if format.upper() != 'RELACS':
        raise ValueError(f'file format {format} not supported by relacs file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by relacs file format')
    filepath = Path(filepath)
    if not filepath.exists():
        filepath.mkdir()
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # write data, one raw file per channel:
    for c in range(nchannels):
        trace = data if data.ndim == 1 else data[:, c]
        with open(filepath / f'trace-{c+1}.raw', 'wb') as df:
            df.write(trace.astype(np.float32).tobytes())
    if unit is None:
        unit = 'V'
    # write data format:
    with open(filepath / 'stimuli.dat', 'w') as df:
        df.write('# analog input traces:\n')
        for c in range(nchannels):
            df.write(f'# identifier{c+1} : V-{c+1}\n')
            # name the trace file actually written above for this channel:
            df.write(f'# data file{c+1} : trace-{c+1}.raw\n')
            df.write(f'# sample interval{c+1} : {1000.0/rate:.4f}ms\n')
            df.write(f'# sampling rate{c+1} : {rate:.2f}Hz\n')
            df.write(f'# unit{c+1} : {unit}\n')
        df.write('# event lists:\n')
        df.write('# event file1: stimulus-events.dat\n')
        df.write('# event file2: restart-events.dat\n')
        df.write('# event file3: recording-events.dat\n')
    # write empty event files:
    for events in ['Recording', 'Restart', 'Stimulus']:
        with open(filepath / f'{events.lower()}-events.dat', 'w') as df:
            df.write(f'# events: {events}\n\n')
            df.write('#Key\n')
            if events == 'Stimulus':
                df.write('# t duration\n')
                df.write('# sec s\n')
                df.write('# 1 2\n')
            else:
                df.write('# t\n')
                df.write('# sec\n')
                df.write('# 1\n')
            if events == 'Recording':
                # the recording starts at time zero:
                df.write(' 0.0\n')
    # write metadata:
    if metadata:
        write_metadata_text(filepath / 'info.dat',
                            metadata, prefix='# ')
    return filepath
def formats_fishgrid():
    """Data format of the fishgrid file format.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'FISHGRID'.
    """
    return list(('FISHGRID',))
def encodings_fishgrid(format=None):
    """Encodings of the fishgrid file format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'FISHGRID'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'FISHGRID' (case insensitive).
    """
    fmt = format or 'FISHGRID'
    if fmt.upper() == 'FISHGRID':
        return ['FLOAT']
    return []
def write_fishgrid(filepath, data, rate, amax=1.0, unit=None,
                   metadata=None, locs=None, labels=None, format=None,
                   encoding=None):
    """Write data as fishgrid raw files.

    Writes the data as 32-bit floats into `traces-grid1.raw`,
    the metadata into `fishgrid.cfg`, and the markers into
    `timestamps.dat`.

    Parameters
    ----------
    filepath: str or Path
        Full path of the folder where to write fishgrid files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data. Defaults to 'mV'.
    metadata: nested dict
        Additional metadata saved into the `fishgrid.cfg`.
        If it has a top-level key 'FishGrid', its sections are taken
        as they are. Otherwise known keys are sorted into the
        sections of a fishgrid configuration.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 1-D or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format, only None or 'FISHGRID' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: Path
        The actual folder used for writing the data.

    Raises
    ------
    ValueError
        File format or encoding not supported.
    """
    def write_timestamp(df, count, index, span, rate, starttime,
                        label, comment):
        # Write a single marker entry into the timestamps file.
        # NOTE(review): callers pass `index` already multiplied by the
        # number of channels, yet it is divided by `rate` here to get
        # seconds - confirm that this is the intended time base.
        stamp = starttime + dt.timedelta(seconds=index/rate)
        df.write(f' Num: {count}\n')
        df.write(f' Index1: {index}\n')
        # Index2, Index3 and Index4 are not written.
        if span > 0:
            df.write(f' Span1: {span}\n')
        df.write(f' Date: {stamp.date().isoformat()}\n')
        df.write(f' Time: {stamp.time().isoformat(timespec="seconds")}\n')
        if label:
            df.write(f' Label: {label}\n')
        df.write(f'Comment: {comment}\n')
        df.write('\n')

    if format is None:
        format = 'FISHGRID'
    if format.upper() != 'FISHGRID':
        raise ValueError(f'file format {format} not supported by fishgrid file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by fishgrid file format')
    filepath = Path(filepath)
    if not filepath.exists():
        filepath.mkdir()
    # write data:
    with open(filepath / 'traces-grid1.raw', 'wb') as df:
        df.write(data.astype(np.float32).tobytes())
    # write metadata:
    if unit is None:
        unit = 'mV'
    cfgfile = filepath / 'fishgrid.cfg'
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # arrange the channels on an approximately square grid:
    ncols = int(np.ceil(np.sqrt(nchannels)))
    nrows = int(np.ceil(nchannels/ncols))
    if metadata is None:
        metadata = {}
    if 'FishGrid' in metadata:
        # metadata are already organized in fishgrid sections:
        md = {}
        rmd = {}
        for k in metadata:
            if isinstance(metadata[k], dict):
                md[k] = deepcopy(metadata[k])
            else:
                rmd[k] = metadata[k]
        # top-level key-value pairs go into the recording section:
        if len(rmd) > 0:
            m, k = find_key(md, 'FishGrid.Recording')
            if k in m:
                m[k].update(rmd)
            else:
                m[k] = rmd
    else:
        # sort known metadata into fishgrid sections,
        # working on a copy so that the caller's dict stays intact:
        smd = deepcopy(metadata)
        gm = dict(Used1='true', Columns1=f'{ncols}', Rows1=f'{nrows}')
        hm = {'DAQ board': dict()}
        if not move_metadata(smd, hm, 'Amplifier'):
            ampl = {}
            move_metadata(smd, ampl, ['Amplifier.Name', 'AmplName'], 'AmplName')
            move_metadata(smd, ampl, ['Amplifier.Model', 'AmplModel'], 'AmplModel')
            move_metadata(smd, ampl, 'Amplifier.Type')
            move_metadata(smd, ampl, 'Gain')
            move_metadata(smd, ampl, 'HighpassCutoff')
            move_metadata(smd, ampl, 'LowpassCutoff')
            if len(ampl) > 0:
                hm['Amplifier'] = ampl
        md = dict(FishGrid={'Grid 1': gm, 'Hardware Settings': hm})
        move_metadata(smd, md['FishGrid'], 'Recording')
        gm = {}
        starttime = get_datetime(smd, remove=True)
        if starttime is not None:
            gm['StartDate'] = starttime.date().isoformat()
            gm['StartTime'] = starttime.time().isoformat(timespec='seconds')
        move_metadata(smd, gm, 'Location')
        move_metadata(smd, gm, 'Position')
        move_metadata(smd, gm, 'WaterTemperature')
        move_metadata(smd, gm, 'WaterConductivity')
        move_metadata(smd, gm, 'WaterpH')
        move_metadata(smd, gm, 'WaterOxygen')
        move_metadata(smd, gm, 'Temperature')
        move_metadata(smd, gm, 'Humidity')
        move_metadata(smd, gm, 'Pressure')
        move_metadata(smd, gm, 'Comment')
        move_metadata(smd, gm, 'Experimenter')
        if len(gm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'General': gm})
        bm = {}
        move_metadata(smd, bm, 'DataTime')
        move_metadata(smd, bm, 'DataInterval')
        move_metadata(smd, bm, 'BufferTime')
        move_metadata(smd, bm, 'BufferInterval')
        if len(bm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'Buffers and timing': bm})
        # whatever has not been moved ends up in its own section:
        if smd:
            md['FishGrid']['Other'] = smd
    add_metadata(md,
                 [f'FishGrid.Hardware Settings.DAQ board.AISampleRate={0.001*rate:.3f}kHz',
                  f'FishGrid.Hardware Settings.DAQ board.AIMaxVolt={amax:g}{unit}'])
    with open(cfgfile, 'w') as df:
        for k in md:
            df.write(f'*{k}\n')
            write_metadata_text(df, md[k], prefix=' ')
    # write markers:
    filename = filepath / 'timestamps.dat'
    starttime = get_datetime(metadata, (('DateTimeOriginal',),
                                        ('OriginationDate', 'OriginationTime'),
                                        ('StartDate', 'StartTime'),
                                        ('Location_Time',)),
                             default=dt.datetime.fromtimestamp(0, dt.timezone.utc))
    with open(filename, 'w') as df:
        count = 0
        write_timestamp(df, count, 0, 0, rate, starttime,
                        '', 'begin of recording')
        count += 1
        if locs is not None:
            for i in range(len(locs)):
                label = ''
                comment = ''
                if labels is not None and len(labels) > i:
                    if labels.ndim > 1:
                        label = labels[i, 0]
                        # a 2-D array may come with a single column only:
                        if labels.shape[1] > 1:
                            comment = labels[i, 1]
                    else:
                        label = labels[i]
                if locs.ndim > 1:
                    index = locs[i, 0]
                    # a 2-D array may come without a spans column:
                    span = locs[i, 1] if locs.shape[1] > 1 else 0
                else:
                    index = locs[i]
                    span = 0
                write_timestamp(df, count, index*nchannels,
                                span*nchannels, rate,
                                starttime, label, comment)
                count += 1
        write_timestamp(df, count, len(data)*nchannels, 0, rate,
                        starttime, '', 'end of recording')
    return filepath
def formats_pickle():
    """Data formats supported by pickle.dump().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the pickle module is not available.
    """
    return ['PKL'] if data_modules['pickle'] else []
def encodings_pickle(format=None):
    """Encodings of the pickle format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'PKL'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'PKL' (case insensitive).
    """
    fmt = format if format else 'PKL'
    if fmt.upper() == 'PKL':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_pickle(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data into python pickle file.

    A single dictionary holding the data and all the supplied
    additional information is pickled into the file.

    Documentation
    -------------
    https://docs.python.org/3/library/pickle.html

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        Unless the extension starts with 'p' (any case),
        it is replaced by '.pkl'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data" after recoding according to `encoding`.
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit" if not empty.
    metadata: nested dict
        Additional metadata saved into the pickle.
        Stored under the key "metadata" if not empty.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 1-D or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'PKL' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The pickle module is not available.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['pickle']:
        raise ImportError
    if format is None:
        format = 'PKL'
    if format.upper() != 'PKL':
        raise ValueError(f'file format {format} not supported by pickle file format')
    filepath = Path(filepath)
    suffix = filepath.suffix
    # keep any extension starting with 'p', otherwise enforce '.pkl':
    if len(suffix) <= 1 or suffix[1].upper() != 'P':
        filepath = filepath.with_suffix('.pkl')
    encoding = 'DOUBLE' if encoding is None else encoding.upper()
    if encoding not in encodings_pickle(format):
        raise ValueError(f'file encoding {encoding} not supported by pickle file format')
    # collect everything that goes into the pickle:
    payload = {'data': recode_array(data, amax, encoding),
               'rate': rate,
               'amax': amax}
    if unit:
        payload['unit'] = unit
    if metadata:
        payload['metadata'] = metadata
    has_locs = locs is not None and len(locs) > 0
    if has_locs and locs.ndim == 1:
        payload['positions'] = locs
    elif has_locs:
        payload['positions'] = locs[:, 0]
        if locs.shape[1] > 1:
            payload['spans'] = locs[:, 1]
    has_labels = labels is not None and len(labels) > 0
    if has_labels and labels.ndim == 1:
        payload['labels'] = labels
    elif has_labels:
        payload['labels'] = labels[:, 0]
        if labels.shape[1] > 1:
            payload['descriptions'] = labels[:, 1]
    with open(filepath, 'wb') as df:
        pickle.dump(payload, df)
    return filepath
def insert_container_metadata(metadata, data_dict, metadatakey='metadata'):
    """Insert flattened metadata to data dictionary for a container file format.

    The nested metadata are flattened using '__' as separator between
    section names and keys. Each flattened key is prefixed by
    `metadatakey` and '__' and then added to `data_dict`.

    Parameters
    ----------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    data_dict: dict
        Dictionary of the data items contained in the container to
        which the metadata should be added. Modified in place.
    metadatakey: str
        Name of the variable holding the metadata.
        Used as prefix for all the inserted keys.
    """
    fmeta = flatten_metadata(metadata, True, sep='__')
    # Build the prefixed keys in a fresh dictionary. Renaming the keys
    # in place could overwrite a not yet processed entry whose key
    # happens to equal an already prefixed one.
    prefixed = {metadatakey + '__' + k: v for k, v in fmeta.items()}
    data_dict.update(prefixed)
def formats_numpy():
    """Data formats supported by numpy.savez().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the numpy module is not available.
    """
    if data_modules['numpy']:
        return ['NPZ']
    return []
def encodings_numpy(format=None):
    """Encodings of the numpy file format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'NPZ'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'NPZ' (case insensitive).
    """
    fmt = format or 'NPZ'
    matches = fmt.upper() == 'NPZ'
    return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE'] if matches else []
def write_numpy(filepath, data, rate, amax=1.0, unit=None,
                metadata=None, locs=None, labels=None, format=None,
                encoding=None):
    """Write data into numpy npz file.

    Documentation
    -------------
    https://numpy.org/doc/stable/reference/generated/numpy.savez.html

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        Unless the extension starts with 'n' (any case),
        it is replaced by '.npz'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data" after recoding according to `encoding`.
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit" if not empty.
    metadata: nested dict
        Additional metadata saved into the numpy file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 1-D or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'NPZ' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The numpy module is not available.
    ValueError
        File format or encoding not supported.
    """
    def fixed_width(strings):
        # Convert to a unicode array wide enough for the longest string.
        width = np.max([len(s) for s in strings])
        return strings.astype(dtype=f'U{width}')

    if not data_modules['numpy']:
        raise ImportError
    if format is None:
        format = 'NPZ'
    if format.upper() not in formats_numpy():
        raise ValueError(f'file format {format} not supported by numpy file format')
    filepath = Path(filepath)
    suffix = filepath.suffix
    # keep any extension starting with 'n', otherwise enforce '.npz':
    if len(suffix) <= 1 or suffix[1].upper() != 'N':
        filepath = filepath.with_suffix('.npz')
    encoding = 'DOUBLE' if encoding is None else encoding.upper()
    if encoding not in encodings_numpy(format):
        raise ValueError(f'file encoding {encoding} not supported by numpy file format')
    # collect all arrays and values that go into the npz file:
    contents = dict(data=recode_array(data, amax, encoding), rate=rate)
    contents['amax'] = amax
    if unit:
        contents['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, contents, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            contents['positions'] = locs
        else:
            contents['positions'] = locs[:, 0]
            if locs.shape[1] > 1:
                contents['spans'] = locs[:, 1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            contents['labels'] = fixed_width(labels)
        else:
            contents['labels'] = fixed_width(labels[:, 0])
            if labels.shape[1] > 1:
                contents['descriptions'] = fixed_width(labels[:, 1])
    np.savez(filepath, **contents)
    return filepath
def formats_mat():
    """Data formats supported by scipy.io.savemat().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the scipy.io module is not available.
    """
    available = data_modules['scipy']
    return ['MAT'] if available else []
def encodings_mat(format=None):
    """Encodings of the matlab format.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'MAT'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'MAT' (case insensitive).
    """
    if not format:
        format = 'MAT'
    if format.upper() == 'MAT':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_mat(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into matlab file.

    Documentation
    -------------
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.savemat.html

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        Unless the extension starts with 'm' (any case),
        it is replaced by '.mat'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data" after recoding according to `encoding`.
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit" if not empty.
    metadata: nested dict
        Additional metadata saved into the mat file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 1-D or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'MAT' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The scipy.io module is not available.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['scipy']:
        raise ImportError
    if format is None:
        format = 'MAT'
    if format.upper() not in formats_mat():
        raise ValueError(f'file format {format} not supported by matlab file format')
    filepath = Path(filepath)
    suffix = filepath.suffix
    # keep any extension starting with 'm', otherwise enforce '.mat':
    if len(suffix) <= 1 or suffix[1].upper() != 'M':
        filepath = filepath.with_suffix('.mat')
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_mat(format):
        raise ValueError(f'file encoding {encoding} not supported by matlab file format')
    # collect all variables that go into the mat file:
    variables = dict(data=recode_array(data, amax, encoding), rate=rate)
    variables['amax'] = amax
    if unit:
        variables['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, variables, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            variables['positions'] = locs
        else:
            variables['positions'] = locs[:, 0]
            if locs.shape[1] > 1:
                variables['spans'] = locs[:, 1]
    if labels is not None and len(labels) > 0:
        # select the string columns to be stored:
        if labels.ndim == 1:
            columns = {'labels': labels}
        else:
            columns = {'labels': labels[:, 0]}
            if labels.shape[1] > 1:
                columns['descriptions'] = labels[:, 1]
        # store each of them as a unicode array that is wide enough
        # for its longest string:
        for key, column in columns.items():
            width = np.max([len(text) for text in column])
            variables[key] = column.astype(dtype=f'U{width}')
    sio.savemat(filepath, variables)
    return filepath
def formats_raw():
    """Data formats supported as raw formats.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'RAW'.
    """
    raw_formats = ['RAW']
    return raw_formats
def encodings_raw(format=None):
    """Encodings supported for raw file formats.

    Parameters
    ----------
    format: str or None
        The file format. None or an empty string is treated as 'RAW'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not 'RAW' (case insensitive).
    """
    fmt = 'RAW' if not format else format
    if fmt.upper() != 'RAW':
        return []
    return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
def write_raw(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into raw file.

    Writes just the data without sampling rate, metadata and markers.

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        Unless the extension starts with 'r' (any case),
        it is replaced by '.raw'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz. Not written to the file.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Used for scaling the data for integer encodings.
    unit: str
        Unit of the data. Not written to the file.
    metadata: nested dict
        Additional metadata. Not written to the file.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not written to the file.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not written to the file.
    format: str or None
        File format, only None or 'RAW' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ValueError
        File format or encoding not supported.
    """
    if format is None:
        format = 'RAW'
    if format.upper() not in formats_raw():
        raise ValueError(f'file format {format} not supported by raw file format')
    filepath = Path(filepath)
    ext = filepath.suffix
    # keep any extension starting with 'r', otherwise enforce '.raw':
    if len(ext) <= 1 or ext[1].upper() != 'R':
        filepath = filepath.with_suffix('.raw')
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_raw(format):
        raise ValueError(f'file encoding {encoding} not supported by raw file format')
    buffer = recode_array(data, amax, encoding)
    with open(filepath, 'wb') as df:
        df.write(buffer.tobytes())
    return filepath
def formats_audioio():
    """Data formats supported by audioio.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the audioio module is not available.
    """
    if data_modules['audioio']:
        return aw.available_formats()
    return []
def encodings_audio(format):
    """Encodings of any audio format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if the audioio module is not available.
    """
    if data_modules['audioio']:
        return aw.available_encodings(format)
    return []
def write_audioio(filepath, data, rate, amax=1.0, unit=None,
                  metadata=None, locs=None, labels=None, format=None,
                  encoding=None, gainkey=default_gain_keys, sep='.'):
    """Write data into audio file.

    If `amax` differs from one, the data are divided by `amax`
    before they are stored in the audio file, and `amax` together
    with the unit is stored as gain in the metadata.
    After this operation, the data values need to range between -1 and 1,
    in particular if the data are encoded as integers
    (i.e. PCM_16, PCM_32 and PCM_64).
    Note, that this function does not check for this requirement!

    Documentation
    -------------
    https://bendalab.github.io/audioio/

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float or None
        Maximum possible amplitude of the data in `unit`.
        If None or not finite, the gain found in the metadata is
        used instead.
    unit: str
        Unit of the data. If `amax` is taken from the gain in the
        metadata, a supplied unit has to match the unit of that gain.
    metadata: nested dict
        Metadata saved into the audio file. If `amax` differs from
        one, the gain entry of the metadata is set in place, using
        the first key of `gainkey` that is found in the metadata,
        or else the first key of `gainkey`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
        The passed object is not modified.
    sep: str
        String that separates section names in `gainkey`.

    Returns
    -------
    filepath: Path
        The actual file name used for writing the data.

    Raises
    ------
    ImportError
        The audioio module is not available.
    ValueError
        `unit` does not match gain in metadata.
    """
    if not data_modules['audioio']:
        raise ImportError
    if amax is None or not np.isfinite(amax):
        # no valid amplitude range given, retrieve it from the metadata:
        amax, u = am.get_gain(metadata, gainkey, sep, 1.0, 'a.u.')
        if not unit:
            unit = u
        elif unit != 'a.u.' and u != 'a.u.' and unit != u:
            raise ValueError(f'unit "{unit}" does not match gain unit "{u}" in metadata')
    if amax != 1.0:
        data = data / amax
        if metadata is None:
            metadata = {}
        # a missing or arbitrary unit must not show up in the gain string:
        if not unit or unit == 'a.u.':
            unit = ''
        # Work on a private copy of the keys. Appending to `gainkey`
        # itself would grow the shared default list with every call
        # and fails for tuples and arrays.
        if isinstance(gainkey, (list, tuple, np.ndarray)):
            keys = list(gainkey)
        else:
            keys = [gainkey]
        keys.append('Gain')
        for gk in keys:
            m, k = am.find_key(metadata, gk)
            if k in m:
                m[k] = f'{amax:g}{unit}'
                break
        else:
            # none of the keys exists yet, add the first one:
            if 'INFO' in metadata:
                metadata['INFO'][keys[0]] = f'{amax:g}{unit}'
            else:
                metadata[keys[0]] = f'{amax:g}{unit}'
    # forward the requested file format and encoding to audioio:
    aw.write_audio(filepath, data, rate, metadata, locs, labels,
                   format=format, encoding=encoding)
    return Path(filepath)
data_formats_funcs = (
    ('relacs', None, formats_relacs),
    ('fishgrid', None, formats_fishgrid),
    ('pickle', 'pickle', formats_pickle),
    ('numpy', 'numpy', formats_numpy),
    ('matlab', 'scipy', formats_mat),
    ('raw', None, formats_raw),
    ('audio', 'audioio', formats_audioio),
)
"""Table of implemented formats functions.

Each entry is a tuple `(name, module, formats_func)` holding the
format's name, the key of the required module in `data_modules` (or
None if no optional module is needed), and the function returning the
file formats it supports. The entries are tried in this order.
"""
def available_formats():
    """Data and audio file formats that can be written with the installed modules.

    Entries of `data_formats_funcs` whose required module is flagged as
    unavailable in `data_modules` are skipped; entries without a
    required module always contribute.

    Returns
    -------
    formats: list of str
        Sorted list of supported file formats, without duplicates.
    """
    supported = set()
    for _name, module, list_formats in data_formats_funcs:
        if module and not data_modules[module]:
            # the module needed for this family of formats is missing
            continue
        supported.update(list_formats())
    return sorted(supported)
data_encodings_funcs = (
    ('relacs', encodings_relacs),
    ('fishgrid', encodings_fishgrid),
    ('pickle', encodings_pickle),
    ('numpy', encodings_numpy),
    ('matlab', encodings_mat),
    ('raw', encodings_raw),
    ('audio', encodings_audio),
)
"""Table of implemented encodings functions.

Each entry is a tuple `(name, encodings_func)` with the name of the
format family (the same names that key `data_writer_funcs`) and the
function that is called with a file format and returns its encodings.
"""
def available_encodings(format):
    """Encodings of a data file format.

    The functions listed in `data_encodings_funcs` are queried in
    order; the first non-empty answer is returned.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if none of the encodings functions knows the format.
    """
    # lazy generator: later functions are not called once one has answered
    answers = (list_encodings(format)
               for _name, list_encodings in data_encodings_funcs)
    return next((encs for encs in answers if len(encs) > 0), [])
data_writer_funcs = {
    'relacs': write_relacs,
    'fishgrid': write_fishgrid,
    'pickle': write_pickle,
    'numpy': write_numpy,
    'matlab': write_mat,
    'raw': write_raw,
    'audio': write_audioio,
}
"""Dispatch table of implemented write functions.

Maps each format's name (first element of the entries in
`data_formats_funcs`) to the function that writes this kind of file.
"""
def write_data(filepath, data, rate, amax=1.0, unit=None,
               metadata=None, locs=None, labels=None, format=None,
               encoding=None, verbose=0, **kwargs):
    """Write data into a file.

    The file format is looked up in `data_formats_funcs` and the
    matching function from `data_writer_funcs` does the actual writing.
    Formats whose required module is not available are skipped.

    Parameters
    ----------
    filepath: str or Path
        Full path and name of the file to write.
        File format is determined from extension.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float or None
        Maximum possible amplitude of the data in `unit`.
    unit: str or None
        Unit of the data.
    metadata: nested dict
        Additional metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    verbose: int
        If >0 report the written file, if >1 also report
        sampling rate, number of channels and frames, and range.
    kwargs: dict
        Additional, file format specific keyword arguments.

    Returns
    -------
    filepath: Path
        On success, the actual file name used for writing the data,
        as returned by the write function of the file format.

    Raises
    ------
    ValueError
        Unspecified file format.
    IOError
        Requested file format not supported.

    Example
    -------
    ```
    import numpy as np
    from thunderlab.datawriter import write_data

    rate = 28000.0
    freq = 800.0
    time = np.arange(0.0, 1.0, 1/rate)      # one second
    data = 2.5*np.sin(2.0*np.pi*freq*time)  # 800Hz sine wave
    md = dict(Artist='underscore_')         # metadata
    write_data('audio/file.npz', data, rate, amax=2.5, unit='mV', metadata=md)
    ```
    """
    if not format:
        format = format_from_extension(filepath)
    if not format:
        raise ValueError('unspecified file format')
    fmt_name = format.upper()
    for fmt, lib, formats_func in data_formats_funcs:
        if lib and not data_modules[lib]:
            # module required for this family of formats is not installed
            continue
        if fmt_name not in formats_func():
            continue
        writer_func = data_writer_funcs[fmt]
        filepath = writer_func(filepath, data, rate, amax,
                               unit, metadata, locs, labels,
                               format=format, encoding=encoding,
                               **kwargs)
        if verbose > 0:
            print(f'wrote data to file "{filepath}" using {fmt} format')
            if verbose > 1:
                # the file is already written: reporting must not fail on
                # array-likes without shape or on unspecified amax/unit
                shape = getattr(data, 'shape', (len(data),))
                print(f' sampling rate: {rate:g}Hz')
                print(f' channels : {shape[1] if len(shape) > 1 else 1}')
                print(f' frames : {len(data)}')
                if amax is not None:
                    print(f' range : {amax:g}{unit or ""}')
        return filepath
    raise IOError(f'file format "{fmt_name}" not supported.')
def demo(file_path, channels=2, format=None):
    """Demo of the datawriter functions.

    Generates one second of sine waves sampled at 44.1kHz, one per
    channel, and writes them with `write_data()`. From channel to
    channel the frequency increases by 8Hz starting at 440Hz and the
    amplitude decreases in steps of 0.1.

    Parameters
    ----------
    file_path: str
        File path of a data file.
    channels: int
        Number of channels to generate and write.
    format: str or None
        File format to be used.
    """
    print('generate data ...')
    rate = 44100.0
    time = np.arange(0.0, 1.0, 1.0/rate)
    data = np.zeros((len(time), channels))
    for channel in range(channels):
        amplitude = 0.1*(channels - channel)
        frequency = 440.0 + channel*8.0
        data[:, channel] = amplitude*np.sin(2.0*np.pi*frequency*time)

    print(f"write_data('{file_path}') ...")
    write_data(file_path, data, rate, 1.0, 'mV', format=format, verbose=2)

    print('done.')
def main(*cargs):
    """Call demo with command line arguments.

    Recognized arguments are `-c CHANNELS` (default 2), `-f FORMAT`
    (default None), and an optional file name (default 'test.npz').

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Checking thunderlab.datawriter module.')
    parser.add_argument('-c', dest='channels', default=2, type=int,
                        help='number of channels to be written')
    parser.add_argument('-f', dest='format', default=None, type=str,
                        help='file format')
    # nargs='?' makes the positional optional, so that the default is
    # actually used when no file name is given (with nargs=1 the
    # argument is required and the default is never applied).
    parser.add_argument('file', nargs='?', default='test.npz', type=str,
                        help='name of data file')
    args = parser.parse_args(cargs)
    demo(args.file, args.channels, args.format)
# Run the demo with the command line arguments when executed as a script.
if __name__ == "__main__":
    main(*sys.argv[1:])