Coverage for src/thunderlab/datawriter.py: 87%
494 statements
« prev ^ index » next coverage.py v7.6.8, created at 2024-11-29 17:59 +0000
« prev ^ index » next coverage.py v7.6.8, created at 2024-11-29 17:59 +0000
1"""Writing numpy arrays of floats to data files.
3- `write_data()`: write data into a file.
4- `available_formats()`: supported data and audio file formats.
5- `available_encodings()`: encodings of a data file format.
6- `format_from_extension()`: deduce data file format from file extension.
7- `recode_array()`: recode array of floats.
8- `insert_container_metadata()`: insert flattened metadata to data dictionary for a container file format.
9"""
11import os
12import sys
13import datetime as dt
14from copy import deepcopy
15from audioio import find_key, add_metadata, move_metadata
16from audioio import get_datetime, default_gain_keys
data_modules = {}
"""Availability of the optional modules used for writing data.
Maps each module name to True if it could be imported, otherwise False.
"""

# Probe every optional dependency once at import time and remember the
# outcome, so that the writer functions can check availability later on.
try:
    import pickle
except ImportError:
    data_modules['pickle'] = False
else:
    data_modules['pickle'] = True

try:
    import numpy as np
except ImportError:
    data_modules['numpy'] = False
else:
    data_modules['numpy'] = True

try:
    import scipy.io as sio
except ImportError:
    data_modules['scipy'] = False
else:
    data_modules['scipy'] = True

try:
    import audioio.audiowriter as aw
    import audioio.audiometadata as am
    from audioio import write_metadata_text, flatten_metadata
except ImportError:
    data_modules['audioio'] = False
else:
    data_modules['audioio'] = True
def format_from_extension(filepath):
    """Deduce data file format from file extension.

    Parameters
    ----------
    filepath: str
        Name of the data file.

    Returns
    -------
    format: str
        Data format deduced from file extension.
        None if `filepath` is empty or has no extension.
        If audioio is available, the result of its
        `format_from_extension()` is returned, otherwise
        the upper-cased extension without the leading dot.
    """
    if not filepath:
        return None
    suffix = os.path.splitext(filepath)[1]
    if suffix.startswith('.'):
        suffix = suffix[1:]
    if not suffix:
        return None
    if data_modules['audioio']:
        # let audioio map the extension onto its format names:
        return aw.format_from_extension(filepath)
    return suffix.upper()
def recode_array(data, amax, encoding):
    """Recode array of floats.

    Parameters
    ----------
    data: array of floats
        Data array with values ranging between -`amax` and +`amax`.
    amax: float
        Maximum amplitude of data range.
    encoding: str
        Encoding, one of PCM_16, PCM_32, PCM_64, FLOAT or DOUBLE.

    Returns
    -------
    buffer: array
        The data recoded according to `encoding`.
        For integer encodings the data are scaled by `2**(bits-1)/amax`,
        rounded, and saturated at `+-(2**(bits-1) - 1)`.
        For an unknown `encoding` the `data` are returned unchanged.
    """
    encodings = {'PCM_16': (2, 'i2'),
                 'PCM_32': (4, 'i4'),
                 'PCM_64': (8, 'i8'),
                 'FLOAT': (4, 'f'),
                 'DOUBLE': (8, 'd')}

    if encoding not in encodings:
        return data
    sampwidth, dtype = encodings[encoding]
    if dtype[0] != 'i':
        return data.astype(dtype, copy=False)
    factor = 2**(sampwidth*8 - 1)
    limit = float(factor)    # power of two, exactly representable
    scaled = np.round(data/amax*factor)
    # Saturation is decided on the rounded values: a sample slightly
    # below amax can still round up to `factor`, which does not fit
    # into the integer type and would overflow in the cast.
    high = scaled >= limit
    low = scaled <= -limit
    # zero the saturated samples before casting to avoid invalid casts:
    buffer = np.where(high | low, 0.0, scaled).astype(dtype)
    buffer[high] = factor - 1
    buffer[low] = -(factor - 1)
    return buffer
def formats_relacs():
    """Data format of the relacs file format.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'RELACS'.
    """
    formats = ['RELACS']
    return formats
def encodings_relacs(format=None):
    """Encodings of the relacs file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string selects 'RELACS'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the relacs format.
    """
    name = format.upper() if format else 'RELACS'
    if name == 'RELACS':
        return ['FLOAT']
    return []
def write_relacs(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data as relacs raw files.

    Each channel is written as 32-bit floats into its own
    `trace-<n>.raw` file. In addition a `stimuli.dat` file describing
    the traces, three event files, and, if `metadata` are supplied,
    an `info.dat` file are written into the folder `filepath`.

    Parameters
    ----------
    filepath: str
        Full path of folder where to write relacs files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Not used by this writer.
    unit: str
        Unit of the data. Defaults to 'V'.
    metadata: nested dict
        Additional metadata saved into `info.dat`.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows). Not used by this writer.
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows). Not used by this writer.
    format: str or None
        File format, only None or 'RELACS' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the name of the written `stimuli.dat` file.

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'RELACS'
    if format.upper() != 'RELACS':
        raise ValueError(f'file format {format} not supported by relacs file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by relacs file format')
    if not os.path.exists(filepath):
        # also create missing parent folders:
        os.makedirs(filepath)
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # write data, one raw file per channel:
    for c in range(nchannels):
        trace = data if data.ndim == 1 else data[:, c]
        with open(os.path.join(filepath, f'trace-{c+1}.raw'), 'wb') as df:
            df.write(trace.astype(np.float32).tobytes())
    if unit is None:
        unit = 'V'
    # write data format:
    filename = os.path.join(filepath, 'stimuli.dat')
    with open(filename, 'w') as df:
        df.write('# analog input traces:\n')
        for c in range(nchannels):
            df.write(f'# identifier{c+1} : V-{c+1}\n')
            # must match the name of the raw file written above:
            df.write(f'# data file{c+1} : trace-{c+1}.raw\n')
            df.write(f'# sample interval{c+1} : {1000.0/rate:.4f}ms\n')
            df.write(f'# sampling rate{c+1} : {rate:.2f}Hz\n')
            df.write(f'# unit{c+1} : {unit}\n')
        df.write('# event lists:\n')
        df.write('# event file1: stimulus-events.dat\n')
        df.write('# event file2: restart-events.dat\n')
        df.write('# event file3: recording-events.dat\n')
    # write empty event files:
    for events in ['Recording', 'Restart', 'Stimulus']:
        eventsfile = os.path.join(filepath, f'{events.lower()}-events.dat')
        with open(eventsfile, 'w') as df:
            df.write(f'# events: {events}\n\n')
            df.write('#Key\n')
            if events == 'Stimulus':
                df.write('# t duration\n')
                df.write('# sec s\n')
                df.write('# 1 2\n')
            else:
                df.write('# t\n')
                df.write('# sec\n')
                df.write('# 1\n')
                if events == 'Recording':
                    # the recording starts at time zero:
                    df.write(' 0.0\n')
    # write metadata:
    if metadata:
        write_metadata_text(os.path.join(filepath, 'info.dat'),
                            metadata, prefix='# ')
    return filename
def formats_fishgrid():
    """Data format of the fishgrid file format.

    Returns
    -------
    formats: list of str
        List with the single supported file format 'FISHGRID'.
    """
    formats = ['FISHGRID']
    return formats
def encodings_fishgrid(format=None):
    """Encodings of the fishgrid file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string selects 'FISHGRID'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the fishgrid format.
    """
    name = format.upper() if format else 'FISHGRID'
    if name == 'FISHGRID':
        return ['FLOAT']
    return []
def write_fishgrid(filepath, data, rate, amax=1.0, unit=None,
                   metadata=None, locs=None, labels=None, format=None,
                   encoding=None):
    """Write data as fishgrid raw files.

    The data are written as 32-bit floats into `traces-grid1.raw`,
    the metadata into `fishgrid.cfg`, and the markers into
    `timestamps.dat`, all within the folder `filepath`.

    Parameters
    ----------
    filepath: str
        Full path of the folder where to write fishgrid files.
        The folder is created if it does not exist.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data. Defaults to 'mV'.
    metadata: nested dict
        Additional metadata saved into the `fishgrid.cfg`.
        If they contain a 'FishGrid' section, their sections are
        taken as they are. Otherwise known keys are sorted into the
        fishgrid sections and the remaining ones are stored in a
        section 'Other'. None is treated as empty metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format, only None or 'FISHGRID' are supported.
    encoding: str or None
        Encoding of the data. Only None or 'FLOAT' are supported.

    Returns
    -------
    filepath: str or None
        On success, the name of the written `fishgrid.cfg` file.

    Raises
    ------
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    def write_timestamp(df, count, index, span, rate, starttime,
                        label, comment):
        """Write a single marker entry into the timestamps file `df`."""
        # NOTE(review): callers pass the index multiplied by the
        # number of channels, nevertheless it is divided by `rate`
        # only - confirm that this results in the intended time.
        datetime = starttime + dt.timedelta(seconds=index/rate)
        df.write(f' Num: {count}\n')
        df.write(f' Index1: {index}\n')
        # the fields Index2 to Index4 are not written.
        if span > 0:
            df.write(f' Span1: {span}\n')
        df.write(f' Date: {datetime.date().isoformat()}\n')
        df.write(f' Time: {datetime.time().isoformat(timespec="seconds")}\n')
        if label:
            df.write(f' Label: {label}\n')
        df.write(f'Comment: {comment}\n')
        df.write('\n')

    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'FISHGRID'
    if format.upper() != 'FISHGRID':
        raise ValueError(f'file format {format} not supported by fishgrid file format')
    if encoding is None:
        encoding = 'FLOAT'
    if encoding.upper() != 'FLOAT':
        raise ValueError(f'file encoding {encoding} not supported by fishgrid file format')
    if metadata is None:
        # the default must not break the membership tests below:
        metadata = {}
    if not os.path.exists(filepath):
        # also create missing parent folders:
        os.makedirs(filepath)
    # write data:
    with open(os.path.join(filepath, 'traces-grid1.raw'), 'wb') as df:
        df.write(data.astype(np.float32).tobytes())
    # write metadata:
    if unit is None:
        unit = 'mV'
    cfgfilename = os.path.join(filepath, 'fishgrid.cfg')
    nchannels = data.shape[1] if data.ndim > 1 else 1
    # arrange the channels on an approximately square grid:
    ncols = int(np.ceil(np.sqrt(nchannels)))
    nrows = int(np.ceil(nchannels/ncols))
    if 'FishGrid' in metadata:
        # sections are copied, top-level key-value pairs are
        # collected and stored in the 'FishGrid.Recording' section:
        md = {}
        rmd = {}
        for k in metadata:
            if isinstance(metadata[k], dict):
                md[k] = deepcopy(metadata[k])
            else:
                rmd[k] = metadata[k]
        if len(rmd) > 0:
            m, k = find_key(md, 'FishGrid.Recording')
            if k in m:
                m[k].update(rmd)
            else:
                m[k] = rmd
    else:
        # work on a copy from which all known keys are moved into
        # their fishgrid sections:
        smd = deepcopy(metadata)
        gm = dict(Used1='true', Columns1=f'{ncols}', Rows1=f'{nrows}')
        hm = {'DAQ board': dict()}
        if not move_metadata(smd, hm, 'Amplifier'):
            ampl = {}
            move_metadata(smd, ampl, ['Amplifier.Name', 'AmplName'], 'AmplName')
            move_metadata(smd, ampl, ['Amplifier.Model', 'AmplModel'], 'AmplModel')
            move_metadata(smd, ampl, 'Amplifier.Type')
            move_metadata(smd, ampl, 'Gain')
            move_metadata(smd, ampl, 'HighpassCutoff')
            move_metadata(smd, ampl, 'LowpassCutoff')
            if len(ampl) > 0:
                hm['Amplifier'] = ampl
        md = dict(FishGrid={'Grid 1': gm, 'Hardware Settings': hm})
        move_metadata(smd, md['FishGrid'], 'Recording')
        gm = {}
        starttime = get_datetime(smd, remove=True)
        if starttime is not None:
            gm['StartDate'] = starttime.date().isoformat()
            gm['StartTime'] = starttime.time().isoformat(timespec='seconds')
        for key in ('Location', 'Position', 'WaterTemperature',
                    'WaterConductivity', 'WaterpH', 'WaterOxygen',
                    'Temperature', 'Humidity', 'Pressure',
                    'Comment', 'Experimenter'):
            move_metadata(smd, gm, key)
        if len(gm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'General': gm})
        bm = {}
        for key in ('DataTime', 'DataInterval',
                    'BufferTime', 'BufferInterval'):
            move_metadata(smd, bm, key)
        if len(bm) > 0:
            if 'Recording' not in md['FishGrid']:
                md['FishGrid']['Recording'] = {}
            md['FishGrid']['Recording'].update({'Buffers and timing': bm})
        if smd:
            # whatever has not been moved:
            md['FishGrid']['Other'] = smd
    add_metadata(md,
                 [f'FishGrid.Hardware Settings.DAQ board.AISampleRate={0.001*rate:.3f}kHz',
                  f'FishGrid.Hardware Settings.DAQ board.AIMaxVolt={amax:g}{unit}'])
    with open(cfgfilename, 'w') as df:
        for k in md:
            df.write(f'*{k}\n')
            write_metadata_text(df, md[k], prefix=' ')
    # write markers:
    filename = os.path.join(filepath, 'timestamps.dat')
    starttime = get_datetime(metadata, (('DateTimeOriginal',),
                                        ('OriginationDate', 'OriginationTime'),
                                        ('StartDate', 'StartTime'),
                                        ('Location_Time',)),
                             default=dt.datetime.fromtimestamp(0, dt.timezone.utc))
    with open(filename, 'w') as df:
        count = 0
        write_timestamp(df, count, 0, 0, rate, starttime,
                        '', 'begin of recording')
        count += 1
        if locs is not None:
            # second columns are optional:
            has_spans = locs.ndim > 1 and locs.shape[1] > 1
            has_texts = labels is not None and \
                labels.ndim > 1 and labels.shape[1] > 1
            for i in range(len(locs)):
                label = ''
                comment = ''
                if labels is not None and len(labels) > i:
                    label = labels[i,0] if labels.ndim > 1 else labels[i]
                    comment = labels[i,1] if has_texts else ''
                index = locs[i,0] if locs.ndim > 1 else locs[i]
                span = locs[i,1] if has_spans else 0
                write_timestamp(df, count, index*nchannels,
                                span*nchannels, rate,
                                starttime, label, comment)
                count += 1
        write_timestamp(df, count, len(data)*nchannels, 0, rate,
                        starttime, '', 'end of recording')
    return cfgfilename
def formats_pickle():
    """Data formats supported by pickle.dump().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the pickle module is not available.
    """
    return ['PKL'] if data_modules['pickle'] else []
def encodings_pickle(format=None):
    """Encodings of the pickle format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string selects 'PKL'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the pickle format.
    """
    name = format.upper() if format else 'PKL'
    if name == 'PKL':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_pickle(filepath, data, rate, amax=1.0, unit=None,
                 metadata=None, locs=None, labels=None, format=None,
                 encoding=None):
    """Write data into python pickle file.

    A dictionary holding the data, sampling rate, and optional
    further items is dumped into the file.

    Documentation
    -------------
    https://docs.python.org/3/library/pickle.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        The extension 'pkl' is appended, if `filepath` does not have
        an extension starting with the letter 'p'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the pickle.
        Stored under the key "metadata".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'PKL' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The pickle module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    if not data_modules['pickle']:
        raise ImportError('pickle module is not available')
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'PKL'
    if format.upper() != 'PKL':
        raise ValueError(f'file format {format} not supported by pickle file format')
    # only the first letter of the extension is checked:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'P':
        filepath += os.extsep + 'pkl'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_pickle(format):
        raise ValueError(f'file encoding {encoding} not supported by pickle file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        ddict['metadata'] = metadata
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = labels
        else:
            ddict['labels'] = labels[:,0]
            if labels.shape[1] > 1:
                ddict['descriptions'] = labels[:,1]
    with open(filepath, 'wb') as df:
        pickle.dump(ddict, df)
    return filepath
def insert_container_metadata(metadata, data_dict, metadatakey='metadata'):
    """Insert flattened metadata to data dictionary for a container file format.

    The metadata are flattened with '__' separating the section names.
    Each resulting key is prefixed by `metadatakey` and '__' before the
    key-value pair is added to `data_dict`.

    Parameters
    ----------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    data_dict: dict
        Dictionary of the data items contained in the container to
        which the metadata should be added. Modified in place.
    metadatakey: str
        Name of the variable holding the metadata.
        It is concatenated with the flattened keys.
    """
    flat = flatten_metadata(metadata, True, sep='__')
    # rename the keys in place, iterating over a snapshot of them:
    for key in tuple(flat.keys()):
        value = flat.pop(key)
        flat[metadatakey + '__' + key] = value
    data_dict.update(flat)
def formats_numpy():
    """Data formats supported by numpy.savez().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the numpy module is not available.
    """
    return ['NPZ'] if data_modules['numpy'] else []
def encodings_numpy(format=None):
    """Encodings of the numpy file format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string selects 'NPZ'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the numpy format.
    """
    name = format.upper() if format else 'NPZ'
    if name == 'NPZ':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_numpy(filepath, data, rate, amax=1.0, unit=None,
                metadata=None, locs=None, labels=None, format=None,
                encoding=None):
    """Write data into numpy npz file.

    Documentation
    -------------
    https://numpy.org/doc/stable/reference/generated/numpy.savez.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        The extension 'npz' is appended, if `filepath` does not have
        an extension starting with the letter 'n'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the numpy file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'NPZ' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The numpy module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    def as_unicode(strings):
        """Convert array of strings to fixed-width unicode array."""
        maxc = np.max([len(s) for s in strings])
        return strings.astype(dtype=f'U{maxc}')

    if not data_modules['numpy']:
        raise ImportError('numpy module is not available')
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'NPZ'
    if format.upper() not in formats_numpy():
        raise ValueError(f'file format {format} not supported by numpy file format')
    # only the first letter of the extension is checked:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'N':
        filepath += os.extsep + 'npz'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_numpy(format):
        raise ValueError(f'file encoding {encoding} not supported by numpy file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = as_unicode(labels)
        else:
            ddict['labels'] = as_unicode(labels[:,0])
            if labels.shape[1] > 1:
                ddict['descriptions'] = as_unicode(labels[:,1])
    np.savez(filepath, **ddict)
    return filepath
def formats_mat():
    """Data formats supported by scipy.io.savemat().

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the scipy module is not available.
    """
    return ['MAT'] if data_modules['scipy'] else []
def encodings_mat(format=None):
    """Encodings of the matlab format.

    Parameters
    ----------
    format: str
        The file format. None or an empty string selects 'MAT'.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if `format` is not the matlab format.
    """
    name = format.upper() if format else 'MAT'
    if name == 'MAT':
        return ['PCM_16', 'PCM_32', 'FLOAT', 'DOUBLE']
    return []
def write_mat(filepath, data, rate, amax=1.0, unit=None,
              metadata=None, locs=None, labels=None, format=None,
              encoding=None):
    """Write data into matlab file.

    Documentation
    -------------
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.savemat.html

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        The extension 'mat' is appended, if `filepath` does not have
        an extension starting with the letter 'm'.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
        Stored under the key "data".
    rate: float
        Sampling rate of the data in Hertz.
        Stored under the key "rate".
    amax: float
        Maximum possible amplitude of the data in `unit`.
        Stored under the key "amax".
    unit: str
        Unit of the data.
        Stored under the key "unit".
    metadata: nested dict
        Additional metadata saved into the mat file.
        Flattened dictionary entries stored under keys
        starting with "metadata__".
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
        Stored under the keys "positions" and "spans".
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
        Stored as unicode arrays under the keys "labels" and "descriptions".
    format: str or None
        File format, only None or 'MAT' are supported.
    encoding: str or None
        Encoding of the data. Defaults to 'DOUBLE'.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The scipy.io module is not available.
    ValueError
        Invalid `filepath`.
    ValueError
        File format or encoding not supported.
    """
    def as_unicode(strings):
        """Convert array of strings to fixed-width unicode array."""
        maxc = np.max([len(s) for s in strings])
        return strings.astype(dtype=f'U{maxc}')

    if not data_modules['scipy']:
        raise ImportError('scipy.io module is not available')
    if not filepath:
        raise ValueError('no file specified!')
    if format is None:
        format = 'MAT'
    if format.upper() not in formats_mat():
        raise ValueError(f'file format {format} not supported by matlab file format')
    # only the first letter of the extension is checked:
    ext = os.path.splitext(filepath)[1]
    if len(ext) <= 1 or ext[1].upper() != 'M':
        filepath += os.extsep + 'mat'
    if encoding is None:
        encoding = 'DOUBLE'
    encoding = encoding.upper()
    if encoding not in encodings_mat(format):
        raise ValueError(f'file encoding {encoding} not supported by matlab file format')
    buffer = recode_array(data, amax, encoding)
    ddict = dict(data=buffer, rate=rate)
    ddict['amax'] = amax
    if unit:
        ddict['unit'] = unit
    if metadata:
        insert_container_metadata(metadata, ddict, 'metadata')
    if locs is not None and len(locs) > 0:
        if locs.ndim == 1:
            ddict['positions'] = locs
        else:
            ddict['positions'] = locs[:,0]
            if locs.shape[1] > 1:
                ddict['spans'] = locs[:,1]
    if labels is not None and len(labels) > 0:
        if labels.ndim == 1:
            ddict['labels'] = as_unicode(labels)
        else:
            ddict['labels'] = as_unicode(labels[:,0])
            if labels.shape[1] > 1:
                ddict['descriptions'] = as_unicode(labels[:,1])
    sio.savemat(filepath, ddict)
    return filepath
def formats_audioio():
    """Data formats supported by audioio.

    Returns
    -------
    formats: list of str
        List of supported file formats as strings.
        Empty if the audioio module is not available.
    """
    if data_modules['audioio']:
        return aw.available_formats()
    return []
def encodings_audio(format):
    """Encodings of any audio format.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if the audioio module is not available.
    """
    if data_modules['audioio']:
        return aw.available_encodings(format)
    return []
def write_audioio(filepath, data, rate, amax=1.0, unit=None,
                  metadata=None, locs=None, labels=None, format=None,
                  encoding=None, gainkey=default_gain_keys, sep='.'):
    """Write data into audio file.

    If `amax` differs from one, the data are divided by `amax`
    before they are stored in the audio file, and `amax` together
    with the `unit` is stored as gain in the metadata.
    After this operation, the data values need to range between -1 and 1,
    in particular if the data are encoded as integers
    (i.e. PCM_16, PCM_32 and PCM_64).
    Note, that this function does not check for this requirement!

    Documentation
    -------------
    https://bendalab.github.io/audioio/

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, optional second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
        If None or not finite, the gain found in the metadata is used.
    unit: str
        Unit of the data. If supplied and the gain is taken from the
        metadata, it has to match the unit of the gain.
    metadata: nested dict
        Metadata saved into the audio file.
        If the data are scaled by `amax`, the first key of `gainkey`
        found in the metadata is set to the gain. If none is found,
        the gain is added using the first key in `gainkey`.
        A supplied dictionary is modified in place.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        Used for retrieving the gain if `amax` is not specified,
        and for storing the gain the data have been divided by.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
    sep: str
        String that separates section names in `gainkey`.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data.

    Raises
    ------
    ImportError
        The audioio module is not available.
    ValueError
        Invalid `filepath` or `unit` does not match gain in metadata.
    """
    if not data_modules['audioio']:
        raise ImportError('audioio module is not available')
    if not filepath:
        raise ValueError('no file specified!')
    if amax is None or not np.isfinite(amax):
        amax, u = am.get_gain(metadata, gainkey, sep, 1.0, 'a.u.')
        if not unit:
            unit = u
        elif unit != 'a.u.' and u != 'a.u.' and unit != u:
            raise ValueError(f'unit "{unit}" does not match gain unit "{u}" in metadata')
    if amax != 1.0:
        # the gain is recorded only if the data are actually rescaled:
        data = data / amax
        if metadata is None:
            metadata = {}
        if not unit or unit == 'a.u.':
            # no unit to be appended to the gain value:
            unit = ''
        # work on a copy, so that neither the shared default nor a
        # list passed by the caller grows with every call:
        if isinstance(gainkey, (list, tuple, np.ndarray)):
            gainkeys = list(gainkey)
        else:
            gainkeys = [gainkey]
        gainkeys.append('Gain')
        for gk in gainkeys:
            m, k = am.find_key(metadata, gk)
            if k in m:
                m[k] = f'{amax:g}{unit}'
                break
        else:
            # none of the keys exists, add the first one:
            if 'INFO' in metadata:
                metadata['INFO'][gainkeys[0]] = f'{amax:g}{unit}'
            else:
                metadata[gainkeys[0]] = f'{amax:g}{unit}'
    aw.write_audio(filepath, data, rate, metadata, locs, labels,
                   format=format, encoding=encoding)
    return filepath
data_formats_funcs = (
    ('relacs', None, formats_relacs),
    ('fishgrid', None, formats_fishgrid),
    ('pickle', 'pickle', formats_pickle),
    ('numpy', 'numpy', formats_numpy),
    ('matlab', 'scipy', formats_mat),
    ('audio', 'audioio', formats_audioio),
)
"""Tuple of the implemented formats functions.

Each element is a tuple with the format's name, the name of the
required module in `data_modules` or None if no module is required,
and the formats function.
"""
def available_formats():
    """Data and audio file formats supported by any of the installed modules.

    Returns
    -------
    formats: list of str
        Sorted list of supported file formats as strings
        without duplicates.
    """
    found = set()
    for _name, module, formats_func in data_formats_funcs:
        # skip formats whose required module is not installed:
        if module and not data_modules[module]:
            continue
        found.update(formats_func())
    return sorted(found)
data_encodings_funcs = (
    ('relacs', encodings_relacs),
    ('fishgrid', encodings_fishgrid),
    ('pickle', encodings_pickle),
    ('numpy', encodings_numpy),
    ('matlab', encodings_mat),
    ('audio', encodings_audio),
)
"""Tuple of the implemented encodings functions.

Each element is a tuple with the format's name and the corresponding
encodings function.
"""
def available_encodings(format):
    """Encodings of a data file format.

    The encodings functions in `data_encodings_funcs` are asked in
    turn and the first non-empty result is returned.

    Parameters
    ----------
    format: str
        The file format.

    Returns
    -------
    encodings: list of str
        List of supported encodings as strings.
        Empty if none of the functions supports `format`.
    """
    for _name, encodings_func in data_encodings_funcs:
        found = encodings_func(format)
        if len(found) == 0:
            continue
        return found
    return []
data_writer_funcs = dict(
    relacs=write_relacs,
    fishgrid=write_fishgrid,
    pickle=write_pickle,
    numpy=write_numpy,
    matlab=write_mat,
    audio=write_audioio,
)
"""Dictionary of the implemented write functions.

Maps the name of each format onto the function writing that format.
"""
def write_data(filepath, data, rate, amax=1.0, unit=None,
               metadata=None, locs=None, labels=None, format=None,
               encoding=None, verbose=0, **kwargs):
    """Write data into a file.

    The first entry of `data_formats_funcs` whose module is available
    and that lists the requested format selects the write function
    from `data_writer_funcs`.

    Parameters
    ----------
    filepath: str
        Full path and name of the file to write.
        File format is determined from extension.
    data: 1-D or 2-D array of floats
        Array with the data (first index time, second index channel).
    rate: float
        Sampling rate of the data in Hertz.
    amax: float
        Maximum possible amplitude of the data in `unit`.
    unit: str
        Unit of the data.
    metadata: nested dict
        Additional metadata.
    locs: None or 1-D or 2-D array of ints
        Marker positions (first column) and spans (optional second column)
        for each marker (rows).
    labels: None or 2-D array of string objects
        Labels (first column) and texts (optional second column)
        for each marker (rows).
    format: str or None
        File format. If None deduce file format from filepath.
        See `available_formats()` for possible values.
    encoding: str or None
        Encoding of the data. See `available_encodings()` for possible values.
        If None or empty string use 'PCM_16'.
    verbose: int
        If >0 report the written file and the format used.
        If >1 additionally report sampling rate, number of channels,
        number of frames, and amplitude range.
    kwargs: dict
        Additional, file format specific keyword arguments.

    Returns
    -------
    filepath: str or None
        On success, the actual file name used for writing the data,
        as returned by the format specific write function.

    Raises
    ------
    ValueError
        `filepath` is empty string or unspecified format.
    IOError
        Requested file format not supported.

    Example
    -------
    ```
    import numpy as np
    from thunderlab.datawriter import write_data

    rate = 28000.0
    freq = 800.0
    time = np.arange(0.0, 1.0, 1/rate)      # one second
    data = 2.5*np.sin(2.0*np.pi*freq*time)  # 800Hz sine wave
    md = dict(Artist='underscore_')         # metadata
    write_data('audio/file.npz', data, rate, 2.5, 'mV', md)
    ```
    """
    if not filepath:
        raise ValueError('no file specified!')
    if not format:
        format = format_from_extension(filepath)
    if not format:
        raise ValueError('unspecified file format')
    # the formats functions are matched against the upper-case name:
    format_name = format.upper()
    for fmt, lib, formats_func in data_formats_funcs:
        if lib and not data_modules[lib]:
            # required module is not installed
            continue
        if format_name not in formats_func():
            continue
        writer_func = data_writer_funcs[fmt]
        filepath = writer_func(filepath, data, rate, amax,
                               unit, metadata, locs, labels,
                               format=format, encoding=encoding,
                               **kwargs)
        if verbose > 0:
            print(f'wrote data to file "{filepath}" using {fmt} format')
            if verbose > 1:
                channels = data.shape[1] if len(data.shape) > 1 else 1
                # do not print the string 'None' for a missing unit:
                unit_str = unit if unit else ''
                print(f' sampling rate: {rate:g}Hz')
                print(f' channels : {channels}')
                print(f' frames : {len(data)}')
                print(f' range : {amax:g}{unit_str}')
        return filepath
    raise IOError(f'file format "{format_name}" not supported.')
def demo(file_path, channels=2, format=None):
    """Demo of the datawriter functions.

    Generates one second of sine waves sampled at 44.1kHz, one per
    channel with decreasing amplitude and increasing frequency,
    and writes them with `write_data()`.

    Parameters
    ----------
    file_path: str
        File path of a data file.
    channels: int
        Number of channels to be generated and written.
    format: str or None
        File format to be used.
    """
    print('generate data ...')
    rate = 44100.0
    time = np.arange(0.0, 1.0, 1.0/rate)
    data = np.zeros((len(time), channels))
    for c in range(channels):
        amplitude = 0.1*(channels - c)
        freq = 440.0 + c*8.0
        data[:, c] = amplitude*np.sin(2.0*np.pi*freq*time)

    print(f"write_data('{file_path}') ...")
    write_data(file_path, data, rate, 1.0, 'mV', format=format, verbose=2)

    print('done.')
def main(*cargs):
    """Call demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Checking thunderlab.datawriter module.')
    parser.add_argument('-c', dest='channels', default=2, type=int,
                        help='number of channels to be written')
    parser.add_argument('-f', dest='format', default=None, type=str,
                        help='file format')
    # nargs='?' makes the file name optional, so that the default is
    # actually used. With nargs=1 a positional argument is always
    # required and its default is never applied.
    parser.add_argument('file', nargs='?', default='test.npz', type=str,
                        help='name of data file')
    args = parser.parse_args(cargs)
    demo(args.file, args.channels, args.format)
if __name__ == '__main__':
    # pass the command line arguments without the program name to main():
    cli_args = sys.argv[1:]
    main(*cli_args)