Coverage for src/thunderlab/dataloader.py: 77%
981 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-16 21:22 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-16 21:22 +0000
1"""Load time-series data from files.
3```
4data, rate, unit, amax = load_data('data/file.wav')
5```
7The function `load_data()` loads the whole time-series from the file
8as a numpy array of floats. First dimension is frames, second is
9channels. In contrast to the `audioio.load_audio()` function, the
10values of the data array are not restricted between -1 and 1. They can
11 assume any value within the range `-amax` to `+amax` with the returned
12`unit`.
14```
15data = DataLoader('data/file.wav', 60.0)
16```
17or
18```
19with DataLoader('data/file.wav', 60.0) as data:
20```
21Create a `DataLoader` object that loads chunks of 60 seconds long data
22on demand. `data` can be used like a read-only numpy array of floats.
25## Supported file formats
27- python pickle files
28- numpy .npz files
29- matlab .mat files
30- audio files via [`audioio`](https://github.com/bendalab/audioio) package
31- LabView .scandat files
32- relacs trace*.raw files (https://www.relacs.net)
33- fishgrid traces-*.raw files (https://github.com/bendalab/fishgrid)
36## Metadata
38Many file formats allow to store metadata that further describe the
39stored time series data. We handle them as nested dictionary of key-value
40pairs. Load them with the `metadata()` function:
41```
42metadata = metadata('data/file.mat')
43```
45## Markers
47Some file formats also allow to store markers that mark specific
48positions in the time series data. Load marker positions and spans (in
49the 2-D array `locs`) and label and text strings (in the 2-D array
50`labels`) with the `markers()` function:
51```
52locs, labels = markers('data.wav')
53```
55## Additional, format specific functions
57- `extract_container_metadata()`: extract metadata from dictionary loaded from a container file.
58- `relacs_samplerate_unit()`: retrieve sampling rate and unit from a relacs stimuli.dat file.
59- `relacs_header()`: read key-value pairs from relacs *.dat file headers.
60- `fishgrid_grids()`: retrieve grid sizes from a fishgrid.cfg file.
61- `fishgrid_spacings()`: spacing between grid electrodes.
63"""
65import os
66import sys
67import glob
68import gzip
69import numpy as np
70try:
71 import matplotlib.pyplot as plt
72except ImportError:
73 pass
74from datetime import timedelta
75from audioio import load_audio, AudioLoader, unflatten_metadata
76from audioio import get_number_unit, get_number, get_int, get_bool, get_gain
77from audioio import default_starttime_keys, default_gain_keys
78from audioio import get_datetime, flatten_metadata, add_metadata, set_starttime
79from audioio import metadata as metadata_audioio
80from audioio import markers as markers_audioio
def relacs_samplerate_unit(filepath, channel=0):
    """Retrieve sampling rate and unit from a relacs stimuli.dat file.

    Parameters
    ----------
    filepath: str
        Path to a relacs data directory, or a file in a relacs data directory.
    channel: int
        Channel (trace) number, if `filepath` does not specify a
        trace-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz
    unit: str
        Unit of the trace, can be empty if not found

    Raises
    ------
    IOError/FileNotFoundError:
        If the stimuli.dat file does not exist.
    ValueError:
        stimuli.dat file does not contain sampling rate.
    """
    trace = channel + 1
    relacs_dir = filepath
    # check for relacs data directory:
    if not os.path.isdir(filepath):
        relacs_dir = os.path.dirname(filepath)
        bn = os.path.basename(filepath).lower()
        i = bn.find('.raw')
        # extract the trace number from a 'trace-N.raw' file name:
        if len(bn) > 5 and bn[0:5] == 'trace' and i > 6:
            trace = int(bn[6:i])

    # retrieve sampling rate and unit from stimuli.dat file:
    samplerate = None
    sampleinterval = None
    unit = ""

    # collect the comment header ('#' lines) of stimuli.dat:
    lines = []
    stimuli_file = os.path.join(relacs_dir, 'stimuli.dat')
    if os.path.isfile(stimuli_file + '.gz'):
        stimuli_file += '.gz'
    if stimuli_file[-3:] == '.gz':
        # 'rt' (text mode) is required: gzip.open() raises a ValueError
        # if an encoding is passed together with binary mode 'r':
        with gzip.open(stimuli_file, 'rt', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    else:
        with open(stimuli_file, 'r', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)

    # parse unit and sampling rate (or sample interval) of the trace:
    for line in lines:
        if "unit%d" % trace in line:
            unit = line.split(':')[1].strip()
        if "sampling rate%d" % trace in line:
            value = line.split(':')[1].strip()
            samplerate = float(value.replace('Hz',''))
        elif "sample interval%d" % trace in line:
            value = line.split(':')[1].strip()
            sampleinterval = float(value.replace('ms',''))

    if samplerate is not None:
        return samplerate, unit
    if sampleinterval is not None:
        # sample interval is in milliseconds:
        return 1000/sampleinterval, unit
    raise ValueError(f'could not retrieve sampling rate from {stimuli_file}')
def relacs_header(filepath, store_empty=False, first_only=False,
                  lower_keys=False, flat=False,
                  add_sections=False):
    """Read key-value pairs from a relacs *.dat file header.

    Parameters
    ----------
    filepath: str
        A relacs *.dat file, can be also a zipped .gz file.
    store_empty: bool
        If `False` do not add meta data with empty values.
    first_only: bool
        If `True` only store the first element of a list.
    lower_keys: bool
        Make all keys lower case.
    flat: bool
        Do not make a nested dictionary.
        Use this option also to read in very old relacs metadata with
        ragged left alignment.
    add_sections: bool
        If `True`, prepend keys with sections names separated by
        '.' to make them unique.

    Returns
    -------
    data: dict
        Nested dictionary with key-value pairs of the file header.

    Raises
    ------
    IOError/FileNotFoundError:
        If `filepath` cannot be opened.
    """
    # read in the comment header ('#' lines) from the file:
    lines = []
    if os.path.isfile(filepath + '.gz'):
        filepath += '.gz'
    if filepath[-3:] == '.gz':
        # 'rt' (text mode) is required: gzip.open() raises a ValueError
        # if an encoding is passed together with binary mode 'r':
        with gzip.open(filepath, 'rt', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    else:
        with open(filepath, 'r', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    # parse:
    data = {}
    cdatas = [data]      # stack of nested section dicts, cdatas[-1] is current
    sections = ['']      # stack of section names matching cdatas
    ident_offs = None    # indentation of the top level
    ident = None         # number of spaces per nesting level
    for line in lines:
        words = line.split(':')
        value = ':'.join(words[1:]).strip() if len(words) > 1 else ''
        if len(words) >= 1:
            key = words[0].strip('#')
            # get section level from indentation:
            level = 0
            if not flat or len(value) == 0:
                nident = len(key) - len(key.lstrip())
                if ident_offs is None:
                    ident_offs = nident
                elif ident is None:
                    if nident > ident_offs:
                        ident = nident - ident_offs
                        level = 1
                else:
                    level = (nident - ident_offs)//ident
            # close sections deeper than the current level:
            if not flat:
                while len(cdatas) > level + 1:
                    cdatas[-1][sections.pop()] = cdatas.pop()
            else:
                while len(sections) > level + 1:
                    sections.pop()
            # key:
            key = key.strip().strip('"')
            if lower_keys:
                key = key.lower()
            skey = key
            if add_sections:
                key = '.'.join(sections[1:] + [key])
            if len(value) == 0:
                # new sub-section:
                if flat:
                    if store_empty:
                        cdatas[-1][key] = None
                else:
                    cdatas.append({})
                sections.append(skey)
            else:
                # key-value pair:
                value = value.strip('"')
                # NOTE(review): this condition is always True ('' != '-'
                # holds whenever len(value) == 0); possibly 'and' was
                # intended to skip '-' placeholder values — confirm before
                # changing, since that would alter stored metadata:
                if len(value) > 0 or value != '-' or store_empty:
                    if len(value) > 0 and value[0] == '[' and value[-1] == ']':
                        # parse '[a, b, c]' into a list of strings:
                        value = [v.strip() for v in value.lstrip('[').rstrip(']').split(',')]
                        if first_only:
                            value = value[0]
                    cdatas[-1][key] = value
    # close all remaining open sections:
    while len(cdatas) > 1:
        cdatas[-1][sections.pop()] = cdatas.pop()
    return data
def check_relacs(filepath):
    """Check for valid relacs file.

    Parameters
    ----------
    filepath: str
        Path to a relacs data directory, or a file in a relacs data directory.

    Returns
    -------
    is_relacs: boolean
        `True` if `filepath` is a valid relacs directory or is a file therein.
    """
    # resolve the containing directory:
    base_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    # a relacs directory needs a stimuli.dat and a first trace file,
    # either of which may be gzipped:
    have_stimuli = any(os.path.isfile(os.path.join(base_dir, name))
                       for name in ('stimuli.dat', 'stimuli.dat.gz'))
    have_trace = any(os.path.isfile(os.path.join(base_dir, name))
                     for name in ('trace-1.raw', 'trace-1.raw.gz'))
    return have_stimuli and have_trace
def relacs_trace_files(filepath):
    """Expand file path for relacs data to appropriate trace*.raw file names.

    Parameters
    ----------
    filepath: str
        Path to a relacs data directory, or a file in a relacs data directory.

    Returns
    -------
    trace_filepaths: list of str
        List of relacs trace*.raw files.
    """
    base_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    paths = []
    # collect consecutively numbered trace files, stopping at the first gap:
    for n in range(1, 10001):
        fname = os.path.join(base_dir, f'trace-{n}.raw')
        if os.path.isfile(fname):
            paths.append(fname)
        elif os.path.isfile(fname + '.gz'):
            paths.append(fname + '.gz')
        else:
            break
    return paths
def load_relacs(filepath, amax=1.0):
    """Load traces that have been recorded with relacs (https://github.com/relacs/relacs).

    Parameters
    ----------
    filepath: str
        Path to a relacs data directory, or a file in a relacs data directory.
    amax: float
        The amplitude range of the data.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz
    unit: str
        Unit of the data
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        - Invalid name for relacs trace-*.raw file.
        - Sampling rates of traces differ.
        - Unit of traces differ.
    """
    trace_filepaths = relacs_trace_files(filepath)
    nchannels = len(trace_filepaths)
    data = None
    nrows = 0
    rate = None
    unit = ''
    for channel, path in enumerate(sorted(trace_filepaths)):
        # read raw 32-bit float samples, transparently handling gzip:
        if path.endswith('.gz'):
            with gzip.open(path, 'rb') as sf:
                trace = np.frombuffer(sf.read(), dtype=np.float32)
        else:
            trace = np.fromfile(path, np.float32)
        if data is None:
            # size the output array after the first trace:
            nrows = len(trace)
            data = np.zeros((nrows, nchannels))
        ncopy = min(len(trace), nrows)
        data[:ncopy, channel] = trace[:ncopy]
        # retrieve sampling rate and unit and check consistency across traces:
        crate, cunit = relacs_samplerate_unit(path, channel)
        if rate is None:
            rate = crate
        elif crate != rate:
            raise ValueError('sampling rates of traces differ')
        if len(unit) == 0:
            unit = cunit
        elif cunit != unit:
            raise ValueError('unit of traces differ')
    return data, rate, unit, amax
def metadata_relacs(filepath, store_empty=False, first_only=False,
                    lower_keys=False, flat=False, add_sections=False):
    """ Read meta-data of a relacs data set.

    Parameters
    ----------
    filepath: str
        A relacs data directory or a file therein.
    store_empty: bool
        If `False` do not add meta data with empty values.
    first_only: bool
        If `True` only store the first element of a list.
    lower_keys: bool
        Make all keys lower case.
    flat: bool
        Do not make a nested dictionary.
        Use this option also to read in very old relacs metadata with
        ragged left alignment.
    add_sections: bool
        If `True`, prepend keys with sections names separated by
        '.' to make them unique.

    Returns
    -------
    data: nested dict
        Nested dictionary with key-value pairs of the meta data.
        Empty if no info.dat file is found.
    """
    relacs_dir = filepath
    if not os.path.isdir(filepath):
        relacs_dir = os.path.dirname(filepath)
    info_path = os.path.join(relacs_dir, 'info.dat')
    if not os.path.exists(info_path):
        # return an empty dict, consistent with the documented return type
        # (previously returned a (dict, list) tuple on this path only):
        return {}
    data = relacs_header(info_path, store_empty, first_only,
                         lower_keys, flat, add_sections)
    return data
def fishgrid_spacings(metadata, unit='m'):
    """Spacing between grid electrodes.

    Parameters
    ----------
    metadata: dict
        Fishgrid metadata obtained from `metadata_fishgrid()`.
    unit: str
        Unit in which to return the spacings.

    Returns
    -------
    grid_dist: list of tuple of float
        For each grid the distances between rows and columns in `unit`.
    """
    spacings = []
    # fishgrid supports up to four grids, numbered 1 to 4:
    for grid in range(1, 5):
        row_spacing = get_number(metadata, unit, f'RowDistance{grid}', default=0)
        col_spacing = get_number(metadata, unit, f'ColumnDistance{grid}', default=0)
        nrows = get_int(metadata, f'Rows{grid}', default=0)
        ncols = get_int(metadata, f'Columns{grid}', default=0)
        used = get_bool(metadata, f'Used{grid}', default=False)
        # a grid counts if flagged as used or if it has a non-empty layout:
        if used or (ncols > 0 and nrows > 0):
            spacings.append((row_spacing, col_spacing))
    return spacings
def fishgrid_grids(metadata):
    """Retrieve grid sizes from a fishgrid.cfg file.

    Parameters
    ----------
    metadata: dict
        Fishgrid metadata obtained from `metadata_fishgrid()`.

    Returns
    -------
    grids: list of tuple of int
        For each grid the number of rows and columns.
    """
    grids = []
    # fishgrid supports up to four grids, numbered 1 to 4:
    for grid in range(1, 5):
        nrows = get_int(metadata, f'Rows{grid}', default=0)
        ncols = get_int(metadata, f'Columns{grid}', default=0)
        used = get_bool(metadata, f'Used{grid}', default=False)
        # a grid counts if flagged as used or if it has a non-empty layout:
        if used or (ncols > 0 and nrows > 0):
            grids.append((nrows, ncols))
    return grids
def check_fishgrid(filepath):
    """Check for valid fishgrid file (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    filepath: str
        Path to a fishgrid data directory or a file in a fishgrid
        data directory.

    Returns
    -------
    is_fishgrid: bool
        `True` if `filepath` is a valid fishgrid data directory or
        a file therein.
    """
    base_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    # a fishgrid directory needs a configuration file ...
    if not os.path.isfile(os.path.join(base_dir, 'fishgrid.cfg')):
        return False
    # ... and at least one traces file (new or old naming scheme):
    return (os.path.isfile(os.path.join(base_dir, 'traces-grid1.raw')) or
            os.path.isfile(os.path.join(base_dir, 'traces.raw')))
def fishgrid_trace_files(filepath):
    """Expand file paths for fishgrid data to appropriate traces*.raw file names.

    Parameters
    ----------
    filepath: str
        Path to a fishgrid data directory, or a file therein.

    Returns
    -------
    trace_filepaths: list of str
        List of fishgrid traces*.raw files.
    """
    base_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    paths = []
    # collect consecutively numbered grid files, stopping at the first gap:
    for grid in range(1, 10001):
        fname = os.path.join(base_dir, f'traces-grid{grid}.raw')
        if not os.path.isfile(fname):
            break
        paths.append(fname)
    if not paths:
        # fall back to the old single-file naming scheme:
        single = os.path.join(base_dir, 'traces.raw')
        if os.path.isfile(single):
            paths.append(single)
    return paths
def load_fishgrid(filepath):
    """Load traces that have been recorded with fishgrid (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    filepath: str
        Path to a fishgrid data directory, or a file therein.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    FileNotFoundError:
        Invalid or not existing fishgrid files.
    """
    trace_filepaths = fishgrid_trace_files(filepath)
    if len(trace_filepaths) == 0:
        raise FileNotFoundError('no fishgrid files specified')
    md = metadata_fishgrid(filepath)
    # number of channels of each traces file is rows*columns of its grid:
    grid_sizes = [r*c for r, c in fishgrid_grids(md)]
    grid_channels = [grid_sizes[g] for g in range(len(trace_filepaths))]
    nchannels = sum(grid_channels)

    # load traces-grid*.raw files side by side into one array:
    rate = get_number(md, 'Hz', 'AISampleRate')
    data = None
    nrows = 0
    offs = 0
    for path, channels in zip(trace_filepaths, grid_channels):
        x = np.fromfile(path, np.float32).reshape((-1, channels))
        if data is None:
            # size the output array after the first grid file:
            nrows = len(x)
            data = np.zeros((nrows, nchannels))
        n = min(len(x), nrows)
        data[:n, offs:offs + channels] = x[:n, :]
        offs += channels
    amax, unit = get_number_unit(md, 'AIMaxVolt')
    return data, rate, unit, amax
# Register fishgrid-specific metadata keys with audioio's default key lists —
# presumably so that audioio's generic start-time and gain lookups
# (get_datetime()/get_gain()) also find fishgrid entries; note this mutates
# module-level state of audioio on import:
default_starttime_keys.append(['StartDate', 'StartTime'])
default_gain_keys.insert(0, 'AIMaxVolt')
def metadata_fishgrid(filepath):
    """ Read meta-data of a fishgrid data set.

    Parses fishgrid.cfg files, both the new style with '*Section'
    headers and indentation-based nesting, and the old style with
    '----'-delimited sections.

    Parameters
    ----------
    filepath: str
        A fishgrid data directory or a file therein.

    Returns
    -------
    data: nested dict
        Nested dictionary with key-value pairs of the meta data.
        Empty if no fishgrid.cfg file is found.
    """
    fishgrid_dir = filepath
    if not os.path.isdir(fishgrid_dir):
        fishgrid_dir = os.path.dirname(filepath)
    path = os.path.join(fishgrid_dir, 'fishgrid.cfg')
    # read in header from file:
    lines = []
    if os.path.isfile(path + '.gz'):
        path += '.gz'
    if not os.path.exists(path):
        return {}
    if path[-3:] == '.gz':
        # 'rt' (text mode) is required: gzip.open() raises a ValueError
        # if an encoding is passed together with binary mode 'r':
        with gzip.open(path, 'rt', encoding='latin-1') as sf:
            for line in sf:
                lines.append(line)
    else:
        with open(path, 'r', encoding='latin-1') as sf:
            for line in sf:
                lines.append(line)
    # parse:
    data = {}
    cdatas = [data]      # stack of nested section dicts, cdatas[-1] is current
    ident_offs = None    # indentation of the top level
    ident = None         # number of spaces per nesting level
    old_style = False    # set when a '----' section delimiter was seen
    grid_n = False       # grid number to append to keys in old-style grids
    for line in lines:
        if len(line.strip()) == 0:
            continue
        if line[0] == '*':
            # new-style top-level section:
            key = line[1:].strip()
            data[key] = {}
            cdatas = [data, data[key]]
        elif '----' in line:
            # old-style section delimiter:
            old_style = True
            key = line.strip().strip(' -').replace('&', '')
            if key.upper() == 'SETUP':
                key = 'Grid 1'
            grid_n = False
            if key[:4].lower() == 'grid':
                grid_n = key[5]
            cdatas = cdatas[:2]
            cdatas[1][key] = {}
            cdatas.append(cdatas[1][key])
        else:
            words = line.split(':')
            key = words[0].strip().strip('"')
            value = None
            if len(words) > 1 and (len(words[1].strip()) > 0 or old_style):
                value = ':'.join(words[1:]).strip().strip('"')
            if old_style:
                if value is None:
                    # old-style sub-section:
                    cdatas = cdatas[:3]
                    cdatas[2][key] = {}
                    cdatas.append(cdatas[2][key])
                else:
                    # disambiguate keys by appending the grid number:
                    if grid_n and key[-1] != grid_n:
                        key = key + grid_n
                    cdatas[-1][key] = value
            else:
                # get section level from indentation:
                level = 0
                nident = len(line) - len(line.lstrip())
                if ident_offs is None:
                    ident_offs = nident
                elif ident is None:
                    if nident > ident_offs:
                        ident = nident - ident_offs
                        level = 1
                else:
                    level = (nident - ident_offs)//ident
                # close sections deeper than the current level:
                cdatas = cdatas[:2 + level]
                if value is None:
                    # new section:
                    cdatas[-1][key] = {}
                    cdatas.append(cdatas[-1][key])
                else:
                    # key-value pair, with literal '\n' unescaped:
                    cdatas[-1][key] = value.replace(r'\n', '\n')
    # remove grids explicitly marked as unused:
    fgm = data.get('FishGrid', {})
    for i in range(4):
        gs = f'Grid {i+1}'
        if gs in fgm:
            gm = fgm[gs]
            us = f'Used{i+1}'
            if us in gm and gm[us].upper() == 'FALSE':
                del fgm[gs]
    return data
def markers_fishgrid(filepath):
    """ Read markers of a fishgrid data set.

    Markers are read from the 'timestamps.dat' file next to the
    fishgrid configuration, where each marker is a blank-line separated
    block of 'key: value' lines.

    Parameters
    ----------
    filepath: str
        A fishgrid data directory or a file therein.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    def add_marker():
        # Convert the key-value pairs collected for one marker into a
        # position/span row and a label/text row.
        # Indices are divided by the number of channels — presumably they
        # are stored as flat sample counts over all channels; TODO confirm.
        if 'index1' in marker:
            index1 = int(marker['index1'])//nchannels
        else:
            index1 = int(marker['index'])//nchannels
        span1 = int(marker.get('span1', 0))//nchannels
        locs.append([index1, span1])
        ls = marker.get('label', 'M')
        cs = marker.get('comment', '')
        labels.append([ls, cs])

    fishgrid_dir = filepath
    if not os.path.isdir(fishgrid_dir):
        fishgrid_dir = os.path.dirname(filepath)
    path = os.path.join(fishgrid_dir, 'timestamps.dat')
    if not os.path.isfile(path):
        # no timestamps file: no markers
        return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)
    # get number of channels:
    md = metadata_fishgrid(path.replace('timestamps.dat', 'fishgrid.cfg'))
    grids = fishgrid_grids(md)
    nchannels = np.prod(grids[0])
    # read timestamps:
    locs = []
    labels = []
    marker = {}
    with open(path, 'r') as sf:
        for line in sf:
            if len(line.strip()) == 0:
                # blank line terminates the current marker block:
                add_marker()
                marker = {}
            else:
                words = line.split(':')
                if len(words) > 1:
                    v = words[1].strip()
                    v = v.strip('"')
                    marker[words[0].strip().lower()] = v
    # flush a trailing marker block without a final blank line:
    if len(marker) > 0:
        add_marker()
    if len(locs) > 2:
        # drop the first and last marker — presumably these mark
        # recording start/stop rather than events; TODO confirm
        return np.array(locs[1:-1]), np.array(labels[1:-1])
    else:
        return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)
def check_container(filepath):
    """Check if file is a generic container file.

    Supported file formats are:

    - python pickle files (.pkl)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    filepath: str
        Path of the file to check.

    Returns
    -------
    is_container: bool
        `True`, if `filepath` is a supported container format.
    """
    extension = os.path.splitext(filepath)[1].lower()
    return extension in ('.pkl', '.npz', '.mat')
def extract_container_data(data_dict, datakey=None,
                           samplekey=['rate', 'Fs', 'fs'],
                           timekey=['time'], amplkey=['amax'], unitkey='unit',
                           amax=1.0, unit='a.u.'):
    """Extract data from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    datakey: None, str, or list of str
        Name of the variable holding the data. If `None` take the
        variable that is an 2D array and has the largest number of
        elements.
    samplekey: str or list of str
        Name of the variable holding the sampling rate.
    timekey: str or list of str
        Name of the variable holding sampling times.
        If no sampling rate is available, the sampling rate is retrieved
        from the sampling times.
    amplkey: str or list of str
        Name of the variable holding the amplitude range of the data.
    unitkey: str
        Name of the variable holding the unit of the data.
    amax: None or float
        If specified and no amplitude range has been found in `data_dict`,
        then this is the amplitude range of the data.
    unit: None or str
        If specified and no unit has been found in `data_dict`,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range in `unit`.

    Raises
    ------
    ValueError:
        Invalid key requested.
    """
    def as_keys(key):
        # wrap a single key into a tuple for uniform iteration:
        return key if isinstance(key, (list, tuple, np.ndarray)) else (key,)

    samplekey = as_keys(samplekey)
    timekey = as_keys(timekey)
    amplkey = as_keys(amplkey)
    # sampling rate, either directly or from sampling times:
    rate = 0.0
    for skey in samplekey:
        if skey in data_dict:
            rate = float(data_dict[skey])
            break
    if rate == 0.0:
        for tkey in timekey:
            if tkey in data_dict:
                rate = 1.0/(data_dict[tkey][1] - data_dict[tkey][0])
                break
    if rate == 0.0:
        raise ValueError(f"invalid keys {', '.join(samplekey)} and {', '.join(timekey)} for requesting sampling rate or sampling times")
    # amplitude range and unit:
    for akey in amplkey:
        if akey in data_dict:
            amax = float(data_dict[akey])
            break
    if unitkey in data_dict:
        unit = data_dict[unitkey]
    # get data array:
    raw_data = np.array([])
    if datakey:
        # try data keys:
        datakey = as_keys(datakey)
        for dkey in datakey:
            if dkey in data_dict:
                raw_data = data_dict[dkey]
                break
        if len(raw_data) == 0:
            raise ValueError(f"invalid key(s) {', '.join(datakey)} for requesting data")
    else:
        # pick the largest 1-D or 2-D array in the container:
        for name in data_dict:
            item = data_dict[name]
            if hasattr(item, 'shape') and 1 <= len(item.shape) <= 2 and \
               np.max(item.shape) > np.max(raw_data.shape):
                raw_data = item
        if len(raw_data) == 0:
            raise ValueError('no data found')
    # make 2D:
    if len(raw_data.shape) == 1:
        raw_data = raw_data.reshape(-1, 1)
    # transpose if necessary, so that time is the first dimension:
    if np.argmax(raw_data.shape) > 0:
        raw_data = raw_data.T
    # rescale integer data to the amplitude range:
    if raw_data.dtype == np.dtype('int16'):
        data = raw_data.astype('float32')
        data *= amax/2**15
    elif raw_data.dtype == np.dtype('int32'):
        data = raw_data.astype(float)
        data *= amax/2**31
    elif raw_data.dtype == np.dtype('int64'):
        data = raw_data.astype(float)
        data *= amax/2**63
    else:
        data = raw_data
    return data, rate, unit, amax
def load_container(filepath, datakey=None,
                   samplekey=['rate', 'Fs', 'fs'],
                   timekey=['time'], amplkey=['amax'], unitkey='unit',
                   amax=1.0, unit='a.u.'):
    """Load data from a generic container file.

    Supported file formats are:

    - python pickle files (.pkl)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    filepath: str
        Path of the file to load.
    datakey: None, str, or list of str
        Name of the variable holding the data. If `None` take the
        variable that is an 2D array and has the largest number of
        elements.
    samplekey: str or list of str
        Name of the variable holding the sampling rate.
    timekey: str or list of str
        Name of the variable holding sampling times.
        If no sampling rate is available, the sampling rate is retrieved
        from the sampling times.
    amplkey: str
        Name of the variable holding the amplitude range of the data.
    unitkey: str
        Name of the variable holding the unit of the data.
        If `unitkey` is not a valid key, then return `unitkey` as the `unit`.
    amax: None or float
        If specified and no amplitude range has been found in the data
        container, then this is the amplitude range of the data.
    unit: None or str
        If specified and no unit has been found in the data container,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        Invalid key requested.
    """
    # dispatch on the file extension to fill a data dictionary:
    ext = os.path.splitext(filepath)[1]
    cdata = {}
    if ext == '.npz':
        cdata = np.load(filepath)
    elif ext == '.mat':
        from scipy.io import loadmat
        cdata = loadmat(filepath, squeeze_me=True)
    elif ext == '.pkl':
        import pickle
        with open(filepath, 'rb') as f:
            cdata = pickle.load(f)
    return extract_container_data(cdata, datakey, samplekey,
                                  timekey, amplkey, unitkey, amax, unit)
def extract_container_metadata(data_dict, metadatakey=['metadata', 'info']):
    """ Extract metadata from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    metadatakey: str or list of str
        Name of the variable holding the metadata.

    Returns
    -------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    if not isinstance(metadatakey, (list, tuple, np.ndarray)):
        metadatakey = (metadatakey,)
    # a single variable holding the whole metadata dictionary:
    for mdkey in metadatakey:
        if mdkey in data_dict:
            return data_dict[mdkey]
    # otherwise collect flattened entries named '<metadatakey>__...':
    collected = {}
    for mdkey in metadatakey:
        prefix = mdkey + '__'
        for dkey in data_dict:
            if dkey.startswith(prefix):
                v = data_dict[dkey]
                # unwrap 0-d numpy scalars:
                if hasattr(v, 'size') and v.ndim == 0:
                    v = v.item()
                collected[dkey[len(prefix):]] = v
    if len(collected) > 0:
        return unflatten_metadata(collected, sep='__')
    return collected
def metadata_container(filepath, metadatakey=['metadata', 'info']):
    """ Read meta-data of a container file.

    Parameters
    ----------
    filepath: str
        A container file.
    metadatakey: str or list of str
        Name of the variable holding the metadata.

    Returns
    -------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    # dispatch on the file extension to fill a data dictionary:
    ext = os.path.splitext(filepath)[1]
    cdata = {}
    if ext == '.npz':
        cdata = np.load(filepath)
    elif ext == '.mat':
        from scipy.io import loadmat
        cdata = loadmat(filepath, squeeze_me=True)
    elif ext == '.pkl':
        import pickle
        with open(filepath, 'rb') as f:
            cdata = pickle.load(f)
    return extract_container_metadata(cdata, metadatakey)
def extract_container_markers(data_dict, poskey=['positions'],
                              spanskey=['spans'], labelskey=['labels'],
                              descrkey=['descriptions']):
    """ Extract markers from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    poskey: str or list of str
        Name of the variable holding positions of markers.
    spanskey: str or list of str
        Name of the variable holding spans of markers.
    labelskey: str or list of str
        Name of the variable holding labels of markers.
    descrkey: str or list of str
        Name of the variable holding descriptions of markers.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    def as_keys(key):
        # wrap a single key into a tuple for uniform iteration:
        return key if isinstance(key, (list, tuple, np.ndarray)) else (key,)

    # positions into the first column, spans into the second:
    locs = np.zeros((0, 2), dtype=int)
    for key in as_keys(poskey):
        if key in data_dict:
            positions = data_dict[key]
            locs = np.zeros((len(positions), 2), dtype=int)
            locs[:, 0] = positions
            break
    for key in as_keys(spanskey):
        if key in data_dict:
            locs[:, 1] = data_dict[key]
            break
    # labels into the first column, descriptions into the second:
    labels = np.zeros((0, 2), dtype=object)
    for key in as_keys(labelskey):
        if key in data_dict:
            names = data_dict[key]
            labels = np.zeros((len(names), 2), dtype=object)
            labels[:, 0] = names
            break
    for key in as_keys(descrkey):
        if key in data_dict:
            labels[:, 1] = data_dict[key]
            break
    return locs, labels
def markers_container(filepath, poskey=['positions'],
                      spanskey=['spans'], labelskey=['labels'],
                      descrkey=['descriptions']):
    """ Read markers of a container file.

    Parameters
    ----------
    filepath: str
        A container file.
    poskey: str or list of str
        Name of the variable holding positions of markers.
    spanskey: str or list of str
        Name of the variable holding spans of markers.
    labelskey: str or list of str
        Name of the variable holding labels of markers.
    descrkey: str or list of str
        Name of the variable holding descriptions of markers.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    # dispatch on the file extension to fill a data dictionary:
    ext = os.path.splitext(filepath)[1]
    cdata = {}
    if ext == '.npz':
        cdata = np.load(filepath)
    elif ext == '.mat':
        from scipy.io import loadmat
        cdata = loadmat(filepath, squeeze_me=True)
    elif ext == '.pkl':
        import pickle
        with open(filepath, 'rb') as f:
            cdata = pickle.load(f)
    return extract_container_markers(cdata, poskey, spanskey,
                                     labelskey, descrkey)
def check_raw(filepath):
    """Check if file is a raw file.

    The following extensions are interpreted as raw files:

    - raw files (*.raw)
    - LabView scandata (*.scandat)

    Parameters
    ----------
    filepath: str
        Path of the file to check.

    Returns
    -------
    is_raw: bool
        `True`, if `filepath` is a raw format.
    """
    # NOTE(review): '.mat' is also accepted here although the docstring
    # only mentions raw and scandat files - confirm this fallback is
    # intended (container files match '.mat' first in data_loader_funcs).
    _, extension = os.path.splitext(filepath)
    return extension.lower() in ('.raw', '.scandat', '.mat')
def load_raw(filepath, rate=44000, channels=1, dtype=np.float32,
             amax=1.0, unit='a.u.'):
    """Load data from a raw file.

    Raw files just contain the data and absolutely no metadata, not
    even the sampling rate, number of channels, etc.
    Supported file formats are:

    - raw files (*.raw)
    - LabView scandata (*.scandat)

    Parameters
    ----------
    filepath: str
        Path of the file to load.
    rate: float
        Sampling rate of the data in Hertz.
    channels: int
        Number of channels multiplexed in the data.
    dtype: str or numpy.dtype
        The data type stored in the file.
    amax: float
        The amplitude range of the data.
    unit: str
        The unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.
    """
    raw_data = np.fromfile(filepath, dtype=dtype).reshape(-1, channels)
    # recode signed integers of any width to floats scaled to [-amax, amax]
    # (16 bit or less fit into float32, wider types get float64):
    dtype = np.dtype(dtype)
    if np.issubdtype(dtype, np.signedinteger):
        bits = np.iinfo(dtype).bits
        data = raw_data.astype('float32' if bits <= 16 else float)
        data *= amax/2**(bits - 1)
    else:
        data = raw_data
    return data, rate, unit, amax
def load_audioio(filepath, verbose=0, gainkey=default_gain_keys, sep='.',
                 amax=1.0, unit='a.u.'):
    """Load data from an audio file.

    See the
    [`load_audio()`](https://bendalab.github.io/audioio/api/audioloader.html#audioio.audioloader.load_audio)
    function of the [`audioio`](https://github.com/bendalab/audioio)
    package for more infos.

    Parameters
    ----------
    filepath: str
        Path of the file to load.
    verbose: int
        If > 0 show detailed error/warning messages.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        If found, the data will be multiplied with the gain,
        and if available, the corresponding unit is returned.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
    sep: str
        String that separates section names in `gainkey`.
    amax: float
        If specified and no gain has been found in the metadata,
        then use this as the amplitude range.
    unit: str
        If specified and no gain has been found in the metadata,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data if found in the metadata (see `gainkey`),
        otherwise `unit`.
    amax: float
        Maximum amplitude of data range.
    """
    # retrieve gain factor and unit from the file's metadata:
    md = metadata_audioio(filepath)
    fac, unit = get_gain(md, gainkey, sep, amax, unit)
    # load the audio data and scale it to the amplitude range:
    data, rate = load_audio(filepath, verbose)
    if fac != 1.0:
        data *= fac
    return data, rate, unit, fac
data_loader_funcs = (
    ('relacs', check_relacs, load_relacs, metadata_relacs, None),
    ('fishgrid', check_fishgrid, load_fishgrid, metadata_fishgrid, markers_fishgrid),
    ('container', check_container, load_container, metadata_container, markers_container),
    ('raw', check_raw, load_raw, None, None),
    ('audioio', None, load_audioio, metadata_audioio, markers_audioio),
    )
"""List of implemented load functions.

Each element of the list is a tuple with the data format's name, its
check function (`None` matches any file), its load function, and
optional functions for loading metadata and markers (`None` if not
available). The entries are tried in this order, so the generic
audioio entry with its `None` check acts as the final fallback.
"""
def load_data(filepath, verbose=0, **kwargs):
    """Load time-series data from a file.

    Parameters
    ----------
    filepath: str
        Path and name of the file to load.
    verbose: int
        If > 0 show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        format specific loading functions.
        For example:
        - `amax`: the amplitude range of the data.
        - `unit`: the unit of the data.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        `filepath` is empty string.
    """
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string.')
    # try all known formats in order; the first matching check wins:
    for name, check_file, load_file, _, _ in data_loader_funcs:
        if check_file is not None and not check_file(filepath):
            continue
        data, rate, unit, amax = load_file(filepath, **kwargs)
        if verbose > 0:
            print(f'loaded {name} data from file "{filepath}"')
            if verbose > 1:
                print(f' sampling rate: {rate:g} Hz')
                print(f' channels : {data.shape[1]}')
                print(f' frames : {len(data)}')
                print(f' range : {amax:g}{unit}')
        return data, rate, unit, amax
    # no format matched (defensive - the audioio entry matches anything):
    return np.zeros((0, 1)), 0.0, '', 1.0
def metadata(filepath, **kwargs):
    """ Read meta-data from a data file.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load. For some file
        formats several files can be provided in a list.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        format specific loading functions.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the file. Keys of the nested
        dictionaries are always strings. If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings, or list of strings. But other
        simple types like ints or floats are also allowed.

    Raises
    ------
    ValueError:
        `filepath` is empty string.
    """
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string.')
    # load metadata from the first format whose check succeeds:
    for _, check_file, _, metadata_file, _ in data_loader_funcs:
        if check_file is None or check_file(filepath):
            if metadata_file is not None:
                return metadata_file(filepath, **kwargs)
            # matching format has no metadata reader
            # (NOTE(review): indentation of this return reconstructed
            # from context - confirm against upstream source):
            return {}
def markers(filepath):
    """ Read markers of a data file.

    Parameters
    ----------
    filepath: str or file handle
        The data file.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Raises
    ------
    ValueError:
        `filepath` is empty string.
    """
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string.')
    # load markers from the first format whose check succeeds:
    for _, check_file, _, _, markers_file in data_loader_funcs:
        if check_file is None or check_file(filepath):
            if markers_file is not None:
                return markers_file(filepath)
            # matching format has no markers: return empty arrays
            # (NOTE(review): indentation of this return reconstructed
            # from context - confirm against upstream source):
            return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)
1392class DataLoader(AudioLoader):
1393 """Buffered reading of time-series data for random access of the data in the file.
1395 This allows for reading very large data files that do not fit into
1396 memory. A `DataLoader` instance can be used like a huge
1397 read-only numpy array, i.e.
1398 ```
1399 data = DataLoader('path/to/data/file.dat')
1400 x = data[10000:20000,0]
1401 ```
1402 The first index specifies the frame, the second one the channel.
1404 `DataLoader` first determines the format of the data file and then
1405 opens the file (first line). It then reads data from the file as
1406 necessary for the requested data (second line).
1408 Supported file formats are
1410 - audio files via `audioio` package
1411 - python pickle files
1412 - numpy .npz files
1413 - matlab .mat files
1414 - relacs trace*.raw files (www.relacs.net)
1415 - fishgrid traces-*.raw files
1417 Reading sequentially through the file is always possible. If
1418 previous data are requested, then the file is read from the
1419 beginning. This might slow down access to previous data
1420 considerably. Use the `backsize` argument to the open functions to
1421 make sure some data are loaded before the requested frame. Then a
1422 subsequent access to the data within `backsize` seconds before that
1423 frame can still be handled without the need to reread the file
1424 from the beginning.
1426 Usage:
1427 ------
1428 ```
1429 import thunderlab.dataloader as dl
1430 with dl.DataLoader(filepath, 60.0, 10.0) as data:
1431 # do something with the content of the file:
1432 x = data[0:10000,0]
1433 y = data[10000:20000,0]
1434 z = x + y
1435 ```
1437 Normal open and close:
1438 ```
1439 data = dl.DataLoader(filepath, 60.0)
1440 x = data[:,:] # read the whole file
1441 data.close()
1442 ```
1443 that is the same as:
1444 ```
1445 data = dl.DataLoader()
1446 data.open(filepath, 60.0)
1447 ```
1449 Parameters
1450 ----------
1451 filepath: str
1452 Name of the file.
1453 buffersize: float
1454 Size of internal buffer in seconds.
1455 backsize: float
1456 Part of the buffer to be loaded before the requested start index in seconds.
1457 verbose: int
1458 If larger than zero show detailed error/warning messages.
1459 meta_kwargs: dict
1460 Keyword arguments that are passed on to the _load_metadata() function.
1462 Attributes
1463 ----------
1464 rate: float
1465 The sampling rate of the data in Hertz.
1466 channels: int
1467 The number of channels that are read in.
1468 frames: int
1469 The number of frames in the file.
1470 format: str or None
1471 Format of the audio file.
1472 encoding: str or None
1473 Encoding/subtype of the audio file.
1474 shape: tuple
1475 Number of frames and channels of the data.
1476 ndim: int
1477 Number of dimensions: always 2 (frames and channels).
1478 unit: str
1479 Unit of the data.
1480 ampl_min: float
1481 Minimum amplitude the file format supports.
1482 ampl_max: float
1483 Maximum amplitude the file format supports.
1485 Methods
1486 -------
1488 - `len()`: the number of frames
1489 - `open()`: open a data file.
1490 - `open_*()`: open a data file of a specific format.
1491 - `close()`: close the file.
1492 - `metadata()`: metadata of the file.
1493 - `markers()`: markers of the file.
1494 - `set_unwrap()`: Set parameters for unwrapping clipped data.
1496 """
    def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
                 verbose=0, **meta_kwargs):
        # Initialize the underlying AudioLoader without a file, then
        # open `filepath` right away if one was provided.
        super().__init__(None, buffersize, backsize,
                         verbose, **meta_kwargs)
        if filepath is not None:
            self.open(filepath, buffersize, backsize, verbose, **meta_kwargs)
1505 def __getitem__(self, key):
1506 return super(DataLoader, self).__getitem__(key)
1508 def __next__(self):
1509 return super(DataLoader, self).__next__()
1512 # relacs interface:
    def open_relacs(self, filepath, buffersize=10.0, backsize=0.0,
                    verbose=0, amax=1.0):
        """Open relacs data files (www.relacs.net) for reading.

        Parameters
        ----------
        filepath: str
            Path to a relacs data directory or a file therein.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        amax: float
            The amplitude range of the data.

        Returns
        -------
        self: DataLoader
            The opened loader.

        Raises
        ------
        ValueError: .gz files not supported.
        """
        self.verbose = verbose
        # close any previously opened trace files:
        if self.sf is not None:
            self._close_relacs()
        trace_filepaths = relacs_trace_files(filepath)
        # open trace files:
        self.sf = []
        self.frames = None
        self.rate = None
        self.unit = ''
        self.filepath = None
        if len(trace_filepaths) > 0:
            self.filepath = os.path.dirname(trace_filepaths[0])
        for path in sorted(trace_filepaths):
            if path[-3:] == '.gz':
                raise ValueError('.gz files not supported')
            sf = open(path, 'rb')
            self.sf.append(sf)
            if verbose > 0:
                print(f'open_relacs(filepath) with filepath={path}')
            # file size: each trace file holds one channel as 4-byte
            # samples (read as float32 in _load_buffer_relacs()):
            sf.seek(0, os.SEEK_END)
            frames = sf.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate small frame-count mismatches between traces
                # and use the smaller count:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            sf.seek(0)
            # retrieve sampling rate and unit; all traces must agree:
            rate, us = relacs_samplerate_unit(path)
            if self.rate is None:
                self.rate = rate
            elif rate != self.rate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
        # one channel per trace file:
        self.channels = len(self.sf)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.ndim = len(self.shape)
        self.format = 'RELACS'
        self.encoding = 'FLOAT'
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.offset = 0
        # install format specific close and buffer-loading hooks:
        self.close = self._close_relacs
        self.load_audio_buffer = self._load_buffer_relacs
        self.ampl_min = -amax
        self.ampl_max = +amax
        self._load_metadata = self._metadata_relacs
        # TODO: load markers:
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
        self._load_markers = None
        return self
1599 def _close_relacs(self):
1600 """Close the relacs data files.
1601 """
1602 if self.sf is not None:
1603 for file in self.sf:
1604 file.close()
1605 self.sf = None
1607 def _load_buffer_relacs(self, r_offset, r_size, buffer):
1608 """Load new data from relacs data file.
1610 Parameters
1611 ----------
1612 r_offset: int
1613 First frame to be read from file.
1614 r_size: int
1615 Number of frames to be read from file.
1616 buffer: ndarray
1617 Buffer where to store the loaded data.
1618 """
1619 for i, file in enumerate(self.sf):
1620 file.seek(r_offset*4)
1621 data = file.read(r_size*4)
1622 buffer[:, i] = np.frombuffer(data, dtype=np.float32)
1625 def _metadata_relacs(self, store_empty=False, first_only=False):
1626 """ Load meta-data of a relacs data set.
1627 """
1628 info_path = os.path.join(self.filepath, 'info.dat')
1629 if not os.path.exists(info_path):
1630 return {}
1631 return relacs_header(info_path, store_empty, first_only)
1634 # fishgrid interface:
    def open_fishgrid(self, filepath, buffersize=10.0, backsize=0.0,
                      verbose=0):
        """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

        Parameters
        ----------
        filepath: str
            Path to a fishgrid data directory, or a file therein.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Returns
        -------
        self: DataLoader
            The opened loader.
        """
        self.verbose = verbose
        # close any previously opened grid files:
        if self.sf is not None:
            self._close_fishgrid()
        trace_filepaths = fishgrid_trace_files(filepath)
        self.filepath = None
        if len(trace_filepaths) > 0:
            self.filepath = os.path.dirname(trace_filepaths[0])
        self._load_metadata = metadata_fishgrid
        self._load_markers = markers_fishgrid
        # open grid files:
        grids = fishgrid_grids(self.metadata())
        # channels per grid are rows times columns:
        grid_sizes = [r*c for r,c in grids]
        self.channels = 0
        for g, path in enumerate(trace_filepaths):
            self.channels += grid_sizes[g]
        self.sf = []
        self.grid_channels = []
        self.grid_offs = []
        offs = 0
        self.frames = None
        # sampling rate and amplitude range come from the grid metadata:
        self.rate = get_number(self.metadata(), 'Hz', 'AISampleRate')
        v, self.unit = get_number_unit(self.metadata(), 'AIMaxVolt')
        if v is not None:
            self.ampl_min = -v
            self.ampl_max = +v
        for g, path in enumerate(trace_filepaths):
            sf = open(path, 'rb')
            self.sf.append(sf)
            if verbose > 0:
                print(f'open_fishgrid(filepath) with filepath={path}')
            # grid channels and the grid's offset into the channel axis:
            self.grid_channels.append(grid_sizes[g])
            self.grid_offs.append(offs)
            offs += grid_sizes[g]
            # file size: channels are multiplexed as 4-byte samples
            # (read as float32 in _load_buffer_fishgrid()):
            sf.seek(0, os.SEEK_END)
            frames = sf.tell()//4//grid_sizes[g]
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate small frame-count mismatches between grids
                # and use the smaller count:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            sf.seek(0)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.ndim = len(self.shape)
        self.format = 'FISHGRID'
        self.encoding = 'FLOAT'
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.offset = 0
        # install format specific close and buffer-loading hooks:
        self.close = self._close_fishgrid
        self.load_audio_buffer = self._load_buffer_fishgrid
        return self
1713 def _close_fishgrid(self):
1714 """Close the fishgrid data files.
1715 """
1716 if self.sf is not None:
1717 for file in self.sf:
1718 file.close()
1719 self.sf = None
1721 def _load_buffer_fishgrid(self, r_offset, r_size, buffer):
1722 """Load new data from relacs data file.
1724 Parameters
1725 ----------
1726 r_offset: int
1727 First frame to be read from file.
1728 r_size: int
1729 Number of frames to be read from file.
1730 buffer: ndarray
1731 Buffer where to store the loaded data.
1732 """
1733 for file, gchannels, goffset in zip(self.sf, self.grid_channels, self.grid_offs):
1734 file.seek(r_offset*4*gchannels)
1735 data = file.read(r_size*4*gchannels)
1736 buffer[:, goffset:goffset+gchannels] = np.frombuffer(data, dtype=np.float32).reshape((-1, gchannels))
1739 # container interface:
    def open_container(self, filepath, buffersize=10.0,
                       backsize=0.0, verbose=0, datakey=None,
                       samplekey=['rate', 'Fs', 'fs'],
                       timekey=['time'], amplkey=['amax'], unitkey='unit',
                       metadatakey=['metadata', 'info'],
                       poskey=['positions'],
                       spanskey=['spans'], labelskey=['labels'],
                       descrkey=['descriptions'],
                       amax=1.0, unit='a.u.'):
        """Open generic container file.

        Supported file formats are:

        - python pickle files (.pkl)
        - numpy files (.npz)
        - matlab files (.mat)

        Parameters
        ----------
        filepath: str
            Path to a container file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        datakey: None, str, or list of str
            Name of the variable holding the data. If `None` take the
            variable that is an 2D array and has the largest number of
            elements.
        samplekey: str or list of str
            Name of the variable holding the sampling rate.
        timekey: str or list of str
            Name of the variable holding sampling times.
            If no sampling rate is available, the sampling rate is retrieved
            from the sampling times.
        amplkey: str or list of str
            Name of the variable holding the amplitude range of the data.
        unitkey: str
            Name of the variable holding the unit of the data.
        metadatakey: str or list of str
            Name of the variable holding the metadata.
        poskey: str or list of str
            Name of the variable holding positions of markers.
        spanskey: str or list of str
            Name of the variable holding spans of markers.
        labelskey: str or list of str
            Name of the variable holding labels of markers.
        descrkey: str or list of str
            Name of the variable holding descriptions of markers.
        amax: None or float
            If specified and no amplitude range has been found in the data
            container, then this is the amplitude range of the data.
        unit: None or str
            If specified and no unit has been found in the data container,
            then return this as the unit of the data.

        Raises
        ------
        ValueError:
            Invalid key requested.
        """
        self.verbose = verbose
        # load the whole container into a dictionary of variables:
        data_dict = {}
        ext = os.path.splitext(filepath)[1]
        if ext == '.pkl':
            import pickle
            # NOTE: unpickling can execute arbitrary code - only open
            # container files from trusted sources.
            with open(filepath, 'rb') as f:
                data_dict = pickle.load(f)
            self.format = 'PKL'
        elif ext == '.npz':
            data_dict = np.load(filepath)
            self.format = 'NPZ'
        elif ext == '.mat':
            from scipy.io import loadmat
            data_dict = loadmat(filepath, squeeze_me=True)
            self.format = 'MAT'
        # the complete data set is kept in memory as the buffer:
        self.buffer, self.rate, self.unit, amax = \
            extract_container_data(data_dict, datakey, samplekey,
                                   timekey, amplkey, unitkey, amax, unit)
        self.filepath = filepath
        self.channels = self.buffer.shape[1]
        self.frames = self.buffer.shape[0]
        self.shape = self.buffer.shape
        self.ndim = self.buffer.ndim
        self.size = self.buffer.size
        self.encoding = self.numpy_encodings[self.buffer.dtype]
        self.ampl_min = -amax
        self.ampl_max = +amax
        self.offset = 0
        self.buffer_changed = np.zeros(self.channels, dtype=bool)
        # the buffer already holds all frames, no rereading needed:
        self.bufferframes = self.frames
        self.backsize = 0
        self.close = self._close_container
        self.load_audio_buffer = self._load_buffer_container
        # metadata and markers are extracted right away:
        self._metadata = extract_container_metadata(data_dict, metadatakey)
        self._load_metadata = None
        self._locs, self._labels = extract_container_markers(data_dict,
                                                             poskey,
                                                             spanskey,
                                                             labelskey,
                                                             descrkey)
        self._load_markers = None
1845 def _close_container(self):
1846 """Close container. """
1847 pass
1849 def _load_buffer_container(self, r_offset, r_size, buffer):
1850 """Load new data from container."""
1851 buffer[:, :] = self.buffer[r_offset:r_offset + r_size, :]
1854 # raw data interface:
1855 def open_raw(self, filepath, buffersize=10.0, backsize=0.0,
1856 verbose=0, rate=44000, channels=1, dtype=np.float32,
1857 amax=1.0, unit='a.u.'):
1858 """Load data from a raw file.
1860 Raw files just contain the data and absolutely no metadata, not
1861 even the smapling rate, number of channels, etc.
1862 Supported file formats are:
1864 - raw files (*.raw)
1865 - LabView scandata (*.scandat)
1867 Parameters
1868 ----------
1869 filepath: str
1870 Path of the file to load.
1871 buffersize: float
1872 Size of internal buffer in seconds.
1873 backsize: float
1874 Part of the buffer to be loaded before the requested start index in seconds.
1875 verbose: int
1876 If > 0 show detailed error/warning messages.
1877 rate: float
1878 Sampling rate of the data in Hertz.
1879 channels: int
1880 Number of channels multiplexed in the data.
1881 dtype: str or numpy.dtype
1882 The data type stored in the file.
1883 amax: float
1884 The amplitude range of the data.
1885 unit: str
1886 The unit of the data.
1887 """
1888 self.verbose = verbose
1889 self.filepath = filepath
1890 self.sf = open(filepath, 'rb')
1891 if verbose > 0:
1892 print(f'open_raw(filepath) with filepath={filepath}')
1893 self.dtype = np.dtype(dtype)
1894 self.rate = float(rate)
1895 # file size:
1896 self.sf.seek(0, os.SEEK_END)
1897 self.frames = self.sf.tell()//self.dtype.itemsize
1898 self.sf.seek(0)
1899 self.channels = int(channels)
1900 self.shape = (self.frames, self.channels)
1901 self.ndim = len(self.shape)
1902 self.size = self.frames*self.channels
1903 self.format = 'RAW'
1904 self.encoding = self.numpy_encodings.get(self.dtype, 'UNKNOWN')
1905 self.unit = unit
1906 self.ampl_max = float(amax)
1907 self.ampl_min = -self.ampl_max
1908 self.offset = 0
1909 self.bufferframes = int(buffersize*self.rate)
1910 self.backframes = int(backsize*self.rate)
1911 self.init_buffer()
1912 self.close = self._close_raw
1913 self.load_audio_buffer = self._load_buffer_raw
1914 self._metadata = None
1915 self._load_metadata = None
1916 self._locs = None
1917 self._labels = None
1918 self._load_markers = None
    def _close_raw(self):
        """Close raw file. """
        self.sf.close()
        # drop the handle so that a subsequent open starts clean:
        self.sf = None
1925 def _load_buffer_raw(self, r_offset, r_size, buffer):
1926 """Load new data from container."""
1927 self.sf.seek(r_offset*self.dtype.itemsize)
1928 raw_data = self.sf.read(r_size*self.dtype.itemsize)
1929 raw_data = np.frombuffer(raw_data, dtype=self.dtype)
1930 raw_data = raw_data.reshape(-1, self.channels)
1931 # recode:
1932 if self.dtype == np.dtype('int16'):
1933 data = raw_data.astype('float32')
1934 data *= self.ampl_max/2**15
1935 elif self.dtype == np.dtype('int32'):
1936 data = raw_data.astype(float)
1937 data *= self.ampl_max/2**31
1938 elif self.dtype == np.dtype('int64'):
1939 data = raw_data.astype(float)
1940 data *= self.ampl_max/2**63
1941 else:
1942 data = raw_data
1943 buffer[:, :] = data
1946 # audioio interface:
    def open_audioio(self, filepath, buffersize=10.0, backsize=0.0,
                     verbose=0, gainkey=default_gain_keys, sep='.',
                     amax=None, unit='a.u.'):
        """Open an audio file.

        See the [audioio](https://github.com/bendalab/audioio) package
        for details.

        Parameters
        ----------
        filepath: str
            Path to an audio file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index
            in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        gainkey: str or list of str
            Key in the file's metadata that holds some gain information.
            If found, the data will be multiplied with the gain,
            and if available, the corresponding unit is returned.
            See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
        sep: str
            String that separates section names in `gainkey`.
        amax: None or float
            If specified and no gain has been found in the metadata,
            then use this as the amplitude range.
        unit: None or str
            If specified and no gain has been found in the metadata,
            then this is the unit of the data.

        Returns
        -------
        self: DataLoader
            The opened loader.
        """
        self.verbose = verbose
        # let the AudioLoader base class do the actual opening:
        super(DataLoader, self).open(filepath, buffersize, backsize, verbose)
        # retrieve gain factor and unit from the file's metadata:
        md = self.metadata()
        fac, unit = get_gain(md, gainkey, sep, amax, unit)
        if fac is None:
            self.gain_fac = 1.0
        else:
            self.gain_fac = fac
        # wrap the original buffer loader so that loaded data get scaled
        # by the gain factor (see _load_buffer_audioio()):
        self._load_buffer_audio_org = self.load_audio_buffer
        self.load_audio_buffer = self._load_buffer_audioio
        self.ampl_min *= self.gain_fac
        self.ampl_max *= self.gain_fac
        self.unit = unit
        return self
    def _load_buffer_audioio(self, r_offset, r_size, buffer):
        """Load and scale new data from an audio file.

        Parameters
        ----------
        r_offset: int
            First frame to be read from file.
        r_size: int
            Number of frames to be read from file.
        buffer: ndarray
            Buffer where to store the loaded data.
        """
        # load via the original AudioLoader buffer function, then apply
        # the gain factor retrieved from the metadata in open_audioio():
        self._load_buffer_audio_org(r_offset, r_size, buffer)
        buffer *= self.gain_fac
2012 # open multiple files as one:
2013 def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
2014 verbose=0):
2015 """Open multiple files as a single concatenated array.
2017 Parameters
2018 ----------
2019 filepaths: list of str
2020 List of file names of audio files.
2021 buffersize: float
2022 Size of internal buffer in seconds.
2023 backsize: float
2024 Part of the buffer to be loaded before the requested start index in seconds.
2025 verbose: int
2026 If larger than zero show detailed error/warning messages.
2028 Raises
2029 ------
2030 TypeError
2031 `filepaths` must be a sequence.
2032 ValueError
2033 Empty `filepaths`.
2034 FileNotFoundError
2035 `filepaths` does not contain a single valid file.
2037 """
2038 if not isinstance(filepaths, (list, tuple, np.ndarray)):
2039 raise TypeError('input argument filepaths is not a sequence!')
2040 if len(filepaths) == 0:
2041 raise ValueError('input argument filepaths is empy sequence!')
2042 self.data_files = []
2043 self.start_indices = []
2044 for filepath in filepaths:
2045 try:
2046 a = DataLoader(filepath, buffersize, backsize, verbose)
2047 self.data_files. append(a)
2048 except Exception as e:
2049 if verbose > 0:
2050 print(e)
2051 if len(self.data_files) == 0:
2052 raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
2053 # check contingency and set start indices:
2054 a0 = self.data_files[0]
2055 self.filepath = a0.filepath
2056 self.format = a0.format
2057 self.encoding = a0.encoding
2058 self.rate = a0.rate
2059 self.channels = a0.channels
2060 self.unit = a0.unit
2061 self.ampl_max = a0.ampl_max
2062 self.ampl_min = a0.ampl_min
2063 self.frames = 0
2064 self.start_indices = []
2065 self.end_indices = []
2066 md = a0.metadata()
2067 start_time = get_datetime(md)
2068 self._metadata = {}
2069 self._locs = np.zeros((0, 2), dtype=int)
2070 self._labels = np.zeros((0, 2), dtype=object)
2071 for a in self.data_files:
2072 if a.channels != self.channels:
2073 raise ValueError(f'number of channels differs: '
2074 f'{a.channels} in {a.filepath} versus '
2075 f'{self.channels} in {self.filepath}')
2076 if a.rate != self.rate:
2077 raise ValueError(f'sampling rates differ: '
2078 f'{a.rate} in {a.filepath} versus '
2079 f'{self.rate} in {self.filepath}')
2080 if a.ampl_min != self.ampl_min:
2081 raise ValueError(f'minimum amplitudes differ: '
2082 f'{a.ampl_min} in {a.filepath} versus '
2083 f'{self.ampl_min} in {self.filepath}')
2084 if a.ampl_max != self.ampl_max:
2085 raise ValueError(f'maximum amplitudes differ: '
2086 f'{a.ampl_max} in {a.filepath} versus '
2087 f'{self.ampl_max} in {self.filepath}')
2088 # metadata:
2089 md = a.metadata()
2090 fmd = flatten_metadata(md, True)
2091 add_metadata(self._metadata, fmd)
2092 # check start time of recording:
2093 stime = get_datetime(md)
2094 if start_time is not None and stime is not None and \
2095 abs(start_time - stime) > timedelta(seconds=1):
2096 raise ValueError(f'start time does not indicate continuous recording: '
2097 f'expected {start_time} instead of '
2098 f'{stime} in {a.filepath}')
2099 # markers:
2100 locs, labels = a.markers()
2101 locs[:,0] += self.frames
2102 self._locs = np.vstack((self._locs, locs))
2103 self._labels = np.vstack((self._labels, labels))
2104 # indices:
2105 self.start_indices.append(self.frames)
2106 self.frames += a.frames
2107 self.end_indices.append(self.frames)
2108 start_time += timedelta(seconds=a.frames/a.rate)
2109 self.start_indices = np.array(self.start_indices)
2110 self.end_indices = np.array(self.end_indices)
2111 # set startime from first file:
2112 start_time = get_datetime(a0.metadata())
2113 set_starttime(self._metadata, start_time)
2114 # setup infrastructure:
2115 self.shape = (self.frames, self.channels)
2116 self.bufferframes = int(buffersize*self.rate)
2117 self.backframes = int(backsize*self.rate)
2118 self.init_buffer()
2119 self.close = self._close_multiple
2120 self.load_audio_buffer = self._load_buffer_multiple
2121 return self
2123 def _close_multiple(self):
2124 """Close all the data files. """
2125 for a in self.data_files:
2126 a.close()
2127 self.data_files = []
2128 self.start_indices = []
2129 self.end_indices = []
2131 def _load_buffer_multiple(self, r_offset, r_size, buffer):
2132 """Load new data from the underlying files.
2134 Parameters
2135 ----------
2136 r_offset: int
2137 First frame to be read from file.
2138 r_size: int
2139 Number of frames to be read from file.
2140 buffer: ndarray
2141 Buffer where to store the loaded data.
2142 """
2143 offs = r_offset
2144 size = r_size
2145 boffs = 0
2146 ai = np.searchsorted(self.end_indices, offs, side='right')
2147 while size > 0:
2148 ai0 = offs - self.start_indices[ai]
2149 ai1 = offs + size
2150 if ai1 > self.end_indices[ai]:
2151 ai1 = self.end_indices[ai]
2152 ai1 -= self.start_indices[ai]
2153 n = ai1 - ai0
2154 self.data_files[ai].load_audio_buffer(ai0, n,
2155 buffer[boffs:boffs + n,:])
2156 boffs += n
2157 offs += n
2158 size -= n
2159 ai += 1
2162 def open(self, filepath, buffersize=10.0, backsize=0.0,
2163 verbose=0, **kwargs):
2164 """Open file with time-series data for reading.
2166 Parameters
2167 ----------
2168 filepath: str or list of str
2169 Name of the file or list of many file names that should be
2170 made accessible as a single array.
2171 buffersize: float
2172 Size of internal buffer in seconds.
2173 backsize: float
2174 Part of the buffer to be loaded before the requested start index
2175 in seconds.
2176 verbose: int
2177 If > 0 show detailed error/warning messages.
2178 **kwargs: dict
2179 Further keyword arguments that are passed on to the
2180 format specific opening functions.
2181 For example:
2182 - `amax`: the amplitude range of the data.
2183 - 'unit': the unit of the data.
2185 Raises
2186 ------
2187 ValueError:
2188 `filepath` is empty string.
2189 """
2190 # list of implemented open functions:
2191 data_open_funcs = (
2192 ('relacs', check_relacs, self.open_relacs, 1),
2193 ('fishgrid', check_fishgrid, self.open_fishgrid, 1),
2194 ('container', check_container, self.open_container, 1),
2195 ('raw', check_raw, self.open_raw, 1),
2196 ('audioio', None, self.open_audioio, 0),
2197 )
2199 self.buffer = np.array([])
2200 self.rate = 0.0
2201 if not filepath:
2202 raise ValueError('input argument filepath is empty string.')
2203 if isinstance(filepath, (list, tuple, np.ndarray)):
2204 return self.open_multiple(filepath, buffersize, backsize, verbose)
2205 # open data:
2206 for name, check_file, open_file, v in data_open_funcs:
2207 if check_file is None or check_file(filepath):
2208 open_file(filepath, buffersize, backsize, verbose, **kwargs)
2209 if v*verbose > 1:
2210 if self.format is not None:
2211 print(f' format : {self.format}')
2212 if self.encoding is not None:
2213 print(f' encoding : {self.encoding}')
2214 print(f' sampling rate: {self.rate} Hz')
2215 print(f' channels : {self.channels}')
2216 print(f' frames : {self.frames}')
2217 print(f' range : {self.ampl_max:g}{self.unit}')
2218 break
2219 return self
def demo(filepath, plot=False):
    """Demonstrate the dataloader module on a data file.

    Loads the whole file via `load_data()` and, with `plot` set, plots
    it and returns.  Otherwise steps through the file with a
    `DataLoader`, forwards and then backwards, in one-second chunks.

    Parameters
    ----------
    filepath: str
        Name of the data file to load.
    plot: bool
        If True, plot the data loaded by `load_data()`.
    """
    print("try load_data:")
    data, rate, unit, amax = load_data(filepath, verbose=2)
    if plot:
        # plot all channels of the whole file against time:
        fig, ax = plt.subplots()
        time = np.arange(len(data))/rate
        for c in range(data.shape[1]):
            ax.plot(time, data[:,c])
        ax.set_xlabel('Time [s]')
        ax.set_ylabel(f'[{unit}]')
        if amax is not None and np.isfinite(amax):
            ax.set_ylim(-amax, +amax)
        plt.show()
        return  # NOTE(review): plotting skips the DataLoader demo below — confirm intended
    print('')
    print("try DataLoader:")
    # buffer 2 seconds of data, keep 1 second before the requested index:
    with DataLoader(filepath, 2.0, 1.0, 1) as data:
        print('sampling rate: %g' % data.rate)
        print('frames       : %d %d' % (len(data), data.shape[0]))
        nframes = int(1.0 * data.rate)
        # forward:
        for i in range(0, len(data), nframes):
            print('forward %d-%d' % (i, i + nframes))
            x = data[i:i + nframes, 0]
            if plot:
                fig, ax = plt.subplots()
                ax.plot((i + np.arange(len(x)))/data.rate, x)
                ax.set_xlabel('Time [s]')
                ax.set_ylabel(f'[{data.unit}]')
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print('backward %d-%d' % (i, i + nframes))
            x = data[i:i + nframes, 0]
            if plot:
                fig, ax = plt.subplots()
                ax.plot((i + np.arange(len(x)))/data.rate, x)
                ax.set_xlabel('Time [s]')
                ax.set_ylabel(f'[{data.unit}]')
                plt.show()
def main(*cargs):
    """Run the demo, configured from command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    ap = argparse.ArgumentParser(
        description='Checking thunderlab.dataloader module.')
    ap.add_argument('-p', dest='plot', action='store_true',
                    help='plot loaded data')
    ap.add_argument('file', nargs=1, default='', type=str,
                    help='name of data file')
    ns = ap.parse_args(cargs)
    # nargs=1 stores the file name as a one-element list:
    demo(ns.file[0], ns.plot)
if __name__ == "__main__":
    # pass command line arguments (without the script name) on to main():
    main(*sys.argv[1:])