Coverage for src/thunderlab/dataloader.py: 76%

885 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-11-29 17:59 +0000

1"""Load time-series data from files. 

2 

3``` 

4data, rate, unit, amax = load_data('data/file.wav') 

5``` 

6 

7The function `data_loader()` loads the whole time-series from the file 

8as a numpy array of floats. First dimension is frames, second is 

9channels. In contrast to the `audioio.load_audio()` function, the 

10values of the data array are not restricted between -1 and 1. They can 

11 assume any value within the range `-amax` to `+amax` with the returned

12`unit`. 

13 

14``` 

15data = DataLoader('data/file.wav', 60.0) 

16``` 

17or 

18``` 

19with DataLoader('data/file.wav', 60.0) as data: 

20``` 

21Create a `DataLoader` object that loads chunks of 60 seconds long data

22on demand. `data` can be used like a read-only numpy array of floats. 

23 

24 

25## Supported file formats 

26 

27- python pickle files 

28- numpy .npz files 

29- matlab .mat files 

30- audio files via [`audioio`](https://github.com/bendalab/audioio) package 

31- LabView .scandat files 

32- relacs trace*.raw files (https://www.relacs.net) 

33- fishgrid traces-*.raw files (https://github.com/bendalab/fishgrid) 

34 

35 

36## Metadata 

37 

38Many file formats allow to store metadata that further describe the 

39stored time series data. We handle them as nested dictionary of key-value 

40pairs. Load them with the `metadata()` function: 

41``` 

42metadata = metadata('data/file.mat') 

43``` 

44 

45## Markers 

46 

47Some file formats also allow to store markers that mark specific 

48positions in the time series data. Load marker positions and spans (in 

49the 2-D array `locs`) and label and text strings (in the 2-D array 

50`labels`) with the `markers()` function: 

51``` 

52locs, labels = markers('data.wav') 

53``` 

54 

55## Additional, format-specific functions

56 

57- `extract_container_metadata()`: extract metadata from dictionary loaded from a container file. 

58- `relacs_samplerate_unit()`: retrieve sampling rate and unit from a relacs stimuli.dat file. 

59- `relacs_header()`: read key-value pairs from relacs *.dat file headers. 

60- `fishgrid_grids()`: retrieve grid sizes from a fishgrid.cfg file. 

61- `fishgrid_spacings()`: spacing between grid electrodes. 

62 

63""" 

64 

65import os 

66import sys 

67import glob 

68import gzip 

69import numpy as np 

70try: 

71 import matplotlib.pyplot as plt 

72except ImportError: 

73 pass 

74from audioio import load_audio, AudioLoader, unflatten_metadata 

75from audioio import get_number_unit, get_number, get_int, get_bool, get_gain 

76from audioio import default_starttime_keys, default_gain_keys 

77from audioio import metadata as metadata_audioio 

78from audioio import markers as markers_audioio 

79 

80 

def relacs_samplerate_unit(filepath, channel=0):
    """Retrieve sampling rate and unit from a relacs stimuli.dat file.

    Parameters
    ----------
    filepath: str
        Path to a relacs data directory, or a file in a relacs data directory.
    channel: int
        Channel (trace) number, if `filepath` does not specify a
        trace-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz.
    unit: str
        Unit of the trace, can be empty if not found.

    Raises
    ------
    IOError/FileNotFoundError:
        If the stimuli.dat file does not exist.
    ValueError:
        stimuli.dat file does not contain sampling rate.
    """
    trace = channel + 1
    relacs_dir = filepath
    # check for relacs data directory:
    if not os.path.isdir(filepath):
        relacs_dir = os.path.dirname(filepath)
        # if a trace-N.raw file was passed in, take the trace number
        # from the file name instead of the channel argument:
        bn = os.path.basename(filepath).lower()
        i = bn.find('.raw')
        if len(bn) > 5 and bn[0:5] == 'trace' and i > 6:
            trace = int(bn[6:i])

    # retrieve sampling rate and unit from stimuli.dat file:
    samplerate = None
    sampleinterval = None
    unit = ""

    # read the comment header ('#' lines) of the stimuli.dat file:
    lines = []
    stimuli_file = os.path.join(relacs_dir, 'stimuli.dat')
    if os.path.isfile(stimuli_file + '.gz'):
        stimuli_file += '.gz'
    if stimuli_file[-3:] == '.gz':
        # gzip files must be opened in text mode ('rt'): binary mode
        # does not accept an encoding argument and would yield bytes:
        with gzip.open(stimuli_file, 'rt', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    else:
        with open(stimuli_file, 'r', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)

    # scan header lines for the entries of the requested trace:
    for line in lines:
        if "unit%d" % trace in line:
            unit = line.split(':')[1].strip()
        if "sampling rate%d" % trace in line:
            value = line.split(':')[1].strip()
            samplerate = float(value.replace('Hz',''))
        elif "sample interval%d" % trace in line:
            value = line.split(':')[1].strip()
            sampleinterval = float(value.replace('ms',''))

    if samplerate is not None:
        return samplerate, unit
    if sampleinterval is not None:
        # sample interval is given in milliseconds:
        return 1000/sampleinterval, unit
    raise ValueError(f'could not retrieve sampling rate from {stimuli_file}')

155 

156 

def relacs_header(filepath, store_empty=False, first_only=False,
                  lower_keys=False, flat=False,
                  add_sections=False):
    """Read key-value pairs from a relacs *.dat file header.

    Parameters
    ----------
    filepath: str
        A relacs *.dat file, can be also a zipped .gz file.
    store_empty: bool
        If `False` do not add meta data with empty values.
    first_only: bool
        If `True` only store the first element of a list.
    lower_keys: bool
        Make all keys lower case.
    flat: bool
        Do not make a nested dictionary.
        Use this option also to read in very old relacs metadata with
        ragged left alignment.
    add_sections: bool
        If `True`, prepend keys with sections names separated by
        '.' to make them unique.

    Returns
    -------
    data: dict
        Nested dictionary with key-value pairs of the file header.

    Raises
    ------
    IOError/FileNotFoundError:
        If `filepath` cannot be opened.
    """
    # read in header ('#' comment lines) from file:
    lines = []
    if os.path.isfile(filepath + '.gz'):
        filepath += '.gz'
    if filepath[-3:] == '.gz':
        # gzip files must be opened in text mode ('rt'): binary mode
        # does not accept an encoding argument and would yield bytes:
        with gzip.open(filepath, 'rt', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    else:
        with open(filepath, 'r', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    # parse:
    data = {}
    cdatas = [data]       # stack of currently open (sub-)dictionaries
    sections = ['']       # stack of section names matching cdatas
    ident_offs = None     # indentation of the top level
    ident = None          # indentation per section level
    for line in lines:
        words = line.split(':')
        value = ':'.join(words[1:]).strip() if len(words) > 1 else ''
        if len(words) >= 1:
            key = words[0].strip('#')
            # get section level from indentation:
            level = 0
            if not flat or len(value) == 0:
                nident = len(key) - len(key.lstrip())
                if ident_offs is None:
                    ident_offs = nident
                elif ident is None:
                    if nident > ident_offs:
                        ident = nident - ident_offs
                        level = 1
                else:
                    level = (nident - ident_offs)//ident
            # close sections that are deeper than the current level:
            if not flat:
                while len(cdatas) > level + 1:
                    cdatas[-1][sections.pop()] = cdatas.pop()
            else:
                while len(sections) > level + 1:
                    sections.pop()
            # key:
            key = key.strip().strip('"')
            if lower_keys:
                key = key.lower()
            skey = key
            if add_sections:
                key = '.'.join(sections[1:] + [key])
            if len(value) == 0:
                # new sub-section:
                if flat:
                    if store_empty:
                        cdatas[-1][key] = None
                else:
                    cdatas.append({})
                    sections.append(skey)
            else:
                # key-value pair:
                value = value.strip('"')
                # skip empty or placeholder ('-') values unless
                # store_empty is set (the previous 'or'-chained
                # condition was always true):
                if (len(value) > 0 and value != '-') or store_empty:
                    if len(value) > 0 and value[0] == '[' and value[-1] == ']':
                        # '[a, b, c]' values are lists:
                        value = [v.strip() for v in value.lstrip('[').rstrip(']').split(',')]
                        if first_only:
                            value = value[0]
                    cdatas[-1][key] = value
    # close all remaining open sections:
    while len(cdatas) > 1:
        cdatas[-1][sections.pop()] = cdatas.pop()
    return data

265 

266 

def check_relacs(file_path):
    """Check for valid relacs file.

    Parameters
    ----------
    file_path: str
        Path to a relacs data directory, or a file in a relacs data directory.

    Returns
    -------
    is_relacs: boolean
        `True` if `file_path` is a valid relacs directory or is a file therein.
    """
    # resolve to the relacs data directory:
    base_dir = file_path if os.path.isdir(file_path) \
        else os.path.dirname(file_path)
    # a valid relacs directory has a stimuli.dat and a trace-1.raw
    # file, either plain or gzipped:
    has_stimuli = any(os.path.isfile(os.path.join(base_dir, name))
                      for name in ('stimuli.dat', 'stimuli.dat.gz'))
    has_trace = any(os.path.isfile(os.path.join(base_dir, name))
                    for name in ('trace-1.raw', 'trace-1.raw.gz'))
    return has_stimuli and has_trace

294 

295 

def relacs_trace_files(file_path):
    """Expand file path for relacs data to appropriate trace*.raw file names.

    Parameters
    ----------
    file_path: str
        Path to a relacs data directory, or a file in a relacs data directory.

    Returns
    -------
    trace_file_paths: list of str
        List of relacs trace*.raw files.
    """
    data_dir = file_path if os.path.isdir(file_path) \
        else os.path.dirname(file_path)
    trace_file_paths = []
    # collect consecutively numbered trace files, plain or gzipped,
    # and stop at the first missing one:
    for num in range(1, 10001):
        base = os.path.join(data_dir, f'trace-{num}.raw')
        if os.path.isfile(base):
            trace_file_paths.append(base)
        elif os.path.isfile(base + '.gz'):
            trace_file_paths.append(base + '.gz')
        else:
            break
    return trace_file_paths

322 

323 

def load_relacs(file_path, amax=1.0):
    """Load traces that have been recorded with relacs (https://github.com/relacs/relacs).

    Parameters
    ----------
    file_path: str
        Path to a relacs data directory, or a file in a relacs data directory.
    amax: float
        The amplitude range of the data.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz
    unit: str
        Unit of the data
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        - Invalid name for relacs trace-*.raw file.
        - Sampling rates of traces differ.
        - Unit of traces differ.
    """
    trace_file_paths = relacs_trace_files(file_path)
    nchannels = len(trace_file_paths)
    data = None
    nrows = 0
    rate = None
    unit = ''
    # load each trace-*.raw file into one column of the data array:
    for channel, trace_path in enumerate(sorted(trace_file_paths)):
        if trace_path[-3:] == '.gz':
            with gzip.open(trace_path, 'rb') as sf:
                trace = np.frombuffer(sf.read(), dtype=np.float32)
        else:
            trace = np.fromfile(trace_path, np.float32)
        if data is None:
            # the first trace determines the number of frames:
            nrows = len(trace)
            data = np.zeros((nrows, nchannels))
        n = min(len(trace), nrows)
        data[:n, channel] = trace[:n]
        # retrieve sampling rate and unit and check consistency:
        crate, cunit = relacs_samplerate_unit(trace_path, channel)
        if rate is None:
            rate = crate
        elif crate != rate:
            raise ValueError('sampling rates of traces differ')
        if not unit:
            unit = cunit
        elif cunit != unit:
            raise ValueError('unit of traces differ')
    return data, rate, unit, amax

382 

383 

def metadata_relacs(file_path, store_empty=False, first_only=False,
                    lower_keys=False, flat=False, add_sections=False):
    """ Read meta-data of a relacs data set.

    Parameters
    ----------
    file_path: str
        A relacs data directory or a file therein.
    store_empty: bool
        If `False` do not add meta data with empty values.
    first_only: bool
        If `True` only store the first element of a list.
    lower_keys: bool
        Make all keys lower case.
    flat: bool
        Do not make a nested dictionary.
        Use this option also to read in very old relacs metadata with
        ragged left alignment.
    add_sections: bool
        If `True`, prepend keys with sections names separated by
        '.' to make them unique.

    Returns
    -------
    data: nested dict
        Nested dictionary with key-value pairs of the meta data.
        Empty if no info.dat file is found.
    """
    relacs_dir = file_path
    if not os.path.isdir(file_path):
        relacs_dir = os.path.dirname(file_path)
    info_path = os.path.join(relacs_dir, 'info.dat')
    if not os.path.exists(info_path):
        # always return a dictionary as documented (previously a
        # (dict, list) tuple was returned here, inconsistent with the
        # dict returned below):
        return {}
    data = relacs_header(info_path, store_empty, first_only,
                         lower_keys, flat, add_sections)
    return data

420 

421 

def fishgrid_spacings(metadata, unit='m'):
    """Spacing between grid electrodes.

    Parameters
    ----------
    metadata: dict
        Fishgrid metadata obtained from `metadata_fishgrid()`.
    unit: str
        Unit in which to return the spacings.

    Returns
    -------
    grid_dist: list of tuple of float
        For each grid the distances between rows and columns in `unit`.
    """
    distances = []
    # up to four grids may be defined in the metadata:
    for grid in range(1, 5):
        dist_rows = get_number(metadata, unit, f'RowDistance{grid}', default=0)
        dist_cols = get_number(metadata, unit, f'ColumnDistance{grid}', default=0)
        n_rows = get_int(metadata, f'Rows{grid}', default=0)
        n_cols = get_int(metadata, f'Columns{grid}', default=0)
        in_use = get_bool(metadata, f'Used{grid}', default=False)
        # a grid counts if flagged as used or if it has a size:
        if in_use or (n_cols > 0 and n_rows > 0):
            distances.append((dist_rows, dist_cols))
    return distances

447 

448 

def fishgrid_grids(metadata):
    """Retrieve grid sizes from a fishgrid.cfg file.

    Parameters
    ----------
    metadata: dict
        Fishgrid metadata obtained from `metadata_fishgrid()`.

    Returns
    -------
    grids: list of tuple of int
        For each grid the number of rows and columns.
    """
    grid_shapes = []
    # up to four grids may be defined in the metadata:
    for grid in range(1, 5):
        n_rows = get_int(metadata, f'Rows{grid}', default=0)
        n_cols = get_int(metadata, f'Columns{grid}', default=0)
        in_use = get_bool(metadata, f'Used{grid}', default=False)
        # a grid counts if flagged as used or if it has a size:
        if in_use or (n_cols > 0 and n_rows > 0):
            grid_shapes.append((n_rows, n_cols))
    return grid_shapes

470 

471 

def check_fishgrid(file_path):
    """Check for valid fishgrid file (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    file_path: str
        Path to a fishgrid data directory or a file in a fishgrid
        data directory.

    Returns
    -------
    is_fishgrid: bool
        `True` if `file_path` is a valid fishgrid data directory or
        a file therein.
    """
    # resolve to the fishgrid data directory:
    base_dir = file_path if os.path.isdir(file_path) \
        else os.path.dirname(file_path)
    # a valid fishgrid directory needs the configuration file:
    if not os.path.isfile(os.path.join(base_dir, 'fishgrid.cfg')):
        return False
    # ... and at least one raw trace file:
    return (os.path.isfile(os.path.join(base_dir, 'traces-grid1.raw')) or
            os.path.isfile(os.path.join(base_dir, 'traces.raw')))

495 

496 

def fishgrid_trace_files(file_path):
    """Expand file paths for fishgrid data to appropriate traces*.raw file names.

    Parameters
    ----------
    file_path: str
        Path to a fishgrid data directory, or a file therein.

    Returns
    -------
    trace_file_paths: list of str
        List of fishgrid traces*.raw files.
    """
    # resolve to the fishgrid data directory:
    data_dir = file_path if os.path.isdir(file_path) \
        else os.path.dirname(file_path)
    # collect consecutively numbered traces-grid*.raw files:
    trace_file_paths = []
    for grid in range(1, 10001):
        fname = os.path.join(data_dir, f'traces-grid{grid}.raw')
        if not os.path.isfile(fname):
            break
        trace_file_paths.append(fname)
    # fall back on a single traces.raw file:
    if not trace_file_paths:
        fname = os.path.join(data_dir, 'traces.raw')
        if os.path.isfile(fname):
            trace_file_paths.append(fname)
    return trace_file_paths

526 

527 

def load_fishgrid(file_path):
    """Load traces that have been recorded with fishgrid (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    file_path: str
        Path to a fishgrid data directory, or a file therein.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    FileNotFoundError:
        Invalid or not existing fishgrid files.
    """
    trace_file_paths = fishgrid_trace_files(file_path)
    if len(trace_file_paths) == 0:
        raise FileNotFoundError(f'no fishgrid files specified')
    # grid geometry from the fishgrid.cfg metadata determines how many
    # channels each traces-grid*.raw file contains:
    md = metadata_fishgrid(file_path)
    grids = fishgrid_grids(md)
    grid_sizes = [r*c for r,c in grids]

    # load traces-grid*.raw files:
    # the g-th trace file is assumed to belong to the g-th grid
    # (rows*columns channels) — TODO confirm this pairing holds when
    # some grids are unused:
    grid_channels = []
    nchannels = 0
    for g, path in enumerate(trace_file_paths):
        grid_channels.append(grid_sizes[g])
        nchannels += grid_sizes[g]
    data = None
    nrows = 0
    c = 0   # column offset of the current grid in the data array
    rate = get_number(md, 'Hz', 'AISampleRate')
    for path, channels in zip(trace_file_paths, grid_channels):
        # raw files hold channel-interleaved float32 samples:
        x = np.fromfile(path, np.float32).reshape((-1, channels))
        if data is None:
            # the first file determines the number of frames:
            nrows = len(x)
            data = np.zeros((nrows, nchannels))
        n = min(len(x), nrows)
        data[:n,c:c+channels] = x[:n,:]
        c += channels
    # amplitude range and its unit from the 'AIMaxVolt' metadata entry:
    amax, unit = get_number_unit(md, 'AIMaxVolt')
    return data, rate, unit, amax

580 

581 

# add fishgrid keys:
# register the metadata key names used by fishgrid with audioio's
# defaults — presumably these lists are consulted by audioio's
# start-time and gain lookup helpers (confirm against audioio docs).
# 'AIMaxVolt' is prepended so it takes precedence over the generic
# gain keys:
default_starttime_keys.append(['StartDate', 'StartTime'])
default_gain_keys.insert(0, 'AIMaxVolt')

585 

586 

def metadata_fishgrid(file_path):
    """ Read meta-data of a fishgrid data set.

    Parses the fishgrid.cfg file (plain or gzipped) into a nested
    dictionary. Both the new-style format ('*' section markers and
    indentation) and the old-style format ('----' section markers)
    are supported.

    Parameters
    ----------
    file_path: str
        A fishgrid data directory or a file therein.

    Returns
    -------
    data: nested dict
        Nested dictionary with key-value pairs of the meta data.
        Empty if no fishgrid.cfg file is found.
    """
    fishgrid_dir = file_path
    if not os.path.isdir(fishgrid_dir):
        fishgrid_dir = os.path.dirname(file_path)
    path = os.path.join(fishgrid_dir, 'fishgrid.cfg')
    # read in header from file:
    lines = []
    if os.path.isfile(path + '.gz'):
        path += '.gz'
    if not os.path.exists(path):
        return {}
    if path[-3:] == '.gz':
        # gzip files must be opened in text mode ('rt'): binary mode
        # does not accept an encoding argument and would yield bytes:
        with gzip.open(path, 'rt', encoding='latin-1') as sf:
            for line in sf:
                lines.append(line)
    else:
        with open(path, 'r', encoding='latin-1') as sf:
            for line in sf:
                lines.append(line)
    # parse:
    data = {}
    cdatas = [data]       # stack of currently open (sub-)dictionaries
    ident_offs = None     # indentation of the top level
    ident = None          # indentation per section level
    old_style = False     # '----' section markers detected
    grid_n = False        # grid number to append to old-style keys
    for line in lines:
        if len(line.strip()) == 0:
            continue
        if line[0] == '*':
            # new top-level section:
            key = line[1:].strip()
            data[key] = {}
            cdatas = [data, data[key]]
        elif '----' in line:
            # old-style section marker:
            old_style = True
            key = line.strip().strip(' -').replace('&', '')
            if key.upper() == 'SETUP':
                key = 'Grid 1'
            grid_n = False
            if key[:4].lower() == 'grid':
                grid_n = key[5]
            cdatas = cdatas[:2]
            cdatas[1][key] = {}
            cdatas.append(cdatas[1][key])
        else:
            words = line.split(':')
            key = words[0].strip().strip('"')
            value = None
            if len(words) > 1 and (len(words[1].strip()) > 0 or old_style):
                value = ':'.join(words[1:]).strip().strip('"')
            if old_style:
                if value is None:
                    # new sub-section:
                    cdatas = cdatas[:3]
                    cdatas[2][key] = {}
                    cdatas.append(cdatas[2][key])
                else:
                    # make keys unique by appending the grid number:
                    if grid_n and key[-1] != grid_n:
                        key = key + grid_n
                    cdatas[-1][key] = value
            else:
                # get section level from indentation:
                level = 0
                nident = len(line) - len(line.lstrip())
                if ident_offs is None:
                    ident_offs = nident
                elif ident is None:
                    if nident > ident_offs:
                        ident = nident - ident_offs
                        level = 1
                else:
                    level = (nident - ident_offs)//ident
                # close sections:
                cdatas = cdatas[:2 + level]
                if value is None:
                    # new section:
                    cdatas[-1][key] = {}
                    cdatas.append(cdatas[-1][key])
                else:
                    # key-value pair:
                    cdatas[-1][key] = value.replace(r'\n', '\n')
    # remove unused grids:
    fgm = data.get('FishGrid', {})
    for i in range(4):
        gs = f'Grid {i+1}'
        if gs in fgm:
            gm = fgm[gs]
            us = f'Used{i+1}'
            if us in gm and gm[us].upper() == 'FALSE':
                del fgm[gs]
    return data

689 

690 

def markers_fishgrid(file_path):
    """ Read markers of a fishgrid data set.

    Markers are read from the `timestamps.dat` file in the fishgrid
    data directory.

    Parameters
    ----------
    file_path: str
        A fishgrid data directory or a file therein.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    def add_marker():
        # append the currently collected marker fields to locs/labels;
        # sample counts are divided by the number of channels to get
        # frame indices — assumes counts refer to the interleaved
        # multi-channel stream (TODO confirm):
        if 'index1' in marker:
            index1 = int(marker['index1'])//nchannels
        else:
            index1 = int(marker['index'])//nchannels
        span1 = int(marker.get('span1', 0))//nchannels
        locs.append([index1, span1])
        ls = marker.get('label', 'M')
        cs = marker.get('comment', '')
        labels.append([ls, cs])

    fishgrid_dir = file_path
    if not os.path.isdir(fishgrid_dir):
        fishgrid_dir = os.path.dirname(file_path)
    path = os.path.join(fishgrid_dir, 'timestamps.dat')
    if not os.path.isfile(path):
        # no timestamps file — no markers:
        return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)
    # get number of channels from the first grid in the metadata:
    md = metadata_fishgrid(path.replace('timestamps.dat', 'fishgrid.cfg'))
    grids = fishgrid_grids(md)
    nchannels = np.prod(grids[0])
    # read timestamps:
    locs = []
    labels = []
    marker = {}
    with open(path, 'r') as sf:
        for line in sf:
            if len(line.strip()) == 0:
                # a blank line terminates the current marker block:
                add_marker()
                marker = {}
            else:
                # collect 'key: value' fields of the current marker:
                words = line.split(':')
                if len(words) > 1:
                    v = words[1].strip()
                    v = v.strip('"')
                    marker[words[0].strip().lower()] = v
    if len(marker) > 0:
        # flush the last marker if the file did not end on a blank line:
        add_marker()
    if len(locs) > 2:
        # the first and last entries are discarded — presumably they
        # mark recording start and stop rather than events (confirm
        # against the fishgrid file format):
        return np.array(locs[1:-1]), np.array(labels[1:-1])
    else:
        return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)

750 

751 

def check_container(filepath):
    """Check if file is a generic container file.

    Supported file formats are:

    - python pickle files (.pkl)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    filepath: str
        Path of the file to check.

    Returns
    -------
    is_container: bool
        `True`, if `filepath` is a supported container format.
    """
    # decide by file extension only (case-insensitive):
    extension = os.path.splitext(filepath)[1].lower()
    return extension in ('.pkl', '.npz', '.mat')

773 

774 

def extract_container_data(data_dict, datakey=None,
                           samplekey=['rate', 'Fs', 'fs'],
                           timekey=['time'], amplkey=['amax'], unitkey='unit',
                           amax=1.0, unit='a.u.'):
    """Extract data from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    datakey: None, str, or list of str
        Name of the variable holding the data. If `None` take the
        variable that is an 2D array and has the largest number of
        elements.
    samplekey: str or list of str
        Name of the variable holding the sampling rate.
    timekey: str or list of str
        Name of the variable holding sampling times.
        If no sampling rate is available, the sampling rate is retrieved
        from the sampling times.
    amplkey: str or list of str
        Name of the variable holding the amplitude range of the data.
    unitkey: str
        Name of the variable holding the unit of the data.
    amax: None or float
        If specified and no amplitude range has been found in `data_dict`,
        then this is the amplitude range of the data.
    unit: None or str
        If specified and no unit has been found in `data_dict`,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range in `unit`.

    Raises
    ------
    ValueError:
        Invalid key requested.
    """
    # extract format data:
    # accept single strings as well as lists of candidate keys:
    if not isinstance(samplekey, (list, tuple, np.ndarray)):
        samplekey = (samplekey,)
    if not isinstance(timekey, (list, tuple, np.ndarray)):
        timekey = (timekey,)
    if not isinstance(amplkey, (list, tuple, np.ndarray)):
        amplkey = (amplkey,)
    # the first matching sampling-rate key wins:
    rate = 0.0
    for skey in samplekey:
        if skey in data_dict:
            rate = float(data_dict[skey])
            break
    if rate == 0.0:
        # fall back on the inverse of the first sampling interval —
        # assumes equidistant sampling times (not verified here):
        for tkey in timekey:
            if tkey in data_dict:
                rate = 1.0/(data_dict[tkey][1] - data_dict[tkey][0])
                break
    if rate == 0.0:
        raise ValueError(f"invalid keys {', '.join(samplekey)} and {', '.join(timekey)} for requesting sampling rate or sampling times")
    for akey in amplkey:
        if akey in data_dict:
            amax = float(data_dict[akey])
            break
    if unitkey in data_dict:
        unit = data_dict[unitkey]
    # get data array:
    raw_data = np.array([])
    if datakey:
        # try data keys:
        if not isinstance(datakey, (list, tuple, np.ndarray)):
            datakey = (datakey,)
        for dkey in datakey:
            if dkey in data_dict:
                raw_data = data_dict[dkey]
                break
        if len(raw_data) == 0:
            raise ValueError(f"invalid key(s) {', '.join(datakey)} for requesting data")
    else:
        # find largest 2D array:
        # (1D arrays qualify as well; only items with a shape
        # attribute, i.e. numpy arrays, are considered)
        for d in data_dict:
            if hasattr(data_dict[d], 'shape'):
                if 1 <= len(data_dict[d].shape) <= 2 and \
                   np.max(data_dict[d].shape) > np.max(raw_data.shape):
                    raw_data = data_dict[d]
        if len(raw_data) == 0:
            raise ValueError('no data found')
    # make 2D:
    if len(raw_data.shape) == 1:
        raw_data = raw_data.reshape(-1, 1)
    # transpose if necessary:
    # (time is taken to be the longer dimension and is put first)
    if np.argmax(raw_data.shape) > 0:
        raw_data = raw_data.T
    # recode:
    # integer data are rescaled to floats covering -amax to +amax:
    if raw_data.dtype == np.dtype('int16'):
        data = raw_data.astype('float32')
        data *= amax/2**15
    elif raw_data.dtype == np.dtype('int32'):
        data = raw_data.astype(float)
        data *= amax/2**31
    elif raw_data.dtype == np.dtype('int64'):
        data = raw_data.astype(float)
        data *= amax/2**63
    else:
        data = raw_data
    return data, rate, unit, amax

888 

889 

def load_container(file_path, datakey=None,
                   samplekey=['rate', 'Fs', 'fs'],
                   timekey=['time'], amplkey=['amax'], unitkey='unit',
                   amax=1.0, unit='a.u.'):
    """Load data from a generic container file.

    Supported file formats are:

    - python pickle files (.pkl)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    file_path: str
        Path of the file to load.
    datakey: None, str, or list of str
        Name of the variable holding the data. If `None` take the
        variable that is an 2D array and has the largest number of
        elements.
    samplekey: str or list of str
        Name of the variable holding the sampling rate.
    timekey: str or list of str
        Name of the variable holding sampling times.
        If no sampling rate is available, the sampling rate is retrieved
        from the sampling times.
    amplkey: str
        Name of the variable holding the amplitude range of the data.
    unitkey: str
        Name of the variable holding the unit of the data.
        If `unitkey` is not a valid key, then return `unitkey` as the `unit`.
    amax: None or float
        If specified and no amplitude range has been found in the data
        container, then this is the amplitude range of the data.
    unit: None or str
        If specified and no unit has been found in the data container,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        Invalid key requested.
    """
    # dispatch on file extension; heavy modules are imported lazily:
    extension = os.path.splitext(file_path)[1]
    content = {}
    if extension == '.pkl':
        import pickle
        with open(file_path, 'rb') as f:
            content = pickle.load(f)
    elif extension == '.npz':
        content = np.load(file_path)
    elif extension == '.mat':
        from scipy.io import loadmat
        content = loadmat(file_path, squeeze_me=True)
    # interpretation of the loaded dictionary is delegated:
    return extract_container_data(content, datakey, samplekey,
                                  timekey, amplkey, unitkey, amax, unit)

959 

960 

def extract_container_metadata(data_dict, metadatakey=['metadata', 'info']):
    """ Extract metadata from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    metadatakey: str or list of str
        Name of the variable holding the metadata.

    Returns
    -------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    if not isinstance(metadatakey, (list, tuple, np.ndarray)):
        metadatakey = (metadatakey,)
    # a single variable may hold the complete metadata dictionary:
    for key in metadatakey:
        if key in data_dict:
            return data_dict[key]
    # otherwise collect flattened entries like 'metadata__section__key':
    collected = {}
    for key in metadatakey:
        prefix = key + '__'
        for dkey in data_dict:
            if dkey[:len(prefix)] == prefix:
                value = data_dict[dkey]
                # unwrap zero-dimensional numpy scalars:
                if hasattr(value, 'size') and value.ndim == 0:
                    value = value.item()
                collected[dkey[len(prefix):]] = value
    if len(collected) > 0:
        return unflatten_metadata(collected, sep='__')
    return collected

995 

996 

def metadata_container(file_path, metadatakey=['metadata', 'info']):
    """ Read meta-data of a container file.

    Parameters
    ----------
    file_path: str
        A container file.
    metadatakey: str or list of str
        Name of the variable holding the metadata.

    Returns
    -------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    # dispatch on file extension; heavy modules are imported lazily:
    extension = os.path.splitext(file_path)[1]
    content = {}
    if extension == '.pkl':
        import pickle
        with open(file_path, 'rb') as f:
            content = pickle.load(f)
    elif extension == '.npz':
        content = np.load(file_path)
    elif extension == '.mat':
        from scipy.io import loadmat
        content = loadmat(file_path, squeeze_me=True)
    # interpretation of the loaded dictionary is delegated:
    return extract_container_metadata(content, metadatakey)

1024 

1025 

def extract_container_markers(data_dict, poskey=['positions'],
                              spanskey=['spans'], labelskey=['labels'],
                              descrkey=['descriptions']):
    """ Extract markers from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    poskey: str or list of str
        Name of the variable holding positions of markers.
    spanskey: str or list of str
        Name of the variable holding spans of markers.
    labelskey: str or list of str
        Name of the variable holding labels of markers.
    descrkey: str or list of str
        Name of the variable holding descriptions of markers.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    if not isinstance(poskey, (list, tuple, np.ndarray)):
        poskey = (poskey,)
    if not isinstance(spanskey, (list, tuple, np.ndarray)):
        spanskey = (spanskey,)
    if not isinstance(labelskey, (list, tuple, np.ndarray)):
        labelskey = (labelskey,)
    if not isinstance(descrkey, (list, tuple, np.ndarray)):
        descrkey = (descrkey,)
    locs = np.zeros((0, 2), dtype=int)
    for pkey in poskey:
        if pkey in data_dict:
            locs = np.zeros((len(data_dict[pkey]), 2), dtype=int)
            locs[:,0] = data_dict[pkey]
            break
    # only fill in spans if positions were found, otherwise assigning
    # a non-empty spans array into the empty (0, 2) locs array raises
    # a broadcasting ValueError:
    if len(locs) > 0:
        for skey in spanskey:
            if skey in data_dict:
                locs[:,1] = data_dict[skey]
                break
    labels = np.zeros((0, 2), dtype=object)
    for lkey in labelskey:
        if lkey in data_dict:
            labels = np.zeros((len(data_dict[lkey]), 2), dtype=object)
            labels[:,0] = data_dict[lkey]
            break
    # likewise, descriptions without labels are skipped instead of crashing:
    if len(labels) > 0:
        for dkey in descrkey:
            if dkey in data_dict:
                labels[:,1] = data_dict[dkey]
                break
    return locs, labels

1082 

1083 

def markers_container(file_path, poskey=['positions'],
                      spanskey=['spans'], labelskey=['labels'],
                      descrkey=['descriptions']):
    """ Read markers of a container file.

    Parameters
    ----------
    file_path: str
        A container file.
    poskey: str or list of str
        Name of the variable holding positions of markers.
    spanskey: str or list of str
        Name of the variable holding spans of markers.
    labelskey: str or list of str
        Name of the variable holding labels of markers.
    descrkey: str or list of str
        Name of the variable holding descriptions of markers.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    # load the full container into a dictionary, depending on extension:
    ext = os.path.splitext(file_path)[1]
    if ext == '.pkl':
        import pickle
        with open(file_path, 'rb') as f:
            cont = pickle.load(f)
    elif ext == '.npz':
        cont = np.load(file_path)
    elif ext == '.mat':
        from scipy.io import loadmat
        cont = loadmat(file_path, squeeze_me=True)
    else:
        cont = {}
    return extract_container_markers(cont, poskey, spanskey,
                                     labelskey, descrkey)

1124 

1125 

def check_raw(filepath):
    """Check if file is a raw file.

    The following extensions are interpreted as raw files:

    - raw files (*.raw)
    - LabView scandata (*.scandat)
    - matlab files (*.mat)

    Parameters
    ----------
    filepath: str
        Path of the file to check.

    Returns
    -------
    is_raw: bool
        `True`, if `filepath` is a raw format.
    """
    # extension check is case insensitive:
    return os.path.splitext(filepath)[1].lower() in ('.raw', '.scandat', '.mat')

1146 

1147 

def load_raw(file_path, rate=44000, channels=1, dtype=np.float32,
             amax=1.0, unit='a.u.'):
    """Load data from a raw file.

    Raw files just contain the data and absolutely no metadata, not
    even the sampling rate, number of channels, etc.
    Supported file formats are:

    - raw files (*.raw)
    - LabView scandata (*.scandat)

    Parameters
    ----------
    file_path: str
        Path of the file to load.
    rate: float
        Sampling rate of the data in Hertz.
    channels: int
        Number of channels multiplexed in the data.
    dtype: str or numpy.dtype
        The data type stored in the file.
    amax: float
        The amplitude range of the data.
    unit: str
        The unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    """
    raw_data = np.fromfile(file_path, dtype=dtype).reshape(-1, channels)
    # scale integer samples into the amplitude range [-amax, +amax]:
    scalings = {np.dtype('int16'): ('float32', 2**15),
                np.dtype('int32'): (float, 2**31),
                np.dtype('int64'): (float, 2**63)}
    key = np.dtype(dtype)
    if key in scalings:
        target, maxint = scalings[key]
        data = raw_data.astype(target)
        data *= amax/maxint
    else:
        # float data are taken as stored:
        data = raw_data
    return data, rate, unit, amax

1201 

1202 

def load_audioio(file_path, verbose=0, gainkey=default_gain_keys, sep='.',
                 amax=1.0, unit='a.u.'):
    """Load data from an audio file.

    See the
    [`load_audio()`](https://bendalab.github.io/audioio/api/audioloader.html#audioio.audioloader.load_audio)
    function of the [`audioio`](https://github.com/bendalab/audioio)
    package for more infos.

    Parameters
    ----------
    file_path: str
        Path of the file to load.
    verbose: int
        If > 0 show detailed error/warning messages.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        If found, the data will be multiplied with the gain,
        and if available, the corresponding unit is returned.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
    sep: str
        String that separates section names in `gainkey`.
    amax: float
        If specified and no gain has been found in the metadata,
        then use this as the amplitude range.
    unit: str
        If specified and no gain has been found in the metadata,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data if found in the metadata (see `gainkey`),
        otherwise `unit`.
    amax: float
        Maximum amplitude of data range.
    """
    # retrieve gain factor and unit from the file's metadata:
    fac, data_unit = get_gain(metadata_audioio(file_path),
                              gainkey, sep, amax, unit)
    # load the raw audio data:
    data, rate = load_audio(file_path, verbose)
    # apply the gain, skipping the no-op multiplication:
    if fac != 1.0:
        data *= fac
    return data, rate, data_unit, fac

1253 

1254 

data_loader_funcs = (
    ('relacs', check_relacs, load_relacs, metadata_relacs, None),
    ('fishgrid', check_fishgrid, load_fishgrid, metadata_fishgrid, markers_fishgrid),
    ('container', check_container, load_container, metadata_container, markers_container),
    ('raw', check_raw, load_raw, None, None),
    ('audioio', None, load_audioio, metadata_audioio, markers_audioio),
    )
"""List of implemented load functions.

Each element of the list is a tuple with the data format's name, its
check function, its load function, its metadata function, and its
markers function. Check, metadata, and markers functions may be
`None`. A `None` check function (the 'audioio' entry) matches any
file and thus acts as the fallback.

"""

1268 

1269 

def load_data(file_path, verbose=0, **kwargs):
    """Load time-series data from a file.

    Parameters
    ----------
    file_path: str
        Path and name of the file to load.
    verbose: int
        If > 0 show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the 
        format specific loading functions.
        For example:
        - `amax`: the amplitude range of the data.
        - 'unit': the unit of the data.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        `file_path` is empty string.
    """
    if len(file_path) == 0:
        raise ValueError('input argument file_path is empty string.')
    # the first format whose check accepts the file is used for loading:
    for name, check_file, load_file, _, _ in data_loader_funcs:
        if check_file is not None and not check_file(file_path):
            continue
        data, rate, unit, amax = load_file(file_path, **kwargs)
        if verbose > 0:
            print(f'loaded {name} data from file "{file_path}"')
            if verbose > 1:
                print(f'  sampling rate: {rate:g} Hz')
                print(f'  channels     : {data.shape[1]}')
                print(f'  frames       : {len(data)}')
                print(f'  range        : {amax:g}{unit}')
        return data, rate, unit, amax
    return np.zeros((0, 1)), 0.0, '', 1.0

1318 

1319 

def metadata(file_path, **kwargs):
    """ Read meta-data from a data file.

    Parameters
    ----------
    file_path: str
        The full path and name of the file to load. For some file
        formats several files can be provided in a list.
    **kwargs: dict
        Further keyword arguments that are passed on to the 
        format specific loading functions.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the file. Keys of the nested
        dictionaries are always strings. If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings, or list of strings. But other
        simple types like ints or floats are also allowed.

    Raises
    ------
    ValueError:
        `file_path` is empty string.
    """
    if len(file_path) == 0:
        raise ValueError('input argument file_path is empty string.')
    # the first format whose check accepts the file wins:
    for _, check_file, _, metadata_file, _ in data_loader_funcs:
        if check_file is not None and not check_file(file_path):
            continue
        if metadata_file is None:
            break
        return metadata_file(file_path, **kwargs)
    return {}

1356 

1357 

def markers(file_path):
    """ Read markers of a data file.

    Parameters
    ----------
    file_path: str or file handle
        The data file.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Raises
    ------
    ValueError:
        `file_path` is empty string.
    """
    if len(file_path) == 0:
        raise ValueError('input argument file_path is empty string.')
    # the first format whose check accepts the file wins:
    for _, check_file, _, _, markers_file in data_loader_funcs:
        if check_file is not None and not check_file(file_path):
            continue
        if markers_file is None:
            break
        return markers_file(file_path)
    # no markers available for this format:
    return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)

1388 

1389 

1390class DataLoader(AudioLoader): 

1391 """Buffered reading of time-series data for random access of the data in the file. 

1392  

1393 This allows for reading very large data files that do not fit into 

1394 memory. A `DataLoader` instance can be used like a huge 

1395 read-only numpy array, i.e. 

1396 ``` 

1397 data = DataLoader('path/to/data/file.dat') 

1398 x = data[10000:20000,0] 

1399 ``` 

1400 The first index specifies the frame, the second one the channel. 

1401 

1402 `DataLoader` first determines the format of the data file and then 

1403 opens the file (first line). It then reads data from the file as 

1404 necessary for the requested data (second line). 

1405 

1406 Supported file formats are 

1407 

1408 - audio files via `audioio` package 

1409 - python pickle files 

1410 - numpy .npz files 

1411 - matlab .mat files 

1412 - relacs trace*.raw files (www.relacs.net) 

1413 - fishgrid traces-*.raw files 

1414 

1415 Reading sequentially through the file is always possible. If 

1416 previous data are requested, then the file is read from the 

1417 beginning. This might slow down access to previous data 

1418 considerably. Use the `backsize` argument to the open functions to 

1419 make sure some data are loaded before the requested frame. Then a 

1420 subsequent access to the data within `backsize` seconds before that 

1421 frame can still be handled without the need to reread the file 

1422 from the beginning. 

1423 

1424 Usage: 

1425 ------ 

1426 ``` 

1427 import thunderlab.dataloader as dl 

1428 with dl.DataLoader(file_path, 60.0, 10.0) as data: 

1429 # do something with the content of the file: 

1430 x = data[0:10000,0] 

1431 y = data[10000:20000,0] 

1432 z = x + y 

1433 ``` 

1434  

1435 Normal open and close: 

1436 ``` 

1437 data = dl.DataLoader(file_path, 60.0) 

1438 x = data[:,:] # read the whole file 

1439 data.close() 

1440 ```  

1441 that is the same as: 

1442 ``` 

1443 data = dl.DataLoader() 

1444 data.open(file_path, 60.0) 

1445 ``` 

1446  

1447 Parameters 

1448 ---------- 

1449 file_path: str 

1450 Name of the file. 

1451 buffersize: float 

1452 Size of internal buffer in seconds. 

1453 backsize: float 

1454 Part of the buffer to be loaded before the requested start index in seconds. 

1455 verbose: int 

1456 If larger than zero show detailed error/warning messages. 

1457 meta_kwargs: dict 

1458 Keyword arguments that are passed on to the _load_metadata() function. 

1459 

1460 Attributes 

1461 ---------- 

1462 rate: float 

1463 The sampling rate of the data in Hertz. 

1464 channels: int 

1465 The number of channels that are read in. 

1466 frames: int 

1467 The number of frames in the file. 

1468 format: str or None 

1469 Format of the audio file. 

1470 encoding: str or None 

1471 Encoding/subtype of the audio file. 

1472 shape: tuple 

1473 Number of frames and channels of the data. 

1474 ndim: int 

1475 Number of dimensions: always 2 (frames and channels). 

1476 unit: str 

1477 Unit of the data. 

1478 ampl_min: float 

1479 Minimum amplitude the file format supports. 

1480 ampl_max: float 

1481 Maximum amplitude the file format supports. 

1482 

1483 Methods 

1484 ------- 

1485 

1486 - `len()`: the number of frames 

1487 - `open()`: open a data file. 

1488 - `open_*()`: open a data file of a specific format. 

1489 - `close()`: close the file. 

1490 - `metadata()`: metadata of the file. 

1491 - `markers()`: markers of the file. 

1492 - `set_unwrap()`: Set parameters for unwrapping clipped data. 

1493 

1494 """ 

1495 

    def __init__(self, file_path=None, buffersize=10.0, backsize=0.0,
                 verbose=0, **meta_kwargs):
        """Initialize the DataLoader and optionally open a file.

        See the class documentation for the parameters.
        """
        # initialize the AudioLoader base class without opening a file:
        super().__init__(None, buffersize, backsize,
                         verbose, **meta_kwargs)
        # open immediately if a path was provided:
        if file_path is not None:
            self.open(file_path, buffersize, backsize, verbose, **meta_kwargs)

1502 

    def __getitem__(self, key):
        # delegate indexed access to the buffered AudioLoader implementation:
        return super(DataLoader, self).__getitem__(key)

    def __next__(self):
        # delegate iteration to the buffered AudioLoader implementation:
        return super(DataLoader, self).__next__()

1508 

1509 

1510 # relacs interface:  

    def open_relacs(self, file_path, buffersize=10.0, backsize=0.0,
                    verbose=0, amax=1.0):
        """Open relacs data files (www.relacs.net) for reading.

        Parameters
        ----------
        file_path: str
            Path to a relacs data directory or a file therein.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        amax: float
            The amplitude range of the data.

        Raises
        ------
        ValueError: .gz files not supported.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_relacs()

        trace_file_paths = relacs_trace_files(file_path)

        # open trace files (one file per channel, 4-byte samples):
        self.sf = []
        self.frames = None
        self.rate = None
        self.unit = ''
        self.filepath = None
        if len(trace_file_paths) > 0:
            self.filepath = os.path.dirname(trace_file_paths[0])
        for path in sorted(trace_file_paths):
            if path[-3:] == '.gz':
                raise ValueError('.gz files not supported')
            sf = open(path, 'rb')
            self.sf.append(sf)
            if verbose > 0:
                print(f'open_relacs(file_path) with file_path={path}')
            # file size (frame count = bytes / 4):
            sf.seek(0, os.SEEK_END)
            frames = sf.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate traces differing by up to two frames and
                # keep the smaller frame count:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            sf.seek(0)
            # retrieve sampling rate and unit; all traces must agree:
            rate, us = relacs_samplerate_unit(path)
            if self.rate is None:
                self.rate = rate
            elif rate != self.rate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
        self.channels = len(self.sf)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.ndim = len(self.shape)
        self.format = 'RELACS'
        self.encoding = 'FLOAT'
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.offset = 0
        # wire up format specific close/load/metadata hooks:
        self.close = self._close_relacs
        self.load_audio_buffer = self._load_buffer_relacs
        self.ampl_min = -amax
        self.ampl_max = +amax
        self._load_metadata = self._metadata_relacs
        # TODO: load markers:
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
        self._load_markers = None
        return self

1596 

1597 def _close_relacs(self): 

1598 """Close the relacs data files. 

1599 """ 

1600 if self.sf is not None: 

1601 for file in self.sf: 

1602 file.close() 

1603 self.sf = None 

1604 

1605 def _load_buffer_relacs(self, r_offset, r_size, buffer): 

1606 """Load new data from relacs data file. 

1607 

1608 Parameters 

1609 ---------- 

1610 r_offset: int 

1611 First frame to be read from file. 

1612 r_size: int 

1613 Number of frames to be read from file. 

1614 buffer: ndarray 

1615 Buffer where to store the loaded data. 

1616 """ 

1617 for i, file in enumerate(self.sf): 

1618 file.seek(r_offset*4) 

1619 data = file.read(r_size*4) 

1620 buffer[:, i] = np.frombuffer(data, dtype=np.float32) 

1621 

1622 

1623 def _metadata_relacs(self, store_empty=False, first_only=False): 

1624 """ Load meta-data of a relacs data set. 

1625 """ 

1626 info_path = os.path.join(self.filepath, 'info.dat') 

1627 if not os.path.exists(info_path): 

1628 return {} 

1629 return relacs_header(info_path, store_empty, first_only) 

1630 

1631 

1632 # fishgrid interface:  

    def open_fishgrid(self, file_path, buffersize=10.0, backsize=0.0,
                      verbose=0):
        """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

        Parameters
        ----------
        file_path: str
            Path to a fishgrid data directory, or a file therein.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_fishgrid()

        trace_file_paths = fishgrid_trace_files(file_path)
        self.filepath = None
        if len(trace_file_paths) > 0:
            self.filepath = os.path.dirname(trace_file_paths[0])
        self._load_metadata = metadata_fishgrid
        self._load_markers = markers_fishgrid

        # open grid files (one file per grid, channels interleaved):
        grids = fishgrid_grids(self.metadata())
        grid_sizes = [r*c for r,c in grids]
        self.channels = 0
        for g, path in enumerate(trace_file_paths):
            self.channels += grid_sizes[g]
        self.sf = []
        self.grid_channels = []
        self.grid_offs = []
        offs = 0
        self.frames = None
        # sampling rate and amplitude range come from the grid metadata:
        self.rate = get_number(self.metadata(), 'Hz', 'AISampleRate')
        v, self.unit = get_number_unit(self.metadata(), 'AIMaxVolt')
        if v is not None:
            self.ampl_min = -v
            self.ampl_max = +v

        for g, path in enumerate(trace_file_paths):
            sf = open(path, 'rb')
            self.sf.append(sf)
            if verbose > 0:
                print(f'open_fishgrid(file_path) with file_path={path}')
            # channel range covered by this grid file:
            self.grid_channels.append(grid_sizes[g])
            self.grid_offs.append(offs)
            offs += grid_sizes[g]
            # file size (frame count = bytes / 4 / channels of this grid):
            sf.seek(0, os.SEEK_END)
            frames = sf.tell()//4//grid_sizes[g]
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate grids differing by up to two frames and
                # keep the smaller frame count:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            sf.seek(0)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.ndim = len(self.shape)
        self.format = 'FISHGRID'
        self.encoding = 'FLOAT'
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.offset = 0
        self.close = self._close_fishgrid
        self.load_audio_buffer = self._load_buffer_fishgrid
        return self

1710 

1711 def _close_fishgrid(self): 

1712 """Close the fishgrid data files. 

1713 """ 

1714 if self.sf is not None: 

1715 for file in self.sf: 

1716 file.close() 

1717 self.sf = None 

1718 

    def _load_buffer_fishgrid(self, r_offset, r_size, buffer):
        """Load new data from fishgrid data files.

        Parameters
        ----------
        r_offset: int
            First frame to be read from file.
        r_size: int
            Number of frames to be read from file.
        buffer: ndarray
            Buffer where to store the loaded data.
        """
        # each grid file stores its channels interleaved as 4-byte float32:
        for file, gchannels, goffset in zip(self.sf, self.grid_channels, self.grid_offs):
            file.seek(r_offset*4*gchannels)
            data = file.read(r_size*4*gchannels)
            buffer[:, goffset:goffset+gchannels] = np.frombuffer(data, dtype=np.float32).reshape((-1, gchannels))

1735 

1736 

1737 # container interface: 

    def open_container(self, file_path, buffersize=10.0,
                       backsize=0.0, verbose=0, datakey=None,
                       samplekey=['rate', 'Fs', 'fs'],
                       timekey=['time'], amplkey=['amax'], unitkey='unit',
                       metadatakey=['metadata', 'info'],
                       poskey=['positions'],
                       spanskey=['spans'], labelskey=['labels'],
                       descrkey=['descriptions'],
                       amax=1.0, unit='a.u.'):
        """Open generic container file.

        Supported file formats are:

        - python pickle files (.pkl)
        - numpy files (.npz)
        - matlab files (.mat)

        Parameters
        ----------
        file_path: str
            Path to a container file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        datakey: None, str, or list of str
            Name of the variable holding the data. If `None` take the
            variable that is an 2D array and has the largest number of
            elements.
        samplekey: str or list of str
            Name of the variable holding the sampling rate.
        timekey: str or list of str
            Name of the variable holding sampling times.
            If no sampling rate is available, the sampling rate is retrieved
            from the sampling times.
        amplkey: str or list of str
            Name of the variable holding the amplitude range of the data.
        unitkey: str
            Name of the variable holding the unit of the data.
        metadatakey: str or list of str
            Name of the variable holding the metadata.
        poskey: str or list of str
            Name of the variable holding positions of markers.
        spanskey: str or list of str
            Name of the variable holding spans of markers.
        labelskey: str or list of str
            Name of the variable holding labels of markers.
        descrkey: str or list of str
            Name of the variable holding descriptions of markers.
        amax: None or float
            If specified and no amplitude range has been found in the data
            container, then this is the amplitude range of the data.
        unit: None or str
            If specified and no unit has been found in the data container,
            then return this as the unit of the data.

        Raises
        ------
        ValueError:
            Invalid key requested.
        """
        self.verbose = verbose
        # load the whole container into a dictionary, depending on extension:
        data_dict = {}
        ext = os.path.splitext(file_path)[1]
        if ext == '.pkl':
            import pickle
            with open(file_path, 'rb') as f:
                data_dict = pickle.load(f)
            self.format = 'PKL'
        elif ext == '.npz':
            data_dict = np.load(file_path)
            self.format = 'NPZ'
        elif ext == '.mat':
            from scipy.io import loadmat
            data_dict = loadmat(file_path, squeeze_me=True)
            self.format = 'MAT'
        # the complete data array becomes the buffer - containers are
        # fully loaded into memory, no lazy reading:
        self.buffer, self.rate, self.unit, amax = \
            extract_container_data(data_dict, datakey, samplekey,
                                   timekey, amplkey, unitkey, amax, unit)
        self.filepath = file_path
        self.channels = self.buffer.shape[1]
        self.frames = self.buffer.shape[0]
        self.shape = self.buffer.shape
        self.ndim = self.buffer.ndim
        self.size = self.buffer.size
        self.encoding = self.numpy_encodings[self.buffer.dtype]
        self.ampl_min = -amax
        self.ampl_max = +amax
        self.offset = 0
        self.buffer_changed = np.zeros(self.channels, dtype=bool)
        self.bufferframes = self.frames
        self.backsize = 0
        self.close = self._close_container
        self.load_audio_buffer = self._load_buffer_container
        # metadata and markers are extracted once, not loaded lazily:
        self._metadata = extract_container_metadata(data_dict, metadatakey)
        self._load_metadata = None
        self._locs, self._labels = extract_container_markers(data_dict,
                                                             poskey,
                                                             spanskey,
                                                             labelskey,
                                                             descrkey)
        self._load_markers = None

1842 

    def _close_container(self):
        """Close container. """
        # the whole container was loaded into memory, nothing to release:
        pass

1846 

1847 def _load_buffer_container(self, r_offset, r_size, buffer): 

1848 """Load new data from container.""" 

1849 buffer[:, :] = self.buffer[r_offset:r_offset + r_size, :] 

1850 

1851 

1852 # raw data interface: 

    def open_raw(self, file_path, buffersize=10.0, backsize=0.0,
                 verbose=0, rate=44000, channels=1, dtype=np.float32,
                 amax=1.0, unit='a.u.'):
        """Load data from a raw file.

        Raw files just contain the data and absolutely no metadata, not
        even the sampling rate, number of channels, etc.
        Supported file formats are:

        - raw files (*.raw)
        - LabView scandata (*.scandat)

        Parameters
        ----------
        file_path: str
            Path of the file to load.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        rate: float
            Sampling rate of the data in Hertz.
        channels: int
            Number of channels multiplexed in the data.
        dtype: str or numpy.dtype
            The data type stored in the file.
        amax: float
            The amplitude range of the data.
        unit: str
            The unit of the data.
        """
        self.verbose = verbose
        self.filepath = file_path
        self.sf = open(file_path, 'rb')
        if verbose > 0:
            print(f'open_raw(file_path) with file_path={file_path}')
        self.dtype = np.dtype(dtype)
        self.rate = float(rate)
        # file size determines the number of frames:
        self.sf.seek(0, os.SEEK_END)
        self.frames = self.sf.tell()//self.dtype.itemsize
        self.sf.seek(0)
        self.channels = int(channels)
        self.shape = (self.frames, self.channels)
        self.ndim = len(self.shape)
        self.size = self.frames*self.channels
        self.format = 'RAW'
        self.encoding = self.numpy_encodings.get(self.dtype, 'UNKNOWN')
        self.unit = unit
        self.ampl_max = float(amax)
        self.ampl_min = -self.ampl_max
        self.offset = 0
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_raw
        self.load_audio_buffer = self._load_buffer_raw
        # raw files carry no metadata or markers at all:
        self._metadata = None
        self._load_metadata = None
        self._locs = None
        self._labels = None
        self._load_markers = None

    def _close_raw(self):
        """Close raw file. """
        self.sf.close()
        self.sf = None

1922 

    def _load_buffer_raw(self, r_offset, r_size, buffer):
        """Load new data from a raw file.

        Parameters
        ----------
        r_offset: int
            First frame to be read from file.
        r_size: int
            Number of frames to be read from file.
        buffer: ndarray
            Buffer where to store the loaded data.
        """
        self.sf.seek(r_offset*self.dtype.itemsize)
        raw_data = self.sf.read(r_size*self.dtype.itemsize)
        raw_data = np.frombuffer(raw_data, dtype=self.dtype)
        raw_data = raw_data.reshape(-1, self.channels)
        # scale integer samples to the amplitude range [-ampl_max, +ampl_max]
        # (same recoding as in the module-level load_raw() function):
        if self.dtype == np.dtype('int16'):
            data = raw_data.astype('float32')
            data *= self.ampl_max/2**15
        elif self.dtype == np.dtype('int32'):
            data = raw_data.astype(float)
            data *= self.ampl_max/2**31
        elif self.dtype == np.dtype('int64'):
            data = raw_data.astype(float)
            data *= self.ampl_max/2**63
        else:
            data = raw_data
        buffer[:, :] = data

1942 

1943 

1944 # audioio interface:  

    def open_audioio(self, file_path, buffersize=10.0, backsize=0.0,
                     verbose=0, gainkey=default_gain_keys, sep='.',
                     amax=None, unit='a.u.'):
        """Open an audio file.

        See the [audioio](https://github.com/bendalab/audioio) package
        for details.

        Parameters
        ----------
        file_path: str
            Path to an audio file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index
            in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        gainkey: str or list of str
            Key in the file's metadata that holds some gain information.
            If found, the data will be multiplied with the gain,
            and if available, the corresponding unit is returned.
            See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
        sep: str
            String that separates section names in `gainkey`.
        amax: None or float
            If specified and no gain has been found in the metadata,
            then use this as the amplitude range.
        unit: None or str
            If specified and no gain has been found in the metadata,
            then this is the unit of the data.

        """
        self.verbose = verbose
        # let the AudioLoader base class open the file:
        super(DataLoader, self).open(file_path, buffersize, backsize, verbose)
        md = self.metadata()
        fac, unit = get_gain(md, gainkey, sep, amax, unit)
        # without gain information the data are passed through unscaled:
        if fac is None:
            self.gain_fac = 1.0
        else:
            self.gain_fac = fac
        # wrap the original buffer loader with one that applies the gain:
        self._load_buffer_audio_org = self.load_audio_buffer
        self.load_audio_buffer = self._load_buffer_audioio
        self.ampl_min *= self.gain_fac
        self.ampl_max *= self.gain_fac
        self.unit = unit
        return self

1993 

1994 def _load_buffer_audioio(self, r_offset, r_size, buffer): 

1995 """Load and scale new data from an audio file. 

1996 

1997 Parameters 

1998 ---------- 

1999 r_offset: int 

2000 First frame to be read from file. 

2001 r_size: int 

2002 Number of frames to be read from file. 

2003 buffer: ndarray 

2004 Buffer where to store the loaded data. 

2005 """ 

2006 self._load_buffer_audio_org(r_offset, r_size, buffer) 

2007 buffer *= self.gain_fac 

2008 

2009 

2010 def open(self, file_path, buffersize=10.0, backsize=0.0, 

2011 verbose=0, **kwargs): 

2012 """Open file with time-series data for reading. 

2013 

2014 Parameters 

2015 ---------- 

2016 file_path: str or list of str 

2017 Path to a data files or directory. 

2018 buffersize: float 

2019 Size of internal buffer in seconds. 

2020 backsize: float 

2021 Part of the buffer to be loaded before the requested start index 

2022 in seconds. 

2023 verbose: int 

2024 If > 0 show detailed error/warning messages. 

2025 **kwargs: dict 

2026 Further keyword arguments that are passed on to the  

2027 format specific opening functions. 

2028 For example: 

2029 - `amax`: the amplitude range of the data. 

2030 - 'unit': the unit of the data. 

2031 

2032 Raises 

2033 ------ 

2034 ValueError: 

2035 `file_path` is empty string. 

2036 """ 

2037 # list of implemented open functions: 

2038 data_open_funcs = ( 

2039 ('relacs', check_relacs, self.open_relacs, 1), 

2040 ('fishgrid', check_fishgrid, self.open_fishgrid, 1), 

2041 ('container', check_container, self.open_container, 1), 

2042 ('raw', check_raw, self.open_raw, 1), 

2043 ('audioio', None, self.open_audioio, 0), 

2044 ) 

2045 if len(file_path) == 0: 

2046 raise ValueError('input argument file_path is empty string.') 

2047 # open data: 

2048 for name, check_file, open_file, v in data_open_funcs: 

2049 if check_file is None or check_file(file_path): 

2050 open_file(file_path, buffersize, backsize, verbose, **kwargs) 

2051 if v*verbose > 1: 

2052 if self.format is not None: 

2053 print(f' format : {self.format}') 

2054 if self.encoding is not None: 

2055 print(f' encoding : {self.encoding}') 

2056 print(f' sampling rate: {self.rate} Hz') 

2057 print(f' channels : {self.channels}') 

2058 print(f' frames : {self.frames}') 

2059 print(f' range : {self.ampl_max:g}{self.unit}') 

2060 break 

2061 return self 

2062 

2063 

def demo(file_path, plot=False):
    """Demonstrate the use of load_data() and DataLoader.

    Parameters
    ----------
    file_path: str
        Path to a data file.
    plot: bool
        If True, plot the loaded data.
    """
    print("try load_data:")
    data, rate, unit, amax = load_data(file_path, verbose=2)
    if plot:
        fig, ax = plt.subplots()
        time = np.arange(len(data))/rate
        for c in range(data.shape[1]):
            ax.plot(time, data[:,c])
        ax.set_xlabel('Time [s]')
        ax.set_ylabel(f'[{unit}]')
        if amax is not None and np.isfinite(amax):
            ax.set_ylim(-amax, +amax)
        plt.show()
    # NOTE(review): a stray bare `return` here made the whole DataLoader
    # demonstration below unreachable; it was removed so both demos run.

    print('')
    print("try DataLoader:")
    with DataLoader(file_path, 2.0, 1.0, 1) as data:
        print('sampling rate: %g' % data.rate)
        print('frames       : %d %d' % (len(data), data.shape[0]))
        nframes = int(1.0 * data.rate)
        # forward:
        for i in range(0, len(data), nframes):
            print('forward %d-%d' % (i, i + nframes))
            x = data[i:i + nframes, 0]
            if plot:
                fig, ax = plt.subplots()
                ax.plot((i + np.arange(len(x)))/data.rate, x)
                ax.set_xlabel('Time [s]')
                ax.set_ylabel(f'[{data.unit}]')
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print('backward %d-%d' % (i, i + nframes))
            x = data[i:i + nframes, 0]
            if plot:
                fig, ax = plt.subplots()
                ax.plot((i + np.arange(len(x)))/data.rate, x)
                ax.set_xlabel('Time [s]')
                ax.set_ylabel(f'[{data.unit}]')
                plt.show()

2106 

def main(*cargs):
    """Call demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    # assemble the command line interface:
    cli = argparse.ArgumentParser(description=
                                  'Checking thunderlab.dataloader module.')
    cli.add_argument('-p', dest='plot', action='store_true',
                     help='plot loaded data')
    cli.add_argument('file', nargs=1, default='', type=str,
                     help='name of data file')
    options = cli.parse_args(cargs)
    # run the demo on the given file:
    demo(options.file[0], options.plot)

2124 

2125 

2126if __name__ == "__main__": 

2127 main(*sys.argv[1:])