Coverage for src/thunderlab/dataloader.py: 76%

885 statements  

« prev     ^ index     » next       coverage.py v7.6.2, created at 2024-10-09 16:02 +0000

1"""Load time-series data from files. 

2 

3``` 

4data, rate, unit, amax = load_data('data/file.wav') 

5``` 

6 

7The function `data_loader()` loads the whole time-series from the file 

8as a numpy array of floats. First dimension is frames, second is 

9channels. In contrast to the `audioio.load_audio()` function, the 

10values of the data array are not restricted between -1 and 1. They can 

11 assume any value within the range `-amax` to `+amax` with the returned 

12`unit`. 

13 

14``` 

15data = DataLoader('data/file.wav', 60.0) 

16``` 

17or 

18``` 

19with DataLoader('data/file.wav', 60.0) as data: 

20``` 

21 Create a `DataLoader` object that loads chunks of 60 seconds long data 

22on demand. `data` can be used like a read-only numpy array of floats. 

23 

24 

25## Supported file formats 

26 

27- python pickle files 

28- numpy .npz files 

29- matlab .mat files 

30- audio files via [`audioio`](https://github.com/bendalab/audioio) package 

31- LabView .scandat files 

32- relacs trace*.raw files (https://www.relacs.net) 

33- fishgrid traces-*.raw files (https://github.com/bendalab/fishgrid) 

34 

35 

36## Metadata 

37 

38Many file formats allow to store metadata that further describe the 

39stored time series data. We handle them as nested dictionary of key-value 

40pairs. Load them with the `metadata()` function: 

41``` 

42metadata = metadata('data/file.mat') 

43``` 

44 

45## Markers 

46 

47Some file formats also allow to store markers that mark specific 

48positions in the time series data. Load marker positions and spans (in 

49the 2-D array `locs`) and label and text strings (in the 2-D array 

50`labels`) with the `markers()` function: 

51``` 

52locs, labels = markers('data.wav') 

53``` 

54 

55 ## Additional, format specific functions 

56 

57- `relacs_samplerate_unit()`: retrieve sampling rate and unit from a relacs stimuli.dat file. 

58- `relacs_header()`: read key-value pairs from relacs *.dat file headers. 

59- `fishgrid_grids()`: retrieve grid sizes from a fishgrid.cfg file. 

60- `fishgrid_spacings()`: spacing between grid electrodes. 

61 

62""" 

63 

64import os 

65import sys 

66import glob 

67import gzip 

68import numpy as np 

69try: 

70 import matplotlib.pyplot as plt 

71except ImportError: 

72 pass 

73from audioio import load_audio, AudioLoader, unflatten_metadata 

74from audioio import get_number_unit, get_number, get_int, get_bool, get_gain 

75from audioio import default_starttime_keys, default_gain_keys 

76from audioio import metadata as metadata_audioio 

77from audioio import markers as markers_audioio 

78 

79 

def relacs_samplerate_unit(filepath, channel=0):
    """Retrieve sampling rate and unit from a relacs stimuli.dat file.

    Parameters
    ----------
    filepath: str
        Path to a relacs data directory, or a file in a relacs data directory.
    channel: int
        Channel (trace) number, if `filepath` does not specify a
        trace-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz
    unit: str
        Unit of the trace, can be empty if not found

    Raises
    ------
    IOError/FileNotFoundError:
        If the stimuli.dat file does not exist.
    ValueError:
        stimuli.dat file does not contain sampling rate.
    """
    trace = channel + 1
    relacs_dir = filepath
    # check for relacs data directory:
    if not os.path.isdir(filepath):
        relacs_dir = os.path.dirname(filepath)
        bn = os.path.basename(filepath).lower()
        i = bn.find('.raw')
        # file names like "trace-13.raw" encode the trace number:
        if len(bn) > 5 and bn[0:5] == 'trace' and i > 6:
            trace = int(bn[6:i])

    # retrieve sampling rate and unit from stimuli.dat file:
    samplerate = None
    sampleinterval = None
    unit = ""

    # collect the comment header ('#' lines) of the stimuli file:
    lines = []
    stimuli_file = os.path.join(relacs_dir, 'stimuli.dat')
    if os.path.isfile(stimuli_file + '.gz'):
        stimuli_file += '.gz'
    if stimuli_file[-3:] == '.gz':
        # BUGFIX: gzip.open() requires text mode 'rt' when an encoding is
        # given; with the previous mode 'r' (binary) it raised a ValueError.
        with gzip.open(stimuli_file, 'rt', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                # header ends at the first non-comment line:
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    else:
        with open(stimuli_file, 'r', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)

    for line in lines:
        if "unit%d" % trace in line:
            unit = line.split(':')[1].strip()
        if "sampling rate%d" % trace in line:
            value = line.split(':')[1].strip()
            samplerate = float(value.replace('Hz',''))
        elif "sample interval%d" % trace in line:
            value = line.split(':')[1].strip()
            sampleinterval = float(value.replace('ms',''))

    if samplerate is not None:
        return samplerate, unit
    if sampleinterval is not None:
        # sample interval is given in milliseconds:
        return 1000/sampleinterval, unit
    raise ValueError(f'could not retrieve sampling rate from {stimuli_file}')

154 

155 

def relacs_header(filepath, store_empty=False, first_only=False,
                  lower_keys=False, flat=False,
                  add_sections=False):
    """Read key-value pairs from a relacs *.dat file header.

    Parameters
    ----------
    filepath: str
        A relacs *.dat file, can be also a zipped .gz file.
    store_empty: bool
        If `False` do not add meta data with empty values.
    first_only: bool
        If `True` only store the first element of a list.
    lower_keys: bool
        Make all keys lower case.
    flat: bool
        Do not make a nested dictionary.
        Use this option also to read in very old relacs metadata with
        ragged left alignment.
    add_sections: bool
        If `True`, prepend keys with sections names separated by
        '.' to make them unique.

    Returns
    -------
    data: dict
        Nested dictionary with key-value pairs of the file header.

    Raises
    ------
    IOError/FileNotFoundError:
        If `filepath` cannot be opened.
    """
    # read in header ('#' comment lines) from file:
    lines = []
    if os.path.isfile(filepath + '.gz'):
        filepath += '.gz'
    if filepath[-3:] == '.gz':
        # BUGFIX: gzip.open() requires text mode 'rt' when an encoding is
        # given; with the previous mode 'r' (binary) it raised a ValueError.
        with gzip.open(filepath, 'rt', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    else:
        with open(filepath, 'r', encoding='latin-1') as sf:
            for line in sf:
                line = line.strip()
                if len(line) == 0 or line[0] != '#':
                    break
                lines.append(line)
    # parse:
    data = {}
    cdatas = [data]        # stack of currently open nested dictionaries
    sections = ['']        # stack of section names
    ident_offs = None      # indentation of top-level keys
    ident = None           # indentation step per section level
    for line in lines:
        words = line.split(':')
        value = ':'.join(words[1:]).strip() if len(words) > 1 else ''
        if len(words) >= 1:
            key = words[0].strip('#')
            # get section level from indentation:
            level = 0
            if not flat or len(value) == 0:
                nident = len(key) - len(key.lstrip())
                if ident_offs is None:
                    ident_offs = nident
                elif ident is None:
                    if nident > ident_offs:
                        ident = nident - ident_offs
                        level = 1
                else:
                    level = (nident - ident_offs)//ident
            # close sections deeper than the current level:
            if not flat:
                while len(cdatas) > level + 1:
                    cdatas[-1][sections.pop()] = cdatas.pop()
            else:
                while len(sections) > level + 1:
                    sections.pop()
            # key:
            key = key.strip().strip('"')
            if lower_keys:
                key = key.lower()
            skey = key
            if add_sections:
                key = '.'.join(sections[1:] + [key])
            if len(value) == 0:
                # new sub-section:
                if flat:
                    if store_empty:
                        cdatas[-1][key] = None
                else:
                    cdatas.append({})
                    sections.append(skey)
            else:
                # key-value pair:
                value = value.strip('"')
                # BUGFIX: the original condition
                # `len(value) > 0 or value != '-' or store_empty`
                # was always True, so `store_empty=False` never skipped
                # empty or '-' values as documented:
                if (len(value) > 0 and value != '-') or store_empty:
                    if len(value) > 0 and value[0] == '[' and value[-1] == ']':
                        # parse a list value:
                        value = [v.strip() for v in value.lstrip('[').rstrip(']').split(',')]
                        if first_only:
                            value = value[0]
                    cdatas[-1][key] = value
    # close all remaining open sections:
    while len(cdatas) > 1:
        cdatas[-1][sections.pop()] = cdatas.pop()
    return data

264 

265 

def check_relacs(file_path):
    """Check for valid relacs file.

    Parameters
    ----------
    file_path: str
        Path to a relacs data directory, or a file in a relacs data directory.

    Returns
    -------
    is_relacs: boolean
        `True` if `file_path` is a valid relacs directory or is a file therein.
    """
    # resolve the relacs data directory:
    folder = file_path if os.path.isdir(file_path) else os.path.dirname(file_path)
    # a valid relacs directory has a stimuli.dat and a trace-1.raw file,
    # either of which may be gzipped:
    has_stimuli = any(os.path.isfile(os.path.join(folder, name))
                      for name in ('stimuli.dat', 'stimuli.dat.gz'))
    has_trace = any(os.path.isfile(os.path.join(folder, name))
                    for name in ('trace-1.raw', 'trace-1.raw.gz'))
    return has_stimuli and has_trace

293 

294 

def relacs_trace_files(file_path):
    """Expand file path for relacs data to appropriate trace*.raw file names.

    Parameters
    ----------
    file_path: str
        Path to a relacs data directory, or a file in a relacs data directory.

    Returns
    -------
    trace_file_paths: list of str
        List of relacs trace*.raw files.
    """
    folder = file_path if os.path.isdir(file_path) else os.path.dirname(file_path)
    trace_file_paths = []
    # collect consecutively numbered trace files, plain or gzipped:
    for k in range(1, 10001):
        base = os.path.join(folder, f'trace-{k}.raw')
        if os.path.isfile(base):
            trace_file_paths.append(base)
        elif os.path.isfile(base + '.gz'):
            trace_file_paths.append(base + '.gz')
        else:
            # stop at the first missing trace number:
            break
    return trace_file_paths

321 

322 

def load_relacs(file_path, amax=1.0):
    """Load traces that have been recorded with relacs (https://github.com/relacs/relacs).

    Parameters
    ----------
    file_path: str
        Path to a relacs data directory, or a file in a relacs data directory.
    amax: float
        The amplitude range of the data.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz
    unit: str
        Unit of the data
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        - Invalid name for relacs trace-*.raw file.
        - Sampling rates of traces differ.
        - Unit of traces differ.
    """
    trace_file_paths = relacs_trace_files(file_path)
    # load trace*.raw files:
    nchannels = len(trace_file_paths)
    data = None
    nrows = 0
    rate = None
    unit = ''
    for channel, trace_path in enumerate(sorted(trace_file_paths)):
        # traces are stored as raw 4-byte floats, optionally gzipped:
        if trace_path[-3:] == '.gz':
            with gzip.open(trace_path, 'rb') as sf:
                trace = np.frombuffer(sf.read(), dtype=np.float32)
        else:
            trace = np.fromfile(trace_path, np.float32)
        if data is None:
            # allocate the output array based on the first trace:
            nrows = len(trace)
            data = np.zeros((nrows, nchannels))
        n = min(len(trace), nrows)
        data[:n, channel] = trace[:n]
        # retrieve sampling rate and unit and check consistency:
        trace_rate, trace_unit = relacs_samplerate_unit(trace_path, channel)
        if rate is None:
            rate = trace_rate
        elif trace_rate != rate:
            raise ValueError('sampling rates of traces differ')
        if len(unit) == 0:
            unit = trace_unit
        elif trace_unit != unit:
            raise ValueError('unit of traces differ')
    return data, rate, unit, amax

381 

382 

def metadata_relacs(file_path, store_empty=False, first_only=False,
                    lower_keys=False, flat=False, add_sections=False):
    """ Read meta-data of a relacs data set.

    Parameters
    ----------
    file_path: str
        A relacs data directory or a file therein.
    store_empty: bool
        If `False` do not add meta data with empty values.
    first_only: bool
        If `True` only store the first element of a list.
    lower_keys: bool
        Make all keys lower case.
    flat: bool
        Do not make a nested dictionary.
        Use this option also to read in very old relacs metadata with
        ragged left alignment.
    add_sections: bool
        If `True`, prepend keys with sections names separated by
        '.' to make them unique.

    Returns
    -------
    data: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    relacs_dir = file_path
    if not os.path.isdir(file_path):
        relacs_dir = os.path.dirname(file_path)
    info_path = os.path.join(relacs_dir, 'info.dat')
    if not os.path.exists(info_path):
        # BUGFIX: used to return the tuple `dict(), []` although this
        # function returns a single dictionary on all other paths:
        return {}
    data = relacs_header(info_path, store_empty, first_only,
                         lower_keys, flat, add_sections)
    return data

419 

420 

def fishgrid_spacings(metadata, unit='m'):
    """Spacing between grid electrodes.

    Parameters
    ----------
    metadata: dict
        Fishgrid metadata obtained from `metadata_fishgrid()`.
    unit: str
        Unit in which to return the spacings.

    Returns
    -------
    grid_dist: list of tuple of float
        For each grid the distances between rows and columns in `unit`.
    """
    grids_dist = []
    # up to four grids can be configured:
    for grid in range(1, 5):
        row_dist = get_number(metadata, unit, f'RowDistance{grid}', default=0)
        col_dist = get_number(metadata, unit, f'ColumnDistance{grid}', default=0)
        rows = get_int(metadata, f'Rows{grid}', default=0)
        cols = get_int(metadata, f'Columns{grid}', default=0)
        used = get_bool(metadata, f'Used{grid}', default=False)
        # a grid counts if it is marked used or has a non-empty layout:
        if used or (cols > 0 and rows > 0):
            grids_dist.append((row_dist, col_dist))
    return grids_dist

446 

447 

def fishgrid_grids(metadata):
    """Retrieve grid sizes from a fishgrid.cfg file.

    Parameters
    ----------
    metadata: dict
        Fishgrid metadata obtained from `metadata_fishgrid()`.

    Returns
    -------
    grids: list of tuple of int
        For each grid the number of rows and columns.
    """
    grids = []
    # up to four grids can be configured:
    for grid in range(1, 5):
        rows = get_int(metadata, f'Rows{grid}', default=0)
        cols = get_int(metadata, f'Columns{grid}', default=0)
        used = get_bool(metadata, f'Used{grid}', default=False)
        # a grid counts if it is marked used or has a non-empty layout:
        if used or (cols > 0 and rows > 0):
            grids.append((rows, cols))
    return grids

469 

470 

def check_fishgrid(file_path):
    """Check for valid fishgrid file (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    file_path: str
        Path to a fishgrid data directory or a file in a fishgrid
        data directory.

    Returns
    -------
    is_fishgrid: bool
        `True` if `file_path` is a valid fishgrid data directory or
        a file therein.
    """
    # resolve the fishgrid data directory:
    folder = file_path if os.path.isdir(file_path) else os.path.dirname(file_path)
    # a valid fishgrid directory has a configuration file plus raw traces:
    if not os.path.isfile(os.path.join(folder, 'fishgrid.cfg')):
        return False
    return (os.path.isfile(os.path.join(folder, 'traces-grid1.raw')) or
            os.path.isfile(os.path.join(folder, 'traces.raw')))

494 

495 

def fishgrid_trace_files(file_path):
    """Expand file paths for fishgrid data to appropriate traces*.raw file names.

    Parameters
    ----------
    file_path: str
        Path to a fishgrid data directory, or a file therein.

    Returns
    -------
    trace_file_paths: list of str
        List of fishgrid traces*.raw files.
    """
    # resolve the fishgrid data directory:
    folder = file_path if os.path.isdir(file_path) else os.path.dirname(file_path)
    trace_file_paths = []
    # collect consecutively numbered per-grid trace files:
    for grid in range(1, 10001):
        path = os.path.join(folder, f'traces-grid{grid}.raw')
        if not os.path.isfile(path):
            break
        trace_file_paths.append(path)
    if not trace_file_paths:
        # fall back on a single plain traces.raw file:
        path = os.path.join(folder, 'traces.raw')
        if os.path.isfile(path):
            trace_file_paths.append(path)
    return trace_file_paths

525 

526 

def load_fishgrid(file_path):
    """Load traces that have been recorded with fishgrid (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    file_path: str
        Path to a fishgrid data directory, or a file therein.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    FileNotFoundError:
        Invalid or not existing fishgrid files.
    """
    trace_file_paths = fishgrid_trace_files(file_path)
    if len(trace_file_paths) == 0:
        raise FileNotFoundError(f'no fishgrid files specified')
    md = metadata_fishgrid(file_path)
    grids = fishgrid_grids(md)
    grid_sizes = [rows*cols for rows, cols in grids]

    # number of channels stored in each traces-grid*.raw file:
    grid_channels = [grid_sizes[g] for g in range(len(trace_file_paths))]
    nchannels = sum(grid_channels)

    # load traces-grid*.raw files:
    data = None
    nrows = 0
    offs = 0
    rate = get_number(md, 'Hz', 'AISampleRate')
    for path, channels in zip(trace_file_paths, grid_channels):
        # raw traces are interleaved 4-byte floats, one column per channel:
        x = np.fromfile(path, np.float32).reshape((-1, channels))
        if data is None:
            # allocate the output array based on the first grid file:
            nrows = len(x)
            data = np.zeros((nrows, nchannels))
        n = min(len(x), nrows)
        data[:n, offs:offs + channels] = x[:n, :]
        offs += channels
    amax, unit = get_number_unit(md, 'AIMaxVolt')
    return data, rate, unit, amax

579 

580 

# Register fishgrid-specific metadata keys with audioio, so that its
# generic start-time and gain lookups also find fishgrid recordings:
default_starttime_keys.append(['StartDate', 'StartTime'])
default_gain_keys.insert(0, 'AIMaxVolt')

584 

585 

def metadata_fishgrid(file_path):
    """ Read meta-data of a fishgrid data set.

    Parameters
    ----------
    file_path: str
        A fishgrid data directory or a file therein.

    Returns
    -------
    data: nested dict
        Nested dictionary with key-value pairs of the meta data.
        Empty dictionary if no fishgrid.cfg file is found.
    """
    fishgrid_dir = file_path
    if not os.path.isdir(fishgrid_dir):
        fishgrid_dir = os.path.dirname(file_path)
    path = os.path.join(fishgrid_dir, 'fishgrid.cfg')
    # read in header from file:
    lines = []
    if os.path.isfile(path + '.gz'):
        path += '.gz'
    if not os.path.exists(path):
        return {}
    if path[-3:] == '.gz':
        # BUGFIX: gzip.open() requires text mode 'rt' when an encoding is
        # given; with the previous mode 'r' (binary) it raised a ValueError.
        with gzip.open(path, 'rt', encoding='latin-1') as sf:
            for line in sf:
                lines.append(line)
    else:
        with open(path, 'r', encoding='latin-1') as sf:
            for line in sf:
                lines.append(line)
    # parse:
    data = {}            # top-level metadata dictionary
    cdatas = [data]      # stack of currently open (nested) sections
    ident_offs = None    # indentation of top-level keys
    ident = None         # indentation step per section level
    old_style = False    # old-style files mark sections with '----' lines
    grid_n = False       # grid number for disambiguating keys (old style)
    for line in lines:
        if len(line.strip()) == 0:
            continue
        if line[0] == '*':
            # main section:
            key = line[1:].strip()
            data[key] = {}
            cdatas = [data, data[key]]
        elif '----' in line:
            # old-style section marker:
            old_style = True
            key = line.strip().strip(' -').replace('&', '')
            if key.upper() == 'SETUP':
                key = 'Grid 1'
            grid_n = False
            if key[:4].lower() == 'grid':
                # remember the grid number to make keys unique below:
                grid_n = key[5]
            cdatas = cdatas[:2]
            cdatas[1][key] = {}
            cdatas.append(cdatas[1][key])
        else:
            words = line.split(':')
            key = words[0].strip().strip('"')
            value = None
            if len(words) > 1 and (len(words[1].strip()) > 0 or old_style):
                value = ':'.join(words[1:]).strip().strip('"')
            if old_style:
                if value is None:
                    # new sub-section:
                    cdatas = cdatas[:3]
                    cdatas[2][key] = {}
                    cdatas.append(cdatas[2][key])
                else:
                    # append grid number to the key to make it unique:
                    if grid_n and key[-1] != grid_n:
                        key = key + grid_n
                    cdatas[-1][key] = value
            else:
                # get section level from indentation:
                level = 0
                nident = len(line) - len(line.lstrip())
                if ident_offs is None:
                    ident_offs = nident
                elif ident is None:
                    if nident > ident_offs:
                        ident = nident - ident_offs
                        level = 1
                else:
                    level = (nident - ident_offs)//ident
                # close sections deeper than the current level:
                cdatas = cdatas[:2 + level]
                if value is None:
                    # new section:
                    cdatas[-1][key] = {}
                    cdatas.append(cdatas[-1][key])
                else:
                    # key-value pair, with literal '\n' expanded:
                    cdatas[-1][key] = value.replace(r'\n', '\n')
    # remove unused grids:
    fgm = data.get('FishGrid', {})
    for i in range(4):
        gs = f'Grid {i+1}'
        if gs in fgm:
            gm = fgm[gs]
            us = f'Used{i+1}'
            if us in gm and gm[us].upper() == 'FALSE':
                del fgm[gs]
    return data

688 

689 

def markers_fishgrid(file_path):
    """ Read markers of a fishgrid data set.

    Parameters
    ----------
    file_path: str
        A fishgrid data directory or a file therein.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    def add_marker():
        # append the currently collected marker fields to locs/labels;
        # indices in timestamps.dat count samples over all channels, so
        # divide by the channel count to get frame positions:
        if 'index1' in marker:
            index1 = int(marker['index1'])//nchannels
        else:
            index1 = int(marker['index'])//nchannels
        span1 = int(marker.get('span1', 0))//nchannels
        locs.append([index1, span1])
        ls = marker.get('label', 'M')
        cs = marker.get('comment', '')
        labels.append([ls, cs])

    fishgrid_dir = file_path
    if not os.path.isdir(fishgrid_dir):
        fishgrid_dir = os.path.dirname(file_path)
    path = os.path.join(fishgrid_dir, 'timestamps.dat')
    if not os.path.isfile(path):
        # no timestamps file - no markers:
        return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)
    # get number of channels from the first grid of the configuration:
    md = metadata_fishgrid(path.replace('timestamps.dat', 'fishgrid.cfg'))
    grids = fishgrid_grids(md)
    nchannels = np.prod(grids[0])
    # read timestamps: each marker is a block of "key: value" lines,
    # blocks are separated by blank lines:
    locs = []
    labels = []
    marker = {}
    with open(path, 'r') as sf:
        for line in sf:
            if len(line.strip()) == 0:
                # blank line finishes the current marker block:
                add_marker()
                marker = {}
            else:
                words = line.split(':')
                if len(words) > 1:
                    v = words[1].strip()
                    v = v.strip('"')
                    marker[words[0].strip().lower()] = v
    if len(marker) > 0:
        # file did not end with a blank line - flush the last block:
        add_marker()
    # NOTE(review): the first and last entries are dropped here -
    # presumably they mark recording start/stop rather than real
    # markers; confirm against the fishgrid file format.
    if len(locs) > 2:
        return np.array(locs[1:-1]), np.array(labels[1:-1])
    else:
        return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)

749 

750 

def check_container(filepath):
    """Check if file is a generic container file.

    Supported file formats are:

    - python pickle files (.pkl)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    filepath: str
        Path of the file to check.

    Returns
    -------
    is_container: bool
        `True`, if `filepath` is a supported container format.
    """
    # decide by file extension only (case insensitive):
    extension = os.path.splitext(filepath)[1].lower()
    return extension in ('.pkl', '.npz', '.mat')

772 

773 

def extract_container_data(data_dict, datakey=None,
                           samplekey=['rate', 'Fs', 'fs'],
                           timekey=['time'], amplkey=['amax'], unitkey='unit',
                           amax=1.0, unit='a.u.'):
    """Extract data from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    datakey: None, str, or list of str
        Name of the variable holding the data. If `None` take the
        variable that is an 2D array and has the largest number of
        elements.
    samplekey: str or list of str
        Name of the variable holding the sampling rate.
    timekey: str or list of str
        Name of the variable holding sampling times.
        If no sampling rate is available, the sampling rate is retrieved
        from the sampling times.
    amplkey: str or list of str
        Name of the variable holding the amplitude range of the data.
    unitkey: str
        Name of the variable holding the unit of the data.
    amax: None or float
        If specified and no amplitude range has been found in `data_dict`,
        then this is the amplitude range of the data.
    unit: None or str
        If specified and no unit has been found in `data_dict`,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range in `unit`.

    Raises
    ------
    ValueError:
        Invalid key requested.
    """
    def as_keys(key):
        # normalize a single key to a sequence of keys:
        return key if isinstance(key, (list, tuple, np.ndarray)) else (key,)

    samplekey = as_keys(samplekey)
    timekey = as_keys(timekey)
    amplkey = as_keys(amplkey)
    # sampling rate, either directly or derived from sampling times:
    rate = 0.0
    for skey in samplekey:
        if skey in data_dict:
            rate = float(data_dict[skey])
            break
    if rate == 0.0:
        for tkey in timekey:
            if tkey in data_dict:
                times = data_dict[tkey]
                rate = 1.0/(times[1] - times[0])
                break
    if rate == 0.0:
        raise ValueError(f"invalid keys {', '.join(samplekey)} and {', '.join(timekey)} for requesting sampling rate or sampling times")
    # amplitude range and unit:
    for akey in amplkey:
        if akey in data_dict:
            amax = float(data_dict[akey])
            break
    if unitkey in data_dict:
        unit = data_dict[unitkey]
    # get data array:
    raw_data = np.array([])
    if datakey:
        # look up the first matching data key:
        datakey = as_keys(datakey)
        for dkey in datakey:
            if dkey in data_dict:
                raw_data = data_dict[dkey]
                break
        if len(raw_data) == 0:
            raise ValueError(f"invalid key(s) {', '.join(datakey)} for requesting data")
    else:
        # fall back on the largest 1-D or 2-D array in the container:
        for name in data_dict:
            item = data_dict[name]
            if hasattr(item, 'shape') and 1 <= len(item.shape) <= 2 and \
               np.max(item.shape) > np.max(raw_data.shape):
                raw_data = item
        if len(raw_data) == 0:
            raise ValueError('no data found')
    # make 2D with time as the first dimension:
    if len(raw_data.shape) == 1:
        raw_data = raw_data.reshape(-1, 1)
    if np.argmax(raw_data.shape) > 0:
        raw_data = raw_data.T
    # recode integer data to floats scaled to amax:
    if raw_data.dtype == np.dtype('int16'):
        data = raw_data.astype('float32')
        data *= amax/2**15
    elif raw_data.dtype == np.dtype('int32'):
        data = raw_data.astype(float)
        data *= amax/2**31
    elif raw_data.dtype == np.dtype('int64'):
        data = raw_data.astype(float)
        data *= amax/2**63
    else:
        data = raw_data
    return data, rate, unit, amax

887 

888 

def load_container(file_path, datakey=None,
                   samplekey=['rate', 'Fs', 'fs'],
                   timekey=['time'], amplkey=['amax'], unitkey='unit',
                   amax=1.0, unit='a.u.'):
    """Load data from a generic container file.

    Supported file formats are:

    - python pickle files (.pkl)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    file_path: str
        Path of the file to load.
    datakey: None, str, or list of str
        Name of the variable holding the data. If `None` take the
        variable that is an 2D array and has the largest number of
        elements.
    samplekey: str or list of str
        Name of the variable holding the sampling rate.
    timekey: str or list of str
        Name of the variable holding sampling times.
        If no sampling rate is available, the sampling rate is retrieved
        from the sampling times.
    amplkey: str
        Name of the variable holding the amplitude range of the data.
    unitkey: str
        Name of the variable holding the unit of the data.
        If `unitkey` is not a valid key, then return `unitkey` as the `unit`.
    amax: None or float
        If specified and no amplitude range has been found in the data
        container, then this is the amplitude range of the data.
    unit: None or str
        If specified and no unit has been found in the data container,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        Invalid key requested.
    """
    # dispatch on the file extension to fill a dictionary of variables:
    data_dict = {}
    ext = os.path.splitext(file_path)[1]
    if ext == '.pkl':
        import pickle
        # NOTE: unpickling executes arbitrary code - only load trusted files.
        with open(file_path, 'rb') as f:
            data_dict = pickle.load(f)
    elif ext == '.npz':
        data_dict = np.load(file_path)
    elif ext == '.mat':
        from scipy.io import loadmat
        data_dict = loadmat(file_path, squeeze_me=True)
    # delegate the actual extraction:
    return extract_container_data(data_dict, datakey, samplekey,
                                  timekey, amplkey, unitkey, amax, unit)

958 

959 

def extract_container_metadata(data_dict, metadatakey=['metadata', 'info']):
    """ Extract metadata from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    metadatakey: str or list of str
        Name of the variable holding the metadata.

    Returns
    -------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    if not isinstance(metadatakey, (list, tuple, np.ndarray)):
        metadatakey = (metadatakey,)
    # a single variable holding the whole metadata dictionary:
    for mkey in metadatakey:
        if mkey in data_dict:
            return data_dict[mkey]
    # otherwise collect all flattened keys like 'metadata__section__name':
    metadata = {}
    for mkey in metadatakey:
        prefix = mkey + '__'
        for dkey in data_dict:
            if dkey[:len(prefix)] == prefix:
                v = data_dict[dkey]
                # unwrap zero-dimensional arrays to plain scalars:
                if hasattr(v, 'size') and v.ndim == 0:
                    v = v.item()
                metadata[dkey[len(prefix):]] = v
    if len(metadata) > 0:
        return unflatten_metadata(metadata, sep='__')
    return metadata

994 

995 

def metadata_container(file_path, metadatakey=['metadata', 'info']):
    """ Read meta-data of a container file.

    Parameters
    ----------
    file_path: str
        A container file.
    metadatakey: str or list of str
        Name of the variable holding the metadata.

    Returns
    -------
    metadata: nested dict
        Nested dictionary with key-value pairs of the meta data.
    """
    # dispatch on the file extension to fill a dictionary of variables:
    data_dict = {}
    ext = os.path.splitext(file_path)[1]
    if ext == '.pkl':
        import pickle
        # NOTE: unpickling executes arbitrary code - only load trusted files.
        with open(file_path, 'rb') as f:
            data_dict = pickle.load(f)
    elif ext == '.npz':
        data_dict = np.load(file_path)
    elif ext == '.mat':
        from scipy.io import loadmat
        data_dict = loadmat(file_path, squeeze_me=True)
    # delegate the actual extraction:
    return extract_container_metadata(data_dict, metadatakey)

1023 

1024 

def extract_container_markers(data_dict, poskey=['positions'],
                              spanskey=['spans'], labelskey=['labels'],
                              descrkey=['descriptions']):
    """ Extract markers from dictionary loaded from a container file.

    Parameters
    ----------
    data_dict: dict
        Dictionary of the data items contained in the container.
    poskey: str or list of str
        Name of the variable holding positions of markers.
    spanskey: str or list of str
        Name of the variable holding spans of markers.
    labelskey: str or list of str
        Name of the variable holding labels of markers.
    descrkey: str or list of str
        Name of the variable holding descriptions of markers.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    def as_keys(key):
        # accept a single name as well as a sequence of candidate names:
        if isinstance(key, (list, tuple, np.ndarray)):
            return key
        return (key,)

    def find(keys):
        # value of the first matching variable, or None:
        for key in keys:
            if key in data_dict:
                return data_dict[key]
        return None

    locs = np.zeros((0, 2), dtype=int)
    positions = find(as_keys(poskey))
    if positions is not None:
        locs = np.zeros((len(positions), 2), dtype=int)
        locs[:,0] = positions
    spans = find(as_keys(spanskey))
    if spans is not None:
        locs[:,1] = spans
    labels = np.zeros((0, 2), dtype=object)
    names = find(as_keys(labelskey))
    if names is not None:
        labels = np.zeros((len(names), 2), dtype=object)
        labels[:,0] = names
    descriptions = find(as_keys(descrkey))
    if descriptions is not None:
        labels[:,1] = descriptions
    return locs, labels

1081 

1082 

def markers_container(file_path, poskey=['positions'],
                      spanskey=['spans'], labelskey=['labels'],
                      descrkey=['descriptions']):
    """ Read markers of a container file.

    Parameters
    ----------
    file_path: str
        A container file.
    poskey: str or list of str
        Name of the variable holding positions of markers.
    spanskey: str or list of str
        Name of the variable holding spans of markers.
    labelskey: str or list of str
        Name of the variable holding labels of markers.
    descrkey: str or list of str
        Name of the variable holding descriptions of markers.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    ext = os.path.splitext(file_path)[1]
    if ext == '.pkl':
        import pickle
        with open(file_path, 'rb') as f:
            return extract_container_markers(pickle.load(f), poskey,
                                             spanskey, labelskey, descrkey)
    if ext == '.npz':
        return extract_container_markers(np.load(file_path), poskey,
                                         spanskey, labelskey, descrkey)
    if ext == '.mat':
        from scipy.io import loadmat
        return extract_container_markers(loadmat(file_path, squeeze_me=True),
                                         poskey, spanskey, labelskey, descrkey)
    # unknown extension: no markers.
    return extract_container_markers({}, poskey, spanskey, labelskey, descrkey)

1123 

1124 

def check_raw(filepath):
    """Check if file is a raw file.

    The following extensions are interpreted as raw files:

    - raw files (*.raw)
    - LabView scandata (*.scandat)
    - matlab files (*.mat)

    Parameters
    ----------
    filepath: str
        Path of the file to check.

    Returns
    -------
    is_raw: bool
        `True`, if `filepath` is a raw format.
    """
    # dispatch on the lower-cased file extension only:
    extension = os.path.splitext(filepath)[1].lower()
    return extension in ('.raw', '.scandat', '.mat')

1145 

1146 

def load_raw(file_path, rate=44000, channels=1, dtype=np.float32,
             amax=1.0, unit='a.u.'):
    """Load data from a raw file.

    Raw files just contain the data and absolutely no metadata, not
    even the sampling rate, number of channels, etc.
    Supported file formats are:

    - raw files (*.raw)
    - LabView scandata (*.scandat)

    Parameters
    ----------
    file_path: str
        Path of the file to load.
    rate: float
        Sampling rate of the data in Hertz.
    channels: int
        Number of channels multiplexed in the data.
    dtype: str or numpy.dtype
        The data type stored in the file.
    amax: float
        The amplitude range of the data.
    unit: str
        The unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    """
    raw_data = np.fromfile(file_path, dtype=dtype).reshape(-1, channels)
    # recode integer samples to floats within [-amax, +amax]:
    scaling = {np.dtype('int16'): ('float32', 2**15),
               np.dtype('int32'): (float, 2**31),
               np.dtype('int64'): (float, 2**63)}
    key = np.dtype(dtype)
    if key in scaling:
        float_type, full_scale = scaling[key]
        data = raw_data.astype(float_type)
        data *= amax/full_scale
    else:
        data = raw_data
    return data, rate, unit, amax

1200 

1201 

def load_audioio(file_path, verbose=0, gainkey=default_gain_keys, sep='.',
                 amax=1.0, unit='a.u.'):
    """Load data from an audio file.

    See the
    [`load_audio()`](https://bendalab.github.io/audioio/api/audioloader.html#audioio.audioloader.load_audio)
    function of the [`audioio`](https://github.com/bendalab/audioio)
    package for more infos.

    Parameters
    ----------
    file_path: str
        Path of the file to load.
    verbose: int
        If > 0 show detailed error/warning messages.
    gainkey: str or list of str
        Key in the file's metadata that holds some gain information.
        If found, the data will be multiplied with the gain,
        and if available, the corresponding unit is returned.
        See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details.
    sep: str
        String that separates section names in `gainkey`.
    amax: float
        If specified and no gain has been found in the metadata,
        then use this as the amplitude range.
    unit: str
        If specified and no gain has been found in the metadata,
        then return this as the unit of the data.

    Returns
    -------
    data: 2-D array of floats
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data if found in the metadata (see `gainkey`),
        otherwise `unit`.
    amax: float
        Maximum amplitude of data range.
    """
    # retrieve gain factor and unit from the file's metadata:
    meta_data = metadata_audioio(file_path)
    amax, unit = get_gain(meta_data, gainkey, sep, amax, unit)
    # load the audio data and scale it to the gain:
    data, rate = load_audio(file_path, verbose)
    if amax != 1.0:
        data *= amax
    return data, rate, unit, amax

1252 

1253 

data_loader_funcs = (
    ('relacs', check_relacs, load_relacs, metadata_relacs, None),
    ('fishgrid', check_fishgrid, load_fishgrid, metadata_fishgrid, markers_fishgrid),
    ('container', check_container, load_container, metadata_container, markers_container),
    ('raw', check_raw, load_raw, None, None),
    ('audioio', None, load_audioio, metadata_audioio, markers_audioio),
    )
"""List of implemented load functions.

Each element of the list is a tuple with the data format's name, its
check function, its load function, its metadata function, and its
markers function. Any of the check, metadata, and markers functions
may be `None`. The formats are tried in this order; a `None` check
function matches any file, so audioio acts as the final fallback.
"""

1267 

1268 

def load_data(file_path, verbose=0, **kwargs):
    """Load time-series data from a file.

    Parameters
    ----------
    file_path: str
        Path and name of the file to load.
    verbose: int
        If > 0 show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        format specific loading functions.
        For example:
        - `amax`: the amplitude range of the data.
        - `unit`: the unit of the data.

    Returns
    -------
    data: 2-D array
        All data traces as an 2-D numpy array, even for single channel data.
        First dimension is time, second is channel.
    rate: float
        Sampling rate of the data in Hz.
    unit: str
        Unit of the data.
    amax: float
        Maximum amplitude of data range.

    Raises
    ------
    ValueError:
        `file_path` is empty string.
    """
    if len(file_path) == 0:
        raise ValueError('input argument file_path is empty string.')
    # try each format in turn; a check function of None matches any file:
    for name, check_file, load_file, _, _ in data_loader_funcs:
        if check_file is not None and not check_file(file_path):
            continue
        data, rate, unit, amax = load_file(file_path, **kwargs)
        if verbose > 0:
            print(f'loaded {name} data from file "{file_path}"')
            if verbose > 1:
                print(f' sampling rate: {rate:g} Hz')
                print(f' channels : {data.shape[1]}')
                print(f' frames : {len(data)}')
                print(f' range : {amax:g}{unit}')
        return data, rate, unit, amax
    # no format matched:
    return np.zeros((0, 1)), 0.0, '', 1.0

1317 

1318 

def metadata(file_path, **kwargs):
    """ Read meta-data from a data file.

    Parameters
    ----------
    file_path: str
        The full path and name of the file to load. For some file
        formats several files can be provided in a list.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        format specific loading functions.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the file. Keys of the nested
        dictionaries are always strings. If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings, or list of strings. But other
        simple types like ints or floats are also allowed.

    Raises
    ------
    ValueError:
        `file_path` is empty string.
    """
    if len(file_path) == 0:
        raise ValueError('input argument file_path is empty string.')
    # the first format whose check passes handles the file:
    for _, check_file, _, metadata_file, _ in data_loader_funcs:
        if check_file is not None and not check_file(file_path):
            continue
        if metadata_file is None:
            # format does not support metadata:
            return {}
        return metadata_file(file_path, **kwargs)

1355 

1356 

def markers(file_path):
    """ Read markers of a data file.

    Parameters
    ----------
    file_path: str or file handle
        The data file.

    Returns
    -------
    locs: 2-D array of ints
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D array of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Raises
    ------
    ValueError:
        `file_path` is empty string.
    """
    if len(file_path) == 0:
        raise ValueError('input argument file_path is empty string.')
    # the first format whose check passes handles the file:
    for _, check_file, _, _, markers_file in data_loader_funcs:
        if check_file is not None and not check_file(file_path):
            continue
        if markers_file is None:
            # format does not support markers:
            return np.zeros((0, 2), dtype=int), np.zeros((0, 2), dtype=object)
        return markers_file(file_path)

1387 

1388 

1389class DataLoader(AudioLoader): 

1390 """Buffered reading of time-series data for random access of the data in the file. 

1391  

1392 This allows for reading very large data files that do not fit into 

1393 memory. A `DataLoader` instance can be used like a huge 

1394 read-only numpy array, i.e. 

1395 ``` 

1396 data = DataLoader('path/to/data/file.dat') 

1397 x = data[10000:20000,0] 

1398 ``` 

1399 The first index specifies the frame, the second one the channel. 

1400 

1401 `DataLoader` first determines the format of the data file and then 

1402 opens the file (first line). It then reads data from the file as 

1403 necessary for the requested data (second line). 

1404 

1405 Supported file formats are 

1406 

1407 - audio files via `audioio` package 

1408 - python pickle files 

1409 - numpy .npz files 

1410 - matlab .mat files 

1411 - relacs trace*.raw files (www.relacs.net) 

1412 - fishgrid traces-*.raw files 

1413 

1414 Reading sequentially through the file is always possible. If 

1415 previous data are requested, then the file is read from the 

1416 beginning. This might slow down access to previous data 

1417 considerably. Use the `backsize` argument to the open functions to 

1418 make sure some data are loaded before the requested frame. Then a 

1419 subsequent access to the data within `backsize` seconds before that 

1420 frame can still be handled without the need to reread the file 

1421 from the beginning. 

1422 

1423 Usage: 

1424 ------ 

1425 ``` 

1426 import thunderlab.dataloader as dl 

1427 with dl.DataLoader(file_path, 60.0, 10.0) as data: 

1428 # do something with the content of the file: 

1429 x = data[0:10000,0] 

1430 y = data[10000:20000,0] 

1431 z = x + y 

1432 ``` 

1433  

1434 Normal open and close: 

1435 ``` 

1436 data = dl.DataLoader(file_path, 60.0) 

1437 x = data[:,:] # read the whole file 

1438 data.close() 

1439 ```  

1440 that is the same as: 

1441 ``` 

1442 data = dl.DataLoader() 

1443 data.open(file_path, 60.0) 

1444 ``` 

1445  

1446 Parameters 

1447 ---------- 

1448 file_path: str 

1449 Name of the file. 

1450 buffersize: float 

1451 Size of internal buffer in seconds. 

1452 backsize: float 

1453 Part of the buffer to be loaded before the requested start index in seconds. 

1454 verbose: int 

1455 If larger than zero show detailed error/warning messages. 

1456 meta_kwargs: dict 

1457 Keyword arguments that are passed on to the _load_metadata() function. 

1458 

1459 Attributes 

1460 ---------- 

1461 rate: float 

1462 The sampling rate of the data in Hertz. 

1463 channels: int 

1464 The number of channels that are read in. 

1465 frames: int 

1466 The number of frames in the file. 

1467 format: str or None 

1468 Format of the audio file. 

1469 encoding: str or None 

1470 Encoding/subtype of the audio file. 

1471 shape: tuple 

1472 Number of frames and channels of the data. 

1473 ndim: int 

1474 Number of dimensions: always 2 (frames and channels). 

1475 unit: str 

1476 Unit of the data. 

1477 ampl_min: float 

1478 Minimum amplitude the file format supports. 

1479 ampl_max: float 

1480 Maximum amplitude the file format supports. 

1481 

1482 Methods 

1483 ------- 

1484 

1485 - `len()`: the number of frames 

1486 - `open()`: open a data file. 

1487 - `open_*()`: open a data file of a specific format. 

1488 - `close()`: close the file. 

1489 - `metadata()`: metadata of the file. 

1490 - `markers()`: markers of the file. 

1491 - `set_unwrap()`: Set parameters for unwrapping clipped data. 

1492 

1493 """ 

1494 

    def __init__(self, file_path=None, buffersize=10.0, backsize=0.0,
                 verbose=0, **meta_kwargs):
        # Initialize the AudioLoader base class without opening a file;
        # the actual file (if any) is opened below via open(), which
        # dispatches on the detected file format:
        super().__init__(None, buffersize, backsize,
                         verbose, **meta_kwargs)
        if file_path is not None:
            self.open(file_path, buffersize, backsize, verbose, **meta_kwargs)

1501 

1502 def __getitem__(self, key): 

1503 return super(DataLoader, self).__getitem__(key) 

1504 

1505 def __next__(self): 

1506 return super(DataLoader, self).__next__() 

1507 

1508 

1509 # relacs interface:  

    def open_relacs(self, file_path, buffersize=10.0, backsize=0.0,
                    verbose=0, amax=1.0):
        """Open relacs data files (www.relacs.net) for reading.

        Parameters
        ----------
        file_path: str
            Path to a relacs data directory or a file therein.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        amax: float
            The amplitude range of the data.

        Raises
        ------
        ValueError: .gz files not supported.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_relacs()

        trace_file_paths = relacs_trace_files(file_path)

        # open trace files:
        self.sf = []
        self.frames = None
        self.rate = None
        self.unit = ''
        self.filepath = None
        if len(trace_file_paths) > 0:
            self.filepath = os.path.dirname(trace_file_paths[0])
        for path in sorted(trace_file_paths):
            if path[-3:] == '.gz':
                raise ValueError('.gz files not supported')
            sf = open(path, 'rb')
            self.sf.append(sf)
            if verbose > 0:
                print(f'open_relacs(file_path) with file_path={path}')
            # file size: each trace file holds one channel of 4-byte samples,
            # so the number of frames is the file size divided by 4:
            sf.seek(0, os.SEEK_END)
            frames = sf.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # traces are allowed to differ by a few frames; keep the
                # smallest frame count, otherwise give up:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            sf.seek(0)
            # retrieve sampling rate and unit:
            rate, us = relacs_samplerate_unit(path)
            # all traces must agree on sampling rate and unit:
            if self.rate is None:
                self.rate = rate
            elif rate != self.rate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
        self.channels = len(self.sf)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.ndim = len(self.shape)
        self.format = 'RELACS'
        self.encoding = 'FLOAT'
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.offset = 0
        # hook up format-specific close and buffer-loading callbacks:
        self.close = self._close_relacs
        self.load_audio_buffer = self._load_buffer_relacs
        self.ampl_min = -amax
        self.ampl_max = +amax
        self._load_metadata = self._metadata_relacs
        # TODO: load markers:
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
        self._load_markers = None
        return self

1595 

1596 def _close_relacs(self): 

1597 """Close the relacs data files. 

1598 """ 

1599 if self.sf is not None: 

1600 for file in self.sf: 

1601 file.close() 

1602 self.sf = None 

1603 

1604 def _load_buffer_relacs(self, r_offset, r_size, buffer): 

1605 """Load new data from relacs data file. 

1606 

1607 Parameters 

1608 ---------- 

1609 r_offset: int 

1610 First frame to be read from file. 

1611 r_size: int 

1612 Number of frames to be read from file. 

1613 buffer: ndarray 

1614 Buffer where to store the loaded data. 

1615 """ 

1616 for i, file in enumerate(self.sf): 

1617 file.seek(r_offset*4) 

1618 data = file.read(r_size*4) 

1619 buffer[:, i] = np.frombuffer(data, dtype=np.float32) 

1620 

1621 

1622 def _metadata_relacs(self, store_empty=False, first_only=False): 

1623 """ Load meta-data of a relacs data set. 

1624 """ 

1625 info_path = os.path.join(self.filepath, 'info.dat') 

1626 if not os.path.exists(info_path): 

1627 return {} 

1628 return relacs_header(info_path, store_empty, first_only) 

1629 

1630 

1631 # fishgrid interface:  

    def open_fishgrid(self, file_path, buffersize=10.0, backsize=0.0,
                      verbose=0):
        """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

        Parameters
        ----------
        file_path: str
            Path to a fishgrid data directory, or a file therein.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_fishgrid()

        trace_file_paths = fishgrid_trace_files(file_path)
        self.filepath = None
        if len(trace_file_paths) > 0:
            self.filepath = os.path.dirname(trace_file_paths[0])
        self._load_metadata = metadata_fishgrid
        self._load_markers = markers_fishgrid

        # open grid files:
        # each trace file holds one electrode grid of rows x columns channels:
        grids = fishgrid_grids(self.metadata())
        grid_sizes = [r*c for r,c in grids]
        self.channels = 0
        for g, path in enumerate(trace_file_paths):
            self.channels += grid_sizes[g]
        self.sf = []
        self.grid_channels = []
        self.grid_offs = []
        offs = 0
        self.frames = None
        # sampling rate and amplitude range come from the grid metadata:
        self.rate = get_number(self.metadata(), 'Hz', 'AISampleRate')
        v, self.unit = get_number_unit(self.metadata(), 'AIMaxVolt')
        if v is not None:
            self.ampl_min = -v
            self.ampl_max = +v

        for g, path in enumerate(trace_file_paths):
            sf = open(path, 'rb')
            self.sf.append(sf)
            if verbose > 0:
                print(f'open_fishgrid(file_path) with file_path={path}')
            # grid channels: remember channel count and channel offset per grid:
            self.grid_channels.append(grid_sizes[g])
            self.grid_offs.append(offs)
            offs += grid_sizes[g]
            # file size: 4-byte samples interleaved over all grid channels:
            sf.seek(0, os.SEEK_END)
            frames = sf.tell()//4//grid_sizes[g]
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # grids are allowed to differ by a few frames; keep the
                # smallest frame count, otherwise give up:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            sf.seek(0)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.ndim = len(self.shape)
        self.format = 'FISHGRID'
        self.encoding = 'FLOAT'
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.offset = 0
        # hook up format-specific close and buffer-loading callbacks:
        self.close = self._close_fishgrid
        self.load_audio_buffer = self._load_buffer_fishgrid
        return self

1709 

1710 def _close_fishgrid(self): 

1711 """Close the fishgrid data files. 

1712 """ 

1713 if self.sf is not None: 

1714 for file in self.sf: 

1715 file.close() 

1716 self.sf = None 

1717 

1718 def _load_buffer_fishgrid(self, r_offset, r_size, buffer): 

1719 """Load new data from relacs data file. 

1720 

1721 Parameters 

1722 ---------- 

1723 r_offset: int 

1724 First frame to be read from file. 

1725 r_size: int 

1726 Number of frames to be read from file. 

1727 buffer: ndarray 

1728 Buffer where to store the loaded data. 

1729 """ 

1730 for file, gchannels, goffset in zip(self.sf, self.grid_channels, self.grid_offs): 

1731 file.seek(r_offset*4*gchannels) 

1732 data = file.read(r_size*4*gchannels) 

1733 buffer[:, goffset:goffset+gchannels] = np.frombuffer(data, dtype=np.float32).reshape((-1, gchannels)) 

1734 

1735 

1736 # container interface: 

    def open_container(self, file_path, buffersize=10.0,
                       backsize=0.0, verbose=0, datakey=None,
                       samplekey=['rate', 'Fs', 'fs'],
                       timekey=['time'], amplkey=['amax'], unitkey='unit',
                       metadatakey=['metadata', 'info'],
                       poskey=['positions'],
                       spanskey=['spans'], labelskey=['labels'],
                       descrkey=['descriptions'],
                       amax=1.0, unit='a.u.'):
        """Open generic container file.

        Supported file formats are:

        - python pickle files (.pkl)
        - numpy files (.npz)
        - matlab files (.mat)

        Parameters
        ----------
        file_path: str
            Path to a container file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        datakey: None, str, or list of str
            Name of the variable holding the data. If `None` take the
            variable that is an 2D array and has the largest number of
            elements.
        samplekey: str or list of str
            Name of the variable holding the sampling rate.
        timekey: str or list of str
            Name of the variable holding sampling times.
            If no sampling rate is available, the sampling rate is retrieved
            from the sampling times.
        amplkey: str or list of str
            Name of the variable holding the amplitude range of the data.
        unitkey: str
            Name of the variable holding the unit of the data.
        metadatakey: str or list of str
            Name of the variable holding the metadata.
        poskey: str or list of str
            Name of the variable holding positions of markers.
        spanskey: str or list of str
            Name of the variable holding spans of markers.
        labelskey: str or list of str
            Name of the variable holding labels of markers.
        descrkey: str or list of str
            Name of the variable holding descriptions of markers.
        amax: None or float
            If specified and no amplitude range has been found in the data
            container, then this is the amplitude range of the data.
        unit: None or str
            If specified and no unit has been found in the data container,
            then return this as the unit of the data.

        Raises
        ------
        ValueError:
            Invalid key requested.
        """
        self.verbose = verbose
        data_dict = {}
        ext = os.path.splitext(file_path)[1]
        if ext == '.pkl':
            import pickle
            with open(file_path, 'rb') as f:
                data_dict = pickle.load(f)
            self.format = 'PKL'
        elif ext == '.npz':
            data_dict = np.load(file_path)
            self.format = 'NPZ'
        elif ext == '.mat':
            from scipy.io import loadmat
            data_dict = loadmat(file_path, squeeze_me=True)
            self.format = 'MAT'
        # container files are small enough to keep the complete data in
        # memory: self.buffer holds all frames right away.
        self.buffer, self.rate, self.unit, amax = \
            extract_container_data(data_dict, datakey, samplekey,
                                   timekey, amplkey, unitkey, amax, unit)
        self.filepath = file_path
        self.channels = self.buffer.shape[1]
        self.frames = self.buffer.shape[0]
        self.shape = self.buffer.shape
        self.ndim = self.buffer.ndim
        self.size = self.buffer.size
        self.encoding = self.numpy_encodings[self.buffer.dtype]
        self.ampl_min = -amax
        self.ampl_max = +amax
        self.offset = 0
        self.buffer_changed = np.zeros(self.channels, dtype=bool)
        # the buffer covers the whole file, so no back-buffering is needed:
        self.bufferframes = self.frames
        self.backsize = 0
        self.close = self._close_container
        self.load_audio_buffer = self._load_buffer_container
        # metadata and markers are extracted immediately, so the lazy
        # loader callbacks are disabled:
        self._metadata = extract_container_metadata(data_dict, metadatakey)
        self._load_metadata = None
        self._locs, self._labels = extract_container_markers(data_dict,
                                                             poskey,
                                                             spanskey,
                                                             labelskey,
                                                             descrkey)
        self._load_markers = None

1841 

1842 def _close_container(self): 

1843 """Close container. """ 

1844 pass 

1845 

1846 def _load_buffer_container(self, r_offset, r_size, buffer): 

1847 """Load new data from container.""" 

1848 buffer[:, :] = self.buffer[r_offset:r_offset + r_size, :] 

1849 

1850 

1851 # raw data interface: 

1852 def open_raw(self, file_path, buffersize=10.0, backsize=0.0, 

1853 verbose=0, rate=44000, channels=1, dtype=np.float32, 

1854 amax=1.0, unit='a.u.'): 

1855 """Load data from a raw file. 

1856 

1857 Raw files just contain the data and absolutely no metadata, not 

1858 even the smapling rate, number of channels, etc. 

1859 Supported file formats are: 

1860 

1861 - raw files (*.raw) 

1862 - LabView scandata (*.scandat) 

1863 

1864 Parameters 

1865 ---------- 

1866 file_path: str 

1867 Path of the file to load. 

1868 buffersize: float 

1869 Size of internal buffer in seconds. 

1870 backsize: float 

1871 Part of the buffer to be loaded before the requested start index in seconds. 

1872 verbose: int 

1873 If > 0 show detailed error/warning messages. 

1874 rate: float 

1875 Sampling rate of the data in Hertz. 

1876 channels: int 

1877 Number of channels multiplexed in the data. 

1878 dtype: str or numpy.dtype 

1879 The data type stored in the file. 

1880 amax: float 

1881 The amplitude range of the data. 

1882 unit: str 

1883 The unit of the data. 

1884 """ 

1885 self.verbose = verbose 

1886 self.filepath = file_path 

1887 self.sf = open(file_path, 'rb') 

1888 if verbose > 0: 

1889 print(f'open_raw(file_path) with file_path={file_path}') 

1890 self.dtype = np.dtype(dtype) 

1891 self.rate = float(rate) 

1892 # file size: 

1893 self.sf.seek(0, os.SEEK_END) 

1894 self.frames = self.sf.tell()//self.dtype.itemsize 

1895 self.sf.seek(0) 

1896 self.channels = int(channels) 

1897 self.shape = (self.frames, self.channels) 

1898 self.ndim = len(self.shape) 

1899 self.size = self.frames*self.channels 

1900 self.format = 'RAW' 

1901 self.encoding = self.numpy_encodings.get(self.dtype, 'UNKNOWN') 

1902 self.unit = unit 

1903 self.ampl_max = float(amax) 

1904 self.ampl_min = -self.ampl_max 

1905 self.offset = 0 

1906 self.bufferframes = int(buffersize*self.rate) 

1907 self.backframes = int(backsize*self.rate) 

1908 self.init_buffer() 

1909 self.close = self._close_raw 

1910 self.load_audio_buffer = self._load_buffer_raw 

1911 self._metadata = None 

1912 self._load_metadata = None 

1913 self._locs = None 

1914 self._labels = None 

1915 self._load_markers = None 

1916 

1917 def _close_raw(self): 

1918 """Close raw file. """ 

1919 self.sf.close() 

1920 self.sf = None 

1921 

1922 def _load_buffer_raw(self, r_offset, r_size, buffer): 

1923 """Load new data from container.""" 

1924 self.sf.seek(r_offset*self.dtype.itemsize) 

1925 raw_data = self.sf.read(r_size*self.dtype.itemsize) 

1926 raw_data = np.frombuffer(raw_data, dtype=self.dtype) 

1927 raw_data = raw_data.reshape(-1, self.channels) 

1928 # recode: 

1929 if self.dtype == np.dtype('int16'): 

1930 data = raw_data.astype('float32') 

1931 data *= amax/2**15 

1932 elif self.dtype == np.dtype('int32'): 

1933 data = raw_data.astype(float) 

1934 data *= amax/2**31 

1935 elif self.dtype == np.dtype('int64'): 

1936 data = raw_data.astype(float) 

1937 data *= amax/2**63 

1938 else: 

1939 data = raw_data 

1940 buffer[:, :] = data 

1941 

1942 

1943 # audioio interface:  

1944 def open_audioio(self, file_path, buffersize=10.0, backsize=0.0, 

1945 verbose=0, gainkey=default_gain_keys, sep='.', 

1946 amax=None, unit='a.u.'): 

1947 """Open an audio file. 

1948 

1949 See the [audioio](https://github.com/bendalab/audioio) package 

1950 for details. 

1951 

1952 Parameters 

1953 ---------- 

1954 file_path: str 

1955 Path to an audio file. 

1956 buffersize: float 

1957 Size of internal buffer in seconds. 

1958 backsize: float 

1959 Part of the buffer to be loaded before the requested start index 

1960 in seconds. 

1961 verbose: int 

1962 If > 0 show detailed error/warning messages. 

1963 gainkey: str or list of str 

1964 Key in the file's metadata that holds some gain information. 

1965 If found, the data will be multiplied with the gain, 

1966 and if available, the corresponding unit is returned. 

1967 See the [audioio.get_gain()](https://bendalab.github.io/audioio/api/audiometadata.html#audioio.audiometadata.get_gain) function for details. 

1968 sep: str 

1969 String that separates section names in `gainkey`. 

1970 amax: None or float 

1971 If specified and no gain has been found in the metadata, 

1972 then use this as the amplitude range. 

1973 unit: None or str 

1974 If specified and no gain has been found in the metadata, 

1975 then this is the unit of the data. 

1976 

1977 """ 

1978 self.verbose = verbose 

1979 super(DataLoader, self).open(file_path, buffersize, backsize, verbose) 

1980 md = self.metadata() 

1981 fac, unit = get_gain(md, gainkey, sep, amax, unit) 

1982 if fac is None: 

1983 self.gain_fac = 1.0 

1984 else: 

1985 self.gain_fac = fac 

1986 self._load_buffer_audio_org = self.load_audio_buffer 

1987 self.load_audio_buffer = self._load_buffer_audioio 

1988 self.ampl_min *= self.gain_fac 

1989 self.ampl_max *= self.gain_fac 

1990 self.unit = unit 

1991 return self 

1992 

1993 def _load_buffer_audioio(self, r_offset, r_size, buffer): 

1994 """Load and scale new data from an audio file. 

1995 

1996 Parameters 

1997 ---------- 

1998 r_offset: int 

1999 First frame to be read from file. 

2000 r_size: int 

2001 Number of frames to be read from file. 

2002 buffer: ndarray 

2003 Buffer where to store the loaded data. 

2004 """ 

2005 self._load_buffer_audio_org(r_offset, r_size, buffer) 

2006 buffer *= self.gain_fac 

2007 

2008 

2009 def open(self, file_path, buffersize=10.0, backsize=0.0, 

2010 verbose=0, **kwargs): 

2011 """Open file with time-series data for reading. 

2012 

2013 Parameters 

2014 ---------- 

2015 file_path: str or list of str 

2016 Path to a data files or directory. 

2017 buffersize: float 

2018 Size of internal buffer in seconds. 

2019 backsize: float 

2020 Part of the buffer to be loaded before the requested start index 

2021 in seconds. 

2022 verbose: int 

2023 If > 0 show detailed error/warning messages. 

2024 **kwargs: dict 

2025 Further keyword arguments that are passed on to the  

2026 format specific opening functions. 

2027 For example: 

2028 - `amax`: the amplitude range of the data. 

2029 - 'unit': the unit of the data. 

2030 

2031 Raises 

2032 ------ 

2033 ValueError: 

2034 `file_path` is empty string. 

2035 """ 

2036 # list of implemented open functions: 

2037 data_open_funcs = ( 

2038 ('relacs', check_relacs, self.open_relacs, 1), 

2039 ('fishgrid', check_fishgrid, self.open_fishgrid, 1), 

2040 ('container', check_container, self.open_container, 1), 

2041 ('raw', check_raw, self.open_raw, 1), 

2042 ('audioio', None, self.open_audioio, 0), 

2043 ) 

2044 if len(file_path) == 0: 

2045 raise ValueError('input argument file_path is empty string.') 

2046 # open data: 

2047 for name, check_file, open_file, v in data_open_funcs: 

2048 if check_file is None or check_file(file_path): 

2049 open_file(file_path, buffersize, backsize, verbose, **kwargs) 

2050 if v*verbose > 1: 

2051 if self.format is not None: 

2052 print(f' format : {self.format}') 

2053 if self.encoding is not None: 

2054 print(f' encoding : {self.encoding}') 

2055 print(f' sampling rate: {self.rate} Hz') 

2056 print(f' channels : {self.channels}') 

2057 print(f' frames : {self.frames}') 

2058 print(f' range : {amax:g}{unit}') 

2059 break 

2060 return self 

2061 

2062 

def demo(file_path, plot=False):
    """Demonstrate the usage of load_data() and DataLoader.

    Parameters
    ----------
    file_path: str
        Path to a file with time-series data.
    plot: bool
        If True, plot the loaded data with matplotlib.
    """
    def plot_chunk(offset, x, rate, unit):
        # plot one chunk of a single channel against time in seconds:
        fig, ax = plt.subplots()
        ax.plot((offset + np.arange(len(x)))/rate, x)
        ax.set_xlabel('Time [s]')
        ax.set_ylabel(f'[{unit}]')
        plt.show()

    print("try load_data:")
    data, rate, unit, amax = load_data(file_path, verbose=2)
    if plot:
        fig, ax = plt.subplots()
        time = np.arange(len(data))/rate
        for c in range(data.shape[1]):
            ax.plot(time, data[:,c])
        ax.set_xlabel('Time [s]')
        ax.set_ylabel(f'[{unit}]')
        if amax is not None and np.isfinite(amax):
            ax.set_ylim(-amax, +amax)
        plt.show()
    # fix: removed a stray early `return` here that made the whole
    # DataLoader demonstration below unreachable dead code.

    print('')
    print("try DataLoader:")
    with DataLoader(file_path, 2.0, 1.0, 1) as data:
        print('sampling rate: %g' % data.rate)
        print('frames       : %d %d' % (len(data), data.shape[0]))
        nframes = int(1.0*data.rate)
        # forward:
        for i in range(0, len(data), nframes):
            print('forward %d-%d' % (i, i + nframes))
            x = data[i:i + nframes, 0]
            if plot:
                plot_chunk(i, x, data.rate, data.unit)
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print('backward %d-%d' % (i, i + nframes))
            x = data[i:i + nframes, 0]
            if plot:
                plot_chunk(i, x, data.rate, data.unit)

2104 

2105 

def main(*cargs):
    """Run the demo with command line arguments.

    Parameters
    ----------
    cargs: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    import argparse
    cli = argparse.ArgumentParser(
        description='Checking thunderlab.dataloader module.')
    cli.add_argument('-p', dest='plot', action='store_true',
                     help='plot loaded data')
    cli.add_argument('file', nargs=1, default='', type=str,
                     help='name of data file')
    opts = cli.parse_args(cargs)
    demo(opts.file[0], opts.plot)

2123 

2124 

if __name__ == "__main__":
    # run the demo when the module is executed as a script,
    # forwarding all command line arguments:
    main(*sys.argv[1:])