Coverage for src/audioio/audioloader.py: 90%

825 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-08-02 12:23 +0000

1"""Loading data, metadata, and markers from audio files. 

2 

3- `load_audio()`: load a whole audio file at once. 

4- `metadata()`: read metadata of an audio file. 

5- `markers()`: read markers of an audio file. 

6- class `AudioLoader`: read data from audio files in chunks. 

7 

8The read in data are always numpy arrays of floats ranging between -1 and 1. 

9The arrays are 2-D ndarrays with first axis time and second axis channel, 

10even for single channel data. 

11 

12If an audio file cannot be loaded, you might need to install 

13additional packages. See 

14[installation](https://bendalab.github.io/audioio/installation) for 

15further instructions. 

16 

17For a demo run the module as: 

18``` 

19python -m src.audioio.audioloader audiofile.wav 

20``` 

21""" 

22 

23import gc 

24import sys 

25import warnings 

26import os.path 

27import numpy as np 

28from pathlib import Path 

29from datetime import timedelta 

30from .audiomodules import * 

31from .bufferedarray import BufferedArray 

32from .riffmetadata import metadata_riff, markers_riff 

33from .audiometadata import update_gain, add_unwrap, get_datetime 

34from .audiometadata import flatten_metadata, add_metadata, set_starttime 

35from .audiotools import unwrap 

36 

37 

def load_wave(filepath):
    """Load wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3.8/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # the context manager closes the file even if reading the frames fails:
    with wave.open(filepath, 'r') as wf:
        nchannels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        rate = wf.getframerate()
        buffer = wf.readframes(wf.getnframes())
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # 8-bit wav samples are unsigned and centered on 128:
        samples = np.frombuffer(buffer, dtype='u1')
        data = samples.reshape(-1, nchannels).astype('d')/factor - 1.0
    else:
        if sampwidth == 3:
            # numpy has no 3-byte integer type, so assemble the
            # little-endian bytes by hand and restore the sign:
            raw = np.frombuffer(buffer, dtype='u1').reshape(-1, 3).astype('<i4')
            samples = raw[:, 0] | (raw[:, 1] << 8) | (raw[:, 2] << 16)
            samples[samples >= 2**23] -= 2**24
        else:
            # wav data are little endian, independent of the host:
            samples = np.frombuffer(buffer, dtype=f'<i{sampwidth}')
        data = samples.reshape(-1, nchannels).astype('d')/factor
    return data, float(rate)

81 

82 

def load_ewave(filepath):
    """Read a complete wav file by means of the ewave package.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    traces, samplerate = np.array([]), 0.0
    with ewave.open(filepath, 'r') as wav:
        samplerate = wav.sampling_rate
        traces = ewave.rescale(wav.read(), 'float')
    # single channel data come back 1-D, make them a column:
    if traces.ndim == 1:
        traces = np.reshape(traces, (-1, 1))
    return traces, float(samplerate)

121 

122 

def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # silence warnings of wavfile.read() only while reading; the
    # context manager restores the caller's warning filters afterwards:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # unsigned 8-bit samples are centered on 128:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data,(-1, 1))
    return data, float(rate)

165 

166 

def load_soundfile(filepath):
    """Read a complete audio file with the SoundFile package (libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    samplerate = 0.0
    frames = np.array([])
    with soundfile.SoundFile(filepath, 'r') as snd:
        samplerate = snd.samplerate
        # frames=-1 requests everything up to the end of the file:
        frames = snd.read(frames=-1, dtype='float64', always_2d=True)
    return frames, float(samplerate)

203 

204 

def load_wavefile(filepath):
    """Read a complete audio file with the wavefile package (libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    samplerate, loaded = wavefile.load(filepath)
    # transpose what wavefile.load() returns to get time on the first axis:
    frames = loaded.astype(np.float64, copy=False).T
    return frames, float(samplerate)

236 

237 

def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    data = np.array([])
    rate = 0.0
    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # the number of frames is only known via the reported duration:
        nframes = int(np.ceil(af.samplerate*af.duration))
        data = np.zeros((nframes, af.channels), dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            # never write beyond the pre-allocated array; the space
            # left is measured on `data`, not on the block just read:
            n = min(fulldata.shape[0], len(data) - index)
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    return data/(2.0**15-1.0), float(rate)

284 

285 

audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
)
"""Tuple of the implemented load() functions.

Each element is a pair of a module's name and the load() function
that reads an audio file by means of that module.

"""

300 

301 

def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str or path-like
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values (path objects have no length, so only ask for it
    # when the argument provides one):
    if filepath is None or \
       (hasattr(filepath, '__len__') and len(filepath) == 0):
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
            # remember why this module did not succeed:
            errors.append(f' {lib} failed: no data')
            if verbose > 1:
                print(errors[-1])
        except Exception as e:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules succeeded:
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

381 

382 

def metadata(filepath, store_empty=False):
    """Read the metadata stored in an audio file.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary.  All other types of
        values are values for the respective key.  In particular they
        are strings.  But other types like for example ints or floats
        are also allowed.  See `audioio.audiometadata` module for
        available functions to work with such metadata.
        An empty dictionary is returned if `metadata_riff()` raises
        a ValueError.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        md = metadata_riff(filepath, store_empty)
    except ValueError:  # not a RIFF file
        md = {}
    return md

418 

419 

def markers(filepath):
    """Read the markers stored in an audio file.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
        Both arrays have zero rows if `markers_riff()` raises
        a ValueError.

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        found = markers_riff(filepath)
    except ValueError:  # not a RIFF file
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        found = no_locs, no_labels
    return found

452 

453 

454class AudioLoader(BufferedArray): 

455 """Buffered reading of audio data for random access of the data in the file. 

456  

457 The class allows for reading very large audio files or many 

458 sequential audio files that do not fit into memory. 

459 An AudioLoader instance can be used like a huge read-only numpy array, i.e. 

460 ``` 

461 data = AudioLoader('path/to/audio/file.wav') 

462 x = data[10000:20000,0] 

463 ``` 

464 The first index specifies the frame, the second one the channel. 

465 

466 Behind the scenes, `AudioLoader` tries to open the audio file with 

467 all available audio modules until it succeeds (first line). It 

468 then reads data from the file as necessary for the requested data 

469 (second line). Accessing the content of the audio files via a 

470 buffer that holds only a part of the data is managed by the 

471 `BufferedArray` class. 

472 

473 Reading sequentially through the file is always possible. Some 

474 modules, however, (e.g. audioread, needed for mp3 files) can only 

475 read forward. If previous data are requested, then the file is read 

476 from the beginning again. This slows down access to previous data 

477 considerably. Use the `backsize` argument of the open function to 

478 make sure some data are loaded into the buffer before the requested 

479 frame. Then a subsequent access to the data within `backsize` seconds 

480 before that frame can still be handled without the need to reread 

481 the file from the beginning. 

482 

483 Usage 

484 ----- 

485 With context management: 

486 ``` 

487 import audioio as aio 

488 with aio.AudioLoader(filepath, 60.0, 10.0) as data: 

489 # do something with the content of the file: 

490 x = data[0:10000] 

491 y = data[10000:20000] 

492 z = x + y 

493 ``` 

494 

495 For using a specific audio module, here the audioread module: 

496 ``` 

497 data = aio.AudioLoader() 

498 with data.open_audioread(filepath, 60.0, 10.0): 

499 # do something ... 

500 ``` 

501 

502 Use `blocks()` for sequential, blockwise reading and processing: 

503 ``` 

504 from scipy.signal import spectrogram 

505 nfft = 2048 

506 with aio.AudioLoader('some/audio.wav') as data: 

507 for x in data.blocks(100*nfft, nfft//2): 

508 f, t, Sxx = spectrogram(x, fs=data.rate, 

509 nperseg=nfft, noverlap=nfft//2) 

510 ``` 

511 

512 For loop iterates over single frames (1-D arrays containing samples for each channel): 

513 ``` 

514 with aio.AudioLoader('some/audio.wav') as data: 

515 for x in data: 

516 print(x) 

517 ``` 

518  

519 Traditional open and close: 

520 ``` 

521 data = aio.AudioLoader(filepath, 60.0) 

522 x = data[:,:] # read the whole file 

523 data.close() 

524 ``` 

525  

526 this is the same as: 

527 ``` 

528 data = aio.AudioLoader() 

529 data.open(filepath, 60.0) 

530 ... 

531 ``` 

532 

533 Classes inheriting AudioLoader just need to implement 

534 ``` 

535 self.load_audio_buffer(offset, nsamples, pbuffer) 

536 ``` 

537 This function needs to load the supplied `pbuffer` with 

538 `nsamples` frames of data starting at frame `offset`. 

539 

540 In the constructor or some kind of opening function, you need to 

541 set some member variables, as described for `BufferedArray`. 

542 

543 For loading metadata and markers, implement the functions 

544 ``` 

545 self._load_metadata(filepath, **kwargs) 

546 self._load_markers(filepath) 

547 ``` 

548  

549 Parameters 

550 ---------- 

551 filepath: str or list of str 

552 Name of the file or list of many file names that should be 

553 made accessible as a single array. 

554 buffersize: float 

555 Size of internal buffer in seconds. 

556 backsize: float 

557 Part of the buffer to be loaded before the requested start index in seconds. 

558 verbose: int 

559 If larger than zero show detailed error/warning messages. 

560 store_empty: bool 

561 If `False` do not return meta data with empty values. 

562 

563 Attributes 

564 ---------- 

565 filepath: str 

566 Name and path of the opened file. In case of many files, the first one. 

567 file_paths: list of str 

568 List of paths of the opened files that are made accessible 

569 as a single array. 

570 file_indices: list of int 

571 For each file the index of its first sample. 

572 rate: float 

573 The sampling rate of the data in Hertz. 

574 channels: int 

575 The number of channels. 

576 frames: int 

577 The number of frames in the file. Same as `len()`. 

578 format: str or None 

579 Format of the audio file. 

580 encoding: str or None 

581 Encoding/subtype of the audio file. 

582 shape: tuple 

583 Frames and channels of the data. 

584 ndim: int 

585 Number of dimensions: always 2 (frames and channels). 

586 offset: int 

587 Index of first frame in the current buffer. 

588 buffer: ndarray of floats 

589 The currently available data from the file. 

590 ampl_min: float 

591 Minimum amplitude the file format supports. 

592 Always -1.0 for audio data. 

593 ampl_max: float 

594 Maximum amplitude the file format supports. 

595 Always +1.0 for audio data. 

596 

597 Methods 

598 ------- 

599 - `len()`: Number of frames. 

600 - `file_start_times()`: time of first frame of each file in seconds. 

601 - `get_file_index()`: file path and index of frame contained by this file. 

602 - `open()`: Open an audio file by trying available audio modules. 

603 - `open_*()`: Open an audio file with the respective audio module. 

604 - `__getitem__`: Access data of the audio file. 

605 - `update_buffer()`: Update the internal buffer for a range of frames. 

606 - `blocks()`: Generator for blockwise processing of AudioLoader data. 

607 - `basename()`: Base name of the audio data. 

608 - `format_dict()`: technical infos about how the data are stored. 

609 - `metadata()`: Metadata stored along with the audio data. 

610 - `markers()`: Markers stored along with the audio data. 

611 - `set_unwrap()`: Set parameters for unwrapping clipped data. 

612 - `close()`: Close the file. 

613 

614 """ 

615 

616 max_open_files = 5 

617 """ Suggestion for maximum number of open file descriptors. """ 

618 

619 max_open_loaders = 10 

620 """ Suggestion for maximum number of AudioLoaders when opening multiple files. """ 

621 

622 def __init__(self, filepath=None, buffersize=10.0, backsize=0.0, 

623 verbose=0, **meta_kwargs): 

624 super().__init__(verbose=verbose) 

625 self.format = None 

626 self.encoding = None 

627 self._metadata = None 

628 self._locs = None 

629 self._labels = None 

630 self._load_metadata = metadata 

631 self._load_markers = markers 

632 self._metadata_kwargs = meta_kwargs 

633 self.filepath = None 

634 self.file_paths = None 

635 self.file_indices = [] 

636 self.sf = None 

637 self.close = self._close 

638 self.load_buffer = self._load_buffer_unwrap 

639 self.ampl_min = -1.0 

640 self.ampl_max = +1.0 

641 self.unwrap = False 

642 self.unwrap_thresh = 0.0 

643 self.unwrap_clips = False 

644 self.unwrap_ampl = 1.0 

645 self.unwrap_downscale = True 

646 if filepath is not None: 

647 self.open(filepath, buffersize, backsize, verbose) 

648 

649 numpy_encodings = {np.dtype(np.int64): 'PCM_64', 

650 np.dtype(np.int32): 'PCM_32', 

651 np.dtype(np.int16): 'PCM_16', 

652 np.dtype(np.single): 'FLOAT', 

653 np.dtype(np.double): 'DOUBLE', 

654 np.dtype('>f4'): 'FLOAT', 

655 np.dtype('>f8'): 'DOUBLE'} 

656 """ Map numpy dtypes to encodings. 

657 """ 

658 

659 def _close(self): 

660 pass 

661 

662 def __del__(self): 

663 self.close() 

664 

665 def file_start_times(self): 

666 """ Time of first frame of each file in seconds. 

667  

668 Returns 

669 ------- 

670 times: array of float 

671 Time of the first frame of each file relative to buffer start 

672 in seconds. 

673 """ 

674 times = [] 

675 for idx in self.file_indices: 

676 times.append(idx/self.rate) 

677 return np.array(times) 

678 

679 def get_file_index(self, frame): 

680 """ File path and index of frame contained by this file. 

681 

682 Parameters 

683 ---------- 

684 frame: int 

685 Index of frame. 

686  

687 Returns 

688 ------- 

689 filepath: str 

690 Path of file that contains the frame. 

691 index: int 

692 Index of the frame relative to the first frame 

693 in the containing file. 

694 """ 

695 if frame < 0 or frame >= self.frames: 

696 raise ValueError('invalid frame') 

697 fname = self.file_paths[0] 

698 index = self.file_indices[0] 

699 for i in reversed(range(len(self.file_indices))): 

700 if self.file_indices[i] <= frame: 

701 fname = self.file_paths[i] 

702 index = self.file_indices[i] 

703 break 

704 return fname, frame - index 

705 

706 def basename(self, path=None): 

707 """ Base name of the audio data. 

708 

709 Parameters 

710 ---------- 

711 path: str or None 

712 Path of the audio file from which a base name is generated. 

713 If `None`, use `self.filepath`. 

714 

715 Returns 

716 ------- 

717 s: str 

718 The name. Defaults to the stem of `path`. 

719 

720 """ 

721 if path is None: 

722 path = self.filepath 

723 return Path(path).stem 

724 

725 def format_dict(self): 

726 """ Technical infos about how the data are stored in the file. 

727 

728 Returns 

729 ------- 

730 fmt: dict 

731 Dictionary with filepath, format, encoding, samplingrate, 

732 channels, frames, and duration of the audio file as strings. 

733 

734 """ 

735 fmt = dict(name=self.basename(), filepath=self.filepath) 

736 if self.format is not None: 

737 fmt['format'] = self.format 

738 if self.encoding is not None: 

739 fmt['encoding'] = self.encoding 

740 fmt.update(dict(samplingrate=f'{self.rate:.0f}Hz', 

741 channels=self.channels, 

742 frames=self.frames, 

743 duration=f'{self.frames/self.rate:.3f}s')) 

744 return fmt 

745 

746 def metadata(self): 

747 """Metadata of the audio file. 

748 

749 Parameters 

750 ---------- 

751 store_empty: bool 

752 If `False` do not add meta data with empty values. 

753 

754 Returns 

755 ------- 

756 meta_data: nested dict 

757 

758 Meta data contained in the audio file. Keys of the nested 

759 dictionaries are always strings. If the corresponding 

760 values are dictionaries, then the key is the section name 

761 of the metadata contained in the dictionary. All other 

762 types of values are values for the respective key. In 

763 particular they are strings. But other types like for 

764 example ints or floats are also allowed. See 

765 `audioio.audiometadata` module for available functions to 

766 work with such metadata. 

767 

768 """ 

769 if self._metadata is None: 

770 if self._load_metadata is None: 

771 self._metadata = {} 

772 else: 

773 self._metadata = self._load_metadata(self.filepath, 

774 **self._metadata_kwargs) 

775 return self._metadata 

776 

777 def markers(self): 

778 """Read markers of the audio file. 

779 

780 See `audioio.audiomarkers` module for available functions 

781 to work with markers. 

782 

783 Returns 

784 ------- 

785 locs: 2-D ndarray of int 

786 Marker positions (first column) and spans (second column) 

787 for each marker (rows). 

788 labels: 2-D ndarray of str objects 

789 Labels (first column) and texts (second column) 

790 for each marker (rows). 

791 """ 

792 if self._locs is None: 

793 if self._load_markers is None: 

794 self._locs = np.zeros((0, 2), dtype=int) 

795 self._labels = np.zeros((0, 2), dtype=object) 

796 else: 

797 self._locs, self._labels = self._load_markers(self.filepath) 

798 return self._locs, self._labels 

799 

800 def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''): 

801 """Set parameters for unwrapping clipped data. 

802 

803 See unwrap() function from the audioio package. 

804 

805 Parameters 

806 ---------- 

807 thresh: float 

808 Threshold for detecting wrapped data relative to self.unwrap_ampl 

809 which is initially set to self.ampl_max. 

810 If zero, do not unwrap. 

811 clips: bool 

812 If True, then clip the unwrapped data properly. 

813 Otherwise, unwrap the data and double the 

814 minimum and maximum data range 

815 (self.ampl_min and self.ampl_max). 

816 down_scale: bool 

817 If not `clips`, then downscale the signal by a factor of two, 

818 in order to keep the range between -1 and 1. 

819 unit: str 

820 Unit of the data. 

821 """ 

822 self.unwrap_ampl = self.ampl_max 

823 self.unwrap_thresh = thresh 

824 self.unwrap_clips = clips 

825 self.unwrap_down_scale = down_scale 

826 self.unwrap = thresh > 1e-3 

827 if self.unwrap: 

828 if self.unwrap_clips: 

829 add_unwrap(self.metadata(), 

830 self.unwrap_thresh*self.unwrap_ampl, 

831 self.unwrap_ampl, unit) 

832 elif down_scale: 

833 update_gain(self.metadata(), 0.5) 

834 add_unwrap(self.metadata(), 

835 0.5*self.unwrap_thresh*self.unwrap_ampl, 

836 0.0, unit) 

837 else: 

838 self.ampl_min *= 2 

839 self.ampl_max *= 2 

840 add_unwrap(self.metadata(), 

841 self.unwrap_thresh*self.unwrap_ampl, 

842 0.0, unit) 

843 

844 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer): 

845 """Load new data and unwrap it. 

846 

847 Parameters 

848 ---------- 

849 r_offset: int 

850 First frame to be read from file. 

851 r_size: int 

852 Number of frames to be read from file. 

853 pbuffer: ndarray 

854 Buffer where to store the loaded data. 

855 """ 

856 self.load_audio_buffer(r_offset, r_size, pbuffer) 

857 if self.unwrap: 

858 # TODO: handle edge effects! 

859 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl) 

860 if self.unwrap_clips: 

861 pbuffer[pbuffer > self.ampl_max] = self.ampl_max 

862 pbuffer[pbuffer < self.ampl_min] = self.ampl_min 

863 elif self.unwrap_down_scale: 

864 pbuffer *= 0.5 

865 

866 

867 # wave interface:  

868 def open_wave(self, filepath, buffersize=10.0, backsize=0.0, 

869 verbose=0): 

870 """Open audio file for reading using the wave module. 

871 

872 Note: we assume that setpos() and tell() use integer numbers! 

873 

874 Parameters 

875 ---------- 

876 filepath: str 

877 Name of the file. 

878 buffersize: float 

879 Size of internal buffer in seconds. 

880 backsize: float 

881 Part of the buffer to be loaded before the requested start index in seconds. 

882 verbose: int 

883 If larger than zero show detailed error/warning messages. 

884 

885 Raises 

886 ------ 

887 ImportError 

888 The wave module is not installed 

889 """ 

890 self.verbose = verbose 

891 if self.verbose > 0: 

892 print(f'open_wave(filepath) with filepath={filepath}') 

893 if not audio_modules['wave']: 

894 self.rate = 0.0 

895 self.channels = 0 

896 self.frames = 0 

897 self.size = 0 

898 self.shape = (0, 0) 

899 self.offset = 0 

900 raise ImportError 

901 if self.sf is not None: 

902 self._close_wave() 

903 self.sf = wave.open(filepath, 'r') 

904 self.filepath = filepath 

905 self.file_paths = [filepath] 

906 self.file_indices = [0] 

907 self.rate = float(self.sf.getframerate()) 

908 self.format = 'WAV' 

909 sampwidth = self.sf.getsampwidth() 

910 if sampwidth == 1: 

911 self.dtype = 'u1' 

912 self.encoding = 'PCM_U8' 

913 else: 

914 self.dtype = f'i{sampwidth}' 

915 self.encoding = f'PCM_{sampwidth*8}' 

916 self.factor = 1.0/(2.0**(sampwidth*8-1)) 

917 self.channels = self.sf.getnchannels() 

918 self.frames = self.sf.getnframes() 

919 self.shape = (self.frames, self.channels) 

920 self.size = self.frames * self.channels 

921 self.bufferframes = int(buffersize*self.rate) 

922 self.backframes = int(backsize*self.rate) 

923 self.init_buffer() 

924 self.close = self._close_wave 

925 self.load_audio_buffer = self._load_buffer_wave 

926 # read 1 frame to determine the unit of the position values: 

927 self.p0 = self.sf.tell() 

928 self.sf.readframes(1) 

929 self.pfac = self.sf.tell() - self.p0 

930 self.sf.setpos(self.p0) 

931 return self 

932 

933 def _close_wave(self): 

934 """Close the audio file using the wave module. """ 

935 if self.sf is not None: 

936 self.sf.close() 

937 self.sf = None 

938 

939 def _load_buffer_wave(self, r_offset, r_size, buffer): 

940 """Load new data from file using the wave module. 

941 

942 Parameters 

943 ---------- 

944 r_offset: int 

945 First frame to be read from file. 

946 r_size: int 

947 Number of frames to be read from file. 

948 buffer: ndarray 

949 Buffer where to store the loaded data. 

950 """ 

951 if self.sf is None: 

952 self.sf = wave.open(self.filepath, 'r') 

953 self.sf.setpos(r_offset*self.pfac + self.p0) 

954 fbuffer = self.sf.readframes(r_size) 

955 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels)) 

956 if self.dtype[0] == 'u': 

957 buffer[:, :] = fbuffer * self.factor - 1.0 

958 else: 

959 buffer[:, :] = fbuffer * self.factor 

960 

961 

962 # ewave interface:  

963 def open_ewave(self, filepath, buffersize=10.0, backsize=0.0, 

964 verbose=0): 

965 """Open audio file for reading using the ewave module. 

966 

967 Parameters 

968 ---------- 

969 filepath: str 

970 Name of the file. 

971 buffersize: float 

972 Size of internal buffer in seconds. 

973 backsize: float 

974 Part of the buffer to be loaded before the requested start index in seconds. 

975 verbose: int 

976 If larger than zero show detailed error/warning messages. 

977 

978 Raises 

979 ------ 

980 ImportError 

981 The ewave module is not installed. 

982 """ 

983 self.verbose = verbose 

984 if self.verbose > 0: 

985 print(f'open_ewave(filepath) with filepath={filepath}') 

986 if not audio_modules['ewave']: 

987 self.rate = 0.0 

988 self.channels = 0 

989 self.frames = 0 

990 self.shape = (0, 0) 

991 self.size = 0 

992 self.offset = 0 

993 raise ImportError 

994 if self.sf is not None: 

995 self._close_ewave() 

996 self.sf = ewave.open(filepath, 'r') 

997 self.filepath = filepath 

998 self.file_paths = [filepath] 

999 self.file_indices = [0] 

1000 self.rate = float(self.sf.sampling_rate) 

1001 self.channels = self.sf.nchannels 

1002 self.frames = self.sf.nframes 

1003 self.shape = (self.frames, self.channels) 

1004 self.size = self.frames * self.channels 

1005 self.format = 'WAV' # or WAVEX? 

1006 self.encoding = self.numpy_encodings[self.sf.dtype] 

1007 self.bufferframes = int(buffersize*self.rate) 

1008 self.backframes = int(backsize*self.rate) 

1009 self.init_buffer() 

1010 self.close = self._close_ewave 

1011 self.load_audio_buffer = self._load_buffer_ewave 

1012 return self 

1013 

1014 def _close_ewave(self): 

1015 """Close the audio file using the ewave module. """ 

1016 if self.sf is not None: 

1017 del self.sf 

1018 self.sf = None 

1019 

def _load_buffer_ewave(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read from the file via the ewave module.

    The file is reopened from `self.filepath` if it is currently closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = ewave.open(self.filepath, 'r')
    raw = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
    data = ewave.rescale(raw, 'float')
    # a one-dimensional result is turned into a single column,
    # so that it fits the 2-D buffer:
    if data.ndim == 1:
        data = np.reshape(data, (-1, 1))
    buffer[:, :] = data

1039 

1040 

1041 # soundfile interface:  

def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading via the SoundFile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This loader, with rate, channels, frames, shape, size,
        format and encoding taken from the opened file.

    Raises
    ------
    ImportError
        The SoundFile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_soundfile(filepath) with filepath={filepath}')
    if not audio_modules['soundfile']:
        # leave an empty but well defined state behind before bailing out:
        self.rate = 0.0
        self.channels = self.frames = self.size = self.offset = 0
        self.shape = (0, 0)
        raise ImportError
    # release a file that is still open from a previous call:
    if self.sf is not None:
        self._close_soundfile()
    self.sf = soundfile.SoundFile(filepath, 'r')
    # bookkeeping of the file name(s):
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    # the number of frames is the position reported when seeking to
    # the end of the file; it stays zero for non-seekable files:
    self.frames = self.size = 0
    if self.sf.seekable():
        self.frames = self.sf.seek(0, soundfile.SEEK_END)
        self.sf.seek(0, soundfile.SEEK_SET)
    # TODO: if not seekable, we cannot handle that file!
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = self.sf.format
    self.encoding = self.sf.subtype
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # hook in the soundfile specific close and load functions:
    self.close = self._close_soundfile
    self.load_audio_buffer = self._load_buffer_soundfile
    return self

1097 

1098 def _close_soundfile(self): 

1099 """Close the audio file using the SoundFile module. """ 

1100 if self.sf is not None: 

1101 self.sf.close() 

1102 self.sf = None 

1103 

def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read from the file via the SoundFile module.

    The file is reopened from `self.filepath` if it is currently closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = soundfile.SoundFile(self.filepath, 'r')
    # position the file at the first requested frame, then read:
    self.sf.seek(r_offset, soundfile.SEEK_SET)
    frames = self.sf.read(r_size, always_2d=True)
    buffer[:, :] = frames

1120 

1121 

1122 # wavefile interface:  

def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading via the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This loader, with rate, channels, frames, shape and size
        taken from the opened file.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # leave an empty but well defined state behind before bailing out:
        self.rate = 0.0
        self.channels = self.frames = self.size = self.offset = 0
        self.shape = (0, 0)
        raise ImportError
    # release a file that is still open from a previous call:
    if self.sf is not None:
        self._close_wavefile()
    self.sf = wavefile.WaveReader(filepath)
    # bookkeeping of the file name(s):
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = self.sf.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # get format and encoding: find the names of the integer
    # constants in wavefile.Format that equal the type part and
    # the subtype part of the file's format code:
    formats = wavefile.Format
    for name in dir(formats):
        code = getattr(formats, name)
        if not isinstance(code, int):
            continue
        if (code & formats.TYPEMASK) > 0 and \
           (self.sf.format & formats.TYPEMASK) == code:
            self.format = name
        if (code & formats.SUBMASK) > 0 and \
           (self.sf.format & formats.SUBMASK) == code:
            self.encoding = name
    # init buffer (sizes are given in seconds, convert them to frames):
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # hook in the wavefile specific close and load functions:
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self

1182 

1183 def _close_wavefile(self): 

1184 """Close the audio file using the wavefile module. """ 

1185 if self.sf is not None: 

1186 self.sf.close() 

1187 self.sf = None 

1188 

def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read from the file via the wavefile module.

    The file is reopened from `self.filepath` if it is currently closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = wavefile.WaveReader(self.filepath)
    self.sf.seek(r_offset, wavefile.Seek.SET)
    # let wavefile allocate a block of matching size and data type
    # and read the frames into it:
    block = self.sf.buffer(r_size, dtype=self.buffer.dtype)
    self.sf.read(block)
    # the block is transposed before it is copied to the buffer
    # (presumably wavefile stores channels along the first axis):
    buffer[:, :] = block.T

1207 

1208 

1209 # audioread interface:  

def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading via the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This loader, with rate, channels, frames, shape and size
        taken from the opened file.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # leave an empty but well defined state behind before bailing out:
        self.rate = 0.0
        self.channels = self.frames = self.size = self.offset = 0
        self.shape = (0, 0)
        raise ImportError
    # release a file that is still open from a previous call:
    if self.sf is not None:
        self._close_audioread()
    self.sf = audioread.audio_open(filepath)
    # bookkeeping of the file name(s):
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    # audioread reports a duration only, the number of frames is
    # computed from it and rounded up:
    self.frames = int(np.ceil(self.rate*self.sf.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # the most recent block returned by audioread and its position
    # in the file, both empty for a freshly opened file:
    self.read_buffer = np.zeros((0, 0))
    self.read_offset = 0
    # hook in the audioread specific close and load functions:
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    # NOTE(review): filepath is assigned a second time here; this looks
    # redundant - confirm that init_buffer() does not touch it.
    self.filepath = filepath
    self.sf_iter = self.sf.__iter__()
    return self

1265 

1266 def _close_audioread(self): 

1267 """Close the audio file using the audioread module. """ 

1268 if self.sf is not None: 

1269 self.sf.__exit__(None, None, None) 

1270 self.sf = None 

1271 

1272 def _load_buffer_audioread(self, r_offset, r_size, buffer): 

1273 """Load new data from file using the audioread module. 

1274 

1275 audioread can only iterate through a file once and in blocksizes that are 

1276 given by audioread. Therefore we keep yet another buffer: `self.read_buffer` 

1277 at file offset `self.read_offset` containing whatever audioread returned. 

1278 

1279 Parameters 

1280 ---------- 

1281 r_offset: int 

1282 First frame to be read from file. 

1283 r_size: int 

1284 Number of frames to be read from file. 

1285 buffer: ndarray 

1286 Buffer where to store the loaded data. 

1287 """ 

1288 if self.sf is None: 

1289 self.sf = audioread.audio_open(self.filepath) 

1290 b_offset = 0 

1291 if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size 

1292 and self.read_offset < r_offset + r_size ): 

1293 # read_buffer overlaps at the end of the requested interval: 

1294 i = 0 

1295 n = r_offset + r_size - self.read_offset 

1296 if n > r_size: 

1297 i += n - r_size 

1298 n = r_size 

1299 buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0) 

1300 if self.verbose > 2: 

1301 print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)') 

1302 r_size -= n 

1303 if r_size <= 0: 

1304 return 

1305 # go back to beginning of file: 

1306 if r_offset < self.read_offset: 

1307 if self.verbose > 2: 

1308 print(' rewind') 

1309 self._close_audioread() 

1310 self.sf = audioread.audio_open(self.filepath) 

1311 self.sf_iter = self.sf.__iter__() 

1312 self.read_buffer = np.zeros((0,0)) 

1313 self.read_offset = 0 

1314 # read to position: 

1315 while self.read_offset + self.read_buffer.shape[0] < r_offset: 

1316 self.read_offset += self.read_buffer.shape[0] 

1317 try: 

1318 if hasattr(self.sf_iter, 'next'): 

1319 fbuffer = self.sf_iter.next() 

1320 else: 

1321 fbuffer = next(self.sf_iter) 

1322 except StopIteration: 

1323 self.read_buffer = np.zeros((0,0)) 

1324 buffer[:,:] = 0.0 

1325 if self.verbose > 1: 

1326 print(f' caught StopIteration, padded buffer with {r_size} zeros') 

1327 break 

1328 self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels) 

1329 if self.verbose > 2: 

1330 print(f' read forward by {self.read_buffer.shape[0]} frames') 

1331 # recycle file data: 

1332 if ( self.read_offset + self.read_buffer.shape[0] > r_offset 

1333 and self.read_offset <= r_offset ): 

1334 i = r_offset - self.read_offset 

1335 n = self.read_offset + self.read_buffer.shape[0] - r_offset 

1336 if n > r_size: 

1337 n = r_size 

1338 buffer[:n,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0) 

1339 if self.verbose > 2: 

1340 print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)') 

1341 b_offset += n 

1342 r_offset += n 

1343 r_size -= n 

1344 # read data: 

1345 if self.verbose > 2 and r_size > 0: 

1346 print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)') 

1347 while r_size > 0: 

1348 self.read_offset += self.read_buffer.shape[0] 

1349 try: 

1350 if hasattr(self.sf_iter, 'next'): 

1351 fbuffer = self.sf_iter.next() 

1352 else: 

1353 fbuffer = next(self.sf_iter) 

1354 except StopIteration: 

1355 self.read_buffer = np.zeros((0,0)) 

1356 buffer[b_offset:,:] = 0.0 

1357 if self.verbose > 1: 

1358 print(f' caught StopIteration, padded buffer with {r_size} zeros') 

1359 break 

1360 self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels) 

1361 n = self.read_buffer.shape[0] 

1362 if n > r_size: 

1363 n = r_size 

1364 if n > 0: 

1365 buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / (2.0**15-1.0) 

1366 if self.verbose > 2: 

1367 print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)') 

1368 b_offset += n 

1369 r_offset += n 

1370 r_size -= n 

1371 

1372 

1373 # open multiple audio files as one: 

def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Open multiple audio files as a single concatenated array.

    Without `end_indices`, each file is opened in turn. Files that
    cannot be opened are skipped. Collecting files stops at the first
    file whose number of channels, sampling rate, or start time does
    not continue the previous files. With `end_indices`, no file is
    opened and the provided values are taken as they are.

    Parameters
    ----------
    filepaths: list of str
        List of file names of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.

    Returns
    -------
    self
        This loader, set up to read from all accepted files.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empy sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file:
    start_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization from the provided values,
        # the files are opened later on demand:
        self.filepath = filepaths[0]
        self.file_paths = filepaths
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                a = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # best effort: skip files that cannot be opened
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = a.metadata()
            fmd = flatten_metadata(md, True)
            add_metadata(self._metadata, fmd)
            if self.filepath is None:
                # first file:
                self.filepath = a.filepath
                self.format = a.format
                self.encoding = a.encoding
                self.rate = a.rate
                self.channels = a.channels
                self.start_time = get_datetime(md)
                start_time = self.start_time
            else:
                # check channels and rate:
                error_str = None
                if a.channels != self.channels:
                    error_str = f'number of channels differs: ' \
                                f'{a.channels} in {a.filepath} versus ' \
                                f'{self.channels} in {self.filepath}'
                if a.rate != self.rate:
                    error_str = f'sampling rates differ: ' \
                                f'{a.rate} in {a.filepath} versus ' \
                                f'{self.rate} in {self.filepath}'
                # check start time of recording:
                stime = get_datetime(md)
                if start_time is None or stime is None or \
                   abs(start_time - stime) > timedelta(seconds=1):
                    error_str = f'start time does not indicate continuous recording: ' \
                                f'expected {start_time} instead of ' \
                                f'{stime} in {a.filepath}'
                if error_str is not None:
                    # this and all following files are not used:
                    if verbose > 0:
                        print(error_str)
                    a.close()
                    del a
                    break
            # markers (positions are shifted by the frames of the
            # previous files):
            locs, labels = a.markers()
            locs[:,0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += a.frames
            self.end_indices.append(self.frames)
            if start_time is not None:
                start_time += timedelta(seconds=a.frames/a.rate)
            # add file to lists:
            self.file_paths.append(filepath)
            # keep only a limited number of files open:
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(a)
            else:
                a.close()
            # keep only a limited number of loader objects:
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(a)
                self.open_loaders.append(a)
            else:
                a.close()
                del a
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set startime from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    # keep the total number of samples in step with the shape,
    # as all the single-file open functions do:
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self

1527 

1528 def _close_multiple(self): 

1529 """Close all the audio files. """ 

1530 self.open_files = [] 

1531 self.open_loaders = [] 

1532 if hasattr(self, 'audio_files'): 

1533 for a in self.audio_files: 

1534 if a is not None: 

1535 a.close() 

1536 self.audio_files = [] 

1537 self.filepath = None 

1538 self.file_paths = [] 

1539 self.file_indices = [] 

1540 self.start_indices = [] 

1541 self.end_indices = [] 

1542 del self.audio_files 

1543 del self.open_files 

1544 del self.open_loaders 

1545 del self.start_indices 

1546 del self.end_indices 

1547 

def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames collected from the underlying files.

    The requested range may span several files. Loaders are created
    on demand. The most recently used loaders and open files are kept
    at the end of `self.open_loaders` and `self.open_files`, and the
    oldest ones are closed once the limits `AudioLoader.max_open_loaders`
    and `AudioLoader.max_open_files` are exceeded.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    pos = r_offset        # next frame to read, in concatenated frames
    remaining = r_size    # frames that still need to be read
    filled = 0            # frames already written to buffer
    # index of the file that contains the first requested frame:
    k = np.searchsorted(self.end_indices, pos, side='right')
    while remaining > 0:
        if self.audio_files[k] is not None:
            # loader exists, move it to the end of the loader list:
            self.open_loaders.pop(self.open_loaders.index(self.audio_files[k]))
            self.open_loaders.append(self.audio_files[k])
        else:
            # no loader for this file yet, create one:
            fresh = AudioLoader(self.file_paths[k],
                                self.buffersize, self.backsize, 0)
            self.audio_files[k] = fresh
            self.open_loaders.append(fresh)
            self.open_files.append(fresh)
            # too many open files, close the oldest one:
            if len(self.open_files) > AudioLoader.max_open_files:
                oldest = self.open_files.pop(0)
                oldest.close()
            # too many loaders, get rid of the oldest one:
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                oldest = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(oldest)] = None
                oldest.close()
                del oldest
                # run the garbage collector only every now and then:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        # range of frames within the current file:
        first = pos - self.start_indices[k]
        last = pos + remaining
        if last > self.end_indices[k]:
            last = self.end_indices[k]
        last -= self.start_indices[k]
        count = last - first
        self.audio_files[k].load_audio_buffer(first, count,
                                              buffer[filled:filled + count,:])
        # the file has just been used, move it to the end of the
        # list of open files:
        if self.audio_files[k] in self.open_files:
            self.open_files.pop(self.open_files.index(self.audio_files[k]))
        self.open_files.append(self.audio_files[k])
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        # continue with the next file:
        filled += count
        pos += count
        remaining -= count
        k += 1

1604 

1605 

def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    A sequence with more than one file name is handed over to
    `open_multiple()`. A single file is opened by trying the available
    audio modules one after the other.

    Parameters
    ----------
    filepath: str or list of str
        Name of the file or list of many file names that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self
        This loader with the file(s) opened.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    is_sequence = isinstance(filepath, (list, tuple, np.ndarray))
    # the truth value of a numpy array with several elements is
    # ambiguous, therefore sequences are tested by their length:
    if is_sequence:
        is_empty = len(filepath) == 0
    else:
        is_empty = not filepath
    if is_empty:
        raise ValueError('input argument filepath is empty string!')
    if is_sequence:
        if len(filepath) > 1:
            self.open_multiple(filepath, buffersize, backsize, verbose,
                               **kwargs)
            if len(self.file_paths) > 1:
                return self
            # only a single file was accepted, open it as a single file:
            filepath = self.file_paths[0]
            self.close()
        else:
            filepath = filepath[0]
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
        except Exception as e:
            # remember why this module failed and try the next one:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules was able to load the file:
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

1694 

1695 

def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the whole file with `load_audio()`, cross checks the
    soundfile against the audioread module if both are available,
    and then compares random, forward, and backward access via an
    `AudioLoader` with the fully loaded data.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # main() imports matplotlib into its own local scope only,
        # so make `plt` available where it is actually used:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        # length of the slices used for the checks (1.5 seconds):
        nframes = int(1.5*data.rate)
        # check access (differences up to 2**-14 are tolerated):
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        for inx in np.random.randint(0, len(data)-nframes, 1000):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()

1763 

1764 

def main(*args):
    """Call demo with command line arguments.

    Recognized arguments are `-h` (print usage), `-p` (plot the
    loaded data), and `-m <module>` (select the audio module to be
    used). The first other argument is taken as the audio file and
    ends the parsing. The usage is also printed if no audio file is
    given.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    # True while the next argument is the module name following '-m':
    mod = False
    for arg in args:
        if mod:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            mod = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            mod = True
        else:
            file_path = arg
            break

    # without an audio file there is nothing to demonstrate:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        import matplotlib.pyplot as plt

    demo(file_path, plot)

1808 

1809 

# run the demo when the module is executed as a script:
if __name__ == '__main__':
    main(*sys.argv[1:])