Coverage for src / audioio / audioloader.py: 92%

826 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-17 21:34 +0000

1"""Loading data, metadata, and markers from audio files. 

2 

3- `load_audio()`: load a whole audio file at once. 

4- `metadata()`: read metadata of an audio file. 

5- `markers()`: read markers of an audio file. 

6- class `AudioLoader`: read data from audio files in chunks. 

7 

8The read in data are always numpy arrays of floats ranging between -1 and 1. 

9The arrays are 2-D ndarrays with first axis time and second axis channel, 

10even for single channel data. 

11 

12If an audio file cannot be loaded, you might need to install 

13additional packages. See 

14[installation](https://bendalab.github.io/audioio/installation) for 

15further instructions. 

16 

17For a demo run the module as: 

18``` 

19python -m src.audioio.audioloader audiofile.wav 

20``` 

21""" 

22 

23import os 

24import gc 

25import sys 

26import warnings 

27import numpy as np 

28 

29from pathlib import Path 

30from datetime import timedelta 

31 

32from .audiomodules import * 

33from .bufferedarray import BufferedArray 

34from .riffmetadata import metadata_riff, markers_riff 

35from .audiometadata import update_gain, add_unwrap, get_datetime 

36from .audiometadata import flatten_metadata, add_metadata, set_starttime 

37from .audiotools import unwrap 

38 

39 

def load_wave(filepath):
    """Load wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3.8/library/wave.html

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # Wave_read objects are context managers: the file gets closed
    # even if reading the frames raises.
    with wave.open(os.fspath(filepath), 'r') as wf:
        (nchannels, sampwidth, rate, nframes, comptype, compname) = wf.getparams()
        buffer = wf.readframes(nframes)
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # one byte per sample is read as unsigned and shifted to be centered at zero:
        dtype = 'u1'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor - 1.0
    else:
        # wider samples are read as signed integers:
        dtype = f'i{sampwidth}'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor
    return data, float(rate)

83 

84 

def load_ewave(filepath):
    """Read a complete wav file with the ewave module.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    rate, data = 0.0, np.array([])
    with ewave.open(os.fspath(filepath), 'r') as wav:
        rate = wav.sampling_rate
        raw = wav.read()
        data = ewave.rescale(raw, 'float')
    # single channel data come as 1-D array, make them 2-D:
    if len(data.shape) == 1:
        data = np.reshape(data, (-1, 1))
    return data, float(rate)

123 

124 

def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings of wavfile.read() only locally. The previous
    # warning filters are restored on exit, also if reading fails.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # unsigned 8-bit data are centered at 128:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # scale signed integers by their full range:
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    # single channel data come as 1-D array, make them 2-D:
    if len(data.shape) == 1:
        data = np.reshape(data, (-1, 1))
    return data, float(rate)

167 

168 

def load_soundfile(filepath):
    """Read a complete audio file with SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    rate, data = 0.0, np.array([])
    with soundfile.SoundFile(filepath, 'r') as snd:
        rate = snd.samplerate
        # frames=-1 requests all frames of the file:
        data = snd.read(frames=-1, dtype='float64', always_2d=True)
    return data, float(rate)

205 

206 

def load_wavefile(filepath):
    """Read a complete audio file with wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    rate, loaded = wavefile.load(os.fspath(filepath))
    # the loaded array is transposed to get time on the first axis:
    data = loaded.astype(np.float64, copy=False).T
    return data, float(rate)

238 

239 

def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    data = np.array([])
    rate = 0.0
    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # preallocate the array from the duration reported by audioread:
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            # Copy at most as many frames as still fit into the
            # preallocated array (the remaining space is measured on
            # `data`, not on the chunk just read):
            n = min(fulldata.shape[0], len(data) - index)
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    # NOTE(review): the scale is 2**15 - 1 here, whereas load_wave()
    # divides by 2**15; the most negative sample maps slightly below -1.
    return data/(2.0**15-1.0), float(rate)

286 

287 

# Order matters: load_audio() tries the loaders from first to last
# and returns the data of the first one that succeeds.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""

302 

303 

def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import numpy as np
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    filepath = Path(filepath)
    # is_file() needs to be called, the bare method object is always true:
    if not filepath.is_file():
        raise FileNotFoundError(f'file "{filepath}" not found')
    if filepath.stat().st_size <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
        except Exception as e:
            # remember the failure and go on with the next module:
            errors.append(f' {lib} failed: {e}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules returned any data:
    if not_installed:
        errors.append('\n You may need to install one of the ' +
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

380 

381 

def metadata(filepath, store_empty=False):
    """Read metadata of an audio file.

    The file is parsed by `metadata_riff()`. If that raises a
    ValueError (not a RIFF file), an empty dictionary is returned.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary. All other types of
        values are values for the respective key. In particular they
        are strings. But other types like for example ints or floats
        are also allowed.  See `audioio.audiometadata` module for
        available functions to work with such metadata.
        Empty if the file is not a RIFF file.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        md = metadata_riff(filepath, store_empty)
    except ValueError:
        # not a RIFF file
        md = {}
    return md

422 

423 

def markers(filepath):
    """Read markers of an audio file.

    The file is parsed by `markers_riff()`. If that raises a
    ValueError (not a RIFF file), empty arrays are returned.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        result = markers_riff(filepath)
    except ValueError:
        # not a RIFF file: no markers, but keep the two-column layout
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        result = (no_locs, no_labels)
    return result

461 

462 

463class AudioLoader(BufferedArray): 

464 """Buffered reading of audio data for random access of the data in the file. 

465  

466 The class allows for reading very large audio files or many 

467 sequential audio files that do not fit into memory. 

468 An AudioLoader instance can be used like a huge read-only numpy array, i.e. 

469 ``` 

470 data = AudioLoader('path/to/audio/file.wav') 

471 x = data[10000:20000,0] 

472 ``` 

473 The first index specifies the frame, the second one the channel. 

474 

475 Behind the scenes, `AudioLoader` tries to open the audio file with 

476 all available audio modules until it succeeds (first line). It 

477 then reads data from the file as necessary for the requested data 

478 (second line). Accessing the content of the audio files via a

479 buffer that holds only a part of the data is managed by the 

480 `BufferedArray` class. 

481 

482 Reading sequentially through the file is always possible. Some 

483 modules, however, (e.g. audioread, needed for mp3 files) can only 

484 read forward. If previous data are requested, then the file is read 

485 from the beginning again. This slows down access to previous data 

486 considerably. Use the `backsize` argument of the open function to 

487 make sure some data are loaded into the buffer before the requested 

488 frame. Then a subsequent access to the data within `backsize` seconds 

489 before that frame can still be handled without the need to reread 

490 the file from the beginning. 

491 

492 Usage 

493 ----- 

494 With context management: 

495 ``` 

496 import audioio as aio 

497 with aio.AudioLoader(filepath, 60.0, 10.0) as data: 

498 # do something with the content of the file: 

499 x = data[0:10000] 

500 y = data[10000:20000] 

501 z = x + y 

502 ``` 

503 

504 For using a specific audio module, here the audioread module: 

505 ``` 

506 data = aio.AudioLoader() 

507 with data.open_audioread(filepath, 60.0, 10.0): 

508 # do something ... 

509 ``` 

510 

511 Use `blocks()` for sequential, blockwise reading and processing: 

512 ``` 

513 from scipy.signal import spectrogram 

514 nfft = 2048 

515 with aio.AudioLoader('some/audio.wav') as data: 

516 for x in data.blocks(100*nfft, nfft//2): 

517 f, t, Sxx = spectrogram(x, fs=data.rate, 

518 nperseg=nfft, noverlap=nfft//2) 

519 ``` 

520 

521 For loop iterates over single frames (1-D arrays containing samples for each channel): 

522 ``` 

523 with aio.AudioLoader('some/audio.wav') as data: 

524 for x in data: 

525 print(x) 

526 ``` 

527  

528 Traditional open and close: 

529 ``` 

530 data = aio.AudioLoader(filepath, 60.0) 

531 x = data[:,:] # read the whole file 

532 data.close() 

533 ``` 

534  

535 this is the same as: 

536 ``` 

537 data = aio.AudioLoader() 

538 data.open(filepath, 60.0) 

539 ... 

540 ``` 

541 

542 Classes inheriting AudioLoader just need to implement 

543 ``` 

544 self.load_audio_buffer(offset, nsamples, pbuffer) 

545 ``` 

546 This function needs to load the supplied `pbuffer` with 

547 `nframes` frames of data starting at frame `offset`. 

548 

549 In the constructor or some kind of opening function, you need to 

550 set some member variables, as described for `BufferedArray`. 

551 

552 For loading metadata and markers, implement the functions 

553 ``` 

554 self._load_metadata(filepath, **kwargs) 

555 self._load_markers(filepath) 

556 ``` 

557  

558 Parameters 

559 ---------- 

560 filepath: str or Path or list of str or list of Path

561 Name of the file or list of many file names that should be 

562 made accessible as a single array. 

563 buffersize: float 

564 Size of internal buffer in seconds. 

565 backsize: float 

566 Part of the buffer to be loaded before the requested start index in seconds. 

567 verbose: int 

568 If larger than zero show detailed error/warning messages. 

569 store_empty: bool 

570 If `False` do not return meta data with empty values. 

571 meta_kwargs: dict 

572 Keyword arguments that are passed on to the _load_metadata() 

573 function. For audio data the only recognized key is 

574 `store_empty` - see the metadata() function for more infos. 

575 **kwargs: dict 

576 Further keyword arguments that are passed on to the  

577 specific open() functions. 

578 

579 Attributes 

580 ---------- 

581 filepath: Path 

582 Full path of the opened file. In case of many files, the first one. 

583 file_paths: list of Path 

584 List of paths of the opened files that are made accessible

585 as a single array. 

586 file_indices: list of int 

587 For each file the index of its first sample. 

588 rate: float 

589 The sampling rate of the data in Hertz.

590 channels: int 

591 The number of channels. 

592 frames: int 

593 The number of frames in the file. Same as `len()`. 

594 format: str or None 

595 Format of the audio file. 

596 encoding: str or None 

597 Encoding/subtype of the audio file. 

598 shape: tuple 

599 Frames and channels of the data. 

600 ndim: int 

601 Number of dimensions: always 2 (frames and channels). 

602 offset: int 

603 Index of first frame in the current buffer. 

604 buffer: ndarray of floats 

605 The currently available data from the file.

606 ampl_min: float 

607 Minimum amplitude the file format supports. 

608 Always -1.0 for audio data. 

609 ampl_max: float 

610 Maximum amplitude the file format supports. 

611 Always +1.0 for audio data. 

612 

613 Methods 

614 ------- 

615 - `len()`: Number of frames. 

616 - `file_start_times()`: time of first frame of each file in seconds. 

617 - `get_file_index()`: file path and index of frame contained by this file. 

618 - `open()`: Open an audio file by trying available audio modules. 

619 - `open_*()`: Open an audio file with the respective audio module. 

620 - `__getitem__`: Access data of the audio file. 

621 - `update_buffer()`: Update the internal buffer for a range of frames. 

622 - `blocks()`: Generator for blockwise processing of AudioLoader data. 

623 - `file_start_times()`: Time of first frame of each file in seconds. 

624 - `get_file_index()`: File path and index of frame contained by this file. 

625 - `basename()`: Base name of the audio data. 

626 - `format_dict()`: technical infos about how the data are stored. 

627 - `metadata()`: Metadata stored along with the audio data. 

628 - `markers()`: Markers stored along with the audio data. 

629 - `set_unwrap()`: Set parameters for unwrapping clipped data. 

630 - `set_time_delta()`: Set maximum allowed time difference between successive files. 

631 - `close()`: Close the file. 

632 

633 """ 

634 

# Class-level hint; nothing in the code visible here enforces it.
max_open_files = 5
""" Suggestion for maximum number of open file descriptors. """

# Class-level hint; nothing in the code visible here enforces it.
max_open_loaders = 10
""" Suggestion for maximum number of AudioLoaders when opening multiple files. """

640 

def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
             verbose=0, meta_kwargs=None, **kwargs):
    """Initialize the loader and optionally open an audio file.

    Parameters
    ----------
    filepath: str or Path or list of str or list of Path or None
        If not `None`, passed on to `open()`.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    meta_kwargs: dict or None
        Keyword arguments that are passed on to the _load_metadata()
        function. `None` is the same as an empty dictionary.
    **kwargs: dict
        Further keyword arguments that are passed on to `open()`.
    """
    super().__init__(verbose=verbose)
    self.format = None
    self.encoding = None
    self._metadata = None
    self._locs = None
    self._labels = None
    self._load_metadata = metadata
    self._load_markers = markers
    # each instance gets its own dictionary, never a shared default:
    self._metadata_kwargs = dict(meta_kwargs) if meta_kwargs is not None else {}
    self.filepath = None
    self.file_paths = None
    self.file_indices = []
    self._max_time_diff = 1
    self.sf = None
    self.close = self._close
    self.load_buffer = self._load_buffer_unwrap
    self.ampl_min = -1.0
    self.ampl_max = +1.0
    self.unwrap = False
    self.unwrap_thresh = 0.0
    self.unwrap_clips = False
    self.unwrap_ampl = 1.0
    # set_unwrap() and _load_buffer_unwrap() use `unwrap_down_scale`;
    # `unwrap_downscale` is kept for backward compatibility:
    self.unwrap_down_scale = True
    self.unwrap_downscale = True
    if filepath is not None:
        self.open(filepath, buffersize, backsize, verbose, **kwargs)

668 

numpy_encodings = {
    np.dtype(spec): name
    for spec, name in (
        (np.int64, 'PCM_64'),
        (np.int32, 'PCM_32'),
        (np.int16, 'PCM_16'),
        (np.single, 'FLOAT'),
        (np.double, 'DOUBLE'),
        ('>f4', 'FLOAT'),
        ('>f8', 'DOUBLE'),
    )
}
""" Map numpy dtypes to encodings.
"""

678 

679 def _close(self): 

680 pass 

681 

682 def __del__(self): 

683 self.close() 

684 

def file_start_times(self):
    """ Time of first frame of each file in seconds.

    Each entry of `self.file_indices` is divided by `self.rate`.

    Returns
    -------
    times: array of float
        Time of the first frame of each file relative to buffer start
        in seconds.
    """
    return np.array([first/self.rate for first in self.file_indices])

698 

def get_file_index(self, frame):
    """ File path and index of frame contained by this file.

    Parameters
    ----------
    frame: int
        Index of frame.

    Returns
    -------
    filepath: Path
        Path of file that contains the frame.
    index: int
        Index of the frame relative to the first frame
        in the containing file.

    Raises
    ------
    ValueError
        Invalid frame index.
    """
    if frame < 0 or frame >= self.frames:
        raise ValueError('invalid frame')
    starts = self.file_indices
    # search backwards for the last file starting at or before frame:
    k = len(starts) - 1
    while k >= 0 and not starts[k] <= frame:
        k -= 1
    if k < 0:
        # nothing found, fall back to the first file:
        k = 0
    return self.file_paths[k], frame - starts[k]

730 

def basename(self, path=None):
    """ Base name of the audio data.

    Parameters
    ----------
    path: str or Path or None
        Path of the audio file from which a base name is generated.
        If `None`, use `self.filepath`.

    Returns
    -------
    s: str
        The name. Defaults to the stem of `path`.

    """
    source = self.filepath if path is None else path
    return Path(source).stem

749 

def format_dict(self):
    """ Technical infos about how the data are stored in the file.

    Returns
    -------
    fmt: dict
        Dictionary with name, filepath, format, encoding,
        samplingrate, channels, frames, and duration of the audio
        file. Format and encoding are only included if they are not
        `None`.

    """
    fmt = {'name': self.basename(),
           'filepath': os.fsdecode(self.filepath)}
    # optional entries:
    for key in ('format', 'encoding'):
        value = getattr(self, key)
        if value is not None:
            fmt[key] = value
    rate_str = f'{self.rate:.0f}Hz'
    duration_str = f'{self.frames/self.rate:.3f}s'
    fmt['samplingrate'] = rate_str
    fmt['channels'] = self.channels
    fmt['frames'] = self.frames
    fmt['duration'] = duration_str
    return fmt

770 

def metadata(self):
    """Metadata of the audio file.

    The metadata are loaded on the first call by means of
    `self._load_metadata(self.filepath, **self._metadata_kwargs)`
    and are then cached. Without a load function the metadata
    are empty.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings. But other types like for
        example ints or floats are also allowed.  See
        `audioio.audiometadata` module for available functions to
        work with such metadata.

    """
    if self._metadata is not None:
        return self._metadata
    loader = self._load_metadata
    if loader is None:
        self._metadata = {}
    else:
        self._metadata = loader(self.filepath, **self._metadata_kwargs)
    return self._metadata

801 

def markers(self):
    """Read markers of the audio file.

    The markers are loaded on the first call by means of
    `self._load_markers(self.filepath)` and are then cached.
    Without a load function the markers are empty.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of str objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    if self._locs is not None:
        return self._locs, self._labels
    loader = self._load_markers
    if loader is None:
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
    else:
        self._locs, self._labels = loader(self.filepath)
    return self._locs, self._labels

824 

def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
    """Set parameters for unwrapping clipped data.

    See unwrap() function from the audioio package.

    Parameters
    ----------
    thresh: float
        Threshold for detecting wrapped data relative to self.unwrap_ampl
        which is set to self.ampl_max.
        Unwrapping is only enabled if `thresh` is larger than 1e-3.
    clips: bool
        If True, then clip the unwrapped data properly.
        Otherwise, unwrap the data and double the
        minimum and maximum data range
        (self.ampl_min and self.ampl_max).
    down_scale: bool
        If not `clips`, then downscale the signal by a factor of two,
        in order to keep the range between -1 and 1.
    unit: str
        Unit of the data.
    """
    self.unwrap_ampl = self.ampl_max
    self.unwrap_thresh = thresh
    self.unwrap_clips = clips
    self.unwrap_down_scale = down_scale
    self.unwrap = thresh > 1e-3
    if not self.unwrap:
        # nothing to be noted in the metadata
        return
    if self.unwrap_clips:
        add_unwrap(self.metadata(),
                   self.unwrap_thresh*self.unwrap_ampl,
                   self.unwrap_ampl, unit)
        return
    if down_scale:
        # the data get halved, note this in the metadata:
        update_gain(self.metadata(), 0.5)
        add_unwrap(self.metadata(),
                   0.5*self.unwrap_thresh*self.unwrap_ampl,
                   0.0, unit)
        return
    # neither clipped nor downscaled: the data range doubles
    self.ampl_min *= 2
    self.ampl_max *= 2
    add_unwrap(self.metadata(),
               self.unwrap_thresh*self.unwrap_ampl,
               0.0, unit)

868 

869 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer): 

870 """Load new data and unwrap it. 

871 

872 Parameters 

873 ---------- 

874 r_offset: int 

875 First frame to be read from file. 

876 r_size: int 

877 Number of frames to be read from file. 

878 pbuffer: ndarray 

879 Buffer where to store the loaded data. 

880 """ 

881 self.load_audio_buffer(r_offset, r_size, pbuffer) 

882 if self.unwrap: 

883 # TODO: handle edge effects! 

884 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl) 

885 if self.unwrap_clips: 

886 pbuffer[pbuffer > self.ampl_max] = self.ampl_max 

887 pbuffer[pbuffer < self.ampl_min] = self.ampl_min 

888 elif self.unwrap_down_scale: 

889 pbuffer *= 0.5 

890 

def set_time_delta(self, time_delta):
    """ Set maximum allowed time difference between successive files.

    Parameters
    ----------
    time_delta: int
        Maximum number of seconds the start time of a recording file is allowed
        to differ from the end of the previous file.
        Default is one second.
    """
    # `self` was missing from the signature, so the method could not
    # be called on an instance.
    self._max_time_diff = time_delta

902 

903 # wave interface:  

def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
              verbose=0):
    """Open audio file for reading using the wave module.

    Note: we assume that setpos() and tell() use integer numbers!

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        The loader itself.

    Raises
    ------
    ImportError
        The wave module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wave(filepath) with filepath={filepath}')
    if not audio_modules['wave']:
        # leave the loader in an empty state:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.size = 0
        self.shape = (0, 0)
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wave()
    self.sf = wave.open(os.fspath(filepath), 'r')
    reader = self.sf
    self.filepath = Path(filepath)
    self.file_paths = [self.filepath]
    self.file_indices = [0]
    self.rate = float(reader.getframerate())
    self.format = 'WAV'
    nbytes = reader.getsampwidth()
    # one byte per sample is read as unsigned, wider samples as signed:
    single_byte = nbytes == 1
    self.dtype = 'u1' if single_byte else f'i{nbytes}'
    self.encoding = 'PCM_U8' if single_byte else f'PCM_{nbytes*8}'
    self.factor = 1.0/(2.0**(nbytes*8-1))
    self.channels = reader.getnchannels()
    self.frames = reader.getnframes()
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wave
    self.load_audio_buffer = self._load_buffer_wave
    # read 1 frame to determine the unit of the position values:
    self.p0 = reader.tell()
    reader.readframes(1)
    self.pfac = reader.tell() - self.p0
    reader.setpos(self.p0)
    return self

968 

969 def _close_wave(self): 

970 """Close the audio file using the wave module. """ 

971 if self.sf is not None: 

972 self.sf.close() 

973 self.sf = None 

974 

975 def _load_buffer_wave(self, r_offset, r_size, buffer): 

976 """Load new data from file using the wave module. 

977 

978 Parameters 

979 ---------- 

980 r_offset: int 

981 First frame to be read from file. 

982 r_size: int 

983 Number of frames to be read from file. 

984 buffer: ndarray 

985 Buffer where to store the loaded data. 

986 """ 

987 if self.sf is None: 

988 self.sf = wave.open(os.fspath(self.filepath), 'r') 

989 self.sf.setpos(r_offset*self.pfac + self.p0) 

990 fbuffer = self.sf.readframes(r_size) 

991 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels)) 

992 if self.dtype[0] == 'u': 

993 buffer[:, :] = fbuffer * self.factor - 1.0 

994 else: 

995 buffer[:, :] = fbuffer * self.factor 

996 

997 

998 # ewave interface:  

def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
               verbose=0):
    """Open audio file for reading using the ewave module.

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        The loader itself.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_ewave(filepath) with filepath={filepath}')
    if not audio_modules['ewave']:
        # leave the loader in an empty state:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_ewave()
    self.sf = ewave.open(os.fspath(filepath), 'r')
    reader = self.sf
    self.filepath = Path(filepath)
    self.file_paths = [self.filepath]
    self.file_indices = [0]
    self.rate = float(reader.sampling_rate)
    self.channels = reader.nchannels
    self.frames = reader.nframes
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = 'WAV' # or WAVEX?
    # the encoding is looked up from the dtype reported by ewave:
    self.encoding = self.numpy_encodings[reader.dtype]
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_ewave
    self.load_audio_buffer = self._load_buffer_ewave
    return self

1049 

1050 def _close_ewave(self): 

1051 """Close the audio file using the ewave module. """ 

1052 if self.sf is not None: 

1053 del self.sf 

1054 self.sf = None 

1055 

def _load_buffer_ewave(self, r_offset, r_size, buffer):
    """Read frames via the ewave module and store them in `buffer`.

    The file is reopened from `self.filepath` if it is not open.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = ewave.open(os.fspath(self.filepath), 'r')
    raw = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
    data = ewave.rescale(raw, 'float')
    if data.ndim == 1:
        # one-dimensional data are turned into a single column:
        data = data.reshape((-1, 1))
    buffer[:,:] = data

1075 

1076 

1077 # soundfile interface:  

def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Prepare reading of an audio file via the SoundFile module.

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self:
        This loader.

    Raises
    ------
    ImportError
        The SoundFile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_soundfile(filepath) with filepath={filepath}')
    if not audio_modules['soundfile']:
        # module not available: leave an empty loader behind
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_soundfile()
    handle = soundfile.SoundFile(filepath, 'r')
    self.sf = handle
    path = Path(filepath)
    self.filepath = path
    self.file_paths = [path]
    self.file_indices = [0]
    self.rate = float(handle.samplerate)
    self.channels = handle.channels
    self.frames = 0
    self.size = 0
    if handle.seekable():
        # seek to the end to get the number of frames, then rewind:
        self.frames = handle.seek(0, soundfile.SEEK_END)
        handle.seek(0, soundfile.SEEK_SET)
    # TODO: if not seekable, we cannot handle that file!
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = handle.format
    self.encoding = handle.subtype
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the SoundFile specific callbacks:
    self.close = self._close_soundfile
    self.load_audio_buffer = self._load_buffer_soundfile
    return self

1133 

1134 def _close_soundfile(self): 

1135 """Close the audio file using the SoundFile module. """ 

1136 if self.sf is not None: 

1137 self.sf.close() 

1138 self.sf = None 

1139 

def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Read frames via the SoundFile module and store them in `buffer`.

    The file is reopened from `self.filepath` if it is not open.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = soundfile.SoundFile(self.filepath, 'r')
    handle = self.sf
    handle.seek(r_offset, soundfile.SEEK_SET)
    data = handle.read(r_size, always_2d=True)
    buffer[:, :] = data

1156 

1157 

1158 # wavefile interface:  

def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Prepare reading of an audio file via the wavefile module.

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self:
        This loader.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # module not available: leave an empty loader behind
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wavefile()
    self.sf = wavefile.WaveReader(os.fspath(filepath))
    path = Path(filepath)
    self.filepath = path
    self.file_paths = [path]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = self.sf.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # find the names of the integer constants in wavefile.Format that
    # match the type and the subtype bits of the file's format code:
    formats = wavefile.Format
    for name in dir(formats):
        code = getattr(formats, name)
        if not isinstance(code, int):
            continue
        if (code & formats.TYPEMASK) > 0 and \
           (self.sf.format & formats.TYPEMASK) == code:
            self.format = name
        if (code & formats.SUBMASK) > 0 and \
           (self.sf.format & formats.SUBMASK) == code:
            self.encoding = name
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the wavefile specific callbacks:
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self

1218 

1219 def _close_wavefile(self): 

1220 """Close the audio file using the wavefile module. """ 

1221 if self.sf is not None: 

1222 self.sf.close() 

1223 self.sf = None 

1224 

def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Read frames via the wavefile module and store them in `buffer`.

    The file is reopened from `self.filepath` if it is not open.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = wavefile.WaveReader(os.fspath(self.filepath))
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    # read into a buffer provided by the reader ...
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    # ... and store its transpose:
    buffer[:,:] = chunk.T

1243 

1244 

1245 # audioread interface:  

def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Prepare reading of an audio file via the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self:
        This loader.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # module not available: leave an empty loader behind
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_audioread()
    decoder = audioread.audio_open(filepath)
    self.sf = decoder
    path = Path(filepath)
    self.filepath = path
    self.file_paths = [path]
    self.file_indices = [0]
    self.rate = float(decoder.samplerate)
    self.channels = decoder.channels
    # number of frames is computed from the duration, rounded up:
    self.frames = int(np.ceil(self.rate*decoder.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # the most recently decoded block and its position in the file:
    self.read_buffer = np.zeros((0,0))
    self.read_offset = 0
    # install the audioread specific callbacks:
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = decoder.__iter__()
    return self

1300 

1301 def _close_audioread(self): 

1302 """Close the audio file using the audioread module. """ 

1303 if self.sf is not None: 

1304 self.sf.__exit__(None, None, None) 

1305 self.sf = None 

1306 

1307 def _load_buffer_audioread(self, r_offset, r_size, buffer): 

1308 """Load new data from file using the audioread module. 

1309 

1310 audioread can only iterate through a file once and in blocksizes that are 

1311 given by audioread. Therefore we keep yet another buffer: `self.read_buffer` 

1312 at file offset `self.read_offset` containing whatever audioread returned. 

1313 

1314 Parameters 

1315 ---------- 

1316 r_offset: int 

1317 First frame to be read from file. 

1318 r_size: int 

1319 Number of frames to be read from file. 

1320 buffer: ndarray 

1321 Buffer where to store the loaded data. 

1322 """ 

1323 if self.sf is None: 

1324 self.sf = audioread.audio_open(self.filepath) 

1325 b_offset = 0 

1326 if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size 

1327 and self.read_offset < r_offset + r_size ): 

1328 # read_buffer overlaps at the end of the requested interval: 

1329 i = 0 

1330 n = r_offset + r_size - self.read_offset 

1331 if n > r_size: 

1332 i += n - r_size 

1333 n = r_size 

1334 buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0) 

1335 if self.verbose > 2: 

1336 print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)') 

1337 r_size -= n 

1338 if r_size <= 0: 

1339 return 

1340 # go back to beginning of file: 

1341 if r_offset < self.read_offset: 

1342 if self.verbose > 2: 

1343 print(' rewind') 

1344 self._close_audioread() 

1345 self.sf = audioread.audio_open(self.filepath) 

1346 self.sf_iter = self.sf.__iter__() 

1347 self.read_buffer = np.zeros((0,0)) 

1348 self.read_offset = 0 

1349 # read to position: 

1350 while self.read_offset + self.read_buffer.shape[0] < r_offset: 

1351 self.read_offset += self.read_buffer.shape[0] 

1352 try: 

1353 if hasattr(self.sf_iter, 'next'): 

1354 fbuffer = self.sf_iter.next() 

1355 else: 

1356 fbuffer = next(self.sf_iter) 

1357 except StopIteration: 

1358 self.read_buffer = np.zeros((0,0)) 

1359 buffer[:,:] = 0.0 

1360 if self.verbose > 1: 

1361 print(f' caught StopIteration, padded buffer with {r_size} zeros') 

1362 break 

1363 self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels) 

1364 if self.verbose > 2: 

1365 print(f' read forward by {self.read_buffer.shape[0]} frames') 

1366 # recycle file data: 

1367 if ( self.read_offset + self.read_buffer.shape[0] > r_offset 

1368 and self.read_offset <= r_offset ): 

1369 i = r_offset - self.read_offset 

1370 n = self.read_offset + self.read_buffer.shape[0] - r_offset 

1371 if n > r_size: 

1372 n = r_size 

1373 buffer[:n,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0) 

1374 if self.verbose > 2: 

1375 print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)') 

1376 b_offset += n 

1377 r_offset += n 

1378 r_size -= n 

1379 # read data: 

1380 if self.verbose > 2 and r_size > 0: 

1381 print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)') 

1382 while r_size > 0: 

1383 self.read_offset += self.read_buffer.shape[0] 

1384 try: 

1385 if hasattr(self.sf_iter, 'next'): 

1386 fbuffer = self.sf_iter.next() 

1387 else: 

1388 fbuffer = next(self.sf_iter) 

1389 except StopIteration: 

1390 self.read_buffer = np.zeros((0,0)) 

1391 buffer[b_offset:,:] = 0.0 

1392 if self.verbose > 1: 

1393 print(f' caught StopIteration, padded buffer with {r_size} zeros') 

1394 break 

1395 self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels) 

1396 n = self.read_buffer.shape[0] 

1397 if n > r_size: 

1398 n = r_size 

1399 if n > 0: 

1400 buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / (2.0**15-1.0) 

1401 if self.verbose > 2: 

1402 print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)') 

1403 b_offset += n 

1404 r_offset += n 

1405 r_size -= n 

1406 

1407 

1408 # open multiple audio files as one: 

def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Open multiple audio files as a single concatenated array.

    Without `end_indices` each file is opened with an `AudioLoader`.
    Files that cannot be opened are skipped. The first file that
    differs from the first opened file in number of channels or
    sampling rate, or whose start time deviates by more than
    `self._max_time_diff` seconds from the expected one, terminates
    the list: this file and all following ones are not used.

    Parameters
    ----------
    filepaths: list of str or Path
        List of file paths of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.

    Returns
    -------
    self:
        This loader.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empty sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file:
    start_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization, no file is opened:
        self.filepath = Path(filepaths[0])
        self.file_paths = [Path(fp) for fp in filepaths]
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                a = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # skip files that cannot be opened:
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = a.metadata()
            fmd = flatten_metadata(md, True)
            add_metadata(self._metadata, fmd)
            if self.filepath is None:
                # first file:
                self.filepath = a.filepath
                self.format = a.format
                self.encoding = a.encoding
                self.rate = a.rate
                self.channels = a.channels
                self.start_time = get_datetime(md)
                start_time = self.start_time
                stime = self.start_time
            else:
                # check channels and rate:
                error_str = None
                if a.channels != self.channels:
                    error_str = f'number of channels differs: ' \
                                f'{a.channels} in {a.filepath} versus ' \
                                f'{self.channels} in {self.filepath}'
                if a.rate != self.rate:
                    error_str = f'sampling rates differ: ' \
                                f'{a.rate} in {a.filepath} versus ' \
                                f'{self.rate} in {self.filepath}'
                # check start time of recording:
                stime = get_datetime(md)
                if start_time is not None and stime is not None and \
                   abs(start_time - stime) > timedelta(seconds=self._max_time_diff):
                    error_str = f'start time does not indicate continuous recording: ' \
                                f'expected {start_time} instead of ' \
                                f'{stime} in {a.filepath}'
                if error_str is not None:
                    # this file does not fit, stop here:
                    if verbose > 0:
                        print(error_str)
                    a.close()
                    del a
                    break
            # markers:
            locs, labels = a.markers()
            # shift marker positions by the frames of the preceding files:
            locs[:,0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += a.frames
            self.end_indices.append(self.frames)
            if stime is not None:
                start_time = stime + timedelta(seconds=a.frames/a.rate)
            # add file to lists:
            self.file_paths.append(a.filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(a)
            else:
                a.close()
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(a)
                self.open_loaders.append(a)
            else:
                a.close()
                del a
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set startime from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self

1563 

1564 def _close_multiple(self): 

1565 """Close all the audio files. """ 

1566 self.open_files = [] 

1567 self.open_loaders = [] 

1568 if hasattr(self, 'audio_files'): 

1569 for a in self.audio_files: 

1570 if a is not None: 

1571 a.close() 

1572 self.audio_files = [] 

1573 self.filepath = None 

1574 self.file_paths = [] 

1575 self.file_indices = [] 

1576 self.start_indices = [] 

1577 self.end_indices = [] 

1578 del self.audio_files 

1579 del self.open_files 

1580 del self.open_loaders 

1581 del self.start_indices 

1582 del self.end_indices 

1583 

def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Load new data from the underlying files.

    The requested range of frames is split at the file boundaries
    given by `self.start_indices` and `self.end_indices`, and each
    part is loaded by the `load_audio_buffer()` function of the
    loader of the respective file. Loaders are created on demand.
    `self.open_loaders` and `self.open_files` are kept in order of
    usage (most recently used last) and are trimmed from the front to
    at most `AudioLoader.max_open_loaders` and
    `AudioLoader.max_open_files` entries, respectively.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    offs = r_offset    # next frame to be loaded (index in concatenated data)
    size = r_size      # number of frames that still need to be loaded
    boffs = 0          # position in buffer where to store the next frames
    # index of the first file whose end index is larger than offs:
    ai = np.searchsorted(self.end_indices, offs, side='right')
    while size > 0:
        if self.audio_files[ai] is None:
            # no loader for this file, create one:
            a = AudioLoader(self.file_paths[ai],
                            self.buffersize, self.backsize, 0)
            self.audio_files[ai] = a
            self.open_loaders.append(a)
            self.open_files.append(a)
            if len(self.open_files) > AudioLoader.max_open_files:
                # too many open files, close the first one in the list
                # (its loader is kept):
                a0 = self.open_files.pop(0)
                a0.close()
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                # too many loaders, discard the first one in the list:
                a0 = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(a0)] = None
                a0.close()
                del a0
                # run the garbage collector only every
                # max_open_loaders//2 + 1 discarded loaders:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        else:
            # loader exists, move it to the end of the list:
            self.open_loaders.pop(self.open_loaders.index(self.audio_files[ai]))
            self.open_loaders.append(self.audio_files[ai])
        # range of frames relative to the start of file ai,
        # clipped to the end of this file:
        ai0 = offs - self.start_indices[ai]
        ai1 = offs + size
        if ai1 > self.end_indices[ai]:
            ai1 = self.end_indices[ai]
        ai1 -= self.start_indices[ai]
        n = ai1 - ai0
        self.audio_files[ai].load_audio_buffer(ai0, n,
                                               buffer[boffs:boffs + n,:])
        # move the loader to the end of the list of open files
        # (it is appended even if it was not in the list before):
        if self.audio_files[ai] in self.open_files:
            self.open_files.pop(self.open_files.index(self.audio_files[ai]))
        self.open_files.append(self.audio_files[ai])
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        # continue with the next file:
        boffs += n
        offs += n
        size -= n
        ai += 1

1640 

1641 

def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    A sequence with more than one path is passed on to
    `open_multiple()`. If less than two of these files end up in
    `self.file_paths`, the first one is opened as a single file
    instead. A single file is opened by trying the available audio
    modules one after the other until one of them reports more than
    zero frames.

    Parameters
    ----------
    filepath: str or Path or list of str or Path
        Path of the file or list of many file paths that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the 
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self:
        This loader.

    Raises
    ------
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.
    IndexError
        `filepath` is an empty sequence.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    if isinstance(filepath, (list, tuple, np.ndarray)):
        if len(filepath) > 1:
            self.open_multiple(filepath, buffersize, backsize,
                               verbose - 1, **kwargs)
            if len(self.file_paths) > 1:
                return self
            # only a single file is left, open it as a single file:
            filepath = self.file_paths[0]
            self.close()
        else:
            filepath = filepath[0]
    filepath = Path(filepath)
    if not filepath.is_file():
        raise FileNotFoundError(f'file "{filepath}" not found')
    if filepath.stat().st_size <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize,
                      verbose - 1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
        except Exception as e:
            # remember the reason and try the next module:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules succeeded:
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

1729 

1730 

def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the file with `load_audio()`, compares the soundfile and
    audioread backends if both are available, and checks random,
    forward, and backward access via `AudioLoader` against the fully
    loaded data.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # pyplot is needed for plotting only, import it right here
        # where it is used:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        # length of the slices used below, 1.5 seconds in frames:
        nframes = int(1.5*data.rate)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        for inx in np.random.randint(0, len(data)-nframes, 1000):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()

1798 

1799 

def main(*args):
    """Call demo with command line arguments.

    Recognized arguments are `-h` (print usage), `-p` (plot the data),
    and `-m <module>` (select the audio module to be used). The first
    other argument is taken as the path of the audio file; arguments
    following it are ignored. Without a file path the usage is printed.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    # make pyplot available as module-level `plt` for plotting code
    # outside of this function:
    global plt

    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    expect_module = False  # True if the next argument is a module name
    for arg in args:
        if expect_module:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            expect_module = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            expect_module = True
        else:
            file_path = arg
            break

    # without a file there is nothing to demonstrate, show usage instead:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        import matplotlib.pyplot as plt

    demo(file_path, plot)

1843 

1844 

if __name__ == "__main__":
    # run the demo on the command line arguments (without the program name):
    cli_args = sys.argv[1:]
    main(*cli_args)