Coverage for src/audioio/audioloader.py: 90%

818 statements  

« prev     ^ index     » next       coverage.py v7.7.0, created at 2025-03-18 22:33 +0000

1"""Loading data, metadata, and markers from audio files. 

2 

3- `load_audio()`: load a whole audio file at once. 

4- `metadata()`: read metadata of an audio file. 

5- `markers()`: read markers of an audio file. 

6- class `AudioLoader`: read data from audio files in chunks. 

7 

8The read in data are always numpy arrays of floats ranging between -1 and 1. 

9The arrays are 2-D ndarrays with first axis time and second axis channel, 

10even for single channel data. 

11 

12If an audio file cannot be loaded, you might need to install 

13additional packages. See 

14[installation](https://bendalab.github.io/audioio/installation) for 

15further instructions. 

16 

17For a demo run the module as: 

18``` 

19python -m src.audioio.audioloader audiofile.wav 

20``` 

21""" 

22 

23import gc 

24import sys 

25import warnings 

26import os.path 

27import numpy as np 

28from datetime import timedelta 

29from .audiomodules import * 

30from .bufferedarray import BufferedArray 

31from .riffmetadata import metadata_riff, markers_riff 

32from .audiometadata import update_gain, add_unwrap, get_datetime 

33from .audiometadata import flatten_metadata, add_metadata, set_starttime 

34from .audiotools import unwrap 

35 

36 

def load_wave(filepath):
    """Load a wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3.8/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray of floats,
        first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wave']:
        raise ImportError

    # The object returned by wave.open() is a context manager. Using it
    # guarantees that the file is closed even if reading the frames fails:
    with wave.open(filepath, 'r') as wf:
        nchannels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        rate = wf.getframerate()
        buffer = wf.readframes(wf.getnframes())
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # 8-bit samples are unsigned; shift them to be centered on zero:
        samples = np.frombuffer(buffer, dtype='u1').reshape(-1, nchannels)
        data = samples.astype('d')/factor - 1.0
    elif sampwidth == 3:
        # numpy has no 3-byte integer type ('i3' is rejected), so
        # assemble the little-endian 24-bit samples from single bytes:
        parts = np.frombuffer(buffer, dtype='u1').reshape(-1, 3).astype('i4')
        samples = parts[:, 0] | (parts[:, 1] << 8) | (parts[:, 2] << 16)
        # sign extension: values with bit 23 set are negative
        samples -= (samples & 0x800000) << 1
        data = samples.reshape(-1, nchannels).astype('d')/factor
    else:
        # wav data are stored little-endian, independent of the host:
        samples = np.frombuffer(buffer, dtype=f'<i{sampwidth}').reshape(-1, nchannels)
        data = samples.astype('d')/factor
    return data, float(rate)

80 

81 

def load_ewave(filepath):
    """Read a complete wav file by means of the ewave module.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['ewave']:
        raise ImportError

    with ewave.open(filepath, 'r') as wav:
        samplerate = wav.sampling_rate
        data = ewave.rescale(wav.read(), 'float')
    # a 1-D result is turned into a single-column 2-D array:
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    return data, float(samplerate)

120 

121 

def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings of wavfile.read() only for the duration of the
    # call. catch_warnings() restores the caller's warning filters
    # afterwards, also when reading raises an exception:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # unsigned 8-bit data are centered on 128:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # scale signed integers by their full range:
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        # any other dtype is converted to float64 without scaling:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data, (-1, 1))
    return data, float(rate)

164 

165 

def load_soundfile(filepath):
    """Read a complete audio file with SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    with soundfile.SoundFile(filepath, 'r') as snd:
        samplerate = snd.samplerate
        # frames=-1 requests all frames, always_2d keeps the channel axis:
        frames = snd.read(frames=-1, dtype='float64', always_2d=True)
    return frames, float(samplerate)

202 

203 

def load_wavefile(filepath):
    """Read a complete audio file with wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    samplerate, loaded = wavefile.load(filepath)
    # convert to float64 and swap the two axes of the loaded array:
    frames = loaded.astype(np.float64, copy=False).T
    return frames, float(samplerate)

235 

236 

def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # The array is allocated from the duration reported by the file.
        # The bounds check below guards against more decoded frames:
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            n = fulldata.shape[0]
            if index + n > len(data):
                # copy only as many frames as still fit into `data`
                # (the remaining capacity, not the size of this block):
                n = len(data) - index
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    # NOTE(review): scaling by 2**15 - 1 differs from the 2**15 used by
    # the other loaders in this module - confirm that this is intended.
    return data/(2.0**15-1.0), float(rate)

283 

284 

# Pairs of module name and load function. load_audio() iterates over
# this tuple in the given order and uses the module name as key into
# `audio_modules` to check whether the module is available.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""

299 

300 

def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    if filepath is None or len(filepath) == 0:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            # a loader returning no frames counts as a failed attempt,
            # the next module is tried:
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
        except Exception as e:
            # remember the reason and go on with the next module:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' +
                      ', '.join(not_installed) + ' packages.')
    # none of the modules succeeded:
    raise IOError('\n'.join(errors))

380 

381 

def metadata(filepath, store_empty=False):
    """Read the metadata stored in an audio file.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file. Keys of the nested
        dictionaries are always strings. If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary. All other types of
        values are values for the respective key. In particular they
        are strings. But other types like for example ints or floats
        are also allowed. See `audioio.audiometadata` module for
        available functions to work with such metadata.
        An empty dictionary is returned if `metadata_riff()` raises
        a ValueError.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        meta_data = metadata_riff(filepath, store_empty)
    except ValueError:
        # not a RIFF file
        meta_data = {}
    return meta_data

417 

418 

def markers(filepath):
    """Read the markers stored in an audio file.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
        Both arrays have zero rows if `markers_riff()` raises
        a ValueError.

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        result = markers_riff(filepath)
    except ValueError:
        # not a RIFF file
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        result = (no_locs, no_labels)
    return result

451 

452 

453class AudioLoader(BufferedArray): 

454 """Buffered reading of audio data for random access of the data in the file. 

455  

456 The class allows for reading very large audio files or many 

457 sequential audio files that do not fit into memory. 

458 An AudioLoader instance can be used like a huge read-only numpy array, i.e. 

459 ``` 

460 data = AudioLoader('path/to/audio/file.wav') 

461 x = data[10000:20000,0] 

462 ``` 

463 The first index specifies the frame, the second one the channel. 

464 

465 Behind the scenes, `AudioLoader` tries to open the audio file with 

466 all available audio modules until it succeeds (first line). It 

467 then reads data from the file as necessary for the requested data 

468 (second line). Accessing the content of the audio files via a 

469 buffer that holds only a part of the data is managed by the 

470 `BufferedArray` class. 

471 

472 Reading sequentially through the file is always possible. Some 

473 modules, however, (e.g. audioread, needed for mp3 files) can only 

474 read forward. If previous data are requested, then the file is read 

475 from the beginning again. This slows down access to previous data 

476 considerably. Use the `backsize` argument of the open function to 

477 make sure some data are loaded into the buffer before the requested 

478 frame. Then a subsequent access to the data within `backsize` seconds 

479 before that frame can still be handled without the need to reread 

480 the file from the beginning. 

481 

482 Usage 

483 ----- 

484 With context management: 

485 ``` 

486 import audioio as aio 

487 with aio.AudioLoader(filepath, 60.0, 10.0) as data: 

488 # do something with the content of the file: 

489 x = data[0:10000] 

490 y = data[10000:20000] 

491 z = x + y 

492 ``` 

493 

494 For using a specific audio module, here the audioread module: 

495 ``` 

496 data = aio.AudioLoader() 

497 with data.open_audioread(filepath, 60.0, 10.0): 

498 # do something ... 

499 ``` 

500 

501 Use `blocks()` for sequential, blockwise reading and processing: 

502 ``` 

503 from scipy.signal import spectrogram 

504 nfft = 2048 

505 with aio.AudioLoader('some/audio.wav') as data: 

506 for x in data.blocks(100*nfft, nfft//2): 

507 f, t, Sxx = spectrogram(x, fs=data.rate, 

508 nperseg=nfft, noverlap=nfft//2) 

509 ``` 

510 

511 For loop iterates over single frames (1-D arrays containing samples for each channel): 

512 ``` 

513 with aio.AudioLoader('some/audio.wav') as data: 

514 for x in data: 

515 print(x) 

516 ``` 

517  

518 Traditional open and close: 

519 ``` 

520 data = aio.AudioLoader(filepath, 60.0) 

521 x = data[:,:] # read the whole file 

522 data.close() 

523 ``` 

524  

525 this is the same as: 

526 ``` 

527 data = aio.AudioLoader() 

528 data.open(filepath, 60.0) 

529 ... 

530 ``` 

531 

532 Classes inheriting AudioLoader just need to implement 

533 ``` 

534 self.load_audio_buffer(offset, nsamples, pbuffer) 

535 ``` 

536 This function needs to load the supplied `pbuffer` with 

537 `nsamples` frames of data starting at frame `offset`. 

538 

539 In the constructor or some kind of opening function, you need to 

540 set some member variables, as described for `BufferedArray`. 

541 

542 For loading metadata and markers, implement the functions 

543 ``` 

544 self._load_metadata(filepath, **kwargs) 

545 self._load_markers(filepath) 

546 ``` 

547  

548 Parameters 

549 ---------- 

550 filepath: str or list of str 

551 Name of the file or list of many file names that should be 

552 made accessible as a single array. 

553 buffersize: float 

554 Size of internal buffer in seconds. 

555 backsize: float 

556 Part of the buffer to be loaded before the requested start index in seconds. 

557 verbose: int 

558 If larger than zero show detailed error/warning messages. 

559 store_empty: bool 

560 If `False` do not return meta data with empty values. 

561 

562 Attributes 

563 ---------- 

564 filepath: str 

565 Name and path of the opened file. In case of many files, the first one. 

566 file_paths: list of str 

567 List of paths of the opened files that are made accessible 

568 as a single array. 

569 file_indices: list of int 

570 For each file the index of its first sample. 

571 rate: float 

572 The sampling rate of the data in Hertz. 

573 channels: int 

574 The number of channels. 

575 frames: int 

576 The number of frames in the file. Same as `len()`. 

577 format: str or None 

578 Format of the audio file. 

579 encoding: str or None 

580 Encoding/subtype of the audio file. 

581 shape: tuple 

582 Frames and channels of the data. 

583 ndim: int 

584 Number of dimensions: always 2 (frames and channels). 

585 offset: int 

586 Index of first frame in the current buffer. 

587 buffer: ndarray of floats 

588 The currently available data from the file. 

589 ampl_min: float 

590 Minimum amplitude the file format supports. 

591 Always -1.0 for audio data. 

592 ampl_max: float 

593 Maximum amplitude the file format supports. 

594 Always +1.0 for audio data. 

595 

596 Methods 

597 ------- 

598 - `len()`: Number of frames. 

599 - `file_start_times()`: time of first frame of each file in seconds. 

600 - `get_file_index()`: file path and index of frame contained by this file. 

601 - `open()`: Open an audio file by trying available audio modules. 

602 - `open_*()`: Open an audio file with the respective audio module. 

603 - `__getitem__`: Access data of the audio file. 

604 - `update_buffer()`: Update the internal buffer for a range of frames. 

605 - `blocks()`: Generator for blockwise processing of AudioLoader data. 

606 - `format_dict()`: technical infos about how the data are stored. 

607 - `metadata()`: Metadata stored along with the audio data. 

608 - `markers()`: Markers stored along with the audio data. 

609 - `set_unwrap()`: Set parameters for unwrapping clipped data. 

610 - `close()`: Close the file. 

611 

612 """ 

613 

# Class-level settings; the strings following each assignment describe them.
max_open_files = 5
""" Suggestion for maximum number of open file descriptors. """

max_open_loaders = 10
""" Suggestion for maximum number of AudioLoaders when opening multiple files. """

619 

def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
             verbose=0, **meta_kwargs):
    """Set up an unopened loader and optionally open `filepath`.

    Parameters
    ----------
    filepath: str or list of str or None
        If not `None`, it is passed on to `open()`.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **meta_kwargs:
        Keyword arguments that `metadata()` passes on to the
        function loading the metadata.
    """
    super().__init__(verbose=verbose)
    self.format = None
    self.encoding = None
    # metadata and markers are loaded lazily on first request:
    self._metadata = None
    self._locs = None
    self._labels = None
    self._load_metadata = metadata
    self._load_markers = markers
    self._metadata_kwargs = meta_kwargs
    self.filepath = None
    self.file_paths = None
    self.file_indices = []
    self.sf = None
    # the open_*() functions rebind `close` to their own close function:
    self.close = self._close
    self.load_buffer = self._load_buffer_unwrap
    self.ampl_min = -1.0
    self.ampl_max = +1.0
    self.unwrap = False
    self.unwrap_thresh = 0.0
    self.unwrap_clips = False
    self.unwrap_ampl = 1.0
    # `set_unwrap()` and `_load_buffer_unwrap()` use the attribute
    # `unwrap_down_scale`; initialize it here so that it always exists:
    self.unwrap_down_scale = True
    # previous spelling, kept for code that might still read it:
    self.unwrap_downscale = True
    if filepath is not None:
        self.open(filepath, buffersize, backsize, verbose)

646 

# Lookup table used by open_ewave() to set `self.encoding`
# from the dtype reported by the opened file.
numpy_encodings = {np.dtype(np.int64): 'PCM_64',
                   np.dtype(np.int32): 'PCM_32',
                   np.dtype(np.int16): 'PCM_16',
                   np.dtype(np.single): 'FLOAT',
                   np.dtype(np.double): 'DOUBLE',
                   np.dtype('>f4'): 'FLOAT',
                   np.dtype('>f8'): 'DOUBLE'}
""" Map numpy dtypes to encodings.
"""

656 

def _close(self):
    """Close function of a loader without an open file: does nothing.

    `__init__()` assigns this function to `self.close`.
    """
    pass

659 

def __del__(self):
    """Close the file when the loader is garbage collected."""
    # `close` is assigned as an instance attribute in __init__(). If
    # construction failed before that, there is nothing to close and
    # the finalizer must not raise an AttributeError:
    close = getattr(self, 'close', None)
    if close is not None:
        close()

662 

def file_start_times(self):
    """Start time of each file in seconds.

    Returns
    -------
    times: array of float
        For each entry of `self.file_indices` the index of the
        first frame divided by the sampling rate `self.rate`.
    """
    return np.array([first/self.rate for first in self.file_indices])

676 

def get_file_index(self, frame):
    """Find the file containing a frame and the frame's index in it.

    Parameters
    ----------
    frame: int
        Index of frame.

    Returns
    -------
    filepath: str
        Path of file that contains the frame.
    index: int
        Index of the frame relative to the first frame
        in the containing file.

    Raises
    ------
    ValueError
        `frame` is negative or not less than `self.frames`.
    """
    if not (0 <= frame < self.frames):
        raise ValueError('invalid frame')
    # search backwards for the last file starting at or before `frame`,
    # fall back to the first file if there is none:
    candidates = range(len(self.file_indices) - 1, -1, -1)
    k = next((i for i in candidates if self.file_indices[i] <= frame), 0)
    return self.file_paths[k], frame - self.file_indices[k]

703 

def format_dict(self):
    """Summarize how the data are stored in the file.

    Returns
    -------
    fmt: dict
        Dictionary with filepath, format, encoding, samplingrate,
        channels, frames, and duration of the audio file.
        `format` and `encoding` are left out if they are `None`.

    """
    fmt = {'filepath': self.filepath}
    for key in ('format', 'encoding'):
        value = getattr(self, key)
        if value is not None:
            fmt[key] = value
    fmt['samplingrate'] = f'{self.rate:.0f}Hz'
    fmt['channels'] = self.channels
    fmt['frames'] = self.frames
    fmt['duration'] = f'{self.frames/self.rate:.3f}s'
    return fmt

724 

def metadata(self):
    """Metadata of the audio file.

    The metadata are loaded on the first call by means of
    `self._load_metadata` and are then kept for subsequent calls.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file. Keys of the nested
        dictionaries are always strings. If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings. But other types like for
        example ints or floats are also allowed. See
        `audioio.audiometadata` module for available functions to
        work with such metadata.

    """
    if self._metadata is not None:
        return self._metadata
    loader = self._load_metadata
    if loader is None:
        # no loading function available
        self._metadata = {}
    else:
        self._metadata = loader(self.filepath, **self._metadata_kwargs)
    return self._metadata

755 

def markers(self):
    """Markers of the audio file.

    The markers are loaded on the first call by means of
    `self._load_markers` and are then kept for subsequent calls.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of str objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    if self._locs is not None:
        return self._locs, self._labels
    loader = self._load_markers
    if loader is None:
        # no loading function available: arrays without rows
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
    else:
        self._locs, self._labels = loader(self.filepath)
    return self._locs, self._labels

778 

def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
    """Configure unwrapping of clipped data.

    See unwrap() function from the audioio package.

    Parameters
    ----------
    thresh: float
        Threshold for detecting wrapped data relative to self.unwrap_ampl
        which is set to self.ampl_max by this function.
        Unwrapping is only enabled for thresholds larger than 0.001.
    clips: bool
        If True, then clip the unwrapped data properly.
        Otherwise, unwrap the data and double the
        minimum and maximum data range
        (self.ampl_min and self.ampl_max).
    down_scale: bool
        If not `clips`, then downscale the signal by a factor of two,
        in order to keep the range between -1 and 1.
    unit: str
        Unit of the data.
    """
    self.unwrap_ampl = self.ampl_max
    self.unwrap_thresh = thresh
    self.unwrap_clips = clips
    self.unwrap_down_scale = down_scale
    self.unwrap = thresh > 1e-3
    if not self.unwrap:
        return
    if self.unwrap_clips:
        # clipped data keep their range
        add_unwrap(self.metadata(),
                   self.unwrap_thresh*self.unwrap_ampl,
                   self.unwrap_ampl, unit)
        return
    if down_scale:
        # data are halved, so the gain and the threshold are halved, too
        update_gain(self.metadata(), 0.5)
        add_unwrap(self.metadata(),
                   0.5*self.unwrap_thresh*self.unwrap_ampl,
                   0.0, unit)
        return
    # neither clipped nor scaled down: the data range doubles
    self.ampl_min *= 2
    self.ampl_max *= 2
    add_unwrap(self.metadata(),
               self.unwrap_thresh*self.unwrap_ampl,
               0.0, unit)

822 

def _load_buffer_unwrap(self, r_offset, r_size, pbuffer):
    """Load new data into the buffer and unwrap them if requested.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    pbuffer: ndarray
        Buffer where to store the loaded data.
    """
    self.load_audio_buffer(r_offset, r_size, pbuffer)
    if not self.unwrap:
        return
    # TODO: handle edge effects!
    unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl)
    if self.unwrap_clips:
        # limit the unwrapped data to the supported amplitude range:
        too_large = pbuffer > self.ampl_max
        pbuffer[too_large] = self.ampl_max
        too_small = pbuffer < self.ampl_min
        pbuffer[too_small] = self.ampl_min
    elif self.unwrap_down_scale:
        pbuffer *= 0.5

844 

845 

846 # wave interface:  

def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
              verbose=0):
    """Open an audio file for reading with the wave module.

    Note: we assume that setpos() and tell() use integer numbers!

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This loader.

    Raises
    ------
    ImportError
        The wave module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wave(filepath) with filepath={filepath}')
    if not audio_modules['wave']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.size = 0
        self.shape = (0, 0)
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wave()
    self.sf = wave.open(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.getframerate())
    self.format = 'WAV'
    nbytes = self.sf.getsampwidth()
    # single byte samples are unsigned, all others signed integers:
    single_byte = nbytes == 1
    self.dtype = 'u1' if single_byte else f'i{nbytes}'
    self.encoding = 'PCM_U8' if single_byte else f'PCM_{nbytes*8}'
    self.factor = 1.0/(2.0**(nbytes*8-1))
    self.channels = self.sf.getnchannels()
    self.frames = self.sf.getnframes()
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wave
    self.load_audio_buffer = self._load_buffer_wave
    # read 1 frame to determine the unit of the position values:
    start = self.sf.tell()
    self.p0 = start
    self.sf.readframes(1)
    self.pfac = self.sf.tell() - start
    self.sf.setpos(start)
    return self

911 

def _close_wave(self):
    """Close the audio file opened with the wave module. """
    if self.sf is None:
        # nothing is open
        return
    self.sf.close()
    self.sf = None

917 

def _load_buffer_wave(self, r_offset, r_size, buffer):
    """Load new data from file using the wave module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        # the file was closed in the meantime: reopen it
        self.sf = wave.open(self.filepath, 'r')
    # convert the frame index to the position values of the wave module:
    self.sf.setpos(r_offset*self.pfac + self.p0)
    raw = self.sf.readframes(r_size)
    if self.dtype == 'i3':
        # numpy has no 3-byte integer type ('i3' is rejected by
        # np.frombuffer), so assemble the little-endian 24-bit
        # samples from their single bytes:
        parts = np.frombuffer(raw, dtype='u1').reshape(-1, 3).astype('i4')
        samples = parts[:, 0] | (parts[:, 1] << 8) | (parts[:, 2] << 16)
        # sign extension: values with bit 23 set are negative
        samples -= (samples & 0x800000) << 1
        fbuffer = samples.reshape((-1, self.channels))
    else:
        fbuffer = np.frombuffer(raw, dtype=self.dtype).reshape((-1, self.channels))
    if self.dtype[0] == 'u':
        # unsigned data are shifted to be centered on zero:
        buffer[:, :] = fbuffer * self.factor - 1.0
    else:
        buffer[:, :] = fbuffer * self.factor

939 

940 

941 # ewave interface:  

def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
               verbose=0):
    """Open an audio file for reading with the ewave module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This loader.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_ewave(filepath) with filepath={filepath}')
    if not audio_modules['ewave']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_ewave()
    self.sf = ewave.open(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.sampling_rate)
    nchannels = self.sf.nchannels
    nframes = self.sf.nframes
    self.channels = nchannels
    self.frames = nframes
    self.shape = (nframes, nchannels)
    self.size = nframes * nchannels
    self.format = 'WAV'  # or WAVEX?
    # translate the dtype of the file into the name of an encoding:
    self.encoding = self.numpy_encodings[self.sf.dtype]
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_ewave
    self.load_audio_buffer = self._load_buffer_ewave
    return self

992 

def _close_ewave(self):
    """Close the audio file opened with the ewave module. """
    if self.sf is None:
        # nothing is open
        return
    # drop the reference to the file object, then mark as closed:
    del self.sf
    self.sf = None

998 

def _load_buffer_ewave(self, r_offset, r_size, buffer):
    """Fill the buffer with data read by the ewave module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        # the file was closed in the meantime: reopen it
        self.sf = ewave.open(self.filepath, 'r')
    chunk = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
    chunk = ewave.rescale(chunk, 'float')
    # a 1-D result is turned into a single-column 2-D array:
    if chunk.ndim == 1:
        chunk = chunk.reshape(-1, 1)
    buffer[:,:] = chunk

1018 

1019 

1020 # soundfile interface:  

def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for reading with the SoundFile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This loader.

    Raises
    ------
    ImportError
        The SoundFile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_soundfile(filepath) with filepath={filepath}')
    if not audio_modules['soundfile']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_soundfile()
    self.sf = soundfile.SoundFile(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = 0
    self.size = 0
    if self.sf.seekable():
        # seeking to the end returns the number of frames;
        # afterwards rewind to the beginning of the data:
        self.frames = self.sf.seek(0, soundfile.SEEK_END)
        self.sf.seek(0, soundfile.SEEK_SET)
    # TODO: if not seekable, we cannot handle that file!
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = self.sf.format
    self.encoding = self.sf.subtype
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_soundfile
    self.load_audio_buffer = self._load_buffer_soundfile
    return self

1076 

def _close_soundfile(self):
    """Close the file that was opened with the SoundFile module.

    `self.sf` is closed and reset to `None`. Nothing happens if no
    file is open.
    """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None

1082 

def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read from file via the SoundFile module.

    The file is reopened from `self.filepath` if it has been closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    sf = self.sf
    if sf is None:
        sf = self.sf = soundfile.SoundFile(self.filepath, 'r')
    sf.seek(r_offset, soundfile.SEEK_SET)
    frames = sf.read(r_size, always_2d=True)
    buffer[:, :] = frames

1099 

1100 

1101 # wavefile interface:  

def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading using the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # module not available: reset the loader to an empty state
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape, self.size, self.offset = (0, 0), 0, 0
        raise ImportError
    if self.sf is not None:
        self._close_wavefile()
    reader = wavefile.WaveReader(filepath)
    self.sf = reader
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(reader.samplerate)
    self.channels = reader.channels
    self.frames = reader.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # get format and encoding by matching the integer constants of
    # wavefile.Format against the masked format code of the file:
    type_mask = wavefile.Format.TYPEMASK
    sub_mask = wavefile.Format.SUBMASK
    for name in dir(wavefile.Format):
        value = getattr(wavefile.Format, name)
        if not isinstance(value, int):
            continue
        if value & type_mask > 0 and (reader.format & type_mask) == value:
            self.format = name
        if value & sub_mask > 0 and (reader.format & sub_mask) == value:
            self.encoding = name
    # init buffer (sizes are given in seconds, convert them to frames):
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self

1161 

def _close_wavefile(self):
    """Close the file that was opened with the wavefile module.

    `self.sf` is closed and reset to `None`. Nothing happens if no
    file is open.
    """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None

1167 

def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read from file via the wavefile module.

    The file is reopened from `self.filepath` if it has been closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    reader = self.sf
    if reader is None:
        reader = self.sf = wavefile.WaveReader(self.filepath)
    reader.seek(r_offset, wavefile.Seek.SET)
    # read into a buffer provided by the reader, with the data type
    # of the internal buffer:
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    # the reader's buffer is stored transposed:
    buffer[:, :] = chunk.T

1186 

1187 

1188 # audioread interface:  

def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading using the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # module not available: reset the loader to an empty state
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape, self.size, self.offset = (0, 0), 0, 0
        raise ImportError
    if self.sf is not None:
        self._close_audioread()
    decoder = audioread.audio_open(filepath)
    self.sf = decoder
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(decoder.samplerate)
    self.channels = decoder.channels
    # number of frames is computed from the duration of the file:
    self.frames = int(np.ceil(self.rate*decoder.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # additional buffer holding the most recent block of the decoder:
    self.read_buffer = np.zeros((0, 0))
    self.read_offset = 0
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = decoder.__iter__()
    return self

1244 

def _close_audioread(self):
    """Close the file that was opened with the audioread module.

    The `__exit__()` method of `self.sf` is called and `self.sf` is
    reset to `None`. Nothing happens if no file is open.
    """
    if self.sf is None:
        return
    self.sf.__exit__(None, None, None)
    self.sf = None

1250 

def _load_buffer_audioread(self, r_offset, r_size, buffer):
    """Load new data from file using the audioread module.

    audioread can only iterate through a file once and in blocksizes that are
    given by audioread. Therefore we keep yet another buffer: `self.read_buffer`
    at file offset `self.read_offset` containing whatever audioread returned.

    If the file has been closed in the meantime, it is reopened and
    the iterator as well as the read buffer are reset, because a
    newly opened file starts again at its first frame.

    The data returned by audioread are interpreted as little-endian
    16 bit integers and are scaled by 2**15-1.
    Frames requested beyond the end of the file are set to zero.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """

    def next_block():
        # iterators providing a next() method are supported as well:
        if hasattr(self.sf_iter, 'next'):
            return self.sf_iter.next()
        return next(self.sf_iter)

    if self.sf is None:
        # The iterator and the read buffer still belong to the closed
        # file. Reset them together with the file, otherwise a stale
        # iterator is used for reading forward.
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0, 0))
        self.read_offset = 0
    scale = 2.0**15 - 1.0
    b_offset = 0
    if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size
         and self.read_offset < r_offset + r_size ):
        # read_buffer overlaps at the end of the requested interval:
        i = 0
        n = r_offset + r_size - self.read_offset
        if n > r_size:
            i += n - r_size
            n = r_size
        buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / scale
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)')
        r_size -= n
        if r_size <= 0:
            return
    # go back to beginning of file:
    if r_offset < self.read_offset:
        if self.verbose > 2:
            print(' rewind')
        self._close_audioread()
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0, 0))
        self.read_offset = 0
    # read to position:
    while self.read_offset + self.read_buffer.shape[0] < r_offset:
        self.read_offset += self.read_buffer.shape[0]
        try:
            fbuffer = next_block()
        except StopIteration:
            self.read_buffer = np.zeros((0, 0))
            buffer[:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        if self.verbose > 2:
            print(f' read forward by {self.read_buffer.shape[0]} frames')
    # recycle file data:
    if ( self.read_offset + self.read_buffer.shape[0] > r_offset
         and self.read_offset <= r_offset ):
        i = r_offset - self.read_offset
        n = self.read_offset + self.read_buffer.shape[0] - r_offset
        if n > r_size:
            n = r_size
        buffer[:n,:] = self.read_buffer[i:i+n,:] / scale
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
        b_offset += n
        r_offset += n
        r_size -= n
    # read data:
    if self.verbose > 2 and r_size > 0:
        print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)')
    while r_size > 0:
        self.read_offset += self.read_buffer.shape[0]
        try:
            fbuffer = next_block()
        except StopIteration:
            self.read_buffer = np.zeros((0, 0))
            buffer[b_offset:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        n = self.read_buffer.shape[0]
        if n > r_size:
            n = r_size
        if n > 0:
            buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / scale
            if self.verbose > 2:
                print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
            b_offset += n
            r_offset += n
            r_size -= n

1350 

1351 

1352 # open multiple audio files as one: 

def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Make several audio files accessible as one concatenated array.

    Without `end_indices` each file is opened once. Files that fail
    to open are skipped. The first file that differs in number of
    channels or sampling rate from the first one, or whose start
    time is missing or deviates by more than one second from the end
    of the preceding file, terminates the list of files.

    Parameters
    ----------
    filepaths: list of str
        List of file names of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.
        Only this argument switches to the minimal initialization.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.
    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empy sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file:
    expected_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is None:
        for filepath in filepaths:
            try:
                loader = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = loader.metadata()
            add_metadata(self._metadata, flatten_metadata(md, True))
            if self.filepath is not None:
                # check channels and rate against the first file,
                # the last failing check provides the message:
                error_str = None
                if loader.channels != self.channels:
                    error_str = (f'number of channels differs: '
                                 f'{loader.channels} in {loader.filepath} versus '
                                 f'{self.channels} in {self.filepath}')
                if loader.rate != self.rate:
                    error_str = (f'sampling rates differ: '
                                 f'{loader.rate} in {loader.filepath} versus '
                                 f'{self.rate} in {self.filepath}')
                # check start time of recording:
                stime = get_datetime(md)
                if expected_time is None or stime is None or \
                   abs(expected_time - stime) > timedelta(seconds=1):
                    error_str = (f'start time does not indicate continuous recording: '
                                 f'expected {expected_time} instead of '
                                 f'{stime} in {loader.filepath}')
                if error_str is not None:
                    if verbose > 0:
                        print(error_str)
                    loader.close()
                    del loader
                    break
            else:
                # first file defines the properties of the whole array:
                self.filepath = loader.filepath
                self.format = loader.format
                self.encoding = loader.encoding
                self.rate = loader.rate
                self.channels = loader.channels
                self.start_time = get_datetime(md)
                expected_time = self.start_time
            # markers, shifted by the frames of the preceding files:
            locs, labels = loader.markers()
            locs[:,0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += loader.frames
            self.end_indices.append(self.frames)
            if expected_time is not None:
                expected_time += timedelta(seconds=loader.frames/loader.rate)
            # add file to lists:
            self.file_paths.append(filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(loader)
            else:
                loader.close()
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(loader)
                self.open_loaders.append(loader)
            else:
                loader.close()
                del loader
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set startime from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    else:
        # minimal initialization without opening any file:
        self.filepath = filepaths[0]
        self.file_paths = filepaths
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self

1506 

def _close_multiple(self):
    """Close all the underlying audio files and remove the file lists.

    Afterwards `filepath` is `None`, `file_paths` and `file_indices`
    are empty, and the attributes `audio_files`, `open_files`,
    `open_loaders`, `start_indices` and `end_indices` are deleted.
    """
    self.open_files = []
    self.open_loaders = []
    for loader in getattr(self, 'audio_files', []):
        if loader is not None:
            loader.close()
    self.filepath = None
    self.file_paths = []
    self.file_indices = []
    # assign before deleting, so that deleting never fails on a
    # missing attribute:
    for name in ('audio_files', 'open_files', 'open_loaders',
                 'start_indices', 'end_indices'):
        setattr(self, name, [])
        delattr(self, name)

1526 

def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Fill `buffer` with data taken from the underlying files.

    The requested range is split at the file boundaries. Loaders
    that are currently not available are created on demand. The
    lists of open files and open loaders are limited to
    `AudioLoader.max_open_files` and `AudioLoader.max_open_loaders`
    entries, their first entries are closed if they grow beyond.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    offs = r_offset
    size = r_size
    boffs = 0
    # index of the file containing the first requested frame:
    ai = np.searchsorted(self.end_indices, offs, side='right')
    while size > 0:
        loader = self.audio_files[ai]
        if loader is None:
            loader = AudioLoader(self.file_paths[ai],
                                 self.buffersize, self.backsize, 0)
            self.audio_files[ai] = loader
            self.open_loaders.append(loader)
            self.open_files.append(loader)
            if len(self.open_files) > AudioLoader.max_open_files:
                oldest = self.open_files.pop(0)
                oldest.close()
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                oldest = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(oldest)] = None
                oldest.close()
                del oldest
                # run the garbage collector every now and then:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        else:
            # move the loader to the end of the list:
            self.open_loaders.remove(loader)
            self.open_loaders.append(loader)
        # range of frames relative to the start of this file:
        start = self.start_indices[ai]
        ai0 = offs - start
        ai1 = min(offs + size, self.end_indices[ai]) - start
        n = ai1 - ai0
        self.audio_files[ai].load_audio_buffer(ai0, n,
                                               buffer[boffs:boffs + n,:])
        # move the file to the end of the list of open files:
        current = self.audio_files[ai]
        if current in self.open_files:
            self.open_files.remove(current)
        self.open_files.append(current)
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        boffs += n
        offs += n
        size -= n
        ai += 1

1583 

1584 

def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    A sequence of file names is passed on to `open_multiple()`. If
    this results in more than a single file, the files are made
    accessible as a single array. A single file is opened with the
    first available audio module that succeeds in loading the file.

    Parameters
    ----------
    filepath: str or list of str
        Name of the file or list of many file names that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.
    """
    self.buffer = np.array([])
    self.rate = 0.0
    multiple = isinstance(filepath, (list, tuple, np.ndarray))
    # the truth value of numpy arrays with more than one element is
    # ambiguous, therefore the length of sequences is checked:
    if multiple:
        is_empty = len(filepath) == 0
    else:
        is_empty = not filepath
    if is_empty:
        raise ValueError('input argument filepath is empty string!')
    if multiple:
        self.open_multiple(filepath, buffersize, backsize, verbose,
                           **kwargs)
        if len(self.file_paths) > 1:
            return self
        # only a single file is left, open it as a single file:
        filepath = self.file_paths[0]
        self.close()
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
        except Exception as e:
            # remember the failure and try the next module:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

1670 

1671 

def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the file at once with `load_audio()`, compares the data
    returned by the soundfile and audioread modules if both are
    available, and checks random, forward, and backward access via
    `AudioLoader` against the data loaded at once.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # matplotlib is needed for plotting only, therefore it is
        # imported right here where it is used:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        # length of the slices used for testing access:
        nframes = int(1.5*data.rate)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        # random start indices can be drawn only
        # if the file is longer than a slice:
        if len(data) > nframes:
            for inx in np.random.randint(0, len(data)-nframes, 1000):
                if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                    print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()

1739 

1740 

def main(*args):
    """Call demo with command line arguments.

    Recognized arguments are `-h` for printing a usage message,
    `-p` for plotting the loaded data, and `-m` followed by the name
    of the audio module to be used. The first other argument is
    taken as the path of the audio file, all arguments following it
    are ignored. Without a file path the usage message is printed.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    # demo() is defined in this module and looks up plt as a global name:
    global plt

    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    expect_module = False
    for arg in args:
        if expect_module:
            # argument following -m is the name of the module:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            expect_module = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            expect_module = True
        else:
            file_path = arg
            break

    # without a file there is nothing to be demonstrated:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        import matplotlib.pyplot as plt

    demo(file_path, plot)

1784 

1785 

if __name__ == "__main__":
    # run the demo with the command line arguments,
    # the name of the program is not passed on:
    cli_args = sys.argv[1:]
    main(*cli_args)