Coverage for src/audioio/audioloader.py: 90%

829 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-23 13:35 +0000

1"""Loading data, metadata, and markers from audio files. 

2 

3- `load_audio()`: load a whole audio file at once. 

4- `metadata()`: read metadata of an audio file. 

5- `markers()`: read markers of an audio file. 

6- class `AudioLoader`: read data from audio files in chunks. 

7 

8The read in data are always numpy arrays of floats ranging between -1 and 1. 

9The arrays are 2-D ndarrays with first axis time and second axis channel, 

10even for single channel data. 

11 

12If an audio file cannot be loaded, you might need to install 

13additional packages. See 

14[installation](https://bendalab.github.io/audioio/installation) for 

15further instructions. 

16 

17For a demo run the module as: 

18``` 

19python -m src.audioio.audioloader audiofile.wav 

20``` 

21""" 

22 

23import gc 

24import sys 

25import warnings 

26import os.path 

27import numpy as np 

28from pathlib import Path 

29from datetime import timedelta 

30from .audiomodules import * 

31from .bufferedarray import BufferedArray 

32from .riffmetadata import metadata_riff, markers_riff 

33from .audiometadata import update_gain, add_unwrap, get_datetime 

34from .audiometadata import flatten_metadata, add_metadata, set_starttime 

35from .audiotools import unwrap 

36 

37 

def load_wave(filepath):
    """Load a wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # The context manager closes the file even when reading the frames
    # raises; decoding happens afterwards on the bytes already read.
    with wave.open(filepath, 'r') as wf:
        (nchannels, sampwidth, rate, nframes, comptype, compname) = wf.getparams()
        buffer = wf.readframes(nframes)
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # one-byte samples are read as unsigned integers and shifted
        # so that the result is centered around zero:
        dtype = 'u1'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor - 1.0
    else:
        dtype = f'i{sampwidth}'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor
    return data, float(rate)

81 

82 

def load_ewave(filepath):
    """Read a complete wav file with the ewave module.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    with ewave.open(filepath, 'r') as wf:
        samplerate = wf.sampling_rate
        samples = ewave.rescale(wf.read(), 'float')
    # single channel data come as a 1-D array, make them a column:
    if samples.ndim == 1:
        samples = samples.reshape(-1, 1)
    return samples, float(samplerate)

121 

122 

def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings only while reading.  catch_warnings() restores
    # the caller's warning filters afterwards, also if read() raises.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # unsigned 8-bit data are shifted to be centered around zero:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # scale signed integers by their full range:
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data,(-1, 1))
    return data, float(rate)

165 

166 

def load_soundfile(filepath):
    """Read a complete audio file with SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    with soundfile.SoundFile(filepath, 'r') as sf:
        samplerate = sf.samplerate
        # frames=-1 is passed to request the whole file,
        # always_2d keeps a channel axis:
        samples = sf.read(frames=-1, dtype='float64', always_2d=True)
    return samples, float(samplerate)

203 

204 

def load_wavefile(filepath):
    """Read a complete audio file with wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    samplerate, loaded = wavefile.load(filepath)
    # the loaded array is transposed to get time on the first axis
    # (presumably wavefile returns channels first - verify):
    samples = loaded.astype(np.float64, copy=False).T
    return samples, float(samplerate)

236 

237 

def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # preallocate the array from the duration reported for the file:
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            n = fulldata.shape[0]
            if index + n > len(data):
                # more frames than preallocated: keep only as many
                # as still fit into the remaining space of `data`
                n = len(data) - index
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    return data/(2.0**15-1.0), float(rate)

284 

285 

# Pairs of (key into `audio_modules`, loader function).
# `load_audio()` tries the entries from first to last and returns the
# result of the first loader that delivers data, so the order of this
# tuple is the order of preference.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""

300 

301 

def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str or path-like
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # path-like objects have no len(); convert them before the checks
    # so that the loader functions get a plain path:
    if isinstance(filepath, os.PathLike):
        filepath = os.fspath(filepath)
    # check values:
    if filepath is None or len(filepath) == 0:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
            # a loader that returns nothing is reported as well,
            # then the next module is tried:
            errors.append(f' {lib} failed: no data loaded')
        except Exception as e:
            # any failure of one module is only recorded,
            # the remaining modules still get their chance:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
          ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

381 

382 

def metadata(filepath, store_empty=False):
    """Read the metadata stored in an audio file.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary.  All other types of
        values are values for the respective key.  In particular they
        are strings.  But other types like for example ints or floats
        are also allowed.  See `audioio.audiometadata` module for
        available functions to work with such metadata.
        An empty dictionary is returned if `metadata_riff()`
        raises a ValueError.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        md = metadata_riff(filepath, store_empty)
    except ValueError:  # not a RIFF file
        md = {}
    return md

418 

419 

def markers(filepath):
    """Read the markers stored in an audio file.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
        Both arrays have zero rows if `markers_riff()`
        raises a ValueError.

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        result = markers_riff(filepath)
    except ValueError:  # not a RIFF file
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        result = (no_locs, no_labels)
    return result

452 

453 

454class AudioLoader(BufferedArray): 

455 """Buffered reading of audio data for random access of the data in the file. 

456  

457 The class allows for reading very large audio files or many 

458 sequential audio files that do not fit into memory. 

459 An AudioLoader instance can be used like a huge read-only numpy array, i.e. 

460 ``` 

461 data = AudioLoader('path/to/audio/file.wav') 

462 x = data[10000:20000,0] 

463 ``` 

464 The first index specifies the frame, the second one the channel. 

465 

466 Behind the scenes, `AudioLoader` tries to open the audio file with 

467 all available audio modules until it succeeds (first line). It 

468 then reads data from the file as necessary for the requested data 

469 (second line). Accessing the content of the audio files via a 

470 buffer that holds only a part of the data is managed by the 

471 `BufferedArray` class. 

472 

473 Reading sequentially through the file is always possible. Some 

474 modules, however, (e.g. audioread, needed for mp3 files) can only 

475 read forward. If previous data are requested, then the file is read 

476 from the beginning again. This slows down access to previous data 

477 considerably. Use the `backsize` argument of the open function to 

478 make sure some data are loaded into the buffer before the requested 

479 frame. Then a subsequent access to the data within `backsize` seconds 

480 before that frame can still be handled without the need to reread 

481 the file from the beginning. 

482 

483 Usage 

484 ----- 

485 With context management: 

486 ``` 

487 import audioio as aio 

488 with aio.AudioLoader(filepath, 60.0, 10.0) as data: 

489 # do something with the content of the file: 

490 x = data[0:10000] 

491 y = data[10000:20000] 

492 z = x + y 

493 ``` 

494 

495 For using a specific audio module, here the audioread module: 

496 ``` 

497 data = aio.AudioLoader() 

498 with data.open_audioread(filepath, 60.0, 10.0): 

499 # do something ... 

500 ``` 

501 

502 Use `blocks()` for sequential, blockwise reading and processing: 

503 ``` 

504 from scipy.signal import spectrogram 

505 nfft = 2048 

506 with aio.AudioLoader('some/audio.wav') as data: 

507 for x in data.blocks(100*nfft, nfft//2): 

508 f, t, Sxx = spectrogram(x, fs=data.rate, 

509 nperseg=nfft, noverlap=nfft//2) 

510 ``` 

511 

512 For loop iterates over single frames (1-D arrays containing samples for each channel): 

513 ``` 

514 with aio.AudioLoader('some/audio.wav') as data: 

515 for x in data: 

516 print(x) 

517 ``` 

518  

519 Traditional open and close: 

520 ``` 

521 data = aio.AudioLoader(filepath, 60.0) 

522 x = data[:,:] # read the whole file 

523 data.close() 

524 ``` 

525  

526 this is the same as: 

527 ``` 

528 data = aio.AudioLoader() 

529 data.open(filepath, 60.0) 

530 ... 

531 ``` 

532 

533 Classes inheriting AudioLoader just need to implement 

534 ``` 

535 self.load_audio_buffer(offset, nsamples, pbuffer) 

536 ``` 

537 This function needs to load the supplied `pbuffer` with 

538 `nsamples` frames of data starting at frame `offset`. 

539 

540 In the constructor or some kind of opening function, you need to 

541 set some member variables, as described for `BufferedArray`. 

542 

543 For loading metadata and markers, implement the functions 

544 ``` 

545 self._load_metadata(filepath, **kwargs) 

546 self._load_markers(filepath) 

547 ``` 

548  

549 Parameters 

550 ---------- 

551 filepath: str or list of str 

552 Name of the file or list of many file names that should be 

553 made accessible as a single array. 

554 buffersize: float 

555 Size of internal buffer in seconds. 

556 backsize: float 

557 Part of the buffer to be loaded before the requested start index in seconds. 

558 verbose: int 

559 If larger than zero show detailed error/warning messages. 

560 store_empty: bool 

561 If `False` do not return meta data with empty values. 

562 

563 Attributes 

564 ---------- 

565 filepath: str 

566 Name and path of the opened file. In case of many files, the first one. 

567 file_paths: list of str 

568 List of paths of the opened files that are made accessible 

569 as a single array. 

570 file_indices: list of int 

571 For each file the index of its first sample. 

572 rate: float 

573 The sampling rate of the data in Hertz. 

574 channels: int 

575 The number of channels. 

576 frames: int 

577 The number of frames in the file. Same as `len()`. 

578 format: str or None 

579 Format of the audio file. 

580 encoding: str or None 

581 Encoding/subtype of the audio file. 

582 shape: tuple 

583 Frames and channels of the data. 

584 ndim: int 

585 Number of dimensions: always 2 (frames and channels). 

586 offset: int 

587 Index of first frame in the current buffer. 

588 buffer: ndarray of floats 

589 The currently available data from the file. 

590 ampl_min: float 

591 Minimum amplitude the file format supports. 

592 Always -1.0 for audio data. 

593 ampl_max: float 

594 Maximum amplitude the file format supports. 

595 Always +1.0 for audio data. 

596 

597 Methods 

598 ------- 

599 - `len()`: Number of frames. 

600 - `file_start_times()`: time of first frame of each file in seconds. 

601 - `get_file_index()`: file path and index of frame contained by this file. 

602 - `open()`: Open an audio file by trying available audio modules. 

603 - `open_*()`: Open an audio file with the respective audio module. 

604 - `__getitem__`: Access data of the audio file. 

605 - `update_buffer()`: Update the internal buffer for a range of frames. 

606 - `blocks()`: Generator for blockwise processing of AudioLoader data. 

607 - `file_start_times()`: Time of first frame of each file in seconds. 

608 - `get_file_index()`: File path and index of frame contained by this file. 

609 - `basename()`: Base name of the audio data. 

610 - `format_dict()`: technical infos about how the data are stored. 

611 - `metadata()`: Metadata stored along with the audio data. 

612 - `markers()`: Markers stored along with the audio data. 

613 - `set_unwrap()`: Set parameters for unwrapping clipped data. 

614 - `set_time_delta()`: Set maximum allowed time difference between successive files. 

615 - `close()`: Close the file. 

616 

617 """ 

618 

# Class-level limits.  Neither value is referenced in the part of the
# class visible here - presumably they are used when opening
# multiple files; verify against the rest of the class.
max_open_files = 5
""" Suggestion for maximum number of open file descriptors. """

max_open_loaders = 10
""" Suggestion for maximum number of AudioLoaders when opening multiple files. """

624 

def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
             verbose=0, **meta_kwargs):
    """Set up default state and optionally open an audio file.

    Parameters
    ----------
    filepath: str or list of str or None
        If not `None`, passed on to `open()` together with
        `buffersize`, `backsize` and `verbose`.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **meta_kwargs: dict
        Keyword arguments stored and passed on to the function
        that loads the metadata.
    """
    super().__init__(verbose=verbose)
    self.format = None
    self.encoding = None
    # metadata and markers are loaded on first request:
    self._metadata = None
    self._locs = None
    self._labels = None
    self._load_metadata = metadata
    self._load_markers = markers
    self._metadata_kwargs = meta_kwargs
    self.filepath = None
    self.file_paths = None
    self.file_indices = []
    self._max_time_diff = 1
    self.sf = None
    # close() and load_buffer() are rebound by the open_*() functions:
    self.close = self._close
    self.load_buffer = self._load_buffer_unwrap
    self.ampl_min = -1.0
    self.ampl_max = +1.0
    self.unwrap = False
    self.unwrap_thresh = 0.0
    self.unwrap_clips = False
    self.unwrap_ampl = 1.0
    # set_unwrap() and _load_buffer_unwrap() use `unwrap_down_scale`;
    # initialize that name so it exists even without set_unwrap().
    self.unwrap_down_scale = True
    # previously used spelling, kept for backward compatibility:
    self.unwrap_downscale = True
    if filepath is not None:
        self.open(filepath, buffersize, backsize, verbose)

652 

numpy_encodings = {
    np.dtype(dtype_spec): encoding_name
    for dtype_spec, encoding_name in (
        (np.int64, 'PCM_64'),
        (np.int32, 'PCM_32'),
        (np.int16, 'PCM_16'),
        (np.single, 'FLOAT'),
        (np.double, 'DOUBLE'),
        ('>f4', 'FLOAT'),
        ('>f8', 'DOUBLE'),
    )
}
""" Map numpy dtypes to encodings.
"""

662 

663 def _close(self): 

664 pass 

665 

666 def __del__(self): 

667 self.close() 

668 

def file_start_times(self):
    """Start time of each file in seconds.

    Returns
    -------
    times: array of float
        For each entry of `self.file_indices` the index of the
        first frame divided by the sampling rate `self.rate`.
    """
    return np.array([first_frame/self.rate
                     for first_frame in self.file_indices])

682 

def get_file_index(self, frame):
    """Find the file that contains a frame.

    Parameters
    ----------
    frame: int
        Index of frame.

    Returns
    -------
    filepath: str
        Path of file that contains the frame.
    index: int
        Index of the frame relative to the first frame
        in the containing file.

    Raises
    ------
    ValueError
        `frame` is negative or not less than `self.frames`.
    """
    if frame < 0 or frame >= self.frames:
        raise ValueError('invalid frame')
    # the last file whose first frame is not beyond `frame`,
    # fall back to the first file if there is none:
    candidates = (k for k in reversed(range(len(self.file_indices)))
                  if self.file_indices[k] <= frame)
    file_no = next(candidates, 0)
    return self.file_paths[file_no], frame - self.file_indices[file_no]

709 

def basename(self, path=None):
    """Name of the audio data derived from a file path.

    Parameters
    ----------
    path: str or None
        Path of the audio file from which a base name is generated.
        If `None`, use `self.filepath`.

    Returns
    -------
    s: str
        The stem of `path`, i.e. the file name without
        directories and without the last suffix.

    """
    source = self.filepath if path is None else path
    return Path(source).stem

728 

def format_dict(self):
    """Collect technical information on the stored data.

    Returns
    -------
    fmt: dict
        Dictionary with name, filepath, format, encoding, samplingrate,
        channels, frames, and duration of the audio file.
        Format and encoding are only contained if they are not `None`.
        Sampling rate and duration are formatted as strings with unit.

    """
    fmt = {'name': self.basename(), 'filepath': self.filepath}
    for key, value in (('format', self.format),
                       ('encoding', self.encoding)):
        if value is not None:
            fmt[key] = value
    fmt['samplingrate'] = f'{self.rate:.0f}Hz'
    fmt['channels'] = self.channels
    fmt['frames'] = self.frames
    fmt['duration'] = f'{self.frames/self.rate:.3f}s'
    return fmt

749 

def metadata(self):
    """Metadata of the audio file, loaded on first request.

    The metadata are loaded with `self._load_metadata`, called with
    `self.filepath` and the keyword arguments stored in
    `self._metadata_kwargs`, and are then kept for subsequent calls.
    Without a loading function an empty dictionary is used.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary.  All other
        types of values are values for the respective key.  In
        particular they are strings.  But other types like for
        example ints or floats are also allowed.  See
        `audioio.audiometadata` module for available functions to
        work with such metadata.

    """
    if self._metadata is not None:
        return self._metadata
    loader = self._load_metadata
    if loader is not None:
        self._metadata = loader(self.filepath, **self._metadata_kwargs)
    else:
        self._metadata = {}
    return self._metadata

780 

def markers(self):
    """Markers of the audio file, loaded on first request.

    The markers are loaded with `self._load_markers`, called with
    `self.filepath`, and are then kept for subsequent calls.
    Without a loading function, arrays with zero rows are used.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of str objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    if self._locs is None:
        loader = self._load_markers
        if loader is not None:
            self._locs, self._labels = loader(self.filepath)
        else:
            self._locs = np.zeros((0, 2), dtype=int)
            self._labels = np.zeros((0, 2), dtype=object)
    return self._locs, self._labels

803 

def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
    """Configure unwrapping of clipped data.

    See unwrap() function from the audioio package.

    Parameters
    ----------
    thresh: float
        Threshold for detecting wrapped data relative to self.unwrap_ampl
        which is set to self.ampl_max by this function.
        Unwrapping is only enabled for thresholds larger than 0.001.
    clips: bool
        If True, then clip the unwrapped data properly.
        Otherwise, unwrap the data and double the
        minimum and maximum data range
        (self.ampl_min and self.ampl_max).
    down_scale: bool
        If not `clips`, then downscale the signal by a factor of two,
        in order to keep the range between -1 and 1.
        In this case the data range is not doubled.
    unit: str
        Unit of the data.
    """
    self.unwrap_ampl = self.ampl_max
    self.unwrap_thresh = thresh
    self.unwrap_clips = clips
    self.unwrap_down_scale = down_scale
    self.unwrap = thresh > 1e-3
    if not self.unwrap:
        return
    # record the unwrapping in the metadata:
    if self.unwrap_clips:
        add_unwrap(self.metadata(),
                   self.unwrap_thresh*self.unwrap_ampl,
                   self.unwrap_ampl, unit)
        return
    if down_scale:
        update_gain(self.metadata(), 0.5)
        add_unwrap(self.metadata(),
                   0.5*self.unwrap_thresh*self.unwrap_ampl,
                   0.0, unit)
        return
    # neither clipped nor scaled down: the data range doubles
    self.ampl_min *= 2
    self.ampl_max *= 2
    add_unwrap(self.metadata(),
               self.unwrap_thresh*self.unwrap_ampl,
               0.0, unit)

847 

848 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer): 

849 """Load new data and unwrap it. 

850 

851 Parameters 

852 ---------- 

853 r_offset: int 

854 First frame to be read from file. 

855 r_size: int 

856 Number of frames to be read from file. 

857 pbuffer: ndarray 

858 Buffer where to store the loaded data. 

859 """ 

860 self.load_audio_buffer(r_offset, r_size, pbuffer) 

861 if self.unwrap: 

862 # TODO: handle edge effects! 

863 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl) 

864 if self.unwrap_clips: 

865 pbuffer[pbuffer > self.ampl_max] = self.ampl_max 

866 pbuffer[pbuffer < self.ampl_min] = self.ampl_min 

867 elif self.unwrap_down_scale: 

868 pbuffer *= 0.5 

869 

def set_time_delta(self, time_delta):
    """ Set maximum allowed time difference between successive files.

    Parameters
    ----------
    time_delta: int
        Maximum number of seconds the start time of a recording file is allowed
        to differ from the end of the previous file.
        Default is one second.
    """
    # `self` was missing from the signature, so that any call on an
    # instance failed with a TypeError before reaching this line.
    self._max_time_diff = time_delta

881 

882 # wave interface:  

def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
              verbose=0):
    """Open audio file for reading using the wave module.

    Sets file name, sampling rate, format, encoding, number of
    channels and frames from the opened file, initializes the
    buffer, and binds `close()` and `load_audio_buffer()` to
    their wave-specific implementations.

    Note: we assume that setpos() and tell() use integer numbers!

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This instance.

    Raises
    ------
    ImportError
        The wave module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wave(filepath) with filepath={filepath}')
    if not audio_modules['wave']:
        # reset to an empty data set before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.size = 0
        self.shape = (0, 0)
        self.offset = 0
        raise ImportError
    # close a file that is still open from a previous call:
    if self.sf is not None:
        self._close_wave()
    self.sf = wave.open(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.getframerate())
    self.format = 'WAV'
    sampwidth = self.sf.getsampwidth()
    if sampwidth == 1:
        # one-byte samples are treated as unsigned integers:
        self.dtype = 'u1'
        self.encoding = 'PCM_U8'
    else:
        # NOTE(review): for 3-byte samples this results in dtype 'i3'
        # - confirm that numpy accepts it.
        self.dtype = f'i{sampwidth}'
        self.encoding = f'PCM_{sampwidth*8}'
    # factor that scales the integer samples in _load_buffer_wave():
    self.factor = 1.0/(2.0**(sampwidth*8-1))
    self.channels = self.sf.getnchannels()
    self.frames = self.sf.getnframes()
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wave
    self.load_audio_buffer = self._load_buffer_wave
    # read 1 frame to determine the unit of the position values:
    self.p0 = self.sf.tell()
    self.sf.readframes(1)
    self.pfac = self.sf.tell() - self.p0
    # go back to the initial position:
    self.sf.setpos(self.p0)
    return self

947 

948 def _close_wave(self): 

949 """Close the audio file using the wave module. """ 

950 if self.sf is not None: 

951 self.sf.close() 

952 self.sf = None 

953 

954 def _load_buffer_wave(self, r_offset, r_size, buffer): 

955 """Load new data from file using the wave module. 

956 

957 Parameters 

958 ---------- 

959 r_offset: int 

960 First frame to be read from file. 

961 r_size: int 

962 Number of frames to be read from file. 

963 buffer: ndarray 

964 Buffer where to store the loaded data. 

965 """ 

966 if self.sf is None: 

967 self.sf = wave.open(self.filepath, 'r') 

968 self.sf.setpos(r_offset*self.pfac + self.p0) 

969 fbuffer = self.sf.readframes(r_size) 

970 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels)) 

971 if self.dtype[0] == 'u': 

972 buffer[:, :] = fbuffer * self.factor - 1.0 

973 else: 

974 buffer[:, :] = fbuffer * self.factor 

975 

976 

977 # ewave interface:  

def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
               verbose=0):
    """Open audio file for reading using the ewave module.

    Sets file name, sampling rate, number of channels and frames,
    format and encoding from the opened file, initializes the
    buffer, and binds `close()` and `load_audio_buffer()` to
    their ewave-specific implementations.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This instance.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    KeyError
        The dtype of the file is not contained in `numpy_encodings`.
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_ewave(filepath) with filepath={filepath}')
    if not audio_modules['ewave']:
        # reset to an empty data set before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    # close a file that is still open from a previous call:
    if self.sf is not None:
        self._close_ewave()
    self.sf = ewave.open(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.sampling_rate)
    self.channels = self.sf.nchannels
    self.frames = self.sf.nframes
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = 'WAV' # or WAVEX?
    # translate the dtype of the file into the name of an encoding:
    self.encoding = self.numpy_encodings[self.sf.dtype]
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_ewave
    self.load_audio_buffer = self._load_buffer_ewave
    return self

1028 

1029 def _close_ewave(self): 

1030 """Close the audio file using the ewave module. """ 

1031 if self.sf is not None: 

1032 del self.sf 

1033 self.sf = None 

1034 

def _load_buffer_ewave(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read by means of the ewave module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # reopen the file in case it was closed in the meantime:
    if self.sf is None:
        self.sf = ewave.open(self.filepath, 'r')
    raw = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
    scaled = ewave.rescale(raw, 'float')
    # single channel data come as 1-D array, but buffer is 2-D:
    if scaled.ndim == 1:
        scaled = scaled.reshape(-1, 1)
    buffer[:, :] = scaled

1054 

1055 

1056 # soundfile interface:  

def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading via the SoundFile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The SoundFile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_soundfile(filepath) with filepath={filepath}')
    if not audio_modules['soundfile']:
        # mark the loader as empty before giving up:
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    # release a file that is still open from a previous call:
    if self.sf is not None:
        self._close_soundfile()
    handle = soundfile.SoundFile(filepath, 'r')
    self.sf = handle
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    # properties of the audio data:
    self.rate = float(handle.samplerate)
    self.channels = handle.channels
    self.frames = 0
    self.size = 0
    if handle.seekable():
        # seeking to the end returns the total number of frames:
        self.frames = handle.seek(0, soundfile.SEEK_END)
        handle.seek(0, soundfile.SEEK_SET)
    # TODO: if not seekable, we cannot handle that file!
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = handle.format
    self.encoding = handle.subtype
    # buffer sizes are given in seconds, the buffer needs frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the SoundFile specific implementations:
    self.close = self._close_soundfile
    self.load_audio_buffer = self._load_buffer_soundfile
    return self

1112 

1113 def _close_soundfile(self): 

1114 """Close the audio file using the SoundFile module. """ 

1115 if self.sf is not None: 

1116 self.sf.close() 

1117 self.sf = None 

1118 

def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read by means of the SoundFile module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # reopen the file in case it was closed in the meantime:
    if self.sf is None:
        self.sf = soundfile.SoundFile(self.filepath, 'r')
    handle = self.sf
    handle.seek(r_offset, soundfile.SEEK_SET)
    # always_2d keeps the channel axis also for single channel files:
    frames = handle.read(r_size, always_2d=True)
    buffer[:, :] = frames

1135 

1136 

1137 # wavefile interface:  

def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading via the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # mark the loader as empty before giving up:
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    # release a file that is still open from a previous call:
    if self.sf is not None:
        self._close_wavefile()
    reader = wavefile.WaveReader(filepath)
    self.sf = reader
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    # properties of the audio data:
    self.rate = float(reader.samplerate)
    self.channels = reader.channels
    self.frames = reader.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # Translate the numerical format code of the file into names:
    # the integer attributes of wavefile.Format whose bits within the
    # type or subtype mask equal those of the file name the format
    # and the encoding, respectively.
    type_mask = wavefile.Format.TYPEMASK
    sub_mask = wavefile.Format.SUBMASK
    file_format = reader.format
    for name in dir(wavefile.Format):
        code = getattr(wavefile.Format, name)
        if not isinstance(code, int):
            continue
        if (code & type_mask) > 0 and (file_format & type_mask) == code:
            self.format = name
        if (code & sub_mask) > 0 and (file_format & sub_mask) == code:
            self.encoding = name
    # buffer sizes are given in seconds, the buffer needs frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the wavefile specific implementations:
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self

1197 

1198 def _close_wavefile(self): 

1199 """Close the audio file using the wavefile module. """ 

1200 if self.sf is not None: 

1201 self.sf.close() 

1202 self.sf = None 

1203 

def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read by means of the wavefile module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # reopen the file in case it was closed in the meantime:
    if self.sf is None:
        self.sf = wavefile.WaveReader(self.filepath)
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    # let wavefile allocate a matching array and read into it:
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    # the array is transposed to fit the layout of buffer:
    buffer[:, :] = chunk.T

1222 

1223 

1224 # audioread interface:  

def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading via the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # mark the loader as empty before giving up:
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    # release a file that is still open from a previous call:
    if self.sf is not None:
        self._close_audioread()
    handle = audioread.audio_open(filepath)
    self.sf = handle
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    # properties of the audio data:
    self.rate = float(handle.samplerate)
    self.channels = handle.channels
    # the number of frames is computed from the duration of the file:
    self.frames = int(np.ceil(self.rate*handle.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # buffer sizes are given in seconds, the buffer needs frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # the most recent block returned by audioread and its position:
    self.read_buffer = np.zeros((0, 0))
    self.read_offset = 0
    # install the audioread specific implementations:
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = iter(handle)
    return self

1280 

1281 def _close_audioread(self): 

1282 """Close the audio file using the audioread module. """ 

1283 if self.sf is not None: 

1284 self.sf.__exit__(None, None, None) 

1285 self.sf = None 

1286 

1287 def _load_buffer_audioread(self, r_offset, r_size, buffer): 

1288 """Load new data from file using the audioread module. 

1289 

1290 audioread can only iterate through a file once and in blocksizes that are 

1291 given by audioread. Therefore we keep yet another buffer: `self.read_buffer` 

1292 at file offset `self.read_offset` containing whatever audioread returned. 

1293 

1294 Parameters 

1295 ---------- 

1296 r_offset: int 

1297 First frame to be read from file. 

1298 r_size: int 

1299 Number of frames to be read from file. 

1300 buffer: ndarray 

1301 Buffer where to store the loaded data. 

1302 """ 

1303 if self.sf is None: 

1304 self.sf = audioread.audio_open(self.filepath) 

1305 b_offset = 0 

1306 if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size 

1307 and self.read_offset < r_offset + r_size ): 

1308 # read_buffer overlaps at the end of the requested interval: 

1309 i = 0 

1310 n = r_offset + r_size - self.read_offset 

1311 if n > r_size: 

1312 i += n - r_size 

1313 n = r_size 

1314 buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0) 

1315 if self.verbose > 2: 

1316 print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)') 

1317 r_size -= n 

1318 if r_size <= 0: 

1319 return 

1320 # go back to beginning of file: 

1321 if r_offset < self.read_offset: 

1322 if self.verbose > 2: 

1323 print(' rewind') 

1324 self._close_audioread() 

1325 self.sf = audioread.audio_open(self.filepath) 

1326 self.sf_iter = self.sf.__iter__() 

1327 self.read_buffer = np.zeros((0,0)) 

1328 self.read_offset = 0 

1329 # read to position: 

1330 while self.read_offset + self.read_buffer.shape[0] < r_offset: 

1331 self.read_offset += self.read_buffer.shape[0] 

1332 try: 

1333 if hasattr(self.sf_iter, 'next'): 

1334 fbuffer = self.sf_iter.next() 

1335 else: 

1336 fbuffer = next(self.sf_iter) 

1337 except StopIteration: 

1338 self.read_buffer = np.zeros((0,0)) 

1339 buffer[:,:] = 0.0 

1340 if self.verbose > 1: 

1341 print(f' caught StopIteration, padded buffer with {r_size} zeros') 

1342 break 

1343 self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels) 

1344 if self.verbose > 2: 

1345 print(f' read forward by {self.read_buffer.shape[0]} frames') 

1346 # recycle file data: 

1347 if ( self.read_offset + self.read_buffer.shape[0] > r_offset 

1348 and self.read_offset <= r_offset ): 

1349 i = r_offset - self.read_offset 

1350 n = self.read_offset + self.read_buffer.shape[0] - r_offset 

1351 if n > r_size: 

1352 n = r_size 

1353 buffer[:n,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0) 

1354 if self.verbose > 2: 

1355 print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)') 

1356 b_offset += n 

1357 r_offset += n 

1358 r_size -= n 

1359 # read data: 

1360 if self.verbose > 2 and r_size > 0: 

1361 print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)') 

1362 while r_size > 0: 

1363 self.read_offset += self.read_buffer.shape[0] 

1364 try: 

1365 if hasattr(self.sf_iter, 'next'): 

1366 fbuffer = self.sf_iter.next() 

1367 else: 

1368 fbuffer = next(self.sf_iter) 

1369 except StopIteration: 

1370 self.read_buffer = np.zeros((0,0)) 

1371 buffer[b_offset:,:] = 0.0 

1372 if self.verbose > 1: 

1373 print(f' caught StopIteration, padded buffer with {r_size} zeros') 

1374 break 

1375 self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels) 

1376 n = self.read_buffer.shape[0] 

1377 if n > r_size: 

1378 n = r_size 

1379 if n > 0: 

1380 buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / (2.0**15-1.0) 

1381 if self.verbose > 2: 

1382 print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)') 

1383 b_offset += n 

1384 r_offset += n 

1385 r_size -= n 

1386 

1387 

1388 # open multiple audio files as one: 

def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Make several audio files accessible as one concatenated array.

    Without `end_indices` each file is opened. Files that cannot be
    opened are skipped. The first file that differs in number of
    channels or sampling rate from the first one, or whose start
    time does not continue the preceding file, ends the sequence;
    this and all following files are not used.

    Parameters
    ----------
    filepaths: list of str
        List of file names of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empy sequence!')
    # start from scratch:
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    expected_time = None   # start time expected for the next file
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization, none of the files is opened:
        self.filepath = filepaths[0]
        self.file_paths = filepaths
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                loader = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # skip files that can not be opened:
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            meta = loader.metadata()
            flat_meta = flatten_metadata(meta, True)
            add_metadata(self._metadata, flat_meta)
            if self.filepath is None:
                # the first file defines the properties of the data:
                self.filepath = loader.filepath
                self.format = loader.format
                self.encoding = loader.encoding
                self.rate = loader.rate
                self.channels = loader.channels
                self.start_time = get_datetime(meta)
                expected_time = self.start_time
                file_time = self.start_time
            else:
                # all other files need to match the first one:
                mismatch = None
                if loader.channels != self.channels:
                    mismatch = (f'number of channels differs: '
                                f'{loader.channels} in {loader.filepath} versus '
                                f'{self.channels} in {self.filepath}')
                if loader.rate != self.rate:
                    mismatch = (f'sampling rates differ: '
                                f'{loader.rate} in {loader.filepath} versus '
                                f'{self.rate} in {self.filepath}')
                # the file has to start where the previous one ended:
                file_time = get_datetime(meta)
                discontinuous = (
                    expected_time is None or file_time is None or
                    abs(expected_time - file_time) >
                    timedelta(seconds=self._max_time_diff))
                if discontinuous:
                    mismatch = (f'start time does not indicate continuous recording: '
                                f'expected {expected_time} instead of '
                                f'{file_time} in {loader.filepath}')
                if mismatch is not None:
                    if verbose > 0:
                        print(mismatch)
                    loader.close()
                    del loader
                    break
            # markers are shifted to their position in the whole data:
            locs, labels = loader.markers()
            locs[:, 0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # range of frames covered by this file:
            self.start_indices.append(self.frames)
            self.frames += loader.frames
            self.end_indices.append(self.frames)
            if file_time is not None:
                expected_time = file_time + \
                    timedelta(seconds=loader.frames/loader.rate)
            # keep only a limited number of files open ...
            self.file_paths.append(filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(loader)
            else:
                loader.close()
            # ... and only a limited number of loaders alive:
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(loader)
                self.open_loaders.append(loader)
            else:
                loader.close()
                del loader
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set start time from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the implementations for multiple files:
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self

1543 

1544 def _close_multiple(self): 

1545 """Close all the audio files. """ 

1546 self.open_files = [] 

1547 self.open_loaders = [] 

1548 if hasattr(self, 'audio_files'): 

1549 for a in self.audio_files: 

1550 if a is not None: 

1551 a.close() 

1552 self.audio_files = [] 

1553 self.filepath = None 

1554 self.file_paths = [] 

1555 self.file_indices = [] 

1556 self.start_indices = [] 

1557 self.end_indices = [] 

1558 del self.audio_files 

1559 del self.open_files 

1560 del self.open_loaders 

1561 del self.start_indices 

1562 del self.end_indices 

1563 

def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames taken from the underlying files.

    The requested range is served file by file. Loaders that are not
    available are created on demand. The lists `open_loaders` and
    `open_files` are kept in order of usage, and the least recently
    used entries are closed once the limits
    `AudioLoader.max_open_loaders` and `AudioLoader.max_open_files`
    are exceeded.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    pos = r_offset        # next frame to be read
    remaining = r_size    # frames still to be read
    dest = 0              # where to write in buffer
    # index of the file that contains the first frame:
    file_inx = np.searchsorted(self.end_indices, pos, side='right')
    while remaining > 0:
        if self.audio_files[file_inx] is None:
            # create a loader for this file:
            loader = AudioLoader(self.file_paths[file_inx],
                                 self.buffersize, self.backsize, 0)
            self.audio_files[file_inx] = loader
            self.open_loaders.append(loader)
            self.open_files.append(loader)
            if len(self.open_files) > AudioLoader.max_open_files:
                oldest = self.open_files.pop(0)
                oldest.close()
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                # get rid of the loader that was not used for longest:
                oldest = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(oldest)] = None
                oldest.close()
                del oldest
                # run the garbage collector every now and then:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        else:
            # mark the loader as the most recently used one:
            used = self.audio_files[file_inx]
            self.open_loaders.remove(used)
            self.open_loaders.append(used)
        loader = self.audio_files[file_inx]
        # range of frames to be read from this file:
        first = pos - self.start_indices[file_inx]
        last = min(pos + remaining, self.end_indices[file_inx])
        last -= self.start_indices[file_inx]
        count = last - first
        loader.load_audio_buffer(first, count, buffer[dest:dest + count, :])
        # loading opened the file, mark it as the most recently used one:
        if loader in self.open_files:
            self.open_files.remove(loader)
        self.open_files.append(loader)
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        # continue with the next file:
        dest += count
        pos += count
        remaining -= count
        file_inx += 1

1620 

1621 

def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    The installed audio modules are tried one after the other until
    one of them succeeds in opening the file and reports at least
    one frame.

    Parameters
    ----------
    filepath: str or list of str
        Name of the file or list of many file names that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    is_sequence = isinstance(filepath, (list, tuple, np.ndarray))
    # The truth value of a numpy array with more than one element
    # is not defined, therefore sequences are checked by their length:
    is_empty = len(filepath) == 0 if is_sequence else not filepath
    if is_empty:
        raise ValueError('input argument filepath is empty string!')
    if is_sequence:
        if len(filepath) > 1:
            self.open_multiple(filepath, buffersize, backsize, verbose,
                               **kwargs)
            if len(self.file_paths) > 1:
                return self
            # only a single valid file, open it as a single file:
            filepath = self.file_paths[0]
            self.close()
        else:
            filepath = filepath[0]
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
            # a file without frames is of no use, report this
            # and try the next module:
            errors.append(f' {lib} failed: no frames found')
            if verbose > 1:
                print(errors[-1])
        except Exception as e:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))

1710 

1711 

def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the whole file with `load_audio()`, compares the data
    returned by soundfile and audioread if both are available, and
    checks random, forward, and backward access via an `AudioLoader`
    against the data loaded at once.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # matplotlib is needed for plotting only, therefore it is
        # imported right here and only on demand:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        # the slices used for the checks are 1.5 seconds long:
        nframes = int(1.5*data.rate)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        # randint() needs a non-empty range of start indices,
        # i.e. data that are longer than a single slice:
        if len(data) > nframes:
            for inx in np.random.randint(0, len(data)-nframes, 1000):
                if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                    print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()

1779 

1780 

def main(*args):
    """Call demo with command line arguments.

    Prints the usage and returns if `-h` is given or if no audio file
    is specified.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    expect_module = False   # True if the next argument is a module name
    for arg in args:
        if expect_module:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            expect_module = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            expect_module = True
        else:
            # the first argument that is not an option is the file:
            file_path = arg
            break

    # without a file there is nothing to demonstrate:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    demo(file_path, plot)

1824 

1825 

# Run the demo if the module is executed as a script.
# All command line arguments are handed over to main().
if __name__ == "__main__":
    main(*sys.argv[1:])