Coverage for src / audioio / audioloader.py: 92%
826 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-17 21:34 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-17 21:34 +0000
1"""Loading data, metadata, and markers from audio files.
3- `load_audio()`: load a whole audio file at once.
4- `metadata()`: read metadata of an audio file.
5- `markers()`: read markers of an audio file.
6- class `AudioLoader`: read data from audio files in chunks.
8The read in data are always numpy arrays of floats ranging between -1 and 1.
9The arrays are 2-D ndarrays with first axis time and second axis channel,
10even for single channel data.
12If an audio file cannot be loaded, you might need to install
13additional packages. See
14[installation](https://bendalab.github.io/audioio/installation) for
15further instructions.
17For a demo run the module as:
18```
19python -m src.audioio.audioloader audiofile.wav
20```
21"""
23import os
24import gc
25import sys
26import warnings
27import numpy as np
29from pathlib import Path
30from datetime import timedelta
32from .audiomodules import *
33from .bufferedarray import BufferedArray
34from .riffmetadata import metadata_riff, markers_riff
35from .audiometadata import update_gain, add_unwrap, get_datetime
36from .audiometadata import flatten_metadata, add_metadata, set_starttime
37from .audiotools import unwrap
def load_wave(filepath):
    """Load wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3/library/wave.html

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray of floats,
        first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wave']:
        raise ImportError

    wf = wave.open(os.fspath(filepath), 'r')
    try:
        nchannels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        rate = wf.getframerate()
        buffer = wf.readframes(wf.getnframes())
    finally:
        # always release the file handle, also when reading fails:
        wf.close()
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # 8-bit samples are read as unsigned ints, shift them to be centered on zero:
        samples = np.frombuffer(buffer, dtype='u1').reshape(-1, nchannels)
        data = samples.astype('d')/factor - 1.0
    else:
        samples = np.frombuffer(buffer, dtype=f'i{sampwidth}').reshape(-1, nchannels)
        data = samples.astype('d')/factor
    return data, float(rate)
def load_ewave(filepath):
    """Read a complete wav file via the ewave module.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel. Single channel data are reshaped to one column.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['ewave']:
        raise ImportError

    data, rate = np.array([]), 0.0
    with ewave.open(os.fspath(filepath), 'r') as wav:
        rate = wav.sampling_rate
        # convert the raw samples to floats:
        data = ewave.rescale(wav.read(), 'float')
        if data.ndim == 1:
            data = data.reshape(-1, 1)
    return data, float(rate)
def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings only while reading. The context manager restores
    # the previous warning filters, also if reading raises.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # unsigned 8-bit samples: scale and center on zero
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # signed integers: scale by the full range of the integer type
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data,(-1, 1))
    return data, float(rate)
def load_soundfile(filepath):
    """Read a complete audio file via SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray of 64-bit floats,
        first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    data, rate = np.array([]), 0.0
    with soundfile.SoundFile(filepath, 'r') as snd:
        rate = snd.samplerate
        # frames=-1 requests all frames of the file:
        data = snd.read(frames=-1, dtype='float64', always_2d=True)
    return data, float(rate)
def load_wavefile(filepath):
    """Read a complete audio file via wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    rate, loaded = wavefile.load(os.fspath(filepath))
    # the loaded array is transposed to get time on the first axis:
    data = loaded.astype(np.float64, copy=False).T
    return data, float(rate)
def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # the number of frames is estimated from the reported duration:
        nframes = int(np.ceil(af.samplerate*af.duration))
        data = np.zeros((nframes, af.channels), dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            n = fulldata.shape[0]
            if index + n > len(data):
                # only copy what still fits into the preallocated array:
                n = len(data) - index
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    return data/(2.0**15-1.0), float(rate)
# Pairs of (key into `audio_modules`, loader function).
# load_audio() iterates over this table and tries the loaders in this order.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""
def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str or Path
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    filepath = Path(filepath)
    # is_file needs to be called, the bound method itself is always truthy:
    if not filepath.is_file():
        raise FileNotFoundError(f'file "{filepath}" not found')
    if filepath.stat().st_size <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
        except Exception as e:
            # remember the failure and try the next module:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules returned any data:
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' +
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def metadata(filepath, store_empty=False):
    """Read metadata of an audio file.

    The metadata are read with `metadata_riff()`. If this raises a
    `ValueError` (not a RIFF file), an empty dictionary is returned.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file. Keys of the nested
        dictionaries are always strings. If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary. All other types of
        values are values for the respective key. In particular they
        are strings. But other types like for example ints or floats
        are also allowed. See `audioio.audiometadata` module for
        available functions to work with such metadata.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        meta_data = metadata_riff(filepath, store_empty)
    except ValueError:
        # not a RIFF file
        meta_data = {}
    return meta_data
def markers(filepath):
    """ Read markers of an audio file.

    The markers are read with `markers_riff()`. If this raises a
    `ValueError` (not a RIFF file), empty arrays are returned.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        found = markers_riff(filepath)
    except ValueError:
        # not a RIFF file: no markers at all
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        return no_locs, no_labels
    return found
463class AudioLoader(BufferedArray):
464 """Buffered reading of audio data for random access of the data in the file.
466 The class allows for reading very large audio files or many
467 sequential audio files that do not fit into memory.
468 An AudioLoader instance can be used like a huge read-only numpy array, i.e.
469 ```
470 data = AudioLoader('path/to/audio/file.wav')
471 x = data[10000:20000,0]
472 ```
473 The first index specifies the frame, the second one the channel.
475 Behind the scenes, `AudioLoader` tries to open the audio file with
476 all available audio modules until it succeeds (first line). It
477 then reads data from the file as necessary for the requested data
478 (second line). Accessing the content of the audio files via a
479 buffer that holds only a part of the data is managed by the
480 `BufferedArray` class.
482 Reading sequentially through the file is always possible. Some
483 modules, however, (e.g. audioread, needed for mp3 files) can only
484 read forward. If previous data are requested, then the file is read
485 from the beginning again. This slows down access to previous data
486 considerably. Use the `backsize` argument of the open function to
487 make sure some data are loaded into the buffer before the requested
488 frame. Then a subsequent access to the data within `backsize` seconds
489 before that frame can still be handled without the need to reread
490 the file from the beginning.
492 Usage
493 -----
494 With context management:
495 ```
496 import audioio as aio
497 with aio.AudioLoader(filepath, 60.0, 10.0) as data:
498 # do something with the content of the file:
499 x = data[0:10000]
500 y = data[10000:20000]
501 z = x + y
502 ```
504 For using a specific audio module, here the audioread module:
505 ```
506 data = aio.AudioLoader()
507 with data.open_audioread(filepath, 60.0, 10.0):
508 # do something ...
509 ```
511 Use `blocks()` for sequential, blockwise reading and processing:
512 ```
513 from scipy.signal import spectrogram
514 nfft = 2048
515 with aio.AudioLoader('some/audio.wav') as data:
516 for x in data.blocks(100*nfft, nfft//2):
517 f, t, Sxx = spectrogram(x, fs=data.rate,
518 nperseg=nfft, noverlap=nfft//2)
519 ```
521 For loop iterates over single frames (1-D arrays containing samples for each channel):
522 ```
523 with aio.AudioLoader('some/audio.wav') as data:
524 for x in data:
525 print(x)
526 ```
528 Traditional open and close:
529 ```
530 data = aio.AudioLoader(filepath, 60.0)
531 x = data[:,:] # read the whole file
532 data.close()
533 ```
535 this is the same as:
536 ```
537 data = aio.AudioLoader()
538 data.open(filepath, 60.0)
539 ...
540 ```
542 Classes inheriting AudioLoader just need to implement
543 ```
544 self.load_audio_buffer(offset, nsamples, pbuffer)
545 ```
546 This function needs to load the supplied `pbuffer` with
547 `nsamples` frames of data starting at frame `offset`.
549 In the constructor or some kind of opening function, you need to
550 set some member variables, as described for `BufferedArray`.
552 For loading metadata and markers, implement the functions
553 ```
554 self._load_metadata(filepath, **kwargs)
555 self._load_markers(filepath)
556 ```
558 Parameters
559 ----------
560 filepath: str or Path or list of str or list of Path
561 Name of the file or list of many file names that should be
562 made accessible as a single array.
563 buffersize: float
564 Size of internal buffer in seconds.
565 backsize: float
566 Part of the buffer to be loaded before the requested start index in seconds.
567 verbose: int
568 If larger than zero show detailed error/warning messages.
569 store_empty: bool
570 If `False` do not return meta data with empty values.
571 meta_kwargs: dict
572 Keyword arguments that are passed on to the _load_metadata()
573 function. For audio data the only recognized key is
574 `store_empty` - see the metadata() function for more infos.
575 **kwargs: dict
576 Further keyword arguments that are passed on to the
577 specific open() functions.
579 Attributes
580 ----------
581 filepath: Path
582 Full path of the opened file. In case of many files, the first one.
583 file_paths: list of Path
584 List of paths of the opened files that are made accessible
585 as a single array.
586 file_indices: list of int
587 For each file the index of its first sample.
588 rate: float
589 The sampling rate of the data in Hertz.
590 channels: int
591 The number of channels.
592 frames: int
593 The number of frames in the file. Same as `len()`.
594 format: str or None
595 Format of the audio file.
596 encoding: str or None
597 Encoding/subtype of the audio file.
598 shape: tuple
599 Frames and channels of the data.
600 ndim: int
601 Number of dimensions: always 2 (frames and channels).
602 offset: int
603 Index of first frame in the current buffer.
604 buffer: ndarray of floats
605 The currently available data from the file.
606 ampl_min: float
607 Minimum amplitude the file format supports.
608 Always -1.0 for audio data.
609 ampl_max: float
610 Maximum amplitude the file format supports.
611 Always +1.0 for audio data.
613 Methods
614 -------
615 - `len()`: Number of frames.
616 - `file_start_times()`: time of first frame of each file in seconds.
617 - `get_file_index()`: file path and index of frame contained by this file.
618 - `open()`: Open an audio file by trying available audio modules.
619 - `open_*()`: Open an audio file with the respective audio module.
620 - `__getitem__`: Access data of the audio file.
621 - `update_buffer()`: Update the internal buffer for a range of frames.
622 - `blocks()`: Generator for blockwise processing of AudioLoader data.
623 - `file_start_times()`: Time of first frame of each file in seconds.
624 - `get_file_index()`: File path and index of frame contained by this file.
625 - `basename()`: Base name of the audio data.
626 - `format_dict()`: technical infos about how the data are stored.
627 - `metadata()`: Metadata stored along with the audio data.
628 - `markers()`: Markers stored along with the audio data.
629 - `set_unwrap()`: Set parameters for unwrapping clipped data.
630 - `set_time_delta()`: Set maximum allowed time difference between successive files.
631 - `close()`: Close the file.
633 """
635 max_open_files = 5
636 """ Suggestion for maximum number of open file descriptors. """
638 max_open_loaders = 10
639 """ Suggestion for maximum number of AudioLoaders when opening multiple files. """
641 def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
642 verbose=0, meta_kwargs={}, **kwargs):
643 super().__init__(verbose=verbose)
644 self.format = None
645 self.encoding = None
646 self._metadata = None
647 self._locs = None
648 self._labels = None
649 self._load_metadata = metadata
650 self._load_markers = markers
651 self._metadata_kwargs = meta_kwargs
652 self.filepath = None
653 self.file_paths = None
654 self.file_indices = []
655 self._max_time_diff = 1
656 self.sf = None
657 self.close = self._close
658 self.load_buffer = self._load_buffer_unwrap
659 self.ampl_min = -1.0
660 self.ampl_max = +1.0
661 self.unwrap = False
662 self.unwrap_thresh = 0.0
663 self.unwrap_clips = False
664 self.unwrap_ampl = 1.0
665 self.unwrap_downscale = True
666 if filepath is not None:
667 self.open(filepath, buffersize, backsize, verbose, **kwargs)
669 numpy_encodings = {np.dtype(np.int64): 'PCM_64',
670 np.dtype(np.int32): 'PCM_32',
671 np.dtype(np.int16): 'PCM_16',
672 np.dtype(np.single): 'FLOAT',
673 np.dtype(np.double): 'DOUBLE',
674 np.dtype('>f4'): 'FLOAT',
675 np.dtype('>f8'): 'DOUBLE'}
676 """ Map numpy dtypes to encodings.
677 """
679 def _close(self):
680 pass
682 def __del__(self):
683 self.close()
685 def file_start_times(self):
686 """ Time of first frame of each file in seconds.
688 Returns
689 -------
690 times: array of float
691 Time of the first frame of each file relative to buffer start
692 in seconds.
693 """
694 times = []
695 for idx in self.file_indices:
696 times.append(idx/self.rate)
697 return np.array(times)
699 def get_file_index(self, frame):
700 """ File path and index of frame contained by this file.
702 Parameters
703 ----------
704 frame: int
705 Index of frame.
707 Returns
708 -------
709 filepath: Path
710 Path of file that contains the frame.
711 index: int
712 Index of the frame relative to the first frame
713 in the containing file.
715 Raises
716 ------
717 ValueError
718 Invalid frame index.
719 """
720 if frame < 0 or frame >= self.frames:
721 raise ValueError('invalid frame')
722 fname = self.file_paths[0]
723 index = self.file_indices[0]
724 for i in reversed(range(len(self.file_indices))):
725 if self.file_indices[i] <= frame:
726 fname = self.file_paths[i]
727 index = self.file_indices[i]
728 break
729 return fname, frame - index
731 def basename(self, path=None):
732 """ Base name of the audio data.
734 Parameters
735 ----------
736 path: str or Path or None
737 Path of the audio file from which a base name is generated.
738 If `None`, use `self.filepath`.
740 Returns
741 -------
742 s: str
743 The name. Defaults to the stem of `path`.
745 """
746 if path is None:
747 path = self.filepath
748 return Path(path).stem
750 def format_dict(self):
751 """ Technical infos about how the data are stored in the file.
753 Returns
754 -------
755 fmt: dict
756 Dictionary with filepath, format, encoding, samplingrate,
757 channels, frames, and duration of the audio file as strings.
759 """
760 fmt = dict(name=self.basename(), filepath=os.fsdecode(self.filepath))
761 if self.format is not None:
762 fmt['format'] = self.format
763 if self.encoding is not None:
764 fmt['encoding'] = self.encoding
765 fmt.update(dict(samplingrate=f'{self.rate:.0f}Hz',
766 channels=self.channels,
767 frames=self.frames,
768 duration=f'{self.frames/self.rate:.3f}s'))
769 return fmt
771 def metadata(self):
772 """Metadata of the audio file.
774 Parameters
775 ----------
776 store_empty: bool
777 If `False` do not add meta data with empty values.
779 Returns
780 -------
781 meta_data: nested dict
783 Meta data contained in the audio file. Keys of the nested
784 dictionaries are always strings. If the corresponding
785 values are dictionaries, then the key is the section name
786 of the metadata contained in the dictionary. All other
787 types of values are values for the respective key. In
788 particular they are strings. But other types like for
789 example ints or floats are also allowed. See
790 `audioio.audiometadata` module for available functions to
791 work with such metadata.
793 """
794 if self._metadata is None:
795 if self._load_metadata is None:
796 self._metadata = {}
797 else:
798 self._metadata = self._load_metadata(self.filepath,
799 **self._metadata_kwargs)
800 return self._metadata
802 def markers(self):
803 """Read markers of the audio file.
805 See `audioio.audiomarkers` module for available functions
806 to work with markers.
808 Returns
809 -------
810 locs: 2-D ndarray of int
811 Marker positions (first column) and spans (second column)
812 for each marker (rows).
813 labels: 2-D ndarray of str objects
814 Labels (first column) and texts (second column)
815 for each marker (rows).
816 """
817 if self._locs is None:
818 if self._load_markers is None:
819 self._locs = np.zeros((0, 2), dtype=int)
820 self._labels = np.zeros((0, 2), dtype=object)
821 else:
822 self._locs, self._labels = self._load_markers(self.filepath)
823 return self._locs, self._labels
825 def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
826 """Set parameters for unwrapping clipped data.
828 See unwrap() function from the audioio package.
830 Parameters
831 ----------
832 thresh: float
833 Threshold for detecting wrapped data relative to self.unwrap_ampl
834 which is initially set to self.ampl_max.
835 If zero, do not unwrap.
836 clips: bool
837 If True, then clip the unwrapped data properly.
838 Otherwise, unwrap the data and double the
839 minimum and maximum data range
840 (self.ampl_min and self.ampl_max).
841 down_scale: bool
842 If not `clips`, then downscale the signal by a factor of two,
843 in order to keep the range between -1 and 1.
844 unit: str
845 Unit of the data.
846 """
847 self.unwrap_ampl = self.ampl_max
848 self.unwrap_thresh = thresh
849 self.unwrap_clips = clips
850 self.unwrap_down_scale = down_scale
851 self.unwrap = thresh > 1e-3
852 if self.unwrap:
853 if self.unwrap_clips:
854 add_unwrap(self.metadata(),
855 self.unwrap_thresh*self.unwrap_ampl,
856 self.unwrap_ampl, unit)
857 elif down_scale:
858 update_gain(self.metadata(), 0.5)
859 add_unwrap(self.metadata(),
860 0.5*self.unwrap_thresh*self.unwrap_ampl,
861 0.0, unit)
862 else:
863 self.ampl_min *= 2
864 self.ampl_max *= 2
865 add_unwrap(self.metadata(),
866 self.unwrap_thresh*self.unwrap_ampl,
867 0.0, unit)
869 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer):
870 """Load new data and unwrap it.
872 Parameters
873 ----------
874 r_offset: int
875 First frame to be read from file.
876 r_size: int
877 Number of frames to be read from file.
878 pbuffer: ndarray
879 Buffer where to store the loaded data.
880 """
881 self.load_audio_buffer(r_offset, r_size, pbuffer)
882 if self.unwrap:
883 # TODO: handle edge effects!
884 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl)
885 if self.unwrap_clips:
886 pbuffer[pbuffer > self.ampl_max] = self.ampl_max
887 pbuffer[pbuffer < self.ampl_min] = self.ampl_min
888 elif self.unwrap_down_scale:
889 pbuffer *= 0.5
891 def set_time_delta(time_delta):
892 """ Set maximum allowed time difference between successive files.
894 Parameters
895 ----------
896 time_delta: int
897 Maximum number of seconds the start time of a recording file is allowed
898 to differ from the end of the previous file.
899 Default is one second.
900 """
901 self._max_time_diff = time_delta
903 # wave interface:
    def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
        """Open audio file for reading using the wave module.

        Sets file paths, sampling rate, format, encoding, number of
        channels and frames, and the buffer sizes, initializes the
        buffer, and installs the wave specific functions for closing
        the file and for loading data.

        Note: we assume that setpos() and tell() use integer numbers!

        Parameters
        ----------
        filepath: str or Path
            Name of the file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If larger than zero show detailed error/warning messages.

        Returns
        -------
        self: AudioLoader
            This instance.

        Raises
        ------
        ImportError
            The wave module is not installed
        """
        self.verbose = verbose
        if self.verbose > 0:
            print(f'open_wave(filepath) with filepath={filepath}')
        if not audio_modules['wave']:
            # reset to an empty data set before bailing out:
            self.rate = 0.0
            self.channels = 0
            self.frames = 0
            self.size = 0
            self.shape = (0, 0)
            self.offset = 0
            raise ImportError
        # close a file that is still open from a previous call:
        if self.sf is not None:
            self._close_wave()
        self.sf = wave.open(os.fspath(filepath), 'r')
        self.filepath = Path(filepath)
        self.file_paths = [self.filepath]
        self.file_indices = [0]
        self.rate = float(self.sf.getframerate())
        self.format = 'WAV'
        sampwidth = self.sf.getsampwidth()
        if sampwidth == 1:
            # one byte per sample is treated as unsigned integer:
            self.dtype = 'u1'
            self.encoding = 'PCM_U8'
        else:
            self.dtype = f'i{sampwidth}'
            self.encoding = f'PCM_{sampwidth*8}'
        # factor for scaling the integer samples to floats:
        self.factor = 1.0/(2.0**(sampwidth*8-1))
        self.channels = self.sf.getnchannels()
        self.frames = self.sf.getnframes()
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        # buffer sizes are converted from seconds to frames:
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_wave
        self.load_audio_buffer = self._load_buffer_wave
        # read 1 frame to determine the unit of the position values:
        self.p0 = self.sf.tell()
        self.sf.readframes(1)
        self.pfac = self.sf.tell() - self.p0
        # rewind to the initial position:
        self.sf.setpos(self.p0)
        return self
969 def _close_wave(self):
970 """Close the audio file using the wave module. """
971 if self.sf is not None:
972 self.sf.close()
973 self.sf = None
975 def _load_buffer_wave(self, r_offset, r_size, buffer):
976 """Load new data from file using the wave module.
978 Parameters
979 ----------
980 r_offset: int
981 First frame to be read from file.
982 r_size: int
983 Number of frames to be read from file.
984 buffer: ndarray
985 Buffer where to store the loaded data.
986 """
987 if self.sf is None:
988 self.sf = wave.open(os.fspath(self.filepath), 'r')
989 self.sf.setpos(r_offset*self.pfac + self.p0)
990 fbuffer = self.sf.readframes(r_size)
991 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels))
992 if self.dtype[0] == 'u':
993 buffer[:, :] = fbuffer * self.factor - 1.0
994 else:
995 buffer[:, :] = fbuffer * self.factor
998 # ewave interface:
    def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
        """Open audio file for reading using the ewave module.

        Sets file paths, sampling rate, number of channels and frames,
        format, encoding, and the buffer sizes, initializes the
        buffer, and installs the ewave specific functions for closing
        the file and for loading data.

        Parameters
        ----------
        filepath: str or Path
            Name of the file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If larger than zero show detailed error/warning messages.

        Returns
        -------
        self: AudioLoader
            This instance.

        Raises
        ------
        ImportError
            The ewave module is not installed.
        """
        self.verbose = verbose
        if self.verbose > 0:
            print(f'open_ewave(filepath) with filepath={filepath}')
        if not audio_modules['ewave']:
            # reset to an empty data set before bailing out:
            self.rate = 0.0
            self.channels = 0
            self.frames = 0
            self.shape = (0, 0)
            self.size = 0
            self.offset = 0
            raise ImportError
        # close a file that is still open from a previous call:
        if self.sf is not None:
            self._close_ewave()
        self.sf = ewave.open(os.fspath(filepath), 'r')
        self.filepath = Path(filepath)
        self.file_paths = [self.filepath]
        self.file_indices = [0]
        self.rate = float(self.sf.sampling_rate)
        self.channels = self.sf.nchannels
        self.frames = self.sf.nframes
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.format = 'WAV' # or WAVEX?
        # translate the dtype reported by ewave via the class-level table:
        self.encoding = self.numpy_encodings[self.sf.dtype]
        # buffer sizes are converted from seconds to frames:
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_ewave
        self.load_audio_buffer = self._load_buffer_ewave
        return self
1050 def _close_ewave(self):
1051 """Close the audio file using the ewave module. """
1052 if self.sf is not None:
1053 del self.sf
1054 self.sf = None
1056 def _load_buffer_ewave(self, r_offset, r_size, buffer):
1057 """Load new data from file using the ewave module.
1059 Parameters
1060 ----------
1061 r_offset: int
1062 First frame to be read from file.
1063 r_size: int
1064 Number of frames to be read from file.
1065 buffer: ndarray
1066 Buffer where to store the loaded data.
1067 """
1068 if self.sf is None:
1069 self.sf = ewave.open(os.fspath(self.filepath), 'r')
1070 fbuffer = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
1071 fbuffer = ewave.rescale(fbuffer, 'float')
1072 if len(fbuffer.shape) == 1:
1073 fbuffer = np.reshape(fbuffer,(-1, 1))
1074 buffer[:,:] = fbuffer
1077 # soundfile interface:
    def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                       verbose=0):
        """Open audio file for reading using the SoundFile module.

        Sets file paths, sampling rate, number of channels and frames,
        format, encoding, and the buffer sizes, initializes the
        buffer, and installs the SoundFile specific functions for
        closing the file and for loading data.

        Parameters
        ----------
        filepath: str or Path
            Name of the file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If larger than zero show detailed error/warning messages.

        Returns
        -------
        self: AudioLoader
            This instance.

        Raises
        ------
        ImportError
            The SoundFile module is not installed
        """
        self.verbose = verbose
        if self.verbose > 0:
            print(f'open_soundfile(filepath) with filepath={filepath}')
        if not audio_modules['soundfile']:
            # reset to an empty data set before bailing out:
            self.rate = 0.0
            self.channels = 0
            self.frames = 0
            self.shape = (0, 0)
            self.size = 0
            self.offset = 0
            raise ImportError
        # close a file that is still open from a previous call:
        if self.sf is not None:
            self._close_soundfile()
        self.sf = soundfile.SoundFile(filepath, 'r')
        self.filepath = Path(filepath)
        self.file_paths = [self.filepath]
        self.file_indices = [0]
        self.rate = float(self.sf.samplerate)
        self.channels = self.sf.channels
        self.frames = 0
        self.size = 0
        if self.sf.seekable():
            # the number of frames is the position at the end of the file:
            self.frames = self.sf.seek(0, soundfile.SEEK_END)
            self.sf.seek(0, soundfile.SEEK_SET)
        # TODO: if not seekable, we cannot handle that file!
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.format = self.sf.format
        self.encoding = self.sf.subtype
        # buffer sizes are converted from seconds to frames:
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_soundfile
        self.load_audio_buffer = self._load_buffer_soundfile
        return self
def _close_soundfile(self):
    """Close the audio file that was opened with the SoundFile module.

    Nothing happens if no file is open.
    """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None
def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Load new data from file using the SoundFile module.

    The file is reopened first if it has been closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = soundfile.SoundFile(self.filepath, 'r')
    # position the file at the first requested frame:
    self.sf.seek(r_offset, soundfile.SEEK_SET)
    frames = self.sf.read(r_size, always_2d=True)
    buffer[:, :] = frames
1158 # wavefile interface:
def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open audio file for reading using the wavefile module.

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self:
        The object the file was opened on.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # leave an empty data set behind before giving up:
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = self.offset = 0
        raise ImportError
    # close a file that is still open:
    if self.sf is not None:
        self._close_wavefile()
    self.sf = wavefile.WaveReader(os.fspath(filepath))
    self.filepath = Path(filepath)
    self.file_paths = [self.filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = self.sf.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # get format and encoding: the names of the integer constants in
    # wavefile.Format that match the type and the subtype bits of the
    # file's format are taken as format and encoding, respectively.
    flags = wavefile.Format
    for name in dir(flags):
        value = getattr(flags, name)
        if not isinstance(value, int):
            continue
        if (value & flags.TYPEMASK) > 0 and \
           (self.sf.format & flags.TYPEMASK) == value:
            self.format = name
        if (value & flags.SUBMASK) > 0 and \
           (self.sf.format & flags.SUBMASK) == value:
            self.encoding = name
    # init buffer, sizes are converted from seconds to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self
def _close_wavefile(self):
    """Close the audio file that was opened with the wavefile module.

    Nothing happens if no file is open.
    """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None
def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Load new data from file using the wavefile module.

    The file is reopened first if it has been closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = wavefile.WaveReader(os.fspath(self.filepath))
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    # let the reader allocate a matching array and fill it:
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    # the chunk is transposed before it is stored in the buffer:
    buffer[:, :] = chunk.T
1245 # audioread interface:
def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open audio file for reading using the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str or Path
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested
        start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self:
        The object the file was opened on.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # leave an empty data set behind before giving up:
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = self.offset = 0
        raise ImportError
    # close a file that is still open:
    if self.sf is not None:
        self._close_audioread()
    self.sf = audioread.audio_open(filepath)
    self.filepath = Path(filepath)
    self.file_paths = [self.filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    # the number of frames is computed from the duration of the file:
    self.frames = int(np.ceil(self.rate*self.sf.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # set up the buffer, sizes are converted from seconds to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # no block has been read from the file yet:
    self.read_buffer = np.zeros((0, 0))
    self.read_offset = 0
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = self.sf.__iter__()
    return self
def _close_audioread(self):
    """Close the audio file that was opened with the audioread module.

    The file object is closed by calling its `__exit__()` method.
    Nothing happens if no file is open.
    """
    if self.sf is None:
        return
    self.sf.__exit__(None, None, None)
    self.sf = None
def _load_buffer_audioread(self, r_offset, r_size, buffer):
    """Load new data from file using the audioread module.

    audioread can only iterate through a file once and in blocksizes that are
    given by audioread. Therefore we keep yet another buffer: `self.read_buffer`
    at file offset `self.read_offset` containing whatever audioread returned.

    If the file has been closed, it is reopened and iterated again
    from its beginning. Frames requested beyond the end of the file
    are filled with zeros.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # the blocks are interpreted as 16-bit integers ('<i2'),
    # they are scaled to floats by this factor:
    scale = 2.0**15 - 1.0
    if self.sf is None:
        # A reopened file starts at its beginning. The iterator and the
        # block cached from the previous file handle do not describe its
        # position, so they are reset in the same way as for a rewind.
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0, 0))
        self.read_offset = 0
    b_offset = 0
    if (self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size
            and self.read_offset < r_offset + r_size):
        # read_buffer overlaps at the end of the requested interval:
        i = 0
        n = r_offset + r_size - self.read_offset
        if n > r_size:
            i += n - r_size
            n = r_size
        buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / scale
        if self.verbose > 2:
            print(f'  recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)')
        r_size -= n
        if r_size <= 0:
            return
    # go back to beginning of file:
    if r_offset < self.read_offset:
        if self.verbose > 2:
            print('  rewind')
        self._close_audioread()
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0, 0))
        self.read_offset = 0
    # read to position:
    while self.read_offset + self.read_buffer.shape[0] < r_offset:
        self.read_offset += self.read_buffer.shape[0]
        try:
            fbuffer = next(self.sf_iter)
        except StopIteration:
            self.read_buffer = np.zeros((0, 0))
            buffer[:,:] = 0.0
            if self.verbose > 1:
                print(f'  caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        if self.verbose > 2:
            print(f'  read forward by {self.read_buffer.shape[0]} frames')
    # recycle file data:
    if (self.read_offset + self.read_buffer.shape[0] > r_offset
            and self.read_offset <= r_offset):
        i = r_offset - self.read_offset
        n = self.read_offset + self.read_buffer.shape[0] - r_offset
        if n > r_size:
            n = r_size
        buffer[:n,:] = self.read_buffer[i:i+n,:] / scale
        if self.verbose > 2:
            print(f'  recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
        b_offset += n
        r_offset += n
        r_size -= n
    # read data:
    if self.verbose > 2 and r_size > 0:
        print(f'  read    {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)')
    while r_size > 0:
        self.read_offset += self.read_buffer.shape[0]
        try:
            fbuffer = next(self.sf_iter)
        except StopIteration:
            # end of file: fill the remainder of the buffer with zeros
            self.read_buffer = np.zeros((0, 0))
            buffer[b_offset:,:] = 0.0
            if self.verbose > 1:
                print(f'  caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        n = self.read_buffer.shape[0]
        if n > r_size:
            n = r_size
        if n > 0:
            buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / scale
            if self.verbose > 2:
                print(f'    read  {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
            b_offset += n
            r_offset += n
            r_size -= n
1408 # open multiple audio files as one:
def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Open multiple audio files as a single concatenated array.

    Files that cannot be opened are skipped. Collecting files stops
    at the first file whose number of channels, sampling rate, or
    start time does not match the files collected so far.

    Parameters
    ----------
    filepaths: list of str or Path
        List of file paths of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.
        Whether the minimal initialization is done is decided by
        this argument alone.

    Returns
    -------
    self:
        The object the files were opened on.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empty sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file:
    start_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization without opening any file:
        self.filepath = Path(filepaths[0])
        self.file_paths = [Path(fp) for fp in filepaths]
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                a = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # best effort: files that cannot be opened are skipped
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = a.metadata()
            fmd = flatten_metadata(md, True)
            add_metadata(self._metadata, fmd)
            if self.filepath is None:
                # first file:
                self.filepath = a.filepath
                self.format = a.format
                self.encoding = a.encoding
                self.rate = a.rate
                self.channels = a.channels
                self.start_time = get_datetime(md)
                start_time = self.start_time
                stime = self.start_time
            else:
                # check channels and rate:
                error_str = None
                if a.channels != self.channels:
                    error_str = f'number of channels differs: ' \
                                f'{a.channels} in {a.filepath} versus ' \
                                f'{self.channels} in {self.filepath}'
                if a.rate != self.rate:
                    error_str = f'sampling rates differ: ' \
                                f'{a.rate} in {a.filepath} versus ' \
                                f'{self.rate} in {self.filepath}'
                # check start time of recording:
                stime = get_datetime(md)
                if start_time is not None and stime is not None and \
                   abs(start_time - stime) > timedelta(seconds=self._max_time_diff):
                    error_str = f'start time does not indicate continuous recording: ' \
                                f'expected {start_time} instead of ' \
                                f'{stime} in {a.filepath}'
                if error_str is not None:
                    # the first file that does not fit ends the sequence:
                    if verbose > 0:
                        print(error_str)
                    a.close()
                    del a
                    break
            # markers, shifted by the frames of the preceding files:
            locs, labels = a.markers()
            locs[:,0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += a.frames
            self.end_indices.append(self.frames)
            if stime is not None:
                start_time = stime + timedelta(seconds=a.frames/a.rate)
            # add file to lists:
            self.file_paths.append(a.filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(a)
            else:
                a.close()
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(a)
                self.open_loaders.append(a)
            else:
                a.close()
                del a
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set start time from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self
def _close_multiple(self):
    """Close all the audio files.

    The lists of open files and loaders are emptied. If the
    `audio_files` attribute exists, each loader stored in it is
    closed, the file path attributes are reset, and the attributes
    managing the multiple files are removed from the object.
    """
    self.open_files = []
    self.open_loaders = []
    if not hasattr(self, 'audio_files'):
        return
    for loader in self.audio_files:
        if loader is not None:
            loader.close()
    self.audio_files = []
    self.filepath = None
    self.file_paths = []
    self.file_indices = []
    self.start_indices = []
    self.end_indices = []
    # remove the infrastructure for handling multiple files:
    for name in ('audio_files', 'open_files', 'open_loaders',
                 'start_indices', 'end_indices'):
        delattr(self, name)
def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Load new data from the underlying files.

    The requested range is split at the file boundaries and each
    part is loaded by the loader of the respective file. Loaders
    are created on demand. `self.open_loaders` and `self.open_files`
    are kept ordered by last usage; the oldest entries are closed
    once there are more than `AudioLoader.max_open_loaders` or
    `AudioLoader.max_open_files` of them.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    pos = r_offset          # next frame to be read
    remaining = r_size      # frames still to be read
    filled = 0              # frames already stored in buffer
    # index of the file containing the first frame:
    idx = np.searchsorted(self.end_indices, pos, side='right')
    while remaining > 0:
        if self.audio_files[idx] is not None:
            # move the existing loader to the end of the list:
            self.open_loaders.remove(self.audio_files[idx])
            self.open_loaders.append(self.audio_files[idx])
        else:
            # create a new loader for this file:
            loader = AudioLoader(self.file_paths[idx],
                                 self.buffersize, self.backsize, 0)
            self.audio_files[idx] = loader
            self.open_loaders.append(loader)
            self.open_files.append(loader)
            if len(self.open_files) > AudioLoader.max_open_files:
                oldest = self.open_files.pop(0)
                oldest.close()
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                oldest = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(oldest)] = None
                oldest.close()
                del oldest
                # run the garbage collector every now and then:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        # range of frames within the current file:
        first = pos - self.start_indices[idx]
        last = pos + remaining
        if last > self.end_indices[idx]:
            last = self.end_indices[idx]
        last -= self.start_indices[idx]
        count = last - first
        self.audio_files[idx].load_audio_buffer(first, count,
                                                buffer[filled:filled + count,:])
        # the loader just used goes to the end of the list of open files:
        if self.audio_files[idx] in self.open_files:
            self.open_files.remove(self.audio_files[idx])
        self.open_files.append(self.audio_files[idx])
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        # continue with the next file:
        filled += count
        pos += count
        remaining -= count
        idx += 1
def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    A sequence with more than one file path is opened by
    `open_multiple()`. A single file is opened by the first of the
    available audio modules that succeeds in reading frames from it.

    Parameters
    ----------
    filepath: str or Path or list of str or Path
        Path of the file or list of many file paths that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self:
        The object the file was opened on.

    Raises
    ------
    ValueError
        `filepath` is an empty sequence.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    if isinstance(filepath, (list, tuple, np.ndarray)):
        if len(filepath) == 0:
            # report this like open_multiple() does
            # instead of failing on filepath[0]:
            raise ValueError('input argument filepath is empty sequence!')
        if len(filepath) > 1:
            self.open_multiple(filepath, buffersize, backsize,
                               verbose - 1, **kwargs)
            if len(self.file_paths) > 1:
                return self
            # only a single file was accepted: open it as a plain file
            filepath = self.file_paths[0]
            self.close()
        else:
            filepath = filepath[0]
    filepath = Path(filepath)
    if not filepath.is_file():
        raise FileNotFoundError(f'file "{filepath}" not found')
    if filepath.stat().st_size <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize,
                      verbose - 1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f'  format       : {self.format}')
                        if self.encoding is not None:
                            print(f'  encoding     : {self.encoding}')
                        print(f'  sampling rate: {self.rate} Hz')
                        print(f'  channels     : {self.channels}')
                        print(f'  frames       : {self.frames}')
                return self
        except Exception as e:
            # remember the failure and try the next module:
            errors.append(f'  {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules was able to read frames from the file:
    if len(not_installed) > 0:
        errors.append('\n  You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the file at once with `load_audio()`, optionally cross
    checks the soundfile and audioread modules against each other,
    and compares various ways of accessing the data via an
    `AudioLoader` with the data loaded at once.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # import here, so that `plt` is defined within this function:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        nframes = int(1.5*data.rate)
        # differences up to this value are tolerated:
        thresh = 2.0**(-14)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > thresh):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        # random start indices need a non-empty range to draw from:
        if len(data) > nframes:
            for inx in np.random.randint(0, len(data)-nframes, 1000):
                if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > thresh):
                    print('random frame slice access failed', inx)
        else:
            print('file too short for random frame slice access')
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > thresh):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > thresh):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
def main(*args):
    """Call demo with command line arguments.

    Prints the usage and returns if `-h` is given or if no audio
    file is specified.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    # make the matplotlib import below available to the whole module,
    # a plain import would only bind a name local to this function:
    global plt

    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    mod = False
    for arg in args:
        if mod:
            # the argument following -m is the name of the module:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            mod = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            mod = True
        else:
            file_path = arg
            break

    # without an audio file there is nothing to demonstrate:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print('  python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print('  -m: audio module to be used')
        print('  -p: plot loaded data')
        return

    if plot:
        import matplotlib.pyplot as plt

    demo(file_path, plot)
# run the demo when this module is executed as a script:
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(*cli_args)