Coverage for src/audioio/audioloader.py: 90%
825 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-02 12:23 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-02 12:23 +0000
1"""Loading data, metadata, and markers from audio files.
3- `load_audio()`: load a whole audio file at once.
4- `metadata()`: read metadata of an audio file.
5- `markers()`: read markers of an audio file.
6- class `AudioLoader`: read data from audio files in chunks.
8The read in data are always numpy arrays of floats ranging between -1 and 1.
9The arrays are 2-D ndarrays with first axis time and second axis channel,
10even for single channel data.
12If an audio file cannot be loaded, you might need to install
13additional packages. See
14[installation](https://bendalab.github.io/audioio/installation) for
15further instructions.
17For a demo run the module as:
18```
19python -m src.audioio.audioloader audiofile.wav
20```
21"""
23import gc
24import sys
25import warnings
26import os.path
27import numpy as np
28from pathlib import Path
29from datetime import timedelta
30from .audiomodules import *
31from .bufferedarray import BufferedArray
32from .riffmetadata import metadata_riff, markers_riff
33from .audiometadata import update_gain, add_unwrap, get_datetime
34from .audiometadata import flatten_metadata, add_metadata, set_starttime
35from .audiotools import unwrap
def load_wave(filepath):
    """Load wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # The context manager closes the file even if reading fails.
    with wave.open(filepath, 'r') as wf:
        nchannels, sampwidth, rate, nframes = wf.getparams()[:4]
        buffer = wf.readframes(nframes)
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # 8-bit samples are unsigned: shift them to be centered on zero.
        buffer = np.frombuffer(buffer, dtype='u1').reshape(-1, nchannels)
        data = buffer.astype('d')/factor - 1.0
    elif sampwidth == 3:
        # numpy has no 3-byte integer type: assemble 24-bit samples by hand.
        octets = np.frombuffer(buffer, dtype='u1').reshape(-1, 3)
        if sys.byteorder == 'big':
            # wave returns the frames in native byte order
            octets = octets[:, ::-1]
        octets = octets.astype(np.int32)
        values = octets[:, 0] | (octets[:, 1] << 8) | (octets[:, 2] << 16)
        # sign-extend from 24 to 32 bit:
        values -= (values & 0x800000) << 1
        data = values.reshape(-1, nchannels).astype('d')/factor
    else:
        dtype = f'i{sampwidth}'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor
    return data, float(rate)
def load_ewave(filepath):
    """Read a complete wav file via the ewave module.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    data, rate = np.array([]), 0.0
    with ewave.open(filepath, 'r') as wav:
        rate = wav.sampling_rate
        raw = wav.read()
    # rescaling to floats happens after the file has been closed
    data = ewave.rescale(raw, 'float')
    if data.ndim == 1:
        # single channel: make it a column
        data = data.reshape(-1, 1)
    return data, float(rate)
def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings of wavfile.read() only for the duration of the call.
    # catch_warnings() restores the previous filters afterwards, also when
    # reading fails, so the caller's warning configuration stays untouched.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # unsigned 8-bit data are centered on 128
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # scale signed integers by their full range
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data,(-1, 1))
    return data, float(rate)
def load_soundfile(filepath):
    """Read a complete audio file via SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    rate = 0.0
    data = np.array([])
    with soundfile.SoundFile(filepath, 'r') as snd:
        rate = snd.samplerate
        # frames=-1 requests all frames
        data = snd.read(frames=-1, dtype='float64', always_2d=True)
    return data, float(rate)
def load_wavefile(filepath):
    """Read a complete audio file via wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    rate, loaded = wavefile.load(filepath)
    # transpose the loaded array so that time is the first axis
    data = loaded.astype(np.float64, copy=False).T
    return data, float(rate)
def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    data = np.array([])
    rate = 0.0
    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # Pre-allocate the array from the duration reported by the decoder.
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            # Copy at most as many frames as still fit into the
            # pre-allocated array.
            n = min(fulldata.shape[0], len(data) - index)
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    return data/(2.0**15-1.0), float(rate)
# Pairs of (key into `audio_modules`, loader function).
# `load_audio()` iterates over this tuple in the given order.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""
def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str or path-like
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import numpy as np
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    if filepath is None:
        raise ValueError('input argument filepath is empty string!')
    # pathlib.Path and other path-like objects have no len():
    filepath = os.fspath(filepath)
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f'  sampling rate: {rate:g} Hz')
                        print(f'  channels     : {data.shape[1]}')
                        print(f'  frames       : {len(data)}')
                return data, rate
        except Exception as e:
            # remember the failure and try the next module
            errors.append(f'  {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules returned any data:
    if len(not_installed) > 0:
        errors.append('\n  You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def metadata(filepath, store_empty=False):
    """Retrieve the metadata stored in an audio file.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  A value that is itself a
        dictionary is a metadata section named by its key.  All
        other values belong to their key; they are usually strings,
        but ints or floats are allowed as well.  An empty dictionary
        is returned for files that are not RIFF files.
        See `audioio.audiometadata` module for available functions
        to work with such metadata.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        meta_data = metadata_riff(filepath, store_empty)
    except ValueError: # not a RIFF file
        meta_data = {}
    return meta_data
def markers(filepath):
    """Retrieve the markers stored in an audio file.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).
        Both arrays have zero rows for files that are not RIFF files.

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        found = markers_riff(filepath)
    except ValueError: # not a RIFF file
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        found = (no_locs, no_labels)
    return found
454class AudioLoader(BufferedArray):
455 """Buffered reading of audio data for random access of the data in the file.
457 The class allows for reading very large audio files or many
458 sequential audio files that do not fit into memory.
459 An AudioLoader instance can be used like a huge read-only numpy array, i.e.
460 ```
461 data = AudioLoader('path/to/audio/file.wav')
462 x = data[10000:20000,0]
463 ```
464 The first index specifies the frame, the second one the channel.
466 Behind the scenes, `AudioLoader` tries to open the audio file with
467 all available audio modules until it succeeds (first line). It
468 then reads data from the file as necessary for the requested data
469 (second line). Accessing the content of the audio files via a
470 buffer that holds only a part of the data is managed by the
471 `BufferedArray` class.
473 Reading sequentially through the file is always possible. Some
474 modules, however, (e.g. audioread, needed for mp3 files) can only
475 read forward. If previous data are requested, then the file is read
476 from the beginning again. This slows down access to previous data
477 considerably. Use the `backsize` argument of the open function to
478 make sure some data are loaded into the buffer before the requested
479 frame. Then a subsequent access to the data within `backsize` seconds
480 before that frame can still be handled without the need to reread
481 the file from the beginning.
483 Usage
484 -----
485 With context management:
486 ```
487 import audioio as aio
488 with aio.AudioLoader(filepath, 60.0, 10.0) as data:
489 # do something with the content of the file:
490 x = data[0:10000]
491 y = data[10000:20000]
492 z = x + y
493 ```
495 For using a specific audio module, here the audioread module:
496 ```
497 data = aio.AudioLoader()
498 with data.open_audioread(filepath, 60.0, 10.0):
499 # do something ...
500 ```
502 Use `blocks()` for sequential, blockwise reading and processing:
503 ```
504 from scipy.signal import spectrogram
505 nfft = 2048
506 with aio.AudioLoader('some/audio.wav') as data:
507 for x in data.blocks(100*nfft, nfft//2):
508 f, t, Sxx = spectrogram(x, fs=data.rate,
509 nperseg=nfft, noverlap=nfft//2)
510 ```
512 For loop iterates over single frames (1-D arrays containing samples for each channel):
513 ```
514 with aio.AudioLoader('some/audio.wav') as data:
515 for x in data:
516 print(x)
517 ```
519 Traditional open and close:
520 ```
521 data = aio.AudioLoader(filepath, 60.0)
522 x = data[:,:] # read the whole file
523 data.close()
524 ```
526 this is the same as:
527 ```
528 data = aio.AudioLoader()
529 data.open(filepath, 60.0)
530 ...
531 ```
533 Classes inheriting AudioLoader just need to implement
534 ```
535 self.load_audio_buffer(offset, nsamples, pbuffer)
536 ```
537 This function needs to load the supplied `pbuffer` with
538 `nsamples` frames of data starting at frame `offset`.
540 In the constructor or some kind of opening function, you need to
541 set some member variables, as described for `BufferedArray`.
543 For loading metadata and markers, implement the functions
544 ```
545 self._load_metadata(filepath, **kwargs)
546 self._load_markers(filepath)
547 ```
549 Parameters
550 ----------
551 filepath: str or list of str
552 Name of the file or list of many file names that should be
553 made accessible as a single array.
554 buffersize: float
555 Size of internal buffer in seconds.
556 backsize: float
557 Part of the buffer to be loaded before the requested start index in seconds.
558 verbose: int
559 If larger than zero show detailed error/warning messages.
560 store_empty: bool
561 If `False` do not return meta data with empty values.
563 Attributes
564 ----------
565 filepath: str
566 Name and path of the opened file. In case of many files, the first one.
567 file_paths: list of str
568 List of paths of the opened files that are made accessible
569 as a single array.
570 file_indices: list of int
571 For each file the index of its first sample.
572 rate: float
573 The sampling rate of the data in Hertz.
574 channels: int
575 The number of channels.
576 frames: int
577 The number of frames in the file. Same as `len()`.
578 format: str or None
579 Format of the audio file.
580 encoding: str or None
581 Encoding/subtype of the audio file.
582 shape: tuple
583 Frames and channels of the data.
584 ndim: int
585 Number of dimensions: always 2 (frames and channels).
586 offset: int
587 Index of first frame in the current buffer.
588 buffer: ndarray of floats
589 The currently available data from the file.
590 ampl_min: float
591 Minimum amplitude the file format supports.
592 Always -1.0 for audio data.
593 ampl_max: float
594 Maximum amplitude the file format supports.
595 Always +1.0 for audio data.
597 Methods
598 -------
599 - `len()`: Number of frames.
600 - `file_start_times()`: time of first frame of each file in seconds.
601 - `get_file_index()`: file path and index of frame contained by this file.
602 - `open()`: Open an audio file by trying available audio modules.
603 - `open_*()`: Open an audio file with the respective audio module.
604 - `__getitem__`: Access data of the audio file.
605 - `update_buffer()`: Update the internal buffer for a range of frames.
606 - `blocks()`: Generator for blockwise processing of AudioLoader data.
607 - `basename()`: Base name of the audio data.
608 - `format_dict()`: technical infos about how the data are stored.
609 - `metadata()`: Metadata stored along with the audio data.
610 - `markers()`: Markers stored along with the audio data.
611 - `set_unwrap()`: Set parameters for unwrapping clipped data.
612 - `close()`: Close the file.
614 """
    # Class-level settings; both are documented as suggestions only.
    max_open_files = 5
    """ Suggestion for maximum number of open file descriptors. """

    max_open_loaders = 10
    """ Suggestion for maximum number of AudioLoaders when opening multiple files. """
622 def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
623 verbose=0, **meta_kwargs):
624 super().__init__(verbose=verbose)
625 self.format = None
626 self.encoding = None
627 self._metadata = None
628 self._locs = None
629 self._labels = None
630 self._load_metadata = metadata
631 self._load_markers = markers
632 self._metadata_kwargs = meta_kwargs
633 self.filepath = None
634 self.file_paths = None
635 self.file_indices = []
636 self.sf = None
637 self.close = self._close
638 self.load_buffer = self._load_buffer_unwrap
639 self.ampl_min = -1.0
640 self.ampl_max = +1.0
641 self.unwrap = False
642 self.unwrap_thresh = 0.0
643 self.unwrap_clips = False
644 self.unwrap_ampl = 1.0
645 self.unwrap_downscale = True
646 if filepath is not None:
647 self.open(filepath, buffersize, backsize, verbose)
    # Lookup table from numpy dtype to the name of the encoding.
    # Big-endian floats ('>f4', '>f8') map to the same names as the
    # native float types.
    numpy_encodings = {np.dtype(np.int64): 'PCM_64',
                       np.dtype(np.int32): 'PCM_32',
                       np.dtype(np.int16): 'PCM_16',
                       np.dtype(np.single): 'FLOAT',
                       np.dtype(np.double): 'DOUBLE',
                       np.dtype('>f4'): 'FLOAT',
                       np.dtype('>f8'): 'DOUBLE'}
    """ Map numpy dtypes to encodings.
    """
    def _close(self):
        """Default close function: does nothing."""
        pass
662 def __del__(self):
663 self.close()
665 def file_start_times(self):
666 """ Time of first frame of each file in seconds.
668 Returns
669 -------
670 times: array of float
671 Time of the first frame of each file relative to buffer start
672 in seconds.
673 """
674 times = []
675 for idx in self.file_indices:
676 times.append(idx/self.rate)
677 return np.array(times)
679 def get_file_index(self, frame):
680 """ File path and index of frame contained by this file.
682 Parameters
683 ----------
684 frame: int
685 Index of frame.
687 Returns
688 -------
689 filepath: str
690 Path of file that contains the frame.
691 index: int
692 Index of the frame relative to the first frame
693 in the containing file.
694 """
695 if frame < 0 or frame >= self.frames:
696 raise ValueError('invalid frame')
697 fname = self.file_paths[0]
698 index = self.file_indices[0]
699 for i in reversed(range(len(self.file_indices))):
700 if self.file_indices[i] <= frame:
701 fname = self.file_paths[i]
702 index = self.file_indices[i]
703 break
704 return fname, frame - index
706 def basename(self, path=None):
707 """ Base name of the audio data.
709 Parameters
710 ----------
711 path: str or None
712 Path of the audio file from which a base name is generated.
713 If `None`, use `self.filepath`.
715 Returns
716 -------
717 s: str
718 The name. Defaults to the stem of `path`.
720 """
721 if path is None:
722 path = self.filepath
723 return Path(path).stem
725 def format_dict(self):
726 """ Technical infos about how the data are stored in the file.
728 Returns
729 -------
730 fmt: dict
731 Dictionary with filepath, format, encoding, samplingrate,
732 channels, frames, and duration of the audio file as strings.
734 """
735 fmt = dict(name=self.basename(), filepath=self.filepath)
736 if self.format is not None:
737 fmt['format'] = self.format
738 if self.encoding is not None:
739 fmt['encoding'] = self.encoding
740 fmt.update(dict(samplingrate=f'{self.rate:.0f}Hz',
741 channels=self.channels,
742 frames=self.frames,
743 duration=f'{self.frames/self.rate:.3f}s'))
744 return fmt
746 def metadata(self):
747 """Metadata of the audio file.
749 Parameters
750 ----------
751 store_empty: bool
752 If `False` do not add meta data with empty values.
754 Returns
755 -------
756 meta_data: nested dict
758 Meta data contained in the audio file. Keys of the nested
759 dictionaries are always strings. If the corresponding
760 values are dictionaries, then the key is the section name
761 of the metadata contained in the dictionary. All other
762 types of values are values for the respective key. In
763 particular they are strings. But other types like for
764 example ints or floats are also allowed. See
765 `audioio.audiometadata` module for available functions to
766 work with such metadata.
768 """
769 if self._metadata is None:
770 if self._load_metadata is None:
771 self._metadata = {}
772 else:
773 self._metadata = self._load_metadata(self.filepath,
774 **self._metadata_kwargs)
775 return self._metadata
777 def markers(self):
778 """Read markers of the audio file.
780 See `audioio.audiomarkers` module for available functions
781 to work with markers.
783 Returns
784 -------
785 locs: 2-D ndarray of int
786 Marker positions (first column) and spans (second column)
787 for each marker (rows).
788 labels: 2-D ndarray of str objects
789 Labels (first column) and texts (second column)
790 for each marker (rows).
791 """
792 if self._locs is None:
793 if self._load_markers is None:
794 self._locs = np.zeros((0, 2), dtype=int)
795 self._labels = np.zeros((0, 2), dtype=object)
796 else:
797 self._locs, self._labels = self._load_markers(self.filepath)
798 return self._locs, self._labels
800 def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
801 """Set parameters for unwrapping clipped data.
803 See unwrap() function from the audioio package.
805 Parameters
806 ----------
807 thresh: float
808 Threshold for detecting wrapped data relative to self.unwrap_ampl
809 which is initially set to self.ampl_max.
810 If zero, do not unwrap.
811 clips: bool
812 If True, then clip the unwrapped data properly.
813 Otherwise, unwrap the data and double the
814 minimum and maximum data range
815 (self.ampl_min and self.ampl_max).
816 down_scale: bool
817 If not `clips`, then downscale the signal by a factor of two,
818 in order to keep the range between -1 and 1.
819 unit: str
820 Unit of the data.
821 """
822 self.unwrap_ampl = self.ampl_max
823 self.unwrap_thresh = thresh
824 self.unwrap_clips = clips
825 self.unwrap_down_scale = down_scale
826 self.unwrap = thresh > 1e-3
827 if self.unwrap:
828 if self.unwrap_clips:
829 add_unwrap(self.metadata(),
830 self.unwrap_thresh*self.unwrap_ampl,
831 self.unwrap_ampl, unit)
832 elif down_scale:
833 update_gain(self.metadata(), 0.5)
834 add_unwrap(self.metadata(),
835 0.5*self.unwrap_thresh*self.unwrap_ampl,
836 0.0, unit)
837 else:
838 self.ampl_min *= 2
839 self.ampl_max *= 2
840 add_unwrap(self.metadata(),
841 self.unwrap_thresh*self.unwrap_ampl,
842 0.0, unit)
844 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer):
845 """Load new data and unwrap it.
847 Parameters
848 ----------
849 r_offset: int
850 First frame to be read from file.
851 r_size: int
852 Number of frames to be read from file.
853 pbuffer: ndarray
854 Buffer where to store the loaded data.
855 """
856 self.load_audio_buffer(r_offset, r_size, pbuffer)
857 if self.unwrap:
858 # TODO: handle edge effects!
859 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl)
860 if self.unwrap_clips:
861 pbuffer[pbuffer > self.ampl_max] = self.ampl_max
862 pbuffer[pbuffer < self.ampl_min] = self.ampl_min
863 elif self.unwrap_down_scale:
864 pbuffer *= 0.5
867 # wave interface:
    def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
        """Open audio file for reading using the wave module.

        Note: we assume that setpos() and tell() use integer numbers!

        Parameters
        ----------
        filepath: str
            Name of the file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If larger than zero show detailed error/warning messages.

        Returns
        -------
        self: AudioLoader
            This loader, so that the call can be used in a `with` statement.

        Raises
        ------
        ImportError
            The wave module is not installed
        """
        self.verbose = verbose
        if self.verbose > 0:
            print(f'open_wave(filepath) with filepath={filepath}')
        if not audio_modules['wave']:
            # reset to an empty data set before reporting the missing module:
            self.rate = 0.0
            self.channels = 0
            self.frames = 0
            self.size = 0
            self.shape = (0, 0)
            self.offset = 0
            raise ImportError
        # close a previously opened file:
        if self.sf is not None:
            self._close_wave()
        self.sf = wave.open(filepath, 'r')
        self.filepath = filepath
        self.file_paths = [filepath]
        self.file_indices = [0]
        self.rate = float(self.sf.getframerate())
        self.format = 'WAV'
        # number of bytes per sample:
        sampwidth = self.sf.getsampwidth()
        if sampwidth == 1:
            self.dtype = 'u1'
            self.encoding = 'PCM_U8'
        else:
            # NOTE(review): a sample width of 3 bytes results in 'i3',
            # which numpy does not accept as a dtype - confirm how
            # 24-bit files are supposed to be handled.
            self.dtype = f'i{sampwidth}'
            self.encoding = f'PCM_{sampwidth*8}'
        # factor for scaling the integer samples in _load_buffer_wave():
        self.factor = 1.0/(2.0**(sampwidth*8-1))
        self.channels = self.sf.getnchannels()
        self.frames = self.sf.getnframes()
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        # buffer sizes are given in seconds, convert them to frames:
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_wave
        self.load_audio_buffer = self._load_buffer_wave
        # read 1 frame to determine the unit of the position values:
        self.p0 = self.sf.tell()
        self.sf.readframes(1)
        self.pfac = self.sf.tell() - self.p0
        # rewind to the first frame:
        self.sf.setpos(self.p0)
        return self
933 def _close_wave(self):
934 """Close the audio file using the wave module. """
935 if self.sf is not None:
936 self.sf.close()
937 self.sf = None
939 def _load_buffer_wave(self, r_offset, r_size, buffer):
940 """Load new data from file using the wave module.
942 Parameters
943 ----------
944 r_offset: int
945 First frame to be read from file.
946 r_size: int
947 Number of frames to be read from file.
948 buffer: ndarray
949 Buffer where to store the loaded data.
950 """
951 if self.sf is None:
952 self.sf = wave.open(self.filepath, 'r')
953 self.sf.setpos(r_offset*self.pfac + self.p0)
954 fbuffer = self.sf.readframes(r_size)
955 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels))
956 if self.dtype[0] == 'u':
957 buffer[:, :] = fbuffer * self.factor - 1.0
958 else:
959 buffer[:, :] = fbuffer * self.factor
962 # ewave interface:
    def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
        """Open audio file for reading using the ewave module.

        Parameters
        ----------
        filepath: str
            Name of the file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If larger than zero show detailed error/warning messages.

        Returns
        -------
        self: AudioLoader
            This loader, so that the call can be used in a `with` statement.

        Raises
        ------
        ImportError
            The ewave module is not installed.
        """
        self.verbose = verbose
        if self.verbose > 0:
            print(f'open_ewave(filepath) with filepath={filepath}')
        if not audio_modules['ewave']:
            # reset to an empty data set before reporting the missing module:
            self.rate = 0.0
            self.channels = 0
            self.frames = 0
            self.shape = (0, 0)
            self.size = 0
            self.offset = 0
            raise ImportError
        # close a previously opened file:
        if self.sf is not None:
            self._close_ewave()
        self.sf = ewave.open(filepath, 'r')
        self.filepath = filepath
        self.file_paths = [filepath]
        self.file_indices = [0]
        self.rate = float(self.sf.sampling_rate)
        self.channels = self.sf.nchannels
        self.frames = self.sf.nframes
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.format = 'WAV' # or WAVEX?
        # translate the dtype reported by ewave into an encoding name;
        # a dtype missing in numpy_encodings raises a KeyError here.
        self.encoding = self.numpy_encodings[self.sf.dtype]
        # buffer sizes are given in seconds, convert them to frames:
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_ewave
        self.load_audio_buffer = self._load_buffer_ewave
        return self
1014 def _close_ewave(self):
1015 """Close the audio file using the ewave module. """
1016 if self.sf is not None:
1017 del self.sf
1018 self.sf = None
1020 def _load_buffer_ewave(self, r_offset, r_size, buffer):
1021 """Load new data from file using the ewave module.
1023 Parameters
1024 ----------
1025 r_offset: int
1026 First frame to be read from file.
1027 r_size: int
1028 Number of frames to be read from file.
1029 buffer: ndarray
1030 Buffer where to store the loaded data.
1031 """
1032 if self.sf is None:
1033 self.sf = ewave.open(self.filepath, 'r')
1034 fbuffer = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
1035 fbuffer = ewave.rescale(fbuffer, 'float')
1036 if len(fbuffer.shape) == 1:
1037 fbuffer = np.reshape(fbuffer,(-1, 1))
1038 buffer[:,:] = fbuffer
1041 # soundfile interface:
    def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                       verbose=0):
        """Open audio file for reading using the SoundFile module.

        Parameters
        ----------
        filepath: str
            Name of the file.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If larger than zero show detailed error/warning messages.

        Returns
        -------
        self: AudioLoader
            This loader, so that the call can be used in a `with` statement.

        Raises
        ------
        ImportError
            The SoundFile module is not installed
        """
        self.verbose = verbose
        if self.verbose > 0:
            print(f'open_soundfile(filepath) with filepath={filepath}')
        if not audio_modules['soundfile']:
            # reset to an empty data set before reporting the missing module:
            self.rate = 0.0
            self.channels = 0
            self.frames = 0
            self.shape = (0, 0)
            self.size = 0
            self.offset = 0
            raise ImportError
        # close a previously opened file:
        if self.sf is not None:
            self._close_soundfile()
        self.sf = soundfile.SoundFile(filepath, 'r')
        self.filepath = filepath
        self.file_paths = [filepath]
        self.file_indices = [0]
        self.rate = float(self.sf.samplerate)
        self.channels = self.sf.channels
        self.frames = 0
        self.size = 0
        if self.sf.seekable():
            # the position returned by seeking to the end is taken as
            # the number of frames, then rewind to the start:
            self.frames = self.sf.seek(0, soundfile.SEEK_END)
            self.sf.seek(0, soundfile.SEEK_SET)
        # TODO: if not seekable, we cannot handle that file!
        # (frames then stays at zero)
        self.shape = (self.frames, self.channels)
        self.size = self.frames * self.channels
        self.format = self.sf.format
        self.encoding = self.sf.subtype
        # buffer sizes are given in seconds, convert them to frames:
        self.bufferframes = int(buffersize*self.rate)
        self.backframes = int(backsize*self.rate)
        self.init_buffer()
        self.close = self._close_soundfile
        self.load_audio_buffer = self._load_buffer_soundfile
        return self
1098 def _close_soundfile(self):
1099 """Close the audio file using the SoundFile module. """
1100 if self.sf is not None:
1101 self.sf.close()
1102 self.sf = None
def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read via the SoundFile module.

    If the file is currently closed (`self.sf` is None) it is reopened
    from `self.filepath` before reading.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = soundfile.SoundFile(self.filepath, 'r')
    reader = self.sf
    reader.seek(r_offset, soundfile.SEEK_SET)
    frames = reader.read(r_size, always_2d=True)
    buffer[:, :] = frames
1122 # wavefile interface:
def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading using the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # module is missing: reset to an empty loader before giving up
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wavefile()
    reader = wavefile.WaveReader(filepath)
    self.sf = reader
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(reader.samplerate)
    self.channels = reader.channels
    self.frames = reader.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # look up the names of the format and encoding constants in
    # wavefile.Format that match the masked format code of the file:
    type_mask = wavefile.Format.TYPEMASK
    sub_mask = wavefile.Format.SUBMASK
    for name in dir(wavefile.Format):
        code = getattr(wavefile.Format, name)
        if not isinstance(code, int):
            continue
        if code & type_mask > 0 and (reader.format & type_mask) == code:
            self.format = name
        if code & sub_mask > 0 and (reader.format & sub_mask) == code:
            self.encoding = name
    # set up the buffer (sizes are converted from seconds to frames):
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the wavefile specific close and load functions:
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self
def _close_wavefile(self):
    """Close the audio file that was opened with the wavefile module.

    Does nothing when no reader is stored in `self.sf`.
    """
    reader = self.sf
    if reader is None:
        return
    reader.close()
    # forget the reader so that the file can be reopened on demand:
    self.sf = None
def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read via the wavefile module.

    If the file is currently closed (`self.sf` is None) it is reopened
    from `self.filepath` before reading.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = wavefile.WaveReader(self.filepath)
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    # the reader fills a buffer it allocates itself; its transpose is
    # what gets copied into the (frames, channels) destination:
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    buffer[:, :] = chunk.T
1209 # audioread interface:
def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading using the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # module is missing: reset to an empty loader before giving up
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_audioread()
    stream = audioread.audio_open(filepath)
    self.sf = stream
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(stream.samplerate)
    self.channels = stream.channels
    # only a duration is reported, the number of frames is derived from it:
    self.frames = int(np.ceil(self.rate*stream.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # the most recently decoded block and its position in the file:
    self.read_buffer = np.zeros((0, 0))
    self.read_offset = 0
    # install the audioread specific close and load functions:
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = iter(stream)
    return self
def _close_audioread(self):
    """Close the audio file that was opened with the audioread module.

    Does nothing when no file object is stored in `self.sf`.
    """
    stream = self.sf
    if stream is None:
        return
    # the file object is released through its context-manager exit:
    stream.__exit__(None, None, None)
    self.sf = None
def _load_buffer_audioread(self, r_offset, r_size, buffer):
    """Load new data from file using the audioread module.

    audioread can only iterate through a file once and in blocksizes that are
    given by audioread. Therefore we keep yet another buffer: `self.read_buffer`
    at file offset `self.read_offset` containing whatever audioread returned.

    Whenever the file has to be (re)opened, either because it was closed
    in the meantime or because data before `self.read_offset` are
    requested, the block iterator and the read buffer are reset as well,
    so that they always describe the currently open stream.

    Frames that lie beyond the end of the stream are filled with zeros.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # blocks are decoded as little-endian 16-bit integers and divided
    # by this factor to get floats:
    scale = 2.0**15 - 1.0

    def restart():
        # open the file and reset the complete streaming state
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = iter(self.sf)
        self.read_buffer = np.zeros((0, 0))
        self.read_offset = 0

    def next_block():
        # advance read_buffer to the next block; False at end of stream
        self.read_offset += self.read_buffer.shape[0]
        try:
            raw = next(self.sf_iter)
        except StopIteration:
            self.read_buffer = np.zeros((0, 0))
            return False
        self.read_buffer = np.frombuffer(raw, dtype='<i2').reshape(-1, self.channels)
        return True

    if self.sf is None:
        # the file was closed: the old iterator and read buffer belong
        # to the closed stream and must not be used any more
        restart()
    b_offset = 0
    if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size
         and self.read_offset < r_offset + r_size ):
        # read_buffer overlaps at the end of the requested interval:
        i = 0
        n = r_offset + r_size - self.read_offset
        if n > r_size:
            i += n - r_size
            n = r_size
        start = self.read_offset + i - r_offset
        buffer[start:start + n, :] = self.read_buffer[i:i + n, :] / scale
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)')
        r_size -= n
        if r_size <= 0:
            return
    # go back to beginning of file:
    if r_offset < self.read_offset:
        if self.verbose > 2:
            print(' rewind')
        self._close_audioread()
        restart()
    # read to position:
    while self.read_offset + self.read_buffer.shape[0] < r_offset:
        if not next_block():
            buffer[:, :] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        if self.verbose > 2:
            print(f' read forward by {self.read_buffer.shape[0]} frames')
    # recycle file data:
    if ( self.read_offset + self.read_buffer.shape[0] > r_offset
         and self.read_offset <= r_offset ):
        i = r_offset - self.read_offset
        n = self.read_offset + self.read_buffer.shape[0] - r_offset
        if n > r_size:
            n = r_size
        buffer[:n, :] = self.read_buffer[i:i + n, :] / scale
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
        b_offset += n
        r_offset += n
        r_size -= n
    # read data:
    if self.verbose > 2 and r_size > 0:
        print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)')
    while r_size > 0:
        if not next_block():
            # end of stream: pad the remainder with zeros
            buffer[b_offset:, :] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        n = self.read_buffer.shape[0]
        if n > r_size:
            n = r_size
        if n > 0:
            buffer[b_offset:b_offset + n, :] = self.read_buffer[:n, :] / scale
            if self.verbose > 2:
                print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
            b_offset += n
            r_offset += n
            r_size -= n
1373 # open multiple audio files as one:
def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Open multiple audio files as a single concatenated array.

    Without `end_indices` each file is opened, its metadata and
    markers are collected, and files are appended as long as they
    match the first file in number of channels and sampling rate and
    their start time continues the previous file within one second.
    The first file that does not fit ends the sequence. Files that
    cannot be opened are skipped.

    With `end_indices` no file is touched: the loader is set up from
    the provided `rate`, `channels`, and `end_indices` and files are
    opened later on demand.

    Parameters
    ----------
    filepaths: list of str
        List of file names of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empty sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file, advanced file by file:
    start_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization, loaders are created on demand:
        self.filepath = filepaths[0]
        self.file_paths = filepaths
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                a = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # skip files that cannot be opened:
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = a.metadata()
            fmd = flatten_metadata(md, True)
            add_metadata(self._metadata, fmd)
            if self.filepath is None:
                # first file:
                self.filepath = a.filepath
                self.format = a.format
                self.encoding = a.encoding
                self.rate = a.rate
                self.channels = a.channels
                self.start_time = get_datetime(md)
                start_time = self.start_time
            else:
                # check channels and rate:
                error_str = None
                if a.channels != self.channels:
                    error_str = f'number of channels differs: ' \
                                f'{a.channels} in {a.filepath} versus ' \
                                f'{self.channels} in {self.filepath}'
                if a.rate != self.rate:
                    error_str = f'sampling rates differ: ' \
                                f'{a.rate} in {a.filepath} versus ' \
                                f'{self.rate} in {self.filepath}'
                # check start time of recording:
                stime = get_datetime(md)
                if start_time is None or stime is None or \
                   abs(start_time - stime) > timedelta(seconds=1):
                    error_str = f'start time does not indicate continuous recording: ' \
                                f'expected {start_time} instead of ' \
                                f'{stime} in {a.filepath}'
                if error_str is not None:
                    # the sequence ends at the first file that does not fit:
                    if verbose > 0:
                        print(error_str)
                    a.close()
                    del a
                    break
            # markers (positions are shifted into the concatenated array):
            locs, labels = a.markers()
            locs[:, 0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += a.frames
            self.end_indices.append(self.frames)
            if start_time is not None:
                start_time += timedelta(seconds=a.frames/a.rate)
            # add file to lists:
            self.file_paths.append(filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(a)
            else:
                a.close()
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(a)
                self.open_loaders.append(a)
            else:
                a.close()
                del a
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set startime from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self
def _close_multiple(self):
    """Close all underlying audio files and drop the multi-file state.

    All loaders that still exist are closed. Afterwards `filepath` is
    None, `file_paths` and `file_indices` are empty lists, and the
    attributes `audio_files`, `open_files`, `open_loaders`,
    `start_indices`, and `end_indices` are removed from the instance.
    """
    self.open_files = []
    self.open_loaders = []
    for loader in getattr(self, 'audio_files', ()):
        if loader is not None:
            loader.close()
    self.filepath = None
    self.file_paths = []
    self.file_indices = []
    for name in ('audio_files', 'open_files', 'open_loaders',
                 'start_indices', 'end_indices'):
        # assign first, so that deleting never fails on a missing attribute:
        setattr(self, name, [])
        delattr(self, name)
def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Fill `buffer` with data collected from the underlying files.

    The requested range is served file by file. Loaders are created on
    demand. `open_loaders` and `open_files` are kept ordered by usage
    with the most recently used entry last; when they grow beyond
    `AudioLoader.max_open_loaders` or `AudioLoader.max_open_files`,
    respectively, their first entry is closed and removed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    pos = r_offset
    remaining = r_size
    dest = 0
    # index of the file that contains the first requested frame:
    idx = np.searchsorted(self.end_indices, pos, side='right')
    while remaining > 0:
        if self.audio_files[idx] is not None:
            # loader exists: move it to the end of the usage list
            self.open_loaders.pop(self.open_loaders.index(self.audio_files[idx]))
            self.open_loaders.append(self.audio_files[idx])
        else:
            fresh = AudioLoader(self.file_paths[idx],
                                self.buffersize, self.backsize, 0)
            self.audio_files[idx] = fresh
            self.open_loaders.append(fresh)
            self.open_files.append(fresh)
            if len(self.open_files) > AudioLoader.max_open_files:
                self.open_files.pop(0).close()
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                evicted = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(evicted)] = None
                evicted.close()
                del evicted
                # run the garbage collector every now and then:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        # range of frames within the current file:
        first = pos - self.start_indices[idx]
        last = min(pos + remaining, self.end_indices[idx])
        last -= self.start_indices[idx]
        n = last - first
        self.audio_files[idx].load_audio_buffer(first, n,
                                                buffer[dest:dest + n, :])
        # the file has just been used, put it at the end of open_files:
        if self.audio_files[idx] in self.open_files:
            self.open_files.pop(self.open_files.index(self.audio_files[idx]))
        self.open_files.append(self.audio_files[idx])
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        dest += n
        pos += n
        remaining -= n
        idx += 1
def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    A sequence of more than one file name is handed to
    `open_multiple()`. A single file is opened by trying the
    available audio modules one after the other until one of them
    reports more than zero frames.

    Parameters
    ----------
    filepath: str or list of str
        Name of the file or list of many file names that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self
        The loader itself.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    is_sequence = isinstance(filepath, (list, tuple, np.ndarray))
    # the truth value of an array with several elements is ambiguous,
    # therefore sequences are checked by their length:
    is_empty = len(filepath) == 0 if is_sequence else not filepath
    if is_empty:
        raise ValueError('input argument filepath is empty string!')
    if is_sequence:
        if len(filepath) > 1:
            self.open_multiple(filepath, buffersize, backsize, verbose,
                               **kwargs)
            if len(self.file_paths) > 1:
                return self
            # only a single file was accepted, open it on its own:
            filepath = self.file_paths[0]
            self.close()
        else:
            filepath = filepath[0]
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
        except Exception as e:
            # remember the failure and try the next module:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the whole file with `load_audio()`, compares the soundfile
    and audioread backends if both are available, and then checks
    that single frames and slices read via `AudioLoader` agree with
    the fully loaded data.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # imported here, because plotting is optional and the name
        # has to be available within this function:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        nframes = int(1.5*data.rate)
        # differences of up to 2**-14 are tolerated in all checks
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        # randint() needs a non-empty range, so files not longer than
        # a single slice are skipped:
        if len(data) > nframes:
            for inx in np.random.randint(0, len(data)-nframes, 1000):
                if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                    print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
def main(*args):
    """Call demo with command line arguments.

    Recognized arguments are `-h` (print usage), `-p` (plot the loaded
    data), and `-m <module>` (select the audio module to be used).
    The first other argument is taken as the path of the audio file;
    everything after it is ignored. If no audio file is given, the
    usage is printed as well.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    expect_module = False   # True right after a '-m' argument
    for arg in args:
        if expect_module:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            expect_module = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            expect_module = True
        else:
            file_path = arg
            break

    if show_help or file_path is None:
        if not show_help:
            # without a file there is nothing the demo could load:
            print('')
            print('missing audio file argument')
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        # fails right here if plotting is requested without matplotlib:
        import matplotlib.pyplot as plt

    demo(file_path, plot)
# Run the demo with the command line arguments when executed as a script:
if __name__ == "__main__":
    main(*sys.argv[1:])