Coverage for src/audioio/audioloader.py: 90%
818 statements
« prev ^ index » next coverage.py v7.7.0, created at 2025-03-18 22:33 +0000
« prev ^ index » next coverage.py v7.7.0, created at 2025-03-18 22:33 +0000
1"""Loading data, metadata, and markers from audio files.
3- `load_audio()`: load a whole audio file at once.
4- `metadata()`: read metadata of an audio file.
5- `markers()`: read markers of an audio file.
6- class `AudioLoader`: read data from audio files in chunks.
8The read in data are always numpy arrays of floats ranging between -1 and 1.
9The arrays are 2-D ndarrays with first axis time and second axis channel,
10even for single channel data.
12If an audio file cannot be loaded, you might need to install
13additional packages. See
14[installation](https://bendalab.github.io/audioio/installation) for
15further instructions.
17For a demo run the module as:
18```
19python -m src.audioio.audioloader audiofile.wav
20```
21"""
23import gc
24import sys
25import warnings
26import os.path
27import numpy as np
28from datetime import timedelta
29from .audiomodules import *
30from .bufferedarray import BufferedArray
31from .riffmetadata import metadata_riff, markers_riff
32from .audiometadata import update_gain, add_unwrap, get_datetime
33from .audiometadata import flatten_metadata, add_metadata, set_starttime
34from .audiotools import unwrap
def load_wave(filepath):
    """Load wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
        Integer samples are divided by `2**(bits-1)`; unsigned 8-bit
        samples are additionally shifted by -1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # The context manager closes the file even if reading the frames fails.
    with wave.open(filepath, 'r') as wf:
        (nchannels, sampwidth, rate, nframes, comptype, compname) = wf.getparams()
        buffer = wf.readframes(nframes)
    factor = 2.0**(sampwidth*8-1)
    if sampwidth == 1:
        # 8-bit wav data are unsigned, shift them to be centered at zero:
        dtype = 'u1'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor - 1.0
    else:
        # NOTE(review): sample widths without a matching numpy integer
        # type (e.g. 3 bytes -> 'i3') make np.frombuffer() raise here.
        dtype = f'i{sampwidth}'
        buffer = np.frombuffer(buffer, dtype=dtype).reshape(-1, nchannels)
        data = buffer.astype('d')/factor
    return data, float(rate)
def load_ewave(filepath):
    """Read a complete wav file by means of the ewave package.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    frames, samplerate = np.array([]), 0.0
    with ewave.open(filepath, 'r') as wav:
        samplerate = wav.sampling_rate
        frames = ewave.rescale(wav.read(), 'float')
    # single channel data come as 1-D array, add the channel axis:
    if frames.ndim == 1:
        frames = frames.reshape(-1, 1)
    return frames, float(samplerate)
def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
        Unsigned 8-bit and signed integer data are scaled to the
        range -1 to 1, all other data are converted to float64 as they are.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings of the reader only for the duration of the call.
    # catch_warnings() restores the previous filters afterwards, also
    # when reading fails, instead of overwriting the global filter list.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data,(-1, 1))
    return data, float(rate)
def load_soundfile(filepath):
    """Read a complete audio file by means of SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    frames, samplerate = np.array([]), 0.0
    with soundfile.SoundFile(filepath, 'r') as snd:
        samplerate = snd.samplerate
        # read all frames at once as a frames x channels array:
        frames = snd.read(frames=-1, dtype='float64', always_2d=True)
    return frames, float(samplerate)
def load_wavefile(filepath):
    """Read a complete audio file by means of wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    samplerate, samples = wavefile.load(filepath)
    # the loaded array is transposed to get time on the first axis:
    frames = samples.astype(np.float64, copy=False).T
    return frames, float(samplerate)
def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
        The number of frames is estimated from the sampling rate and
        the duration reported by audioread.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    data = np.array([])
    rate = 0.0
    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # pre-allocate the array from the announced duration:
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            # Take only as many frames as still fit into the
            # pre-allocated array (remaining space is len(data) - index).
            n = min(fulldata.shape[0], len(data) - index)
            if n <= 0:
                break
            data[index:index+n,:] = fulldata[:n,:]
            index += n
    return data/(2.0**15-1.0), float(rate)
# Pairs of (module name, load function). `load_audio()` iterates over
# them in this order and returns the data of the first loader that
# succeeds. The module name is the key into `audio_modules`.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""
def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.
    The modules are tried in the order given by `audio_loader_funcs`
    and the data of the first one that returns at least one frame
    are returned.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    # (truth test instead of len() so that path-like objects pass as well)
    if not filepath:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
            # a loader that returns no frames is treated as failed,
            # record it so that the final error message tells why:
            errors.append(f' {lib} failed: no data loaded')
            if verbose > 1:
                print(errors[-1])
        except Exception as e:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules succeeded:
    if not_installed:
        errors.append('\n You may need to install one of the '
                      + ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def metadata(filepath, store_empty=False):
    """Read the metadata stored in an audio file.

    The file is passed on to `metadata_riff()`. If that raises a
    `ValueError`, an empty dictionary is returned.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary. All other types of
        values are values for the respective key. In particular they
        are strings. But other types like for example ints or floats
        are also allowed.  See `audioio.audiometadata` module for
        available functions to work with such metadata.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        md = metadata_riff(filepath, store_empty)
    except ValueError:
        # not a RIFF file
        md = {}
    return md
def markers(filepath):
    """Read the markers stored in an audio file.

    The file is passed on to `markers_riff()`. If that raises a
    `ValueError`, empty arrays are returned.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        result = markers_riff(filepath)
    except ValueError:
        # not a RIFF file
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        result = (no_locs, no_labels)
    return result
453class AudioLoader(BufferedArray):
454 """Buffered reading of audio data for random access of the data in the file.
456 The class allows for reading very large audio files or many
457 sequential audio files that do not fit into memory.
458 An AudioLoader instance can be used like a huge read-only numpy array, i.e.
459 ```
460 data = AudioLoader('path/to/audio/file.wav')
461 x = data[10000:20000,0]
462 ```
463 The first index specifies the frame, the second one the channel.
465 Behind the scenes, `AudioLoader` tries to open the audio file with
466 all available audio modules until it succeeds (first line). It
467 then reads data from the file as necessary for the requested data
    (second line). Accessing the content of the audio files via a
469 buffer that holds only a part of the data is managed by the
470 `BufferedArray` class.
472 Reading sequentially through the file is always possible. Some
473 modules, however, (e.g. audioread, needed for mp3 files) can only
474 read forward. If previous data are requested, then the file is read
475 from the beginning again. This slows down access to previous data
476 considerably. Use the `backsize` argument of the open function to
477 make sure some data are loaded into the buffer before the requested
478 frame. Then a subsequent access to the data within `backsize` seconds
479 before that frame can still be handled without the need to reread
480 the file from the beginning.
482 Usage
483 -----
484 With context management:
485 ```
486 import audioio as aio
487 with aio.AudioLoader(filepath, 60.0, 10.0) as data:
488 # do something with the content of the file:
489 x = data[0:10000]
490 y = data[10000:20000]
491 z = x + y
492 ```
494 For using a specific audio module, here the audioread module:
495 ```
496 data = aio.AudioLoader()
497 with data.open_audioread(filepath, 60.0, 10.0):
498 # do something ...
499 ```
501 Use `blocks()` for sequential, blockwise reading and processing:
502 ```
503 from scipy.signal import spectrogram
504 nfft = 2048
505 with aio.AudioLoader('some/audio.wav') as data:
506 for x in data.blocks(100*nfft, nfft//2):
507 f, t, Sxx = spectrogram(x, fs=data.rate,
508 nperseg=nfft, noverlap=nfft//2)
509 ```
511 For loop iterates over single frames (1-D arrays containing samples for each channel):
512 ```
513 with aio.AudioLoader('some/audio.wav') as data:
514 for x in data:
515 print(x)
516 ```
518 Traditional open and close:
519 ```
520 data = aio.AudioLoader(filepath, 60.0)
521 x = data[:,:] # read the whole file
522 data.close()
523 ```
525 this is the same as:
526 ```
527 data = aio.AudioLoader()
528 data.open(filepath, 60.0)
529 ...
530 ```
532 Classes inheriting AudioLoader just need to implement
533 ```
534 self.load_audio_buffer(offset, nsamples, pbuffer)
535 ```
536 This function needs to load the supplied `pbuffer` with
    `nsamples` frames of data starting at frame `offset`.
539 In the constructor or some kind of opening function, you need to
540 set some member variables, as described for `BufferedArray`.
542 For loading metadata and markers, implement the functions
543 ```
544 self._load_metadata(filepath, **kwargs)
545 self._load_markers(filepath)
546 ```
548 Parameters
549 ----------
550 filepath: str or list of str
551 Name of the file or list of many file names that should be
552 made accessible as a single array.
553 buffersize: float
554 Size of internal buffer in seconds.
555 backsize: float
556 Part of the buffer to be loaded before the requested start index in seconds.
557 verbose: int
558 If larger than zero show detailed error/warning messages.
559 store_empty: bool
560 If `False` do not return meta data with empty values.
562 Attributes
563 ----------
564 filepath: str
565 Name and path of the opened file. In case of many files, the first one.
566 file_paths: list of str
        List of paths of the opened files that are made accessible
568 as a single array.
569 file_indices: list of int
570 For each file the index of its first sample.
571 rate: float
        The sampling rate of the data in Hertz.
573 channels: int
574 The number of channels.
575 frames: int
576 The number of frames in the file. Same as `len()`.
577 format: str or None
578 Format of the audio file.
579 encoding: str or None
580 Encoding/subtype of the audio file.
581 shape: tuple
582 Frames and channels of the data.
583 ndim: int
584 Number of dimensions: always 2 (frames and channels).
585 offset: int
586 Index of first frame in the current buffer.
587 buffer: ndarray of floats
        The currently available data from the file.
589 ampl_min: float
590 Minimum amplitude the file format supports.
591 Always -1.0 for audio data.
592 ampl_max: float
593 Maximum amplitude the file format supports.
594 Always +1.0 for audio data.
596 Methods
597 -------
598 - `len()`: Number of frames.
599 - `file_start_times()`: time of first frame of each file in seconds.
600 - `get_file_index()`: file path and index of frame contained by this file.
601 - `open()`: Open an audio file by trying available audio modules.
602 - `open_*()`: Open an audio file with the respective audio module.
603 - `__getitem__`: Access data of the audio file.
604 - `update_buffer()`: Update the internal buffer for a range of frames.
605 - `blocks()`: Generator for blockwise processing of AudioLoader data.
606 - `format_dict()`: technical infos about how the data are stored.
607 - `metadata()`: Metadata stored along with the audio data.
608 - `markers()`: Markers stored along with the audio data.
609 - `set_unwrap()`: Set parameters for unwrapping clipped data.
610 - `close()`: Close the file.
612 """
    # Class-level limits. They are not referenced by the code visible
    # here - presumably they are consulted when many files are opened
    # as a single array; verify against the multi-file opening code.
    max_open_files = 5
    """ Suggestion for maximum number of open file descriptors. """

    max_open_loaders = 10
    """ Suggestion for maximum number of AudioLoaders when opening multiple files. """
620 def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
621 verbose=0, **meta_kwargs):
622 super().__init__(verbose=verbose)
623 self.format = None
624 self.encoding = None
625 self._metadata = None
626 self._locs = None
627 self._labels = None
628 self._load_metadata = metadata
629 self._load_markers = markers
630 self._metadata_kwargs = meta_kwargs
631 self.filepath = None
632 self.file_paths = None
633 self.file_indices = []
634 self.sf = None
635 self.close = self._close
636 self.load_buffer = self._load_buffer_unwrap
637 self.ampl_min = -1.0
638 self.ampl_max = +1.0
639 self.unwrap = False
640 self.unwrap_thresh = 0.0
641 self.unwrap_clips = False
642 self.unwrap_ampl = 1.0
643 self.unwrap_downscale = True
644 if filepath is not None:
645 self.open(filepath, buffersize, backsize, verbose)
    # Lookup table from numpy dtype to the name of the encoding.
    # `open_ewave()` uses it to translate the dtype reported by the
    # ewave file object. Floating point types are listed both in native
    # and in explicit big-endian ('>') byte order.
    numpy_encodings = {np.dtype(np.int64): 'PCM_64',
                       np.dtype(np.int32): 'PCM_32',
                       np.dtype(np.int16): 'PCM_16',
                       np.dtype(np.single): 'FLOAT',
                       np.dtype(np.double): 'DOUBLE',
                       np.dtype('>f4'): 'FLOAT',
                       np.dtype('>f8'): 'DOUBLE'}
    """ Map numpy dtypes to encodings.
    """
657 def _close(self):
658 pass
660 def __del__(self):
661 self.close()
663 def file_start_times(self):
664 """ Time of first frame of each file in seconds.
666 Returns
667 -------
668 times: array of float
669 Time of the first frame of each file relative to buffer start
670 in seconds.
671 """
672 times = []
673 for idx in self.file_indices:
674 times.append(idx/self.rate)
675 return np.array(times)
677 def get_file_index(self, frame):
678 """ File path and index of frame contained by this file.
680 Parameters
681 ----------
682 frame: int
683 Index of frame.
685 Returns
686 -------
687 filepath: str
688 Path of file that contains the frame.
689 index: int
690 Index of the frame relative to the first frame
691 in the containing file.
692 """
693 if frame < 0 or frame >= self.frames:
694 raise ValueError('invalid frame')
695 fname = self.file_paths[0]
696 index = self.file_indices[0]
697 for i in reversed(range(len(self.file_indices))):
698 if self.file_indices[i] <= frame:
699 fname = self.file_paths[i]
700 index = self.file_indices[i]
701 break
702 return fname, frame - index
704 def format_dict(self):
705 """ Technical infos about how the data are stored in the file.
707 Returns
708 -------
709 fmt: dict
710 Dictionary with filepath, format, encoding, samplingrate,
711 channels, frames, and duration of the audio file as strings.
713 """
714 fmt = dict(filepath=self.filepath)
715 if self.format is not None:
716 fmt['format'] = self.format
717 if self.encoding is not None:
718 fmt['encoding'] = self.encoding
719 fmt.update(dict(samplingrate=f'{self.rate:.0f}Hz',
720 channels=self.channels,
721 frames=self.frames,
722 duration=f'{self.frames/self.rate:.3f}s'))
723 return fmt
725 def metadata(self):
726 """Metadata of the audio file.
728 Parameters
729 ----------
730 store_empty: bool
731 If `False` do not add meta data with empty values.
733 Returns
734 -------
735 meta_data: nested dict
737 Meta data contained in the audio file. Keys of the nested
738 dictionaries are always strings. If the corresponding
739 values are dictionaries, then the key is the section name
740 of the metadata contained in the dictionary. All other
741 types of values are values for the respective key. In
742 particular they are strings. But other types like for
743 example ints or floats are also allowed. See
744 `audioio.audiometadata` module for available functions to
745 work with such metadata.
747 """
748 if self._metadata is None:
749 if self._load_metadata is None:
750 self._metadata = {}
751 else:
752 self._metadata = self._load_metadata(self.filepath,
753 **self._metadata_kwargs)
754 return self._metadata
756 def markers(self):
757 """Read markers of the audio file.
759 See `audioio.audiomarkers` module for available functions
760 to work with markers.
762 Returns
763 -------
764 locs: 2-D ndarray of int
765 Marker positions (first column) and spans (second column)
766 for each marker (rows).
767 labels: 2-D ndarray of str objects
768 Labels (first column) and texts (second column)
769 for each marker (rows).
770 """
771 if self._locs is None:
772 if self._load_markers is None:
773 self._locs = np.zeros((0, 2), dtype=int)
774 self._labels = np.zeros((0, 2), dtype=object)
775 else:
776 self._locs, self._labels = self._load_markers(self.filepath)
777 return self._locs, self._labels
779 def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
780 """Set parameters for unwrapping clipped data.
782 See unwrap() function from the audioio package.
784 Parameters
785 ----------
786 thresh: float
787 Threshold for detecting wrapped data relative to self.unwrap_ampl
788 which is initially set to self.ampl_max.
789 If zero, do not unwrap.
790 clips: bool
791 If True, then clip the unwrapped data properly.
792 Otherwise, unwrap the data and double the
793 minimum and maximum data range
794 (self.ampl_min and self.ampl_max).
795 down_scale: bool
796 If not `clips`, then downscale the signal by a factor of two,
797 in order to keep the range between -1 and 1.
798 unit: str
799 Unit of the data.
800 """
801 self.unwrap_ampl = self.ampl_max
802 self.unwrap_thresh = thresh
803 self.unwrap_clips = clips
804 self.unwrap_down_scale = down_scale
805 self.unwrap = thresh > 1e-3
806 if self.unwrap:
807 if self.unwrap_clips:
808 add_unwrap(self.metadata(),
809 self.unwrap_thresh*self.unwrap_ampl,
810 self.unwrap_ampl, unit)
811 elif down_scale:
812 update_gain(self.metadata(), 0.5)
813 add_unwrap(self.metadata(),
814 0.5*self.unwrap_thresh*self.unwrap_ampl,
815 0.0, unit)
816 else:
817 self.ampl_min *= 2
818 self.ampl_max *= 2
819 add_unwrap(self.metadata(),
820 self.unwrap_thresh*self.unwrap_ampl,
821 0.0, unit)
823 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer):
824 """Load new data and unwrap it.
826 Parameters
827 ----------
828 r_offset: int
829 First frame to be read from file.
830 r_size: int
831 Number of frames to be read from file.
832 pbuffer: ndarray
833 Buffer where to store the loaded data.
834 """
835 self.load_audio_buffer(r_offset, r_size, pbuffer)
836 if self.unwrap:
837 # TODO: handle edge effects!
838 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl)
839 if self.unwrap_clips:
840 pbuffer[pbuffer > self.ampl_max] = self.ampl_max
841 pbuffer[pbuffer < self.ampl_min] = self.ampl_min
842 elif self.unwrap_down_scale:
843 pbuffer *= 0.5
846 # wave interface:
847 def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
848 verbose=0):
849 """Open audio file for reading using the wave module.
851 Note: we assume that setpos() and tell() use integer numbers!
853 Parameters
854 ----------
855 filepath: str
856 Name of the file.
857 buffersize: float
858 Size of internal buffer in seconds.
859 backsize: float
860 Part of the buffer to be loaded before the requested start index in seconds.
861 verbose: int
862 If larger than zero show detailed error/warning messages.
864 Raises
865 ------
866 ImportError
867 The wave module is not installed
868 """
869 self.verbose = verbose
870 if self.verbose > 0:
871 print(f'open_wave(filepath) with filepath={filepath}')
872 if not audio_modules['wave']:
873 self.rate = 0.0
874 self.channels = 0
875 self.frames = 0
876 self.size = 0
877 self.shape = (0, 0)
878 self.offset = 0
879 raise ImportError
880 if self.sf is not None:
881 self._close_wave()
882 self.sf = wave.open(filepath, 'r')
883 self.filepath = filepath
884 self.file_paths = [filepath]
885 self.file_indices = [0]
886 self.rate = float(self.sf.getframerate())
887 self.format = 'WAV'
888 sampwidth = self.sf.getsampwidth()
889 if sampwidth == 1:
890 self.dtype = 'u1'
891 self.encoding = 'PCM_U8'
892 else:
893 self.dtype = f'i{sampwidth}'
894 self.encoding = f'PCM_{sampwidth*8}'
895 self.factor = 1.0/(2.0**(sampwidth*8-1))
896 self.channels = self.sf.getnchannels()
897 self.frames = self.sf.getnframes()
898 self.shape = (self.frames, self.channels)
899 self.size = self.frames * self.channels
900 self.bufferframes = int(buffersize*self.rate)
901 self.backframes = int(backsize*self.rate)
902 self.init_buffer()
903 self.close = self._close_wave
904 self.load_audio_buffer = self._load_buffer_wave
905 # read 1 frame to determine the unit of the position values:
906 self.p0 = self.sf.tell()
907 self.sf.readframes(1)
908 self.pfac = self.sf.tell() - self.p0
909 self.sf.setpos(self.p0)
910 return self
912 def _close_wave(self):
913 """Close the audio file using the wave module. """
914 if self.sf is not None:
915 self.sf.close()
916 self.sf = None
918 def _load_buffer_wave(self, r_offset, r_size, buffer):
919 """Load new data from file using the wave module.
921 Parameters
922 ----------
923 r_offset: int
924 First frame to be read from file.
925 r_size: int
926 Number of frames to be read from file.
927 buffer: ndarray
928 Buffer where to store the loaded data.
929 """
930 if self.sf is None:
931 self.sf = wave.open(self.filepath, 'r')
932 self.sf.setpos(r_offset*self.pfac + self.p0)
933 fbuffer = self.sf.readframes(r_size)
934 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels))
935 if self.dtype[0] == 'u':
936 buffer[:, :] = fbuffer * self.factor - 1.0
937 else:
938 buffer[:, :] = fbuffer * self.factor
941 # ewave interface:
942 def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
943 verbose=0):
944 """Open audio file for reading using the ewave module.
946 Parameters
947 ----------
948 filepath: str
949 Name of the file.
950 buffersize: float
951 Size of internal buffer in seconds.
952 backsize: float
953 Part of the buffer to be loaded before the requested start index in seconds.
954 verbose: int
955 If larger than zero show detailed error/warning messages.
957 Raises
958 ------
959 ImportError
960 The ewave module is not installed.
961 """
962 self.verbose = verbose
963 if self.verbose > 0:
964 print(f'open_ewave(filepath) with filepath={filepath}')
965 if not audio_modules['ewave']:
966 self.rate = 0.0
967 self.channels = 0
968 self.frames = 0
969 self.shape = (0, 0)
970 self.size = 0
971 self.offset = 0
972 raise ImportError
973 if self.sf is not None:
974 self._close_ewave()
975 self.sf = ewave.open(filepath, 'r')
976 self.filepath = filepath
977 self.file_paths = [filepath]
978 self.file_indices = [0]
979 self.rate = float(self.sf.sampling_rate)
980 self.channels = self.sf.nchannels
981 self.frames = self.sf.nframes
982 self.shape = (self.frames, self.channels)
983 self.size = self.frames * self.channels
984 self.format = 'WAV' # or WAVEX?
985 self.encoding = self.numpy_encodings[self.sf.dtype]
986 self.bufferframes = int(buffersize*self.rate)
987 self.backframes = int(backsize*self.rate)
988 self.init_buffer()
989 self.close = self._close_ewave
990 self.load_audio_buffer = self._load_buffer_ewave
991 return self
993 def _close_ewave(self):
994 """Close the audio file using the ewave module. """
995 if self.sf is not None:
996 del self.sf
997 self.sf = None
999 def _load_buffer_ewave(self, r_offset, r_size, buffer):
1000 """Load new data from file using the ewave module.
1002 Parameters
1003 ----------
1004 r_offset: int
1005 First frame to be read from file.
1006 r_size: int
1007 Number of frames to be read from file.
1008 buffer: ndarray
1009 Buffer where to store the loaded data.
1010 """
1011 if self.sf is None:
1012 self.sf = ewave.open(self.filepath, 'r')
1013 fbuffer = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
1014 fbuffer = ewave.rescale(fbuffer, 'float')
1015 if len(fbuffer.shape) == 1:
1016 fbuffer = np.reshape(fbuffer,(-1, 1))
1017 buffer[:,:] = fbuffer
1020 # soundfile interface:
1021 def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
1022 verbose=0):
1023 """Open audio file for reading using the SoundFile module.
1025 Parameters
1026 ----------
1027 filepath: str
1028 Name of the file.
1029 bufferframes: float
1030 Size of internal buffer in seconds.
1031 backsize: float
1032 Part of the buffer to be loaded before the requested start index in seconds.
1033 verbose: int
1034 If larger than zero show detailed error/warning messages.
1036 Raises
1037 ------
1038 ImportError
1039 The SoundFile module is not installed
1040 """
1041 self.verbose = verbose
1042 if self.verbose > 0:
1043 print(f'open_soundfile(filepath) with filepath={filepath}')
1044 if not audio_modules['soundfile']:
1045 self.rate = 0.0
1046 self.channels = 0
1047 self.frames = 0
1048 self.shape = (0, 0)
1049 self.size = 0
1050 self.offset = 0
1051 raise ImportError
1052 if self.sf is not None:
1053 self._close_soundfile()
1054 self.sf = soundfile.SoundFile(filepath, 'r')
1055 self.filepath = filepath
1056 self.file_paths = [filepath]
1057 self.file_indices = [0]
1058 self.rate = float(self.sf.samplerate)
1059 self.channels = self.sf.channels
1060 self.frames = 0
1061 self.size = 0
1062 if self.sf.seekable():
1063 self.frames = self.sf.seek(0, soundfile.SEEK_END)
1064 self.sf.seek(0, soundfile.SEEK_SET)
1065 # TODO: if not seekable, we cannot handle that file!
1066 self.shape = (self.frames, self.channels)
1067 self.size = self.frames * self.channels
1068 self.format = self.sf.format
1069 self.encoding = self.sf.subtype
1070 self.bufferframes = int(buffersize*self.rate)
1071 self.backframes = int(backsize*self.rate)
1072 self.init_buffer()
1073 self.close = self._close_soundfile
1074 self.load_audio_buffer = self._load_buffer_soundfile
1075 return self
1077 def _close_soundfile(self):
1078 """Close the audio file using the SoundFile module. """
1079 if self.sf is not None:
1080 self.sf.close()
1081 self.sf = None
1083 def _load_buffer_soundfile(self, r_offset, r_size, buffer):
1084 """Load new data from file using the SoundFile module.
1086 Parameters
1087 ----------
1088 r_offset: int
1089 First frame to be read from file.
1090 r_size: int
1091 Number of frames to be read from file.
1092 buffer: ndarray
1093 Buffer where to store the loaded data.
1094 """
1095 if self.sf is None:
1096 self.sf = soundfile.SoundFile(self.filepath, 'r')
1097 self.sf.seek(r_offset, soundfile.SEEK_SET)
1098 buffer[:, :] = self.sf.read(r_size, always_2d=True)
1101 # wavefile interface:
def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading via the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This loader, with the file opened and the buffer initialized.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # module not available: reset to an empty state before giving up
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wavefile()
    reader = wavefile.WaveReader(filepath)
    self.sf = reader
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(reader.samplerate)
    self.channels = reader.channels
    self.frames = reader.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # Translate the numeric format code of the file into names:
    # scan all integer constants of wavefile.Format and pick the ones
    # that equal the major type part and the subtype part of the code.
    type_mask = wavefile.Format.TYPEMASK
    sub_mask = wavefile.Format.SUBMASK
    file_format = reader.format
    for name in dir(wavefile.Format):
        value = getattr(wavefile.Format, name)
        if not isinstance(value, int):
            continue
        if (value & type_mask) > 0 and (file_format & type_mask) == value:
            self.format = name
        if (value & sub_mask) > 0 and (file_format & sub_mask) == value:
            self.encoding = name
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # hook up the wavefile specific implementations:
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self
def _close_wavefile(self):
    """Close the file handle that was opened with the wavefile module.

    Does nothing if no file is currently open.
    """
    reader = self.sf
    if reader is None:
        return
    reader.close()
    self.sf = None
def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read via the wavefile module.

    If the file has been closed in the meantime, it is reopened from
    `self.filepath` first.

    Parameters
    ----------
    r_offset: int
        Index of the first frame to read from the file.
    r_size: int
        Number of frames to read from the file.
    buffer: ndarray
        2-D destination array that receives the frames.
    """
    if self.sf is None:
        self.sf = wavefile.WaveReader(self.filepath)
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    # read into a temporary buffer provided by the reader ...
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    # ... and copy its transpose into the destination:
    buffer[:, :] = chunk.T
1188 # audioread interface:
def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading via the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This loader, with the file opened and the buffer initialized.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # module not available: reset to an empty state before giving up
        self.rate, self.channels, self.frames = 0.0, 0, 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_audioread()
    decoder = audioread.audio_open(filepath)
    self.sf = decoder
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(decoder.samplerate)
    self.channels = decoder.channels
    # the number of frames is derived from the duration of the file:
    self.frames = int(np.ceil(self.rate*decoder.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # the most recent block delivered by audioread and its file offset:
    self.read_buffer = np.zeros((0,0))
    self.read_offset = 0
    # hook up the audioread specific implementations:
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = iter(decoder)
    return self
def _close_audioread(self):
    """Close the file handle that was opened with the audioread module.

    The handle is released by calling its `__exit__()` method.
    Does nothing if no file is currently open.
    """
    decoder = self.sf
    if decoder is None:
        return
    decoder.__exit__(None, None, None)
    self.sf = None
def _load_buffer_audioread(self, r_offset, r_size, buffer):
    """Load new data from file using the audioread module.

    audioread can only iterate through a file once and in blocksizes that are
    given by audioread. Therefore we keep yet another buffer: `self.read_buffer`
    at file offset `self.read_offset` containing whatever audioread returned.

    The blocks returned by audioread are interpreted as little-endian
    16-bit integers and divided by 2**15 - 1. If the file ends before
    the request is satisfied, the remainder of `buffer` is filled
    with zeros.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    scale = 2.0**15 - 1.0
    if self.sf is None:
        # The file was closed in the meantime. A reopened file starts
        # again at its first frame, so the iterator and the
        # bookkeeping of the read buffer must be reset together with
        # the handle. Otherwise we would continue to pull blocks from
        # the iterator of the closed file at a stale offset.
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = iter(self.sf)
        self.read_buffer = np.zeros((0,0))
        self.read_offset = 0
    b_offset = 0
    if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size
         and self.read_offset < r_offset + r_size ):
        # read_buffer overlaps at the end of the requested interval:
        i = 0
        n = r_offset + r_size - self.read_offset
        if n > r_size:
            # read_buffer starts before the requested interval:
            i += n - r_size
            n = r_size
        buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / scale
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)')
        r_size -= n
        if r_size <= 0:
            return
    # go back to beginning of file:
    if r_offset < self.read_offset:
        if self.verbose > 2:
            print(' rewind')
        self._close_audioread()
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = iter(self.sf)
        self.read_buffer = np.zeros((0,0))
        self.read_offset = 0
    # read to position:
    while self.read_offset + self.read_buffer.shape[0] < r_offset:
        self.read_offset += self.read_buffer.shape[0]
        try:
            fbuffer = next(self.sf_iter)
        except StopIteration:
            self.read_buffer = np.zeros((0,0))
            buffer[:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        if self.verbose > 2:
            print(f' read forward by {self.read_buffer.shape[0]} frames')
    # recycle file data:
    if ( self.read_offset + self.read_buffer.shape[0] > r_offset
         and self.read_offset <= r_offset ):
        # read_buffer covers the beginning of the requested interval:
        i = r_offset - self.read_offset
        n = self.read_offset + self.read_buffer.shape[0] - r_offset
        if n > r_size:
            n = r_size
        buffer[:n,:] = self.read_buffer[i:i+n,:] / scale
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
        b_offset += n
        r_offset += n
        r_size -= n
    # read data:
    if self.verbose > 2 and r_size > 0:
        print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)')
    while r_size > 0:
        self.read_offset += self.read_buffer.shape[0]
        try:
            fbuffer = next(self.sf_iter)
        except StopIteration:
            # end of file: pad what is left of the buffer with zeros
            self.read_buffer = np.zeros((0,0))
            buffer[b_offset:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        n = self.read_buffer.shape[0]
        if n > r_size:
            # keep the surplus in read_buffer for the next request
            n = r_size
        if n > 0:
            buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / scale
            if self.verbose > 2:
                print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
            b_offset += n
            r_offset += n
            r_size -= n
1352 # open multiple audio files as one:
def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Open multiple audio files as a single concatenated array.

    Without `end_indices` each file is opened in turn. Files that
    cannot be opened are skipped. The first file whose number of
    channels or sampling rate differs from the first file, or whose
    start time does not continue the previous file within one second,
    terminates the list; this and all following files are not used.
    Metadata and markers of the used files are collected, with marker
    positions shifted to the concatenated time axis.

    Parameters
    ----------
    filepaths: list of str
        List of file names of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.
        This is the argument that actually selects the minimal
        initialization.

    Returns
    -------
    self
        This loader.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empty sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file:
    start_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization, files are opened later on demand:
        self.filepath = filepaths[0]
        self.file_paths = filepaths
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                a = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # skip files that cannot be opened
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = a.metadata()
            fmd = flatten_metadata(md, True)
            add_metadata(self._metadata, fmd)
            if self.filepath is None:
                # first file:
                self.filepath = a.filepath
                self.format = a.format
                self.encoding = a.encoding
                self.rate = a.rate
                self.channels = a.channels
                self.start_time = get_datetime(md)
                start_time = self.start_time
            else:
                # check channels and rate:
                error_str = None
                if a.channels != self.channels:
                    error_str = f'number of channels differs: ' \
                                f'{a.channels} in {a.filepath} versus ' \
                                f'{self.channels} in {self.filepath}'
                if a.rate != self.rate:
                    error_str = f'sampling rates differ: ' \
                                f'{a.rate} in {a.filepath} versus ' \
                                f'{self.rate} in {self.filepath}'
                # check start time of recording:
                stime = get_datetime(md)
                if start_time is None or stime is None or \
                   abs(start_time - stime) > timedelta(seconds=1):
                    error_str = f'start time does not indicate continuous recording: ' \
                                f'expected {start_time} instead of ' \
                                f'{stime} in {a.filepath}'
                if error_str is not None:
                    # this file does not continue the recording: stop here
                    if verbose > 0:
                        print(error_str)
                    a.close()
                    del a
                    break
            # markers:
            locs, labels = a.markers()
            # Shift the positions by the frames of all previous files.
            # Work on a copy, in case markers() hands out an array
            # that the loader keeps using.
            locs = np.array(locs)
            locs[:,0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += a.frames
            self.end_indices.append(self.frames)
            if start_time is not None:
                start_time += timedelta(seconds=a.frames/a.rate)
            # add file to lists:
            self.file_paths.append(filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(a)
            else:
                a.close()
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(a)
                self.open_loaders.append(a)
            else:
                # too many loaders: drop it, it is recreated on demand
                a.close()
                del a
                self.audio_files.append(None)
    if len(self.audio_files) == 0:
        raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
    # set startime from first file:
    if self.start_time is not None:
        set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    # metadata and markers have already been collected above:
    self._load_metadata = None
    self._load_markers = None
    return self
def _close_multiple(self):
    """Close all underlying audio files and drop the bookkeeping.

    Afterwards `filepath` is None, `file_paths` and `file_indices`
    are empty lists, and the attributes `audio_files`, `open_files`,
    `open_loaders`, `start_indices`, and `end_indices` are removed
    from the instance.
    """
    self.open_files = []
    self.open_loaders = []
    # audio_files might already be gone from a previous close:
    for loader in getattr(self, 'audio_files', ()):
        if loader is not None:
            loader.close()
    self.filepath = None
    self.file_paths = []
    self.file_indices = []
    # make sure each attribute exists before removing it:
    for name in ('audio_files', 'open_files', 'open_loaders',
                 'start_indices', 'end_indices'):
        setattr(self, name, [])
        delattr(self, name)
def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames from the concatenated files.

    The request is split at the file boundaries and each part is
    delegated to the loader of the respective file. Loaders are
    created on demand. `self.open_loaders` and `self.open_files` are
    kept in order of last usage; when they grow beyond
    `AudioLoader.max_open_loaders` or `AudioLoader.max_open_files`,
    the least recently used entry is closed.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    pos = r_offset
    remaining = r_size
    dest = 0
    # index of the file that contains frame `pos`:
    k = np.searchsorted(self.end_indices, pos, side='right')
    while remaining > 0:
        if self.audio_files[k] is None:
            # no loader for this file yet, create one:
            fresh = AudioLoader(self.file_paths[k],
                                self.buffersize, self.backsize, 0)
            self.audio_files[k] = fresh
            self.open_loaders.append(fresh)
            self.open_files.append(fresh)
            if len(self.open_files) > AudioLoader.max_open_files:
                self.open_files.pop(0).close()
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                # discard the least recently used loader:
                evicted = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(evicted)] = None
                evicted.close()
                del evicted
                # run the garbage collector every now and then:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        else:
            # move the loader to the end of the usage list:
            self.open_loaders.remove(self.audio_files[k])
            self.open_loaders.append(self.audio_files[k])
        loader = self.audio_files[k]
        # range of frames within the current file:
        first = pos - self.start_indices[k]
        last = min(pos + remaining, self.end_indices[k]) - self.start_indices[k]
        count = last - first
        loader.load_audio_buffer(first, count, buffer[dest:dest + count,:])
        # loading may have reopened the file, mark it as most recently used:
        if loader in self.open_files:
            self.open_files.remove(loader)
        self.open_files.append(loader)
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        # continue with the next file:
        dest += count
        pos += count
        remaining -= count
        k += 1
def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    A sequence of file names is opened via `open_multiple()`. If
    only a single file remains, or if `filepath` is a single name,
    the available audio modules are tried one after the other until
    one of them opens the file and reports at least one frame.

    Parameters
    ----------
    filepath: str or list of str
        Name of the file or list of many file names that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions. Only used by open_multiple() so far.

    Returns
    -------
    self
        This loader.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    is_sequence = isinstance(filepath, (list, tuple, np.ndarray))
    # numpy arrays with several elements have no truth value,
    # therefore sequences are checked by their length:
    if is_sequence:
        is_empty = len(filepath) == 0
    else:
        is_empty = not filepath
    if is_empty:
        raise ValueError('input argument filepath is empty string!')
    if is_sequence:
        self.open_multiple(filepath, buffersize, backsize, verbose,
                           **kwargs)
        if len(self.file_paths) > 1:
            return self
        # only a single file is left, open it as a plain file:
        filepath = self.file_paths[0]
        self.close()
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
            # the module opened the file but found no data,
            # remember this for the error report and try the next one:
            errors.append(f' {lib} failed: no frames found')
            if verbose > 1:
                print(errors[-1])
        except Exception as e:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the file at once with `load_audio()`, cross checks the
    soundfile and audioread modules if both are available, and
    compares random, forward, and backward access via `AudioLoader`
    with the data loaded at once.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # matplotlib is needed only for plotting; import it here so
        # that `plt` is available in this function
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    # maximum difference accepted between the two ways of loading:
    tol = 2.0**(-14)
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        nframes = int(1.5*data.rate)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > tol):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        # random start indices exist only if the file is longer than a slice:
        if len(data) > nframes:
            for inx in np.random.randint(0, len(data)-nframes, 1000):
                if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > tol):
                    print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > tol):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > tol):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
def main(*args):
    """Call demo with command line arguments.

    Recognized arguments are `-h` (print usage), `-p` (plot the
    loaded data), and `-m <module>` (select the audio module to be
    used). The first other argument is taken as the path of the audio
    file, all arguments following it are ignored. Without a file
    path the usage is printed.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    # demo() looks up plt as a module level name:
    global plt

    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    mod = False
    for arg in args:
        if mod:
            # this argument is the module name following -m
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            mod = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            mod = True
        else:
            file_path = arg
            break

    # without a file there is nothing to demonstrate:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        import matplotlib.pyplot as plt

    demo(file_path, plot)
# run the demo when the module is executed as a script:
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(*cli_args)