Coverage for src/audioio/audioloader.py: 90%
829 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-23 13:35 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-23 13:35 +0000
1"""Loading data, metadata, and markers from audio files.
3- `load_audio()`: load a whole audio file at once.
4- `metadata()`: read metadata of an audio file.
5- `markers()`: read markers of an audio file.
6- class `AudioLoader`: read data from audio files in chunks.
8The read in data are always numpy arrays of floats ranging between -1 and 1.
9The arrays are 2-D ndarrays with first axis time and second axis channel,
10even for single channel data.
12If an audio file cannot be loaded, you might need to install
13additional packages. See
14[installation](https://bendalab.github.io/audioio/installation) for
15further instructions.
17For a demo run the module as:
18```
19python -m src.audioio.audioloader audiofile.wav
20```
21"""
23import gc
24import sys
25import warnings
26import os.path
27import numpy as np
28from pathlib import Path
29from datetime import timedelta
30from .audiomodules import *
31from .bufferedarray import BufferedArray
32from .riffmetadata import metadata_riff, markers_riff
33from .audiometadata import update_gain, add_unwrap, get_datetime
34from .audiometadata import flatten_metadata, add_metadata, set_starttime
35from .audiotools import unwrap
def load_wave(filepath):
    """Load wav file using the wave module from Python's standard library.

    Documentation
    -------------
    https://docs.python.org/3/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray of floats,
        first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # The context manager closes the file even if reading fails:
    with wave.open(filepath, 'r') as wf:
        nchannels, sampwidth, rate, nframes = wf.getparams()[:4]
        buffer = wf.readframes(nframes)
    factor = 2.0**(sampwidth*8 - 1)
    if sampwidth == 1:
        # 8-bit samples are unsigned, shift them to be centered on zero:
        samples = np.frombuffer(buffer, dtype='u1')
        data = samples.astype('d')/factor - 1.0
    elif sampwidth == 3:
        # numpy has no 24-bit integer type: put each sample into the
        # three most significant bytes of a 32-bit integer and shift it
        # back, which also extends the sign.
        # NOTE(review): like the native 'i2'/'i4' dtypes used below,
        # this assumes that wave delivers frames in native byte order.
        packed = np.frombuffer(buffer, dtype='u1').reshape(-1, 3)
        padded = np.zeros((len(packed), 4), dtype='u1')
        if sys.byteorder == 'little':
            padded[:, 1:] = packed
        else:
            padded[:, :3] = packed
        samples = padded.view('i4').ravel() >> 8
        data = samples.astype('d')/factor
    else:
        samples = np.frombuffer(buffer, dtype=f'i{sampwidth}')
        data = samples.astype('d')/factor
    return data.reshape(-1, nchannels), float(rate)
def load_ewave(filepath):
    """Load wav file using the ewave module.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    with ewave.open(filepath, 'r') as wf:
        rate = wf.sampling_rate
        data = ewave.rescale(wf.read(), 'float')
    # single channel data come as a 1-D array, add the channel axis:
    if len(data.shape) == 1:
        data = np.reshape(data, (-1, 1))
    return data, float(rate)
def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # Silence warnings of the reader only while reading. The context
    # manager restores the caller's warning filters afterwards, also
    # when reading fails.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # 8-bit samples are unsigned, shift them to be centered on zero:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # scale by the full range of the integer type:
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    # single channel data come as a 1-D array, add the channel axis:
    if len(data.shape) == 1:
        data = np.reshape(data, (-1, 1))
    return data, float(rate)
def load_soundfile(filepath):
    """Load audio file using SoundFile (based on libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    with soundfile.SoundFile(filepath, 'r') as sf:
        samplerate = sf.samplerate
        # frames=-1 requests all frames of the file:
        frames = sf.read(frames=-1, dtype='float64', always_2d=True)
    return frames, float(samplerate)
def load_wavefile(filepath):
    """Load audio file using wavefile (based on libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    rate, loaded = wavefile.load(filepath)
    # transpose the loaded array so that time is the first axis:
    data = loaded.astype(np.float64, copy=False).T
    return data, float(rate)
def load_audioread(filepath):
    """Load audio file using audioread.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, first dimension is time,
        second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        # the number of frames is estimated from the reported duration:
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            frames = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            # never write beyond the end of the preallocated array:
            n = min(frames.shape[0], len(data) - index)
            if n <= 0:
                break
            data[index:index+n, :] = frames[:n, :]
            index += n
    return data/(2.0**15-1.0), float(rate)
# Pairs of (key into audio_modules, loader function).
# load_audio() tries them in the order given here.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
    )
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""
def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.

    Parameters
    ----------
    filepath: str or path-like
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as a 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    if filepath is None:
        raise ValueError('input argument filepath is empty string!')
    # accept pathlib.Path and other path-like objects as well:
    filepath = os.fspath(filepath)
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
            # a loader that returns no data counts as a failure, too:
            errors.append(f' {lib} failed: no data in file')
            if verbose > 1:
                print(errors[-1])
        except Exception as e:
            # deliberately broad: any failure just means "try the next module"
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' +
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def metadata(filepath, store_empty=False):
    """Read metadata of an audio file.

    The file is read with `metadata_riff()`. A `ValueError` raised by
    it is taken as "not a RIFF file" and results in empty metadata.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file. Keys of the nested
        dictionaries are always strings. If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary. All other types of
        values are values for the respective key. In particular they
        are strings. But other types like for example ints or floats
        are also allowed. See `audioio.audiometadata` module for
        available functions to work with such metadata.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        meta_data = metadata_riff(filepath, store_empty)
    except ValueError:  # not a RIFF file
        meta_data = {}
    return meta_data
def markers(filepath):
    """Read markers of an audio file.

    The file is read with `markers_riff()`. A `ValueError` raised by
    it is taken as "not a RIFF file" and results in empty markers.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        found = markers_riff(filepath)
    except ValueError:  # not a RIFF file
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        found = (no_locs, no_labels)
    return found
454class AudioLoader(BufferedArray):
455 """Buffered reading of audio data for random access of the data in the file.
457 The class allows for reading very large audio files or many
458 sequential audio files that do not fit into memory.
459 An AudioLoader instance can be used like a huge read-only numpy array, i.e.
460 ```
461 data = AudioLoader('path/to/audio/file.wav')
462 x = data[10000:20000,0]
463 ```
464 The first index specifies the frame, the second one the channel.
466 Behind the scenes, `AudioLoader` tries to open the audio file with
467 all available audio modules until it succeeds (first line). It
468 then reads data from the file as necessary for the requested data
469 (second line). Accessing the content of the audio files via a
470 buffer that holds only a part of the data is managed by the
471 `BufferedArray` class.
473 Reading sequentially through the file is always possible. Some
474 modules, however, (e.g. audioread, needed for mp3 files) can only
475 read forward. If previous data are requested, then the file is read
476 from the beginning again. This slows down access to previous data
477 considerably. Use the `backsize` argument of the open function to
478 make sure some data are loaded into the buffer before the requested
479 frame. Then a subsequent access to the data within `backsize` seconds
480 before that frame can still be handled without the need to reread
481 the file from the beginning.
483 Usage
484 -----
485 With context management:
486 ```
487 import audioio as aio
488 with aio.AudioLoader(filepath, 60.0, 10.0) as data:
489 # do something with the content of the file:
490 x = data[0:10000]
491 y = data[10000:20000]
492 z = x + y
493 ```
495 For using a specific audio module, here the audioread module:
496 ```
497 data = aio.AudioLoader()
498 with data.open_audioread(filepath, 60.0, 10.0):
499 # do something ...
500 ```
502 Use `blocks()` for sequential, blockwise reading and processing:
503 ```
504 from scipy.signal import spectrogram
505 nfft = 2048
506 with aio.AudioLoader('some/audio.wav') as data:
507 for x in data.blocks(100*nfft, nfft//2):
508 f, t, Sxx = spectrogram(x, fs=data.rate,
509 nperseg=nfft, noverlap=nfft//2)
510 ```
512 For loop iterates over single frames (1-D arrays containing samples for each channel):
513 ```
514 with aio.AudioLoader('some/audio.wav') as data:
515 for x in data:
516 print(x)
517 ```
519 Traditional open and close:
520 ```
521 data = aio.AudioLoader(filepath, 60.0)
522 x = data[:,:] # read the whole file
523 data.close()
524 ```
526 this is the same as:
527 ```
528 data = aio.AudioLoader()
529 data.open(filepath, 60.0)
530 ...
531 ```
533 Classes inheriting AudioLoader just need to implement
534 ```
535 self.load_audio_buffer(offset, nsamples, pbuffer)
536 ```
537 This function needs to load the supplied `pbuffer` with
538 `nsamples` frames of data starting at frame `offset`.
540 In the constructor or some kind of opening function, you need to
541 set some member variables, as described for `BufferedArray`.
543 For loading metadata and markers, implement the functions
544 ```
545 self._load_metadata(filepath, **kwargs)
546 self._load_markers(filepath)
547 ```
549 Parameters
550 ----------
551 filepath: str or list of str
552 Name of the file or list of many file names that should be
553 made accessible as a single array.
554 buffersize: float
555 Size of internal buffer in seconds.
556 backsize: float
557 Part of the buffer to be loaded before the requested start index in seconds.
558 verbose: int
559 If larger than zero show detailed error/warning messages.
560 store_empty: bool
561 If `False` do not return meta data with empty values.
563 Attributes
564 ----------
565 filepath: str
566 Name and path of the opened file. In case of many files, the first one.
567 file_paths: list of str
568 List of paths of the opened files that are made accessible
569 as a single array.
570 file_indices: list of int
571 For each file the index of its first sample.
572 rate: float
573 The sampling rate of the data in Hertz.
574 channels: int
575 The number of channels.
576 frames: int
577 The number of frames in the file. Same as `len()`.
578 format: str or None
579 Format of the audio file.
580 encoding: str or None
581 Encoding/subtype of the audio file.
582 shape: tuple
583 Frames and channels of the data.
584 ndim: int
585 Number of dimensions: always 2 (frames and channels).
586 offset: int
587 Index of first frame in the current buffer.
588 buffer: ndarray of floats
589 The currently available data from the file.
590 ampl_min: float
591 Minimum amplitude the file format supports.
592 Always -1.0 for audio data.
593 ampl_max: float
594 Maximum amplitude the file format supports.
595 Always +1.0 for audio data.
597 Methods
598 -------
599 - `len()`: Number of frames.
600 - `file_start_times()`: time of first frame of each file in seconds.
601 - `get_file_index()`: file path and index of frame contained by this file.
602 - `open()`: Open an audio file by trying available audio modules.
603 - `open_*()`: Open an audio file with the respective audio module.
604 - `__getitem__`: Access data of the audio file.
605 - `update_buffer()`: Update the internal buffer for a range of frames.
606 - `blocks()`: Generator for blockwise processing of AudioLoader data.
607 - `file_start_times()`: Time of first frame of each file in seconds.
608 - `get_file_index()`: File path and index of frame contained by this file.
609 - `basename()`: Base name of the audio data.
610 - `format_dict()`: technical infos about how the data are stored.
611 - `metadata()`: Metadata stored along with the audio data.
612 - `markers()`: Markers stored along with the audio data.
613 - `set_unwrap()`: Set parameters for unwrapping clipped data.
614 - `set_time_delta()`: Set maximum allowed time difference between successive files.
615 - `close()`: Close the file.
617 """
# Class-level limits. Their docstrings call them suggestions; the code
# that applies them is not part of this section of the file.
max_open_files = 5
""" Suggestion for maximum number of open file descriptors. """

max_open_loaders = 10
""" Suggestion for maximum number of AudioLoaders when opening multiple files. """
def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
             verbose=0, **meta_kwargs):
    """Set up an empty loader and optionally open an audio file.

    Parameters
    ----------
    filepath: str or list of str or None
        Name of the file or list of many file names that should be
        made accessible as a single array. If `None`, no file is
        opened; call one of the `open*()` functions later.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start
        index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **meta_kwargs: dict
        Keyword arguments that are passed on to the function loading
        the metadata, e.g. `store_empty`.
    """
    super().__init__(verbose=verbose)
    self.format = None
    self.encoding = None
    # metadata and markers are loaded on demand by metadata() and markers():
    self._metadata = None
    self._locs = None
    self._labels = None
    self._load_metadata = metadata
    self._load_markers = markers
    self._metadata_kwargs = meta_kwargs
    self.filepath = None
    self.file_paths = None
    self.file_indices = []
    self._max_time_diff = 1
    self.sf = None
    # the open_*() functions replace `close` by the matching closing function:
    self.close = self._close
    self.load_buffer = self._load_buffer_unwrap
    self.ampl_min = -1.0
    self.ampl_max = +1.0
    self.unwrap = False
    self.unwrap_thresh = 0.0
    self.unwrap_clips = False
    self.unwrap_ampl = 1.0
    # `unwrap_down_scale` is the name used by set_unwrap() and
    # _load_buffer_unwrap(); it needs to exist right from the start.
    self.unwrap_down_scale = True
    # previous spelling of the attribute, kept for backward compatibility:
    self.unwrap_downscale = True
    if filepath is not None:
        self.open(filepath, buffersize, backsize, verbose)
# Lookup table from numpy dtype to the name of the encoding.
# open_ewave() uses it to set `self.encoding` from the dtype of the file.
# The '>f4' and '>f8' entries map the big-endian float types as well.
numpy_encodings = {np.dtype(np.int64): 'PCM_64',
                   np.dtype(np.int32): 'PCM_32',
                   np.dtype(np.int16): 'PCM_16',
                   np.dtype(np.single): 'FLOAT',
                   np.dtype(np.double): 'DOUBLE',
                   np.dtype('>f4'): 'FLOAT',
                   np.dtype('>f8'): 'DOUBLE'}
""" Map numpy dtypes to encodings.
"""
663 def _close(self):
664 pass
666 def __del__(self):
667 self.close()
def file_start_times(self):
    """Time of first frame of each file in seconds.

    The start indices in `self.file_indices` are divided
    by the sampling rate `self.rate`.

    Returns
    -------
    times: array of float
        Time of the first frame of each file relative to buffer start
        in seconds.
    """
    return np.array([start/self.rate for start in self.file_indices])
def get_file_index(self, frame):
    """File path and index of frame contained by this file.

    Parameters
    ----------
    frame: int
        Index of frame.

    Returns
    -------
    filepath: str
        Path of file that contains the frame.
    index: int
        Index of the frame relative to the first frame
        in the containing file.

    Raises
    ------
    ValueError
        `frame` is negative or not smaller than `self.frames`.
    """
    if not 0 <= frame < self.frames:
        raise ValueError('invalid frame')
    starts = self.file_indices
    # search backwards for the last file starting at or before `frame`;
    # fall back to the first file if there is none:
    hit = next((k for k in range(len(starts) - 1, -1, -1)
                if starts[k] <= frame), 0)
    return self.file_paths[hit], frame - starts[hit]
def basename(self, path=None):
    """Base name of the audio data.

    Parameters
    ----------
    path: str or None
        Path of the audio file from which a base name is generated.
        If `None`, use `self.filepath`.

    Returns
    -------
    s: str
        The name. Defaults to the stem of `path`, i.e. the file name
        without directories and without its last suffix.

    """
    source = self.filepath if path is None else path
    return Path(source).stem
def format_dict(self):
    """Technical infos about how the data are stored in the file.

    Returns
    -------
    fmt: dict
        Dictionary with name, filepath, format, encoding, samplingrate,
        channels, frames, and duration of the audio file. Format and
        encoding are only included if they are not `None`.
        Samplingrate and duration are strings with units.

    """
    fmt = {'name': self.basename(), 'filepath': self.filepath}
    if self.format is not None:
        fmt['format'] = self.format
    if self.encoding is not None:
        fmt['encoding'] = self.encoding
    rate_str = f'{self.rate:.0f}Hz'
    duration_str = f'{self.frames/self.rate:.3f}s'
    fmt['samplingrate'] = rate_str
    fmt['channels'] = self.channels
    fmt['frames'] = self.frames
    fmt['duration'] = duration_str
    return fmt
def metadata(self):
    """Metadata of the audio file.

    The metadata are loaded on the first call by means of
    `self._load_metadata()`, called with `self.filepath` and the
    keyword arguments stored in `self._metadata_kwargs`, and are
    kept for subsequent calls. Without a loading function the
    metadata are empty.

    Returns
    -------
    meta_data: nested dict

        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings. But other types like for
        example ints or floats are also allowed.  See
        `audioio.audiometadata` module for available functions to
        work with such metadata.

    """
    if self._metadata is not None:
        return self._metadata
    loader = self._load_metadata
    if loader is None:
        self._metadata = {}
    else:
        self._metadata = loader(self.filepath, **self._metadata_kwargs)
    return self._metadata
def markers(self):
    """Read markers of the audio file.

    The markers are loaded on the first call by means of
    `self._load_markers()` and are kept for subsequent calls.
    Without a loading function the marker arrays are empty.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of str objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    if self._locs is not None:
        return self._locs, self._labels
    if self._load_markers is None:
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
    else:
        self._locs, self._labels = self._load_markers(self.filepath)
    return self._locs, self._labels
def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
    """Set parameters for unwrapping clipped data.

    See unwrap() function from the audioio package.

    Unwrapping is enabled for thresholds larger than 0.001. If enabled,
    the unwrap settings are also added to the metadata.

    Parameters
    ----------
    thresh: float
        Threshold for detecting wrapped data relative to self.unwrap_ampl
        which is initially set to self.ampl_max.
        If zero, do not unwrap.
    clips: bool
        If True, then clip the unwrapped data properly.
        Otherwise, unwrap the data and double the
        minimum and maximum data range
        (self.ampl_min and self.ampl_max).
    down_scale: bool
        If not `clips`, then downscale the signal by a factor of two,
        in order to keep the range between -1 and 1.
    unit: str
        Unit of the data.
    """
    self.unwrap_ampl = self.ampl_max
    self.unwrap_thresh = thresh
    self.unwrap_clips = clips
    self.unwrap_down_scale = down_scale
    self.unwrap = thresh > 1e-3
    if not self.unwrap:
        return
    if self.unwrap_clips:
        # unwrapped data are clipped at the original amplitude:
        add_unwrap(self.metadata(),
                   self.unwrap_thresh*self.unwrap_ampl,
                   self.unwrap_ampl, unit)
        return
    if down_scale:
        # data are scaled by one half, so are gain and threshold:
        update_gain(self.metadata(), 0.5)
        add_unwrap(self.metadata(),
                   0.5*self.unwrap_thresh*self.unwrap_ampl,
                   0.0, unit)
        return
    # neither clipped nor scaled: the data range doubles
    self.ampl_min *= 2
    self.ampl_max *= 2
    add_unwrap(self.metadata(),
               self.unwrap_thresh*self.unwrap_ampl,
               0.0, unit)
848 def _load_buffer_unwrap(self, r_offset, r_size, pbuffer):
849 """Load new data and unwrap it.
851 Parameters
852 ----------
853 r_offset: int
854 First frame to be read from file.
855 r_size: int
856 Number of frames to be read from file.
857 pbuffer: ndarray
858 Buffer where to store the loaded data.
859 """
860 self.load_audio_buffer(r_offset, r_size, pbuffer)
861 if self.unwrap:
862 # TODO: handle edge effects!
863 unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl)
864 if self.unwrap_clips:
865 pbuffer[pbuffer > self.ampl_max] = self.ampl_max
866 pbuffer[pbuffer < self.ampl_min] = self.ampl_min
867 elif self.unwrap_down_scale:
868 pbuffer *= 0.5
def set_time_delta(self, time_delta):
    """Set maximum allowed time difference between successive files.

    Parameters
    ----------
    time_delta: int
        Maximum number of seconds the start time of a recording file is allowed
        to differ from the end of the previous file.
        Default is one second.
    """
    self._max_time_diff = time_delta
882 # wave interface:
def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
              verbose=0):
    """Open audio file for reading using the wave module.

    Note: we assume that setpos() and tell() use integer numbers!

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This loader, so that the call can be used in a `with` statement.

    Raises
    ------
    ImportError
        The wave module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wave(filepath) with filepath={filepath}')
    if not audio_modules['wave']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.size = 0
        self.shape = (0, 0)
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wave()
    self.sf = wave.open(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.getframerate())
    self.format = 'WAV'
    nbytes = self.sf.getsampwidth()
    # single bytes are unsigned, wider samples are signed integers:
    is_unsigned = nbytes == 1
    self.dtype = 'u1' if is_unsigned else f'i{nbytes}'
    self.encoding = 'PCM_U8' if is_unsigned else f'PCM_{nbytes*8}'
    self.factor = 1.0/(2.0**(nbytes*8-1))
    self.channels = self.sf.getnchannels()
    self.frames = self.sf.getnframes()
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wave
    self.load_audio_buffer = self._load_buffer_wave
    # read 1 frame to determine the unit of the position values:
    start = self.sf.tell()
    self.p0 = start
    self.sf.readframes(1)
    self.pfac = self.sf.tell() - start
    self.sf.setpos(start)
    return self
948 def _close_wave(self):
949 """Close the audio file using the wave module. """
950 if self.sf is not None:
951 self.sf.close()
952 self.sf = None
954 def _load_buffer_wave(self, r_offset, r_size, buffer):
955 """Load new data from file using the wave module.
957 Parameters
958 ----------
959 r_offset: int
960 First frame to be read from file.
961 r_size: int
962 Number of frames to be read from file.
963 buffer: ndarray
964 Buffer where to store the loaded data.
965 """
966 if self.sf is None:
967 self.sf = wave.open(self.filepath, 'r')
968 self.sf.setpos(r_offset*self.pfac + self.p0)
969 fbuffer = self.sf.readframes(r_size)
970 fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels))
971 if self.dtype[0] == 'u':
972 buffer[:, :] = fbuffer * self.factor - 1.0
973 else:
974 buffer[:, :] = fbuffer * self.factor
977 # ewave interface:
def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
               verbose=0):
    """Open audio file for reading using the ewave module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This loader, so that the call can be used in a `with` statement.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_ewave(filepath) with filepath={filepath}')
    if not audio_modules['ewave']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_ewave()
    self.sf = ewave.open(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.sampling_rate)
    nchannels = self.sf.nchannels
    nframes = self.sf.nframes
    self.channels = nchannels
    self.frames = nframes
    self.shape = (nframes, nchannels)
    self.size = nframes * nchannels
    self.format = 'WAV' # or WAVEX?
    # name of the encoding as given by the dtype of the data in the file:
    self.encoding = self.numpy_encodings[self.sf.dtype]
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_ewave
    self.load_audio_buffer = self._load_buffer_ewave
    return self
1029 def _close_ewave(self):
1030 """Close the audio file using the ewave module. """
1031 if self.sf is not None:
1032 del self.sf
1033 self.sf = None
def _load_buffer_ewave(self, r_offset, r_size, buffer):
    """Load new data from file using the ewave module.

    The file is reopened if it has been closed in the meantime.

    Parameters
    ----------
    r_offset: int
       First frame to be read from file.
    r_size: int
       Number of frames to be read from file.
    buffer: ndarray
       Buffer where to store the loaded data.
    """
    if self.sf is None:
        self.sf = ewave.open(self.filepath, 'r')
    raw = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
    scaled = ewave.rescale(raw, 'float')
    # single channel data come as a 1-D array, add the channel axis:
    if len(scaled.shape) == 1:
        scaled = np.reshape(scaled, (-1, 1))
    buffer[:, :] = scaled
1056 # soundfile interface:
def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open audio file for reading using the SoundFile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        This loader, so that the call can be used in a `with` statement.

    Raises
    ------
    ImportError
        The SoundFile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_soundfile(filepath) with filepath={filepath}')
    if not audio_modules['soundfile']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_soundfile()
    self.sf = soundfile.SoundFile(filepath, 'r')
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = 0
    self.size = 0
    if self.sf.seekable():
        # seeking to the end returns the number of frames, then rewind:
        self.frames = self.sf.seek(0, soundfile.SEEK_END)
        self.sf.seek(0, soundfile.SEEK_SET)
    # TODO: if not seekable, we cannot handle that file!
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = self.sf.format
    self.encoding = self.sf.subtype
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_soundfile
    self.load_audio_buffer = self._load_buffer_soundfile
    return self
def _close_soundfile(self):
    """Close the SoundFile handle, if there is one, and forget it. """
    handle = self.sf
    if handle is None:
        return
    handle.close()
    self.sf = None
def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read through the SoundFile module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # reopen the file in case it has been closed in the meantime:
    if self.sf is None:
        self.sf = soundfile.SoundFile(self.filepath, 'r')
    reader = self.sf
    reader.seek(r_offset, soundfile.SEEK_SET)
    frames = reader.read(r_size, always_2d=True)
    buffer[:, :] = frames
1137 # wavefile interface:
def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading via the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself, ready for reading.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # module not available: reset to an empty state before giving up
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    # get rid of a file that is still open from a previous call:
    if self.sf is not None:
        self._close_wavefile()
    self.sf = wavefile.WaveReader(filepath)
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = self.sf.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # translate the numeric format code of the file into the names of the
    # matching constants of wavefile.Format:
    for name in dir(wavefile.Format):
        value = getattr(wavefile.Format, name)
        if not isinstance(value, int):
            continue
        if (value & wavefile.Format.TYPEMASK) > 0 and \
           (self.sf.format & wavefile.Format.TYPEMASK) == value:
            self.format = name
        if (value & wavefile.Format.SUBMASK) > 0 and \
           (self.sf.format & wavefile.Format.SUBMASK) == value:
            self.encoding = name
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # install the wavefile specific implementations:
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self
def _close_wavefile(self):
    """Close the wavefile reader, if there is one, and forget it. """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None
def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill `buffer` with frames read through the wavefile module.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # reopen the file in case it has been closed in the meantime:
    if self.sf is None:
        self.sf = wavefile.WaveReader(self.filepath)
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    # the chunk is stored transposed relative to `buffer`:
    buffer[:,:] = chunk.T
1224 # audioread interface:
def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open audio file for reading using the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        The loader itself, ready for reading.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # module not available: reset to an empty state before giving up
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    # get rid of a file that is still open from a previous call:
    if self.sf is not None:
        self._close_audioread()
    self.sf = audioread.audio_open(filepath)
    self.filepath = filepath
    self.file_paths = [filepath]
    self.file_indices = [0]
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    # audioread only reports a duration, so the frame count is an estimate
    # that is rounded up:
    self.frames = int(np.ceil(self.rate*self.sf.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # buffer sizes are given in seconds, convert them to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # the most recently decoded block and its position in the file,
    # see _load_buffer_audioread():
    self.read_buffer = np.zeros((0,0))
    self.read_offset = 0
    # install the audioread specific implementations:
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = iter(self.sf)
    return self
def _close_audioread(self):
    """Close the audioread file object, if there is one, and forget it. """
    audio_file = self.sf
    if audio_file is None:
        return
    # leave the context of the audioread object without a pending exception:
    audio_file.__exit__(None, None, None)
    self.sf = None
def _load_buffer_audioread(self, r_offset, r_size, buffer):
    """Load new data from file using the audioread module.

    audioread can only iterate through a file once and in blocksizes that are
    given by audioread. Therefore we keep yet another buffer: `self.read_buffer`
    at file offset `self.read_offset` containing whatever audioread returned.

    The blocks returned by audioread are interpreted as little-endian
    16 bit integers and scaled by 1/(2**15 - 1). Frames requested beyond
    the end of the stream are filled with zeros.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    if self.sf is None:
        # The file was closed in the meantime. The reopened file starts
        # at frame 0 again, so the iterator and the cached block of the
        # old file object must be replaced along with it.
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0,0))
        self.read_offset = 0
    b_offset = 0
    if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size
         and self.read_offset < r_offset + r_size ):
        # read_buffer overlaps at the end of the requested interval:
        i = 0
        n = r_offset + r_size - self.read_offset
        if n > r_size:
            # the cached block starts before the requested interval,
            # skip its leading frames:
            i += n - r_size
            n = r_size
        buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0)
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)')
        r_size -= n
        if r_size <= 0:
            return
    # go back to beginning of file:
    if r_offset < self.read_offset:
        if self.verbose > 2:
            print(' rewind')
        self._close_audioread()
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0,0))
        self.read_offset = 0
    # read to position:
    while self.read_offset + self.read_buffer.shape[0] < r_offset:
        self.read_offset += self.read_buffer.shape[0]
        try:
            if hasattr(self.sf_iter, 'next'):
                fbuffer = self.sf_iter.next()
            else:
                fbuffer = next(self.sf_iter)
        except StopIteration:
            self.read_buffer = np.zeros((0,0))
            buffer[:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        if self.verbose > 2:
            print(f' read forward by {self.read_buffer.shape[0]} frames')
    # recycle file data:
    if ( self.read_offset + self.read_buffer.shape[0] > r_offset
         and self.read_offset <= r_offset ):
        i = r_offset - self.read_offset
        n = self.read_offset + self.read_buffer.shape[0] - r_offset
        if n > r_size:
            n = r_size
        buffer[:n,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0)
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
        b_offset += n
        r_offset += n
        r_size -= n
    # read data:
    if self.verbose > 2 and r_size > 0:
        print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)')
    while r_size > 0:
        self.read_offset += self.read_buffer.shape[0]
        try:
            if hasattr(self.sf_iter, 'next'):
                fbuffer = self.sf_iter.next()
            else:
                fbuffer = next(self.sf_iter)
        except StopIteration:
            # end of stream: fill the remaining part of the buffer with zeros
            self.read_buffer = np.zeros((0,0))
            buffer[b_offset:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        n = self.read_buffer.shape[0]
        if n > r_size:
            n = r_size
        if n > 0:
            buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / (2.0**15-1.0)
            if self.verbose > 2:
                print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
            b_offset += n
            r_offset += n
            r_size -= n
1388 # open multiple audio files as one:
def open_multiple(self, filepaths, buffersize=10.0, backsize=0.0,
                  verbose=0, rate=None, channels=None, end_indices=None):
    """Open multiple audio files as a single concatenated array.

    Without `end_indices`, the files are opened one after the other.
    Files that fail to open are skipped. Collecting files stops at the
    first file whose number of channels, sampling rate, or start time
    does not continue the previous files.

    Parameters
    ----------
    filepaths: list of str
        List of file names of audio files.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    rate: float
        If provided, do a minimal initialization (no checking)
        using the provided sampling rate (in Hertz), channels,
        and end_indices.
    channels: int
        If provided, do a minimal initialization (no checking)
        using the provided rate, number of channels, and end_indices.
    end_indices: sequence of int
        If provided, do a minimal initialization (no checking)
        using the provided rate, channels, and end_indices.

    Returns
    -------
    self
        The loader itself, ready for reading.

    Raises
    ------
    TypeError
        `filepaths` must be a sequence.
    ValueError
        Empty `filepaths`.
    FileNotFoundError
        `filepaths` does not contain a single valid file.

    """
    if not isinstance(filepaths, (list, tuple, np.ndarray)):
        raise TypeError('input argument filepaths is not a sequence!')
    if len(filepaths) == 0:
        raise ValueError('input argument filepaths is empty sequence!')
    self.buffersize = buffersize
    self.backsize = backsize
    self.filepath = None
    self.file_paths = []
    self.open_files = []
    self.open_loaders = []
    self.audio_files = []
    self.collect_counter = 0
    self.frames = 0
    self.start_indices = []
    self.end_indices = []
    self.start_time = None
    # expected start time of the next file:
    start_time = None
    self._metadata = {}
    self._locs = np.zeros((0, 2), dtype=int)
    self._labels = np.zeros((0, 2), dtype=object)
    if end_indices is not None:
        # minimal initialization: no file is opened here, the loaders
        # are created on demand by _load_buffer_multiple()
        self.filepath = filepaths[0]
        self.file_paths = filepaths
        self.audio_files = [None] * len(filepaths)
        self.frames = end_indices[-1]
        self.start_indices = [0] + list(end_indices[:-1])
        self.end_indices = end_indices
        self.format = None
        self.encoding = None
        self.rate = rate
        self.channels = channels
    else:
        for filepath in filepaths:
            try:
                a = AudioLoader(filepath, buffersize, backsize, verbose)
            except Exception as e:
                # skip files that can not be opened:
                if verbose > 0:
                    print(e)
                continue
            # collect metadata:
            md = a.metadata()
            fmd = flatten_metadata(md, True)
            add_metadata(self._metadata, fmd)
            if self.filepath is None:
                # first file:
                self.filepath = a.filepath
                self.format = a.format
                self.encoding = a.encoding
                self.rate = a.rate
                self.channels = a.channels
                self.start_time = get_datetime(md)
                start_time = self.start_time
                stime = self.start_time
            else:
                # check channels and rate:
                error_str = None
                if a.channels != self.channels:
                    error_str = f'number of channels differs: ' \
                                f'{a.channels} in {a.filepath} versus ' \
                                f'{self.channels} in {self.filepath}'
                if a.rate != self.rate:
                    error_str = f'sampling rates differ: ' \
                                f'{a.rate} in {a.filepath} versus ' \
                                f'{self.rate} in {self.filepath}'
                # check start time of recording:
                stime = get_datetime(md)
                if start_time is None or stime is None or \
                   abs(start_time - stime) > timedelta(seconds=self._max_time_diff):
                    error_str = f'start time does not indicate continuous recording: ' \
                                f'expected {start_time} instead of ' \
                                f'{stime} in {a.filepath}'
                if error_str is not None:
                    # this and all following files are not used:
                    if verbose > 0:
                        print(error_str)
                    a.close()
                    del a
                    break
            # markers:
            locs, labels = a.markers()
            # shift marker positions by the frames of the preceding files:
            locs[:,0] += self.frames
            self._locs = np.vstack((self._locs, locs))
            self._labels = np.vstack((self._labels, labels))
            # indices:
            self.start_indices.append(self.frames)
            self.frames += a.frames
            self.end_indices.append(self.frames)
            if stime is not None:
                # a continuous recording proceeds at the end of this file:
                start_time = stime + timedelta(seconds=a.frames/a.rate)
            # add file to lists:
            self.file_paths.append(filepath)
            if len(self.open_files) < AudioLoader.max_open_files:
                self.open_files.append(a)
            else:
                a.close()
            if len(self.open_loaders) < AudioLoader.max_open_loaders:
                self.audio_files.append(a)
                self.open_loaders.append(a)
            else:
                a.close()
                del a
                self.audio_files.append(None)
        if len(self.audio_files) == 0:
            raise FileNotFoundError('input argument filepaths does not contain any valid audio file!')
        # set start time from first file:
        if self.start_time is not None:
            set_starttime(self._metadata, self.start_time)
    # setup infrastructure:
    self.file_indices = self.start_indices
    self.start_indices = np.array(self.start_indices)
    self.end_indices = np.array(self.end_indices)
    self.shape = (self.frames, self.channels)
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_multiple
    self.load_audio_buffer = self._load_buffer_multiple
    self._load_metadata = None
    self._load_markers = None
    return self
def _close_multiple(self):
    """Close all underlying audio files and drop the bookkeeping lists. """
    self.open_files = []
    self.open_loaders = []
    # close whatever loaders are still around:
    for loader in getattr(self, 'audio_files', ()):
        if loader is not None:
            loader.close()
    self.audio_files = []
    self.filepath = None
    self.file_paths = []
    self.file_indices = []
    self.start_indices = []
    self.end_indices = []
    # finally remove the attributes that only exist for multiple files:
    for name in ('audio_files', 'open_files', 'open_loaders',
                 'start_indices', 'end_indices'):
        delattr(self, name)
def _load_buffer_multiple(self, r_offset, r_size, buffer):
    """Load new data from the underlying files.

    The requested range is split at the file boundaries stored in
    `self.start_indices` and `self.end_indices`, and each part is read
    by the `load_audio_buffer()` function of the respective loader.
    Loaders that are not available are created on demand. The lists
    `self.open_files` and `self.open_loaders` are kept in order of use
    and are limited to `AudioLoader.max_open_files` and
    `AudioLoader.max_open_loaders` entries, respectively.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    offs = r_offset   # next frame to be read, counted over all files
    size = r_size     # number of frames that still need to be read
    boffs = 0         # next frame to be written in buffer
    # index of the first file whose end index is larger than offs:
    ai = np.searchsorted(self.end_indices, offs, side='right')
    while size > 0:
        if self.audio_files[ai] is None:
            # no loader for this file yet, create one:
            a = AudioLoader(self.file_paths[ai],
                            self.buffersize, self.backsize, 0)
            self.audio_files[ai] = a
            self.open_loaders.append(a)
            self.open_files.append(a)
            # too many open files, close the one at the front of the list:
            if len(self.open_files) > AudioLoader.max_open_files:
                a0 = self.open_files.pop(0)
                a0.close()
            # too many loaders, dispose the one at the front of the list:
            if len(self.open_loaders) > AudioLoader.max_open_loaders:
                a0 = self.open_loaders.pop(0)
                self.audio_files[self.audio_files.index(a0)] = None
                a0.close()
                del a0
                # run the garbage collector only every few disposed loaders:
                self.collect_counter += 1
                if self.collect_counter > AudioLoader.max_open_loaders//2:
                    gc.collect()
                    self.collect_counter = 0
        else:
            # move the loader to the end of the list of loaders:
            self.open_loaders.pop(self.open_loaders.index(self.audio_files[ai]))
            self.open_loaders.append(self.audio_files[ai])
        # range of frames to be read, relative to the start of this file:
        ai0 = offs - self.start_indices[ai]
        ai1 = offs + size
        if ai1 > self.end_indices[ai]:
            ai1 = self.end_indices[ai]
        ai1 -= self.start_indices[ai]
        n = ai1 - ai0
        self.audio_files[ai].load_audio_buffer(ai0, n,
                                               buffer[boffs:boffs + n,:])
        # move the loader to the end of the list of open files:
        if self.audio_files[ai] in self.open_files:
            self.open_files.pop(self.open_files.index(self.audio_files[ai]))
        self.open_files.append(self.audio_files[ai])
        if len(self.open_files) > AudioLoader.max_open_files:
            self.open_files[0].close()
            self.open_files.pop(0)
        # continue with the next file:
        boffs += n
        offs += n
        size -= n
        ai += 1
def open(self, filepath, buffersize=10.0, backsize=0.0,
         verbose=0, **kwargs):
    """Open audio file for reading.

    The available audio modules are tried one after the other until
    one of them succeeds in opening the file with at least one frame.

    Parameters
    ----------
    filepath: str or list of str
        Name of the file or list of many file names that should be
        made accessible as a single array.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        specific opening functions.
        NOTE(review): they are forwarded to the single-file open
        functions only, not to open_multiple() - confirm intent.

    Returns
    -------
    self
        The loader itself, ready for reading.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    """
    self.buffer = np.array([])
    self.rate = 0.0
    is_sequence = isinstance(filepath, (list, tuple, np.ndarray))
    # The truth value of a numpy array with more than one element is
    # ambiguous and raises, therefore sequences are checked by length.
    if is_sequence:
        is_empty = len(filepath) == 0
    else:
        is_empty = not filepath
    if is_empty:
        raise ValueError('input argument filepath is empty string!')
    if is_sequence:
        if len(filepath) > 1:
            self.open_multiple(filepath, buffersize, backsize, verbose)
            if len(self.file_paths) > 1:
                return self
            # only a single file turned out to be usable,
            # open it as an ordinary single file:
            filepath = self.file_paths[0]
            self.close()
        else:
            filepath = filepath[0]
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
        )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1, **kwargs)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                    if verbose > 1:
                        if self.format is not None:
                            print(f' format : {self.format}')
                        if self.encoding is not None:
                            print(f' encoding : {self.encoding}')
                        print(f' sampling rate: {self.rate} Hz')
                        print(f' channels : {self.channels}')
                        print(f' frames : {self.frames}')
                return self
        except Exception as e:
            # remember the failure and try the next module:
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    # none of the modules was able to open the file:
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' + \
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the file with `load_audio()`, cross checks the soundfile
    against the audioread module if both are available, and then
    compares random, forward, and backward access via `AudioLoader`
    against the data loaded at once.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # matplotlib is needed for plotting only and must be bound
        # in this function for the plt calls below to work.
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        # both modules might return a different number of frames:
        n = min((len(data1), len(data2)))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        nframes = int(1.5*data.rate)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        for inx in np.random.randint(0, len(data)-nframes, 1000):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
def main(*args):
    """Call demo with command line arguments.

    Prints the usage text and returns if `-h` is given or if no
    audio file is specified.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    expect_module = False   # True if the previous argument was '-m'
    for arg in args:
        if expect_module:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            expect_module = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            expect_module = True
        else:
            # the first positional argument is the file, ignore the rest:
            file_path = arg
            break

    # without a file there is nothing to demonstrate:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        # Report a missing matplotlib before the demo starts. Note that
        # this binds plt in the scope of main() only.
        import matplotlib.pyplot as plt

    demo(file_path, plot)
# Run the demo when the module is executed as a script; the command
# line arguments without the program name are handed over to main().
if __name__ == "__main__":
    main(*sys.argv[1:])