Coverage for src/audioio/audioloader.py: 92%
608 statements
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-15 07:29 +0000
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-15 07:29 +0000
1"""Loading data, metadata, and markers from audio files.
3- `load_audio()`: load a whole audio file at once.
4- `metadata()`: read metadata of an audio file.
5- `markers()`: read markers of an audio file.
6- class `AudioLoader`: read data from audio files in chunks.
8The read in data are always numpy arrays of floats ranging between -1 and 1.
9The arrays are 2-D ndarrays with first axis time and second axis channel,
10even for single channel data.
12If an audio file cannot be loaded, you might need to install
13additional packages. See
14[installation](https://bendalab.github.io/audioio/installation) for
15further instructions.
17For a demo run the module as:
18```
19python -m src.audioio.audioloader audiofile.wav
20```
21"""
23import sys
24import warnings
25import os.path
26import numpy as np
27from .audiomodules import *
28from .bufferedarray import BufferedArray
29from .riffmetadata import metadata_riff, markers_riff
30from .audiometadata import update_gain, add_unwrap
31from .audiotools import unwrap
def load_wave(filepath):
    """Load wav file using the wave module from pythons standard library.

    Supports 8-bit unsigned as well as 16-, 24-, and 32-bit signed
    PCM data. The file is closed even if reading or decoding fails.

    Documentation
    -------------
    https://docs.python.org/3.8/library/wave.html

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The wave module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['wave']:
        raise ImportError

    # Wave_read objects are context managers, so the file gets closed
    # even when reading the frames raises:
    with wave.open(filepath, 'r') as wf:
        nchannels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        rate = wf.getframerate()
        buffer = wf.readframes(wf.getnframes())
    factor = 2.0**(sampwidth*8 - 1)
    if sampwidth == 1:
        # 8-bit wav data are unsigned, shift them to be centered on zero:
        samples = np.frombuffer(buffer, dtype='u1').reshape(-1, nchannels)
        data = samples.astype('d')/factor - 1.0
    elif sampwidth == 3:
        # numpy has no 24-bit integer type: put the three bytes of each
        # sample into the upper bytes of a little-endian 32-bit integer
        # and shift back arithmetically to restore the sign.
        raw = np.frombuffer(buffer, dtype='u1').reshape(-1, nchannels, 3)
        padded = np.zeros((raw.shape[0], nchannels, 4), dtype='u1')
        padded[:, :, 1:] = raw
        samples = padded.view('<i4')[:, :, 0] >> 8
        data = samples.astype('d')/factor
    else:
        # wav data are always little endian, independent of the platform:
        samples = np.frombuffer(buffer, dtype=f'<i{sampwidth}').reshape(-1, nchannels)
        data = samples.astype('d')/factor
    return data, float(rate)
def load_ewave(filepath):
    """Read a complete wav file with the ewave package.

    Documentation
    -------------
    https://github.com/melizalab/py-ewave

    Parameters
    ----------
    filepath: str
        Path and name of the wav file to be read.

    Returns
    -------
    data: ndarray
        The samples as a 2-D ndarray of floats with time on the first
        and channel on the second axis.
    rate: float
        Sampling rate in Hertz.

    Raises
    ------
    ImportError
        The ewave module is not installed
    *
        Reading the file failed
    """
    if not audio_modules['ewave']:
        raise ImportError

    data, rate = np.array([]), 0.0
    with ewave.open(filepath, 'r') as wav:
        rate = wav.sampling_rate
        raw = wav.read()
        data = ewave.rescale(raw, 'float')
        # single channel data come as 1-D array, add the channel axis:
        if data.ndim == 1:
            data = data.reshape(-1, 1)
    return data, float(rate)
def load_wavfile(filepath):
    """Load wav file using scipy.io.wavfile.

    Warnings issued by scipy while reading (e.g. about unknown chunks)
    are suppressed for the duration of the read only; the warning
    filters of the calling program are left untouched.

    Documentation
    -------------
    http://docs.scipy.org/doc/scipy/reference/io.html
    Does not support blocked read.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The scipy.io module is not installed
    *
        Loading of the data failed
    """
    if not audio_modules['scipy.io.wavfile']:
        raise ImportError

    # catch_warnings() restores the previous filters on exit, also if
    # wavfile.read() raises:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rate, data = wavfile.read(filepath)
    if data.dtype == np.uint8:
        # 8-bit wav data are unsigned with an offset of 128:
        data = data / 128.0 - 1.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        # scale by the most negative value of the integer type:
        data = data / (2.0**(data.dtype.itemsize*8-1))
    else:
        data = data.astype(np.float64, copy=False)
    if len(data.shape) == 1:
        data = np.reshape(data, (-1, 1))
    return data, float(rate)
def load_soundfile(filepath):
    """Read a complete audio file with the SoundFile package (libsndfile).

    Documentation
    -------------
    http://pysoundfile.readthedocs.org
    http://www.mega-nerd.com/libsndfile

    Parameters
    ----------
    filepath: str
        Path and name of the audio file to be read.

    Returns
    -------
    data: ndarray
        The samples as a 2-D ndarray of 64-bit floats with time on the
        first and channel on the second axis.
    rate: float
        Sampling rate in Hertz.

    Raises
    ------
    ImportError
        The soundfile module is not installed.
    *
        Reading the file failed.
    """
    if not audio_modules['soundfile']:
        raise ImportError

    data, rate = np.array([]), 0.0
    with soundfile.SoundFile(filepath, 'r') as snd:
        rate = snd.samplerate
        # frames=-1 requests everything up to the end of the file:
        data = snd.read(frames=-1, dtype='float64', always_2d=True)
    return data, float(rate)
def load_wavefile(filepath):
    """Read a complete audio file with the wavefile package (libsndfile).

    Documentation
    -------------
    https://github.com/vokimon/python-wavefile

    Parameters
    ----------
    filepath: str
        Path and name of the audio file to be read.

    Returns
    -------
    data: ndarray
        The samples as a 2-D ndarray of 64-bit floats with time on the
        first and channel on the second axis.
    rate: float
        Sampling rate in Hertz.

    Raises
    ------
    ImportError
        The wavefile module is not installed.
    *
        Reading the file failed.
    """
    if not audio_modules['wavefile']:
        raise ImportError

    rate, samples = wavefile.load(filepath)
    samples = samples.astype(np.float64, copy=False)
    # the loaded array is transposed to get time on the first axis:
    return samples.T, float(rate)
def load_audioread(filepath):
    """Load audio file using audioread.

    The number of frames is estimated from the duration reported by
    audioread. Decoded frames exceeding this estimate are discarded.

    Documentation
    -------------
    https://github.com/beetbox/audioread

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, first dimension is time, second is channel.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ImportError
        The audioread module is not installed.
    *
        Loading of the data failed.
    """
    if not audio_modules['audioread']:
        raise ImportError

    data = np.array([])
    rate = 0.0
    with audioread.audio_open(filepath) as af:
        rate = af.samplerate
        data = np.zeros((int(np.ceil(af.samplerate*af.duration)), af.channels),
                        dtype="<i2")
        index = 0
        for buffer in af:
            fulldata = np.frombuffer(buffer, dtype='<i2').reshape(-1, af.channels)
            n = fulldata.shape[0]
            if index + n > len(data):
                # only the frames that still fit into the preallocated
                # array are taken from the last chunk:
                n = len(data) - index
            if n <= 0:
                break
            data[index:index+n, :] = fulldata[:n, :]
            index += n
    return data/(2.0**15-1.0), float(rate)
# The order of the entries is the order in which load_audio() tries
# the modules.
audio_loader_funcs = (
    ('soundfile', load_soundfile),
    ('wave', load_wave),
    ('wavefile', load_wavefile),
    ('ewave', load_ewave),
    ('scipy.io.wavfile', load_wavfile),
    ('audioread', load_audioread),
)
"""List of implemented load() functions.

Each element of the list is a tuple with the module's name and its
load() function.

"""
def load_audio(filepath, verbose=0):
    """Call this function to load all channels of audio data from a file.

    This function tries different python modules to load the audio file.
    The first module that returns a non-empty data array wins.

    Parameters
    ----------
    filepath: str
        The full path and name of the file to load.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    data: ndarray
        All data traces as an 2-D ndarray, even for single channel data.
        First dimension is time, second is channel.
        Data values range maximally between -1 and 1.
    rate: float
        The sampling rate of the data in Hertz.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.

    Examples
    --------
    ```
    import matplotlib.pyplot as plt
    from audioio import load_audio

    data, rate = load_audio('some/audio.wav')
    plt.plot(np.arange(len(data))/rate, data[:,0])
    plt.show()
    ```
    """
    # check values:
    if filepath is None or len(filepath) == 0:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')

    # load an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, load_file in audio_loader_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            data, rate = load_file(filepath)
            if len(data) > 0:
                if verbose > 0:
                    print(f'loaded data from file "{filepath}" using {lib} module')
                    if verbose > 1:
                        print(f' sampling rate: {rate:g} Hz')
                        print(f' channels : {data.shape[1]}')
                        print(f' frames : {len(data)}')
                return data, rate
            # a module that returned nothing is reported like a failure,
            # so that the final error message names every attempt:
            errors.append(f' {lib} failed: no data loaded')
            if verbose > 1:
                print(errors[-1])
        except Exception as e:
            # any failure of a module just means: try the next one.
            errors.append(f' {lib} failed: {str(e)}')
            if verbose > 1:
                print(errors[-1])
    if len(not_installed) > 0:
        errors.append('\n You may need to install one of the ' +
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def metadata(filepath, store_empty=False):
    """Retrieve the metadata stored in an audio file.

    The file is read with `metadata_riff()`. If that raises a
    ValueError (the file is not a RIFF file), an empty dictionary
    is returned.

    Parameters
    ----------
    filepath: str or file handle
        The audio file from which to read metadata.
    store_empty: bool
        If `False` do not return meta data with empty values.

    Returns
    -------
    meta_data: nested dict
        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding values
        are dictionaries, then the key is the section name of the
        metadata contained in the dictionary. All other types of
        values are values for the respective key. In particular they
        are strings. But other types like for example ints or floats
        are also allowed.  See `audioio.audiometadata` module for
        available functions to work with such metadata.

    Examples
    --------
    ```
    from audioio import metadata, print_metadata
    md = metadata('data.wav')
    print_metadata(md)
    ```

    """
    try:
        meta_data = metadata_riff(filepath, store_empty)
    except ValueError:
        # not a RIFF file
        meta_data = {}
    return meta_data
def markers(filepath):
    """Retrieve the markers stored in an audio file.

    The file is read with `markers_riff()`. If that raises a
    ValueError (the file is not a RIFF file), empty arrays are
    returned.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Parameters
    ----------
    filepath: str or file handle
        The audio file.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of string objects
        Labels (first column) and texts (second column)
        for each marker (rows).

    Examples
    --------
    ```
    from audioio import markers, print_markers
    locs, labels = markers('data.wav')
    print_markers(locs, labels)
    ```
    """
    try:
        result = markers_riff(filepath)
    except ValueError:
        # not a RIFF file: no markers, but arrays of the documented shape
        no_locs = np.zeros((0, 2), dtype=int)
        no_labels = np.zeros((0, 2), dtype=object)
        result = (no_locs, no_labels)
    return result
450class AudioLoader(BufferedArray):
451 """Buffered reading of audio data for random access of the data in the file.
453 The class allows for reading very large audio files that do not
454 fit into memory.
455 An AudioLoader instance can be used like a huge read-only numpy array, i.e.
456 ```
457 data = AudioLoader('path/to/audio/file.wav')
458 x = data[10000:20000,0]
459 ```
460 The first index specifies the frame, the second one the channel.
462 Behind the scenes, `AudioLoader` tries to open the audio file with
463 all available audio modules until it succeeds (first line). It
464 then reads data from the file as necessary for the requested data
465 (second line). Accessing the content of the audio files via a
466 buffer that holds only a part of the data is managed by the
467 `BufferedArray` class.
469 Reading sequentially through the file is always possible. Some
470 modules, however, (e.g. audioread, needed for mp3 files) can only
471 read forward. If previous data are requested, then the file is read
472 from the beginning again. This slows down access to previous data
473 considerably. Use the `backsize` argument of the open function to
474 make sure some data are loaded into the buffer before the requested
475 frame. Then a subsequent access to the data within `backsize` seconds
476 before that frame can still be handled without the need to reread
477 the file from the beginning.
479 Usage
480 -----
481 With context management:
482 ```
483 import audioio as aio
484 with aio.AudioLoader(filepath, 60.0, 10.0) as data:
485 # do something with the content of the file:
486 x = data[0:10000]
487 y = data[10000:20000]
488 z = x + y
489 ```
491 For using a specific audio module, here the audioread module:
492 ```
493 data = aio.AudioLoader()
494 with data.open_audioread(filepath, 60.0, 10.0):
495 # do something ...
496 ```
498 Use `blocks()` for sequential, blockwise reading and processing:
499 ```
500 from scipy.signal import spectrogram
501 nfft = 2048
502 with aio.AudioLoader('some/audio.wav') as data:
503 for x in data.blocks(100*nfft, nfft//2):
504 f, t, Sxx = spectrogram(x, fs=data.rate,
505 nperseg=nfft, noverlap=nfft//2)
506 ```
508 For loop iterates over single frames (1-D arrays containing samples for each channel):
509 ```
510 with aio.AudioLoader('some/audio.wav') as data:
511 for x in data:
512 print(x)
513 ```
515 Traditional open and close:
516 ```
517 data = aio.AudioLoader(filepath, 60.0)
518 x = data[:,:] # read the whole file
519 data.close()
520 ```
522 this is the same as:
523 ```
524 data = aio.AudioLoader()
525 data.open(filepath, 60.0)
526 ...
527 ```
529 Classes inheriting AudioLoader just need to implement
530 ```
531 self.load_audio_buffer(offset, nframes, pbuffer)
532 ```
533 This function needs to load the supplied `pbuffer` with
534 `nframes` frames of data starting at frame `offset`.
536 In the constructor or some kind of opening function, you need to
537 set some member variables, as described for `BufferedArray`.
539 Parameters
540 ----------
541 filepath: str
542 Name of the file.
543 buffersize: float
544 Size of internal buffer in seconds.
545 backsize: float
546 Part of the buffer to be loaded before the requested start index in seconds.
547 verbose: int
548 If larger than zero show detailed error/warning messages.
549 store_empty: bool
550 If `False` do not return meta data with empty values.
552 Attributes
553 ----------
554 filepath: str
555 Path and name of the file.
556 rate: float
557 The sampling rate of the data in Hertz.
558 channels: int
559 The number of channels.
560 frames: int
561 The number of frames in the file. Same as `len()`.
562 format: str or None
563 Format of the audio file.
564 encoding: str or None
565 Encoding/subtype of the audio file.
566 shape: tuple
567 Frames and channels of the data.
568 ndim: int
569 Number of dimensions: always 2 (frames and channels).
570 offset: int
571 Index of first frame in the current buffer.
572 buffer: ndarray of floats
573 The currently available data from the file.
574 ampl_min: float
575 Minimum amplitude the file format supports.
576 Always -1.0 for audio data.
577 ampl_max: float
578 Maximum amplitude the file format supports.
579 Always +1.0 for audio data.
581 Methods
582 -------
583 - `len()`: Number of frames.
584 - `open()`: Open an audio file by trying available audio modules.
585 - `open_*()`: Open an audio file with the respective audio module.
586 - `__getitem__`: Access data of the audio file.
587 - `update_buffer()`: Update the internal buffer for a range of frames.
588 - `blocks()`: Generator for blockwise processing of AudioLoader data.
589 - `format_dict()`: technical infos about how the data are stored.
590 - `metadata()`: Metadata stored along with the audio data.
591 - `markers()`: Markers stored along with the audio data.
592 - `set_unwrap()`: Set parameters for unwrapping clipped data.
593 - `close()`: Close the file.
595 Notes
596 -----
597 Access via `__getitem__` or `__next__` is slow!
598 Even worse, using numpy functions on this class first converts
599 it to a numpy array - that is something we actually do not want!
600 We should subclass directly from numpy.ndarray .
601 For details see http://docs.scipy.org/doc/numpy/user/basics.subclassing.html
602 When subclassing, there is an offset argument, that might help to
603 speed up `__getitem__` .
605 """
def __init__(self, filepath=None, buffersize=10.0, backsize=0.0,
             verbose=0, **meta_kwargs):
    """Initialize the loader and optionally open an audio file.

    Parameters
    ----------
    filepath: str or None
        Name of the file. If `None`, no file is opened.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.
    **meta_kwargs: dict
        Keyword arguments that are passed on to the function
        loading the metadata when `metadata()` is called.
    """
    super().__init__(verbose=verbose)
    self.format = None
    self.encoding = None
    # metadata and markers are loaded lazily on first request:
    self._metadata = None
    self._locs = None
    self._labels = None
    self._load_metadata = metadata
    self._load_markers = markers
    self._metadata_kwargs = meta_kwargs
    self.filepath = None
    self.sf = None
    # the open_*() functions replace close() by the one matching
    # the module that opened the file:
    self.close = self._close
    self.load_buffer = self._load_buffer_unwrap
    self.ampl_min = -1.0
    self.ampl_max = +1.0
    self.unwrap = False
    self.unwrap_thresh = 0.0
    self.unwrap_clips = False
    self.unwrap_ampl = 1.0
    # set_unwrap() and _load_buffer_unwrap() use `unwrap_down_scale`;
    # `unwrap_downscale` is kept for code that relied on the old name.
    self.unwrap_down_scale = True
    self.unwrap_downscale = True
    if filepath is not None:
        self.open(filepath, buffersize, backsize, verbose)
numpy_encodings = {
    np.dtype(np.int64): 'PCM_64',
    np.dtype(np.int32): 'PCM_32',
    np.dtype(np.int16): 'PCM_16',
    np.dtype(np.single): 'FLOAT',
    np.dtype(np.double): 'DOUBLE',
    np.dtype('>f4'): 'FLOAT',
    np.dtype('>f8'): 'DOUBLE',
}
""" Map numpy dtypes to encodings.
"""
def _close(self):
    """Placeholder close function that does nothing."""
    return None
def __del__(self):
    """Close the file when the loader is garbage collected."""
    # `close` is assigned in __init__(); it is missing if construction
    # failed before that point, and __del__ is called nevertheless.
    close = getattr(self, 'close', None)
    if close is not None:
        close()
def format_dict(self):
    """ Summarize how the data are stored in the file.

    Returns
    -------
    fmt: dict
        Dictionary with filepath, format, encoding, samplingrate,
        channels, frames, and duration of the audio file.
        Format and encoding are left out if they are `None`.
        Samplingrate and duration are strings including their unit.

    """
    info = {'filepath': self.filepath}
    # format and encoding are not known for every audio module:
    for key in ('format', 'encoding'):
        value = getattr(self, key)
        if value is not None:
            info[key] = value
    duration = self.frames/self.rate
    info['samplingrate'] = f'{self.rate:.0f}Hz'
    info['channels'] = self.channels
    info['frames'] = self.frames
    info['duration'] = f'{duration:.3f}s'
    return info
def metadata(self):
    """Metadata of the audio file.

    The metadata are loaded on the first call and cached for
    subsequent calls. Without a loading function an empty
    dictionary is stored.

    Returns
    -------
    meta_data: nested dict

        Meta data contained in the audio file.  Keys of the nested
        dictionaries are always strings.  If the corresponding
        values are dictionaries, then the key is the section name
        of the metadata contained in the dictionary. All other
        types of values are values for the respective key. In
        particular they are strings. But other types like for
        example ints or floats are also allowed.  See
        `audioio.audiometadata` module for available functions to
        work with such metadata.

    """
    if self._metadata is not None:
        return self._metadata
    loader = self._load_metadata
    if loader is None:
        self._metadata = {}
    else:
        self._metadata = loader(self.filepath, **self._metadata_kwargs)
    return self._metadata
def markers(self):
    """Markers of the audio file.

    The markers are loaded on the first call and cached for
    subsequent calls. Without a loading function empty arrays
    are stored.

    See `audioio.audiomarkers` module for available functions
    to work with markers.

    Returns
    -------
    locs: 2-D ndarray of int
        Marker positions (first column) and spans (second column)
        for each marker (rows).
    labels: 2-D ndarray of str objects
        Labels (first column) and texts (second column)
        for each marker (rows).
    """
    if self._locs is not None:
        return self._locs, self._labels
    loader = self._load_markers
    if loader is None:
        self._locs = np.zeros((0, 2), dtype=int)
        self._labels = np.zeros((0, 2), dtype=object)
    else:
        self._locs, self._labels = loader(self.filepath)
    return self._locs, self._labels
def set_unwrap(self, thresh, clips=False, down_scale=True, unit=''):
    """Configure unwrapping of clipped data.

    See unwrap() function from the audioio package.

    The settings are stored in the `unwrap*` attributes. If
    unwrapping gets enabled, they are also noted in the metadata.

    Parameters
    ----------
    thresh: float
        Threshold for detecting wrapped data relative to self.unwrap_ampl
        which is set to the current self.ampl_max.
        Unwrapping is enabled only for thresholds larger than 0.001.
    clips: bool
        If True, then clip the unwrapped data properly.
        Otherwise, unwrap the data and double the
        minimum and maximum data range
        (self.ampl_min and self.ampl_max).
    down_scale: bool
        If not `clips`, then downscale the signal by a factor of two,
        in order to keep the range between -1 and 1.
    unit: str
        Unit of the data.
    """
    self.unwrap_ampl = self.ampl_max
    self.unwrap_thresh = thresh
    self.unwrap_clips = clips
    self.unwrap_down_scale = down_scale
    self.unwrap = thresh > 1e-3
    if not self.unwrap:
        return
    if self.unwrap_clips:
        add_unwrap(self.metadata(),
                   self.unwrap_thresh*self.unwrap_ampl,
                   self.unwrap_ampl, unit)
        return
    if down_scale:
        # the halved signal is recorded as a gain change in the metadata:
        update_gain(self.metadata(), 0.5)
        add_unwrap(self.metadata(),
                   0.5*self.unwrap_thresh*self.unwrap_ampl,
                   0.0, unit)
        return
    # neither clipped nor scaled down: the data range doubles.
    self.ampl_min *= 2
    self.ampl_max *= 2
    add_unwrap(self.metadata(),
               self.unwrap_thresh*self.unwrap_ampl,
               0.0, unit)
def _load_buffer_unwrap(self, r_offset, r_size, pbuffer):
    """Fill the buffer from the file and unwrap the data if requested.

    Parameters
    ----------
    r_offset: int
       First frame to be read from file.
    r_size: int
       Number of frames to be read from file.
    pbuffer: ndarray
       Buffer where to store the loaded data.
    """
    self.load_audio_buffer(r_offset, r_size, pbuffer)
    if not self.unwrap:
        return
    # TODO: handle edge effects!
    unwrap(pbuffer, self.unwrap_thresh, self.unwrap_ampl)
    if self.unwrap_clips:
        # limit the unwrapped data to the supported amplitude range:
        too_large = pbuffer > self.ampl_max
        pbuffer[too_large] = self.ampl_max
        too_small = pbuffer < self.ampl_min
        pbuffer[too_small] = self.ampl_min
    elif self.unwrap_down_scale:
        pbuffer *= 0.5
790 # wave interface:
def open_wave(self, filepath, buffersize=10.0, backsize=0.0,
              verbose=0):
    """Open an audio file for buffered reading with the wave module.

    Note: we assume that setpos() and tell() use integer numbers!

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        The loader itself.

    Raises
    ------
    ImportError
        The wave module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wave(filepath) with filepath={filepath}')
    if not audio_modules['wave']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = self.frames = self.size = 0
        self.shape = (0, 0)
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wave()
    self.sf = wave.open(filepath, 'r')
    wav = self.sf
    self.filepath = filepath
    self.rate = float(wav.getframerate())
    self.format = 'WAV'
    width = wav.getsampwidth()
    if width != 1:
        self.dtype = f'i{width}'
        self.encoding = f'PCM_{width*8}'
    else:
        # 8-bit wav data are unsigned:
        self.dtype = 'u1'
        self.encoding = 'PCM_U8'
    self.factor = 1.0/(2.0**(width*8-1))
    self.channels = wav.getnchannels()
    self.frames = wav.getnframes()
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wave
    self.load_audio_buffer = self._load_buffer_wave
    # read 1 frame to determine the unit of the position values:
    self.p0 = wav.tell()
    wav.readframes(1)
    self.pfac = wav.tell() - self.p0
    wav.setpos(self.p0)
    return self
def _close_wave(self):
    """Close the file opened with the wave module, if there is one. """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None
def _load_buffer_wave(self, r_offset, r_size, buffer):
    """Load new data from file using the wave module.

    Handles 8-bit unsigned as well as 16-, 24-, and 32-bit signed
    PCM data, as selected by `self.dtype` set in `open_wave()`.

    Parameters
    ----------
    r_offset: int
       First frame to be read from file.
    r_size: int
       Number of frames to be read from file.
    buffer: ndarray
       Buffer where to store the loaded data.
    """
    self.sf.setpos(r_offset*self.pfac + self.p0)
    fbuffer = self.sf.readframes(r_size)
    if self.dtype == 'i3':
        # numpy has no 24-bit integer type: put the three bytes of each
        # sample into the upper bytes of a little-endian 32-bit integer
        # and shift back arithmetically to restore the sign.
        raw = np.frombuffer(fbuffer, dtype='u1').reshape((-1, self.channels, 3))
        padded = np.zeros(raw.shape[:2] + (4,), dtype='u1')
        padded[:, :, 1:] = raw
        samples = padded.view('<i4')[:, :, 0] >> 8
        buffer[:, :] = samples * self.factor
        return
    fbuffer = np.frombuffer(fbuffer, dtype=self.dtype).reshape((-1, self.channels))
    if self.dtype[0] == 'u':
        # unsigned data are shifted to be centered on zero:
        buffer[:, :] = fbuffer * self.factor - 1.0
    else:
        buffer[:, :] = fbuffer * self.factor
881 # ewave interface:
def open_ewave(self, filepath, buffersize=10.0, backsize=0.0,
               verbose=0):
    """Open an audio file for buffered reading with the ewave module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        The loader itself.

    Raises
    ------
    ImportError
        The ewave module is not installed.
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_ewave(filepath) with filepath={filepath}')
    if not audio_modules['ewave']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_ewave()
    self.sf = ewave.open(filepath, 'r')
    wav = self.sf
    self.filepath = filepath
    self.rate = float(wav.sampling_rate)
    self.channels = wav.nchannels
    self.frames = wav.nframes
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = 'WAV' # or WAVEX?
    self.encoding = self.numpy_encodings[wav.dtype]
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_ewave
    self.load_audio_buffer = self._load_buffer_ewave
    return self
def _close_ewave(self):
    """Release the file opened with the ewave module, if there is one. """
    if self.sf is None:
        return
    # the reference to the ewave file is dropped, no close() is called:
    del self.sf
    self.sf = None
def _load_buffer_ewave(self, r_offset, r_size, buffer):
    """Fill the buffer with data read by the ewave module.

    Parameters
    ----------
    r_offset: int
       First frame to be read from file.
    r_size: int
       Number of frames to be read from file.
    buffer: ndarray
       Buffer where to store the loaded data.
    """
    raw = self.sf.read(frames=r_size, offset=r_offset, memmap='r')
    chunk = ewave.rescale(raw, 'float')
    # single channel data come as 1-D array, add the channel axis:
    if chunk.ndim == 1:
        chunk = chunk.reshape(-1, 1)
    buffer[:, :] = chunk
956 # soundfile interface:
def open_soundfile(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open an audio file for buffered reading with the SoundFile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        The loader itself.

    Raises
    ------
    ImportError
        The SoundFile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_soundfile(filepath) with filepath={filepath}')
    if not audio_modules['soundfile']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_soundfile()
    self.sf = soundfile.SoundFile(filepath, 'r')
    snd = self.sf
    self.filepath = filepath
    self.rate = float(snd.samplerate)
    self.channels = snd.channels
    self.frames = 0
    self.size = 0
    if snd.seekable():
        # the position of the end of the file is the number of frames:
        self.frames = snd.seek(0, soundfile.SEEK_END)
        snd.seek(0, soundfile.SEEK_SET)
    # TODO: if not seekable, we cannot handle that file!
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    self.format = snd.format
    self.encoding = snd.subtype
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_soundfile
    self.load_audio_buffer = self._load_buffer_soundfile
    return self
def _close_soundfile(self):
    """Close the file opened with the SoundFile module, if there is one. """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None
def _load_buffer_soundfile(self, r_offset, r_size, buffer):
    """Fill the buffer with data read by the SoundFile module.

    Parameters
    ----------
    r_offset: int
       First frame to be read from file.
    r_size: int
       Number of frames to be read from file.
    buffer: ndarray
       Buffer where to store the loaded data.
    """
    snd = self.sf
    snd.seek(r_offset, soundfile.SEEK_SET)
    frames = snd.read(r_size, always_2d=True)
    buffer[:, :] = frames
1033 # wavefile interface:
def open_wavefile(self, filepath, buffersize=10.0, backsize=0.0,
                  verbose=0):
    """Open an audio file for buffered reading with the wavefile module.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self: AudioLoader
        The loader itself.

    Raises
    ------
    ImportError
        The wavefile module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_wavefile(filepath) with filepath={filepath}')
    if not audio_modules['wavefile']:
        # mark the loader as empty before giving up:
        self.rate = 0.0
        self.channels = self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_wavefile()
    self.sf = wavefile.WaveReader(filepath)
    self.filepath = filepath
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    self.frames = self.sf.frames
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # get format and encoding:
    # the names of the integer constants of wavefile.Format matching
    # the masked format code of the file are taken as format and encoding.
    formats = wavefile.Format
    for name in dir(formats):
        code = getattr(formats, name)
        if not isinstance(code, int):
            continue
        if code & formats.TYPEMASK > 0 and \
           (self.sf.format & formats.TYPEMASK) == code:
            self.format = name
        if code & formats.SUBMASK > 0 and \
           (self.sf.format & formats.SUBMASK) == code:
            self.encoding = name
    # init buffer:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    self.close = self._close_wavefile
    self.load_audio_buffer = self._load_buffer_wavefile
    return self
def _close_wavefile(self):
    """Close the file opened with the wavefile module, if there is one. """
    if self.sf is None:
        return
    self.sf.close()
    self.sf = None
def _load_buffer_wavefile(self, r_offset, r_size, buffer):
    """Fill the buffer with data read by the wavefile module.

    Parameters
    ----------
    r_offset: int
       First frame to be read from file.
    r_size: int
       Number of frames to be read from file.
    buffer: ndarray
       Buffer where to store the loaded data.
    """
    reader = self.sf
    reader.seek(r_offset, wavefile.Seek.SET)
    # the reader fills a buffer of its own, which is stored
    # transposed in the target buffer:
    chunk = reader.buffer(r_size, dtype=self.buffer.dtype)
    reader.read(chunk)
    buffer[:, :] = chunk.T
1116 # audioread interface:
def open_audioread(self, filepath, buffersize=10.0, backsize=0.0,
                   verbose=0):
    """Open audio file for reading using the audioread module.

    Note, that audioread can only read forward, therefore random and
    backward access is really slow.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This object, with the file opened and the buffer initialized.

    Raises
    ------
    ImportError
        The audioread module is not installed
    """
    self.verbose = verbose
    if self.verbose > 0:
        print(f'open_audioread(filepath) with filepath={filepath}')
    if not audio_modules['audioread']:
        # leave the object in a well-defined empty state:
        self.rate = 0.0
        self.channels = 0
        self.frames = 0
        self.shape = (0, 0)
        self.size = 0
        self.offset = 0
        raise ImportError
    if self.sf is not None:
        self._close_audioread()
    self.sf = audioread.audio_open(filepath)
    # the path is needed again when the file has to be reopened for rewinding:
    self.filepath = filepath
    self.rate = float(self.sf.samplerate)
    self.channels = self.sf.channels
    # audioread only reports a duration, so derive the number of frames from it:
    self.frames = int(np.ceil(self.rate*self.sf.duration))
    self.shape = (self.frames, self.channels)
    self.size = self.frames * self.channels
    # convert buffer sizes from seconds to frames:
    self.bufferframes = int(buffersize*self.rate)
    self.backframes = int(backsize*self.rate)
    self.init_buffer()
    # secondary buffer holding the most recent block returned by audioread:
    self.read_buffer = np.zeros((0,0))
    self.read_offset = 0
    self.close = self._close_audioread
    self.load_audio_buffer = self._load_buffer_audioread
    self.sf_iter = self.sf.__iter__()
    return self
1171 def _close_audioread(self):
1172 """Close the audio file using the audioread module. """
1173 if self.sf is not None:
1174 self.sf.__exit__(None, None, None)
1175 self.sf = None
def _load_buffer_audioread(self, r_offset, r_size, buffer):
    """Load new data from file using the audioread module.

    audioread can only iterate through a file once and in blocksizes that are
    given by audioread. Therefore we keep yet another buffer: `self.read_buffer`
    at file offset `self.read_offset` containing whatever audioread returned.

    The request is served in up to four steps: reuse the part of
    `self.read_buffer` that overlaps the end of the request, reopen the
    file if the request starts before `self.read_offset`, read forward
    until the block containing `r_offset` is reached, and finally
    copy block after block into `buffer`. If the file ends early, the
    remaining part of `buffer` is filled with zeros.

    Parameters
    ----------
    r_offset: int
        First frame to be read from file.
    r_size: int
        Number of frames to be read from file.
    buffer: ndarray
        Buffer where to store the loaded data.
    """
    # write position within `buffer` for data read from file:
    b_offset = 0
    if ( self.read_offset + self.read_buffer.shape[0] >= r_offset + r_size
         and self.read_offset < r_offset + r_size ):
        # read_buffer overlaps at the end of the requested interval:
        i = 0
        # number of frames between start of read_buffer and end of request:
        n = r_offset + r_size - self.read_offset
        if n > r_size:
            # read_buffer starts before the request: skip its leading frames
            i += n - r_size
            n = r_size
        # blocks are stored as 16-bit integers and scaled to floats on copy:
        buffer[self.read_offset+i-r_offset:self.read_offset+i+n-r_offset,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0)
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the front of the read buffer at {self.read_offset}-{self.read_offset+n} ({self.read_offset-self.offset}-{self.read_offset-self.offset+n} in buffer)')
        r_size -= n
        if r_size <= 0:
            # request completely served from the read buffer
            return
    # go back to beginning of file:
    if r_offset < self.read_offset:
        if self.verbose > 2:
            print(' rewind')
        # reopen the file and restart the iterator to get back to frame 0:
        self._close_audioread()
        self.sf = audioread.audio_open(self.filepath)
        self.sf_iter = self.sf.__iter__()
        self.read_buffer = np.zeros((0,0))
        self.read_offset = 0
    # read to position:
    while self.read_offset + self.read_buffer.shape[0] < r_offset:
        # advance the file offset past the block that is about to be replaced:
        self.read_offset += self.read_buffer.shape[0]
        try:
            # support iterators with a `next()` method as well as the builtin protocol:
            if hasattr(self.sf_iter, 'next'):
                fbuffer = self.sf_iter.next()
            else:
                fbuffer = next(self.sf_iter)
        except StopIteration:
            # file ended before the requested position: return zeros
            self.read_buffer = np.zeros((0,0))
            buffer[:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        # interpret the raw bytes as little-endian 16-bit integers, one column per channel:
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        if self.verbose > 2:
            print(f' read forward by {self.read_buffer.shape[0]} frames')
    # recycle file data:
    if ( self.read_offset + self.read_buffer.shape[0] > r_offset
         and self.read_offset <= r_offset ):
        # read_buffer contains the first requested frame at index i:
        i = r_offset - self.read_offset
        n = self.read_offset + self.read_buffer.shape[0] - r_offset
        if n > r_size:
            n = r_size
        buffer[:n,:] = self.read_buffer[i:i+n,:] / (2.0**15-1.0)
        if self.verbose > 2:
            print(f' recycle {n:6d} frames from the end of the read buffer at {self.read_offset}-{self.read_offset + self.read_buffer.shape[0]} to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
        b_offset += n
        r_offset += n
        r_size -= n
    # read data:
    if self.verbose > 2 and r_size > 0:
        print(f' read {r_size:6d} frames at {r_offset}-{r_offset+r_size} ({r_offset-self.offset}-{r_offset+r_size-self.offset} in buffer)')
    while r_size > 0:
        # advance the file offset past the block that is about to be replaced:
        self.read_offset += self.read_buffer.shape[0]
        try:
            if hasattr(self.sf_iter, 'next'):
                fbuffer = self.sf_iter.next()
            else:
                fbuffer = next(self.sf_iter)
        except StopIteration:
            # file ended: fill the not yet written rest of the buffer with zeros
            self.read_buffer = np.zeros((0,0))
            buffer[b_offset:,:] = 0.0
            if self.verbose > 1:
                print(f' caught StopIteration, padded buffer with {r_size} zeros')
            break
        self.read_buffer = np.frombuffer(fbuffer, dtype='<i2').reshape(-1, self.channels)
        # copy at most as many frames as are still requested:
        n = self.read_buffer.shape[0]
        if n > r_size:
            n = r_size
        if n > 0:
            buffer[b_offset:b_offset+n,:] = self.read_buffer[:n,:] / (2.0**15-1.0)
            if self.verbose > 2:
                print(f' read {n:6d} frames to {r_offset}-{r_offset+n} ({r_offset-self.offset}-{r_offset+n-self.offset} in buffer)')
            b_offset += n
            r_offset += n
            r_size -= n
def open(self, filepath, buffersize=10.0, backsize=0.0, verbose=0):
    """Open audio file for reading.

    The available audio modules are tried one after the other. The
    first one that opens the file and reports at least one frame wins.

    Parameters
    ----------
    filepath: str
        Name of the file.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Returns
    -------
    self
        This object with the file opened.

    Raises
    ------
    ValueError
        Empty `filepath`.
    FileNotFoundError
        `filepath` is not an existing file.
    EOFError
        File size of `filepath` is zero.
    IOError
        Failed to load data.
    """
    self.buffer = np.array([])
    self.rate = 0.0
    if not filepath:
        raise ValueError('input argument filepath is empty string!')
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f'file "{filepath}" not found')
    if os.path.getsize(filepath) <= 0:
        raise EOFError(f'file "{filepath}" is empty (size=0)!')
    # list of implemented open functions:
    audio_open_funcs = (
        ('soundfile', self.open_soundfile),
        ('wave', self.open_wave),
        ('wavefile', self.open_wavefile),
        ('ewave', self.open_ewave),
        ('audioread', self.open_audioread),
    )
    # open an audio file by trying various modules:
    not_installed = []
    errors = [f'failed to load data from file "{filepath}":']
    for lib, open_file in audio_open_funcs:
        if not audio_modules[lib]:
            if verbose > 1:
                print(f'unable to load data from file "{filepath}" using {lib} module: module not available')
            not_installed.append(lib)
            continue
        try:
            open_file(filepath, buffersize, backsize, verbose-1)
            if self.frames > 0:
                if verbose > 0:
                    print(f'opened audio file "{filepath}" using {lib}')
                if verbose > 1:
                    if self.format is not None:
                        print(f' format : {self.format}')
                    if self.encoding is not None:
                        print(f' encoding : {self.encoding}')
                    print(f' sampling rate: {self.rate} Hz')
                    print(f' channels : {self.channels}')
                    print(f' frames : {self.frames}')
                return self
        except Exception as e:
            # a failing module is not fatal: remember the reason and try the next one
            errors.append(f' {lib} failed: {e}')
            if verbose > 1:
                print(errors[-1])
    # no module succeeded, report everything that went wrong:
    if not_installed:
        errors.append('\n You may need to install one of the ' +
                      ', '.join(not_installed) + ' packages.')
    raise IOError('\n'.join(errors))
def demo(file_path, plot):
    """Demo of the audioloader functions.

    Loads the file at once with `load_audio()`, optionally cross checks
    the soundfile against the audioread module, and then compares random,
    forward, and backward access via `AudioLoader` against the data
    loaded at once.

    Parameters
    ----------
    file_path: str
        File path of an audio file.
    plot: bool
        If True also plot the loaded data.
    """
    if plot:
        # import here so that `plt` is guaranteed to be bound in this function:
        import matplotlib.pyplot as plt

    print('')
    print("try load_audio:")
    full_data, rate = load_audio(file_path, 1)
    if plot:
        plt.plot(np.arange(len(full_data))/rate, full_data[:,0])
        plt.show()

    if audio_modules['soundfile'] and audio_modules['audioread']:
        print('')
        print("cross check:")
        data1, rate1 = load_soundfile(file_path)
        data2, rate2 = load_audioread(file_path)
        # the two modules may report slightly different lengths:
        n = min(len(data1), len(data2))
        print(f"rms difference is {np.std(data1[:n]-data2[:n])}")
        if plot:
            plt.plot(np.arange(len(data1))/rate1, data1[:,0])
            plt.plot(np.arange(len(data2))/rate2, data2[:,0])
            plt.show()

    print('')
    print("try AudioLoader:")
    with AudioLoader(file_path, 4.0, 1.0, verbose=1) as data:
        print(f'samplerate: {data.rate:0f}Hz')
        print(f'channels: {data.channels} {data.shape[1]}')
        print(f'frames: {len(data)} {data.shape[0]}')
        # length of the slices used for the access checks (1.5 seconds):
        nframes = int(1.5*data.rate)
        # check access:
        print('check random single frame access')
        for inx in np.random.randint(0, len(data), 1000):
            if np.any(np.abs(full_data[inx] - data[inx]) > 2.0**(-14)):
                print('single random frame access failed', inx, full_data[inx], data[inx])
        print('check random frame slice access')
        # randint() requires a non-empty range, skip files shorter than one slice:
        if len(data) > nframes:
            for inx in np.random.randint(0, len(data)-nframes, 1000):
                if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                    print('random frame slice access failed', inx)
        print('check frame slice access forward')
        for inx in range(0, len(data)-nframes, 10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access forward failed', inx)
        print('check frame slice access backward')
        for inx in range(len(data)-nframes, 0, -10):
            if np.any(np.abs(full_data[inx:inx+nframes] - data[inx:inx+nframes]) > 2.0**(-14)):
                print('frame slice access backward failed', inx)
        # forward:
        for i in range(0, len(data), nframes):
            print(f'forward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
        # and backwards:
        for i in reversed(range(0, len(data), nframes)):
            print(f'backward {i}-{i+nframes}')
            x = data[i:i+nframes,0]
            if plot:
                plt.plot((i+np.arange(len(x)))/rate, x)
                plt.show()
def main(*args):
    """Call demo with command line arguments.

    Recognized arguments are `-h` (print usage), `-p` (plot the loaded
    data), and `-m <module>` (select the audio module to be used). The
    first other argument is taken as the audio file and ends parsing.
    Usage is also printed if no audio file was specified.

    Parameters
    ----------
    args: list of str
        Command line arguments as provided by sys.argv[1:]
    """
    # bind the matplotlib import below at module level, so that other
    # functions of this module that refer to `plt` can resolve it:
    global plt

    print("Checking audioloader module ...")

    show_help = False
    plot = False
    file_path = None
    # True while the next argument is the module name following '-m':
    mod = False
    for arg in args:
        if mod:
            if not select_module(arg):
                print(f'can not select module {arg} that is not installed')
                return
            mod = False
        elif arg == '-h':
            show_help = True
            break
        elif arg == '-p':
            plot = True
        elif arg == '-m':
            mod = True
        else:
            file_path = arg
            break

    # without a file there is nothing to demonstrate, show usage instead:
    if show_help or file_path is None:
        print('')
        print('Usage:')
        print(' python -m src.audioio.audioloader [-m <module>] [-p] <audio/file.wav>')
        print(' -m: audio module to be used')
        print(' -p: plot loaded data')
        return

    if plot:
        import matplotlib.pyplot as plt

    demo(file_path, plot)
# run the demo with the command line arguments when executed as a script:
if __name__ == "__main__":
    main(*sys.argv[1:])