Coverage for src/audioio/bufferedarray.py: 92%
191 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-16 18:31 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-16 18:31 +0000
1"""Buffered time-series data.
3- `blocks()`: generator for blockwise processing of array data.
4- class `BufferedArray()`: random access to time-series data of which only a part is held in memory.
5"""
8import numpy as np
def blocks(data, block_size, noverlap=0, start=0, stop=None):
    """Generator for blockwise processing of array data.

    Parameters
    ----------
    data: ndarray
        Data to loop over. First dimension is time.
        Anything supporting `len()` and slicing works.
    block_size: int
        Len of data blocks to be returned.
    noverlap: int
        Number of indices successive data blocks should overlap.
    start: int
        Optional first index from which on to return blocks of data.
    stop: int
        Optional last index until which to return blocks of data.
        If None, the length of `data` is used.

    Yields
    ------
    data: ndarray
        Successive slices of the input data. The last slice is
        shorter than `block_size` if the data do not fill it. If the
        requested range holds less than a single block (even less
        than `noverlap` indices), it is returned as one short slice.

    Raises
    ------
    ValueError
        `noverlap` larger or equal to `block_size`.

    Examples
    --------
    ```
    import numpy as np
    from audioio import blocks
    data = np.arange(20)
    for x in blocks(data, 6, 2):
        print(x)
    ```
    results in
    ```text
    [0 1 2 3 4 5]
    [4 5 6 7 8 9]
    [ 8  9 10 11 12 13]
    [12 13 14 15 16 17]
    [16 17 18 19]
    ```

    Use it for processing long audio data, like computing a
    spectrogram with overlap:
    ```
    from scipy.signal import spectrogram
    from audioio import AudioLoader, blocks
    nfft = 2048
    with AudioLoader('some/audio.wav') as data:
        for x in blocks(data, 100*nfft, nfft//2):
            f, t, Sxx = spectrogram(x, fs=data.rate,
                                    nperseg=nfft, noverlap=nfft//2)
    ```

    """
    if noverlap >= block_size:
        raise ValueError(f'noverlap={noverlap} larger than block_size={block_size}')
    if stop is None:
        stop = len(data)
    step = block_size - noverlap
    # number of blocks that are completely filled with data:
    n = (stop - start - noverlap)//step
    if n <= 0:
        # Not even a single full block. n gets negative if the range
        # is shorter than noverlap; this must not fall through to the
        # loop below, because it would not run at all.
        yield data[start:stop]
        return
    for k in range(n):
        yield data[start + k*step:start + k*step + block_size]
    # data left over behind the last full block:
    if stop > start + (n - 1)*step + block_size:
        yield data[start + n*step:stop]
class BufferedArray(object):
    """Random access to time-series data of which only a part is held in memory.

    This is a base class for accessing large audio recordings either
    from a file (class `AudioLoader`) or by computing its contents on
    the fly (e.g. filtered data, envelopes or spectrograms). The
    `BufferedArray` behaves like a single big ndarray with first
    dimension indexing the frames and second dimension indexing the
    channels of the data. Higher dimensions are also supported. For
    example, a third dimension for frequencies needed for
    spectrograms. Internally the class holds only a part of the data
    in memory. The size of this buffer is set to `bufferframes`
    frames. If more data are requested, the buffer is enlarged
    accordingly.

    Classes inheriting `BufferedArray` just need to implement
    ```
    self.load_buffer(offset, nframes, pbuffer)
    ```
    This function needs to load the supplied `pbuffer` with
    `nframes` frames of data starting at frame `offset`.

    In the constructor or some kind of opening function, you need to
    set the following member variables, followed by a call to
    `init_buffer()`:

    ```
    self.rate            # number of frames per second
    self.channels        # number of channels per frame
    self.frames          # total number of frames
    self.shape = (self.frames, self.channels, ...)
    self.bufferframes    # number of frames the buffer should hold
    self.backframes      # number of frames kept for moving back
    self.init_buffer()
    ```

    or provide all this information via the constructor:

    Parameters
    ----------
    rate: float
        The sampling rate of the data in Hertz (frames per second).
    channels: int
        The number of channels.
    frames: int
        The number of frames.
    bufferframes: int
        Number of frames the current data buffer holds.
    backframes: int
        Number of frames the current data buffer should keep
        before requested data ranges.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Attributes
    ----------
    rate: float
        The sampling rate of the data in Hertz (frames per second).
    channels: int
        The number of channels.
    frames: int
        The number of frames. Same as `len()`.
    shape: tuple
        Frames and channels of the data. Optional higher dimensions.
    ndim: int
        Number of dimensions: 2 (frames and channels) or higher.
    size: int
        Total number of samples: frames times channels.
    offset: int
        Index of first frame in the current buffer.
    buffer: ndarray of floats
        The currently available data. First dimension is time, second channels.
        Optional higher dimensions according to `ndim` and `shape`.
    bufferframes: int
        Number of frames the current data buffer holds.
    backframes: int
        Number of frames the current data buffer should keep
        before requested data ranges.
    buffer_changed: ndarray of bool
        For each channel a flag, whether the buffer content has been changed.
        Set to `True`, whenever `load_buffer()` was called.

    Methods
    -------
    - `len()`: Number of frames.
    - `__getitem__`: Access data.
    - `blocks()`: Generator for blockwise processing of the data.
    - `update_buffer()`: make sure that the buffer contains data of a range of indices.
    - `update_time()`: make sure that the buffer contains data of a given time range.
    - `reload_buffer()`: reload the current buffer.
    - `move_buffer()`: move and resize buffer (called by update_buffer()).
    - `load_buffer()`: load a range of samples into a buffer (called by reload_buffer() and move_buffer()).
    - `_buffer_position()`: compute position and size of buffer (used by update_buffer()).
    - `_recycle_buffer()`: move buffer to new position and recycle content if possible (called by move_buffer()).
    - `allocate_buffer()`: reallocate the buffer to have the right size (called by _recycle_buffer()).


    Notes
    -----
    Access via `__getitem__` or `__next__` is slow!
    Even worse, using numpy functions on this class first converts
    it to a numpy array - that is something we actually do not want!
    We should subclass directly from numpy.ndarray .
    For details see http://docs.scipy.org/doc/numpy/user/basics.subclassing.html
    When subclassing, there is an offset argument, that might help to
    speed up `__getitem__` .

    """

    def __init__(self, rate=0, channels=0, frames=0, bufferframes=0,
                 backframes=0, verbose=0):
        """Constructor for initializing 2D arrays (times x channels).
        """
        self.rate = rate
        self.channels = channels
        self.frames = frames
        self.shape = (self.frames, self.channels)
        self.ndim = 2
        self.size = self.frames * self.channels
        self.bufferframes = bufferframes  # number of frames the buffer can hold
        self.backframes = backframes      # number of frames kept before
        self.verbose = verbose
        self.offset = 0                   # index of first frame in buffer
        self.init_buffer()

    def __enter__(self):
        """Enter a `with` block; the array itself is the managed object."""
        return self

    def __exit__(self, ex_type, ex_value, tb):
        """Leave a `with` block and run the cleanup of the subclass.

        The cleanup is whatever the class provides as `__del__()`.
        This base class does not define one, so it is looked up
        instead of being called unconditionally.
        """
        cleanup = getattr(self, '__del__', None)
        if cleanup is not None:
            cleanup()
        return (ex_value is None)

    def __len__(self):
        """Total number of frames."""
        return self.frames

    def __iter__(self):
        """Start iterating over single frames.

        The iteration state is kept in the object itself, i.e. only
        a single iteration at a time is supported.
        """
        self.iter_counter = -1
        return self

    def __next__(self):
        """Return the next frame, moving the buffer if necessary."""
        self.iter_counter += 1
        if self.iter_counter >= self.frames:
            raise StopIteration
        else:
            self.update_buffer(self.iter_counter, self.iter_counter + 1)
            return self.buffer[self.iter_counter - self.offset]

    def __getitem__(self, key):
        """Access data like in a numpy array.

        The first index (frames) refers to the whole data. It is
        translated to an index into the buffer after the buffer has
        been moved to hold the requested frames. All further indices
        (channels, ...) are passed on to the buffer unchanged.

        Parameters
        ----------
        key: int, slice, sequence of ints, or tuple
            Frame index, or a tuple with the frame index as its
            first element. Negative indices count from the end.

        Returns
        -------
        data: ndarray or float
            The selected part of the buffer.

        Raises
        ------
        IndexError
            A single index or an element of an index sequence is
            outside of `-frames` to `frames - 1`.
        """
        has_tail = isinstance(key, tuple)
        index = key[0] if has_tail else key
        if isinstance(index, slice):
            # Limits are cast to int (accepts floats). The slice
            # then resolves None, negative and too large limits for
            # both signs of the step:
            start, stop, step = slice(
                None if index.start is None else int(index.start),
                None if index.stop is None else int(index.stop),
                None if index.step is None else int(index.step),
            ).indices(self.frames)
            if len(range(start, stop, step)) == 0:
                # Nothing selected. Do not translate start and stop
                # into the buffer, negative results would count from
                # the end of the buffer and select some data.
                newindex = slice(0, 0)
            elif step > 0:
                self.update_buffer(start, stop)
                newindex = slice(start - self.offset,
                                 stop - self.offset, step)
            else:
                # backwards: frames stop+1 up to start are needed.
                self.update_buffer(stop + 1, start + 1)
                rel_stop = stop - self.offset
                # running down to the first frame of the buffer needs
                # an open end, -1 would mean the last frame:
                newindex = slice(start - self.offset,
                                 rel_stop if rel_stop >= 0 else None,
                                 step)
        elif hasattr(index, '__len__'):
            index = [inx if inx >= 0 else inx + len(self) for inx in index]
            if len(index) == 0:
                # nothing selected, no need to move the buffer:
                newindex = np.zeros(0, dtype=int)
            else:
                start = min(index)
                stop = max(index)
                if start < 0 or stop >= self.frames:
                    raise IndexError(f'index out of bounds for {self.frames} frames')
                self.update_buffer(start, stop + 1)
                newindex = [inx - self.offset for inx in index]
        else:
            frame = int(index)
            if frame < 0:
                frame += self.frames
            # without this check, an index that is still negative
            # would wrap around within the buffer:
            if frame < 0 or frame >= self.frames:
                raise IndexError(f'index {index} is out of bounds for {self.frames} frames')
            self.update_buffer(frame, frame + 1)
            newindex = frame - self.offset
        if has_tail:
            return self.buffer[(newindex,) + key[1:]]
        return self.buffer[newindex]

    def blocks(self, block_size, noverlap=0, start=0, stop=None):
        """Generator for blockwise processing of data.

        Parameters
        ----------
        block_size: int
            Len of data blocks to be returned.
        noverlap: int
            Number of indices successive data blocks should overlap.
        start: int
            Optional first index from which on to return blocks of data.
        stop: int
            Optional last index until which to return blocks of data.

        Yields
        ------
        data: ndarray
            Successive slices of the data.

        Raises
        ------
        ValueError
            `noverlap` larger or equal to `block_size`.

        Examples
        --------
        Use it for processing long audio data, like computing a spectrogram with overlap:
        ```
        from scipy.signal import spectrogram
        from audioio import AudioLoader # AudioLoader is a BufferedArray
        nfft = 2048
        with AudioLoader('some/audio.wav') as data:
            for x in data.blocks(100*nfft, nfft//2):
                f, t, Sxx = spectrogram(x, fs=data.rate,
                                        nperseg=nfft, noverlap=nfft//2)
        ```
        """
        # this is the module level function blocks(), not this method:
        return blocks(self, block_size, noverlap, start, stop)

    def init_buffer(self):
        """Allocate a buffer with zero frames but all the channels.

        Fix `bufferframes` and `backframes` to not exceed the total
        number of frames. Also sets `ndim` and `size` from the
        current `shape`, `frames` and `channels`, and resets
        `offset` and `buffer_changed`.

        """
        self.ndim = len(self.shape)
        self.size = self.frames * self.channels
        if self.bufferframes > self.frames:
            self.bufferframes = self.frames
            self.backframes = 0
        shape = list(self.shape)
        shape[0] = 0
        self.buffer = np.empty(shape)
        self.offset = 0
        self.buffer_changed = np.zeros(self.channels, dtype=bool)

    def update_buffer(self, start, stop):
        """Make sure that the buffer contains data of a range of indices.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index behind the last requested frame.
        """
        offset, nframes = self._buffer_position(start, stop)
        self.move_buffer(offset, nframes)

    def update_time(self, start, stop):
        """Make sure that the buffer contains data of a given time range.

        Times are converted to frame indices by multiplication with
        `rate`.

        Parameters
        ----------
        start: float
            Time point of first requested frame.
        stop: float
            Time point of last requested frame.
        """
        self.update_buffer(int(start*self.rate), int(stop*self.rate) + 1)

    def reload_buffer(self):
        """Reload the current buffer.

        Calls `load_buffer()` for the frames currently held by the
        buffer and marks all channels as changed. Does nothing on an
        empty buffer.
        """
        if len(self.buffer) > 0:
            self.load_buffer(self.offset, len(self.buffer), self.buffer)
            self.buffer_changed[:] = True
            if self.verbose > 1:
                print(f' reloaded {len(self.buffer)} frames from {self.offset} up to {self.offset + len(self.buffer)}')

    def move_buffer(self, offset, nframes):
        """Move and resize buffer.

        Called by update_buffer().

        Parameters
        ----------
        offset: int
            Frame index of the first frame in the new buffer.
        nframes: int
            Number of frames the new buffer should hold.
        """
        if offset < 0:
            offset = 0
        if offset + nframes > self.frames:
            nframes = self.frames - offset
        if offset != self.offset or nframes != len(self.buffer):
            # keep what is already there, get what is still missing:
            r_offset, r_nframes = self._recycle_buffer(offset, nframes)
            self.offset = offset
            if r_nframes > 0:
                # load buffer content, this is backend specific:
                pbuffer = self.buffer[r_offset - self.offset:
                                      r_offset - self.offset + r_nframes]
                self.load_buffer(r_offset, r_nframes, pbuffer)
                self.buffer_changed[:] = True
                if self.verbose > 1:
                    print(f' loaded {len(pbuffer)} frames from {r_offset} up to {r_offset + r_nframes}')

    def _buffer_position(self, start, stop):
        """Compute position and size of buffer.

        You usually should not need to call this function
        directly. This is handled by `update_buffer()`.

        Takes `bufferframes` and `backframes` into account.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index behind the last requested frame.

        Returns
        -------
        offset: int
            Frame index of the first frame in the new buffer.
        nframes: int
            Number of frames the new buffer should hold.
            If the requested range is already contained in the
            buffer, the current position and size are returned.

        """
        if start < 0:
            start = 0
        if stop > self.frames:
            stop = self.frames
        offset = start
        nframes = stop - start
        if start < self.offset or stop > self.offset + len(self.buffer):
            # we need to move the buffer:
            if nframes < self.bufferframes:
                # find optimal new position of buffer that accommodates start:stop
                back = self.backframes
                if self.bufferframes - nframes < 2*back:
                    # not enough room, share it between front and back:
                    back = (self.bufferframes - nframes)//2
                offset -= back
                nframes = self.bufferframes
                if offset < 0:
                    offset = 0
                if offset + nframes > self.frames:
                    offset = self.frames - nframes
                    if offset < 0:
                        offset = 0
                        nframes = self.frames - offset
            # expand buffer to accommodate nearby beginning or end:
            elif self.frames - offset - nframes < self.bufferframes//2:
                nframes = self.frames - offset
            elif offset < self.bufferframes//2:
                nframes += offset
                offset = 0
            if self.verbose > 2:
                print(f' request {nframes:6d} frames at {offset}-{offset+nframes}')
            return offset, nframes
        # no need to move buffer:
        return self.offset, len(self.buffer)

    def _recycle_buffer(self, offset, nframes):
        """Move buffer to new position and recycle content if possible.

        You usually should not need to call this function
        directly. This is handled by `update_buffer()` via move_buffer().

        Move already existing parts of the buffer to their new position (as
        returned by `_buffer_position()`) and return position and size of
        data chunk that still needs to be loaded from file.

        `self.offset` still needs to be the offset of the old buffer
        when calling this function. It is not modified.

        Parameters
        ----------
        offset: int
            Frame index of the new first frame in the buffer.
        nframes: int
            Number of frames the new buffer should hold.

        Returns
        -------
        r_offset: int
            First frame to be read from file.
        r_nframes: int
            Number of frames to be read from file.

        """
        r_offset = offset
        r_nframes = nframes
        if (offset >= self.offset and
            offset < self.offset + len(self.buffer)):
            # new buffer starts within the old one:
            # move the tail of the old buffer to the front.
            i = offset - self.offset
            n = len(self.buffer) - i
            if n > nframes:
                n = nframes
            tmp_buffer = self.buffer[i:i + n]
            self.allocate_buffer(nframes)
            self.buffer[:n] = tmp_buffer
            r_offset += n
            r_nframes -= n
            if self.verbose > 2:
                print(f' recycle {n:6d} frames from {self.offset + i} - {self.offset + i + n} of the old to the front at {offset} - {offset + n} ({0} - {n} in buffer)')
        elif (offset + nframes > self.offset and
              offset + nframes <= self.offset + len(self.buffer)):
            # new buffer ends within the old one:
            # move the front of the old buffer to the end.
            n = offset + nframes - self.offset
            m = len(self.buffer)
            tmp_buffer = self.buffer[:n]
            self.allocate_buffer(nframes)
            self.buffer[-n:] = tmp_buffer
            r_nframes -= n
            if self.verbose > 2:
                print(f' recycle {n:6d} frames from {self.offset} - {self.offset + n} of the old {m}-sized buffer to the end at {offset + nframes - n} - {offset + nframes} ({nframes - n} - {nframes} in buffer)')
        else:
            # new buffer is somewhere else or larger than current buffer:
            self.allocate_buffer(nframes)
        return r_offset, r_nframes

    def allocate_buffer(self, nframes=None, force=False):
        """Reallocate the buffer to have the right size.

        Called by _recycle_buffer().

        Also limits `bufferframes` to the total number of frames. A
        request for zero frames leaves the buffer untouched.

        Parameters
        ----------
        nframes: int or None
            Number of frames the buffer should hold.
            If None, use `self.bufferframes`.
        force: bool
            If True, reallocate buffer even if it has the same size as before.
        """
        if self.bufferframes > self.frames:
            self.bufferframes = self.frames
            self.backframes = 0
        if nframes is None:
            nframes = self.bufferframes
        if nframes == 0:
            return
        if force or nframes != len(self.buffer) or \
           self.shape[1:] != self.buffer.shape[1:]:
            shape = list(self.shape)
            shape[0] = nframes
            self.buffer = np.empty(shape)