Coverage for src/audioio/bufferedarray.py: 92%

1"""Buffered time-series data.

3- `blocks()`: generator for blockwise processing of array data.

4- class `BufferedArray()`: random access to time-series data of which only a part is held in memory.

5"""

8import numpy as np

def blocks(data, block_size, noverlap=0, start=0, stop=None):
    """Generator for blockwise processing of array data.

    Successive blocks are `block_size` long and advance by
    `block_size - noverlap` indices. A final shorter block is yielded
    for whatever remains before `stop`. If the requested range is too
    short to hold a single full block, the whole range `data[start:stop]`
    is yielded as one block.

    Parameters
    ----------
    data: ndarray
        Data to loop over. First dimension is time.
        Anything supporting `len()` and slicing works.
    block_size: int
        Len of data blocks to be returned.
    noverlap: int
        Number of indices successive data blocks should overlap.
    start: int
        Optional first index from which on to return blocks of data.
    stop: int
        Optional index up to which (exclusive) blocks of data are returned.
        Defaults to `len(data)`.

    Yields
    ------
    data: ndarray
        Successive slices of the input data.

    Raises
    ------
    ValueError
        `noverlap` larger or equal to `block_size`.
        Because this is a generator, the error is raised when the
        first block is requested.

    Examples
    --------
    ```
    import numpy as np
    from audioio import blocks
    data = np.arange(20)
    for x in blocks(data, 6, 2):
        print(x)
    ```
    results in
    ```text
    [0 1 2 3 4 5]
    [4 5 6 7 8 9]
    [ 8  9 10 11 12 13]
    [12 13 14 15 16 17]
    [16 17 18 19]
    ```

    Use it for processing long audio data, like computing a
    spectrogram with overlap:
    ```
    from scipy.signal import spectrogram
    from audioio import AudioLoader, blocks
    nfft = 2048
    with AudioLoader('some/audio.wav') as data:
        for x in blocks(data, 100*nfft, nfft//2):
            f, t, Sxx = spectrogram(x, fs=data.rate,
                                    nperseg=nfft, noverlap=nfft//2)
    ```
    """
    if noverlap >= block_size:
        raise ValueError(f'noverlap={noverlap} larger than block_size={block_size}')
    if stop is None:
        stop = len(data)
    step = block_size - noverlap
    # number of full-sized blocks fitting into start:stop
    n = (stop - start - noverlap)//step
    if n <= 0:
        # Not even one full block. n is negative when the range is
        # shorter than noverlap; the loop below would then never run,
        # so this case must be handled here as well.
        yield data[start:stop]
        return
    for k in range(n):
        yield data[start + k*step:start + k*step + block_size]
    # the last full block ended at start + (n - 1)*step + block_size;
    # yield whatever is left as a final, shorter block:
    if stop - start - ((n - 1)*step + block_size) > 0:
        yield data[start + n*step:stop]

class BufferedArray(object):
    """Random access to time-series data of which only a part is held in memory.

    This is a base class for accessing large audio recordings either
    from a file (class `AudioLoader`) or by computing its contents on
    the fly (e.g. filtered data, envelopes or spectrograms). The
    `BufferedArray` behaves like a single big ndarray with first
    dimension indexing the frames and second dimension indexing the
    channels of the data. Higher dimensions are also supported. For
    example, a third dimension for frequencies needed for
    spectrograms. Internally the class holds only a part of the data
    in memory. The size of this buffer is set to `bufferframes`
    frames. If more data are requested, the buffer is enlarged
    accordingly.

    Classes inheriting `BufferedArray` just need to implement
    ```
    self.load_buffer(offset, nframes, pbuffer)
    ```
    This function needs to load the supplied `pbuffer` with
    `nframes` frames of data starting at frame `offset`.

    In the constructor or some kind of opening function, you need to
    set the following member variables, followed by a call to
    `init_buffer()`:

    ```
    self.rate            # number of frames per second
    self.channels        # number of channels per frame
    self.frames          # total number of frames
    self.shape = (self.frames, self.channels, ...)
    self.bufferframes    # number of frames the buffer should hold
    self.backframes      # number of frames kept for moving back
    self.init_buffer()
    ```

    or provide all this information via the constructor:

    Parameters
    ----------
    rate: float
        The sampling rate of the data in Hertz (frames per second).
    channels: int
        The number of channels.
    frames: int
        The number of frames.
    bufferframes: int
        Number of frames the current data buffer holds.
    backframes: int
        Number of frames the current data buffer should keep
        before requested data ranges.
    verbose: int
        If larger than zero show detailed error/warning messages.

    Attributes
    ----------
    rate: float
        The sampling rate of the data in Hertz (frames per second).
    channels: int
        The number of channels.
    frames: int
        The number of frames. Same as `len()`.
    shape: tuple
        Frames and channels of the data. Optional higher dimensions.
    ndim: int
        Number of dimensions: 2 (frames and channels) or higher.
    size: int
        Total number of samples: frames times channels.
    offset: int
        Index of first frame in the current buffer.
    buffer: ndarray of floats
        The currently available data. First dimension is time, second channels.
        Optional higher dimensions according to `ndim` and `shape`.
    bufferframes: int
        Number of frames the current data buffer holds.
    backframes: int
        Number of frames the current data buffer should keep
        before requested data ranges.
    buffer_changed: ndarray of bool
        For each channel a flag, whether the buffer content has been changed.
        Set to `True`, whenever `load_buffer()` was called.

    Methods
    -------
    - `len()`: Number of frames.
    - `__getitem__`: Access data.
    - `blocks()`: Generator for blockwise processing of the data.
    - `update_buffer()`: make sure that the buffer contains data of a range of indices.
    - `update_time()`: make sure that the buffer contains data of a given time range.
    - `reload_buffer()`: reload the current buffer.
    - `move_buffer()`: move and resize buffer (called by update_buffer()).
    - `load_buffer()`: load a range of samples into a buffer (called by reload_buffer() and move_buffer()).
    - `_buffer_position()`: compute position and size of buffer (used by update_buffer()).
    - `_recycle_buffer()`: move buffer to new position and recycle content if possible (called by move_buffer()).
    - `allocate_buffer()`: reallocate the buffer to have the right size (called by _recycle_buffer()).

    Notes
    -----
    Access via `__getitem__` or `__next__` is slow!
    Even worse, using numpy functions on this class first converts
    it to a numpy array - that is something we actually do not want!
    We should subclass directly from numpy.ndarray .
    For details see http://docs.scipy.org/doc/numpy/user/basics.subclassing.html
    When subclassing, there is an offset argument, that might help to
    speed up `__getitem__` .

    """

    def __init__(self, rate=0, channels=0, frames=0, bufferframes=0,
                 backframes=0, verbose=0):
        """Constructor for initializing 2D arrays (times x channels).
        """
        self.rate = rate
        self.channels = channels
        self.frames = frames
        self.shape = (self.frames, self.channels)
        self.ndim = 2
        self.size = self.frames * self.channels
        self.bufferframes = bufferframes   # number of frames the buffer can hold
        self.backframes = backframes       # number of frames kept before
        self.verbose = verbose
        self.offset = 0                    # index of first frame in buffer
        self.init_buffer()

    def __enter__(self):
        """Return the instance itself for use in `with` statements."""
        return self

    def __exit__(self, ex_type, ex_value, tb):
        """Release resources on leaving a `with` block.

        Calls `__del__()` if the (sub)class provides one. This base
        class does not define `__del__()`, so the call needs to be
        guarded. Returns `True` only if no exception was raised, i.e.
        exceptions are never suppressed.
        """
        finalize = getattr(self, '__del__', None)
        if finalize is not None:
            finalize()
        return (ex_value is None)

    def __len__(self):
        """Total number of frames."""
        return self.frames

    def __iter__(self):
        """Start iterating over single frames.

        The iteration state is stored on the instance in `iter_counter`.
        """
        self.iter_counter = -1
        return self

    def __next__(self):
        """Return the next frame, moving the buffer if necessary."""
        self.iter_counter += 1
        if self.iter_counter >= self.frames:
            raise StopIteration
        else:
            self.update_buffer(self.iter_counter, self.iter_counter + 1)
            return self.buffer[self.iter_counter - self.offset]

    def __getitem__(self, key):
        """Access data of the audio file.

        Parameters
        ----------
        key: int, slice, sequence of int, or tuple
            Index into the data. If a tuple, the first element indexes
            the frames, the remaining elements are passed on unchanged
            to the buffer (channels and higher dimensions).

        Returns
        -------
        data: ndarray or scalar
            The requested data taken from the buffer.

        Raises
        ------
        IndexError
            A single frame index or an element of a sequence of frame
            indices is outside of `-frames <= index < frames`.
        """
        if isinstance(key, tuple):
            index = key[0]
        else:
            index = key
        if isinstance(index, slice):
            start = index.start
            stop = index.stop
            step = index.step
            if start is None:
                start = 0
            else:
                start = int(start)
                if start < 0:
                    start += len(self)
            if stop is None:
                stop = len(self)
            else:
                stop = int(stop)
                if stop < 0:
                    stop += len(self)
            if stop > self.frames:
                stop = self.frames
            if step is None:
                step = 1
            else:
                step = int(step)
            if step > 0:
                # Clamp to the valid range and enforce start <= stop.
                # Otherwise the buffer-relative slice bounds computed
                # below may get negative and wrap around in the buffer,
                # returning data from the wrong position.
                start = min(max(start, 0), self.frames)
                stop = max(stop, start)
            self.update_buffer(start, stop)
            newindex = slice(start - self.offset, stop - self.offset, step)
        elif hasattr(index, '__len__'):
            index = [inx if inx >= 0 else inx + len(self) for inx in index]
            if len(index) > 0:
                start = min(index)
                stop = max(index)
                if start < 0 or stop >= self.frames:
                    raise IndexError(f'index out of range for {self.frames} frames')
                self.update_buffer(start, stop + 1)
            newindex = [inx - self.offset for inx in index]
        else:
            index = int(index)
            if index < 0:
                index += len(self)
            # a still negative index would wrap around within the buffer:
            if index < 0 or index >= self.frames:
                raise IndexError(f'index out of range for {self.frames} frames')
            self.update_buffer(index, index + 1)
            newindex = index - self.offset
        if isinstance(key, tuple):
            newkey = (newindex,) + key[1:]
            return self.buffer[newkey]
        else:
            return self.buffer[newindex]

    def blocks(self, block_size, noverlap=0, start=0, stop=None):
        """Generator for blockwise processing of data.

        Parameters
        ----------
        block_size: int
            Len of data blocks to be returned.
        noverlap: int
            Number of indices successive data blocks should overlap.
        start: int
            Optional first index from which on to return blocks of data.
        stop: int
            Optional last index until which to return blocks of data.

        Yields
        ------
        data: ndarray
            Successive slices of the data.

        Raises
        ------
        ValueError
            `noverlap` larger or equal to `block_size`.

        Examples
        --------
        Use it for processing long audio data, like computing a spectrogram with overlap:
        ```
        from scipy.signal import spectrogram
        from audioio import AudioLoader # AudioLoader is a BufferedArray
        nfft = 2048
        with AudioLoader('some/audio.wav') as data:
            for x in data.blocks(100*nfft, nfft//2):
                f, t, Sxx = spectrogram(x, fs=data.rate,
                                        nperseg=nfft, noverlap=nfft//2)
        ```
        """
        return blocks(self, block_size, noverlap, start, stop)

    def init_buffer(self):
        """Allocate a buffer with zero frames but all the channels.

        Fix `bufferframes` and `backframes` to not exceed the total
        number of frames. Also sets `ndim` and `size` from `shape`,
        `frames` and `channels`, and resets `offset` and
        `buffer_changed`.

        """
        self.ndim = len(self.shape)
        self.size = self.frames * self.channels
        if self.bufferframes > self.frames:
            self.bufferframes = self.frames
            self.backframes = 0
        shape = list(self.shape)
        shape[0] = 0
        self.buffer = np.empty(shape)
        self.offset = 0
        self.buffer_changed = np.zeros(self.channels, dtype=bool)

    def update_buffer(self, start, stop):
        """Make sure that the buffer contains data of a range of indices.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index one past the last requested frame.
        """
        offset, nframes = self._buffer_position(start, stop)
        self.move_buffer(offset, nframes)

    def update_time(self, start, stop):
        """Make sure that the buffer contains data of a given time range.

        Times are converted to frame indices by multiplying with `rate`.

        Parameters
        ----------
        start: float
            Time point of first requested frame.
        stop: float
            Time point of last requested frame.
        """
        self.update_buffer(int(start*self.rate), int(stop*self.rate) + 1)

    def reload_buffer(self):
        """Reload the current buffer.

        Does nothing if the buffer is empty.
        """
        if len(self.buffer) > 0:
            self.load_buffer(self.offset, len(self.buffer), self.buffer)
            self.buffer_changed[:] = True
            if self.verbose > 1:
                print(f' reloaded {len(self.buffer)} frames from {self.offset} up to {self.offset + len(self.buffer)}')

    def move_buffer(self, offset, nframes):
        """Move and resize buffer.

        Called by update_buffer().

        Parameters
        ----------
        offset: int
            Frame index of the first frame in the new buffer.
        nframes: int
            Number of frames the new buffer should hold.
        """
        if offset < 0:
            offset = 0
        if offset + nframes > self.frames:
            nframes = self.frames - offset
        if offset != self.offset or nframes != len(self.buffer):
            r_offset, r_nframes = self._recycle_buffer(offset, nframes)
            self.offset = offset
            if r_nframes > 0:
                # load buffer content, this is backend specific:
                pbuffer = self.buffer[r_offset - self.offset:
                                      r_offset - self.offset + r_nframes]
                self.load_buffer(r_offset, r_nframes, pbuffer)
                self.buffer_changed[:] = True
                if self.verbose > 1:
                    print(f' loaded {len(pbuffer)} frames from {r_offset} up to {r_offset + r_nframes}')

    def _buffer_position(self, start, stop):
        """Compute position and size of buffer.

        You usually should not need to call this function
        directly. This is handled by `update_buffer()`.

        Takes `bufferframes` and `backframes` into account.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index one past the last requested frame.

        Returns
        -------
        offset: int
            Frame index of the first frame in the new buffer.
        nframes: int
            Number of frames the new buffer should hold.
            If the requested range is already covered by the buffer,
            the current offset and buffer size are returned.

        """
        if start < 0:
            start = 0
        if stop > self.frames:
            stop = self.frames
        offset = start
        nframes = stop - start
        if start < self.offset or stop > self.offset + len(self.buffer):
            # we need to move the buffer:
            if nframes < self.bufferframes:
                # find optimal new position of buffer that accommodates start:stop
                back = self.backframes
                if self.bufferframes - nframes < 2*back:
                    back = (self.bufferframes - nframes)//2
                offset -= back
                nframes = self.bufferframes
                if offset < 0:
                    offset = 0
                if offset + nframes > self.frames:
                    offset = self.frames - nframes
                    if offset < 0:
                        offset = 0
                        nframes = self.frames - offset
            # expand buffer to accommodate nearby beginning or end:
            elif self.frames - offset - nframes < self.bufferframes//2:
                nframes = self.frames - offset
            elif offset < self.bufferframes//2:
                nframes += offset
                offset = 0
            if self.verbose > 2:
                print(f' request {nframes:6d} frames at {offset}-{offset+nframes}')
            return offset, nframes
        # no need to move buffer:
        return self.offset, len(self.buffer)

    def _recycle_buffer(self, offset, nframes):
        """Move buffer to new position and recycle content if possible.

        You usually should not need to call this function
        directly. This is handled by `update_buffer()` via move_buffer().

        Move already existing parts of the buffer to their new position (as
        returned by `_buffer_position()`) and return position and size of
        data chunk that still needs to be loaded from file.

        Parameters
        ----------
        offset: int
            Frame index of the new first frame in the buffer.
        nframes: int
            Number of frames the new buffer should hold.

        Returns
        -------
        r_offset: int
            First frame to be read from file.
        r_nframes: int
            Number of frames to be read from file.

        """
        r_offset = offset
        r_nframes = nframes
        if (offset >= self.offset and
            offset < self.offset + len(self.buffer)):
            # new buffer starts within the old one:
            # its front can be taken from the end of the old buffer.
            i = offset - self.offset
            n = len(self.buffer) - i
            if n > nframes:
                n = nframes
            tmp_buffer = self.buffer[i:i + n]
            self.allocate_buffer(nframes)
            self.buffer[:n] = tmp_buffer
            r_offset += n
            r_nframes -= n
            if self.verbose > 2:
                print(f' recycle {n:6d} frames from {self.offset + i} - {self.offset + i + n} of the old to the front at {offset} - {offset + n} ({0} - {n} in buffer)')
        elif (offset + nframes > self.offset and
              offset + nframes <= self.offset + len(self.buffer)):
            # new buffer ends within the old one:
            # its end can be taken from the front of the old buffer.
            n = offset + nframes - self.offset
            m = len(self.buffer)
            tmp_buffer = self.buffer[:n]
            self.allocate_buffer(nframes)
            self.buffer[-n:] = tmp_buffer
            r_nframes -= n
            if self.verbose > 2:
                print(f' recycle {n:6d} frames from {self.offset} - {self.offset + n} of the old {m}-sized buffer to the end at {offset + nframes - n} - {offset + nframes} ({nframes - n} - {nframes} in buffer)')
        else:
            # new buffer is somewhere else or larger than current buffer:
            self.allocate_buffer(nframes)
        return r_offset, r_nframes

    def allocate_buffer(self, nframes=None, force=False):
        """Reallocate the buffer to have the right size.

        Called by _recycle_buffer().

        The buffer is left untouched if `nframes` is zero.

        Parameters
        ----------
        nframes: int or None
            Number of frames the buffer should hold.
            If None, use `self.bufferframes`.
        force: bool
            If True, reallocate buffer even if it has the same size as before.
        """
        if self.bufferframes > self.frames:
            self.bufferframes = self.frames
            self.backframes = 0
        if nframes is None:
            nframes = self.bufferframes
        if nframes == 0:
            return
        if force or nframes != len(self.buffer) or \
           self.shape[1:] != self.buffer.shape[1:]:
            shape = list(self.shape)
            shape[0] = nframes
            self.buffer = np.empty(shape)

551