Coverage for src/thunderlab/eventdetection.py: 77%
474 statements
coverage.py v7.6.2, created at 2024-10-09 16:02 +0000
1"""Detect and handle peaks and troughs as well as threshold crossings in data arrays.
3## Peak detection
5- `detect_peaks()`: detect peaks and troughs using a relative threshold.
6- `peak_width()`: compute width of each peak.
7- `peak_size_width()`: compute size and width of each peak.
9## Threshold crossings
11- `threshold_crossings()`: detect crossings of an absolute threshold.
12- `threshold_crossing_times()`: compute times of threshold crossings by linear interpolation.
14## Event manipulation
16- `trim()`: make the list of peaks and troughs the same length.
17- `trim_to_peak()`: ensure that the peak is first.
18- `trim_closest()`: ensure that peaks minus troughs is smallest.
20- `merge_events()`: Merge events if they are closer than a minimum distance.
21- `remove_events()`: Remove events that are too short or too long.
22- `widen_events()`: Enlarge events on both sides without overlap.
24## Threshold estimation
26- `std_threshold()`: estimate detection threshold based on the standard deviation.
27- `median_std_threshold()`: estimate detection threshold based on the median standard deviation of data snippets.
28- `hist_threshold()`: estimate detection threshold based on a histogram of the data.
29- `minmax_threshold()`: estimate detection threshold based on maximum minus minimum value.
30- `percentile_threshold()`: estimate detection threshold based on interpercentile range.
32## Snippets
34- `snippets()`: cut out data snippets around a list of indices.
36## Peak detection with dynamic threshold
38- `detect_dynamic_peaks()`: peak and trough detection with a dynamically adapted threshold.
39- `accept_peak_size_threshold()`: adapt the detection threshold to the size of the detected peaks.
40"""
42import sys
43import numpy as np
44try:
45 import matplotlib.pyplot as plt
46except ImportError:
47 pass
49try:
50 from numba import jit
51except ImportError:
52 def jit(*args, **kwargs):
53 def decorator_jit(func):
54 return func
55 return decorator_jit
58def detect_peaks(data, threshold):
59 """Detect peaks and troughs using a relative threshold.
61 This is an implementation of the algorithm by
62 Bryan S. Todd and David C. Andrews (1999): The identification of peaks in physiological signals.
63 Computers and Biomedical Research 32, 322-335.
65 Parameters
66 ----------
67 data: array
68 A 1-D array of input data where peaks are detected.
69 threshold: float or array of floats
70 A positive number or array of numbers setting the detection threshold,
71 i.e. the minimum distance between peaks and troughs.
72 In case of an array make sure that the threshold does not change faster
73 than the expected intervals between peaks and troughs.
75 Returns
76 -------
77 peaks: array of ints
78 Array of indices of detected peaks.
79 troughs: array of ints
80 Array of indices of detected troughs.
82 Raises
83 ------
84 ValueError:
85 If `threshold <= 0`.
86 IndexError:
87 If `data` and `threshold` arrays differ in length.
88 """
89 if np.isscalar(threshold):
90 if threshold <= 0:
91 raise ValueError('threshold value must be positive!')
92 return detect_peaks_fixed(data, threshold)
93 else:
94 if len(data) != len(threshold):
95 raise IndexError('input arrays data and threshold must have same length!')
96 if np.min(threshold) <= 0:
97 raise ValueError('threshold values must be positive!')
98 return detect_peaks_array(data, threshold)
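For illustration, a minimal usage sketch (not part of the module; it assumes the package is importable as `thunderlab.eventdetection` and uses an arbitrary synthetic signal):

```python
# Sketch: detect peaks and troughs in a noisy sine wave with a threshold
# estimated from the data itself.
import numpy as np
from thunderlab.eventdetection import detect_peaks, std_threshold

rate = 1000.0                                 # assumed sampling rate in Hz
time = np.arange(0.0, 2.0, 1.0/rate)
data = np.sin(2.0*np.pi*5.0*time) + 0.1*np.random.randn(len(time))
thresh = std_threshold(data, thresh_fac=2.0)  # scalar threshold: two standard deviations
peaks, troughs = detect_peaks(data, thresh)   # arrays of indices into data
print(f'{len(peaks)} peaks, {len(troughs)} troughs')
```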
101@jit(nopython=True)
102def detect_peaks_fixed(data, threshold):
103 """Detect peaks and troughs using a fixed, relative threshold.
105 Helper function for detect_peaks().
107 Parameters
108 ----------
109 data: array
110 A 1-D array of input data where peaks are detected.
111 threshold: float
112 A positive number setting the detection threshold,
113 i.e. the minimum distance between peaks and troughs.
115 Returns
116 -------
117 peaks: array of ints
118 Array of indices of detected peaks.
119 troughs: array of ints
120 Array of indices of detected troughs.
121 """
122 peaks = []
123 troughs = []
125 # initialize:
126 direction = 0
127 min_inx = 0
128 max_inx = 0
129 min_value = data[0]
130 max_value = min_value
132 # loop through the data:
133 for index, value in enumerate(data):
134 # rising?
135 if direction > 0:
136 if value > max_value:
137 # update maximum element:
138 max_inx = index
139 max_value = value
140 # otherwise, if the new value is falling below
141 # the maximum value minus the threshold:
142 # the maximum is a peak!
143 elif value <= max_value - threshold:
144 peaks.append(max_inx)
145 # change direction:
146 direction = -1
147 # store minimum element:
148 min_inx = index
149 min_value = value
151 # falling?
152 elif direction < 0:
153 if value < min_value:
154 # update minimum element:
155 min_inx = index
156 min_value = value
157 # otherwise, if the new value is rising above
158 # the minimum value plus the threshold:
159 # the minimum is a trough!
160 elif value >= min_value + threshold:
161 troughs.append(min_inx)
162 # change direction:
163 direction = +1
164 # store maximum element:
165 max_inx = index
166 max_value = value
168 # don't know direction yet:
169 else:
170 if value <= max_value - threshold:
171 direction = -1 # falling
172 elif value >= min_value + threshold:
173 direction = 1 # rising
175 if value > max_value:
176 # update maximum element:
177 max_inx = index
178 max_value = value
179 elif value < min_value:
180 # update minimum element:
181 min_inx = index
182 min_value = value
184 return np.asarray(peaks, dtype=np.int64), \
185 np.asarray(troughs, dtype=np.int64)
188@jit(nopython=True)
189def detect_peaks_array(data, threshold):
190 """Detect peaks and troughs using a variable relative threshold.
192 Helper function for detect_peaks().
194 Parameters
195 ----------
196 data: array
197 A 1-D array of input data where peaks are detected.
198 threshold: array
199 An array of positive numbers setting the detection threshold,
200 i.e. the minimum distance between peaks and troughs.
202 Returns
203 -------
204 peaks: array of ints
205 Array of indices of detected peaks.
206 troughs: array of ints
207 Array of indices of detected troughs.
208 """
209 peaks = []
210 troughs = []
212 # initialize:
213 direction = 0
214 min_inx = 0
215 max_inx = 0
216 min_value = data[0]
217 max_value = min_value
219 # loop through the data:
220 for index, value in enumerate(data):
221 # rising?
222 if direction > 0:
223 if value > max_value:
224 # update maximum element:
225 max_inx = index
226 max_value = value
227 # otherwise, if the new value is falling below
228 # the maximum value minus the threshold:
229 # the maximum is a peak!
230 elif value <= max_value - threshold[index]:
231 peaks.append(max_inx)
232 # change direction:
233 direction = -1
234 # store minimum element:
235 min_inx = index
236 min_value = value
238 # falling?
239 elif direction < 0:
240 if value < min_value:
241 # update minimum element:
242 min_inx = index
243 min_value = value
244 # otherwise, if the new value is rising above
245 # the minimum value plus the threshold:
246 # the minimum is a trough!
247 elif value >= min_value + threshold[index]:
248 troughs.append(min_inx)
249 # change direction:
250 direction = +1
251 # store maximum element:
252 max_inx = index
253 max_value = value
255 # don't know direction yet:
256 else:
257 if value <= max_value - threshold[index]:
258 direction = -1 # falling
259 elif value >= min_value + threshold[index]:
260 direction = 1 # rising
262 if value > max_value:
263 # update maximum element:
264 max_inx = index
265 max_value = value
266 elif value < min_value:
267 # update minimum element:
268 min_inx = index
269 min_value = value
271 return np.asarray(peaks, dtype=np.int64), \
272 np.asarray(troughs, dtype=np.int64)
275def peak_width(time, data, peak_indices, trough_indices,
276 peak_frac=0.5, base='max'):
277 """Width of each peak.
279 Peak width is computed from interpolated threshold crossings at
280 `peak_frac` height of each peak.
282 Parameters
283 ----------
284 time: array
285 Time, must not be `None`.
286 data: array
287 The data with the peaks.
288 peak_indices: array
289 Indices of the peaks.
290 trough_indices: array
291 Indices of corresponding troughs.
292 peak_frac: float
293 Fraction of peak height where its width is measured.
294 base: string
295 Height and width of the peak are measured relative to
297 - 'left': trough to the left
298 - 'right': trough to the right
299 - 'min': the minimum of the two troughs to the left and to the right
300 - 'max': the maximum of the two troughs to the left and to the right
301 - 'mean': mean of the troughs to the left and to the right
302 - 'closest': trough that is closest to peak
304 Returns
305 -------
306 widths: array
307 Width at `peak_frac` height of each peak.
309 Raises
310 ------
311 ValueError:
312 If an invalid value is passed to `base`.
313 """
314 def left_base(data, left_inx, right_inx, peak_inx):
315 return data[left_inx]
316 def right_base(data, left_inx, right_inx, peak_inx):
317 return data[right_inx]
318 def min_base(data, left_inx, right_inx, peak_inx):
319 return min(data[left_inx], data[right_inx])
320 def max_base(data, left_inx, right_inx, peak_inx):
321 return max(data[left_inx], data[right_inx])
322 def mean_base(data, left_inx, right_inx, peak_inx):
323 return np.mean((data[left_inx], data[right_inx]))
324 def closest_base(data, left_inx, right_inx, peak_inx):
325 return data[left_inx] if peak_inx-left_inx <= right_inx-peak_inx else data[right_inx]
327 widths = np.zeros(len(peak_indices))
328 if len(peak_indices) == 0:
329 return widths
330 # we need a trough before and after each peak:
331 peak_inx = np.asarray(peak_indices, dtype=int)
332 trough_inx = np.asarray(trough_indices, dtype=int)
333 if len(trough_inx) == 0 or peak_inx[0] < trough_inx[0]:
334 trough_inx = np.hstack((0, trough_inx))
335 if peak_inx[-1] > trough_inx[-1]:
336 trough_inx = np.hstack((trough_inx, len(data)-1))
337 # base for size of peaks:
338 base_func = closest_base
339 if base == 'left':
340 base_func = left_base
341 elif base == 'right':
342 base_func = right_base
343 elif base == 'min':
344 base_func = min_base
345 elif base == 'max':
346 base_func = max_base
347 elif base == 'mean':
348 base_func = mean_base
349 elif base == 'closest':
350 base_func = closest_base
351 else:
352 raise ValueError(f'Invalid value for base ({base})')
353 # width of peaks:
354 for j in range(len(peak_inx)):
355 li = trough_inx[j]
356 ri = trough_inx[j+1]
357 baseval = base_func(data, li, ri, peak_inx[j])
358 thresh = baseval*(1.0-peak_frac) + data[peak_inx[j]]*peak_frac
359 inx = li + np.argmax(data[li:ri] > thresh)
360 if inx > 0:
361 ti0 = np.interp(thresh, data[inx-1:inx+1], time[inx-1:inx+1])
362 else:
363 ti0 = time[0]
364 inx = ri - np.argmax(data[ri:li:-1] > thresh)
365 if inx+1 < len(data):
366 ti1 = np.interp(thresh, data[inx+1:inx-1:-1], time[inx+1:inx-1:-1])
367 else:
368 ti1 = time[-1]
369 widths[j] = ti1 - ti0
370 return widths
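Continuing the sketch from above (illustrative only), peak widths at half height could be computed like this:

```python
# Sketch: width of each detected peak at 50% of its height,
# measured relative to the higher of the two neighboring troughs.
widths = peak_width(time, data, peaks, troughs, peak_frac=0.5, base='max')
print(f'mean peak width: {np.mean(widths):.3f} s')
```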
373def peak_size_width(time, data, peak_indices, trough_indices,
374 peak_frac=0.75, base='closest'):
375 """Compute size and width of each peak.
377 Parameters
378 ----------
379 time: array
380 Time, must not be `None`.
381 data: array
382 The data with the peaks.
383 peak_indices: array
384 Indices of the peaks.
385 trough_indices: array
386 Indices of the troughs.
387 peak_frac: float
388 Fraction of peak height where its width is measured.
389 base: string
390 Height and width of the peak are measured relative to
392 - 'left': trough to the left
393 - 'right': trough to the right
394 - 'min': the minimum of the two troughs to the left and to the right
395 - 'max': the maximum of the two troughs to the left and to the right
396 - 'mean': mean of the troughs to the left and to the right
397 - 'closest': trough that is closest to peak
399 Returns
400 -------
401 peaks: 2-D array
402 First dimension is the peak index. Second dimension is
403 time, height (value of data at the peak),
404 size (peak height minus height of closest trough),
405 width (at `peak_frac` size), 0.0 (count) of the peak. See `peak_width()`.
407 Raises
408 ------
409 ValueError:
410 If an invalid value is passed to `base`.
411 """
412 def left_base(data, left_inx, right_inx, peak_inx):
413 return data[left_inx]
414 def right_base(data, left_inx, right_inx, peak_inx):
415 return data[right_inx]
416 def min_base(data, left_inx, right_inx, peak_inx):
417 return min(data[left_inx], data[right_inx])
418 def max_base(data, left_inx, right_inx, peak_inx):
419 return max(data[left_inx], data[right_inx])
420 def mean_base(data, left_inx, right_inx, peak_inx):
421 return np.mean((data[left_inx], data[right_inx]))
422 def closest_base(data, left_inx, right_inx, peak_inx):
423 return data[left_inx] if peak_inx-left_inx <= right_inx-peak_inx else data[right_inx]
425 peaks = np.zeros((len(peak_indices), 5))
426 if len(peak_indices) == 0:
427 return peaks
428 # time point of peaks:
429 peaks[:, 0] = time[peak_indices]
430 # height of peaks:
431 peaks[:, 1] = data[peak_indices]
432 # we need a trough before and after each peak:
433 peak_inx = np.asarray(peak_indices, dtype=int)
434 trough_inx = np.asarray(trough_indices, dtype=int)
435 if len(trough_inx) == 0 or peak_inx[0] < trough_inx[0]:
436 trough_inx = np.hstack((0, trough_inx))
438 if peak_inx[-1] > trough_inx[-1]:
439 trough_inx = np.hstack((trough_inx, len(data)-1))
440 # base for size of peaks:
441 base_func = closest_base
442 if base == 'left':
443 base_func = left_base
444 elif base == 'right':
445 base_func = right_base
446 elif base == 'min':
447 base_func = min_base
448 elif base == 'max':
449 base_func = max_base
450 elif base == 'mean':
451 base_func = mean_base
452 elif base == 'closest':
453 base_func = closest_base
455 else:
456 raise ValueError(f'Invalid value for base ({base})')
457 # size and width of peaks:
458 for j, pi in enumerate(peak_inx):
459 li = trough_inx[j]
460 ri = trough_inx[j+1]
461 baseval = base_func(data, li, ri, pi)
462 thresh = baseval*(1.0-peak_frac) + data[pi]*peak_frac
463 inx = li + np.argmax(data[li:ri] > thresh)
464 if inx > 0:
465 ti0 = np.interp(thresh, data[inx-1:inx+1], time[inx-1:inx+1])
466 else:
467 ti0 = time[0]
468 inx = ri - np.argmax(data[ri:li:-1] > thresh)
469 if inx+1 < len(data):
470 ti1 = np.interp(thresh, data[inx+1:inx-1:-1], time[inx+1:inx-1:-1])
471 else:
472 ti1 = time[-1]
473 if np.any(np.isfinite((data[pi], baseval))):
474 peaks[j, 2] = data[pi] - baseval
475 peaks[j, 3] = ti1 - ti0
476 return peaks
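A hedged sketch of how the returned 2-D array might be unpacked, continuing the example data from above (columns as documented in the docstring):

```python
# Sketch: time, height, size, and width of each peak in one 2-D array.
props = peak_size_width(time, data, peaks, troughs, peak_frac=0.75, base='closest')
for t, height, size, width, _ in props:     # last column is a 0.0 placeholder
    print(f't={t:.3f}s  height={height:.2f}  size={size:.2f}  width={width:.3f}s')
```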
479def threshold_crossings(data, threshold):
480 """Detect crossings of a threshold with positive and negative slope.
482 Parameters
483 ----------
484 data: array
485 A 1-D array of input data where threshold crossings are detected.
486 threshold: float or array
487 A number or array of numbers setting the threshold
488 that needs to be crossed.
490 Returns
491 -------
492 up_indices: array of ints
493 A list of indices where the threshold is crossed with positive slope.
494 down_indices: array of ints
495 A list of indices where the threshold is crossed with negative slope.
497 Raises
498 ------
499 IndexError:
500 If `data` and `threshold` arrays differ in length.
501 """
502 if np.isscalar(threshold):
503 up_indices = np.nonzero((data[1:]>threshold) & (data[:-1]<=threshold))[0]
504 down_indices = np.nonzero((data[1:]<=threshold) & (data[:-1]>threshold))[0]
505 else:
506 if len(data) != len(threshold):
507 raise IndexError('input arrays data and threshold must have same length!')
508 up_indices = np.nonzero((data[1:]>threshold[1:]) & (data[:-1]<=threshold[:-1]))[0]
509 down_indices = np.nonzero((data[1:]<=threshold[1:]) & (data[:-1]>threshold[:-1]))[0]
510 return up_indices, down_indices
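For illustration, continuing the synthetic data from the first sketch with an arbitrary threshold of 0.5:

```python
# Sketch: indices of the last sample before the data crosses 0.5 upwards/downwards.
up_indices, down_indices = threshold_crossings(data, 0.5)
```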
513def threshold_crossing_times(time, data, threshold, up_indices, down_indices):
514 """Compute times of threshold crossings by linear interpolation.
516 Parameters
517 ----------
518 time: array
519 Time, must not be `None`.
520 data: array
521 The data.
522 threshold: float
523 A number or array of numbers setting the threshold
524 that was crossed.
525 up_indices: array of ints
526 A list of indices where the threshold is crossed with positive slope.
527 down_indices: array of ints
528 A list of indices where the threshold is crossed with negative slope.
530 Returns
531 -------
532 up_times: array of floats
533 Interpolated times where the threshold is crossed with positive slope.
534 down_times: array of floats
535 Interpolated times where the threshold is crossed with negative slope.
536 """
537 up_times = np.zeros(len(up_indices))
538 for k, inx in enumerate(up_indices):
539 up_times[k] = np.interp(threshold, data[inx:inx+2], time[inx:inx+2])
540 down_times = np.zeros(len(down_indices))
541 for k, inx in enumerate(down_indices):
542 down_times[k] = np.interp(-threshold, -data[inx:inx+2], time[inx:inx+2])
543 return up_times, down_times
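Building on the previous sketch, interpolated crossing times and event durations might be obtained like this (`trim_to_peak()` is defined further below):

```python
# Sketch: sub-sample crossing times by linear interpolation.
up_times, down_times = threshold_crossing_times(time, data, 0.5, up_indices, down_indices)
ups, downs = trim_to_peak(up_times, down_times)  # pair each upward with the following downward crossing
durations = downs - ups
```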
546def trim(peaks, troughs):
547 """Trims the peaks and troughs arrays such that they have the same length.
549 Parameters
550 ----------
551 peaks: array
552 List of peak indices or times.
553 troughs: array
554 List of trough indices or times.
556 Returns
557 -------
558 peaks: array
559 List of peak indices or times.
560 troughs: array
561 List of trough indices or times.
562 """
563 # common len:
564 n = min(len(peaks), len(troughs))
565 # align arrays:
566 return peaks[:n], troughs[:n]
569def trim_to_peak(peaks, troughs):
570 """Trims the peaks and troughs arrays such that they have the same length
571 and the first peak comes first.
573 Parameters
574 ----------
575 peaks: array
576 List of peak indices or times.
577 troughs: array
578 List of trough indices or times.
580 Returns
581 -------
582 peaks: array
583 List of peak indices or times.
584 troughs: array
585 List of trough indices or times.
586 """
587 # start index for troughs:
588 tidx = 0
589 if len(peaks) > 0 and len(troughs) > 0 and troughs[0] < peaks[0]:
590 tidx = 1
591 # common len:
592 n = min(len(peaks), len(troughs[tidx:]))
593 # align arrays:
594 return peaks[:n], troughs[tidx:tidx + n]
597def trim_closest(peaks, troughs):
598 """Trims the peaks and troughs arrays such that they have the same length
599 and that peaks-troughs is on average as small as possible.
601 Parameters
602 ----------
603 peaks: array
604 List of peak indices or times.
605 troughs: array
606 List of trough indices or times.
608 Returns
609 -------
610 peaks: array
611 List of peak indices or times.
612 troughs: array
613 List of trough indices or times.
614 """
615 pidx = 0
616 tidx = 0
617 nn = min(len(peaks), len(troughs))
618 if nn == 0:
619 return np.array([]), np.array([])
620 dist = np.abs(np.mean(peaks[:nn] - troughs[:nn]))
621 if len(peaks) == 0 or len(troughs) == 0:
622 nn = 0
623 else:
624 if peaks[0] < troughs[0]:
625 nnp = min(len(peaks[1:]), len(troughs))
626 distp = np.abs(np.mean(peaks[1:nnp] - troughs[:nnp - 1]))
627 if distp < dist:
628 pidx = 1
629 nn = nnp
630 else:
631 nnt = min(len(peaks), len(troughs[1:]))
632 distt = np.abs(np.mean(peaks[:nnt - 1] - troughs[1:nnt]))
633 if distt < dist:
634 tidx = 1
635 nn = nnt
636 # align arrays:
637 return peaks[pidx:pidx + nn], troughs[tidx:tidx + nn]
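A small illustrative comparison of the three trimming variants on toy index arrays (not from the module):

```python
# Sketch: a trough precedes the first peak, and there is one extra trough.
p = np.array([110, 210, 310])
t = np.array([10, 105, 205, 305])
trim(p, t)          # -> [110, 210, 310], [ 10, 105, 205]
trim_to_peak(p, t)  # -> [110, 210, 310], [105, 205, 305]
trim_closest(p, t)  # -> [110, 210, 310], [105, 205, 305]
```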
640def merge_events(onsets, offsets, min_distance):
641 """Merge events if they are closer than a minimum distance.
643 If the beginning of an event (onset, peak, or positive threshold crossing)
644 is too close to the end of the previous event (offset, trough, or negative
645 threshold crossing) the two events are merged into a single one that begins
646 with the first one and ends with the second one.
648 Parameters
649 ----------
650 onsets: 1-D array
651 The onsets (peaks, or positive threshold crossings) of the events
652 as indices or times.
653 offsets: 1-D array
654 The offsets (troughs, or negative threshold crossings) of the events
655 as indices or times.
656 min_distance: int or float
657 The minimum distance between events. If the beginning of an event is separated
658 from the end of the previous event by less than this distance, then the two events
659 are merged into one. If the event onsets and offsets are given in indices, then
660 min_distance is also in indices.
662 Returns
663 -------
664 merged_onsets: 1-D array
665 The onsets (peaks, or positive threshold crossings) of the merged events
666 as indices or times according to onsets.
667 merged_offsets: 1-D array
668 The offsets (troughs, or negative threshold crossings) of the merged events
669 as indices or times according to offsets.
670 """
671 onsets, offsets = trim_to_peak(onsets, offsets)
672 if len(onsets) == 0 or len(offsets) == 0:
673 return np.array([]), np.array([])
674 else:
675 diff = onsets[1:] - offsets[:-1]
676 indices = diff > min_distance
677 merged_onsets = onsets[np.hstack([True, indices])]
678 merged_offsets = offsets[np.hstack([indices, True])]
679 return merged_onsets, merged_offsets
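A minimal sketch with made-up index arrays to show the merging behavior:

```python
# Sketch: gaps of 50 and 20 samples fall below min_distance=100 and are merged,
# the gap of 400 samples is not.
onsets = np.array([100, 250, 300, 800])
offsets = np.array([200, 280, 400, 900])
merged_on, merged_off = merge_events(onsets, offsets, 100)
# merged_on  -> [100, 800]
# merged_off -> [400, 900]
```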
682def remove_events(onsets, offsets, min_duration, max_duration=None):
683 """Remove events that are too short or too long.
685 If the length of an event, i.e. `offset` (offset, trough, or negative
686 threshold crossing) minus `onset` (onset, peak, or positive threshold crossing),
687 is shorter than `min_duration` or longer than `max_duration`, then this event is
688 removed.
690 Parameters
691 ----------
692 onsets: 1-D array
693 The onsets (peaks, or positive threshold crossings) of the events
694 as indices or times.
695 offsets: 1-D array
696 The offsets (troughs, or negative threshold crossings) of the events
697 as indices or times.
698 min_duration: int, float, or None
699 The minimum duration of events. If the event offset minus the event onset
700 is less than `min_duration`, then the event is removed from the lists.
701 If the event onsets and offsets are given in indices, then
702 `min_duration` is also in indices. If `None` then this test is skipped.
703 max_duration: int, float, or None
704 The maximum duration of events. If the event offset minus the event onset
705 is larger than `max_duration`, then the event is removed from the lists.
706 If the event onsets and offsets are given in indices, then
707 `max_duration` is also in indices. If `None` then this test is skipped.
709 Returns
710 -------
711 onsets: 1-D array
712 The onsets (peaks, or positive threshold crossings) of the events
713 with too short and too long events removed as indices or times according to onsets.
714 offsets: 1-D array
715 The offsets (troughs, or negative threshold crossings) of the events
716 with too short and too long events removed as indices or times according to offsets.
717 """
718 onsets, offsets = trim_to_peak(onsets, offsets)
719 if len(onsets) == 0 or len(offsets) == 0:
720 return np.array([]), np.array([])
721 elif min_duration is not None or max_duration is not None:
722 diff = offsets - onsets
723 if min_duration is not None and max_duration is not None:
724 indices = (diff > min_duration) & (diff < max_duration)
725 elif min_duration is not None:
726 indices = diff > min_duration
727 else:
728 indices = diff < max_duration
729 onsets = onsets[indices]
730 offsets = offsets[indices]
731 return onsets, offsets
734def widen_events(onsets, offsets, max_time, duration):
735 """Enlarge events on both sides without overlap.
737 Subtracts `duration` from the `onsets` and adds `duration` to the offsets.
738 If two succeeding events are separated by less than two times the
739 `duration`, then the offset of the previous event and the onset of
740 the following event are set at the center between the two events.
742 Parameters
743 ----------
744 onsets: 1-D array
745 The onsets (peaks, or positive threshold crossings) of the events
746 as indices or times.
747 offsets: 1-D array
748 The offsets (troughs, or negative threshold crossings) of the events
749 as indices or times.
750 max_time: int or float
751 The maximum value for the end of the last event.
752 If the event onsets and offsets are given in indices, then
753 max_time is the maximum possible index, i.e. the length of the
754 data array on which the events were detected.
755 duration: int or float
756 The number of indices or the time by which the events should
757 be enlarged.
758 If the event onsets and offsets are given in indices, then
759 duration is also in indices.
761 Returns
762 -------
763 onsets: 1-D array
764 The onsets (peaks, or positive threshold crossings) of the enlarged events.
765 offsets: 1-D array
766 The offsets (troughs, or negative threshold crossings) of the enlarged events.
768 """
769 new_onsets = []
770 new_offsets = []
771 if len(onsets) > 0:
772 on_idx = onsets[0]
773 new_onsets.append(on_idx - duration if on_idx >= duration else 0)
774 for off_idx, on_idx in zip(offsets[:-1], onsets[1:]):
775 if on_idx - off_idx < 2*duration:
776 mid_idx = (on_idx + off_idx)//2
777 new_offsets.append(mid_idx)
778 new_onsets.append(mid_idx)
779 else:
780 new_offsets.append(off_idx + duration)
781 new_onsets.append(on_idx - duration)
782 if len(offsets) > 0:
783 off_idx = offsets[-1]
784 new_offsets.append(off_idx + duration if off_idx + duration < max_time else max_time)
785 return np.array(new_onsets, dtype=onsets.dtype), np.array(new_offsets, dtype=offsets.dtype)
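A sketch of a typical cleanup pipeline, continuing the synthetic `data` from the first sketch (all parameter values are illustrative only):

```python
# Sketch: detect threshold crossings, then merge, filter, and widen the events.
onsets, offsets = threshold_crossings(data, 0.5)
onsets, offsets = merge_events(onsets, offsets, 10)             # merge gaps shorter than 10 samples
onsets, offsets = remove_events(onsets, offsets, 5, 1000)       # keep events of 5 to 1000 samples
onsets, offsets = widen_events(onsets, offsets, len(data), 20)  # add 20 samples on each side
```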
788def std_threshold(data, win_size=None, thresh_fac=5.0):
789 """Estimates a threshold for peak detection based on the standard deviation of the data.
791 The threshold is computed as the standard deviation of the data
792 multiplied with `thresh_fac`.
794 In case of Gaussian distributed data, setting `thresh_fac=2.0`
795 (two standard deviations) captures 68% of the data,
796 `thresh_fac=4.0` captures 95%, and `thresh_fac=6.0` 99.7%.
798 If `win_size` is given, then the threshold is computed for
799 half-overlapping windows of size `win_size` separately. In this
800 case the returned threshold is an array of the same size as data.
801 Without a `win_size` a single threshold value determined from the
802 whole data array is returned.
804 Parameters
805 ----------
806 data: 1-D array
807 The data to be analyzed.
808 win_size: int or None
809 Size of window in which a threshold value is computed.
810 thresh_fac: float
811 Factor by which the standard deviation is multiplied to set the threshold.
813 Returns
814 -------
815 threshold: float or 1-D array
816 The computed threshold.
818 """
820 if win_size:
821 threshold = np.zeros(len(data))
822 for inx0 in range(0, len(data) - win_size//2, win_size//2):
823 inx1 = inx0 + win_size
824 std = np.std(data[inx0:inx1], ddof=1)
825 threshold[inx0:inx1] = std * thresh_fac
826 return threshold
827 else:
828 return np.std(data, ddof=1) * thresh_fac
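An illustrative sketch of the windowed variant (assuming `data` and `rate` from the first sketch):

```python
# Sketch: a position-dependent threshold from half-overlapping one-second windows.
thresh = std_threshold(data, win_size=int(1.0*rate), thresh_fac=4.0)
peaks, troughs = detect_peaks(data, thresh)  # thresh is an array of the same length as data
```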
831@jit(nopython=True)
832def median_std_threshold(data, win_size=100, thresh_fac=6.0, n_snippets=1000):
833 """Estimate a threshold for peak detection based on the median standard deviation of data snippets.
835 On `n_snippets` snippets of `win_size` size the standard
836 deviation of the data is estimated. The returned threshold is the
837 median of these standard deviations that are larger than zero
838 multiplied by `thresh_fac`.
840 Parameters
841 ----------
842 data: 1-D array of float
843 The data to be analyzed.
844 win_size: int
845 Size of windows on which standard deviations are computed.
846 thresh_fac: float
847 Factor by which the median standard deviation is multiplied to set the threshold.
848 n_snippets: int
849 Number of snippets on which the standard deviations are estimated.
851 Returns
852 -------
853 threshold: float
854 The computed threshold.
855 """
856 if win_size < 10:
857 win_size = 10
858 step = len(data)//n_snippets
859 if step < win_size//2:
860 step = win_size//2
861 stds = np.array([np.std(data[i:i+win_size])
862 for i in range(0, len(data)-win_size, step)])
863 return np.median(stds[stds>0])*thresh_fac
866def hist_threshold(data, win_size=None, thresh_fac=5.0,
867 nbins=100, hist_height=1.0/np.sqrt(np.e)):
868 """Estimate a threshold for peak detection based on a histogram of the data.
870 The standard deviation of the data is estimated from half the
871 width of the histogram of the data at `hist_height` relative
872 height. This estimates the data's standard deviation by ignoring
873 tails of the distribution.
875 However, you need enough data to robustly estimate the histogram.
877 If `win_size` is given, then the threshold is computed for
878 half-overlapping windows of size `win_size` separately. In this
879 case the returned threshold is an array of the same size as data.
880 Without a win_size a single threshold value determined from the
881 whole data array is returned.
883 Parameters
884 ----------
885 data: 1-D array
886 The data to be analyzed.
887 win_size: int or None
888 Size of window in which a threshold value is computed.
889 thresh_fac: float
890 Factor by which the width of the histogram is multiplied to set the threshold.
891 nbins: int or list of floats
892 Number of bins or the bins for computing the histogram.
893 hist_height: float
894 Height between 0 and 1 at which the width of the histogram is computed.
896 Returns
897 -------
898 threshold: float or 1-D array
899 The computed threshold.
900 center: float or 1-D array
901 The center (mean) of the width of the histogram.
903 """
904 if win_size:
905 threshold = np.zeros(len(data))
906 centers = np.zeros(len(data))
907 for inx0 in range(0, len(data) - win_size//2, win_size//2):
908 inx1 = inx0 + win_size
909 std, center = hist_threshold(data[inx0:inx1], win_size=None,
910 thresh_fac=thresh_fac, nbins=nbins,
911 hist_height=hist_height)
912 threshold[inx0:inx1] = std
913 centers[inx0:inx1] = center
914 return threshold, centers
915 else:
916 maxd = np.max(data)
917 mind = np.min(data)
918 contrast = np.abs((maxd - mind)/(maxd + mind))
919 if contrast > 1e-8:
920 hist, bins = np.histogram(data, nbins, density=False)
921 inx = hist > np.max(hist) * hist_height
922 lower = bins[0:-1][inx][0]
923 upper = bins[1:][inx][-1] # needs to return the next bin
924 center = 0.5 * (lower + upper)
925 std = 0.5 * (upper - lower)
926 else:
927 std = np.std(data)
928 center = np.mean(data)
929 return std * thresh_fac, center
932def minmax_threshold(data, win_size=None, thresh_fac=0.8):
933 """Estimate a threshold for peak detection based on minimum and maximum values of the data.
935 The threshold is computed as the difference between maximum and
936 minimum value of the data multiplied with `thresh_fac`.
938 If `win_size` is given, then the threshold is computed for
939 half-overlapping windows of size `win_size` separately. In this
940 case the returned threshold is an array of the same size as data.
941 Without a win_size a single threshold value determined from the
942 whole data array is returned.
944 Parameters
945 ----------
946 data: 1-D array
947 The data to be analyzed.
948 win_size: int or None
949 Size of window in which a threshold value is computed.
950 thresh_fac: float
951 Factor by which the difference between minimum and maximum data value
952 is multiplied to set the threshold.
954 Returns
955 -------
956 threshold: float or 1-D array
957 The computed threshold.
959 """
960 if win_size:
961 threshold = np.zeros(len(data))
962 for inx0 in range(0, len(data) - win_size//2, win_size//2):
963 inx1 = inx0 + win_size
964 window_min = np.min(data[inx0:inx1])
965 window_max = np.max(data[inx0:inx1])
966 threshold[inx0:inx1] = (window_max - window_min) * thresh_fac
967 return threshold
969 else:
970 return (np.max(data) - np.min(data)) * thresh_fac
973def percentile_threshold(data, win_size=None, thresh_fac=1.0, percentile=1.0):
974 """Estimate a threshold for peak detection based on an inter-percentile range of the data.
976 The threshold is computed as the range between the percentile and
977 100.0-percentile percentiles of the data multiplied with
978 thresh_fac.
980 For very small values of `percentile` the estimated threshold
981 approaches the one returned by `minmax_threshold()` (for same
982 values of `thresh_fac`). For `percentile=16.0` and Gaussian
982 distributed data, the returned threshold is twice the one returned
984 by `std_threshold()` or `hist_threshold()`, i.e. twice the
985 standard deviation.
987 If you have knowledge about how many data points are in the tails of
988 the distribution, then this method is preferred over
989 `hist_threshold()`. For example, if you expect peaks that you want
990 to detect using `detect_peaks()` at an average rate of 10Hz and
991 these peaks are about 1ms wide, then you have a 1ms peak per 100ms
992 period, i.e. the peaks make up 1% of the distribution. So you should
993 set `percentile=1.0` or lower. For much lower percentile values, you
994 might choose `thresh_fac` slightly smaller than one to also capture
995 smaller peaks. Setting `percentile` slightly higher might not change
996 the estimated threshold too much, since you then start to include the
997 densely populated noise floor, but you may want to set
998 `thresh_fac` larger than one to reduce false detections.
1000 If `win_size` is given, then the threshold is computed for
1001 half-overlapping windows of size `win_size` separately. In this
1002 case the returned threshold is an array of the same size as data.
1003 Without a win_size a single threshold value determined from the
1004 whole data array is returned.
1006 Parameters
1007 ----------
1008 data: 1-D array
1009 The data to be analyzed.
1010 win_size: int or None
1011 Size of window in which a threshold value is computed.
1012 percentile: float
1013 The interpercentile range is computed at percentile and
1014 100.0-percentile.
1015 If zero, compute maximum minus minimum data value as the
1016 interpercentile range.
1017 thresh_fac: float
1018 Factor by which the inter-percentile range of the data is
1019 multiplied to set the threshold.
1021 Returns
1022 -------
1023 threshold: float or 1-D array
1024 The computed threshold.
1026 """
1027 if percentile < 1e-8:
1028 return minmax_threshold(data, win_size=win_size,
1029 thresh_fac=thresh_fac)
1030 if win_size:
1031 threshold = np.zeros(len(data))
1032 for inx0 in range(0, len(data) - win_size//2, win_size//2):
1033 inx1 = inx0 + win_size
1034 threshold[inx0:inx1] = np.squeeze(np.abs(np.diff(
1035 np.percentile(data[inx0:inx1], [100.0 - percentile, percentile])))) * thresh_fac
1036 return threshold
1037 else:
1038 return np.squeeze(np.abs(np.diff(
1039 np.percentile(data, [100.0 - percentile, percentile])))) * thresh_fac
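Following the reasoning above, a hedged sketch for sparse, brief peaks (values are illustrative, not prescriptive):

```python
# Sketch: threshold from the 1%-99% interpercentile range of the data.
thresh = percentile_threshold(data, thresh_fac=1.0, percentile=1.0)
peaks, troughs = detect_peaks(data, thresh)
```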
1042def snippets(data, indices, start=-10, stop=10):
1043 """Cut out data around each position given in indices.
1045 Parameters
1046 ----------
1047 data: 1-D array
1048 Data array from which snippets are extracted.
1049 indices: list of int
1050 Indices around which snippets are cut out.
1051 start: int
1052 Each snippet starts at index + start.
1053 stop: int
1054 Each snippet ends at index + stop.
1056 Returns
1057 -------
1058 snippet_data: 2-D array
1059 The snippets: first index number of snippet, second index time.
1060 """
1061 idxs = indices[(indices>=-start) & (indices<len(data)-stop)]
1062 snippet_data = np.empty((len(idxs), stop-start))
1063 for k, idx in enumerate(idxs):
1064 snippet_data[k] = data[idx+start:idx+stop]
1065 # XXX alternative: check speed and behavior for empty idxs
1066 # snippets = np.vstack([data[idx+start:idx+stop] for idx in idxs])
1067 return snippet_data
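A short sketch, reusing `data` and `peaks` from the detection sketch above (assumed 1 kHz sampling rate):

```python
# Sketch: cut out 20 samples (20 ms at 1 kHz) around each detected peak.
snips = snippets(data, peaks, start=-10, stop=10)
print(snips.shape)  # (number of peaks that fit into the data, 20)
```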
1070def detect_dynamic_peaks(data, threshold, min_thresh, tau, time=None,
1071 check_peak_func=None, check_trough_func=None, **kwargs):
1072 """Detect peaks and troughs using a relative threshold.
1074 The threshold decays dynamically towards min_thresh with time
1075 constant tau. Use `check_peak_func` or `check_trough_func` to
1076 reset the threshold to an appropriate size.
1078 Based on Bryan S. Todd and David C. Andrews (1999): The
1079 identification of peaks in physiological signals. Computers and
1080 Biomedical Research 32, 322-335.
1082 Parameters
1083 ----------
1084 data: array
1085 A 1-D array of input data where peaks are detected.
1086 threshold: float
1087 A positive number setting the minimum distance between peaks and troughs.
1088 min_thresh: float
1089 The minimum value the threshold is allowed to assume.
1090 tau: float
1091 The time constant of the decay of the threshold value
1092 given in indices (`time` is None) or time units (`time` is not `None`).
1093 time: array
1094 The (optional) 1-D array with the time corresponding to the data values.
1095 check_peak_func: function
1096 An optional function to be used for further evaluating and analysing a peak.
1097 The signature of the function is
1099 ```
1100 r, th = check_peak_func(time, data, peak_inx, index, min_inx, threshold, **kwargs)
1101 ```
1103 with the arguments:
1105 - time (array): the full time array that might be None
1106 - data (array): the full data array
1107 - peak_inx (int): the index of the detected peak
1108 - index (int): the current index
1109 - min_inx (int): the index of the trough preceding the peak (might be 0)
1110 - threshold (float): the threshold value
1111 - min_thresh (float): the minimum value the threshold is allowed to assume.
1112 - tau (float): the time constant of the decay of the threshold value
1113 given in indices (time is None) or time units (time is not None)
1114 - **kwargs: further keyword arguments provided by the user
1115 - r (scalar or np.array): a single number or an array with properties of the peak or None to skip the peak
1116 - th (float): a new value for the threshold or None (to keep the original value)
1117 check_trough_func: function
1118 An optional function to be used for further evaluating and analysing a trough.
1119 The signature of the function is
1121 ```
1122 r, th = check_trough_func(time, data, trough_inx, index, max_inx, threshold, **kwargs)
1123 ```
1125 with the arguments:
1127 - time (array): the full time array that might be None
1128 - data (array): the full data array
1129 - trough_inx (int): the index of the detected trough
1130 - index (int): the current index
1131 - max_inx (int): the index of the peak preceding the trough (might be 0)
1132 - threshold (float): the threshold value
1133 - min_thresh (float): the minimum value the threshold is allowed to assume.
1134 - tau (float): the time constant of the decay of the threshold value
1135 given in indices (time is None) or time units (time is not None)
1136 - **kwargs: further keyword arguments provided by the user
1137 - r (scalar or np.array): a single number or an array with properties of the trough or None to skip the trough
1138 - th (float): a new value for the threshold or None (to keep the original value)
1139 kwargs: key-word arguments
1140 Arguments passed on to `check_peak_func` and `check_trough_func`.
1142 Returns
1143 -------
1144 peak_list: array
1145 List of peaks.
1146 trough_list: array
1147 List of troughs.
1149 - If time is `None` and no `check_peak_func` is given,
1150 then these are lists of the indices where the peaks/troughs occur.
1151 - If `time` is given and no `check_peak_func`/`check_trough_func` is given,
1152 then these are lists of the times where the peaks/troughs occur.
1153 - If `check_peak_func` or `check_trough_func` is given,
1154 then these are lists of whatever `check_peak_func`/`check_trough_func` return.
1156 Raises
1157 ------
1158 ValueError:
1159 If `threshold <= 0` or `min_thresh <= 0` or `tau <= 0`.
1160 IndexError:
1161 If `data` and `time` arrays differ in length.
1163 """
1164 if threshold <= 0:
1165 raise ValueError('input argument threshold must be positive!')
1166 if min_thresh <= 0:
1167 raise ValueError('input argument min_thresh must be positive!')
1168 if tau <= 0:
1169 raise ValueError('input argument tau must be positive!')
1170 if time is not None and len(data) != len(time):
1171 raise IndexError('input arrays time and data must have same length!')
1173 peaks_list = list()
1174 troughs_list = list()
1176 # initialize:
1177 direction = 0
1178 min_inx = 0
1179 max_inx = 0
1180 min_value = data[0]
1181 max_value = min_value
1183 # loop through the data:
1184 for index, value in enumerate(data):
1186 # decaying threshold (first order low pass filter):
1187 if time is None:
1188 threshold += (min_thresh - threshold) / tau
1189 else:
1190 idx = index
1191 if idx + 1 >= len(data):
1192 idx = len(data) - 2
1193 threshold += (min_thresh - threshold) * (time[idx + 1] - time[idx]) / tau
1195 # rising?
1196 if direction > 0:
1197 # if the new value is bigger than the old maximum: set it as new maximum:
1198 if value > max_value:
1199 max_inx = index # maximum element
1200 max_value = value
1202 # otherwise, if the new value is falling below the maximum value minus the threshold:
1203 # the maximum is a peak!
1204 elif max_value >= value + threshold:
1205 # check and update peak with the check_peak_func function:
1206 if check_peak_func:
1207 r, th = check_peak_func(time, data, max_inx, index,
1208 min_inx, threshold,
1209 min_thresh=min_thresh, tau=tau, **kwargs)
1210 if r is not None:
1211 # this really is a peak:
1212 peaks_list.append(r)
1213 if th is not None:
1214 threshold = th
1215 if threshold < min_thresh:
1216 threshold = min_thresh
1217 else:
1218 # this is a peak:
1219 if time is None:
1220 peaks_list.append(max_inx)
1221 else:
1222 peaks_list.append(time[max_inx])
1224 # change direction:
1225 min_inx = index # minimum element
1226 min_value = value
1227 direction = -1
1229 # falling?
1230 elif direction < 0:
1231 if value < min_value:
1232 min_inx = index # minimum element
1233 min_value = value
1235 elif value >= min_value + threshold:
1236 # there was a trough:
1238 # check and update trough with the check_trough function:
1239 if check_trough_func:
1240 r, th = check_trough_func(time, data, min_inx, index,
1241 max_inx, threshold,
1242 min_thresh=min_thresh, tau=tau, **kwargs)
1243 if r is not None:
1244 # this really is a trough:
1245 troughs_list.append(r)
1246 if th is not None:
1247 threshold = th
1248 if threshold < min_thresh:
1249 threshold = min_thresh
1250 else:
1251 # this is a trough:
1252 if time is None:
1253 troughs_list.append(min_inx)
1254 else:
1255 troughs_list.append(time[min_inx])
1257 # change direction:
1258 max_inx = index # maximum element
1259 max_value = value
1260 direction = 1
1262 # don't know direction yet:
1263 else:
1264 if max_value >= value + threshold:
1265 direction = -1 # falling
1266 elif value >= min_value + threshold:
1267 direction = 1 # rising
1269 if max_value < value:
1270 max_inx = index # maximum element
1271 max_value = value
1273 elif value < min_value:
1274 min_inx = index # minimum element
1275 min_value = value
1277 return np.asarray(peaks_list), np.asarray(troughs_list)
1280def accept_peak_size_threshold(time, data, event_inx, index, min_inx, threshold,
1281 min_thresh, tau, thresh_ampl_fac=0.75, thresh_weight=0.02):
1282 """Accept each detected peak/trough and return its index or time.
1284 Adjust the threshold to the size of the detected peak.
1285 To be passed to the `detect_dynamic_peaks()` function.
1287 Parameters
1288 ----------
1289 time: array
1290 Time values, can be `None`.
1291 data: array
1292 The data in which peaks and troughs are detected.
1293 event_inx: int
1294 Index of the current peak/trough.
1295 index: int
1296 Current index.
1297 min_inx: int
1298 Index of the previous trough/peak.
1299 threshold: float
1300 Threshold value.
1301 min_thresh: float
1302 The minimum value the threshold is allowed to assume.
1303 tau: float
1304 The time constant of the decay of the threshold value
1305 given in indices (`time` is `None`) or time units (`time` is not `None`).
1306 thresh_ampl_fac: float
1307 The new threshold is `thresh_ampl_fac` times the size of the current peak.
1308 thresh_weight: float
1309 New threshold is weighted against current threshold with `thresh_weight`.
1311 Returns
1312 -------
1313 index: int
1314 Index of the peak/trough if `time` is `None`.
1315 time: float
1316 Time of the peak/trough if `time` is not `None`.
1317 threshold: float
1318 The new threshold to be used.
1319 """
1320 size = data[event_inx] - data[min_inx]
1321 threshold += thresh_weight * (thresh_ampl_fac * size - threshold)
1322 if time is None:
1323 return event_inx, threshold
1324 else:
1325 return time[event_inx], threshold
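A hedged sketch of how the two functions might be combined, continuing the synthetic `time` and `data` from the first sketch (parameter values are arbitrary):

```python
# Sketch: dynamic peak detection where each accepted peak pulls the threshold
# towards thresh_ampl_fac times its size, while the threshold otherwise decays
# back towards min_thresh with a time constant of 0.5 s.
peak_times, trough_times = detect_dynamic_peaks(data, 1.0, 0.3, 0.5, time,
                                                check_peak_func=accept_peak_size_threshold,
                                                thresh_ampl_fac=0.8, thresh_weight=0.02)
```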
1328def main():
1329 print('Checking eventdetection module ...')
1330 print('')
1331 # generate data:
1332 dt = 0.001
1333 time = np.arange(0.0, 10.0, dt)
1334 f = 2.0
1335 data = (0.5 * np.sin(2.0 * np.pi * f * time) + 0.5) ** 4.0
1336 data += -0.1 * time * (time - 10.0)
1337 data += 0.1 * np.random.randn(len(data))
1339 print(f'generated waveform with {int(np.round(time[-1] * f))} peaks')
1340 plt.plot(time, data)
1342 print('')
1343 print('check detect_peaks(data, 1.0)...')
1344 peaks, troughs = detect_peaks(data, 1.0)
1345 # print peaks:
1346 print(f'detected {len(peaks)} peaks with period {np.mean(np.diff(peaks)):.1f} that differs from the real frequency by {f - 1.0 / np.mean(np.diff(peaks)) / np.mean(np.diff(time)):.3f}')
1347 # print troughs:
1348 print(f'detected {len(troughs)} troughs with period {np.mean(np.diff(troughs)):.1f} that differs from the real frequency by {f - 1.0 / np.mean(np.diff(troughs)) / np.mean(np.diff(time)):.3f}')
1350 # plot peaks and troughs:
1351 plt.plot(time[peaks], data[peaks], '.r', ms=20)
1352 plt.plot(time[troughs], data[troughs], '.g', ms=20)
1354 # detect threshold crossings:
1355 onsets, offsets = threshold_crossings(data, 3.0)
1356 onsets, offsets = merge_events(onsets, offsets, int(0.5/f/dt))
1357 plt.plot(time, 3.0*np.ones(len(time)), 'k')
1358 plt.plot(time[onsets], data[onsets], '.c', ms=20)
1359 plt.plot(time[offsets], data[offsets], '.b', ms=20)
1361 plt.ylim(-0.5, 4.0)
1362 plt.show()
1364 # timing of the detect_peaks() algorithm:
1365 import timeit
1366 def wrapper(func, *args, **kwargs):
1367 def wrapped():
1368 return func(*args, **kwargs)
1369 return wrapped
1370 wrapped = wrapper(detect_peaks, data, 1.0)
1371 t1 = timeit.timeit(wrapped, number=200)
1372 print(t1)
1375if __name__ == '__main__':
1376 main()