1"""Detect and handle peaks and troughs as well as threshold crossings in data arrays.
3## Peak detection
5- `detect_peaks()`: detect peaks and troughs using a relative threshold.
6- `peak_width()`: compute width of each peak.
7- `peak_size_width()`: compute size and width of each peak.
9## Threshold crossings
11- `threshold_crossings()`: detect crossings of an absolute threshold.
12- `threshold_crossing_times()`: compute times of threshold crossings by linear interpolation.
14## Event manipulation
16- `trim()`: make the list of peaks and troughs the same length.
17- `trim_to_peak()`: ensure that the peak is first.
18- `trim_closest()`: ensure that peaks minus troughs is smallest.
20- `merge_events()`: Merge events if they are closer than a minimum distance.
21- `remove_events()`: Remove events that are too short or too long.
22- `widen_events()`: Enlarge events on both sides without overlap.
24## Threshold estimation
26- `std_threshold()`: estimate detection threshold based on the standard deviation.
27- `median_std_threshold()`: estimate detection threshold based on the median standard deviation of data snippets.
28- `hist_threshold()`: esimate detection threshold based on a histogram of the data.
29- `minmax_threshold()`: estimate detection threshold based on maximum minus minimum value.
30- `percentile_threshold()`: estimate detection threshold based on interpercentile range.
32## Snippets
34- `snippets()`: cut out data snippets around a list of indices.
36## Peak detection with dynamic threshold:
38- `detect_dynamic_peaks()`: peak and trough detection with a dynamically adapted threshold.
39- `accept_peak_size_threshold()`: adapt the dection threshold to the size of the detected peaks.
40"""
import sys
import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    pass

try:
    from numba import jit
except ImportError:
    def jit(*args, **kwargs):
        def decorator_jit(func):
            return func
        return decorator_jit


def detect_peaks(data, threshold):
    """Detect peaks and troughs using a relative threshold.

    This is an implementation of the algorithm by
    Bryan S. Todd and David C. Andrews (1999): The identification of peaks in physiological signals.
    Computers and Biomedical Research 32, 322-335.

    Parameters
    ----------
    data: array
        A 1-D array of input data where peaks are detected.
    threshold: float or array of floats
        A positive number or array of numbers setting the detection threshold,
        i.e. the minimum distance between peaks and troughs.
        In case of an array make sure that the threshold does not change faster
        than the expected intervals between peaks and troughs.

    Returns
    -------
    peaks: array of ints
        Array of indices of detected peaks.
    troughs: array of ints
        Array of indices of detected troughs.

    Raises
    ------
    ValueError:
        If `threshold <= 0`.
    IndexError:
        If `data` and `threshold` arrays differ in length.
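
    Examples
    --------
    A minimal sketch of typical usage on made-up data (the sine wave
    and the threshold of 0.5 are chosen for illustration only):

    ```
    import numpy as np
    from thunderlab.eventdetection import detect_peaks

    rate = 1000.0                      # assumed sampling rate in Hertz
    time = np.arange(0.0, 2.0, 1.0/rate)
    data = np.sin(2.0*np.pi*2.0*time) + 0.1*np.random.randn(len(time))
    peaks, troughs = detect_peaks(data, 0.5)
    peak_times = time[peaks]           # convert peak indices to times
    ```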
88 """
89 if np.isscalar(threshold):
90 if threshold <= 0:
91 raise ValueError('threshold value must be positive!')
92 return detect_peaks_fixed(data, threshold)
93 else:
94 if len(data) != len(threshold):
95 raise IndexError('input arrays data and threshold must have same length!')
96 if np.min(threshold) <= 0:
97 raise ValueError('threshold values must be positive!')
98 return detect_peaks_array(data, threshold)


@jit(nopython=True)
def detect_peaks_fixed(data, threshold):
    """Detect peaks and troughs using a fixed, relative threshold.

    Helper function for detect_peaks().

    Parameters
    ----------
    data: array
        A 1-D array of input data where peaks are detected.
    threshold: float
        A positive number setting the detection threshold,
        i.e. the minimum distance between peaks and troughs.

    Returns
    -------
    peaks: array of ints
        Array of indices of detected peaks.
    troughs: array of ints
        Array of indices of detected troughs.
    """
    peaks = []
    troughs = []

    # initialize:
    direction = 0
    min_inx = 0
    max_inx = 0
    min_value = data[0]
    max_value = min_value

    # loop through the data:
    for index, value in enumerate(data):
        # rising?
        if direction > 0:
            if value > max_value:
                # update maximum element:
                max_inx = index
                max_value = value
            # otherwise, if the new value is falling below
            # the maximum value minus the threshold:
            # the maximum is a peak!
            elif value <= max_value - threshold:
                peaks.append(max_inx)
                # change direction:
                direction = -1
                # store minimum element:
                min_inx = index
                min_value = value

        # falling?
        elif direction < 0:
            if value < min_value:
                # update minimum element:
                min_inx = index
                min_value = value
            # otherwise, if the new value is rising above
            # the minimum value plus the threshold:
            # the minimum is a trough!
            elif value >= min_value + threshold:
                troughs.append(min_inx)
                # change direction:
                direction = +1
                # store maximum element:
                max_inx = index
                max_value = value

        # don't know direction yet:
        else:
            if value <= max_value - threshold:
                direction = -1  # falling
            elif value >= min_value + threshold:
                direction = 1   # rising

            if value > max_value:
                # update maximum element:
                max_inx = index
                max_value = value
            elif value < min_value:
                # update minimum element:
                min_inx = index
                min_value = value

    return np.asarray(peaks, dtype=np.int64), \
        np.asarray(troughs, dtype=np.int64)


@jit(nopython=True)
def detect_peaks_array(data, threshold):
    """Detect peaks and troughs using a variable relative threshold.

    Helper function for detect_peaks().

    Parameters
    ----------
    data: array
        A 1-D array of input data where peaks are detected.
    threshold: array
        An array of positive numbers setting the detection threshold,
        i.e. the minimum distance between peaks and troughs.

    Returns
    -------
    peaks: array of ints
        Array of indices of detected peaks.
    troughs: array of ints
        Array of indices of detected troughs.
    """
    peaks = []
    troughs = []

    # initialize:
    direction = 0
    min_inx = 0
    max_inx = 0
    min_value = data[0]
    max_value = min_value

    # loop through the data:
    for index, value in enumerate(data):
        # rising?
        if direction > 0:
            if value > max_value:
                # update maximum element:
                max_inx = index
                max_value = value
            # otherwise, if the new value is falling below
            # the maximum value minus the threshold:
            # the maximum is a peak!
            elif value <= max_value - threshold[index]:
                peaks.append(max_inx)
                # change direction:
                direction = -1
                # store minimum element:
                min_inx = index
                min_value = value

        # falling?
        elif direction < 0:
            if value < min_value:
                # update minimum element:
                min_inx = index
                min_value = value
            # otherwise, if the new value is rising above
            # the minimum value plus the threshold:
            # the minimum is a trough!
            elif value >= min_value + threshold[index]:
                troughs.append(min_inx)
                # change direction:
                direction = +1
                # store maximum element:
                max_inx = index
                max_value = value

        # don't know direction yet:
        else:
            if value <= max_value - threshold[index]:
                direction = -1  # falling
            elif value >= min_value + threshold[index]:
                direction = 1   # rising

            if value > max_value:
                # update maximum element:
                max_inx = index
                max_value = value
            elif value < min_value:
                # update minimum element:
                min_inx = index
                min_value = value

    return np.asarray(peaks, dtype=np.int64), \
        np.asarray(troughs, dtype=np.int64)


def peak_width(time, data, peak_indices, trough_indices,
               peak_frac=0.5, base='max'):
    """Width of each peak.

    Peak width is computed from interpolated threshold crossings at
    `peak_frac` height of each peak.

    Parameters
    ----------
    time: array
        Time, must not be `None`.
    data: array
        The data with the peaks.
    peak_indices: array
        Indices of the peaks.
    trough_indices: array
        Indices of corresponding troughs.
    peak_frac: float
        Fraction of peak height where its width is measured.
    base: string
        Height and width of peak is measured relative to

        - 'left': trough to the left
        - 'right': trough to the right
        - 'min': the minimum of the two troughs to the left and to the right
        - 'max': the maximum of the two troughs to the left and to the right
        - 'mean': mean of the troughs to the left and to the right
        - 'closest': trough that is closest to peak

    Returns
    -------
    widths: array
        Width at `peak_frac` height of each peak.

    Raises
    ------
    ValueError:
        If an invalid value is passed to `base`.
    """
    def left_base(data, left_inx, right_inx, peak_inx):
        return data[left_inx]
    def right_base(data, left_inx, right_inx, peak_inx):
        return data[right_inx]
    def min_base(data, left_inx, right_inx, peak_inx):
        return min(data[left_inx], data[right_inx])
    def max_base(data, left_inx, right_inx, peak_inx):
        return max(data[left_inx], data[right_inx])
    def mean_base(data, left_inx, right_inx, peak_inx):
        return np.mean((data[left_inx], data[right_inx]))
    def closest_base(data, left_inx, right_inx, peak_inx):
        return data[left_inx] if peak_inx-left_inx <= right_inx-peak_inx else data[right_inx]

    widths = np.zeros(len(peak_indices))
    if len(peak_indices) == 0:
        return widths
    # we need a trough before and after each peak:
    peak_inx = np.asarray(peak_indices, dtype=int)
    trough_inx = np.asarray(trough_indices, dtype=int)
    if len(trough_inx) == 0 or peak_inx[0] < trough_inx[0]:
        trough_inx = np.hstack((0, trough_inx))
    if peak_inx[-1] > trough_inx[-1]:
        trough_inx = np.hstack((trough_inx, len(data)-1))
    # base for size of peaks:
    base_func = closest_base
    if base == 'left':
        base_func = left_base
    elif base == 'right':
        base_func = right_base
    elif base == 'min':
        base_func = min_base
    elif base == 'max':
        base_func = max_base
    elif base == 'mean':
        base_func = mean_base
    elif base == 'closest':
        base_func = closest_base
    else:
        raise ValueError(f'Invalid value for base ({base})')
    # width of peaks:
    for j in range(len(peak_inx)):
        li = trough_inx[j]
        ri = trough_inx[j+1]
        baseval = base_func(data, li, ri, peak_inx[j])
        thresh = baseval*(1.0-peak_frac) + data[peak_inx[j]]*peak_frac
        inx = li + np.argmax(data[li:ri] > thresh)
        if inx > 0:
            ti0 = np.interp(thresh, data[inx-1:inx+1], time[inx-1:inx+1])
        else:
            ti0 = time[0]
        inx = ri - np.argmax(data[ri:li:-1] > thresh)
        if inx+1 < len(data):
            ti1 = np.interp(thresh, data[inx+1:inx-1:-1], time[inx+1:inx-1:-1])
        else:
            ti1 = time[-1]
        widths[j] = ti1 - ti0
    return widths


def peak_size_width(time, data, peak_indices, trough_indices,
                    peak_frac=0.75, base='closest'):
    """Compute size and width of each peak.

    Parameters
    ----------
    time: array
        Time, must not be `None`.
    data: array
        The data with the peaks.
    peak_indices: array
        Indices of the peaks.
    trough_indices: array
        Indices of the troughs.
    peak_frac: float
        Fraction of peak height where its width is measured.
    base: string
        Height and width of peak is measured relative to

        - 'left': trough to the left
        - 'right': trough to the right
        - 'min': the minimum of the two troughs to the left and to the right
        - 'max': the maximum of the two troughs to the left and to the right
        - 'mean': mean of the troughs to the left and to the right
        - 'closest': trough that is closest to peak

    Returns
    -------
    peaks: 2-D array
        First dimension is the peak index. Second dimension is
        time, height (value of data at the peak),
        size (peak height minus height of closest trough),
        width (at `peak_frac` size), 0.0 (count) of the peak. See `peak_width()`.

    Raises
    ------
    ValueError:
        If an invalid value is passed to `base`.
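
    Examples
    --------
    A minimal sketch of how the returned table might be used; the data
    and the detection threshold are made up for illustration:

    ```
    import numpy as np
    from thunderlab.eventdetection import detect_peaks, peak_size_width

    time = np.arange(0.0, 2.0, 0.001)
    data = np.sin(2.0*np.pi*2.0*time)**4.0
    peaks, troughs = detect_peaks(data, 0.3)
    props = peak_size_width(time, data, peaks, troughs)
    # columns of props: time, height, size, width, count:
    mean_width = np.mean(props[:, 3])
    ```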
411 """
412 def left_base(data, left_inx, right_inx, peak_inx):
413 return data[left_inx]
414 def right_base(data, left_inx, right_inx, peak_inx):
415 return data[right_inx]
416 def min_base(data, left_inx, right_inx, peak_inx):
417 return min(data[left_inx], data[right_inx])
418 def max_base(data, left_inx, right_inx, peak_inx):
419 return max(data[left_inx], data[right_inx])
420 def mean_base(data, left_inx, right_inx, peak_inx):
421 return np.mean((data[left_inx], data[right_inx]))
422 def closest_base(data, left_inx, right_inx, peak_inx):
423 return data[left_inx] if peak_inx-left_inx <= right_inx-peak_inx else data[right_inx]
425 peaks = np.zeros((len(peak_indices), 5))
426 if len(peak_indices) == 0:
427 return peaks
428 # time point of peaks:
429 peaks[:, 0] = time[peak_indices]
430 # height of peaks:
431 peaks[:, 1] = data[peak_indices]
432 # we need a trough before and after each peak:
433 peak_inx = np.asarray(peak_indices, dtype=int)
434 trough_inx = np.asarray(trough_indices, dtype=int)
435 if len(trough_inx) == 0 or peak_inx[0] < trough_inx[0]:
436 trough_inx = np.hstack((0, trough_inx))
438 if peak_inx[-1] > trough_inx[-1]:
439 trough_inx = np.hstack((trough_inx, len(data)-1))
440 # base for size of peaks:
441 base_func = closest_base
442 if base == 'left':
443 base_func = left_base
444 elif base == 'right':
445 base_func = right_base
446 elif base == 'min':
447 base_func = min_base
448 elif base == 'max':
449 base_func = max_base
450 elif base == 'mean':
451 base_func = mean_base
452 elif base == 'closest':
453 base_func = closest_base
455 else:
456 raise ValueError('Invalid value for base ({base})')
457 # size and width of peaks:
458 for j, pi in enumerate(peak_inx):
459 li = trough_inx[j]
460 ri = trough_inx[j+1]
461 baseval = base_func(data, li, ri, pi)
462 thresh = baseval*(1.0-peak_frac) + data[pi]*peak_frac
463 inx = li + np.argmax(data[li:ri] > thresh)
464 if inx > 0:
465 ti0 = np.interp(thresh, data[inx-1:inx+1], time[inx-1:inx+1])
466 else:
467 ti0 = time[0]
468 inx = ri - np.argmax(data[ri:li:-1] > thresh)
469 if inx+1 < len(data):
470 ti1 = np.interp(thresh, data[inx+1:inx-1:-1], time[inx+1:inx-1:-1])
471 else:
472 ti1 = time[-1]
473 if np.any(np.isfinite((data[pi], baseval))):
474 peaks[j, 2] = data[pi] - baseval
475 peaks[j, 3] = ti1 - ti0
476 return peaks


def threshold_crossings(data, threshold):
    """Detect crossings of a threshold with positive and negative slope.

    Parameters
    ----------
    data: array
        A 1-D array of input data where threshold crossings are detected.
    threshold: float or array
        A number or array of numbers setting the threshold
        that needs to be crossed.

    Returns
    -------
    up_indices: array of ints
        A list of indices where the threshold is crossed with positive slope.
    down_indices: array of ints
        A list of indices where the threshold is crossed with negative slope.

    Raises
    ------
    IndexError:
        If `data` and `threshold` arrays differ in length.
    """
    if np.isscalar(threshold):
        up_indices = np.nonzero((data[1:]>threshold) & (data[:-1]<=threshold))[0]
        down_indices = np.nonzero((data[1:]<=threshold) & (data[:-1]>threshold))[0]
    else:
        if len(data) != len(threshold):
            raise IndexError('input arrays data and threshold must have same length!')
        up_indices = np.nonzero((data[1:]>threshold[1:]) & (data[:-1]<=threshold[:-1]))[0]
        down_indices = np.nonzero((data[1:]<=threshold[1:]) & (data[:-1]>threshold[:-1]))[0]
    return up_indices, down_indices


def threshold_crossing_times(time, data, threshold, up_indices, down_indices):
    """Compute times of threshold crossings by linear interpolation.

    Parameters
    ----------
    time: array
        Time, must not be `None`.
    data: array
        The data.
    threshold: float
        A number or array of numbers setting the threshold
        that was crossed.
    up_indices: array of ints
        A list of indices where the threshold is crossed with positive slope.
    down_indices: array of ints
        A list of indices where the threshold is crossed with negative slope.

    Returns
    -------
    up_times: array of floats
        Interpolated times where the threshold is crossed with positive slope.
    down_times: array of floats
        Interpolated times where the threshold is crossed with negative slope.
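
    Examples
    --------
    A minimal sketch of computing interpolated crossing times from the
    indices returned by `threshold_crossings()`; the data and the
    threshold of 0.5 are made up for illustration:

    ```
    import numpy as np
    from thunderlab.eventdetection import threshold_crossings, threshold_crossing_times

    time = np.arange(0.0, 1.0, 0.001)
    data = np.sin(2.0*np.pi*4.0*time)
    up, down = threshold_crossings(data, 0.5)
    up_times, down_times = threshold_crossing_times(time, data, 0.5, up, down)
    ```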
536 """
537 up_times = np.zeros(len(up_indices))
538 for k, inx in enumerate(up_indices):
539 up_times[k] = np.interp(threshold, data[inx:inx+2], time[inx:inx+2])
540 down_times = np.zeros(len(down_indices))
541 for k, inx in enumerate(down_indices):
542 down_times[k] = np.interp(-threshold, -data[inx:inx+2], time[inx:inx+2])
543 return up_times, down_times


def trim(peaks, troughs):
    """Trims the peaks and troughs arrays such that they have the same length.

    Parameters
    ----------
    peaks: array
        List of peak indices or times.
    troughs: array
        List of trough indices or times.

    Returns
    -------
    peaks: array
        List of peak indices or times.
    troughs: array
        List of trough indices or times.
    """
    # common len:
    n = min(len(peaks), len(troughs))
    # align arrays:
    return peaks[:n], troughs[:n]


def trim_to_peak(peaks, troughs):
    """Trims the peaks and troughs arrays such that they have the same length
    and the first peak comes first.

    Parameters
    ----------
    peaks: array
        List of peak indices or times.
    troughs: array
        List of trough indices or times.

    Returns
    -------
    peaks: array
        List of peak indices or times.
    troughs: array
        List of trough indices or times.
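
    Examples
    --------
    A small made-up example illustrating the trimming (the index values
    are arbitrary):

    ```
    import numpy as np
    from thunderlab.eventdetection import trim_to_peak

    peaks = np.array([10, 30, 50])
    troughs = np.array([5, 20, 40, 60])
    p, t = trim_to_peak(peaks, troughs)
    # p is [10, 30, 50] and t is [20, 40, 60]: each peak is followed by a trough
    ```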
586 """
587 # start index for troughs:
588 tidx = 0
589 if len(peaks) > 0 and len(troughs) > 0 and troughs[0] < peaks[0]:
590 tidx = 1
591 # common len:
592 n = min(len(peaks), len(troughs[tidx:]))
593 # align arrays:
594 return peaks[:n], troughs[tidx:tidx + n]


def trim_closest(peaks, troughs):
    """Trims the peaks and troughs arrays such that they have the same length
    and that peaks-troughs is on average as small as possible.

    Parameters
    ----------
    peaks: array
        List of peak indices or times.
    troughs: array
        List of trough indices or times.

    Returns
    -------
    peaks: array
        List of peak indices or times.
    troughs: array
        List of trough indices or times.
    """
    pidx = 0
    tidx = 0
    nn = min(len(peaks), len(troughs))
    if nn == 0:
        return np.array([]), np.array([])
    dist = np.abs(np.mean(peaks[:nn] - troughs[:nn]))
    if len(peaks) == 0 or len(troughs) == 0:
        nn = 0
    else:
        if peaks[0] < troughs[0]:
            nnp = min(len(peaks[1:]), len(troughs))
            distp = np.abs(np.mean(peaks[1:nnp] - troughs[:nnp - 1]))
            if distp < dist:
                pidx = 1
                nn = nnp
        else:
            nnt = min(len(peaks), len(troughs[1:]))
            distt = np.abs(np.mean(peaks[:nnt - 1] - troughs[1:nnt]))
            if distt < dist:
                tidx = 1
                nn = nnt
    # align arrays:
    return peaks[pidx:pidx + nn], troughs[tidx:tidx + nn]


def merge_events(onsets, offsets, min_distance):
    """Merge events if they are closer than a minimum distance.

    If the beginning of an event (onset, peak, or positive threshold crossing)
    is too close to the end of the previous event (offset, trough, or negative
    threshold crossing) the two events are merged into a single one that begins
    with the first one and ends with the second one.

    Parameters
    ----------
    onsets: 1-D array
        The onsets (peaks, or positive threshold crossings) of the events
        as indices or times.
    offsets: 1-D array
        The offsets (troughs, or negative threshold crossings) of the events
        as indices or times.
    min_distance: int or float
        The minimum distance between events. If the beginning of an event is separated
        from the end of the previous event by less than this distance then the two events
        are merged into one. If the event onsets and offsets are given in indices then
        min_distance is also in indices.

    Returns
    -------
    merged_onsets: 1-D array
        The onsets (peaks, or positive threshold crossings) of the merged events
        as indices or times according to onsets.
    merged_offsets: 1-D array
        The offsets (troughs, or negative threshold crossings) of the merged events
        as indices or times according to offsets.
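
    Examples
    --------
    A small made-up example with event indices; events that are closer
    than 10 indices are merged:

    ```
    import numpy as np
    from thunderlab.eventdetection import merge_events

    onsets = np.array([100, 250, 255, 400])
    offsets = np.array([150, 252, 300, 450])
    merged_onsets, merged_offsets = merge_events(onsets, offsets, 10)
    # the second and third event are merged:
    # merged_onsets is [100, 250, 400], merged_offsets is [150, 300, 450]
    ```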
670 """
671 onsets, offsets = trim_to_peak(onsets, offsets)
672 if len(onsets) == 0 or len(offsets) == 0:
673 return np.array([]), np.array([])
674 else:
675 diff = onsets[1:] - offsets[:-1]
676 indices = diff > min_distance
677 merged_onsets = onsets[np.hstack([True, indices])]
678 merged_offsets = offsets[np.hstack([indices, True])]
679 return merged_onsets, merged_offsets


def remove_events(onsets, offsets, min_duration, max_duration=None):
    """Remove events that are too short or too long.

    If the length of an event, i.e. `offset` (offset, trough, or negative
    threshold crossing) minus `onset` (onset, peak, or positive threshold crossing),
    is shorter than `min_duration` or longer than `max_duration`, then this event is
    removed.

    Parameters
    ----------
    onsets: 1-D array
        The onsets (peaks, or positive threshold crossings) of the events
        as indices or times.
    offsets: 1-D array
        The offsets (troughs, or negative threshold crossings) of the events
        as indices or times.
    min_duration: int, float, or None
        The minimum duration of events. If the event offset minus the event onset
        is less than `min_duration`, then the event is removed from the lists.
        If the event onsets and offsets are given in indices then
        `min_duration` is also in indices. If `None` then this test is skipped.
    max_duration: int, float, or None
        The maximum duration of events. If the event offset minus the event onset
        is larger than `max_duration`, then the event is removed from the lists.
        If the event onsets and offsets are given in indices then
        `max_duration` is also in indices. If `None` then this test is skipped.

    Returns
    -------
    onsets: 1-D array
        The onsets (peaks, or positive threshold crossings) of the events
        with too short and too long events removed as indices or times according to onsets.
    offsets: 1-D array
        The offsets (troughs, or negative threshold crossings) of the events
        with too short and too long events removed as indices or times according to offsets.
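
    Examples
    --------
    A small made-up example: events shorter than 10 indices or longer
    than 100 indices are discarded:

    ```
    import numpy as np
    from thunderlab.eventdetection import remove_events

    onsets = np.array([100, 300, 500])
    offsets = np.array([105, 350, 800])
    kept_onsets, kept_offsets = remove_events(onsets, offsets, 10, 100)
    # only the event from 300 to 350 survives
    ```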
717 """
718 onsets, offsets = trim_to_peak(onsets, offsets)
719 if len(onsets) == 0 or len(offsets) == 0:
720 return np.array([]), np.array([])
721 elif min_duration is not None or max_duration is not None:
722 diff = offsets - onsets
723 if min_duration is not None and max_duration is not None:
724 indices = (diff > min_duration) & (diff < max_duration)
725 elif min_duration is not None:
726 indices = diff > min_duration
727 else:
728 indices = diff < max_duration
729 onsets = onsets[indices]
730 offsets = offsets[indices]
731 return onsets, offsets


def widen_events(onsets, offsets, max_time, duration):
    """Enlarge events on both sides without overlap.

    Subtracts `duration` from the `onsets` and adds `duration` to the offsets.
    If two succeeding events are separated by less than two times the
    `duration`, then the offset of the previous event and the onset of
    the following event are set at the center between the two events.

    Parameters
    ----------
    onsets: 1-D array
        The onsets (peaks, or positive threshold crossings) of the events
        as indices or times.
    offsets: 1-D array
        The offsets (troughs, or negative threshold crossings) of the events
        as indices or times.
    max_time: int or float
        The maximum value for the end of the last event.
        If the event onsets and offsets are given in indices then
        max_time is the maximum possible index, i.e. the length of the
        data array on which the events were detected.
    duration: int or float
        The number of indices or the time by which the events should
        be enlarged.
        If the event onsets and offsets are given in indices then
        duration is also in indices.

    Returns
    -------
    onsets: 1-D array
        The onsets (peaks, or positive threshold crossings) of the enlarged events.
    offsets: 1-D array
        The offsets (troughs, or negative threshold crossings) of the enlarged events.
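
    Examples
    --------
    A small made-up example: each event is widened by 20 indices, but
    the second and third event are too close and meet at their center:

    ```
    import numpy as np
    from thunderlab.eventdetection import widen_events

    onsets = np.array([100, 300, 330])
    offsets = np.array([150, 310, 400])
    wide_onsets, wide_offsets = widen_events(onsets, offsets, 1000, 20)
    # wide_onsets is [80, 280, 320], wide_offsets is [170, 320, 420]
    ```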
768 """
769 new_onsets = []
770 new_offsets = []
771 if len(onsets) > 0:
772 on_idx = onsets[0]
773 new_onsets.append(on_idx - duration if on_idx >= duration else 0)
774 for off_idx, on_idx in zip(offsets[:-1], onsets[1:]):
775 if on_idx - off_idx < 2*duration:
776 mid_idx = (on_idx + off_idx)//2
777 new_offsets.append(mid_idx)
778 new_onsets.append(mid_idx)
779 else:
780 new_offsets.append(off_idx + duration)
781 new_onsets.append(on_idx - duration)
782 if len(offsets) > 0:
783 off_idx = offsets[-1]
784 new_offsets.append(off_idx + duration if off_idx + duration < max_time else max_time)
785 return np.array(new_onsets, dtype=onsets.dtype), np.array(new_offsets, dtype=offsets.dtype)


def std_threshold(data, win_size=None, thresh_fac=5.0):
    """Estimates a threshold for peak detection based on the standard deviation of the data.

    The threshold is computed as the standard deviation of the data
    multiplied with `thresh_fac`.

    In case of Gaussian distributed data, setting `thresh_fac=2.0`
    (two standard deviations) captures 68% of the data,
    `thresh_fac=4.0` captures 95%, and `thresh_fac=6.0` 99.7%.

    If `win_size` is given, then the threshold is computed for
    half-overlapping windows of size `win_size` separately. In this
    case the returned threshold is an array of the same size as data.
    Without a `win_size` a single threshold value determined from the
    whole data array is returned.

    Parameters
    ----------
    data: 1-D array
        The data to be analyzed.
    win_size: int or None
        Size of window in which a threshold value is computed.
    thresh_fac: float
        Factor by which the standard deviation is multiplied to set the threshold.

    Returns
    -------
    threshold: float or 1-D array
        The computed threshold.
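
    Examples
    --------
    A minimal sketch of estimating a threshold and feeding it into
    `detect_peaks()`; the noisy sine data are made up for illustration:

    ```
    import numpy as np
    from thunderlab.eventdetection import std_threshold, detect_peaks

    time = np.arange(0.0, 5.0, 0.001)
    data = np.sin(2.0*np.pi*2.0*time) + 0.1*np.random.randn(len(time))
    thresh = std_threshold(data, thresh_fac=2.0)
    peaks, troughs = detect_peaks(data, thresh)
    ```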
818 """
820 if win_size:
821 threshold = np.zeros(len(data))
822 for inx0 in range(0, len(data) - win_size//2, win_size//2):
823 inx1 = inx0 + win_size
824 std = np.std(data[inx0:inx1], ddof=1)
825 threshold[inx0:inx1] = std * thresh_fac
826 return threshold
827 else:
828 return np.std(data, ddof=1) * thresh_fac


@jit(nopython=True)
def median_std_threshold(data, win_size=100, thresh_fac=6.0, n_snippets=1000):
    """Estimate a threshold for peak detection based on the median standard deviation of data snippets.

    On `n_snippets` snippets of `win_size` size the standard
    deviation of the data is estimated. The returned threshold is the
    median of these standard deviations that are larger than zero
    multiplied by `thresh_fac`.

    Parameters
    ----------
    data: 1-D array of float
        The data to be analyzed.
    win_size: int
        Size of windows on which standard deviations are computed.
    thresh_fac: float
        Factor by which the median standard deviation is multiplied to set the threshold.
    n_snippets: int
        Number of snippets on which the standard deviations are estimated.

    Returns
    -------
    threshold: float
        The computed threshold.
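
    Examples
    --------
    A minimal sketch on made-up data with sparse, artificial spikes; the
    window size and factor are just the defaults and may need tuning for
    real recordings:

    ```
    import numpy as np
    from thunderlab.eventdetection import median_std_threshold, detect_peaks

    rng = np.random.RandomState(42)
    data = 0.1*rng.randn(100000)
    data[::1000] += 2.0                # add some sparse artificial peaks
    thresh = median_std_threshold(data, win_size=100, thresh_fac=6.0)
    peaks, troughs = detect_peaks(data, thresh)
    ```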
855 """
856 if win_size < 10:
857 win_size = 10
858 step = len(data)//n_snippets
859 if step < win_size//2:
860 step = win_size//2
861 stds = np.array([np.std(data[i:i+win_size])
862 for i in range(0, len(data)-win_size, step)])
863 return np.median(stds[stds>0])*thresh_fac


def hist_threshold(data, win_size=None, thresh_fac=5.0,
                   nbins=100, hist_height=1.0/np.sqrt(np.e)):
    """Estimate a threshold for peak detection based on a histogram of the data.

    The standard deviation of the data is estimated from half the
    width of the histogram of the data at `hist_height` relative
    height. This estimates the data's standard deviation by ignoring
    tails of the distribution.

    However, you need enough data to robustly estimate the histogram.

    If `win_size` is given, then the threshold is computed for
    half-overlapping windows of size `win_size` separately. In this
    case the returned threshold is an array of the same size as data.
    Without a win_size a single threshold value determined from the
    whole data array is returned.

    Parameters
    ----------
    data: 1-D array
        The data to be analyzed.
    win_size: int or None
        Size of window in which a threshold value is computed.
    thresh_fac: float
        Factor by which the width of the histogram is multiplied to set the threshold.
    nbins: int or list of floats
        Number of bins or the bins for computing the histogram.
    hist_height: float
        Height between 0 and 1 at which the width of the histogram is computed.

    Returns
    -------
    threshold: float or 1-D array
        The computed threshold.
    center: float or 1-D array
        The center (mean) of the width of the histogram.
    """
    if win_size:
        threshold = np.zeros(len(data))
        centers = np.zeros(len(data))
        for inx0 in range(0, len(data) - win_size//2, win_size//2):
            inx1 = inx0 + win_size
            std, center = hist_threshold(data[inx0:inx1], win_size=None,
                                         thresh_fac=thresh_fac, nbins=nbins,
                                         hist_height=hist_height)
            threshold[inx0:inx1] = std
            centers[inx0:inx1] = center
        return threshold, centers
    else:
        maxd = np.max(data)
        mind = np.min(data)
        contrast = np.abs((maxd - mind)/(maxd + mind))
        if contrast > 1e-8:
            hist, bins = np.histogram(data, nbins, density=False)
            inx = hist > np.max(hist) * hist_height
            lower = bins[0:-1][inx][0]
            upper = bins[1:][inx][-1]  # needs to return the next bin
            center = 0.5 * (lower + upper)
            std = 0.5 * (upper - lower)
        else:
            std = np.std(data)
            center = np.mean(data)
        return std * thresh_fac, center


def minmax_threshold(data, win_size=None, thresh_fac=0.8):
    """Estimate a threshold for peak detection based on minimum and maximum values of the data.

    The threshold is computed as the difference between maximum and
    minimum value of the data multiplied with `thresh_fac`.

    If `win_size` is given, then the threshold is computed for
    half-overlapping windows of size `win_size` separately. In this
    case the returned threshold is an array of the same size as data.
    Without a win_size a single threshold value determined from the
    whole data array is returned.

    Parameters
    ----------
    data: 1-D array
        The data to be analyzed.
    win_size: int or None
        Size of window in which a threshold value is computed.
    thresh_fac: float
        Factor by which the difference between minimum and maximum data value
        is multiplied to set the threshold.

    Returns
    -------
    threshold: float or 1-D array
        The computed threshold.
    """
    if win_size:
        threshold = np.zeros(len(data))
        for inx0 in range(0, len(data) - win_size//2, win_size//2):
            inx1 = inx0 + win_size
            window_min = np.min(data[inx0:inx1])
            window_max = np.max(data[inx0:inx1])
            threshold[inx0:inx1] = (window_max - window_min) * thresh_fac
        return threshold
    else:
        return (np.max(data) - np.min(data)) * thresh_fac


def percentile_threshold(data, win_size=None, thresh_fac=1.0, percentile=1.0):
    """Estimate a threshold for peak detection based on an inter-percentile range of the data.

    The threshold is computed as the range between the percentile and
    100.0-percentile percentiles of the data multiplied with
    thresh_fac.

    For very small values of `percentile` the estimated threshold
    approaches the one returned by `minmax_threshold()` (for same
    values of `thresh_fac`). For `percentile=16.0` and Gaussian
    distributed data, the returned threshold is twice the one returned
    by `std_threshold()` or `hist_threshold()`, i.e. twice the
    standard deviation.

    If you have knowledge about how many data points are in the tails of
    the distribution, then this method is preferred over
    `hist_threshold()`. For example, if you expect peaks that you want
    to detect using `detect_peaks()` at an average rate of 10Hz and
    these peaks are about 1ms wide, then you have a 1ms peak per 100ms
    period, i.e. the peaks make up 1% of the distribution. So you should
    set `percentile=1.0` or lower. For much lower percentile values, you
    might choose `thresh_fac` slightly smaller than one to also capture
    smaller peaks. Setting `percentile` slightly higher might not change
    the estimated threshold too much, since you start incorporating the
    noise floor with a large density, but you may want to set
    `thresh_fac` larger than one to reduce false detections.

    If `win_size` is given, then the threshold is computed for
    half-overlapping windows of size `win_size` separately. In this
    case the returned threshold is an array of the same size as data.
    Without a win_size a single threshold value determined from the
    whole data array is returned.

    Parameters
    ----------
    data: 1-D array
        The data to be analyzed.
    win_size: int or None
        Size of window in which a threshold value is computed.
    thresh_fac: float
        Factor by which the inter-percentile range of the data is
        multiplied to set the threshold.
    percentile: float
        The interpercentile range is computed at percentile and
        100.0-percentile.
        If zero, compute maximum minus minimum data value as the
        interpercentile range.

    Returns
    -------
    threshold: float or 1-D array
        The computed threshold.
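
    Examples
    --------
    A minimal sketch on made-up data where peaks make up about 1% of
    the samples, following the reasoning above:

    ```
    import numpy as np
    from thunderlab.eventdetection import percentile_threshold, detect_peaks

    rng = np.random.RandomState(1)
    data = 0.1*rng.randn(10000)
    data[::100] += 1.0                 # sparse peaks, about 1% of the data
    thresh = percentile_threshold(data, thresh_fac=1.0, percentile=1.0)
    peaks, troughs = detect_peaks(data, thresh)
    ```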
1026 """
1027 if percentile < 1e-8:
1028 return minmax_threshold(data, win_size=win_size,
1029 thresh_fac=thresh_fac)
1030 if win_size:
1031 threshold = np.zeros(len(data))
1032 for inx0 in range(0, len(data) - win_size//2, win_size//2):
1033 inx1 = inx0 + win_size
1034 threshold[inx0:inx1] = np.squeeze(np.abs(np.diff(
1035 np.percentile(data[inx0:inx1], [100.0 - percentile, percentile])))) * thresh_fac
1036 return threshold
1037 else:
1038 return np.squeeze(np.abs(np.diff(
1039 np.percentile(data, [100.0 - percentile, percentile])))) * thresh_fac


def snippets(data, indices, start=-10, stop=10):
    """Cut out data around each position given in indices.

    Parameters
    ----------
    data: 1-D array
        Data array from which snippets are extracted.
    indices: list of int
        Indices around which snippets are cut out.
    start: int
        Each snippet starts at index + start.
    stop: int
        Each snippet ends at index + stop.

    Returns
    -------
    snippet_data: 2-D array
        The snippets: first index number of snippet, second index time.
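
    Examples
    --------
    A minimal sketch of cutting out 20-sample windows around detected
    peaks; the data are made up for illustration:

    ```
    import numpy as np
    from thunderlab.eventdetection import detect_peaks, snippets

    data = np.sin(2.0*np.pi*2.0*np.arange(0.0, 5.0, 0.001))
    peaks, troughs = detect_peaks(data, 0.5)
    snips = snippets(data, peaks, start=-10, stop=10)
    mean_peak = np.mean(snips, axis=0)   # average peak waveform
    ```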
1060 """
1061 idxs = indices[(indices>=-start) & (indices<len(data)-stop)]
1062 snippet_data = np.empty((len(idxs), stop-start))
1063 for k, idx in enumerate(idxs):
1064 snippet_data[k] = data[idx+start:idx+stop]
1065 # XXX alternative: check speed and behavior for empty idxs
1066 # snippets = np.vstack([data[idx+start:idx+stop] for idx in idxs])
1067 return snippet_data


def detect_dynamic_peaks(data, threshold, min_thresh, tau, time=None,
                         check_peak_func=None, check_trough_func=None, **kwargs):
    """Detect peaks and troughs using a relative threshold.

    The threshold decays dynamically towards min_thresh with time
    constant tau. Use `check_peak_func` or `check_trough_func` to
    reset the threshold to an appropriate size.

    Based on Bryan S. Todd and David C. Andrews (1999): The
    identification of peaks in physiological signals. Computers and
    Biomedical Research 32, 322-335.

    Parameters
    ----------
    data: array
        A 1-D array of input data where peaks are detected.
    threshold: float
        A positive number setting the minimum distance between peaks and troughs.
    min_thresh: float
        The minimum value the threshold is allowed to assume.
    tau: float
        The time constant of the decay of the threshold value
        given in indices (`time` is None) or time units (`time` is not `None`).
    time: array
        The (optional) 1-D array with the time corresponding to the data values.
    check_peak_func: function
        An optional function to be used for further evaluating and analysing a peak.
        The signature of the function is

        ```
        r, th = check_peak_func(time, data, peak_inx, index, min_inx, threshold, **kwargs)
        ```

        with the arguments:

        - time (array): the full time array that might be None
        - data (array): the full data array
        - peak_inx (int): the index of the detected peak
        - index (int): the current index
        - min_inx (int): the index of the trough preceding the peak (might be 0)
        - threshold (float): the threshold value
        - min_thresh (float): the minimum value the threshold is allowed to assume.
        - tau (float): the time constant of the decay of the threshold value
          given in indices (time is None) or time units (time is not None)
        - **kwargs: further keyword arguments provided by the user
        - r (scalar or np.array): a single number or an array with properties of the peak or None to skip the peak
        - th (float): a new value for the threshold or None (to keep the original value)
    check_trough_func: function
        An optional function to be used for further evaluating and analysing a trough.
        The signature of the function is

        ```
        r, th = check_trough_func(time, data, trough_inx, index, max_inx, threshold, **kwargs)
        ```

        with the arguments:

        - time (array): the full time array that might be None
        - data (array): the full data array
        - trough_inx (int): the index of the detected trough
        - index (int): the current index
        - max_inx (int): the index of the peak preceding the trough (might be 0)
        - threshold (float): the threshold value
        - min_thresh (float): the minimum value the threshold is allowed to assume.
        - tau (float): the time constant of the decay of the threshold value
          given in indices (time is None) or time units (time is not None)
        - **kwargs: further keyword arguments provided by the user
        - r (scalar or np.array): a single number or an array with properties of the trough or None to skip the trough
        - th (float): a new value for the threshold or None (to keep the original value)
    kwargs: key-word arguments
        Arguments passed on to `check_peak_func` and `check_trough_func`.

    Returns
    -------
    peak_list: array
        List of peaks.
    trough_list: array
        List of troughs.

        - If time is `None` and no `check_peak_func` is given,
          then these are lists of the indices where the peaks/troughs occur.
        - If `time` is given and no `check_peak_func`/`check_trough_func` is given,
          then these are lists of the times where the peaks/troughs occur.
        - If `check_peak_func` or `check_trough_func` is given,
          then these are lists of whatever `check_peak_func`/`check_trough_func` return.

    Raises
    ------
    ValueError:
        If `threshold <= 0` or `min_thresh <= 0` or `tau <= 0`.
    IndexError:
        If `data` and `time` arrays differ in length.
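
    Examples
    --------
    A minimal sketch using `accept_peak_size_threshold()` to adapt the
    threshold to the detected peak sizes; the data and parameter values
    are made up for illustration:

    ```
    import numpy as np
    from thunderlab.eventdetection import detect_dynamic_peaks, accept_peak_size_threshold

    time = np.arange(0.0, 5.0, 0.001)
    data = np.sin(2.0*np.pi*2.0*time) + 0.1*np.random.randn(len(time))
    peak_times, trough_times = detect_dynamic_peaks(
        data, 1.0, min_thresh=0.3, tau=1.0, time=time,
        check_peak_func=accept_peak_size_threshold,
        check_trough_func=accept_peak_size_threshold)
    ```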
1163 """
1164 if threshold <= 0:
1165 raise ValueError('input argument threshold must be positive!')
1166 if min_thresh <= 0:
1167 raise ValueError('input argument min_thresh must be positive!')
1168 if tau <= 0:
1169 raise ValueError('input argument tau must be positive!')
1170 if time is not None and len(data) != len(time):
1171 raise IndexError('input arrays time and data must have same length!')
1173 peaks_list = list()
1174 troughs_list = list()
1176 # initialize:
1177 direction = 0
1178 min_inx = 0
1179 max_inx = 0
1180 min_value = data[0]
1181 max_value = min_value
1183 # loop through the data:
1184 for index, value in enumerate(data):
1186 # decaying threshold (first order low pass filter):
1187 if time is None:
1188 threshold += (min_thresh - threshold) / tau
1189 else:
1190 idx = index
1191 if idx + 1 >= len(data):
1192 idx = len(data) - 2
1193 threshold += (min_thresh - threshold) * (time[idx + 1] - time[idx]) / tau
1195 # rising?
1196 if direction > 0:
1197 # if the new value is bigger than the old maximum: set it as new maximum:
1198 if value > max_value:
1199 max_inx = index # maximum element
1200 max_value = value
1202 # otherwise, if the new value is falling below the maximum value minus the threshold:
1203 # the maximum is a peak!
1204 elif max_value >= value + threshold:
1205 # check and update peak with the check_peak_func function:
1206 if check_peak_func:
1207 r, th = check_peak_func(time, data, max_inx, index,
1208 min_inx, threshold,
1209 min_thresh=min_thresh, tau=tau, **kwargs)
1210 if r is not None:
1211 # this really is a peak:
1212 peaks_list.append(r)
1213 if th is not None:
1214 threshold = th
1215 if threshold < min_thresh:
1216 threshold = min_thresh
1217 else:
1218 # this is a peak:
1219 if time is None:
1220 peaks_list.append(max_inx)
1221 else:
1222 peaks_list.append(time[max_inx])
1224 # change direction:
1225 min_inx = index # minimum element
1226 min_value = value
1227 direction = -1
1229 # falling?
1230 elif direction < 0:
1231 if value < min_value:
1232 min_inx = index # minimum element
1233 min_value = value
1235 elif value >= min_value + threshold:
1236 # there was a trough:
1238 # check and update trough with the check_trough function:
1239 if check_trough_func:
1240 r, th = check_trough_func(time, data, min_inx, index,
1241 max_inx, threshold,
1242 min_thresh=min_thresh, tau=tau, **kwargs)
1243 if r is not None:
1244 # this really is a trough:
1245 troughs_list.append(r)
1246 if th is not None:
1247 threshold = th
1248 if threshold < min_thresh:
1249 threshold = min_thresh
1250 else:
1251 # this is a trough:
1252 if time is None:
1253 troughs_list.append(min_inx)
1254 else:
1255 troughs_list.append(time[min_inx])
1257 # change direction:
1258 max_inx = index # maximum element
1259 max_value = value
1260 direction = 1
1262 # don't know direction yet:
1263 else:
1264 if max_value >= value + threshold:
1265 direction = -1 # falling
1266 elif value >= min_value + threshold:
1267 direction = 1 # rising
1269 if max_value < value:
1270 max_inx = index # maximum element
1271 max_value = value
1273 elif value < min_value:
1274 min_inx = index # minimum element
1275 min_value = value
1277 return np.asarray(peaks_list), np.asarray(troughs_list)


def accept_peak_size_threshold(time, data, event_inx, index, min_inx, threshold,
                               min_thresh, tau, thresh_ampl_fac=0.75, thresh_weight=0.02):
    """Accept each detected peak/trough and return its index or time.

    Adjust the threshold to the size of the detected peak.
    To be passed to the `detect_dynamic_peaks()` function.

    Parameters
    ----------
    time: array
        Time values, can be `None`.
    data: array
        The data in which peaks and troughs are detected.
    event_inx: int
        Index of the current peak/trough.
    index: int
        Current index.
    min_inx: int
        Index of the previous trough/peak.
    threshold: float
        Threshold value.
    min_thresh: float
        The minimum value the threshold is allowed to assume.
    tau: float
        The time constant of the decay of the threshold value
        given in indices (`time` is `None`) or time units (`time` is not `None`).
    thresh_ampl_fac: float
        The new threshold is `thresh_ampl_fac` times the size of the current peak.
    thresh_weight: float
        New threshold is weighted against current threshold with `thresh_weight`.

    Returns
    -------
    index: int
        Index of the peak/trough if `time` is `None`.
    time: int
        Time of the peak/trough if `time` is not `None`.
    threshold: float
        The new threshold to be used.
    """
    size = data[event_inx] - data[min_inx]
    threshold += thresh_weight * (thresh_ampl_fac * size - threshold)
    if time is None:
        return event_inx, threshold
    else:
        return time[event_inx], threshold


def main():
    print('Checking eventdetection module ...')
    print('')
    # generate data:
    dt = 0.001
    time = np.arange(0.0, 10.0, dt)
    f = 2.0
    data = (0.5 * np.sin(2.0 * np.pi * f * time) + 0.5) ** 4.0
    data += -0.1 * time * (time - 10.0)
    data += 0.1 * np.random.randn(len(data))

    print(f'generated waveform with {int(np.round(time[-1] * f))} peaks')
    plt.plot(time, data)

    print('')
    print('check detect_peaks(data, 1.0)...')
    peaks, troughs = detect_peaks(data, 1.0)

    # print peaks:
    print(f'detected {len(peaks)} peaks with period {np.mean(np.diff(peaks)):.1f} that differs from the real frequency by {f - 1.0 / np.mean(np.diff(peaks)) / np.mean(np.diff(time)):.3f}')
    # print troughs:
    print(f'detected {len(troughs)} troughs with period {np.mean(np.diff(troughs)):.1f} that differs from the real frequency by {f - 1.0 / np.mean(np.diff(troughs)) / np.mean(np.diff(time)):.3f}')

    # plot peaks and troughs:
    plt.plot(time[peaks], data[peaks], '.r', ms=20)
    plt.plot(time[troughs], data[troughs], '.g', ms=20)

    # detect threshold crossings:
    onsets, offsets = threshold_crossings(data, 3.0)
    onsets, offsets = merge_events(onsets, offsets, int(0.5/f/dt))
    plt.plot(time, 3.0*np.ones(len(time)), 'k')
    plt.plot(time[onsets], data[onsets], '.c', ms=20)
    plt.plot(time[offsets], data[offsets], '.b', ms=20)

    plt.ylim(-0.5, 4.0)
    plt.show()

    # timing of the detect_peaks() algorithm:
    import timeit
    def wrapper(func, *args, **kwargs):
        def wrapped():
            return func(*args, **kwargs)
        return wrapped
    wrapped = wrapper(detect_peaks, data, 1.0)
    t1 = timeit.timeit(wrapped, number=200)
    print(t1)


if __name__ == '__main__':
    main()