Coverage for src/thunderfish/bestwindow.py: 84%
284 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-23 22:57 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-23 22:57 +0000
1"""
2Select the best region within a recording with the most stable signal of largest amplitude that is not clipped.
4## Main functions
5- `clip_amplitudes()`: estimated clipping amplitudes from the data.
6- `best_window_indices()`: select start- and end-indices of the best window
7- `best_window_times()`: select start end end-time of the best window
8- `best_window()`: return data of the best window
9- `analysis_window()`: set clipping amplitudes and find analysis window.
11## Configuration
12- `add_clip_config()`: add parameters for `clip_amplitudes()` to configuration.
13- `clip_args()`: retrieve parameters for `clip_amplitudes()` from configuration.
14- `add_best_window_config()`: add parameters for `best_window()` to configuration.
15- `best_window_args()`: retrieve parameters for `best_window*()` from configuration.
17## Visualization
18- `plot_clipping()`: visualization of the algorithm for detecting clipped amplitudes in `clip_amplitudes()`.
19- `plot_best_window()`: visualization of the algorithm used in `best_window_indices()`.
20- `plot_data_window()`: plot the data and the selected analysis window.
21"""
23import numpy as np
24from thunderlab.eventdetection import percentile_threshold, detect_peaks, trim_to_peak
25from audioio import unwrap
26try:
27 import matplotlib.pyplot as plt
28 import matplotlib.ticker as ticker
29except ImportError:
30 pass
33def clip_amplitudes(data, win_indices, min_fac=2.0, nbins=20,
34 min_ampl=None, max_ampl=1.0,
35 plot_hist_func=None, **kwargs):
36 """Find the amplitudes where the signal clips.
38 Histograms in data segements of win_indices length are analyzed.
39 If the bins at the edges are more than min_fac times as large as
40 the neighboring bins, clipping at the bin's amplitude is assumed.
42 Parameters
43 ----------
44 data: 1-D array
45 The data.
46 win_indices: int
47 Size of the analysis window in indices.
48 min_fac: float
49 If the first or the second bin is at least `min_fac` times
50 as large as the third bin, their upper bin edge is set as min_clip.
51 Likewise for the last and next-to last bin.
52 nbins: int
53 Number of bins used for computing a histogram within `min_ampl` and `max_ampl`.
54 min_ampl: float or None
55 Minimum to be expected amplitude of the data.
56 If set to None, set it to the negative of max_ampl.
57 max_ampl: float
58 Maximum to be expected amplitude of the data
59 plot_hist_func: function
60 Function for visualizing the histograms, is called for every window.
61 `plot_clipping()` is a simple function that can be passed as `plot_hist_func`
62 to quickly visualize what is going on in `clip_amplitudes()`.
64 Signature:
66 `plot_hist_func(data, winx0, winx1, bins, h, min_clip, max_clip,
67 min_ampl, max_ampl, kwargs)`
69 with the arguments:
71 - `data` (array): the full data array.
72 - `winx0` (int): the start index of the current window.
73 - `winx1` (int): the end index of the current window.
74 - `bins` (array): the bin edges of the histogram.
75 - `h` (array): the histogram, plot it with
76 ```
77 plt.bar(bins[:-1], h, width=np.mean(np.diff(bins)))
78 ```
79 - `min_clip` (float): the current value of the minimum clip amplitude.
80 - `max_clip` (float): the current value of the minimum clip amplitude.
81 - `min_ampl` (float): the minimum amplitude of the data.
82 - `max_ampl` (float): the maximum amplitude of the data.
83 - `kwargs` (dict): further user supplied key-word arguments.
85 Returns
86 -------
87 min_clip: float
88 Minimum amplitude that is not clipped.
89 max_clip: float
90 Maximum amplitude that is not clipped.
91 """
92 if min_ampl is None:
93 min_ampl = -max_ampl
94 min_clipa = min_ampl
95 max_clipa = max_ampl
96 bins = np.linspace(min_ampl, max_ampl, nbins, endpoint=True)
97 win_tinxs = np.arange(0, len(data) - win_indices, win_indices)
98 for wtinx in win_tinxs:
99 h, b = np.histogram(data[wtinx:wtinx + win_indices], bins)
100 if h[0] > min_fac * h[2] and b[0] < 0.4*min_ampl:
101 if h[1] > min_fac * h[2] and b[2] > min_clipa:
102 min_clipa = b[2]
103 elif b[1] > min_clipa:
104 min_clipa = b[1]
105 if h[-1] > min_fac * h[-3] and b[-1] > 0.4*max_ampl:
106 if h[-2] > min_fac * h[-3] and b[-3] < max_clipa:
107 max_clipa = b[-3]
108 elif b[-2] < max_clipa:
109 max_clipa = b[-2]
110 if plot_hist_func:
111 plot_hist_func(data, wtinx, wtinx + win_indices,
112 b, h, min_clipa, max_clipa,
113 min_ampl, max_ampl, **kwargs)
114 return min_clipa, max_clipa
117def plot_clipping(data, winx0, winx1, bins,
118 h, min_clip, max_clip, min_ampl, max_ampl):
119 """Visualize the data histograms and the detected clipping amplitudes.
121 Pass this function as the `plot_hist_func` argument to `clip_amplitudes()`.
122 """
123 fig, (ax1, ax2) = plt.subplots(2, 1)
124 ax1.plot(data[winx0:winx1], 'b')
125 ax1.axhline(min_clip, color='r')
126 ax1.axhline(max_clip, color='r')
127 ax1.set_ylim(-1.0, 1.0)
128 ax2.bar(bins[:-1], h, width=np.mean(np.diff(bins)))
129 ax2.axvline(min_clip, color='r')
130 ax2.axvline(max_clip, color='r')
131 ax2.set_xlim(-1.0, 1.0)
132 plt.show()
135def add_clip_config(cfg, min_clip=0.0, max_clip=0.0,
136 window=1.0, min_fac=2.0, nbins=20,
137 min_ampl=-1.0, max_ampl=1.0):
138 """Add parameter needed for `clip_amplitudes()` as a new section to a configuration.
140 Parameters
141 ----------
142 cfg: ConfigFile
143 The configuration.
144 min_clip: float
145 Default minimum clip amplitude.
146 max_clip: float
147 Default maximum clip amplitude.
149 See `clip_amplitudes()` for details on the remaining arguments.
150 """
151 cfg.add_section('Clipping amplitudes:')
152 cfg.add('minClipAmplitude', min_clip, '', 'Minimum amplitude that is not clipped. If zero estimate from data.')
153 cfg.add('maxClipAmplitude', max_clip, '', 'Maximum amplitude that is not clipped. If zero estimate from data.')
154 cfg.add('clipWindow', window, 's', 'Window size for estimating clip amplitudes.')
155 cfg.add('clipBins', nbins, '', 'Number of bins used for constructing histograms of signal amplitudes.')
156 cfg.add('minClipFactor', min_fac, '',
157 'Edge bins of the histogram of clipped signals have to be larger then their neighbors by this factor.')
160def clip_args(cfg, rate):
161 """Translates a configuration to the respective parameter names of `clip_amplitudes()`.
163 The return value can then be passed as key-word arguments to this
164 function.
166 Parameters
167 ----------
168 cfg: ConfigFile
169 The configuration.
170 rate: float
171 Sampling rate of the data.
173 Returns
174 -------
175 a: dict
176 Dictionary with names of arguments of the `clip_amplitudes()` function
177 and their values as supplied by `cfg`.
178 """
179 a = cfg.map({'min_fac': 'minClipFactor',
180 'nbins': 'clipBins'})
181 a['win_indices'] = int(cfg.value('clipWindow')*rate)
182 return a
185def best_window_indices(data, rate, expand=False, win_size=1.,
186 win_shift=0.5, thresh_fac=0.8, percentile=0.1,
187 min_clip=-np.inf, max_clip=np.inf,
188 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0,
189 tolerance=0.2, plot_data_func=None, **kwargs):
190 """Find the window within data most suitable for subsequent analysis.
192 First, large peaks and troughs of the data are detected. Peaks and
193 troughs have to be separated in amplitude by at least the value of a
194 dynamic threshold. The threshold is computed in `win_shift` wide
195 windows as `thresh_fac` times the interpercentile range at
196 the `percentile`-th and 100.0-`percentile`-th percentile of the data
197 using the `thunderlab.eventdetection.percentile_threshold()` function.
199 Second, criteria for selecting the best window are computed for each
200 window of width `win_size` shifted by `win_shift` trough the data. The
201 three criteria are:
203 - the mean peak-to-trough amplitude multiplied with the fraction of
204 non clipped peak and trough amplitudes.
205 - the coefficient of variation of the peak-to-trough amplitude.
206 - the coefficient of variation of the inter-peak and inter-trough
207 intervals.
209 Third, a cost function is computed as a weighted sum of the three
210 criteria (the mean amplitude is taken negatively). The respective
211 weights are given by `w_ampl`, `w_cv_ampl`, and `w_cv_interv`.
213 Finally, a threshold is set to the minimum value of the cost
214 function plus tolerance. Then the largest region with the cost
215 function below this threshold is selected as the best window. If
216 `expand` is `False`, then only the single window with smallest cost
217 within the selected largest region is returned.
219 The data used by best window algorithm can be visualized by supplying the
220 function `plot_data_func`. Additional arguments for this function can
221 be supplied via key-word arguments `kwargs`.
223 Parameters
224 ----------
225 data: 1-D array
226 The data to be analyzed.
227 rate: float
228 Sampling rate of the data in Hertz.
229 expand: boolean
230 If `False` return only the single window with the smallest cost.
231 If `True` return the largest window with the cost below the minimum cost
232 plus tolerance.
233 win_size: float
234 Minimum size of the desired best window in seconds.
235 Choose it large enough for the subsequent analysis.
236 win_shift: float
237 Time shift in seconds between windows. Should be smaller or equal to `win_size`.
238 percentile: float
239 `percentile` parameter for the `thunderlab.eventdetection.percentile_threshold()` function
240 used to estimate thresholds for detecting peaks in the data.
241 thresh_fac: float
242 `thresh_fac` parameter for the `thunderlab.eventdetection.percentile_threshold()` function
243 used to estimate thresholds for detecting peaks in the data.
244 min_clip: float
245 Minimum amplitude below which data are clipped.
246 max_clip: float
247 Maximum amplitude above which data are clipped.
248 w_cv_interv: float
249 Weight for the coefficient of variation of the intervals between detected
250 peaks and throughs.
251 w_ampl: float
252 Weight for the mean peak-to-trough amplitude.
253 w_cv_ampl: float
254 Weight for the coefficient of variation of the amplitudes.
255 tolerance: float
256 Added to the minimum cost for expanding the region of the best window.
257 plot_data_func: function
258 Function for plotting the raw data, detected peaks and troughs, the criteria,
259 the cost function and the selected best window.
260 `plot_best_window()` is a simple function that can be passed as the `plot_data_func`
261 parameter to quickly visualize what is going on in selecting the best window.
263 Signature:
265 ````
266 plot_data_func(data, rate, peak_thresh, peak_idx, trough_idx,
267 idx0, idx1, win_start_times, cv_interv, mean_ampl,
268 cv_ampl, clipped_frac, cost_thresh, thresh,
269 valid_wins, **kwargs)
270 ```
272 with the arguments:
274 - `data` (array): raw data.
275 - `rate` (float): sampling rate of the data.
276 - `peak_thresh` (array): thresholds used for detecting peaks and troughs in each data window.
277 - `peak_idx` (array): indices into raw data indicating detected peaks.
278 - `trough_idx` (array): indices into raw data indicating detected troughs.
279 - `idx0` (int): index of the start of the best window.
280 - `idx1` (int): index of the end of the best window.
281 - `win_start_times` (array): times of the analysis windows.
282 - `cv_interv` (array): coefficients of variation of the inter-peak and -trough
283 intervals.
284 - `mean_ampl` (array): mean peak-to-trough amplitudes.
285 - `cv_ampl` (array): coefficients of variation of the peak-to-trough amplitudes.
286 - `clipped_frac` (array): fraction of clipped peaks or troughs.
287 - `cost` (array): cost function.
288 - `cost_thresh` (float): threshold for the cost function.
289 - `valid_wins` (array): boolean array indicating the windows which fulfill
290 all three criteria.
291 - `**kwargs` (dict): further user supplied key-word arguments.
292 kwargs: dict
293 Keyword arguments passed to `plot_data_func`.
295 Returns
296 -------
297 start_index: int
298 Index of the start of the best window.
299 end_index: int
300 Index of the end of the best window.
301 clipped: float.
302 The fraction of clipped peaks or troughs.
304 Raises
305 ------
306 UserWarning
307 - Not enough data for requested `win_size`.
308 - No peaks detected.
309 - No finite amplitudes detected.
310 - No valid interval CV detected.
311 - No valid amplitude CV detected.
312 """
313 # too little data:
314 if len(data)/rate < win_size:
315 raise UserWarning(f'not enough data (data={len(data) / rate:g}s, win={win_size:g}s)')
317 # threshold for peak detection:
318 threshold = percentile_threshold(data, int(win_shift*rate),
319 thresh_fac=thresh_fac,
320 percentile=percentile)
322 # detect large peaks and troughs:
323 peak_idx, trough_idx = detect_peaks(data, threshold)
324 if len(peak_idx) == 0 or len(trough_idx) == 0:
325 raise UserWarning('no peaks or troughs detected')
327 # compute cv of intervals, mean peak amplitude and its cv:
328 invalid_cv = 1000.0
329 win_size_indices = int(win_size*rate)
330 win_start_inxs = np.arange(0, len(data) - win_size_indices,
331 int(0.5*win_shift*rate))
332 if len(win_start_inxs) == 0:
333 win_start_inxs = [0]
334 cv_interv = np.zeros(len(win_start_inxs))
335 mean_ampl = np.zeros(len(win_start_inxs))
336 cv_ampl = np.zeros(len(win_start_inxs))
337 clipped_frac = np.zeros(len(win_start_inxs))
338 for i, wtinx in enumerate(win_start_inxs):
339 # indices of peaks and troughs inside analysis window:
340 pinx = (peak_idx >= wtinx) & (peak_idx <= wtinx + win_size_indices)
341 tinx = (trough_idx >= wtinx) & (trough_idx <= wtinx + win_size_indices)
342 p_idx, t_idx = trim_to_peak(peak_idx[pinx], trough_idx[tinx])
343 # interval statistics:
344 ipis = np.diff(p_idx)
345 itis = np.diff(t_idx)
346 if len(ipis) > 2:
347 cv_interv[i] = 0.5 * (np.std(ipis) / np.mean(ipis) + np.std(itis) / np.mean(itis))
348 # penalize regions without detected peaks:
349 mean_interv = np.mean(ipis)
350 if p_idx[0] - wtinx > mean_interv:
351 cv_interv[i] *= (p_idx[0] - wtinx) / mean_interv
352 if wtinx + win_size_indices - p_idx[-1] > mean_interv:
353 cv_interv[i] *= (wtinx + win_size_indices - p_idx[-1]) / mean_interv
354 else:
355 cv_interv[i] = invalid_cv
356 # statistics of peak-to-trough amplitude:
357 p2t_ampl = data[p_idx] - data[t_idx]
358 if len(p2t_ampl) > 2:
359 mean_ampl[i] = np.mean(p2t_ampl)
360 cv_ampl[i] = np.std(p2t_ampl) / mean_ampl[i]
361 # penalize for clipped peaks:
362 clipped_frac[i] = float(np.sum(data[p_idx] > max_clip) +
363 np.sum(data[t_idx] < min_clip)) / 2.0 / len(p2t_ampl)
364 mean_ampl[i] *= (1.0 - clipped_frac[i]) ** 2.0
365 else:
366 mean_ampl[i] = 0.0
367 cv_ampl[i] = invalid_cv
369 # check:
370 if len(mean_ampl[mean_ampl >= 0.0]) < 0:
371 raise UserWarning('no finite amplitudes detected')
372 if len(cv_interv[cv_interv < invalid_cv]) <= 0:
373 raise UserWarning('no valid interval cv detected')
374 if len(cv_ampl[cv_ampl < invalid_cv]) <= 0:
375 raise UserWarning('no valid amplitude cv detected')
377 # cost function:
378 cost = w_cv_interv * cv_interv + w_cv_ampl * cv_ampl - w_ampl * mean_ampl
379 thresh = np.min(cost) + tolerance
381 # find largest region with low costs:
382 valid_win_idx = np.nonzero(cost <= thresh)[0]
383 cidx0 = valid_win_idx[0] # start of current window
384 cidx1 = cidx0 + 1 # end of current window
385 win_idx0 = cidx0 # start of largest window
386 win_idx1 = cidx1 # end of largest window
387 i = 1
388 while i < len(valid_win_idx): # loop through all valid window positions
389 if valid_win_idx[i] == valid_win_idx[i - 1] + 1:
390 cidx1 = valid_win_idx[i] + 1
391 else:
392 cidx0 = valid_win_idx[i]
393 if cidx1 - cidx0 > win_idx1 - win_idx0: # current window is largest
394 win_idx0 = cidx0
395 win_idx1 = cidx1
396 i += 1
398 # find single best window within the largest region:
399 if not expand:
400 win_idx0 += np.argmin(cost[win_idx0:win_idx1])
401 win_idx1 = win_idx0 + 1
403 # retrive indices of best window for data:
404 idx0 = win_start_inxs[win_idx0]
405 idx1 = win_start_inxs[win_idx1 - 1] + win_size_indices
407 # clipped data?
408 clipped = np.mean(clipped_frac[win_idx0:win_idx1])
410 if plot_data_func:
411 plot_data_func(data, rate, threshold, peak_idx, trough_idx,
412 idx0, idx1, win_start_inxs/rate, cv_interv,
413 mean_ampl, cv_ampl, clipped_frac, cost, thresh,
414 win_idx0, win_idx1, **kwargs)
416 return idx0, idx1, clipped
419def best_window_times(data, rate, expand=False, win_size=1.,
420 win_shift=0.5, thresh_fac=0.8, percentile=0.1,
421 min_clip=-np.inf, max_clip=np.inf,
422 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0,
423 tolerance=0.2, plot_data_func=None, **kwargs):
424 """Find the window within data most suitable for subsequent analysis.
426 See `best_window_indices()` for details.
428 Returns
429 -------
430 start_time: float
431 Time of the start of the best window.
432 end_time: float
433 Time of the end of the best window.
434 clipped: float
435 The fraction of clipped peaks or troughs.
436 """
437 start_inx, end_inx, clipped = best_window_indices(data, rate,
438 expand,
439 win_size,
440 win_shift,
441 thresh_fac,
442 percentile,
443 min_clip,
444 max_clip,
445 w_cv_interv,
446 w_ampl,
447 w_cv_ampl,
448 tolerance,
449 plot_data_func,
450 **kwargs)
451 return start_inx/rate, end_inx/rate, clipped
454def best_window(data, rate, expand=False, win_size=1., win_shift=0.5,
455 thresh_fac=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf,
456 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.2,
457 plot_data_func=None, **kwargs):
458 """Find the window within data most suitable for subsequent analysis.
460 See `best_window_indices()` for details.
462 Returns
463 -------
464 data: array
465 The data of the best window.
466 clipped: float
467 The fraction of clipped peaks or troughs.
468 """
469 start_inx, end_inx, clipped = best_window_indices(data, rate, expand,
470 win_size, win_shift,
471 thresh_fac, percentile,
472 min_clip, max_clip,
473 w_cv_interv, w_ampl, w_cv_ampl,
474 tolerance, plot_data_func, **kwargs)
475 return data[start_inx:end_inx], clipped
478def plot_best_window(data, rate, threshold, peak_idx, trough_idx, idx0, idx1,
479 win_times, cv_interv, mean_ampl, cv_ampl, clipped_frac,
480 cost, thresh, win_idx0, win_idx1, ax):
481 """Visualize the cost function of used for finding the best window for analysis.
483 Pass this function as the `plot_data_func` to the `best_window_*` functions.
485 Parameters
486 ----------
487 See documentation of the `best_window_indices()` functions.
488 """
489 # raw data:
490 time = np.arange(0.0, len(data))/rate
491 ax[0].plot(time, data, 'b', lw=3)
492 if np.mean(clipped_frac[win_idx0:win_idx1]) > 0.01:
493 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='magenta', lw=3)
494 else:
495 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='grey', lw=3)
496 ax[0].plot(time[peak_idx], data[peak_idx], 'o', mfc='red', ms=6)
497 ax[0].plot(time[trough_idx], data[trough_idx], 'o', mfc='green', ms=6)
498 ax[0].plot(time, threshold, '#CCCCCC', lw=2)
499 up_lim = np.max(data) * 1.05
500 down_lim = np.min(data) * .95
501 ax[0].set_ylim((down_lim, up_lim))
502 ax[0].set_ylabel('Amplitude [a.u]')
504 # cv of inter-peak intervals:
505 ax[1].plot(win_times[cv_interv < 1000.0], cv_interv[cv_interv < 1000.0], 'o', ms=10, color='grey', mew=2.,
506 mec='black', alpha=0.6)
507 ax[1].plot(win_times[win_idx0:win_idx1], cv_interv[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black',
508 alpha=0.6)
509 ax[1].set_ylabel('CV intervals')
510 ax[1].set_ylim(bottom=0.0)
512 # mean amplitude:
513 ax[2].plot(win_times[mean_ampl > 0.0], mean_ampl[mean_ampl > 0.0], 'o', ms=10, color='grey', mew=2., mec='black',
514 alpha=0.6)
515 ax[2].plot(win_times[win_idx0:win_idx1], mean_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black',
516 alpha=0.6)
517 ax[2].set_ylabel('Mean amplitude [a.u]')
518 ax[2].set_ylim(bottom=0.0)
520 # cv:
521 ax[3].plot(win_times[cv_ampl < 1000.0], cv_ampl[cv_ampl < 1000.0], 'o', ms=10, color='grey', mew=2., mec='black',
522 alpha=0.6)
523 ax[3].plot(win_times[win_idx0:win_idx1], cv_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black',
524 alpha=0.6)
525 ax[3].set_ylabel('CV amplitude')
526 ax[3].set_ylim(bottom=0.0)
528 # cost:
529 ax[4].plot(win_times[cost < thresh + 10], cost[cost < thresh + 10], 'o', ms=10, color='grey', mew=2., mec='black',
530 alpha=0.6)
531 ax[4].plot(win_times[win_idx0:win_idx1], cost[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black',
532 alpha=0.6)
533 ax[4].axhline(thresh, color='k')
534 ax[4].set_ylabel('Cost')
535 ax[4].set_xlabel('Time [sec]')
538def plot_data_window(ax, data, rate, unit, idx0, idx1, clipped,
539 dstyle=dict(color='tab:blue', lw=1),
540 wstyle=dict(color='tab:red', lw=1)):
541 """Plot the data and mark the analysis window.
543 Parameters
544 ----------
545 ax: matplotlib axes
546 Axes used for plotting.
547 data: 1-D array
548 The full data trace.
549 rate: float
550 Sampling rate of the data in Hertz.
551 unit: string
552 The unit of the data.
553 idx0: int
554 Start index of the best window.
555 idx1: int
556 Stop index of the best window.
557 clipped: float
558 Fraction of clipped peaks.
559 data_color:
560 Color used for plotting the data trace.
561 window_color:
562 Color used for plotting the selected best window.
563 """
564 time = np.arange(len(data))/rate
565 ax.plot(time[:idx0], data[:idx0], **dstyle)
566 ax.plot(time[idx1:], data[idx1:], **dstyle)
567 if idx1 > idx0:
568 ax.plot(time[idx0:idx1], data[idx0:idx1], **wstyle)
569 label = 'analysis\nwindow'
570 if clipped > 0.0:
571 label += f'\n{100.0*clipped:.0f}% clipped'
572 ax.text(time[(idx0+idx1)//2], 0.0, label, ha='center', va='center')
573 ax.set_xlim(time[0], time[-1])
574 ax.set_xlabel('Time [sec]')
575 if len(unit) == 0 or unit == 'a.u.':
576 ax.set_ylabel('Amplitude')
577 else:
578 ax.set_ylabel(f'Amplitude [{unit}]')
579 ax.yaxis.set_major_locator(ticker.MaxNLocator(3))
582def add_best_window_config(cfg, win_pos='best', win_size=1., win_shift=0.5,
583 thresh_fac=0.8, percentile=0.1,
584 min_clip=-np.inf, max_clip=np.inf,
585 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0,
586 tolerance=0.2, expand=False):
587 """ Add parameter needed for the `best_window()` functions as a new section to a configuration.
589 Parameters
590 ----------
591 cfg: ConfigFile
592 The configuration.
594 See `best_window_indices()` for details on the remaining arguments.
595 """
596 cfg.add_section('Analysis window:')
597 cfg.add('windowPosition', win_pos, '', 'Position of the analysis window: "beginning", "center", "end", "best", or a time in seconds.')
598 cfg.add('windowSize', win_size, 's', 'Size of the best window. This should be much larger than the expected period of the signal. If 0 select the whole time series.')
599 cfg.add('bestWindowShift', win_shift, 's',
600 'Increment for shifting the analysis windows trough the data. Should be larger than the expected period of the signal.')
601 cfg.add('bestWindowThresholdPercentile', percentile, '%',
602 'Percentile for estimating interpercentile range. Should be smaller than the duty cycle of the periodic signal.')
603 cfg.add('bestWindowThresholdFactor', thresh_fac, '',
604 'Threshold for detecting peaks is interperecntile range of the data times this factor.')
605 cfg.add('weightCVInterval', w_cv_interv, '',
606 'Weight factor for the coefficient of variation of the inter-peak and inter-trough intervals.')
607 cfg.add('weightAmplitude', w_ampl, '',
608 'Weight factor for the mean peak-to-trough amplitudes.')
609 cfg.add('weightCVAmplitude', w_cv_ampl, '',
610 'Weight factor for the coefficient of variation of the peak-to-trough amplitude.')
611 cfg.add('bestWindowTolerance', tolerance, '',
612 'Add this to the minimum value of the cost function to get a threshold for selecting the largest best window.')
613 cfg.add('expandBestWindow', expand, '',
614 'Return the largest valid best window. If False return sole best window. ')
617def best_window_args(cfg):
618 """Translates a configuration to the respective parameter names of the functions `best_window*()`.
620 The return value can then be passed as key-word arguments to these
621 functions.
623 Parameters
624 ----------
625 cfg: ConfigFile
626 The configuration.
628 Returns
629 -------
630 a: dict
631 Dictionary with names of arguments of the `best_window*()` functions
632 and their values as supplied by `cfg`.
633 """
634 return cfg.map({'win_size': 'windowSize',
635 'win_shift': 'bestWindowShift',
636 'percentile': 'bestWindowThresholdPercentile',
637 'thresh_fac': 'bestWindowThresholdFactor',
638 'w_cv_interv': 'weightCVInterval',
639 'w_ampl': 'weightAmplitude',
640 'w_cv_ampl': 'weightCVAmplitude',
641 'tolerance': 'bestWindowTolerance',
642 'expand': 'expandBestWindow'})
645def analysis_window(data, rate, ampl_max, win_pos,
646 cfg, show_bestwindow=False):
647 """Set clipping amplitudes and find analysis window.
649 Parameters
650 ----------
651 data: 1-D array
652 The data to be analyzed.
653 rate: float
654 Sampling rate of the data in Hertz.
655 ampl_max: float
656 Maximum value of input range.
657 win_pos: string or float
658 Position of the analysis window: "beginning", "center", "end" or "best".
659 Alternatively the beginning of the analysis window in seconds.
660 cfg: ConfigFile
661 Configuration for clipping and best window.
662 show_bestwindow: boolean
663 If true show a plot with the best window cost functions.
665 Returns
666 -------
667 data: 1-D array
668 The data array of the best window
669 idx0: int
670 The start index of the best window in the original data.
671 idx1: int
672 The end index of the best window in the original data.
673 clipped: float
674 Fraction of clipped amplitudes.
675 min_clip: float
676 Minimum amplitude that is not clipped.
677 max_clip: float
678 Maximum amplitude that is not clipped.
679 """
680 found_bestwindow = True
681 min_clip = cfg.value('minClipAmplitude')
682 max_clip = cfg.value('maxClipAmplitude')
683 clipped = 0
684 if min_clip == 0.0 or max_clip == 0.0:
685 min_clip, max_clip = clip_amplitudes(data, max_ampl=ampl_max,
686 **clip_args(cfg, rate))
687 if cfg.value('unwrapData'):
688 unwrap(data, 1.5, ampl_max)
689 min_clip *= 2
690 max_clip *= 2
691 # window size parameter:
692 bwa = best_window_args(cfg)
693 if 'win_size' in bwa:
694 del bwa['win_size']
695 window_size = cfg.value('windowSize')
696 if window_size <= 0.0:
697 window_size = (len(data)-1)/rate
698 # show cost function:
699 if win_pos == 'best' and show_bestwindow:
700 fig, ax = plt.subplots(5, sharex=True, figsize=(14., 10.))
701 try:
702 idx0, idx1, clipped = best_window_indices(data, rate,
703 min_clip=min_clip,
704 max_clip=max_clip,
705 win_size=window_size,
706 plot_data_func=plot_best_window,
707 ax=ax, **bwa)
708 plt.show()
709 except UserWarning as e:
710 found_bestwindow = False
711 else:
712 # too little data:
713 n_win = int(window_size*rate)
714 if len(data) < n_win:
715 return data, 0, 0, False, min_clip, max_clip
716 if win_pos == 'best':
717 try:
718 idx0, idx1, clipped = best_window_indices(data, rate,
719 min_clip=min_clip,
720 max_clip=max_clip,
721 win_size=window_size,
722 **bwa)
723 except UserWarning as e:
724 found_bestwindow = False
725 else:
726 if win_pos[:5] == 'begin':
727 idx0 = 0
728 elif win_pos == 'end':
729 idx0 = len(data) - n_win
730 elif win_pos == 'center':
731 idx0 = (len(data) - n_win)//2
732 else:
733 try:
734 if win_pos[-1] == 's':
735 win_pos = win_pos[:-1]
736 t0 = float(win_pos)
737 except ValueError:
738 found_bestwindow = False
739 t0 = 0.0
740 idx0 = int(t0*rate)
741 idx1 = idx0 + n_win
742 if not found_bestwindow or idx1 > len(data):
743 return data, 0, 0, False, min_clip, max_clip
744 data_seg = data[idx0:idx1]
745 # check for clipping:
746 win_shift = cfg.value('bestWindowShift')
747 thresh_fac = cfg.value('bestWindowThresholdFactor')
748 percentile = cfg.value('bestWindowThresholdPercentile')
749 threshold = percentile_threshold(data_seg,
750 int(win_shift*rate),
751 thresh_fac=thresh_fac,
752 percentile=percentile)
753 peak_idx, trough_idx = detect_peaks(data_seg, threshold)
754 p_idx, t_idx = trim_to_peak(peak_idx, trough_idx)
755 if len(p_idx) > 0:
756 p2t_ampl = data_seg[p_idx] - data_seg[t_idx]
757 clipped = float(np.sum(data_seg[p_idx] > max_clip) +
758 np.sum(data_seg[t_idx] < min_clip))/2/len(p2t_ampl)
759 if found_bestwindow:
760 return data[idx0:idx1], idx0, idx1, clipped, min_clip, max_clip
761 else:
762 return data, 0, 0, False, min_clip, max_clip
765def main(data_file=None):
766 title = 'bestwindow'
767 if data_file is None:
768 # generate data:
769 print('generate waveform...')
770 rate = 100000.0
771 time = np.arange(0.0, 1.0, 1.0 / rate)
772 f = 600.0
773 snippets = []
774 amf = 20.0
775 for ampl in [0.2, 0.5, 0.8]:
776 for am_ampl in [0.0, 0.3, 0.9]:
777 data = ampl * np.sin(2.0 * np.pi * f * time) * (1.0 + am_ampl * np.sin(2.0 * np.pi * amf * time))
778 data[data > 1.3] = 1.3
779 data[data < -1.3] = -1.3
780 snippets.extend(data)
781 data = np.asarray(snippets)
782 title = 'test sines'
783 data += 0.01 * np.random.randn(len(data))
784 else:
785 from thunderlab.dataloader import load_data
786 print(f'load {data_file} ...')
787 data, rate, unit, amax = load_data(data_file)
788 data = data[:,0]
789 title = data_file
791 # determine clipping amplitudes:
792 clip_win_size = 0.5
793 min_clip_fac = 2.0
794 min_clip, max_clip = clip_amplitudes(data, int(clip_win_size * rate),
795 min_fac=min_clip_fac)
796 # min_clip, max_clip = clip_amplitudes(data, int(clip_win_size*rate),
797 # min_fac=min_clip_fac,
798 # plot_hist_func=plot_clipping)
800 # setup plots:
801 fig, ax = plt.subplots(5, 1, sharex=True, figsize=(20, 12))
802 fig.canvas.manager.set_window_title(title)
804 # compute best window:
805 best_window_indices(data, rate, expand=False, win_size=4.0,
806 win_shift=0.5, thresh_fac=0.8, percentile=0.1,
807 min_clip=min_clip, max_clip=max_clip,
808 w_cv_ampl=10.0, tolerance=0.5,
809 plot_data_func=plot_best_window, ax=ax)
811 plt.tight_layout()
812 plt.show()
815if __name__ == '__main__':
816 import sys
817 data_file = sys.argv[1] if len(sys.argv) > 1 else None
818 main(data_file)