Coverage for src / thunderfish / bestwindow.py: 82%

284 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-15 17:50 +0000

1""" 

2Select the best region within a recording with the most stable signal of largest amplitude that is not clipped. 

3 

4## Main functions 

5- `clip_amplitudes()`: estimated clipping amplitudes from the data. 

6- `best_window_indices()`: select start- and end-indices of the best window 

7- `best_window_times()`: select start end end-time of the best window 

8- `best_window()`: return data of the best window 

9- `analysis_window()`: set clipping amplitudes and find analysis window. 

10 

11## Configuration 

12- `add_clip_config()`: add parameters for `clip_amplitudes()` to configuration. 

13- `clip_args()`: retrieve parameters for `clip_amplitudes()` from configuration. 

14- `add_best_window_config()`: add parameters for `best_window()` to configuration. 

15- `best_window_args()`: retrieve parameters for `best_window*()` from configuration. 

16 

17## Visualization 

18- `plot_clipping()`: visualization of the algorithm for detecting clipped amplitudes in `clip_amplitudes()`. 

19- `plot_best_window()`: visualization of the algorithm used in `best_window_indices()`. 

20- `plot_data_window()`: plot the data and the selected analysis window. 

21""" 

22 

23import numpy as np 

24try: 

25 import matplotlib.pyplot as plt 

26 import matplotlib.ticker as ticker 

27except ImportError: 

28 pass 

29 

30from thunderlab.eventdetection import percentile_threshold, detect_peaks, trim_to_peak 

31from audioio import unwrap 

32 

33 

34def clip_amplitudes(data, win_indices, min_fac=2.0, nbins=20, 

35 min_ampl=None, max_ampl=1.0, 

36 plot_hist_func=None, **kwargs): 

37 """Find the amplitudes where the signal clips. 

38 

39 Histograms in data segements of win_indices length are analyzed. 

40 If the bins at the edges are more than min_fac times as large as 

41 the neighboring bins, clipping at the bin's amplitude is assumed. 

42 

43 Parameters 

44 ---------- 

45 data: 1-D array 

46 The data. 

47 win_indices: int 

48 Size of the analysis window in indices. 

49 min_fac: float 

50 If the first or the second bin is at least `min_fac` times 

51 as large as the third bin, their upper bin edge is set as min_clip. 

52 Likewise for the last and next-to last bin. 

53 nbins: int 

54 Number of bins used for computing a histogram within `min_ampl` and `max_ampl`. 

55 min_ampl: float or None 

56 Minimum to be expected amplitude of the data. 

57 If set to None, set it to the negative of max_ampl. 

58 max_ampl: float 

59 Maximum to be expected amplitude of the data 

60 plot_hist_func: function 

61 Function for visualizing the histograms, is called for every window. 

62 `plot_clipping()` is a simple function that can be passed as `plot_hist_func` 

63 to quickly visualize what is going on in `clip_amplitudes()`. 

64  

65 Signature: 

66 

67 `plot_hist_func(data, winx0, winx1, bins, h, min_clip, max_clip, 

68 min_ampl, max_ampl, kwargs)` 

69 

70 with the arguments: 

71  

72 - `data` (array): the full data array. 

73 - `winx0` (int): the start index of the current window. 

74 - `winx1` (int): the end index of the current window. 

75 - `bins` (array): the bin edges of the histogram. 

76 - `h` (array): the histogram, plot it with 

77 ``` 

78 plt.bar(bins[:-1], h, width=np.mean(np.diff(bins))) 

79 ``` 

80 - `min_clip` (float): the current value of the minimum clip amplitude. 

81 - `max_clip` (float): the current value of the minimum clip amplitude. 

82 - `min_ampl` (float): the minimum amplitude of the data. 

83 - `max_ampl` (float): the maximum amplitude of the data. 

84 - `kwargs` (dict): further user supplied key-word arguments. 

85 

86 Returns 

87 ------- 

88 min_clip: float 

89 Minimum amplitude that is not clipped. 

90 max_clip: float 

91 Maximum amplitude that is not clipped. 

92 """ 

93 if min_ampl is None: 

94 min_ampl = -max_ampl 

95 min_clipa = min_ampl 

96 max_clipa = max_ampl 

97 bins = np.linspace(min_ampl, max_ampl, nbins, endpoint=True) 

98 win_tinxs = np.arange(0, len(data) - win_indices, win_indices) 

99 for wtinx in win_tinxs: 

100 h, b = np.histogram(data[wtinx:wtinx + win_indices], bins) 

101 if h[0] > min_fac * h[2] and b[0] < 0.4*min_ampl: 

102 if h[1] > min_fac * h[2] and b[2] > min_clipa: 

103 min_clipa = b[2] 

104 elif b[1] > min_clipa: 

105 min_clipa = b[1] 

106 if h[-1] > min_fac * h[-3] and b[-1] > 0.4*max_ampl: 

107 if h[-2] > min_fac * h[-3] and b[-3] < max_clipa: 

108 max_clipa = b[-3] 

109 elif b[-2] < max_clipa: 

110 max_clipa = b[-2] 

111 if plot_hist_func: 

112 plot_hist_func(data, wtinx, wtinx + win_indices, 

113 b, h, min_clipa, max_clipa, 

114 min_ampl, max_ampl, **kwargs) 

115 return min_clipa, max_clipa 

116 

117 

118def plot_clipping(data, winx0, winx1, bins, 

119 h, min_clip, max_clip, min_ampl, max_ampl): 

120 """Visualize the data histograms and the detected clipping amplitudes. 

121 

122 Pass this function as the `plot_hist_func` argument to `clip_amplitudes()`. 

123 """ 

124 fig, (ax1, ax2) = plt.subplots(2, 1) 

125 ax1.plot(data[winx0:winx1], 'b') 

126 ax1.axhline(min_clip, color='r') 

127 ax1.axhline(max_clip, color='r') 

128 ax1.set_ylim(-1.0, 1.0) 

129 ax2.bar(bins[:-1], h, width=np.mean(np.diff(bins))) 

130 ax2.axvline(min_clip, color='r') 

131 ax2.axvline(max_clip, color='r') 

132 ax2.set_xlim(-1.0, 1.0) 

133 plt.show() 

134 

135 

136def add_clip_config(cfg, min_clip=0.0, max_clip=0.0, 

137 window=1.0, min_fac=2.0, nbins=20, 

138 min_ampl=-1.0, max_ampl=1.0): 

139 """Add parameter needed for `clip_amplitudes()` as a new section to a configuration. 

140 

141 Parameters 

142 ---------- 

143 cfg: ConfigFile 

144 The configuration. 

145 min_clip: float 

146 Default minimum clip amplitude. 

147 max_clip: float 

148 Default maximum clip amplitude. 

149  

150 See `clip_amplitudes()` for details on the remaining arguments. 

151 """ 

152 cfg.add_section('Clipping amplitudes:') 

153 cfg.add('minClipAmplitude', min_clip, '', 'Minimum amplitude that is not clipped. If zero estimate from data.') 

154 cfg.add('maxClipAmplitude', max_clip, '', 'Maximum amplitude that is not clipped. If zero estimate from data.') 

155 cfg.add('clipWindow', window, 's', 'Window size for estimating clip amplitudes.') 

156 cfg.add('clipBins', nbins, '', 'Number of bins used for constructing histograms of signal amplitudes.') 

157 cfg.add('minClipFactor', min_fac, '', 

158 'Edge bins of the histogram of clipped signals have to be larger then their neighbors by this factor.') 

159 

160 

161def clip_args(cfg, rate): 

162 """Translates a configuration to the respective parameter names of `clip_amplitudes()`. 

163 

164 The return value can then be passed as key-word arguments to this 

165 function. 

166 

167 Parameters 

168 ---------- 

169 cfg: ConfigFile 

170 The configuration. 

171 rate: float 

172 Sampling rate of the data. 

173 

174 Returns 

175 ------- 

176 a: dict 

177 Dictionary with names of arguments of the `clip_amplitudes()` function 

178 and their values as supplied by `cfg`. 

179 """ 

180 a = cfg.map({'min_fac': 'minClipFactor', 

181 'nbins': 'clipBins'}) 

182 a['win_indices'] = int(cfg.value('clipWindow')*rate) 

183 return a 

184 

185 

186def best_window_indices(data, rate, expand=False, win_size=1., 

187 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

188 min_clip=-np.inf, max_clip=np.inf, 

189 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

190 tolerance=0.2, plot_data_func=None, **kwargs): 

191 """Find the window within data most suitable for subsequent analysis. 

192  

193 First, large peaks and troughs of the data are detected. Peaks and 

194 troughs have to be separated in amplitude by at least the value of a 

195 dynamic threshold. The threshold is computed in `win_shift` wide 

196 windows as `thresh_fac` times the interpercentile range at 

197 the `percentile`-th and 100.0-`percentile`-th percentile of the data 

198 using the `thunderlab.eventdetection.percentile_threshold()` function. 

199 

200 Second, criteria for selecting the best window are computed for each 

201 window of width `win_size` shifted by `win_shift` trough the data. The 

202 three criteria are: 

203 

204 - the mean peak-to-trough amplitude multiplied with the fraction of 

205 non clipped peak and trough amplitudes. 

206 - the coefficient of variation of the peak-to-trough amplitude. 

207 - the coefficient of variation of the inter-peak and inter-trough 

208 intervals. 

209 

210 Third, a cost function is computed as a weighted sum of the three 

211 criteria (the mean amplitude is taken negatively). The respective 

212 weights are given by `w_ampl`, `w_cv_ampl`, and `w_cv_interv`. 

213 

214 Finally, a threshold is set to the minimum value of the cost 

215 function plus tolerance. Then the largest region with the cost 

216 function below this threshold is selected as the best window. If 

217 `expand` is `False`, then only the single window with smallest cost 

218 within the selected largest region is returned. 

219 

220 The data used by best window algorithm can be visualized by supplying the 

221 function `plot_data_func`. Additional arguments for this function can 

222 be supplied via key-word arguments `kwargs`. 

223 

224 Parameters 

225 ---------- 

226 data: 1-D array 

227 The data to be analyzed. 

228 rate: float 

229 Sampling rate of the data in Hertz. 

230 expand: boolean 

231 If `False` return only the single window with the smallest cost. 

232 If `True` return the largest window with the cost below the minimum cost 

233 plus tolerance. 

234 win_size: float 

235 Minimum size of the desired best window in seconds. 

236 Choose it large enough for the subsequent analysis. 

237 win_shift: float 

238 Time shift in seconds between windows. Should be smaller or equal to `win_size`. 

239 percentile: float 

240 `percentile` parameter for the `thunderlab.eventdetection.percentile_threshold()` function 

241 used to estimate thresholds for detecting peaks in the data. 

242 thresh_fac: float 

243 `thresh_fac` parameter for the `thunderlab.eventdetection.percentile_threshold()` function 

244 used to estimate thresholds for detecting peaks in the data. 

245 min_clip: float 

246 Minimum amplitude below which data are clipped. 

247 max_clip: float 

248 Maximum amplitude above which data are clipped. 

249 w_cv_interv: float 

250 Weight for the coefficient of variation of the intervals between detected 

251 peaks and throughs. 

252 w_ampl: float 

253 Weight for the mean peak-to-trough amplitude. 

254 w_cv_ampl: float 

255 Weight for the coefficient of variation of the amplitudes. 

256 tolerance: float 

257 Added to the minimum cost for expanding the region of the best window. 

258 plot_data_func: function 

259 Function for plotting the raw data, detected peaks and troughs, the criteria, 

260 the cost function and the selected best window. 

261 `plot_best_window()` is a simple function that can be passed as the `plot_data_func` 

262 parameter to quickly visualize what is going on in selecting the best window. 

263  

264 Signature: 

265  

266 ```` 

267 plot_data_func(data, rate, peak_thresh, peak_idx, trough_idx, 

268 idx0, idx1, win_start_times, cv_interv, mean_ampl, 

269 cv_ampl, clipped_frac, cost_thresh, thresh, 

270 valid_wins, **kwargs) 

271 ``` 

272 

273 with the arguments: 

274  

275 - `data` (array): raw data. 

276 - `rate` (float): sampling rate of the data. 

277 - `peak_thresh` (array): thresholds used for detecting peaks and troughs in each data window. 

278 - `peak_idx` (array): indices into raw data indicating detected peaks. 

279 - `trough_idx` (array): indices into raw data indicating detected troughs. 

280 - `idx0` (int): index of the start of the best window. 

281 - `idx1` (int): index of the end of the best window. 

282 - `win_start_times` (array): times of the analysis windows. 

283 - `cv_interv` (array): coefficients of variation of the inter-peak and -trough 

284 intervals. 

285 - `mean_ampl` (array): mean peak-to-trough amplitudes. 

286 - `cv_ampl` (array): coefficients of variation of the peak-to-trough amplitudes. 

287 - `clipped_frac` (array): fraction of clipped peaks or troughs. 

288 - `cost` (array): cost function. 

289 - `cost_thresh` (float): threshold for the cost function. 

290 - `valid_wins` (array): boolean array indicating the windows which fulfill 

291 all three criteria. 

292 - `**kwargs` (dict): further user supplied key-word arguments. 

293 kwargs: dict 

294 Keyword arguments passed to `plot_data_func`.  

295  

296 Returns 

297 ------- 

298 start_index: int 

299 Index of the start of the best window. 

300 end_index: int 

301 Index of the end of the best window. 

302 clipped: float. 

303 The fraction of clipped peaks or troughs. 

304 

305 Raises 

306 ------ 

307 UserWarning 

308 - Not enough data for requested `win_size`. 

309 - No peaks detected. 

310 - No finite amplitudes detected. 

311 - No valid interval CV detected. 

312 - No valid amplitude CV detected. 

313 """ 

314 # too little data: 

315 if len(data)/rate < win_size: 

316 raise UserWarning(f'not enough data (data={len(data) / rate:g}s, win={win_size:g}s)') 

317 

318 # threshold for peak detection: 

319 threshold = percentile_threshold(data, int(win_shift*rate), 

320 thresh_fac=thresh_fac, 

321 percentile=percentile) 

322 

323 # detect large peaks and troughs: 

324 peak_idx, trough_idx = detect_peaks(data, threshold) 

325 if len(peak_idx) == 0 or len(trough_idx) == 0: 

326 raise UserWarning('no peaks or troughs detected') 

327 

328 # compute cv of intervals, mean peak amplitude and its cv: 

329 invalid_cv = 1000.0 

330 win_size_indices = int(win_size*rate) 

331 win_start_inxs = np.arange(0, len(data) - win_size_indices, 

332 int(0.5*win_shift*rate)) 

333 if len(win_start_inxs) == 0: 

334 win_start_inxs = [0] 

335 cv_interv = np.zeros(len(win_start_inxs)) 

336 mean_ampl = np.zeros(len(win_start_inxs)) 

337 cv_ampl = np.zeros(len(win_start_inxs)) 

338 clipped_frac = np.zeros(len(win_start_inxs)) 

339 for i, wtinx in enumerate(win_start_inxs): 

340 # indices of peaks and troughs inside analysis window: 

341 pinx = (peak_idx >= wtinx) & (peak_idx <= wtinx + win_size_indices) 

342 tinx = (trough_idx >= wtinx) & (trough_idx <= wtinx + win_size_indices) 

343 p_idx, t_idx = trim_to_peak(peak_idx[pinx], trough_idx[tinx]) 

344 # interval statistics: 

345 ipis = np.diff(p_idx) 

346 itis = np.diff(t_idx) 

347 if len(ipis) > 2: 

348 cv_interv[i] = 0.5 * (np.std(ipis) / np.mean(ipis) + np.std(itis) / np.mean(itis)) 

349 # penalize regions without detected peaks: 

350 mean_interv = np.mean(ipis) 

351 if p_idx[0] - wtinx > mean_interv: 

352 cv_interv[i] *= (p_idx[0] - wtinx) / mean_interv 

353 if wtinx + win_size_indices - p_idx[-1] > mean_interv: 

354 cv_interv[i] *= (wtinx + win_size_indices - p_idx[-1]) / mean_interv 

355 else: 

356 cv_interv[i] = invalid_cv 

357 # statistics of peak-to-trough amplitude: 

358 p2t_ampl = data[p_idx] - data[t_idx] 

359 if len(p2t_ampl) > 2: 

360 mean_ampl[i] = np.mean(p2t_ampl) 

361 cv_ampl[i] = np.std(p2t_ampl) / mean_ampl[i] 

362 # penalize for clipped peaks: 

363 clipped_frac[i] = float(np.sum(data[p_idx] > max_clip) + 

364 np.sum(data[t_idx] < min_clip)) / 2.0 / len(p2t_ampl) 

365 mean_ampl[i] *= (1.0 - clipped_frac[i]) ** 2.0 

366 else: 

367 mean_ampl[i] = 0.0 

368 cv_ampl[i] = invalid_cv 

369 

370 # check: 

371 if len(mean_ampl[mean_ampl >= 0.0]) < 0: 

372 raise UserWarning('no finite amplitudes detected') 

373 if len(cv_interv[cv_interv < invalid_cv]) <= 0: 

374 raise UserWarning('no valid interval cv detected') 

375 if len(cv_ampl[cv_ampl < invalid_cv]) <= 0: 

376 raise UserWarning('no valid amplitude cv detected') 

377 

378 # cost function: 

379 cost = w_cv_interv * cv_interv + w_cv_ampl * cv_ampl - w_ampl * mean_ampl 

380 thresh = np.min(cost) + tolerance 

381 

382 # find largest region with low costs: 

383 valid_win_idx = np.nonzero(cost <= thresh)[0] 

384 cidx0 = valid_win_idx[0] # start of current window 

385 cidx1 = cidx0 + 1 # end of current window 

386 win_idx0 = cidx0 # start of largest window 

387 win_idx1 = cidx1 # end of largest window 

388 i = 1 

389 while i < len(valid_win_idx): # loop through all valid window positions 

390 if valid_win_idx[i] == valid_win_idx[i - 1] + 1: 

391 cidx1 = valid_win_idx[i] + 1 

392 else: 

393 cidx0 = valid_win_idx[i] 

394 if cidx1 - cidx0 > win_idx1 - win_idx0: # current window is largest 

395 win_idx0 = cidx0 

396 win_idx1 = cidx1 

397 i += 1 

398 

399 # find single best window within the largest region: 

400 if not expand: 

401 win_idx0 += np.argmin(cost[win_idx0:win_idx1]) 

402 win_idx1 = win_idx0 + 1 

403 

404 # retrive indices of best window for data: 

405 idx0 = win_start_inxs[win_idx0] 

406 idx1 = win_start_inxs[win_idx1 - 1] + win_size_indices 

407 

408 # clipped data? 

409 clipped = np.mean(clipped_frac[win_idx0:win_idx1]) 

410 

411 if plot_data_func: 

412 plot_data_func(data, rate, threshold, peak_idx, trough_idx, 

413 idx0, idx1, win_start_inxs/rate, cv_interv, 

414 mean_ampl, cv_ampl, clipped_frac, cost, thresh, 

415 win_idx0, win_idx1, **kwargs) 

416 

417 return idx0, idx1, clipped 

418 

419 

420def best_window_times(data, rate, expand=False, win_size=1., 

421 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

422 min_clip=-np.inf, max_clip=np.inf, 

423 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

424 tolerance=0.2, plot_data_func=None, **kwargs): 

425 """Find the window within data most suitable for subsequent analysis. 

426 

427 See `best_window_indices()` for details. 

428 

429 Returns 

430 ------- 

431 start_time: float 

432 Time of the start of the best window. 

433 end_time: float 

434 Time of the end of the best window. 

435 clipped: float 

436 The fraction of clipped peaks or troughs. 

437 """ 

438 start_inx, end_inx, clipped = best_window_indices(data, rate, 

439 expand, 

440 win_size, 

441 win_shift, 

442 thresh_fac, 

443 percentile, 

444 min_clip, 

445 max_clip, 

446 w_cv_interv, 

447 w_ampl, 

448 w_cv_ampl, 

449 tolerance, 

450 plot_data_func, 

451 **kwargs) 

452 return start_inx/rate, end_inx/rate, clipped 

453 

454 

455def best_window(data, rate, expand=False, win_size=1., win_shift=0.5, 

456 thresh_fac=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, 

457 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.2, 

458 plot_data_func=None, **kwargs): 

459 """Find the window within data most suitable for subsequent analysis. 

460 

461 See `best_window_indices()` for details. 

462 

463 Returns 

464 ------- 

465 data: array 

466 The data of the best window. 

467 clipped: float 

468 The fraction of clipped peaks or troughs. 

469 """ 

470 start_inx, end_inx, clipped = best_window_indices(data, rate, expand, 

471 win_size, win_shift, 

472 thresh_fac, percentile, 

473 min_clip, max_clip, 

474 w_cv_interv, w_ampl, w_cv_ampl, 

475 tolerance, plot_data_func, **kwargs) 

476 return data[start_inx:end_inx], clipped 

477 

478 

479def plot_best_window(data, rate, threshold, peak_idx, trough_idx, idx0, idx1, 

480 win_times, cv_interv, mean_ampl, cv_ampl, clipped_frac, 

481 cost, thresh, win_idx0, win_idx1, ax): 

482 """Visualize the cost function of used for finding the best window for analysis. 

483 

484 Pass this function as the `plot_data_func` to the `best_window_*` functions. 

485 

486 Parameters 

487 ---------- 

488 See documentation of the `best_window_indices()` functions. 

489 """ 

490 # raw data: 

491 time = np.arange(0.0, len(data))/rate 

492 ax[0].plot(time, data, 'b', lw=3) 

493 if np.mean(clipped_frac[win_idx0:win_idx1]) > 0.01: 

494 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='magenta', lw=3) 

495 else: 

496 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='grey', lw=3) 

497 ax[0].plot(time[peak_idx], data[peak_idx], 'o', mfc='red', ms=6) 

498 ax[0].plot(time[trough_idx], data[trough_idx], 'o', mfc='green', ms=6) 

499 ax[0].plot(time, threshold, '#CCCCCC', lw=2) 

500 up_lim = np.max(data) * 1.05 

501 down_lim = np.min(data) * .95 

502 ax[0].set_ylim((down_lim, up_lim)) 

503 ax[0].set_ylabel('Amplitude [a.u]') 

504 

505 # cv of inter-peak intervals: 

506 ax[1].plot(win_times[cv_interv < 1000.0], cv_interv[cv_interv < 1000.0], 'o', ms=10, color='grey', mew=2., 

507 mec='black', alpha=0.6) 

508 ax[1].plot(win_times[win_idx0:win_idx1], cv_interv[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

509 alpha=0.6) 

510 ax[1].set_ylabel('CV intervals') 

511 ax[1].set_ylim(bottom=0.0) 

512 

513 # mean amplitude: 

514 ax[2].plot(win_times[mean_ampl > 0.0], mean_ampl[mean_ampl > 0.0], 'o', ms=10, color='grey', mew=2., mec='black', 

515 alpha=0.6) 

516 ax[2].plot(win_times[win_idx0:win_idx1], mean_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

517 alpha=0.6) 

518 ax[2].set_ylabel('Mean amplitude [a.u]') 

519 ax[2].set_ylim(bottom=0.0) 

520 

521 # cv: 

522 ax[3].plot(win_times[cv_ampl < 1000.0], cv_ampl[cv_ampl < 1000.0], 'o', ms=10, color='grey', mew=2., mec='black', 

523 alpha=0.6) 

524 ax[3].plot(win_times[win_idx0:win_idx1], cv_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

525 alpha=0.6) 

526 ax[3].set_ylabel('CV amplitude') 

527 ax[3].set_ylim(bottom=0.0) 

528 

529 # cost: 

530 ax[4].plot(win_times[cost < thresh + 10], cost[cost < thresh + 10], 'o', ms=10, color='grey', mew=2., mec='black', 

531 alpha=0.6) 

532 ax[4].plot(win_times[win_idx0:win_idx1], cost[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

533 alpha=0.6) 

534 ax[4].axhline(thresh, color='k') 

535 ax[4].set_ylabel('Cost') 

536 ax[4].set_xlabel('Time [sec]') 

537 

538 

539def plot_data_window(ax, data, rate, unit, idx0, idx1, clipped, 

540 dstyle=dict(color='tab:blue', lw=1), 

541 wstyle=dict(color='tab:red', lw=1)): 

542 """Plot the data and mark the analysis window. 

543 

544 Parameters 

545 ---------- 

546 ax: matplotlib axes 

547 Axes used for plotting. 

548 data: 1-D array 

549 The full data trace. 

550 rate: float 

551 Sampling rate of the data in Hertz. 

552 unit: string 

553 The unit of the data. 

554 idx0: int 

555 Start index of the best window. 

556 idx1: int 

557 Stop index of the best window. 

558 clipped: float 

559 Fraction of clipped peaks. 

560 data_color: 

561 Color used for plotting the data trace. 

562 window_color: 

563 Color used for plotting the selected best window. 

564 """ 

565 time = np.arange(len(data))/rate 

566 ax.plot(time[:idx0], data[:idx0], **dstyle) 

567 ax.plot(time[idx1:], data[idx1:], **dstyle) 

568 if idx1 > idx0: 

569 ax.plot(time[idx0:idx1], data[idx0:idx1], **wstyle) 

570 label = 'analysis\nwindow' 

571 if clipped > 0.0: 

572 label += f'\n{100.0*clipped:.0f}% clipped' 

573 ax.text(time[(idx0+idx1)//2], 0.0, label, ha='center', va='center') 

574 ax.set_xlim(time[0], time[-1]) 

575 ax.set_xlabel('Time [sec]') 

576 if len(unit) == 0 or unit == 'a.u.': 

577 ax.set_ylabel('Amplitude') 

578 else: 

579 ax.set_ylabel(f'Amplitude [{unit}]') 

580 ax.yaxis.set_major_locator(ticker.MaxNLocator(3)) 

581 

582 

583def add_best_window_config(cfg, win_pos='best', win_size=1., win_shift=0.5, 

584 thresh_fac=0.8, percentile=0.1, 

585 min_clip=-np.inf, max_clip=np.inf, 

586 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

587 tolerance=0.2, expand=False): 

588 """ Add parameter needed for the `best_window()` functions as a new section to a configuration. 

589 

590 Parameters 

591 ---------- 

592 cfg: ConfigFile 

593 The configuration. 

594  

595 See `best_window_indices()` for details on the remaining arguments. 

596 """ 

597 cfg.add_section('Analysis window:') 

598 cfg.add('windowPosition', win_pos, '', 'Position of the analysis window: "beginning", "center", "end", "best", or a time in seconds.') 

599 cfg.add('windowSize', win_size, 's', 'Size of the best window. This should be much larger than the expected period of the signal. If 0 select the whole time series.') 

600 cfg.add('bestWindowShift', win_shift, 's', 

601 'Increment for shifting the analysis windows trough the data. Should be larger than the expected period of the signal.') 

602 cfg.add('bestWindowThresholdPercentile', percentile, '%', 

603 'Percentile for estimating interpercentile range. Should be smaller than the duty cycle of the periodic signal.') 

604 cfg.add('bestWindowThresholdFactor', thresh_fac, '', 

605 'Threshold for detecting peaks is interperecntile range of the data times this factor.') 

606 cfg.add('weightCVInterval', w_cv_interv, '', 

607 'Weight factor for the coefficient of variation of the inter-peak and inter-trough intervals.') 

608 cfg.add('weightAmplitude', w_ampl, '', 

609 'Weight factor for the mean peak-to-trough amplitudes.') 

610 cfg.add('weightCVAmplitude', w_cv_ampl, '', 

611 'Weight factor for the coefficient of variation of the peak-to-trough amplitude.') 

612 cfg.add('bestWindowTolerance', tolerance, '', 

613 'Add this to the minimum value of the cost function to get a threshold for selecting the largest best window.') 

614 cfg.add('expandBestWindow', expand, '', 

615 'Return the largest valid best window. If False return sole best window. ') 

616 

617 

618def best_window_args(cfg): 

619 """Translates a configuration to the respective parameter names of the functions `best_window*()`. 

620 

621 The return value can then be passed as key-word arguments to these 

622 functions. 

623 

624 Parameters 

625 ---------- 

626 cfg: ConfigFile 

627 The configuration. 

628 

629 Returns 

630 ------- 

631 a: dict 

632 Dictionary with names of arguments of the `best_window*()` functions 

633 and their values as supplied by `cfg`. 

634 """ 

635 return cfg.map({'win_size': 'windowSize', 

636 'win_shift': 'bestWindowShift', 

637 'percentile': 'bestWindowThresholdPercentile', 

638 'thresh_fac': 'bestWindowThresholdFactor', 

639 'w_cv_interv': 'weightCVInterval', 

640 'w_ampl': 'weightAmplitude', 

641 'w_cv_ampl': 'weightCVAmplitude', 

642 'tolerance': 'bestWindowTolerance', 

643 'expand': 'expandBestWindow'}) 

644 

645 

646def analysis_window(data, rate, ampl_max, win_pos, 

647 cfg, show_bestwindow=False): 

648 """Set clipping amplitudes and find analysis window. 

649 

650 Parameters 

651 ---------- 

652 data: 1-D array 

653 The data to be analyzed. 

654 rate: float 

655 Sampling rate of the data in Hertz. 

656 ampl_max: float 

657 Maximum value of input range. 

658 win_pos: string or float 

659 Position of the analysis window: "beginning", "center", "end" or "best". 

660 Alternatively the beginning of the analysis window in seconds. 

661 cfg: ConfigFile 

662 Configuration for clipping and best window. 

663 show_bestwindow: boolean 

664 If true show a plot with the best window cost functions. 

665 

666 Returns 

667 ------- 

668 data: 1-D array 

669 The data array of the best window 

670 idx0: int 

671 The start index of the best window in the original data. 

672 idx1: int 

673 The end index of the best window in the original data. 

674 clipped: float 

675 Fraction of clipped amplitudes. 

676 min_clip: float 

677 Minimum amplitude that is not clipped. 

678 max_clip: float 

679 Maximum amplitude that is not clipped. 

680 """ 

681 found_bestwindow = True 

682 min_clip = cfg.value('minClipAmplitude') 

683 max_clip = cfg.value('maxClipAmplitude') 

684 clipped = 0 

685 if min_clip == 0.0 or max_clip == 0.0: 

686 min_clip, max_clip = clip_amplitudes(data, max_ampl=ampl_max, 

687 **clip_args(cfg, rate)) 

688 if cfg.value('unwrapData'): 

689 unwrap(data, 1.5, ampl_max) 

690 min_clip *= 2 

691 max_clip *= 2 

692 # window size parameter: 

693 bwa = best_window_args(cfg) 

694 if 'win_size' in bwa: 

695 del bwa['win_size'] 

696 window_size = cfg.value('windowSize') 

697 if window_size <= 0.0: 

698 window_size = (len(data)-1)/rate 

699 # show cost function: 

700 if win_pos == 'best' and show_bestwindow: 

701 fig, ax = plt.subplots(5, sharex=True, figsize=(14., 10.)) 

702 try: 

703 idx0, idx1, clipped = best_window_indices(data, rate, 

704 min_clip=min_clip, 

705 max_clip=max_clip, 

706 win_size=window_size, 

707 plot_data_func=plot_best_window, 

708 ax=ax, **bwa) 

709 plt.show() 

710 except UserWarning as e: 

711 found_bestwindow = False 

712 else: 

713 # too little data: 

714 n_win = int(window_size*rate) 

715 if len(data) < n_win: 

716 return data, 0, 0, False, min_clip, max_clip 

717 if win_pos == 'best': 

718 try: 

719 idx0, idx1, clipped = best_window_indices(data, rate, 

720 min_clip=min_clip, 

721 max_clip=max_clip, 

722 win_size=window_size, 

723 **bwa) 

724 except UserWarning as e: 

725 found_bestwindow = False 

726 else: 

727 if win_pos[:5] == 'begin': 

728 idx0 = 0 

729 elif win_pos == 'end': 

730 idx0 = len(data) - n_win 

731 elif win_pos == 'center': 

732 idx0 = (len(data) - n_win)//2 

733 else: 

734 try: 

735 if win_pos[-1] == 's': 

736 win_pos = win_pos[:-1] 

737 t0 = float(win_pos) 

738 except ValueError: 

739 found_bestwindow = False 

740 t0 = 0.0 

741 idx0 = int(t0*rate) 

742 idx1 = idx0 + n_win 

743 if not found_bestwindow or idx1 > len(data): 

744 return data, 0, 0, False, min_clip, max_clip 

745 data_seg = data[idx0:idx1] 

746 # check for clipping: 

747 win_shift = cfg.value('bestWindowShift') 

748 thresh_fac = cfg.value('bestWindowThresholdFactor') 

749 percentile = cfg.value('bestWindowThresholdPercentile') 

750 threshold = percentile_threshold(data_seg, 

751 int(win_shift*rate), 

752 thresh_fac=thresh_fac, 

753 percentile=percentile) 

754 peak_idx, trough_idx = detect_peaks(data_seg, threshold) 

755 p_idx, t_idx = trim_to_peak(peak_idx, trough_idx) 

756 if len(p_idx) > 0: 

757 p2t_ampl = data_seg[p_idx] - data_seg[t_idx] 

758 clipped = float(np.sum(data_seg[p_idx] > max_clip) + 

759 np.sum(data_seg[t_idx] < min_clip))/2/len(p2t_ampl) 

760 if found_bestwindow: 

761 return data[idx0:idx1], idx0, idx1, clipped, min_clip, max_clip 

762 else: 

763 return data, 0, 0, False, min_clip, max_clip 

764 

765 

766def main(data_file=None): 

767 title = 'bestwindow' 

768 if data_file is None: 

769 # generate data: 

770 print('generate waveform...') 

771 rate = 100000.0 

772 time = np.arange(0.0, 1.0, 1.0 / rate) 

773 f = 600.0 

774 snippets = [] 

775 amf = 20.0 

776 for ampl in [0.2, 0.5, 0.8]: 

777 for am_ampl in [0.0, 0.3, 0.9]: 

778 data = ampl * np.sin(2.0 * np.pi * f * time) * (1.0 + am_ampl * np.sin(2.0 * np.pi * amf * time)) 

779 data[data > 1.3] = 1.3 

780 data[data < -1.3] = -1.3 

781 snippets.extend(data) 

782 data = np.asarray(snippets) 

783 title = 'test sines' 

784 data += 0.01 * np.random.randn(len(data)) 

785 else: 

786 from thunderlab.dataloader import load_data 

787 print(f'load {data_file} ...') 

788 data, rate, unit, amax = load_data(data_file) 

789 data = data[:,0] 

790 title = data_file 

791 

792 # determine clipping amplitudes: 

793 clip_win_size = 0.5 

794 min_clip_fac = 2.0 

795 min_clip, max_clip = clip_amplitudes(data, int(clip_win_size * rate), 

796 min_fac=min_clip_fac) 

797 # min_clip, max_clip = clip_amplitudes(data, int(clip_win_size*rate), 

798 # min_fac=min_clip_fac, 

799 # plot_hist_func=plot_clipping) 

800 

801 # setup plots: 

802 fig, ax = plt.subplots(5, 1, sharex=True, figsize=(20, 12)) 

803 fig.canvas.manager.set_window_title(title) 

804 

805 # compute best window: 

806 best_window_indices(data, rate, expand=False, win_size=4.0, 

807 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

808 min_clip=min_clip, max_clip=max_clip, 

809 w_cv_ampl=10.0, tolerance=0.5, 

810 plot_data_func=plot_best_window, ax=ax) 

811 

812 plt.tight_layout() 

813 plt.show() 

814 

815 

816if __name__ == '__main__': 

817 import sys 

818 data_file = sys.argv[1] if len(sys.argv) > 1 else None 

819 main(data_file)