Coverage for src/thunderfish/bestwindow.py: 84%

284 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-16 22:05 +0000

1""" 

2Select the best region within a recording with the most stable signal of largest amplitude that is not clipped. 

3 

4## Main functions 

5- `clip_amplitudes()`: estimated clipping amplitudes from the data. 

6- `best_window_indices()`: select start- and end-indices of the best window 

7- `best_window_times()`: select start end end-time of the best window 

8- `best_window()`: return data of the best window 

9- `analysis_window()`: set clipping amplitudes and find analysis window. 

10 

11## Configuration 

12- `add_clip_config()`: add parameters for `clip_amplitudes()` to configuration. 

13- `clip_args()`: retrieve parameters for `clip_amplitudes()` from configuration. 

14- `add_best_window_config()`: add parameters for `best_window()` to configuration. 

15- `best_window_args()`: retrieve parameters for `best_window*()` from configuration. 

16 

17## Visualization 

18- `plot_clipping()`: visualization of the algorithm for detecting clipped amplitudes in `clip_amplitudes()`. 

19- `plot_best_window()`: visualization of the algorithm used in `best_window_indices()`. 

20- `plot_data_window()`: plot the data and the selected analysis window. 

21""" 

22 

23import numpy as np 

24from thunderlab.eventdetection import percentile_threshold, detect_peaks, trim_to_peak 

25from audioio import unwrap 

26try: 

27 import matplotlib.pyplot as plt 

28 import matplotlib.ticker as ticker 

29except ImportError: 

30 pass 

31 

32 

33def clip_amplitudes(data, win_indices, min_fac=2.0, nbins=20, 

34 min_ampl=None, max_ampl=1.0, 

35 plot_hist_func=None, **kwargs): 

36 """Find the amplitudes where the signal clips. 

37 

38 Histograms in data segements of win_indices length are analyzed. 

39 If the bins at the edges are more than min_fac times as large as 

40 the neighboring bins, clipping at the bin's amplitude is assumed. 

41 

42 Parameters 

43 ---------- 

44 data: 1-D array 

45 The data. 

46 win_indices: int 

47 Size of the analysis window in indices. 

48 min_fac: float 

49 If the first or the second bin is at least `min_fac` times 

50 as large as the third bin, their upper bin edge is set as min_clip. 

51 Likewise for the last and next-to last bin. 

52 nbins: int 

53 Number of bins used for computing a histogram within `min_ampl` and `max_ampl`. 

54 min_ampl: float or None 

55 Minimum to be expected amplitude of the data. 

56 If set to None, set it to the negative of max_ampl. 

57 max_ampl: float 

58 Maximum to be expected amplitude of the data 

59 plot_hist_func: function 

60 Function for visualizing the histograms, is called for every window. 

61 `plot_clipping()` is a simple function that can be passed as `plot_hist_func` 

62 to quickly visualize what is going on in `clip_amplitudes()`. 

63  

64 Signature: 

65 

66 `plot_hist_func(data, winx0, winx1, bins, h, min_clip, max_clip, 

67 min_ampl, max_ampl, kwargs)` 

68 

69 with the arguments: 

70  

71 - `data` (array): the full data array. 

72 - `winx0` (int): the start index of the current window. 

73 - `winx1` (int): the end index of the current window. 

74 - `bins` (array): the bin edges of the histogram. 

75 - `h` (array): the histogram, plot it with 

76 ``` 

77 plt.bar(bins[:-1], h, width=np.mean(np.diff(bins))) 

78 ``` 

79 - `min_clip` (float): the current value of the minimum clip amplitude. 

80 - `max_clip` (float): the current value of the minimum clip amplitude. 

81 - `min_ampl` (float): the minimum amplitude of the data. 

82 - `max_ampl` (float): the maximum amplitude of the data. 

83 - `kwargs` (dict): further user supplied key-word arguments. 

84 

85 Returns 

86 ------- 

87 min_clip: float 

88 Minimum amplitude that is not clipped. 

89 max_clip: float 

90 Maximum amplitude that is not clipped. 

91 """ 

92 if min_ampl is None: 

93 min_ampl = -max_ampl 

94 min_clipa = min_ampl 

95 max_clipa = max_ampl 

96 bins = np.linspace(min_ampl, max_ampl, nbins, endpoint=True) 

97 win_tinxs = np.arange(0, len(data) - win_indices, win_indices) 

98 for wtinx in win_tinxs: 

99 h, b = np.histogram(data[wtinx:wtinx + win_indices], bins) 

100 if h[0] > min_fac * h[2] and b[0] < 0.4*min_ampl: 

101 if h[1] > min_fac * h[2] and b[2] > min_clipa: 

102 min_clipa = b[2] 

103 elif b[1] > min_clipa: 

104 min_clipa = b[1] 

105 if h[-1] > min_fac * h[-3] and b[-1] > 0.4*max_ampl: 

106 if h[-2] > min_fac * h[-3] and b[-3] < max_clipa: 

107 max_clipa = b[-3] 

108 elif b[-2] < max_clipa: 

109 max_clipa = b[-2] 

110 if plot_hist_func: 

111 plot_hist_func(data, wtinx, wtinx + win_indices, 

112 b, h, min_clipa, max_clipa, 

113 min_ampl, max_ampl, **kwargs) 

114 return min_clipa, max_clipa 

115 

116 

117def plot_clipping(data, winx0, winx1, bins, 

118 h, min_clip, max_clip, min_ampl, max_ampl): 

119 """Visualize the data histograms and the detected clipping amplitudes. 

120 

121 Pass this function as the `plot_hist_func` argument to `clip_amplitudes()`. 

122 """ 

123 fig, (ax1, ax2) = plt.subplots(2, 1) 

124 ax1.plot(data[winx0:winx1], 'b') 

125 ax1.axhline(min_clip, color='r') 

126 ax1.axhline(max_clip, color='r') 

127 ax1.set_ylim(-1.0, 1.0) 

128 ax2.bar(bins[:-1], h, width=np.mean(np.diff(bins))) 

129 ax2.axvline(min_clip, color='r') 

130 ax2.axvline(max_clip, color='r') 

131 ax2.set_xlim(-1.0, 1.0) 

132 plt.show() 

133 

134 

135def add_clip_config(cfg, min_clip=0.0, max_clip=0.0, 

136 window=1.0, min_fac=2.0, nbins=20, 

137 min_ampl=-1.0, max_ampl=1.0): 

138 """Add parameter needed for `clip_amplitudes()` as a new section to a configuration. 

139 

140 Parameters 

141 ---------- 

142 cfg: ConfigFile 

143 The configuration. 

144 min_clip: float 

145 Default minimum clip amplitude. 

146 max_clip: float 

147 Default maximum clip amplitude. 

148  

149 See `clip_amplitudes()` for details on the remaining arguments. 

150 """ 

151 cfg.add_section('Clipping amplitudes:') 

152 cfg.add('minClipAmplitude', min_clip, '', 'Minimum amplitude that is not clipped. If zero estimate from data.') 

153 cfg.add('maxClipAmplitude', max_clip, '', 'Maximum amplitude that is not clipped. If zero estimate from data.') 

154 cfg.add('clipWindow', window, 's', 'Window size for estimating clip amplitudes.') 

155 cfg.add('clipBins', nbins, '', 'Number of bins used for constructing histograms of signal amplitudes.') 

156 cfg.add('minClipFactor', min_fac, '', 

157 'Edge bins of the histogram of clipped signals have to be larger then their neighbors by this factor.') 

158 

159 

160def clip_args(cfg, rate): 

161 """Translates a configuration to the respective parameter names of `clip_amplitudes()`. 

162 

163 The return value can then be passed as key-word arguments to this 

164 function. 

165 

166 Parameters 

167 ---------- 

168 cfg: ConfigFile 

169 The configuration. 

170 rate: float 

171 Sampling rate of the data. 

172 

173 Returns 

174 ------- 

175 a: dict 

176 Dictionary with names of arguments of the `clip_amplitudes()` function 

177 and their values as supplied by `cfg`. 

178 """ 

179 a = cfg.map({'min_fac': 'minClipFactor', 

180 'nbins': 'clipBins'}) 

181 a['win_indices'] = int(cfg.value('clipWindow')*rate) 

182 return a 

183 

184 

185def best_window_indices(data, rate, expand=False, win_size=1., 

186 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

187 min_clip=-np.inf, max_clip=np.inf, 

188 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

189 tolerance=0.2, plot_data_func=None, **kwargs): 

190 """Find the window within data most suitable for subsequent analysis. 

191  

192 First, large peaks and troughs of the data are detected. Peaks and 

193 troughs have to be separated in amplitude by at least the value of a 

194 dynamic threshold. The threshold is computed in `win_shift` wide 

195 windows as `thresh_fac` times the interpercentile range at 

196 the `percentile`-th and 100.0-`percentile`-th percentile of the data 

197 using the `thunderlab.eventdetection.percentile_threshold()` function. 

198 

199 Second, criteria for selecting the best window are computed for each 

200 window of width `win_size` shifted by `win_shift` trough the data. The 

201 three criteria are: 

202 

203 - the mean peak-to-trough amplitude multiplied with the fraction of 

204 non clipped peak and trough amplitudes. 

205 - the coefficient of variation of the peak-to-trough amplitude. 

206 - the coefficient of variation of the inter-peak and inter-trough 

207 intervals. 

208 

209 Third, a cost function is computed as a weighted sum of the three 

210 criteria (the mean amplitude is taken negatively). The respective 

211 weights are given by `w_ampl`, `w_cv_ampl`, and `w_cv_interv`. 

212 

213 Finally, a threshold is set to the minimum value of the cost 

214 function plus tolerance. Then the largest region with the cost 

215 function below this threshold is selected as the best window. If 

216 `expand` is `False`, then only the single window with smallest cost 

217 within the selected largest region is returned. 

218 

219 The data used by best window algorithm can be visualized by supplying the 

220 function `plot_data_func`. Additional arguments for this function can 

221 be supplied via key-word arguments `kwargs`. 

222 

223 Parameters 

224 ---------- 

225 data: 1-D array 

226 The data to be analyzed. 

227 rate: float 

228 Sampling rate of the data in Hertz. 

229 expand: boolean 

230 If `False` return only the single window with the smallest cost. 

231 If `True` return the largest window with the cost below the minimum cost 

232 plus tolerance. 

233 win_size: float 

234 Minimum size of the desired best window in seconds. 

235 Choose it large enough for the subsequent analysis. 

236 win_shift: float 

237 Time shift in seconds between windows. Should be smaller or equal to `win_size`. 

238 percentile: float 

239 `percentile` parameter for the `thunderlab.eventdetection.percentile_threshold()` function 

240 used to estimate thresholds for detecting peaks in the data. 

241 thresh_fac: float 

242 `thresh_fac` parameter for the `thunderlab.eventdetection.percentile_threshold()` function 

243 used to estimate thresholds for detecting peaks in the data. 

244 min_clip: float 

245 Minimum amplitude below which data are clipped. 

246 max_clip: float 

247 Maximum amplitude above which data are clipped. 

248 w_cv_interv: float 

249 Weight for the coefficient of variation of the intervals between detected 

250 peaks and throughs. 

251 w_ampl: float 

252 Weight for the mean peak-to-trough amplitude. 

253 w_cv_ampl: float 

254 Weight for the coefficient of variation of the amplitudes. 

255 tolerance: float 

256 Added to the minimum cost for expanding the region of the best window. 

257 plot_data_func: function 

258 Function for plotting the raw data, detected peaks and troughs, the criteria, 

259 the cost function and the selected best window. 

260 `plot_best_window()` is a simple function that can be passed as the `plot_data_func` 

261 parameter to quickly visualize what is going on in selecting the best window. 

262  

263 Signature: 

264  

265 ```` 

266 plot_data_func(data, rate, peak_thresh, peak_idx, trough_idx, 

267 idx0, idx1, win_start_times, cv_interv, mean_ampl, 

268 cv_ampl, clipped_frac, cost_thresh, thresh, 

269 valid_wins, **kwargs) 

270 ``` 

271 

272 with the arguments: 

273  

274 - `data` (array): raw data. 

275 - `rate` (float): sampling rate of the data. 

276 - `peak_thresh` (array): thresholds used for detecting peaks and troughs in each data window. 

277 - `peak_idx` (array): indices into raw data indicating detected peaks. 

278 - `trough_idx` (array): indices into raw data indicating detected troughs. 

279 - `idx0` (int): index of the start of the best window. 

280 - `idx1` (int): index of the end of the best window. 

281 - `win_start_times` (array): times of the analysis windows. 

282 - `cv_interv` (array): coefficients of variation of the inter-peak and -trough 

283 intervals. 

284 - `mean_ampl` (array): mean peak-to-trough amplitudes. 

285 - `cv_ampl` (array): coefficients of variation of the peak-to-trough amplitudes. 

286 - `clipped_frac` (array): fraction of clipped peaks or troughs. 

287 - `cost` (array): cost function. 

288 - `cost_thresh` (float): threshold for the cost function. 

289 - `valid_wins` (array): boolean array indicating the windows which fulfill 

290 all three criteria. 

291 - `**kwargs` (dict): further user supplied key-word arguments. 

292 kwargs: dict 

293 Keyword arguments passed to `plot_data_func`.  

294  

295 Returns 

296 ------- 

297 start_index: int 

298 Index of the start of the best window. 

299 end_index: int 

300 Index of the end of the best window. 

301 clipped: float. 

302 The fraction of clipped peaks or troughs. 

303 

304 Raises 

305 ------ 

306 UserWarning 

307 - Not enough data for requested `win_size`. 

308 - No peaks detected. 

309 - No finite amplitudes detected. 

310 - No valid interval CV detected. 

311 - No valid amplitude CV detected. 

312 """ 

313 # too little data: 

314 if len(data)/rate < win_size: 

315 raise UserWarning(f'not enough data (data={len(data) / rate:g}s, win={win_size:g}s)') 

316 

317 # threshold for peak detection: 

318 threshold = percentile_threshold(data, int(win_shift*rate), 

319 thresh_fac=thresh_fac, 

320 percentile=percentile) 

321 

322 # detect large peaks and troughs: 

323 peak_idx, trough_idx = detect_peaks(data, threshold) 

324 if len(peak_idx) == 0 or len(trough_idx) == 0: 

325 raise UserWarning('no peaks or troughs detected') 

326 

327 # compute cv of intervals, mean peak amplitude and its cv: 

328 invalid_cv = 1000.0 

329 win_size_indices = int(win_size*rate) 

330 win_start_inxs = np.arange(0, len(data) - win_size_indices, 

331 int(0.5*win_shift*rate)) 

332 if len(win_start_inxs) == 0: 

333 win_start_inxs = [0] 

334 cv_interv = np.zeros(len(win_start_inxs)) 

335 mean_ampl = np.zeros(len(win_start_inxs)) 

336 cv_ampl = np.zeros(len(win_start_inxs)) 

337 clipped_frac = np.zeros(len(win_start_inxs)) 

338 for i, wtinx in enumerate(win_start_inxs): 

339 # indices of peaks and troughs inside analysis window: 

340 pinx = (peak_idx >= wtinx) & (peak_idx <= wtinx + win_size_indices) 

341 tinx = (trough_idx >= wtinx) & (trough_idx <= wtinx + win_size_indices) 

342 p_idx, t_idx = trim_to_peak(peak_idx[pinx], trough_idx[tinx]) 

343 # interval statistics: 

344 ipis = np.diff(p_idx) 

345 itis = np.diff(t_idx) 

346 if len(ipis) > 2: 

347 cv_interv[i] = 0.5 * (np.std(ipis) / np.mean(ipis) + np.std(itis) / np.mean(itis)) 

348 # penalize regions without detected peaks: 

349 mean_interv = np.mean(ipis) 

350 if p_idx[0] - wtinx > mean_interv: 

351 cv_interv[i] *= (p_idx[0] - wtinx) / mean_interv 

352 if wtinx + win_size_indices - p_idx[-1] > mean_interv: 

353 cv_interv[i] *= (wtinx + win_size_indices - p_idx[-1]) / mean_interv 

354 else: 

355 cv_interv[i] = invalid_cv 

356 # statistics of peak-to-trough amplitude: 

357 p2t_ampl = data[p_idx] - data[t_idx] 

358 if len(p2t_ampl) > 2: 

359 mean_ampl[i] = np.mean(p2t_ampl) 

360 cv_ampl[i] = np.std(p2t_ampl) / mean_ampl[i] 

361 # penalize for clipped peaks: 

362 clipped_frac[i] = float(np.sum(data[p_idx] > max_clip) + 

363 np.sum(data[t_idx] < min_clip)) / 2.0 / len(p2t_ampl) 

364 mean_ampl[i] *= (1.0 - clipped_frac[i]) ** 2.0 

365 else: 

366 mean_ampl[i] = 0.0 

367 cv_ampl[i] = invalid_cv 

368 

369 # check: 

370 if len(mean_ampl[mean_ampl >= 0.0]) < 0: 

371 raise UserWarning('no finite amplitudes detected') 

372 if len(cv_interv[cv_interv < invalid_cv]) <= 0: 

373 raise UserWarning('no valid interval cv detected') 

374 if len(cv_ampl[cv_ampl < invalid_cv]) <= 0: 

375 raise UserWarning('no valid amplitude cv detected') 

376 

377 # cost function: 

378 cost = w_cv_interv * cv_interv + w_cv_ampl * cv_ampl - w_ampl * mean_ampl 

379 thresh = np.min(cost) + tolerance 

380 

381 # find largest region with low costs: 

382 valid_win_idx = np.nonzero(cost <= thresh)[0] 

383 cidx0 = valid_win_idx[0] # start of current window 

384 cidx1 = cidx0 + 1 # end of current window 

385 win_idx0 = cidx0 # start of largest window 

386 win_idx1 = cidx1 # end of largest window 

387 i = 1 

388 while i < len(valid_win_idx): # loop through all valid window positions 

389 if valid_win_idx[i] == valid_win_idx[i - 1] + 1: 

390 cidx1 = valid_win_idx[i] + 1 

391 else: 

392 cidx0 = valid_win_idx[i] 

393 if cidx1 - cidx0 > win_idx1 - win_idx0: # current window is largest 

394 win_idx0 = cidx0 

395 win_idx1 = cidx1 

396 i += 1 

397 

398 # find single best window within the largest region: 

399 if not expand: 

400 win_idx0 += np.argmin(cost[win_idx0:win_idx1]) 

401 win_idx1 = win_idx0 + 1 

402 

403 # retrive indices of best window for data: 

404 idx0 = win_start_inxs[win_idx0] 

405 idx1 = win_start_inxs[win_idx1 - 1] + win_size_indices 

406 

407 # clipped data? 

408 clipped = np.mean(clipped_frac[win_idx0:win_idx1]) 

409 

410 if plot_data_func: 

411 plot_data_func(data, rate, threshold, peak_idx, trough_idx, 

412 idx0, idx1, win_start_inxs/rate, cv_interv, 

413 mean_ampl, cv_ampl, clipped_frac, cost, thresh, 

414 win_idx0, win_idx1, **kwargs) 

415 

416 return idx0, idx1, clipped 

417 

418 

419def best_window_times(data, rate, expand=False, win_size=1., 

420 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

421 min_clip=-np.inf, max_clip=np.inf, 

422 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

423 tolerance=0.2, plot_data_func=None, **kwargs): 

424 """Find the window within data most suitable for subsequent analysis. 

425 

426 See `best_window_indices()` for details. 

427 

428 Returns 

429 ------- 

430 start_time: float 

431 Time of the start of the best window. 

432 end_time: float 

433 Time of the end of the best window. 

434 clipped: float 

435 The fraction of clipped peaks or troughs. 

436 """ 

437 start_inx, end_inx, clipped = best_window_indices(data, rate, 

438 expand, 

439 win_size, 

440 win_shift, 

441 thresh_fac, 

442 percentile, 

443 min_clip, 

444 max_clip, 

445 w_cv_interv, 

446 w_ampl, 

447 w_cv_ampl, 

448 tolerance, 

449 plot_data_func, 

450 **kwargs) 

451 return start_inx/rate, end_inx/rate, clipped 

452 

453 

454def best_window(data, rate, expand=False, win_size=1., win_shift=0.5, 

455 thresh_fac=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, 

456 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.2, 

457 plot_data_func=None, **kwargs): 

458 """Find the window within data most suitable for subsequent analysis. 

459 

460 See `best_window_indices()` for details. 

461 

462 Returns 

463 ------- 

464 data: array 

465 The data of the best window. 

466 clipped: float 

467 The fraction of clipped peaks or troughs. 

468 """ 

469 start_inx, end_inx, clipped = best_window_indices(data, rate, expand, 

470 win_size, win_shift, 

471 thresh_fac, percentile, 

472 min_clip, max_clip, 

473 w_cv_interv, w_ampl, w_cv_ampl, 

474 tolerance, plot_data_func, **kwargs) 

475 return data[start_inx:end_inx], clipped 

476 

477 

478def plot_best_window(data, rate, threshold, peak_idx, trough_idx, idx0, idx1, 

479 win_times, cv_interv, mean_ampl, cv_ampl, clipped_frac, 

480 cost, thresh, win_idx0, win_idx1, ax): 

481 """Visualize the cost function of used for finding the best window for analysis. 

482 

483 Pass this function as the `plot_data_func` to the `best_window_*` functions. 

484 

485 Parameters 

486 ---------- 

487 See documentation of the `best_window_indices()` functions. 

488 """ 

489 # raw data: 

490 time = np.arange(0.0, len(data))/rate 

491 ax[0].plot(time, data, 'b', lw=3) 

492 if np.mean(clipped_frac[win_idx0:win_idx1]) > 0.01: 

493 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='magenta', lw=3) 

494 else: 

495 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='grey', lw=3) 

496 ax[0].plot(time[peak_idx], data[peak_idx], 'o', mfc='red', ms=6) 

497 ax[0].plot(time[trough_idx], data[trough_idx], 'o', mfc='green', ms=6) 

498 ax[0].plot(time, threshold, '#CCCCCC', lw=2) 

499 up_lim = np.max(data) * 1.05 

500 down_lim = np.min(data) * .95 

501 ax[0].set_ylim((down_lim, up_lim)) 

502 ax[0].set_ylabel('Amplitude [a.u]') 

503 

504 # cv of inter-peak intervals: 

505 ax[1].plot(win_times[cv_interv < 1000.0], cv_interv[cv_interv < 1000.0], 'o', ms=10, color='grey', mew=2., 

506 mec='black', alpha=0.6) 

507 ax[1].plot(win_times[win_idx0:win_idx1], cv_interv[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

508 alpha=0.6) 

509 ax[1].set_ylabel('CV intervals') 

510 ax[1].set_ylim(bottom=0.0) 

511 

512 # mean amplitude: 

513 ax[2].plot(win_times[mean_ampl > 0.0], mean_ampl[mean_ampl > 0.0], 'o', ms=10, color='grey', mew=2., mec='black', 

514 alpha=0.6) 

515 ax[2].plot(win_times[win_idx0:win_idx1], mean_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

516 alpha=0.6) 

517 ax[2].set_ylabel('Mean amplitude [a.u]') 

518 ax[2].set_ylim(bottom=0.0) 

519 

520 # cv: 

521 ax[3].plot(win_times[cv_ampl < 1000.0], cv_ampl[cv_ampl < 1000.0], 'o', ms=10, color='grey', mew=2., mec='black', 

522 alpha=0.6) 

523 ax[3].plot(win_times[win_idx0:win_idx1], cv_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

524 alpha=0.6) 

525 ax[3].set_ylabel('CV amplitude') 

526 ax[3].set_ylim(bottom=0.0) 

527 

528 # cost: 

529 ax[4].plot(win_times[cost < thresh + 10], cost[cost < thresh + 10], 'o', ms=10, color='grey', mew=2., mec='black', 

530 alpha=0.6) 

531 ax[4].plot(win_times[win_idx0:win_idx1], cost[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

532 alpha=0.6) 

533 ax[4].axhline(thresh, color='k') 

534 ax[4].set_ylabel('Cost') 

535 ax[4].set_xlabel('Time [sec]') 

536 

537 

538def plot_data_window(ax, data, rate, unit, idx0, idx1, clipped, 

539 data_color='blue', window_color='red'): 

540 """Plot the data and mark the analysis window. 

541 

542 Parameters 

543 ---------- 

544 ax: matplotlib axes 

545 Axes used for plotting. 

546 data: 1-D array 

547 The full data trace. 

548 rate: float 

549 Sampling rate of the data in Hertz. 

550 unit: string 

551 The unit of the data. 

552 idx0: int 

553 Start index of the best window. 

554 idx1: int 

555 Stop index of the best window. 

556 clipped: float 

557 Fraction of clipped peaks. 

558 data_color: 

559 Color used for plotting the data trace. 

560 window_color: 

561 Color used for plotting the selected best window. 

562 """ 

563 time = np.arange(len(data))/rate 

564 ax.plot(time[:idx0], data[:idx0], color=data_color) 

565 ax.plot(time[idx1:], data[idx1:], color=data_color) 

566 if idx1 > idx0: 

567 ax.plot(time[idx0:idx1], data[idx0:idx1], color=window_color) 

568 label = 'analysis\nwindow' 

569 if clipped > 0.0: 

570 label += f'\n{100.0*clipped:.0f}% clipped' 

571 ax.text(time[(idx0+idx1)//2], 0.0, label, ha='center', va='center') 

572 ax.set_xlim(time[0], time[-1]) 

573 ax.set_xlabel('Time [sec]') 

574 if len(unit) == 0 or unit == 'a.u.': 

575 ax.set_ylabel('Amplitude') 

576 else: 

577 ax.set_ylabel(f'Amplitude [{unit}]') 

578 ax.yaxis.set_major_locator(ticker.MaxNLocator(3)) 

579 

580 

581def add_best_window_config(cfg, win_pos='best', win_size=1., win_shift=0.5, 

582 thresh_fac=0.8, percentile=0.1, 

583 min_clip=-np.inf, max_clip=np.inf, 

584 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

585 tolerance=0.2, expand=False): 

586 """ Add parameter needed for the `best_window()` functions as a new section to a configuration. 

587 

588 Parameters 

589 ---------- 

590 cfg: ConfigFile 

591 The configuration. 

592  

593 See `best_window_indices()` for details on the remaining arguments. 

594 """ 

595 cfg.add_section('Analysis window:') 

596 cfg.add('windowPosition', win_pos, '', 'Position of the analysis window: "beginning", "center", "end", "best", or a time in seconds.') 

597 cfg.add('windowSize', win_size, 's', 'Size of the best window. This should be much larger than the expected period of the signal. If 0 select the whole time series.') 

598 cfg.add('bestWindowShift', win_shift, 's', 

599 'Increment for shifting the analysis windows trough the data. Should be larger than the expected period of the signal.') 

600 cfg.add('bestWindowThresholdPercentile', percentile, '%', 

601 'Percentile for estimating interpercentile range. Should be smaller than the duty cycle of the periodic signal.') 

602 cfg.add('bestWindowThresholdFactor', thresh_fac, '', 

603 'Threshold for detecting peaks is interperecntile range of the data times this factor.') 

604 cfg.add('weightCVInterval', w_cv_interv, '', 

605 'Weight factor for the coefficient of variation of the inter-peak and inter-trough intervals.') 

606 cfg.add('weightAmplitude', w_ampl, '', 

607 'Weight factor for the mean peak-to-trough amplitudes.') 

608 cfg.add('weightCVAmplitude', w_cv_ampl, '', 

609 'Weight factor for the coefficient of variation of the peak-to-trough amplitude.') 

610 cfg.add('bestWindowTolerance', tolerance, '', 

611 'Add this to the minimum value of the cost function to get a threshold for selecting the largest best window.') 

612 cfg.add('expandBestWindow', expand, '', 

613 'Return the largest valid best window. If False return sole best window. ') 

614 

615 

616def best_window_args(cfg): 

617 """Translates a configuration to the respective parameter names of the functions `best_window*()`. 

618 

619 The return value can then be passed as key-word arguments to these 

620 functions. 

621 

622 Parameters 

623 ---------- 

624 cfg: ConfigFile 

625 The configuration. 

626 

627 Returns 

628 ------- 

629 a: dict 

630 Dictionary with names of arguments of the `best_window*()` functions 

631 and their values as supplied by `cfg`. 

632 """ 

633 return cfg.map({'win_size': 'windowSize', 

634 'win_shift': 'bestWindowShift', 

635 'percentile': 'bestWindowThresholdPercentile', 

636 'thresh_fac': 'bestWindowThresholdFactor', 

637 'w_cv_interv': 'weightCVInterval', 

638 'w_ampl': 'weightAmplitude', 

639 'w_cv_ampl': 'weightCVAmplitude', 

640 'tolerance': 'bestWindowTolerance', 

641 'expand': 'expandBestWindow'}) 

642 

643 

644def analysis_window(data, rate, ampl_max, win_pos, 

645 cfg, show_bestwindow=False): 

646 """Set clipping amplitudes and find analysis window. 

647 

648 Parameters 

649 ---------- 

650 data: 1-D array 

651 The data to be analyzed. 

652 rate: float 

653 Sampling rate of the data in Hertz. 

654 ampl_max: float 

655 Maximum value of input range. 

656 win_pos: string or float 

657 Position of the analysis window: "beginning", "center", "end" or "best". 

658 Alternatively the beginning of the analysis window in seconds. 

659 cfg: ConfigFile 

660 Configuration for clipping and best window. 

661 show_bestwindow: boolean 

662 If true show a plot with the best window cost functions. 

663 

664 Returns 

665 ------- 

666 data: 1-D array 

667 The data array of the best window 

668 idx0: int 

669 The start index of the best window in the original data. 

670 idx1: int 

671 The end index of the best window in the original data. 

672 clipped: float 

673 Fraction of clipped amplitudes. 

674 min_clip: float 

675 Minimum amplitude that is not clipped. 

676 max_clip: float 

677 Maximum amplitude that is not clipped. 

678 """ 

679 found_bestwindow = True 

680 min_clip = cfg.value('minClipAmplitude') 

681 max_clip = cfg.value('maxClipAmplitude') 

682 clipped = 0 

683 if min_clip == 0.0 or max_clip == 0.0: 

684 min_clip, max_clip = clip_amplitudes(data, max_ampl=ampl_max, 

685 **clip_args(cfg, rate)) 

686 if cfg.value('unwrapData'): 

687 unwrap(data, 1.5, ampl_max) 

688 min_clip *= 2 

689 max_clip *= 2 

690 # window size parameter: 

691 bwa = best_window_args(cfg) 

692 if 'win_size' in bwa: 

693 del bwa['win_size'] 

694 window_size = cfg.value('windowSize') 

695 if window_size <= 0.0: 

696 window_size = (len(data)-1)/rate 

697 # show cost function: 

698 if win_pos == 'best' and show_bestwindow: 

699 fig, ax = plt.subplots(5, sharex=True, figsize=(14., 10.)) 

700 try: 

701 idx0, idx1, clipped = best_window_indices(data, rate, 

702 min_clip=min_clip, 

703 max_clip=max_clip, 

704 win_size=window_size, 

705 plot_data_func=plot_best_window, 

706 ax=ax, **bwa) 

707 plt.show() 

708 except UserWarning as e: 

709 found_bestwindow = False 

710 else: 

711 # too little data: 

712 n_win = int(window_size*rate) 

713 if len(data) < n_win: 

714 return data, 0, 0, False, min_clip, max_clip 

715 if win_pos == 'best': 

716 try: 

717 idx0, idx1, clipped = best_window_indices(data, rate, 

718 min_clip=min_clip, 

719 max_clip=max_clip, 

720 win_size=window_size, 

721 **bwa) 

722 except UserWarning as e: 

723 found_bestwindow = False 

724 else: 

725 if win_pos[:5] == 'begin': 

726 idx0 = 0 

727 elif win_pos == 'end': 

728 idx0 = len(data) - n_win 

729 elif win_pos == 'center': 

730 idx0 = (len(data) - n_win)//2 

731 else: 

732 try: 

733 if win_pos[-1] == 's': 

734 win_pos = win_pos[:-1] 

735 t0 = float(win_pos) 

736 except ValueError: 

737 found_bestwindow = False 

738 t0 = 0.0 

739 idx0 = int(t0*rate) 

740 idx1 = idx0 + n_win 

741 if not found_bestwindow or idx1 > len(data): 

742 return data, 0, 0, False, min_clip, max_clip 

743 data_seg = data[idx0:idx1] 

744 # check for clipping: 

745 win_shift = cfg.value('bestWindowShift') 

746 thresh_fac = cfg.value('bestWindowThresholdFactor') 

747 percentile = cfg.value('bestWindowThresholdPercentile') 

748 threshold = percentile_threshold(data_seg, 

749 int(win_shift*rate), 

750 thresh_fac=thresh_fac, 

751 percentile=percentile) 

752 peak_idx, trough_idx = detect_peaks(data_seg, threshold) 

753 p_idx, t_idx = trim_to_peak(peak_idx, trough_idx) 

754 if len(p_idx) > 0: 

755 p2t_ampl = data_seg[p_idx] - data_seg[t_idx] 

756 clipped = float(np.sum(data_seg[p_idx] > max_clip) + 

757 np.sum(data_seg[t_idx] < min_clip))/2/len(p2t_ampl) 

758 if found_bestwindow: 

759 return data[idx0:idx1], idx0, idx1, clipped, min_clip, max_clip 

760 else: 

761 return data, 0, 0, False, min_clip, max_clip 

762 

763 

764def main(data_file=None): 

765 title = 'bestwindow' 

766 if data_file is None: 

767 # generate data: 

768 print('generate waveform...') 

769 rate = 100000.0 

770 time = np.arange(0.0, 1.0, 1.0 / rate) 

771 f = 600.0 

772 snippets = [] 

773 amf = 20.0 

774 for ampl in [0.2, 0.5, 0.8]: 

775 for am_ampl in [0.0, 0.3, 0.9]: 

776 data = ampl * np.sin(2.0 * np.pi * f * time) * (1.0 + am_ampl * np.sin(2.0 * np.pi * amf * time)) 

777 data[data > 1.3] = 1.3 

778 data[data < -1.3] = -1.3 

779 snippets.extend(data) 

780 data = np.asarray(snippets) 

781 title = 'test sines' 

782 data += 0.01 * np.random.randn(len(data)) 

783 else: 

784 from thunderlab.dataloader import load_data 

785 print(f'load {data_file} ...') 

786 data, rate, unit, amax = load_data(data_file) 

787 data = data[:,0] 

788 title = data_file 

789 

790 # determine clipping amplitudes: 

791 clip_win_size = 0.5 

792 min_clip_fac = 2.0 

793 min_clip, max_clip = clip_amplitudes(data, int(clip_win_size * rate), 

794 min_fac=min_clip_fac) 

795 # min_clip, max_clip = clip_amplitudes(data, int(clip_win_size*rate), 

796 # min_fac=min_clip_fac, 

797 # plot_hist_func=plot_clipping) 

798 

799 # setup plots: 

800 fig, ax = plt.subplots(5, 1, sharex=True, figsize=(20, 12)) 

801 fig.canvas.manager.set_window_title(title) 

802 

803 # compute best window: 

804 best_window_indices(data, rate, expand=False, win_size=4.0, 

805 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

806 min_clip=min_clip, max_clip=max_clip, 

807 w_cv_ampl=10.0, tolerance=0.5, 

808 plot_data_func=plot_best_window, ax=ax) 

809 

810 plt.tight_layout() 

811 plt.show() 

812 

813 

814if __name__ == '__main__': 

815 import sys 

816 data_file = sys.argv[1] if len(sys.argv) > 1 else None 

817 main(data_file)