Coverage for src/thunderfish/bestwindow.py: 84%

284 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-29 16:21 +0000

1""" 

2Select the best region within a recording with the most stable signal of largest amplitude that is not clipped. 

3 

4## Main functions 

5- `clip_amplitudes()`: estimated clipping amplitudes from the data. 

6- `best_window_indices()`: select start- and end-indices of the best window 

7- `best_window_times()`: select start end end-time of the best window 

8- `best_window()`: return data of the best window 

9- `analysis_window()`: set clipping amplitudes and find analysis window. 

10 

11## Configuration 

12- `add_clip_config()`: add parameters for `clip_amplitudes()` to configuration. 

13- `clip_args()`: retrieve parameters for `clip_amplitudes()` from configuration. 

14- `add_best_window_config()`: add parameters for `best_window()` to configuration. 

15- `best_window_args()`: retrieve parameters for `best_window*()` from configuration. 

16 

17## Visualization 

18- `plot_clipping()`: visualization of the algorithm for detecting clipped amplitudes in `clip_amplitudes()`. 

19- `plot_best_window()`: visualization of the algorithm used in `best_window_indices()`. 

20- `plot_data_window()`: plot the data and the selected analysis window. 

21""" 

22 

23import numpy as np 

24from thunderlab.eventdetection import percentile_threshold, detect_peaks, trim_to_peak 

25from audioio import unwrap 

26try: 

27 import matplotlib.pyplot as plt 

28 import matplotlib.ticker as ticker 

29except ImportError: 

30 pass 

31 

32 

33def clip_amplitudes(data, win_indices, min_fac=2.0, nbins=20, 

34 min_ampl=-1.0, max_ampl=1.0, 

35 plot_hist_func=None, **kwargs): 

36 """Find the amplitudes where the signal clips. 

37 

38 Histograms in data segements of win_indices length are analyzed. 

39 If the bins at the edges are more than min_fac times as large as 

40 the neighboring bins, clipping at the bin's amplitude is assumed. 

41 

42 Parameters 

43 ---------- 

44 data: 1-D array 

45 The data. 

46 win_indices: int 

47 Size of the analysis window in indices. 

48 min_fac: float 

49 If the first or the second bin is at least `min_fac` times 

50 as large as the third bin, their upper bin edge is set as min_clip. 

51 Likewise for the last and next-to last bin. 

52 nbins: int 

53 Number of bins used for computing a histogram within `min_ampl` and `max_ampl`. 

54 min_ampl: float 

55 Minimum to be expected amplitude of the data. 

56 max_ampl: float 

57 Maximum to be expected amplitude of the data 

58 plot_hist_func: function 

59 Function for visualizing the histograms, is called for every window. 

60 `plot_clipping()` is a simple function that can be passed as `plot_hist_func` 

61 to quickly visualize what is going on in `clip_amplitudes()`. 

62  

63 Signature: 

64 

65 `plot_hist_func(data, winx0, winx1, bins, h, min_clip, max_clip, 

66 min_ampl, max_ampl, kwargs)` 

67 

68 with the arguments: 

69  

70 - `data` (array): the full data array. 

71 - `winx0` (int): the start index of the current window. 

72 - `winx1` (int): the end index of the current window. 

73 - `bins` (array): the bin edges of the histogram. 

74 - `h` (array): the histogram, plot it with 

75 ``` 

76 plt.bar(bins[:-1], h, width=np.mean(np.diff(bins))) 

77 ``` 

78 - `min_clip` (float): the current value of the minimum clip amplitude. 

79 - `max_clip` (float): the current value of the minimum clip amplitude. 

80 - `min_ampl` (float): the minimum amplitude of the data. 

81 - `max_ampl` (float): the maximum amplitude of the data. 

82 - `kwargs` (dict): further user supplied key-word arguments. 

83 

84 Returns 

85 ------- 

86 min_clip: float 

87 Minimum amplitude that is not clipped. 

88 max_clip: float 

89 Maximum amplitude that is not clipped. 

90 """ 

91 

92 min_clipa = min_ampl 

93 max_clipa = max_ampl 

94 bins = np.linspace(min_ampl, max_ampl, nbins, endpoint=True) 

95 win_tinxs = np.arange(0, len(data) - win_indices, win_indices) 

96 for wtinx in win_tinxs: 

97 h, b = np.histogram(data[wtinx:wtinx + win_indices], bins) 

98 if h[0] > min_fac * h[2] and b[0] < 0.4*min_ampl: 

99 if h[1] > min_fac * h[2] and b[2] > min_clipa: 

100 min_clipa = b[2] 

101 elif b[1] > min_clipa: 

102 min_clipa = b[1] 

103 if h[-1] > min_fac * h[-3] and b[-1] > 0.4*max_ampl: 

104 if h[-2] > min_fac * h[-3] and b[-3] < max_clipa: 

105 max_clipa = b[-3] 

106 elif b[-2] < max_clipa: 

107 max_clipa = b[-2] 

108 if plot_hist_func: 

109 plot_hist_func(data, wtinx, wtinx + win_indices, 

110 b, h, min_clipa, max_clipa, 

111 min_ampl, max_ampl, **kwargs) 

112 return min_clipa, max_clipa 

113 

114 

115def plot_clipping(data, winx0, winx1, bins, 

116 h, min_clip, max_clip, min_ampl, max_ampl): 

117 """Visualize the data histograms and the detected clipping amplitudes. 

118 

119 Pass this function as the `plot_hist_func` argument to `clip_amplitudes()`. 

120 """ 

121 fig, (ax1, ax2) = plt.subplots(2, 1) 

122 ax1.plot(data[winx0:winx1], 'b') 

123 ax1.axhline(min_clip, color='r') 

124 ax1.axhline(max_clip, color='r') 

125 ax1.set_ylim(-1.0, 1.0) 

126 ax2.bar(bins[:-1], h, width=np.mean(np.diff(bins))) 

127 ax2.axvline(min_clip, color='r') 

128 ax2.axvline(max_clip, color='r') 

129 ax2.set_xlim(-1.0, 1.0) 

130 plt.show() 

131 

132 

133def add_clip_config(cfg, min_clip=0.0, max_clip=0.0, 

134 window=1.0, min_fac=2.0, nbins=20, 

135 min_ampl=-1.0, max_ampl=1.0): 

136 """Add parameter needed for `clip_amplitudes()` as a new section to a configuration. 

137 

138 Parameters 

139 ---------- 

140 cfg: ConfigFile 

141 The configuration. 

142 min_clip: float 

143 Default minimum clip amplitude. 

144 max_clip: float 

145 Default maximum clip amplitude. 

146  

147 See `clip_amplitudes()` for details on the remaining arguments. 

148 """ 

149 cfg.add_section('Clipping amplitudes:') 

150 cfg.add('minClipAmplitude', min_clip, '', 'Minimum amplitude that is not clipped. If zero estimate from data.') 

151 cfg.add('maxClipAmplitude', max_clip, '', 'Maximum amplitude that is not clipped. If zero estimate from data.') 

152 cfg.add('clipWindow', window, 's', 'Window size for estimating clip amplitudes.') 

153 cfg.add('clipBins', nbins, '', 'Number of bins used for constructing histograms of signal amplitudes.') 

154 cfg.add('minClipFactor', min_fac, '', 

155 'Edge bins of the histogram of clipped signals have to be larger then their neighbors by this factor.') 

156 cfg.add('minDataAmplitude', min_ampl, '', 'Minimum amplitude that is to be expected in the data.') 

157 cfg.add('maxDataAmplitude', max_ampl, '', 'Maximum amplitude that is to be expected in the data.') 

158 

159 

160def clip_args(cfg, rate): 

161 """Translates a configuration to the respective parameter names of `clip_amplitudes()`. 

162 

163 The return value can then be passed as key-word arguments to this 

164 function. 

165 

166 Parameters 

167 ---------- 

168 cfg: ConfigFile 

169 The configuration. 

170 rate: float 

171 The sampling rate of the data. 

172 

173 Returns 

174 ------- 

175 a: dict 

176 Dictionary with names of arguments of the `clip_amplitudes()` function 

177 and their values as supplied by `cfg`. 

178 """ 

179 a = cfg.map({'min_fac': 'minClipFactor', 

180 'nbins': 'clipBins', 

181 'min_ampl': 'minDataAmplitude', 

182 'max_ampl': 'maxDataAmplitude'}) 

183 a['win_indices'] = int(cfg.value('clipWindow') * rate) 

184 return a 

185 

186 

187def best_window_indices(data, samplerate, expand=False, win_size=1., win_shift=0.5, 

188 thresh_fac=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, 

189 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.2, 

190 plot_data_func=None, **kwargs): 

191 """Find the window within data most suitable for subsequent analysis. 

192  

193 First, large peaks and troughs of the data are detected. Peaks and 

194 troughs have to be separated in amplitude by at least the value of a 

195 dynamic threshold. The threshold is computed in `win_shift` wide 

196 windows as `thresh_fac` times the interpercentile range at 

197 the `percentile`-th and 100.0-`percentile`-th percentile of the data 

198 using the `thunderlab.eventdetection.percentile_threshold()` function. 

199 

200 Second, criteria for selecting the best window are computed for each 

201 window of width `win_size` shifted by `win_shift` trough the data. The 

202 three criteria are: 

203 

204 - the mean peak-to-trough amplitude multiplied with the fraction of 

205 non clipped peak and trough amplitudes. 

206 - the coefficient of variation of the peak-to-trough amplitude. 

207 - the coefficient of variation of the inter-peak and inter-trough 

208 intervals. 

209 

210 Third, a cost function is computed as a weighted sum of the three 

211 criteria (the mean amplitude is taken negatively). The respective 

212 weights are given by `w_ampl`, `w_cv_ampl`, and `w_cv_interv`. 

213 

214 Finally, a threshold is set to the minimum value of the cost 

215 function plus tolerance. Then the largest region with the cost 

216 function below this threshold is selected as the best window. If 

217 `expand` is `False`, then only the single window with smallest cost 

218 within the selected largest region is returned. 

219 

220 The data used by best window algorithm can be visualized by supplying the 

221 function `plot_data_func`. Additional arguments for this function can 

222 be supplied via key-word arguments `kwargs`. 

223 

224 Parameters 

225 ---------- 

226 data: 1-D array 

227 The data to be analyzed. 

228 samplerate: float 

229 Sampling rate of the data in Hertz. 

230 expand: boolean 

231 If `False` return only the single window with the smallest cost. 

232 If `True` return the largest window with the cost below the minimum cost 

233 plus tolerance. 

234 win_size: float 

235 Minimum size of the desired best window in seconds. 

236 Choose it large enough for the subsequent analysis. 

237 win_shift: float 

238 Time shift in seconds between windows. Should be smaller or equal to `win_size`. 

239 percentile: float 

240 `percentile` parameter for the `thunderlab.eventdetection.percentile_threshold()` function 

241 used to estimate thresholds for detecting peaks in the data. 

242 thresh_fac: float 

243 `thresh_fac` parameter for the `thunderlab.eventdetection.percentile_threshold()` function 

244 used to estimate thresholds for detecting peaks in the data. 

245 min_clip: float 

246 Minimum amplitude below which data are clipped. 

247 max_clip: float 

248 Maximum amplitude above which data are clipped. 

249 w_cv_interv: float 

250 Weight for the coefficient of variation of the intervals between detected 

251 peaks and throughs. 

252 w_ampl: float 

253 Weight for the mean peak-to-trough amplitude. 

254 w_cv_ampl: float 

255 Weight for the coefficient of variation of the amplitudes. 

256 tolerance: float 

257 Added to the minimum cost for expanding the region of the best window. 

258 plot_data_func: function 

259 Function for plotting the raw data, detected peaks and troughs, the criteria, 

260 the cost function and the selected best window. 

261 `plot_best_window()` is a simple function that can be passed as the `plot_data_func` 

262 parameter to quickly visualize what is going on in selecting the best window. 

263  

264 Signature: 

265  

266 ```` 

267 plot_data_func(data, rate, peak_thresh, peak_idx, trough_idx, idx0, idx1, 

268 win_start_times, cv_interv, mean_ampl, cv_ampl, clipped_frac, cost_thresh, 

269 thresh, valid_wins, **kwargs) 

270 ``` 

271 

272 with the arguments: 

273  

274 - `data` (array): raw data. 

275 - `rate` (float): sampling rate of the data. 

276 - `peak_thresh` (array): thresholds used for detecting peaks and troughs in each data window. 

277 - `peak_idx` (array): indices into raw data indicating detected peaks. 

278 - `trough_idx` (array): indices into raw data indicating detected troughs. 

279 - `idx0` (int): index of the start of the best window. 

280 - `idx1` (int): index of the end of the best window. 

281 - `win_start_times` (array): times of the analysis windows. 

282 - `cv_interv` (array): coefficients of variation of the inter-peak and -trough 

283 intervals. 

284 - `mean_ampl` (array): mean peak-to-trough amplitudes. 

285 - `cv_ampl` (array): coefficients of variation of the peak-to-trough amplitudes. 

286 - `clipped_frac` (array): fraction of clipped peaks or troughs. 

287 - `cost` (array): cost function. 

288 - `cost_thresh` (float): threshold for the cost function. 

289 - `valid_wins` (array): boolean array indicating the windows which fulfill 

290 all three criteria. 

291 - `**kwargs` (dict): further user supplied key-word arguments. 

292 kwargs: dict 

293 Keyword arguments passed to `plot_data_func`.  

294  

295 Returns 

296 ------- 

297 start_index: int 

298 Index of the start of the best window. 

299 end_index: int 

300 Index of the end of the best window. 

301 clipped: float. 

302 The fraction of clipped peaks or troughs. 

303 

304 Raises 

305 ------ 

306 UserWarning 

307 - Not enough data for requested `win_size`. 

308 - No peaks detected. 

309 - No finite amplitudes detected. 

310 - No valid interval CV detected. 

311 - No valid amplitude CV detected. 

312 """ 

313 # too little data: 

314 if len(data) / samplerate < win_size: 

315 raise UserWarning(f'not enough data (data={len(data) / samplerate:g}s, win={win_size:g}s)') 

316 

317 # threshold for peak detection: 

318 threshold = percentile_threshold(data, int(win_shift*samplerate), 

319 thresh_fac=thresh_fac, 

320 percentile=percentile) 

321 

322 # detect large peaks and troughs: 

323 peak_idx, trough_idx = detect_peaks(data, threshold) 

324 if len(peak_idx) == 0 or len(trough_idx) == 0: 

325 raise UserWarning('no peaks or troughs detected') 

326 

327 # compute cv of intervals, mean peak amplitude and its cv: 

328 invalid_cv = 1000.0 

329 win_size_indices = int(win_size * samplerate) 

330 win_start_inxs = np.arange(0, len(data) - win_size_indices, 

331 int(0.5*win_shift*samplerate)) 

332 if len(win_start_inxs) == 0: 

333 win_start_inxs = [0] 

334 cv_interv = np.zeros(len(win_start_inxs)) 

335 mean_ampl = np.zeros(len(win_start_inxs)) 

336 cv_ampl = np.zeros(len(win_start_inxs)) 

337 clipped_frac = np.zeros(len(win_start_inxs)) 

338 for i, wtinx in enumerate(win_start_inxs): 

339 # indices of peaks and troughs inside analysis window: 

340 pinx = (peak_idx >= wtinx) & (peak_idx <= wtinx + win_size_indices) 

341 tinx = (trough_idx >= wtinx) & (trough_idx <= wtinx + win_size_indices) 

342 p_idx, t_idx = trim_to_peak(peak_idx[pinx], trough_idx[tinx]) 

343 # interval statistics: 

344 ipis = np.diff(p_idx) 

345 itis = np.diff(t_idx) 

346 if len(ipis) > 2: 

347 cv_interv[i] = 0.5 * (np.std(ipis) / np.mean(ipis) + np.std(itis) / np.mean(itis)) 

348 # penalize regions without detected peaks: 

349 mean_interv = np.mean(ipis) 

350 if p_idx[0] - wtinx > mean_interv: 

351 cv_interv[i] *= (p_idx[0] - wtinx) / mean_interv 

352 if wtinx + win_size_indices - p_idx[-1] > mean_interv: 

353 cv_interv[i] *= (wtinx + win_size_indices - p_idx[-1]) / mean_interv 

354 else: 

355 cv_interv[i] = invalid_cv 

356 # statistics of peak-to-trough amplitude: 

357 p2t_ampl = data[p_idx] - data[t_idx] 

358 if len(p2t_ampl) > 2: 

359 mean_ampl[i] = np.mean(p2t_ampl) 

360 cv_ampl[i] = np.std(p2t_ampl) / mean_ampl[i] 

361 # penalize for clipped peaks: 

362 clipped_frac[i] = float(np.sum(data[p_idx] > max_clip) + 

363 np.sum(data[t_idx] < min_clip)) / 2.0 / len(p2t_ampl) 

364 mean_ampl[i] *= (1.0 - clipped_frac[i]) ** 2.0 

365 else: 

366 mean_ampl[i] = 0.0 

367 cv_ampl[i] = invalid_cv 

368 

369 # check: 

370 if len(mean_ampl[mean_ampl >= 0.0]) < 0: 

371 raise UserWarning('no finite amplitudes detected') 

372 if len(cv_interv[cv_interv < invalid_cv]) <= 0: 

373 raise UserWarning('no valid interval cv detected') 

374 if len(cv_ampl[cv_ampl < invalid_cv]) <= 0: 

375 raise UserWarning('no valid amplitude cv detected') 

376 

377 # cost function: 

378 cost = w_cv_interv * cv_interv + w_cv_ampl * cv_ampl - w_ampl * mean_ampl 

379 thresh = np.min(cost) + tolerance 

380 

381 # find largest region with low costs: 

382 valid_win_idx = np.nonzero(cost <= thresh)[0] 

383 cidx0 = valid_win_idx[0] # start of current window 

384 cidx1 = cidx0 + 1 # end of current window 

385 win_idx0 = cidx0 # start of largest window 

386 win_idx1 = cidx1 # end of largest window 

387 i = 1 

388 while i < len(valid_win_idx): # loop through all valid window positions 

389 if valid_win_idx[i] == valid_win_idx[i - 1] + 1: 

390 cidx1 = valid_win_idx[i] + 1 

391 else: 

392 cidx0 = valid_win_idx[i] 

393 if cidx1 - cidx0 > win_idx1 - win_idx0: # current window is largest 

394 win_idx0 = cidx0 

395 win_idx1 = cidx1 

396 i += 1 

397 

398 # find single best window within the largest region: 

399 if not expand: 

400 win_idx0 += np.argmin(cost[win_idx0:win_idx1]) 

401 win_idx1 = win_idx0 + 1 

402 

403 # retrive indices of best window for data: 

404 idx0 = win_start_inxs[win_idx0] 

405 idx1 = win_start_inxs[win_idx1 - 1] + win_size_indices 

406 

407 # clipped data? 

408 clipped = np.mean(clipped_frac[win_idx0:win_idx1]) 

409 

410 if plot_data_func: 

411 plot_data_func(data, samplerate, threshold, peak_idx, trough_idx, idx0, idx1, 

412 win_start_inxs / samplerate, cv_interv, mean_ampl, cv_ampl, clipped_frac, 

413 cost, thresh, win_idx0, win_idx1, **kwargs) 

414 

415 return idx0, idx1, clipped 

416 

417 

418def best_window_times(data, samplerate, expand=False, win_size=1., win_shift=0.5, 

419 thresh_fac=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, 

420 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.2, 

421 plot_data_func=None, **kwargs): 

422 """Find the window within data most suitable for subsequent analysis. 

423 

424 See `best_window_indices()` for details. 

425 

426 Returns 

427 ------- 

428 start_time: float 

429 Time of the start of the best window. 

430 end_time: float 

431 Time of the end of the best window. 

432 clipped: float 

433 The fraction of clipped peaks or troughs. 

434 """ 

435 start_inx, end_inx, clipped = best_window_indices(data, samplerate, expand, 

436 win_size, win_shift, 

437 thresh_fac, percentile, 

438 min_clip, max_clip, 

439 w_cv_interv, w_ampl, w_cv_ampl, tolerance, 

440 plot_data_func, **kwargs) 

441 return start_inx / samplerate, end_inx / samplerate, clipped 

442 

443 

444def best_window(data, samplerate, expand=False, win_size=1., win_shift=0.5, 

445 thresh_fac=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, 

446 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.2, 

447 plot_data_func=None, **kwargs): 

448 """Find the window within data most suitable for subsequent analysis. 

449 

450 See `best_window_indices()` for details. 

451 

452 Returns 

453 ------- 

454 data: array 

455 The data of the best window. 

456 clipped: float 

457 The fraction of clipped peaks or troughs. 

458 """ 

459 start_inx, end_inx, clipped = best_window_indices(data, samplerate, expand, 

460 win_size, win_shift, 

461 thresh_fac, percentile, 

462 min_clip, max_clip, 

463 w_cv_interv, w_ampl, w_cv_ampl, 

464 tolerance, plot_data_func, **kwargs) 

465 return data[start_inx:end_inx], clipped 

466 

467 

468def plot_best_window(data, rate, threshold, peak_idx, trough_idx, idx0, idx1, 

469 win_times, cv_interv, mean_ampl, cv_ampl, clipped_frac, 

470 cost, thresh, win_idx0, win_idx1, ax): 

471 """Visualize the cost function of used for finding the best window for analysis. 

472 

473 Pass this function as the `plot_data_func` to the `best_window_*` functions. 

474 

475 Parameters 

476 ---------- 

477 See documentation of the `best_window_indices()` functions. 

478 """ 

479 # raw data: 

480 time = np.arange(0.0, len(data)) / rate 

481 ax[0].plot(time, data, 'b', lw=3) 

482 if np.mean(clipped_frac[win_idx0:win_idx1]) > 0.01: 

483 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='magenta', lw=3) 

484 else: 

485 ax[0].plot(time[idx0:idx1], data[idx0:idx1], color='grey', lw=3) 

486 ax[0].plot(time[peak_idx], data[peak_idx], 'o', mfc='red', ms=6) 

487 ax[0].plot(time[trough_idx], data[trough_idx], 'o', mfc='green', ms=6) 

488 ax[0].plot(time, threshold, '#CCCCCC', lw=2) 

489 up_lim = np.max(data) * 1.05 

490 down_lim = np.min(data) * .95 

491 ax[0].set_ylim((down_lim, up_lim)) 

492 ax[0].set_ylabel('Amplitude [a.u]') 

493 

494 # cv of inter-peak intervals: 

495 ax[1].plot(win_times[cv_interv < 1000.0], cv_interv[cv_interv < 1000.0], 'o', ms=10, color='grey', mew=2., 

496 mec='black', alpha=0.6) 

497 ax[1].plot(win_times[win_idx0:win_idx1], cv_interv[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

498 alpha=0.6) 

499 ax[1].set_ylabel('CV intervals') 

500 ax[1].set_ylim(bottom=0.0) 

501 

502 # mean amplitude: 

503 ax[2].plot(win_times[mean_ampl > 0.0], mean_ampl[mean_ampl > 0.0], 'o', ms=10, color='grey', mew=2., mec='black', 

504 alpha=0.6) 

505 ax[2].plot(win_times[win_idx0:win_idx1], mean_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

506 alpha=0.6) 

507 ax[2].set_ylabel('Mean amplitude [a.u]') 

508 ax[2].set_ylim(bottom=0.0) 

509 

510 # cv: 

511 ax[3].plot(win_times[cv_ampl < 1000.0], cv_ampl[cv_ampl < 1000.0], 'o', ms=10, color='grey', mew=2., mec='black', 

512 alpha=0.6) 

513 ax[3].plot(win_times[win_idx0:win_idx1], cv_ampl[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

514 alpha=0.6) 

515 ax[3].set_ylabel('CV amplitude') 

516 ax[3].set_ylim(bottom=0.0) 

517 

518 # cost: 

519 ax[4].plot(win_times[cost < thresh + 10], cost[cost < thresh + 10], 'o', ms=10, color='grey', mew=2., mec='black', 

520 alpha=0.6) 

521 ax[4].plot(win_times[win_idx0:win_idx1], cost[win_idx0:win_idx1], 'o', ms=10, color='red', mew=2., mec='black', 

522 alpha=0.6) 

523 ax[4].axhline(thresh, color='k') 

524 ax[4].set_ylabel('Cost') 

525 ax[4].set_xlabel('Time [sec]') 

526 

527 

528def plot_data_window(ax, data, samplerate, unit, idx0, idx1, clipped, 

529 data_color='blue', window_color='red'): 

530 """Plot the data and mark the analysis window. 

531 

532 Parameters 

533 ---------- 

534 ax: matplotlib axes 

535 Axes used for plotting. 

536 data: 1-D array 

537 The full data trace. 

538 samplerate: float 

539 Sampling rate of the data in Hertz. 

540 unit: string 

541 The unit of the data. 

542 idx0: int 

543 Start index of the best window. 

544 idx1: int 

545 Stop index of the best window. 

546 clipped: float 

547 Fraction of clipped peaks. 

548 data_color: 

549 Color used for plotting the data trace. 

550 window_color: 

551 Color used for plotting the selected best window. 

552 """ 

553 time = np.arange(len(data)) / samplerate 

554 ax.plot(time[:idx0], data[:idx0], color=data_color) 

555 ax.plot(time[idx1:], data[idx1:], color=data_color) 

556 if idx1 > idx0: 

557 ax.plot(time[idx0:idx1], data[idx0:idx1], color=window_color) 

558 label = 'analysis\nwindow' 

559 if clipped > 0.0: 

560 label += f'\n{100.0*clipped:.0f}% clipped' 

561 ax.text(time[(idx0+idx1)//2], 0.0, label, ha='center', va='center') 

562 ax.set_xlim(time[0], time[-1]) 

563 ax.set_xlabel('Time [sec]') 

564 if len(unit) == 0 or unit == 'a.u.': 

565 ax.set_ylabel('Amplitude') 

566 else: 

567 ax.set_ylabel(f'Amplitude [{unit}]') 

568 ax.yaxis.set_major_locator(ticker.MaxNLocator(3)) 

569 

570 

571def add_best_window_config(cfg, win_pos='best', win_size=1., win_shift=0.5, 

572 thresh_fac=0.8, percentile=0.1, 

573 min_clip=-np.inf, max_clip=np.inf, 

574 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, 

575 tolerance=0.2, expand=False): 

576 """ Add parameter needed for the `best_window()` functions as a new section to a configuration. 

577 

578 Parameters 

579 ---------- 

580 cfg: ConfigFile 

581 The configuration. 

582  

583 See `best_window_indices()` for details on the remaining arguments. 

584 """ 

585 cfg.add_section('Analysis window:') 

586 cfg.add('windowPosition', win_pos, '', 'Position of the analysis window: "beginning", "center", "end", "best", or a time in seconds.') 

587 cfg.add('windowSize', win_size, 's', 'Size of the best window. This should be much larger than the expected period of the signal. If 0 select the whole time series.') 

588 cfg.add('bestWindowShift', win_shift, 's', 

589 'Increment for shifting the analysis windows trough the data. Should be larger than the expected period of the signal.') 

590 cfg.add('bestWindowThresholdPercentile', percentile, '%', 

591 'Percentile for estimating interpercentile range. Should be smaller than the duty cycle of the periodic signal.') 

592 cfg.add('bestWindowThresholdFactor', thresh_fac, '', 

593 'Threshold for detecting peaks is interperecntile range of the data times this factor.') 

594 cfg.add('weightCVInterval', w_cv_interv, '', 

595 'Weight factor for the coefficient of variation of the inter-peak and inter-trough intervals.') 

596 cfg.add('weightAmplitude', w_ampl, '', 

597 'Weight factor for the mean peak-to-trough amplitudes.') 

598 cfg.add('weightCVAmplitude', w_cv_ampl, '', 

599 'Weight factor for the coefficient of variation of the peak-to-trough amplitude.') 

600 cfg.add('bestWindowTolerance', tolerance, '', 

601 'Add this to the minimum value of the cost function to get a threshold for selecting the largest best window.') 

602 cfg.add('expandBestWindow', expand, '', 

603 'Return the largest valid best window. If False return sole best window. ') 

604 

605 

606def best_window_args(cfg): 

607 """Translates a configuration to the respective parameter names of the functions `best_window*()`. 

608 

609 The return value can then be passed as key-word arguments to these 

610 functions. 

611 

612 Parameters 

613 ---------- 

614 cfg: ConfigFile 

615 The configuration. 

616 

617 Returns 

618 ------- 

619 a: dict 

620 Dictionary with names of arguments of the `best_window*()` functions 

621 and their values as supplied by `cfg`. 

622 """ 

623 return cfg.map({'win_size': 'windowSize', 

624 'win_shift': 'bestWindowShift', 

625 'percentile': 'bestWindowThresholdPercentile', 

626 'thresh_fac': 'bestWindowThresholdFactor', 

627 'w_cv_interv': 'weightCVInterval', 

628 'w_ampl': 'weightAmplitude', 

629 'w_cv_ampl': 'weightCVAmplitude', 

630 'tolerance': 'bestWindowTolerance', 

631 'expand': 'expandBestWindow'}) 

632 

633 

634def analysis_window(data, samplerate, ampl_max, win_pos, 

635 cfg, show_bestwindow=False): 

636 """Set clipping amplitudes and find analysis window. 

637 

638 Parameters 

639 ---------- 

640 data: 1-D array 

641 The data to be analyzed. 

642 samplerate: float 

643 Sampling rate of the data in Hertz. 

644 ampl_max: float 

645 Maximum value of input range. 

646 win_pos: string or float 

647 Position of the analysis window: "beginning", "center", "end" or "best". 

648 Alternatively the beginning of the analysis window in seconds. 

649 cfg: ConfigFile 

650 Configuration for clipping and best window. 

651 show_bestwindow: boolean 

652 If true show a plot with the best window cost functions. 

653 

654 Returns 

655 ------- 

656 data: 1-D array 

657 The data array of the best window 

658 idx0: int 

659 The start index of the best window in the original data. 

660 idx1: int 

661 The end index of the best window in the original data. 

662 clipped: float 

663 Fraction of clipped amplitudes. 

664 min_clip: float 

665 Minimum amplitude that is not clipped. 

666 max_clip: float 

667 Maximum amplitude that is not clipped. 

668 """ 

669 found_bestwindow = True 

670 min_clip = cfg.value('minClipAmplitude') 

671 max_clip = cfg.value('maxClipAmplitude') 

672 clipped = 0 

673 if min_clip == 0.0 or max_clip == 0.0: 

674 min_clip, max_clip = clip_amplitudes(data, **clip_args(cfg, samplerate)) 

675 if cfg.value('unwrapData'): 

676 unwrap(data, 1.5, ampl_max) 

677 min_clip *= 2 

678 max_clip *= 2 

679 # window size parameter: 

680 bwa = best_window_args(cfg) 

681 if 'win_size' in bwa: 

682 del bwa['win_size'] 

683 window_size = cfg.value('windowSize') 

684 if window_size <= 0.0: 

685 window_size = (len(data)-1)/samplerate 

686 # show cost function: 

687 if win_pos == 'best' and show_bestwindow: 

688 fig, ax = plt.subplots(5, sharex=True, figsize=(14., 10.)) 

689 try: 

690 idx0, idx1, clipped = best_window_indices(data, samplerate, 

691 min_clip=min_clip, max_clip=max_clip, 

692 win_size=window_size, 

693 plot_data_func=plot_best_window, ax=ax, 

694 **bwa) 

695 plt.show() 

696 except UserWarning as e: 

697 found_bestwindow = False 

698 else: 

699 # too little data: 

700 n_win = int(window_size*samplerate) 

701 if len(data) < n_win: 

702 return data, 0, 0, False, min_clip, max_clip 

703 if win_pos == 'best': 

704 try: 

705 idx0, idx1, clipped = best_window_indices(data, samplerate, 

706 min_clip=min_clip, 

707 max_clip=max_clip, 

708 win_size=window_size, 

709 **bwa) 

710 except UserWarning as e: 

711 found_bestwindow = False 

712 else: 

713 if win_pos[:5] == 'begin': 

714 idx0 = 0 

715 elif win_pos == 'end': 

716 idx0 = len(data) - n_win 

717 elif win_pos == 'center': 

718 idx0 = (len(data) - n_win)//2 

719 else: 

720 try: 

721 if win_pos[-1] == 's': 

722 win_pos = win_pos[:-1] 

723 t0 = float(win_pos) 

724 except ValueError: 

725 found_bestwindow = False 

726 t0 = 0.0 

727 idx0 = int(t0*samplerate) 

728 idx1 = idx0 + n_win 

729 if not found_bestwindow or idx1 > len(data): 

730 return data, 0, 0, False, min_clip, max_clip 

731 data_seg = data[idx0:idx1] 

732 # check for clipping: 

733 win_shift = cfg.value('bestWindowShift') 

734 thresh_fac = cfg.value('bestWindowThresholdFactor') 

735 percentile = cfg.value('bestWindowThresholdPercentile') 

736 threshold = percentile_threshold(data_seg, 

737 int(win_shift*samplerate), 

738 thresh_fac=thresh_fac, 

739 percentile=percentile) 

740 peak_idx, trough_idx = detect_peaks(data_seg, threshold) 

741 p_idx, t_idx = trim_to_peak(peak_idx, trough_idx) 

742 if len(p_idx) > 0: 

743 p2t_ampl = data_seg[p_idx] - data_seg[t_idx] 

744 clipped = float(np.sum(data_seg[p_idx] > max_clip) + 

745 np.sum(data_seg[t_idx] < min_clip))/2/len(p2t_ampl) 

746 if found_bestwindow: 

747 return data[idx0:idx1], idx0, idx1, clipped, min_clip, max_clip 

748 else: 

749 return data, 0, 0, False, min_clip, max_clip 

750 

751 

752def main(data_file=None): 

753 title = 'bestwindow' 

754 if data_file is None: 

755 # generate data: 

756 print('generate waveform...') 

757 rate = 100000.0 

758 time = np.arange(0.0, 1.0, 1.0 / rate) 

759 f = 600.0 

760 snippets = [] 

761 amf = 20.0 

762 for ampl in [0.2, 0.5, 0.8]: 

763 for am_ampl in [0.0, 0.3, 0.9]: 

764 data = ampl * np.sin(2.0 * np.pi * f * time) * (1.0 + am_ampl * np.sin(2.0 * np.pi * amf * time)) 

765 data[data > 1.3] = 1.3 

766 data[data < -1.3] = -1.3 

767 snippets.extend(data) 

768 data = np.asarray(snippets) 

769 title = 'test sines' 

770 data += 0.01 * np.random.randn(len(data)) 

771 else: 

772 from thunderlab.dataloader import load_data 

773 print(f'load {data_file} ...') 

774 data, rate, unit, amax = load_data(data_file) 

775 data = data[:,0] 

776 title = data_file 

777 

778 # determine clipping amplitudes: 

779 clip_win_size = 0.5 

780 min_clip_fac = 2.0 

781 min_clip, max_clip = clip_amplitudes(data, int(clip_win_size * rate), 

782 min_fac=min_clip_fac) 

783 # min_clip, max_clip = clip_amplitudes(data, int(clip_win_size*rate), 

784 # min_fac=min_clip_fac, 

785 # plot_hist_func=plot_clipping) 

786 

787 # setup plots: 

788 fig, ax = plt.subplots(5, 1, sharex=True, figsize=(20, 12)) 

789 fig.canvas.manager.set_window_title(title) 

790 

791 # compute best window: 

792 best_window_indices(data, rate, expand=False, win_size=4.0, 

793 win_shift=0.5, thresh_fac=0.8, percentile=0.1, 

794 min_clip=min_clip, max_clip=max_clip, 

795 w_cv_ampl=10.0, tolerance=0.5, 

796 plot_data_func=plot_best_window, ax=ax) 

797 

798 plt.tight_layout() 

799 plt.show() 

800 

801 

802if __name__ == '__main__': 

803 import sys 

804 data_file = sys.argv[1] if len(sys.argv) > 1 else None 

805 main(data_file)