Coverage for src/thunderfish/harmonics.py: 85%
656 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-29 16:21 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-29 16:21 +0000
1"""
2Extract and analyze harmonic frequencies from power spectra.
4## Harmonic group extraction
6- `harmonic_groups()`: detect peaks in power spectrum and group them
7 according to their harmonic structure.
8- `expand_group()`: add more harmonics to harmonic group.
9- `extract_fundamentals()`: collect harmonic groups from
10 lists of power spectrum peaks.
11- `threshold_estimate()`: estimates thresholds for peak detection
12 in a power spectrum.
14## Helper functions for harmonic group extraction
16- `build_harmonic_group()`: find all the harmonics belonging to the largest peak in the power spectrum.
17- `retrieve_harmonic_group()`: find all the harmonics belonging to a given fundamental.
18- `group_candidate()`: candidate harmonic frequencies belonging to a fundamental frequency.
19- `update_group()`: update frequency lists and harmonic group.
21## Handling of lists of harmonic groups
23- `fundamental_freqs()`: extract fundamental frequencies from
24 lists of harmonic groups.
25- `fundamental_freqs_and_power()`: extract fundamental frequencies and their
26 power in dB from lists of harmonic groups.
28## Handling of lists of fundamental frequencies
30- `add_relative_power()`: add a column with relative power.
31- `add_power_ranks()`: add a column with power ranks.
32- `similar_indices()`: indices of similar frequencies.
33- `unique_mask()`: mark similar frequencies from different recordings as dublicate.
34- `unique()`: remove similar frequencies from different recordings.
36## Visualization
38- `colors_markers()`: Generate a list of colors and markers for plotting.
39- `plot_harmonic_groups()`: Mark decibel power of fundamentals and their
40 harmonics.
41- `plot_psd_harmonic_groups()`: Plot decibel power-spectrum with detected peaks,
42 harmonic groups, and mains frequencies.
44## Configuration
46- `add_psd_peak_detection_config()`: add parameters for the detection of
47 peaks in power spectra to configuration.
48- `psd_peak_detection_args()`: retrieve parameters for the detection of peaks
49 in power spectra from configuration.
50- `add_harmonic_groups_config()`: add parameters for the detection of
51 harmonic groups to configuration.
52- `harmonic_groups_args()`: retrieve parameters for the detection of
53 harmonic groups from configuration.
54"""
56import math as m
57import numpy as np
58import scipy.signal as sig
59from thunderlab.eventdetection import detect_peaks, trim, hist_threshold
60from thunderlab.powerspectrum import decibel, power, plot_decibel_psd
61try:
62 import matplotlib.cm as cm
63 import matplotlib.colors as mc
64except ImportError:
65 pass
68def group_candidate(good_freqs, all_freqs, freq, divisor,
69 freq_tol, max_freq_tol, min_group_size, verbose):
70 """Candidate harmonic frequencies belonging to a fundamental frequency.
72 Parameters
73 ----------
74 good_freqs: 2-D array
75 Frequency, power, and use count (columns) of strong peaks detected
76 in a power spectrum.
77 all_freqs: 2-D array
78 Frequency, power, and use count (columns) of all peaks detected
79 in a power spectrum.
80 freq: float
81 Fundamental frequency for which a harmonic group should be assembled.
82 divisor: int
83 Fundamental frequency was obtained from a frequency divided by divisor.
84 0: Fundamental frequency is given as is.
85 freq_tol: float
86 Harmonics should fall within this frequency tolerance.
87 This should be in the range of the frequency resolution
88 and should not be smaller than half of the frequency resolution.
89 max_freq_tol: float
90 Maximum deviation of harmonics from their expected frequency.
91 Peaks with frequencies between `freq_tol` and `max_freq_tol`
92 get penalized the further away from their expected frequency.
93 min_group_size: int
94 Minimum number of harmonics of a harmonic group.
95 The harmonics from min_group_size/3 to max(min_group_size, divisor)
96 need to be in good_freqs.
97 verbose: int
98 Verbosity level.
100 Returns
101 -------
102 new_group: 1-D aray of indices
103 Frequencies in all_freqs belonging to the candidate group.
104 fzero: float
105 Adjusted fundamental frequency. If negative, no group was found.
106 fzero_harmonics: int
107 The highest harmonics that was used to recompute the fundamental frequency.
108 """
109 if verbose > 0:
110 print('')
112 dvs = True
113 if divisor <= 0:
114 divisor = 1
115 dvs = False
117 # 1. find harmonics in good_freqs and adjust fzero accordingly:
118 fzero = freq
119 fzero_harmonics = 1
120 if len(good_freqs[:,0]) > 0:
121 """
122 ff = good_freqs[:,0]
123 h = np.round(ff/fzero)
124 h_indices = np.unique(h, return_index=True)[1]
125 f_best = [hi+np.argmin(np.abs(fh/h - fzero)) for hi, fh in zip(h_indices, np.split(ff, h_indices))]
126 fe = np.abs(ff/h - fzero)
127 h = h[fe < freq_tol]
128 fe = fe[fe < freq_tol]
129 """
130 prev_freq = divisor * freq
131 for h in range(divisor+1, 2*min_group_size+1):
132 idx = np.argmin(np.abs(good_freqs[:,0]/h - fzero))
133 ff = good_freqs[idx,0]
134 if m.fabs(ff/h - fzero) > freq_tol:
135 continue
136 df = ff - prev_freq
137 dh = np.round(df/fzero)
138 fe = m.fabs(df/dh - fzero)
139 if fe > 2.0*freq_tol:
140 if h > min_group_size:
141 break
142 continue
143 # update fzero:
144 prev_freq = ff
145 fzero_harmonics = h
146 fzero = ff/fzero_harmonics
147 if verbose > 1:
148 print('adjusted fzero to %.2fHz from %d-th harmonic' % (fzero, h))
150 if verbose > 0:
151 ds = 'divisor: %d, ' % divisor if dvs else ''
152 print('# %sfzero=%7.2fHz adjusted from harmonics %d'
153 % (ds, fzero, fzero_harmonics))
155 # 2. check fzero:
156 # freq might not be in our group anymore, because fzero was adjusted:
157 if np.abs(freq - fzero) > freq_tol:
158 if verbose > 0:
159 print(' discarded: lost frequency')
160 return [], -1.0, fzero_harmonics
162 # 3. collect harmonics from all_freqs:
163 new_group = -np.ones(min_group_size, dtype=int)
164 new_penalties = np.ones(min_group_size)
165 freqs = []
166 prev_h = 0
167 prev_fe = 0.0
168 for h in range(1, min_group_size+1):
169 penalty = 0
170 i = np.argmin(np.abs(all_freqs[:,0]/h - fzero))
171 f = all_freqs[i,0]
172 if verbose > 2:
173 print(' check %7.2fHz as %d. harmonics' % (f, h))
174 fac = 1.0 if h >= divisor else 2.0
175 fe = m.fabs(f/h - fzero)
176 if fe > fac*max_freq_tol:
177 if verbose > 1 and fe < 2*fac*max_freq_tol:
178 print(' %d. harmonics at %7.2fHz is off by %7.2fHz (max between %5.2fHz and %5.2fHz) from %7.2fHz'
179 % (h, f, h*fe, h*fac*freq_tol, h*fac*max_freq_tol, h*fzero))
180 continue
181 if fe > fac*freq_tol:
182 penalty = np.interp(fe, [fac*freq_tol, fac*max_freq_tol], [0.0, 1.0])
183 if len(freqs) > 0:
184 pf = freqs[-1]
185 df = f - pf
186 if df < 0.5*fzero:
187 if len(freqs)>1:
188 pf = freqs[-2]
189 df = f - pf
190 else:
191 pf = 0.0
192 df = h*fzero
193 dh = m.floor(df/fzero + 0.5)
194 fe = m.fabs(df/dh - fzero)
195 if fe > 2*dh*fac*max_freq_tol:
196 if verbose > 1:
197 print(' %d. harmonics at %7.2fHz is off by %7.2fHz (max between %5.2fHz and %5.2fHz) from previous harmonics at %7.2fHz'
198 % (h, f, dh*fe, 2*fac*dh*freq_tol, 2*fac*dh*max_freq_tol, pf))
199 continue
200 if fe > 2*dh*fac*freq_tol:
201 penalty = np.interp(fe, [2*dh*fac*freq_tol, 2*dh*fac*max_freq_tol], [0.0, 1.0])
202 else:
203 fe = 0.0
204 if h > prev_h or fe < prev_fe:
205 if prev_h > 0 and h - prev_h > 1:
206 if verbose > 1:
207 print(' previous harmonics %d more than 1 away from %d. harmonics at %7.2fHz'
208 % (prev_h, h, f))
209 break
210 if h == prev_h and len(freqs) > 0:
211 freqs.pop()
212 freqs.append(f)
213 new_group[int(h)-1] = i
214 new_penalties[int(h)-1] = penalty
215 prev_h = h
216 prev_fe = fe
217 if verbose > 1:
218 print(' %d. harmonics at %7.2fHz has been taken (peak %2d) with penalty %3.1f' % (h, f, i, penalty))
220 # 4. check new group:
222 # almost all harmonics in min_group_size required:
223 max_penalties = min_group_size/3
224 if np.sum(new_penalties) > max_penalties:
225 if verbose > 0:
226 print(' discarded group because sum of penalties %3.1f is more than %3.1f: indices' %
227 (np.sum(new_penalties), max_penalties), new_group, ' penalties', new_penalties)
228 return [], -1.0, fzero_harmonics
229 new_group = new_group[new_group>=0]
231 # check use count of frequencies:
232 double_use = np.sum(all_freqs[new_group, 2]>0)
233 if double_use >= 2: # XXX make this a parameter?
234 if verbose > 0:
235 print(' discarded group because of use count = %2d >= 2' % double_use)
236 return [], -1.0, fzero_harmonics
238 # 5. return results:
239 return new_group, fzero, fzero_harmonics
242def update_group(good_freqs, all_freqs, new_group, fzero,
243 freq_tol, verbose, group_str):
244 """Update good frequencies and harmonic group.
246 Remove frequencies from good_freqs, add missing fundamental to group.
248 Parameters
249 ----------
250 good_freqs: 2-D array
251 Frequency, power, and use count (columns) of strong peaks detected
252 in a power spectrum.
253 all_freqs: 2-D array
254 Frequency, power, and use count (columns) of all peaks detected
255 in a power spectrum.
256 new_group: 1-D aray of indices
257 Frequencies in all_freqs of an harmonic group.
258 fzero: float
259 Fundamental frequency for which frequencies are collected in good_freqs.
260 freq_tol: float
261 Harmonics need to fall within this frequency tolerance.
262 This should be in the range of the frequency resolution
263 and not be smaller than half of the frequency resolution.
264 verbose: int
265 Verbosity level.
266 group_str: string
267 String for debug message.
269 Returns
270 -------
271 good_freqs: 2-D array
272 Frequency, power, and use count (columns) of strong peaks detected
273 in a power spectrum with frequencies for harmonic group
274 of fundamental frequency fzero removed.
275 group: 2-D array
276 Frequency, power, and use count (columns) of harmonic group
277 for fundamental frequency fzero.
278 """
279 # initialize group:
280 group = all_freqs[new_group,:]
282 # indices of group in good_freqs:
283 freq_tol *= 1.1
284 indices = []
285 for f in group[:,0]:
286 idx = np.argmin(np.abs(good_freqs[:,0]-f))
287 if np.abs(good_freqs[idx,0]-f) <= freq_tol:
288 indices.append(idx)
289 indices = np.asarray(indices, dtype=int)
291 # harmonics in good_freqs:
292 nharm = np.round(good_freqs[:,0]/fzero)
293 idxs = np.where(np.abs(good_freqs[:,0] - nharm*fzero) <= freq_tol)[0]
294 indices = np.unique(np.concatenate((indices, idxs)))
296 # report:
297 if verbose > 1:
298 print('# good freqs: ', indices,
299 '[', ', '.join(['%.2f' % f for f in good_freqs[indices,0]]), ']')
300 print('# all freqs : ', new_group,
301 '[', ', '.join(['%.2f' % f for f in all_freqs[new_group,0]]), ']')
302 if verbose > 0:
303 refi = np.argmax(group[:,1] > 0.0)
304 print('')
305 print(group_str)
306 for i in range(len(group)):
307 print('f=%8.2fHz n=%5.2f: power=%9.3g power/pmax=%6.4f=%5.1fdB'
308 % (group[i,0], group[i,0]/fzero,
309 group[i,1], group[i,1]/group[refi,1], decibel(group[i,1], group[refi,1])))
310 print('')
312 # erase group from good_freqs:
313 good_freqs = np.delete(good_freqs, indices, axis=0)
315 # adjust frequencies to fzero:
316 group[:,0] = np.round(group[:,0]/fzero)*fzero
318 # insert missing fzero:
319 if np.round(group[0,0]/fzero) != 1.0:
320 group = np.vstack(((fzero, group[0,1], -2.0), group))
322 return good_freqs, group
325def build_harmonic_group(good_freqs, all_freqs, freq_tol, max_freq_tol,
326 verbose=0, min_group_size=3, max_divisor=4):
327 """Find all the harmonics belonging to the largest peak in a list of frequency peaks.
329 Parameters
330 ----------
331 good_freqs: 2-D array
332 Frequency, power, and use count (columns) of strong peaks detected
333 in a power spectrum.
334 all_freqs: 2-D array
335 Frequency, power, and use count (columns) of all peaks detected
336 in a power spectrum.
337 freq_tol: float
338 Harmonics should fall within this frequency tolerance.
339 This should be in the range of the frequency resolution
340 and should not be smaller than half of the frequency resolution.
341 max_freq_tol: float
342 Maximum deviation of harmonics from their expected frequency.
343 Peaks with frequencies between `freq_tol` and `max_freq_tol`
344 get penalized the further away from their expected frequency.
345 verbose: int
346 Verbosity level.
347 min_group_size: int
348 Minimum number of harmonics of a harmonic group.
349 The harmonics from min_group_size/3 to max(min_group_size, divisor)
350 need to be in good_freqs.
351 max_divisor: int
352 Maximum divisor used for checking for sub-harmonics.
354 Returns
355 -------
356 good_freqs: 2-D array
357 Frequency, power, and use count (columns) of strong peaks detected
358 in a power spectrum with frequencies of the returned harmonic group removed.
359 group: 2-D array
360 The detected harmonic group. Might be empty.
361 indices: 1-D array of indices
362 Indices of the harmonic group in all_freqs.
363 best_fzero_harmonics: int
364 The highest harmonics that was used to recompute
365 the fundamental frequency.
366 fmax: float
367 The frequency of the largest peak in good_freqs
368 for which the harmonic group was detected.
369 """
370 # select strongest frequency for building the harmonic group:
371 fmaxinx = np.argmax(good_freqs[:,1])
372 fmax = good_freqs[fmaxinx,0]
373 if verbose > 0:
374 print('')
375 print('%s build_harmonic_group %s' % (10*'#', 38*'#'))
376 print('%s fmax=%7.2fHz, power=%9.3g %s'
377 % (10*'#', fmax, good_freqs[fmaxinx,1], 27*'#'))
378 print('good_freqs: ', '[',
379 ', '.join(['%.2f' % f for f in good_freqs[:,0]]), ']')
381 # container for harmonic groups:
382 best_group = []
383 best_value = -1e6
384 best_divisor = 0
385 best_fzero = 0.0
386 best_fzero_harmonics = 0
388 # check for integer fractions of the frequency:
389 for divisor in range(1, max_divisor + 1):
390 # 1. hypothesized fundamental:
391 freq = fmax / divisor
393 # 2. find harmonics in good_freqs and adjust fzero accordingly:
394 group_size = min_group_size if divisor <= min_group_size else divisor
395 new_group, fzero, fzero_harmonics = group_candidate(good_freqs, all_freqs,
396 freq, divisor,
397 freq_tol, max_freq_tol,
398 group_size, verbose)
399 # no group found:
400 if fzero < 0.0:
401 continue
403 # 3. compare new group to best group:
404 peaksum = decibel(np.sum(all_freqs[new_group, 1])*min_group_size/len(new_group))
405 diff = np.std(np.diff(decibel(all_freqs[new_group, 1])))
406 new_group_value = peaksum - diff
407 counts = np.sum(all_freqs[new_group, 2])
408 if verbose > 0:
409 print(' new group: fzero=%7.2fHz, nharmonics=%d, value=%6.1fdB, peaksum=%5.1fdB, diff=%6.1fdB, use count=%2d, peaks:'
410 % (fzero, len(new_group), new_group_value, peaksum, diff, counts), new_group)
411 if verbose > 1:
412 print(' best group: divisor=%d, fzero=%7.2fHz, nharmonics=%d, value=%6.1fdB, peaks:'
413 % (best_divisor, best_fzero, len(best_group), best_value), best_group)
414 # select new group if sum of peak power minus diff is larger:
415 if len(new_group) >= len(best_group) and new_group_value >= best_value:
416 best_value = new_group_value
417 best_group = new_group
418 best_divisor = divisor
419 best_fzero = fzero
420 best_fzero_harmonics = fzero_harmonics
421 if verbose > 1:
422 print(' new best group: divisor=%d, fzero=%7.2fHz, value=%6.1fdB, peaks:'
423 % (best_divisor, best_fzero, best_value), best_group)
424 elif verbose > 0:
425 print(' took as new best group')
427 # no group found:
428 if len(best_group) == 0:
429 # erase freq:
430 good_freqs = np.delete(good_freqs, fmaxinx, axis=0)
431 group = np.zeros((0, 3))
432 return good_freqs, group, best_group, 1, fmax
434 # update frequencies and group:
435 if verbose > 1:
436 print('')
437 print('# best group found for fmax=%.2fHz, fzero=%.2fHz, divisor=%d:'
438 % (fmax, best_fzero, best_divisor))
439 group_str = '%s resulting harmonic group for fmax=%.2fHz' % (10*'#', fmax)
440 good_freqs, group = update_group(good_freqs, all_freqs, best_group, best_fzero, freq_tol, verbose, group_str)
442 # good_freqs: removed all frequencies of bestgroup
443 return good_freqs, group, best_group, best_fzero_harmonics, fmax
446def retrieve_harmonic_group(freq, good_freqs, all_freqs,
447 freq_tol, max_freq_tol, verbose=0,
448 min_group_size=3):
449 """Find all the harmonics belonging to a given fundamental.
451 Parameters
452 ----------
453 freq: float
454 Fundamental frequency for which harmonics are to be retrieved.
455 good_freqs: 2-D array
456 Frequency, power, and use count (columns) of strong peaks detected
457 in a power spectrum. All harmonics of `freq` will be
458 removed from `good_freqs`.
459 all_freqs: 2-D array
460 Frequency, power, and use count (columns) of all peaks detected
461 in a power spectrum.
462 freq_tol: float
463 Harmonics should fall within this frequency tolerance.
464 This should be in the range of the frequency resolution
465 and should not be smaller than half of the frequency resolution.
466 max_freq_tol: float
467 Maximum deviation of harmonics from their expected frequency.
468 Peaks with frequencies between `freq_tol` and `max_freq_tol`
469 get penalized the further away from their expected frequency.
470 verbose: int
471 Verbosity level.
472 min_group_size: int
473 Minimum number of harmonics of a harmonic group.
474 The harmonics from min_group_size/3 to max(min_group_size, divisor)
475 need to be in good_freqs.
477 Returns
478 -------
479 good_freqs: 2-D array
480 Frequency, power, and use count (columns) of strong peaks detected
481 in a power spectrum with frequencies of the returned harmonic group removed.
482 group: 2-D array
483 The detected harmonic group. Might be empty.
484 indices: 1-D array of indices
485 Indices of the harmonic group in all_freqs.
486 fzero_harmonics: int
487 The highest harmonics that was used to recompute
488 the fundamental frequency.
489 """
490 if verbose > 0:
491 print('')
492 print('%s retrieve harmonic group %s' % (10*'#', 35*'#'))
493 print('%s freq=%7.2fHz %s' % (10*'#', freq, 44*'#'))
494 print('good_freqs: ', '[',
495 ', '.join(['%.2f' % f for f in good_freqs[:,0]]), ']')
497 # find harmonics in good_freqs and adjust fzero accordingly:
498 new_group, fzero, fzero_harmonics = group_candidate(good_freqs, all_freqs,
499 freq, 0,
500 freq_tol, max_freq_tol,
501 min_group_size, verbose)
503 # no group found:
504 if fzero < 0.0:
505 return good_freqs, np.zeros((0, 2)), np.zeros(0), fzero_harmonics
507 if verbose > 1:
508 print('')
509 print('# group found for freq=%.2fHz, fzero=%.2fHz:'
510 % (freq, fzero))
511 group_str = '#### resulting harmonic group for freq=%.2fHz' % freq
512 good_freqs, group = update_group(good_freqs, all_freqs, new_group,
513 fzero, freq_tol, verbose, group_str)
515 # good_freqs: removed all frequencies of bestgroup
516 return good_freqs, group, new_group, fzero_harmonics
519def expand_group(group, indices, freqs, freq_tol, max_harmonics=0):
520 """Add more harmonics to harmonic group.
522 Parameters
523 ----------
524 group: 2-D array
525 Group of fundamental frequency and harmonics
526 as returned by build_harmonic_group.
527 indices: 1-D array of indices
528 Indices of the harmonics in group in `freqs`.
529 freqs: 2-D array
530 Frequency, power, and use count (columns) of all peaks detected
531 in a power spectrum.
532 freq_tol: float
533 Harmonics need to fall within this frequency tolerance.
534 This should be in the range of the frequency resolution
535 and not be smaller than half of the frequency resolution.
536 max_harmonics: int
537 Maximum number of harmonics to be returned for each group.
539 Returns
540 -------
541 group: 2-D array
542 Expanded group of fundamental frequency and harmonics.
543 indices: 1-D array of indices
544 Indices of the harmonics in the expanded group in all_freqs.
545 """
546 if len(group) == 0:
547 return group, indices
548 fzero = group[0,0]
549 if max_harmonics <= 0:
550 max_harmonics = m.floor(freqs[-1,0]/fzero + 0.5) # round
551 if max_harmonics <= len(group):
552 return group, indices
553 group_freqs = list(group[:,0])
554 indices = list(indices)
555 last_h = m.floor(group_freqs[-1]/fzero + 0.5) # round
556 for h in range(last_h+1, max_harmonics+1):
557 i = np.argmin(np.abs(freqs[:,0]/h - fzero))
558 f = freqs[i,0]
559 if m.fabs(f/h - fzero) > freq_tol:
560 continue
561 df = f - group_freqs[-1]
562 dh = m.floor(df/fzero + 0.5) # round
563 if dh < 1:
564 continue
565 fe = m.fabs(df/dh - fzero)
566 if fe > 2*freq_tol:
567 continue
568 group_freqs.append(f)
569 indices.append(i)
570 # assemble group:
571 new_group = freqs[indices,:group.shape[1]]
572 # keep filled in fundamental:
573 if group[0,2] == -2:
574 new_group = np.vstack((group[0,:], new_group))
575 return new_group, np.array(indices, dtype=int)
578def extract_fundamentals(good_freqs, all_freqs, freq_tol, max_freq_tol,
579 verbose=0, check_freqs=[],
580 mains_freq=60.0, mains_freq_tol=1.0,
581 min_freq=0.0, max_freq=2000.0, max_db_diff=20.0,
582 max_divisor=4, min_group_size=3, max_harmonics_db=-5.0,
583 max_harmonics=0, max_groups=0, **kwargs):
584 """Extract fundamental frequencies from power-spectrum peaks.
586 Parameters
587 ----------
588 good_freqs: 2-D array
589 Frequency, power, and use count (columns) of strong peaks detected
590 in a power spectrum.
591 all_freqs: 2-D array
592 Frequency, power, and use count (columns) of all peaks detected
593 in a power spectrum.
594 freq_tol: float
595 Harmonics should fall within this frequency tolerance.
596 This should be in the range of the frequency resolution
597 and should not be smaller than half of the frequency resolution.
598 max_freq_tol: float
599 Maximum deviation of harmonics from their expected frequency.
600 Peaks with frequencies between `freq_tol` and `max_freq_tol`
601 get penalized the further away from their expected frequency.
602 verbose: int
603 Verbosity level.
604 check_freqs: list of float
605 List of fundamental frequencies that will be checked
606 first for being present and valid harmonic groups in the peak frequencies
607 of a power spectrum.
608 mains_freq: float
609 Frequency of the mains power supply.
610 mains_freq_tol: float
611 Tolarerance around harmonics of the mains frequency,
612 within which peaks are removed.
613 min_freq: float
614 Minimum frequency accepted as a fundamental frequency.
615 max_freq: float
616 Maximum frequency accepted as a fundamental frequency.
617 max_db_diff: float
618 If larger than zero, maximum standard deviation of differences between
619 logarithmic powers of harmonics in decibel.
620 Low values enforce smoother power spectra.
621 max_divisor: int
622 Maximum divisor used for checking for sub-harmonics.
623 min_group_size: int
624 Minimum number of harmonics of a harmonic group.
625 The harmonics from min_group_size/3 to max(min_group_size, divisor)
626 need to be in good_freqs.
627 max_harmonics_db: float
628 Maximum allowed power of the `min_group_size`-th and higher harmonics
629 after the peak (in decibel relative to peak power withn the first
630 `min_group_size` harmonics, i.e. if harmonics are required to be
631 smaller than fundamental then this is a negative number).
632 Make it a large positive number to effectively not check for relative power.
633 max_harmonics: int
634 Maximum number of harmonics to be returned for each group.
635 max_groups: int
636 If not zero the maximum number of most powerful harmonic groups.
638 Returns
639 -------
640 group_list: list of 2-D arrays
641 List of all harmonic groups found sorted by fundamental frequency.
642 Each harmonic group is a 2-D array with the first dimension the harmonics
643 and the second dimension containing frequency and power of each harmonic.
644 If the power is zero, there was no corresponding peak in the power spectrum.
645 fzero_harmonics_list: list of int
646 The harmonics from which the fundamental frequencies were computed.
647 mains_freqs: 2-d array
648 Array of mains peaks found in all_freqs (frequency, power).
649 """
650 if verbose > 0:
651 print('')
653 # set use count to zero:
654 all_freqs[:,2] = 0.0
656 # remove power line harmonics from good_freqs:
657 if mains_freq > 0.0:
658 indices = np.where(np.abs(good_freqs[:,0] - np.round(good_freqs[:,0]/mains_freq)*mains_freq) < mains_freq_tol)[0]
659 if len(indices)>0:
660 if verbose > 1:
661 print('remove power line frequencies',
662 ', '.join(['%.1f' % f for f in good_freqs[indices,0]]))
663 good_freqs = np.delete(good_freqs, indices, axis=0)
665 if verbose > 1:
666 print('all_freqs: ', '[',
667 ', '.join(['%.2f' % f for f in all_freqs[:,0] if f < 3000.0]), ']')
669 group_list = []
670 fzero_harmonics_list = []
671 first = True
672 # as long as there are frequencies left in good_freqs:
673 fi = 0
674 while len(good_freqs) > 0:
675 if fi < len(check_freqs):
676 # check for harmonic group of a given fundamental frequency:
677 fmax = check_freqs[fi]
678 f0s = 'freq'
679 good_freqs, harm_group, harm_indices, fzero_harmonics = \
680 retrieve_harmonic_group(fmax, good_freqs, all_freqs,
681 freq_tol, max_freq_tol,
682 verbose-1, min_group_size)
683 fi += 1
684 else:
685 # check for harmonic groups:
686 f0s = 'fmax'
687 good_freqs, harm_group, harm_indices, fzero_harmonics, fmax = \
688 build_harmonic_group(good_freqs, all_freqs,
689 freq_tol, max_freq_tol, verbose-1,
690 min_group_size, max_divisor)
692 # nothing found:
693 if len(harm_group) == 0:
694 if verbose > 1 - first:
695 s = ' (largest peak)' if first else ''
696 print('No harmonic group for %7.2fHz%s' % (fmax, s))
697 first = False
698 continue
699 first = False
701 # fill up harmonic group:
702 harm_group, harm_indices = expand_group(harm_group, harm_indices,
703 all_freqs, freq_tol,
704 max_harmonics)
706 # check whether fundamental was filled in:
707 first_h = 0 if harm_group[0,2] > -2 else 1
709 # increment use count:
710 all_freqs[harm_indices,2] += 1
711 if harm_group[0,2] == -1:
712 harm_group[0,2] = -2
714 # check frequency range of fundamental:
715 fundamental_ok = (harm_group[0, 0] >= min_freq and
716 harm_group[0, 0] <= max_freq)
717 # check power hum:
718 mains_ok = ((mains_freq <= 0.0) or
719 (m.fabs(harm_group[0,0] - mains_freq) > freq_tol))
720 # check smoothness:
721 db_powers = decibel(harm_group[first_h:,1])
722 diff = np.std(np.diff(db_powers))
723 smooth_ok = max_db_diff <= 0.0 or diff < max_db_diff
724 # check relative power of higher harmonics:
725 p_max = np.argmax(db_powers[:min_group_size])
726 db_powers -= db_powers[p_max]
727 amplitude_ok = len(db_powers[p_max+min_group_size:]) == 0
728 if amplitude_ok:
729 pi = len(db_powers) - 1
730 else:
731 amplitude_ok = np.all((db_powers[p_max+min_group_size:] < max_harmonics_db) |
732 (harm_group[first_h+p_max+min_group_size:,2] > 1))
733 if amplitude_ok:
734 pi = p_max+min_group_size-1
735 else:
736 pi = np.where((db_powers[p_max+min_group_size:] >= max_harmonics_db) &
737 (harm_group[first_h+p_max+min_group_size:,2] <= 1))[0][0]
739 # check:
740 if fundamental_ok and mains_ok and smooth_ok and amplitude_ok:
741 if verbose > 0:
742 print('Accepting harmonic group from %s=%7.2fHz: %7.2fHz power=%9.3g nharmonics=%2d, use count=%d'
743 % (f0s, fmax, harm_group[0,0], np.sum(harm_group[first_h:,1]),
744 len(harm_group), np.sum(harm_group[first_h:,2])))
745 group_list.append(harm_group[:,0:2])
746 fzero_harmonics_list.append(fzero_harmonics)
747 else:
748 if verbose > 0:
749 fs = 'is ' if fundamental_ok else 'NOT'
750 ms = 'not ' if mains_ok else 'IS'
751 ss = 'smaller' if smooth_ok else 'LARGER '
752 ps = 'smaller' if amplitude_ok else 'LARGER '
753 print('Discarded harmonic group from %s=%7.2fHz: %7.2fHz power=%9.3g: %s in frequency range, %s mains frequency, smooth=%4.1fdB %s than %4.1fdB, relpower[%d]=%5.1fdB %s than %5.1fdB'
754 % (f0s, fmax, harm_group[0,0], np.sum(harm_group[first_h:,1]),
755 fs, ms, diff, ss, max_db_diff,
756 pi, db_powers[pi], ps, max_harmonics_db))
758 # select most powerful harmonic groups:
759 if max_groups > 0 and len(group_list) > max_groups:
760 n = len(group_list)
761 powers = [np.sum(group[:,1]) for group in group_list]
762 powers_inx = np.argsort(powers)
763 group_list = [group_list[pi] for pi in powers_inx[-max_groups:]]
764 fzero_harmonics_list = [fzero_harmonics_list[pi] for pi in powers_inx[-max_groups:]]
765 if verbose > 0:
766 print('Selected from %d groups the %d most powerful groups.' % (n, max_groups))
768 # sort groups by fundamental frequency:
769 freqs = [group[0,0] for group in group_list]
770 freq_inx = np.argsort(freqs)
771 group_list = [group_list[fi] for fi in freq_inx]
772 fzero_harmonics_list = [fzero_harmonics_list[fi] for fi in freq_inx]
774 if verbose > 1:
775 print('')
776 if len(group_list) > 0:
777 print('## FUNDAMENTALS FOUND: ##')
778 for group, fzero_h in zip(group_list, fzero_harmonics_list):
779 print('%7.2fHz: power=%9.3g fzero-h=%2d'
780 % (group[0,0], np.sum(group[:,1]), fzero_h))
781 else:
782 print('## NO FUNDAMENTALS FOUND ##')
784 # assemble mains frequencies from all_freqs:
785 if mains_freq > 0.0:
786 mains_freqs = all_freqs[np.abs(all_freqs[:,0] - np.round(all_freqs[:,0]/mains_freq)*mains_freq) < mains_freq_tol,:2]
787 else:
788 mains_freqs = np.zeros((0, 2))
790 return group_list, fzero_harmonics_list, mains_freqs
793def threshold_estimate(psd_data, low_thresh_factor=6.0, high_thresh_factor=10.0,
794 nbins=100, hist_height=1.0/ np.sqrt(np.e)):
795 """Estimate thresholds for peak detection from histogram of power spectrum.
797 The standard deviation of the noise floor without peaks is estimated from
798 the width of the histogram of the power spectrum at `hist_height` relative height.
799 The histogram is computed in the third quarter of the linearly detrended power spectrum.
801 Parameters
802 ----------
803 psd_data: 1-D array
804 The power spectrum from which to estimate the thresholds.
805 low_thresh_factor: float
806 Factor by which the estimated standard deviation of the noise floor
807 is multiplied to set the `low_threshold`.
808 high_thresh_factor: float
809 Factor by which the estimated standard deviation of the noise floor
810 is multiplied to set the `high_threshold`.
811 nbins: int or list of floats
812 Number of bins or the bins for computing the histogram.
813 hist_height: float
814 Height between 0 and 1 at which the standard deviation of the histogram is estimated.
816 Returns
817 -------
818 low_threshold: float
819 The threshold for peaks just above the noise floor.
820 high_threshold: float
821 The threshold for distinct peaks.
822 center: float
823 The baseline level of the power spectrum.
824 """
825 n = len(psd_data)
826 psd_data_seg = psd_data[n//2:n*3//4]
827 psd_data_seg = psd_data_seg[~np.isinf(psd_data_seg)]
828 psd_data_seg = np.mean(psd_data_seg) + \
829 sig.detrend(psd_data_seg, type='linear')
830 noise_std, center = hist_threshold(psd_data_seg, thresh_fac=1.0, nbins=nbins)
831 low_threshold = noise_std * low_thresh_factor
832 high_threshold = noise_std * high_thresh_factor
833 return low_threshold, high_threshold, center
836def harmonic_groups(psd_freqs, psd, verbose=0, check_freqs=[],
837 low_threshold=0.0, high_threshold=0.0, thresh_bins=100,
838 low_thresh_factor=6.0, high_thresh_factor=10.0,
839 freq_tol_fac=1.0, max_freq_tol=1.0,
840 mains_freq=60.0, mains_freq_tol=1.0,
841 min_freq=0.0, max_freq=2000.0, max_db_diff=20.0, max_divisor=4,
842 min_group_size=3, max_harmonics_db=-5.0,
843 max_harmonics=0, max_groups=0, **kwargs):
844 """Detect peaks in power spectrum and group them according to their harmonic structure.
846 Parameters
847 ----------
848 psd_freqs: 1-D array
849 Frequencies of the power spectrum.
850 psd: 1-D array
851 Power spectrum (linear, not decible).
852 verbose: int
853 Verbosity level.
854 check_freqs: list of float
855 List of fundamental frequencies that will be checked first for being
856 present and valid harmonic groups in the peak frequencies
857 of the power spectrum.
858 low_threshold: float
859 The relative threshold for detecting all peaks in the decibel spectrum.
860 high_threshold: float
861 The relative threshold for detecting good peaks in the decibel spectrum.
862 thresh_bins: int or list of floats
863 Number of bins or the bins for computing the histogram from
864 which the standard deviation of the noise level in the `psd` is estimated.
865 low_thresh_factor: float
866 Factor by which the estimated standard deviation of the noise floor
867 is multiplied to set the `low_threshold`.
868 high_thresh_factor: float
869 Factor by which the estimated standard deviation of the noise floor
870 is multiplied to set the `high_threshold`.
871 freq_tol_fac: float
872 Harmonics should fall within `deltaf*freq_tol_fac`.
873 max_freq_tol: float
874 Maximum absolute frequency deviation of harmonics in Hertz..
875 mains_freq: float
876 Frequency of the mains power supply.
877 mains_freq_tol: float
878 Tolarerance around harmonics of the mains frequency,
879 within which peaks are removed.
880 min_freq: float
881 Minimum frequency accepted as a fundamental frequency.
882 max_freq: float
883 Maximum frequency accepted as a fundamental frequency.
884 max_db_diff: float
885 If larger than zero, maximum standard deviation of differences between
886 logarithmic powers of harmonics in decibel (larger than zero).
887 Low values enforce smoother power spectra.
888 max_divisor: int
889 Maximum divisor used for checking for sub-harmonics.
890 min_group_size: int
891 Minimum number of harmonics of a harmonic group.
892 The harmonics from min_group_size/3 to max(min_group_size, divisor)
893 need to be in good_freqs.
894 max_harmonics_db: float
895 Maximum allowed power of the `min_group_size`-th and higher harmonics
896 after the peak (in decibel relative to peak power withn the first
897 `min_group_size` harmonics, i.e. if harmonics are required to be
898 smaller than fundamental then this is a negative number).
899 Make it a large positive number to effectively not check for relative power.
900 max_harmonics: int
901 Maximum number of harmonics to be returned for each group.
902 max_groups: int
903 If not zero the maximum number of most powerful harmonic groups.
905 Returns
906 -------
907 groups: list of 2-D arrays
908 List of all extracted harmonic groups, sorted by fundamental frequency.
909 Each harmonic group is a 2-D array with the first dimension the harmonics
910 and the second dimension containing frequency and power of each harmonic.
911 If the power is zero, there was no corresponding peak in the power spectrum.
912 fzero_harmonics: list of ints
913 The harmonics from which the fundamental frequencies were computed.
914 mains: 2-d array
915 Frequencies and power of multiples of the mains frequency found in the power spectrum.
916 all_freqs: 2-D array
917 Frequency, power, and use count (columns) of all peaks detected
918 in the power spectrum.
919 good_freqs: 1-D array
920 Frequencies of strong peaks detected in the power spectrum.
921 low_threshold: float
922 The relative threshold for detecting all peaks in the decibel spectrum.
923 high_threshold: float
924 The relative threshold for detecting good peaks in the decibel spectrum.
925 center: float
926 The baseline level of the power spectrum.
927 """
928 if verbose > 0:
929 print('')
930 if verbose > 1:
931 print(70*'#')
932 print('##### harmonic_groups', 48*'#')
934 # decibel power spectrum:
935 log_psd = decibel(psd)
936 max_idx = np.argmax(~np.isfinite(log_psd))
937 if max_idx > 0:
938 log_psd = log_psd[:max_idx]
939 delta_f = psd_freqs[1] - psd_freqs[0]
941 # thresholds:
942 center = np.NaN
943 if low_threshold <= 0.0 or high_threshold <= 0.0:
944 low_th, high_th, center = threshold_estimate(log_psd, low_thresh_factor,
945 high_thresh_factor,
946 thresh_bins)
947 if low_threshold <= 0.0:
948 low_threshold = low_th
949 if high_threshold <= 0.0:
950 high_threshold = high_th
951 if verbose > 1:
952 print('')
953 print('low_threshold =%4.1fdB, center+low_threshold =%6.1fdB' % (low_threshold, center + low_threshold))
954 print('high_threshold=%4.1fdB, center+high_threshold=%6.1fdB' % (high_threshold, center + high_threshold))
955 print(' center=%6.1fdB' % center)
957 # detect peaks in decibel power spectrum:
958 peaks, troughs = detect_peaks(log_psd, low_threshold)
959 peaks, troughs = trim(peaks, troughs)
960 all_freqs = np.zeros((len(peaks), 3))
961 all_freqs[:,0] = psd_freqs[peaks]
962 all_freqs[:,1] = psd[peaks]
964 if len(all_freqs) == 0:
965 return [], [], [], np.zeros((0, 3)), [], low_threshold, high_threshold, center
967 # select good peaks:
968 good_freqs = all_freqs[(log_psd[peaks] - log_psd[troughs] > high_threshold) &
969 (all_freqs[:,0] >= min_freq) &
970 (all_freqs[:,0] < max_freq*max_divisor),:]
972 # detect harmonic groups:
973 freq_tol = delta_f*freq_tol_fac
974 if max_freq_tol < 1.1*freq_tol:
975 max_freq_tol = 1.1*freq_tol
976 groups, fzero_harmonics, mains = \
977 extract_fundamentals(good_freqs, all_freqs,
978 freq_tol, max_freq_tol,
979 verbose, check_freqs, mains_freq, mains_freq_tol,
980 min_freq, max_freq, max_db_diff, max_divisor, min_group_size,
981 max_harmonics_db, max_harmonics, max_groups)
983 return (groups, fzero_harmonics, mains, all_freqs, good_freqs[:,0],
984 low_threshold, high_threshold, center)
987def fundamental_freqs(group_list):
988 """
989 Extract fundamental frequencies from lists of harmonic groups.
991 The inner list of 2-D arrays of the input argument is transformed into
992 a 1-D array containig the fundamental frequencies extracted from
993 the 2-D arrays.
995 Parameters
996 ----------
997 group_list: (list of (list of ...)) list of 2-D arrays
998 Arbitrarily nested lists of harmonic groups as returned by
999 extract_fundamentals() and harmonic_groups() with the element
1000 [0, 0] being the fundamental frequency.
1002 Returns
1003 -------
1004 fundamentals: (list of (list of ...)) 1-D array
1005 Nested list (corresponding to `group_list`) of 1-D arrays
1006 with the fundamental frequencies extracted from the harmonic groups.
1007 """
1008 if len(group_list) == 0:
1009 return np.array([])
1011 # check whether group_list is list of harmonic groups:
1012 list_of_groups = True
1013 for group in group_list:
1014 if not ( hasattr(group, 'shape') and len(group.shape) == 2 ):
1015 list_of_groups = False
1016 break
1018 if list_of_groups:
1019 fundamentals = np.array([group[0, 0] for group in group_list if len(group) > 0])
1020 else:
1021 fundamentals = []
1022 for groups in group_list:
1023 f = fundamental_freqs(groups)
1024 fundamentals.append(f)
1025 return fundamentals
1028def fundamental_freqs_and_power(group_list, power=False,
1029 ref_power=1.0, min_power=1e-20):
1030 """Extract fundamental frequencies and their power in dB from lists
1031 of harmonic groups.
1033 The inner list of 2-D arrays of the input argument is transformed
1034 into a 2-D array containig for each fish (1st dimension) the
1035 fundamental frequencies and powers (summed over all harmonics)
1036 extracted from the 2-D arrays.
1038 Parameters
1039 ----------
1040 group_list: (list of (list of ...)) list of 2-D arrays
1041 Arbitrarily nested lists of harmonic groups as returned by
1042 extract_fundamentals() and harmonic_groups() with the element
1043 [0, 0] being the fundamental frequency and the elements [:,1] being
1044 the powers of each harmonics.
1045 power: boolean
1046 If `False` convert the power into decibel using the
1047 `thunderlab.powerspectrum.decibel()` function.
1048 ref_power: float
1049 Reference power for computing decibel.
1050 If set to `None` the maximum power is used.
1051 min_power: float
1052 Power values smaller than `min_power` are set to `-np.inf`.
1054 Returns
1055 -------
1056 fundamentals: (list of (list of ...)) 2-D array
1057 Nested list (corresponding to `group_list`) of 2-D arrays
1058 with fundamental frequencies in first column and
1059 corresponding power in second column.
1060 """
1061 if len(group_list) == 0:
1062 return np.array([])
1064 # check whether group_list is list of harmonic groups:
1065 list_of_groups = True
1066 for group in group_list:
1067 if not ( hasattr(group, 'shape') and len(group.shape) == 2 ):
1068 list_of_groups = False
1069 break
1071 if list_of_groups:
1072 fundamentals = np.array([[group[0, 0], np.sum(group[:, 1])]
1073 for group in group_list if len(group) > 0])
1074 if not power:
1075 fundamentals[:, 1] = decibel(fundamentals[:, 1],
1076 ref_power, min_power)
1077 else:
1078 fundamentals = []
1079 for groups in group_list:
1080 f = fundamental_freqs_and_power(groups, power,
1081 ref_power, min_power)
1082 fundamentals.append(f)
1083 return fundamentals
1086def add_relative_power(freqs):
1087 """Add a column with relative power.
1089 For each element in `freqs`, its maximum power is subtracted
1090 from all powers.
1092 Parameters
1093 ----------
1094 freqs: list of 2-D arrays
1095 First column in the ndarrays is fundamental frequency and
1096 second column the corresponding power.
1097 Further columns are optional and kept in the returned list.
1098 fundamental_freqs_and_power() returns such a list.
1100 Returns
1101 -------
1102 power_freqs: list of 2-D arrays
1103 Same as freqs, but with an added column containing the relative power.
1104 """
1105 return [np.column_stack((f, f[:,1] - np.max(f[:,1]))) for f in freqs]
1108def add_power_ranks(freqs):
1109 """Add a column with power ranks.
1111 Parameters
1112 ----------
1113 freqs: list of 2-D arrays
1114 First column in the arrays is fundamental frequency and
1115 second column the corresponding power.
1116 Further columns are optional and kept in the returned list.
1117 fundamental_freqs_and_power() returns such a list.
1119 Returns
1120 -------
1121 rank_freqs: list of 2-D arrays
1122 Same as freqs, but with an added column containing the ranks.
1123 The highest power is assinged to zero,
1124 lower powers are assigned negative integers.
1125 """
1126 rank_freqs = []
1127 for f in freqs:
1128 i = np.argsort(f[:,1])[::-1]
1129 ranks = np.empty_like(i)
1130 ranks[i] = -np.arange(len(i))
1131 rank_freqs.append(np.column_stack((f, ranks)))
1132 return rank_freqs
1135def similar_indices(freqs, df_thresh, nextfs=0):
1136 """Indices of similar frequencies.
1138 If two frequencies from different elements in the inner lists of `freqs` are
1139 reciprocally the closest to each other and closer than `df_thresh`,
1140 then two indices (element, frequency) of the respective other frequency
1141 are appended.
1143 Parameters
1144 ----------
1145 freqs: (list of (list of ...)) list of 2-D arrays
1146 First column in the arrays is fundamental frequency.
1147 df_thresh: float
1148 Fundamental frequencies closer than this threshold are considered
1149 equal.
1150 nextfs: int
1151 If zero, compare all elements in freqs with each other. Otherwise,
1152 only compare with the `nextfs` next elements in freqs.
1154 Returns
1155 -------
1156 indices: (list of (list of ...)) list of list of two-tuples of int
1157 For each frequency of each element in `freqs` a list of two tuples containing
1158 the indices of elements and frequencies that are similar.
1159 """
1160 if len(freqs) == 0:
1161 return []
1163 # check whether freqs is list of fundamental frequencies and powers:
1164 list_of_freq_power = True
1165 for group in freqs:
1166 if not (hasattr(group, 'shape') and len(group.shape) == 2):
1167 list_of_freq_power = False
1168 break
1170 if list_of_freq_power:
1171 indices = [ [[] for j in range(len(freqs[i]))] for i in range(len(freqs))]
1172 for j in range(len(freqs)-1):
1173 freqsj = np.asarray(freqs[j])
1174 for m in range(len(freqsj)):
1175 freq1 = freqsj[m]
1176 nn = len(freqs) if nextfs == 0 else j+1+nextfs
1177 if nn > len(freqs):
1178 nn = len(freqs)
1179 for k in range(j+1, nn):
1180 freqsk = np.asarray(freqs[k])
1181 if len(freqsk) == 0:
1182 continue
1183 n = np.argmin(np.abs(freqsk[:,0] - freq1[0]))
1184 freq2 = freqsk[n]
1185 if np.argmin(np.abs(freqsj[:,0] - freq2[0])) != m:
1186 continue
1187 if np.abs(freq1[0] - freq2[0]) < df_thresh:
1188 indices[k][n].append((j, m))
1189 indices[j][m].append((k, n))
1190 else:
1191 indices = []
1192 for groups in freqs:
1193 indices.append(similar_indices(groups, df_thresh, nextfs))
1194 return indices
1197def unique_mask(freqs, df_thresh, nextfs=0):
1198 """Mark similar frequencies from different recordings as dublicate.
1200 If two frequencies from different elements in `freqs` are
1201 reciprocally the closest to each other and closer than `df_thresh`,
1202 then the one with the smaller power is marked for removal.
1204 Parameters
1205 ----------
1206 freqs: list of 2-D arrays
1207 First column in the arrays is fundamental frequency and
1208 second column the corresponding power or equivalent.
1209 If values in the second column are equal (e.g. they are the same ranks),
1210 and there is a third column (e.g. power),
1211 the third column is used to decide, which element should be removed.
1212 df_thresh: float
1213 Fundamental frequencies closer than this threshold are considered
1214 equal.
1215 nextfs: int
1216 If zero, compare all elements in freqs with each other. Otherwise,
1217 only compare with the `nextfs` next elements in freqs.
1219 Returns
1220 -------
1221 mask: list of boolean arrays
1222 For each element in `freqs` True if that frequency should be kept.
1223 """
1224 mask = [np.ones(len(freqs[i]), dtype=bool) for i in range(len(freqs))]
1225 for j in range(len(freqs)-1):
1226 freqsj = np.asarray(freqs[j])
1227 for m in range(len(freqsj)):
1228 freq1 = freqsj[m]
1229 nn = len(freqs) if nextfs == 0 else j+1+nextfs
1230 if nn > len(freqs):
1231 nn = len(freqs)
1232 for k in range(j+1, nn):
1233 freqsk = np.asarray(freqs[k])
1234 if len(freqsk) == 0:
1235 continue
1236 n = np.argmin(np.abs(freqsk[:,0] - freq1[0]))
1237 freq2 = freqsk[n]
1238 if np.argmin(np.abs(freqsj[:,0] - freq2[0])) != m:
1239 continue
1240 if np.abs(freq1[0] - freq2[0]) < df_thresh:
1241 if freq1[1] > freq2[1]:
1242 mask[k][n] = False
1243 elif freq1[1] < freq2[1]:
1244 mask[j][m] = False
1245 elif len(freq1) > 2:
1246 if freq1[2] > freq2[2]:
1247 mask[k][n] = False
1248 else:
1249 mask[j][m] = False
1250 else:
1251 mask[j][m] = False
1252 return mask
1255def unique(freqs, df_thresh, mode='power', nextfs=0):
1256 """Remove similar frequencies from different recordings.
1258 If two frequencies from different elements in the inner lists of `freqs`
1259 are reciprocally the closest to each other and closer than `df_thresh`,
1260 then the one with the smaller power is removed. As power, either the
1261 absolute power as provided in the second column of the data elements
1262 in `freqs` is taken (mode=='power'), or the relative power
1263 (mode='relpower'), or the power rank (mode='rank').
1265 Parameters
1266 ----------
1267 freqs: (list of (list of ...)) list of 2-D arrays
1268 First column in the arrays is fundamental frequency and
1269 second column the corresponding power, as returned by
1270 fundamental_freqs_and_power().
1271 df_thresh: float
1272 Fundamental frequencies closer than this threshold are considered
1273 equal.
1274 mode: string
1275 - 'power': use second column of freqs elements as power.
1276 - 'relpower': use relative power computed from the second column
1277 of freqs elements for deciding which frequency to delete.
1278 - 'rank': use rank of second column of freqs elements
1279 for deciding which frequency to delete.
1280 nextfs: int
1281 If zero, compare all elements in freqs with each other. Otherwise,
1282 only compare with the `nextfs` next elements in freqs.
1284 Returns
1285 -------
1286 uniqe_freqs: (list of (list of ...)) list of 2-D arrays
1287 Same as `freqs` but elements with similar fundamental frequencies
1288 removed.
1289 """
1290 if len(freqs) == 0:
1291 return []
1293 # check whether freqs is list of fundamental frequencies and powers:
1294 list_of_freq_power = True
1295 for group in freqs:
1296 if not (hasattr(group, 'shape') and len(group.shape) == 2):
1297 list_of_freq_power = False
1298 break
1300 if list_of_freq_power:
1301 if mode == 'power':
1302 mask = unique_mask(freqs, df_thresh, nextfs)
1303 elif mode == 'relpower':
1304 power_freqs = [f[:,[0, 2, 1]] for f in add_relative_power(freqs)]
1305 mask = unique_mask(power_freqs, df_thresh, nextfs)
1306 elif mode == 'rank':
1307 rank_freqs = [f[:,[0, 2, 1]] for f in add_power_ranks(freqs)]
1308 mask = unique_mask(rank_freqs, df_thresh, nextfs)
1309 else:
1310 raise ValueError('%s is not a valid mode for unique(). Choose one of "power" or "rank"')
1311 unique_freqs = []
1312 for f, m in zip(freqs, mask):
1313 unique_freqs.append(f[m])
1314 else:
1315 unique_freqs = []
1316 for groups in freqs:
1317 unique_freqs.append(unique(groups, df_thresh, mode, nextfs))
1318 return unique_freqs
1321def colors_markers():
1322 """Generate a list of colors and markers for plotting.
1324 Returns
1325 -------
1326 colors: list
1327 list of colors
1328 markers: list
1329 list of markers
1330 """
1331 # color and marker range:
1332 colors = []
1333 markers = []
1334 mr2 = []
1336 # first color range:
1337 cc0 = cm.gist_rainbow(np.linspace(0.0, 1.0, 8))
1339 # shuffle it:
1340 for k in range((len(cc0) + 1) // 2):
1341 colors.extend(cc0[k::(len(cc0) + 1) // 2])
1342 markers.extend(len(cc0) * 'o')
1343 mr2.extend(len(cc0) * 'v')
1344 # second darker color range:
1345 cc1 = cm.gist_rainbow(np.linspace(0.33 / 7.0, 1.0, 7))
1346 cc1 = mc.hsv_to_rgb(mc.rgb_to_hsv(np.array([cc1[:, :3]])) * np.array([1.0, 0.9, 0.7]))[0]
1347 cc1 = np.hstack((cc1, np.ones((len(cc1),1))))
1348 # shuffle it:
1349 for k in range((len(cc1) + 1) // 2):
1350 colors.extend(cc1[k::(len(cc1) + 1) // 2])
1351 markers.extend(len(cc1) * '^')
1352 mr2.extend(len(cc1) * '*')
1353 # third lighter color range:
1354 cc2 = cm.gist_rainbow(np.linspace(0.67 / 6.0, 1.0, 6))
1355 cc2 = mc.hsv_to_rgb(mc.rgb_to_hsv(np.array([cc1[:, :3]])) * np.array([1.0, 0.5, 1.0]))[0]
1356 cc2 = np.hstack((cc2, np.ones((len(cc2),1))))
1357 # shuffle it:
1358 for k in range((len(cc2) + 1) // 2):
1359 colors.extend(cc2[k::(len(cc2) + 1) // 2])
1360 markers.extend(len(cc2) * 'D')
1361 mr2.extend(len(cc2) * 'x')
1362 markers.extend(mr2)
1363 return colors, markers
1366def plot_harmonic_groups(ax, group_list, indices=None, max_groups=0,
1367 skip_bad=False, sort_by_freq=True, label_power=False,
1368 colors=None, markers=None, legend_rows=8, **kwargs):
1369 """Mark decibel power of fundamentals and their harmonics in a plot.
1371 Parameters
1372 ----------
1373 ax: axis for plot
1374 Axis used for plotting.
1375 group_list: list of 2-D arrays
1376 Lists of harmonic groups as returned by extract_fundamentals() and
1377 harmonic_groups() with the element [0, 0] of the harmonic groups being
1378 the fundamental frequency, and element[0, 1] being the corresponding power.
1379 indices: list of int or None
1380 If smaller than zero then set the legend label of the corresponding group in brackets.
1381 max_groups: int
1382 If not zero plot only the max_groups most powerful groups.
1383 skip_bad: bool
1384 Skip harmonic groups without index (entry in indices is negative).
1385 sort_by_freq: boolean
1386 If True sort legend by frequency, otherwise by power.
1387 label_power: boolean
1388 If `True` put the power in decibel in addition to the frequency into the legend.
1389 colors: list of colors or None
1390 If not None list of colors for plotting each group
1391 markers: list of markers or None
1392 If not None list of markers for plotting each group
1393 legend_rows: int
1394 Maximum number of rows to be used for the legend.
1395 kwargs:
1396 Key word arguments for the legend of the plot.
1397 """
1398 if len(group_list) == 0:
1399 return
1401 # sort by power:
1402 powers = np.array([np.sum(group[:,1]) for group in group_list])
1403 max_power = np.max(powers)
1404 idx = np.argsort(-powers)
1405 if max_groups > 0 and len(idx > max_groups):
1406 idx = idx[:max_groups]
1408 # sort by frequency:
1409 if sort_by_freq:
1410 freqs = np.array([group_list[group][0,0] for group in idx])
1411 if legend_rows > 0 and legend_rows < len(freqs):
1412 idx = idx[np.argsort(freqs[:legend_rows])]
1413 else:
1414 idx = idx[np.argsort(freqs)]
1416 # plot:
1417 k = 0
1418 for i in idx:
1419 if indices is not None and skip_bad and indices[i] < 0:
1420 continue
1421 group = group_list[i]
1422 x = group[:,0]
1423 y = decibel(group[:,1])
1424 msize = 7.0 + 10.0*(powers[i]/max_power)**0.25
1425 color_kwargs = {}
1426 if colors is not None:
1427 color_kwargs = {'color': colors[k%len(colors)]}
1428 label = '%6.1f Hz' % group[0, 0]
1429 if label_power:
1430 label += ' %6.1f dB' % decibel(np.array([np.sum(group[:,1])]))[0]
1431 if indices is not None:
1432 if indices[i] < 0:
1433 label = '(' + label + ')'
1434 else:
1435 label = ' ' + label + ' '
1436 if legend_rows > 5 and k >= legend_rows:
1437 label = None
1438 if markers is None:
1439 ax.plot(x, y, 'o', ms=msize, label=label, **color_kwargs)
1440 else:
1441 if k >= len(markers):
1442 break
1443 ax.plot(x, y, linestyle='None', marker=markers[k],
1444 mec=None, mew=0.0, ms=msize, label=label, **color_kwargs)
1445 k += 1
1447 # legend:
1448 if legend_rows > 0:
1449 if legend_rows > 5:
1450 ncol = 1
1451 else:
1452 ncol = (len(idx)-1) // legend_rows + 1
1453 ax.legend(numpoints=1, ncol=ncol, **kwargs)
1454 else:
1455 ax.legend(numpoints=1, **kwargs)
1458def plot_psd_harmonic_groups(ax, psd_freqs, psd, group_list,
1459 mains=None, all_freqs=None, good_freqs=None,
1460 log_freq=False, min_freq=0.0, max_freq=2000.0, ymarg=0.0):
1461 """Plot decibel power-spectrum with detected peaks, harmonic groups,
1462 and mains frequencies.
1464 Parameters
1465 ----------
1466 psd_freqs: array
1467 Frequencies of the power spectrum.
1468 psd: array
1469 Power spectrum (linear, not decible).
1470 group_list: list of 2-D arrays
1471 Lists of harmonic groups as returned by extract_fundamentals() and
1472 harmonic_groups() with the element [0, 0] of the harmonic groups being the fundamental frequency,
1473 and element[0, 1] being the corresponding power.
1474 mains: 2-D array
1475 Frequencies and power of multiples of the mains frequency found in the power spectrum.
1476 all_freqs: 2-D array
1477 Peaks in the power spectrum detected with low threshold.
1478 good_freqs: 1-D array
1479 Frequencies of peaks detected with high threshold.
1480 log_freq: boolean
1481 Logarithmic (True) or linear (False) frequency axis.
1482 min_freq: float
1483 Limits of frequency axis are set to `(min_freq, max_freq)`
1484 if `max_freq` is greater than zero
1485 max_freq: float
1486 Limits of frequency axis are set to `(min_freq, max_freq)`
1487 and limits of power axis are computed from powers below max_freq
1488 if `max_freq` is greater than zero
1489 ymarg: float
1490 Add this to the maximum decibel power for setting the ylim.
1491 """
1493 # mark all and good psd peaks:
1494 pmin, pmax = ax.get_ylim()
1495 doty = pmax - 5.0
1496 if all_freqs is not None:
1497 ax.plot(all_freqs[:, 0], np.zeros(len(all_freqs[:, 0])) + doty, 'o', color='#ffffff')
1498 if good_freqs is not None:
1499 ax.plot(good_freqs, np.zeros(len(good_freqs)) + doty, 'o', color='#888888')
1500 # mark mains frequencies:
1501 if mains is not None and len(mains) > 0:
1502 fpeaks = mains[:, 0]
1503 fpeakinx = [int(np.round(fp/(psd_freqs[1]-psd_freqs[0]))) for fp in fpeaks if fp < psd_freqs[-1]]
1504 ax.plot(fpeaks[:len(fpeakinx)], decibel(psd[fpeakinx]), linestyle='None',
1505 marker='.', color='k', ms=10, mec=None, mew=0.0,
1506 label='%3.0f Hz mains' % mains[0, 0])
1507 # mark harmonic groups:
1508 colors, markers = colors_markers()
1509 plot_harmonic_groups(ax, group_list, max_groups=0, sort_by_freq=True,
1510 colors=colors, markers=markers, legend_rows=8,
1511 loc='upper right')
1512 # plot power spectrum:
1513 plot_decibel_psd(ax, psd_freqs, psd, log_freq=log_freq, min_freq=min_freq,
1514 max_freq=max_freq, ymarg=ymarg, color='blue')
1517def add_psd_peak_detection_config(cfg, low_threshold=0.0, high_threshold=0.0,
1518 thresh_bins=100,
1519 low_thresh_factor=6.0, high_thresh_factor=10.0):
1520 """Add parameter needed for detection of peaks in power spectrum used
1521 by harmonic_groups() as a new section to a configuration.
1523 Parameters
1524 ----------
1525 cfg: ConfigFile
1526 The configuration.
1527 """
1528 cfg.add_section('Thresholds for peak detection in power spectra:')
1529 cfg.add('lowThreshold', low_threshold, 'dB', 'Threshold for all peaks.\n If 0.0 estimate threshold from histogram.')
1530 cfg.add('highThreshold', high_threshold, 'dB', 'Threshold for good peaks. If 0.0 estimate threshold from histogram.')
1532 cfg.add_section('Threshold estimation:\nIf no thresholds are specified they are estimated from the histogram of the decibel power spectrum.')
1533 cfg.add('thresholdBins', thresh_bins, '', 'Number of bins used to compute the histogram used for threshold estimation.')
1534 cfg.add('lowThresholdFactor', low_thresh_factor, '', 'Factor for multiplying standard deviation of noise floor for lower threshold.')
1535 cfg.add('highThresholdFactor', high_thresh_factor, '', 'Factor for multiplying standard deviation of noise floor for higher threshold.')
1538def psd_peak_detection_args(cfg):
1539 """Translates a configuration to the respective parameter names for
1540 the detection of peaks in power spectrum used by
1541 harmonic_groups(). The return value can then be passed as
1542 key-word arguments to this function.
1544 Parameters
1545 ----------
1546 cfg: ConfigFile
1547 The configuration.
1549 Returns
1550 -------
1551 a: dict
1552 Dictionary with names of arguments of the `harmonic-group()` function
1553 and their values as supplied by `cfg`.
1554 """
1555 return cfg.map({'low_threshold': 'lowThreshold',
1556 'high_threshold': 'highThreshold',
1557 'thresh_bins': 'thresholdBins',
1558 'low_thresh_factor': 'lowThresholdFactor',
1559 'high_thresh_factor': 'highThresholdFactor'})
1562def add_harmonic_groups_config(cfg, mains_freq=60.0, mains_freq_tol=1.0,
1563 max_divisor=4, freq_tol_fac=1.0,
1564 max_freq_tol=1.0, min_group_size=3,
1565 min_freq=20.0, max_freq=2000.0, max_db_diff=20.0,
1566 max_harmonics_db=-5.0, max_harmonics=0, max_groups=0):
1567 """Add parameter needed for detection of harmonic groups as a new
1568 section to a configuration.
1570 Parameters
1571 ----------
1572 cfg: ConfigFile
1573 The configuration.
1574 """
1575 cfg.add_section('Harmonic groups:')
1576 cfg.add('mainsFreq', mains_freq, 'Hz', 'Mains frequency to be excluded.')
1577 cfg.add('mainsFreqTolerance', mains_freq_tol, 'Hz', 'Exclude peaks within this tolerance around multiples of the mains frequency.')
1578 cfg.add('minimumGroupSize', min_group_size, '',
1579'Minimum number of harmonics (inclusively fundamental) that make up a harmonic group.')
1580 cfg.add('maxDivisor', max_divisor, '', 'Maximum ratio between the frequency of the largest peak and its fundamental.')
1581 cfg.add('freqTolerance', freq_tol_fac, '',
1582 'Harmonics should be within this factor times the frequency resolution of the power spectrum. Needs to be higher than 0.5!')
1583 cfg.add('maximumFreqTolerance', max_freq_tol, 'Hz',
1584 'Maximum deviation of harmonics from their expected value.')
1586 cfg.add_section('Acceptance of best harmonic groups:')
1587 cfg.add('minimumFrequency', min_freq, 'Hz', 'Minimum frequency allowed for the fundamental.')
1588 cfg.add('maximumFrequency', max_freq, 'Hz', 'Maximum frequency allowed for the fundamental.')
1589 cfg.add('maximumPowerDifference', max_db_diff, 'dB', 'If larger than zero, maximum standard deviation allowed for difference in logarithmic power between successive harmonics. Smaller values enforce smoother spectra.')
1590 cfg.add('maximumHarmonicsPower', max_harmonics_db, 'dB', 'Maximum allowed power of the minimumGroupSize-th and higher harmonics relative to peak power.')
1591 cfg.add('maximumHarmonics', max_harmonics, '', '0: keep all, >0 only keep the first # harmonics.')
1592 cfg.add('maximumGroups', max_groups, '', 'Maximum number of harmonic groups. If 0 process all.')
1595def harmonic_groups_args(cfg):
1596 """Translates a configuration to the respective parameter names of
1597 the harmonic-group detection functions. The return value can then
1598 be passed as key-word arguments to this function.
1600 Parameters
1601 ----------
1602 cfg: ConfigFile
1603 The configuration.
1605 Returns
1606 -------
1607 a: dict
1608 Dictionary with names of arguments of the harmonic-group detection functions
1609 and their values as supplied by `cfg`.
1610 """
1611 return cfg.map({'mains_freq': 'mainsFreq',
1612 'mains_freq_tol': 'mainsFreqTolerance',
1613 'freq_tol_fac': 'freqTolerance',
1614 'max_freq_tol': 'maximumFreqTolerance',
1615 'max_divisor': 'maxDivisor',
1616 'min_group_size': 'minimumGroupSize',
1617 'min_freq': 'minimumFrequency',
1618 'max_freq': 'maximumFrequency',
1619 'max_db_diff': 'maximumPowerDifference',
1620 'max_harmonics_db': 'maximumHarmonicsPower',
1621 'max_harmonics': 'maximumHarmonics',
1622 'max_groups': 'maximumGroups'})
1625def main(data_file=None):
1626 import matplotlib.pyplot as plt
1627 from thunderlab.powerspectrum import psd
1628 from .fakefish import wavefish_eods
1630 if data_file is None:
1631 # generate data:
1632 title = 'simulation'
1633 samplerate = 44100.0
1634 d = 20.0
1635 noise = 0.01
1636 eodfs = [123.0, 333.0, 666.0, 666.5]
1637 fish1 = 0.5*wavefish_eods('Eigenmannia', eodfs[0], samplerate,
1638 duration=d, noise_std=noise)
1639 fish2 = 1.0*wavefish_eods('Eigenmannia', eodfs[1], samplerate,
1640 duration=d, noise_std=noise)
1641 fish3 = 10.0*wavefish_eods('Alepto', eodfs[2], samplerate,
1642 duration=d, noise_std=noise)
1643 fish4 = 6.0*wavefish_eods('Alepto', eodfs[3], samplerate,
1644 duration=d, noise_std=noise)
1645 data = fish1 + fish2 + fish3 + fish4
1646 else:
1647 from thunderlab.dataloader import load_data
1648 print("load %s ..." % data_file)
1649 data, samplerate, unit, amax = load_data(data_file)
1650 data = data[:,0]
1651 title = data_file
1653 # retrieve fundamentals from power spectrum:
1654 psd_data = psd(data, samplerate, freq_resolution=0.1)
1655 groups, _, mains, all_freqs, good_freqs, _, _, _ = harmonic_groups(psd_data[0], psd_data[1], check_freqs=[123.0, 666.0], max_db_diff=30.0, verbose=0)
1656 fig, ax = plt.subplots()
1657 plot_psd_harmonic_groups(ax, psd_data[0], psd_data[1], groups, mains,
1658 all_freqs, good_freqs, max_freq=3000.0)
1659 ax.set_title(title)
1660 plt.show()
1662 # unify fundamental frequencies:
1663 fundamentals = fundamental_freqs(groups)
1664 np.set_printoptions(formatter={'float': lambda x: '%5.1f' % x})
1665 print('fundamental frequencies extracted from power spectrum:')
1666 print(fundamentals)
1667 print('')
1668 freqs = fundamental_freqs_and_power([groups])
1669 freqs.append(np.array([[44.0, -20.0], [44.2, -10.0], [320.5, 2.5], [665.5, 5.0], [666.2, 10.0]]))
1670 freqs.append(np.array([[123.3, 1.0], [320.2, -2.0], [668.4, 2.0]]))
1671 rank_freqs = add_relative_power(freqs)
1672 rank_freqs = add_power_ranks(rank_freqs)
1673 print('all frequencies (frequency, power, relpower, rank):')
1674 print('\n'.join(( str(f) for f in rank_freqs)))
1675 print('')
1676 indices = similar_indices(freqs, 1.0)
1677 print('similar indices:')
1678 print('\n'.join(( ('\n '.join((str(f) for f in g)) for g in indices))))
1679 print('')
1680 unique_freqs = unique(freqs, 1.0, 'power')
1681 print('unique power:')
1682 print('\n'.join(( str(f) for f in unique_freqs)))
1683 print('')
1684 unique_freqs = unique(freqs, 1.0, 'relpower')
1685 print('unique relative power:')
1686 print('\n'.join(( str(f) for f in unique_freqs)))
1687 print('')
1688 unique_freqs = unique(freqs, 1.0, 'rank')
1689 print('unique rank:')
1690 print('\n'.join(( str(f) for f in unique_freqs)))
1691 print('')
1692 unique_freqs = unique(freqs, 1.0, 'rank', 1)
1693 print('unique rank for next neighour only:')
1694 print('\n'.join(( str(f) for f in unique_freqs)))
1695 print('')
1698if __name__ == "__main__":
1699 import sys
1700 data_file = sys.argv[1] if len(sys.argv) > 1 else None
1701 main(data_file)