Coverage for src / thunderfish / pulsetracker.py: 0%
536 statements
1"""
2by Dexter Frueh
3"""
5import os
6import sys
7import ntpath
8import time
9import copy
10import numpy as np
11#from fish import ProgressFish
12import matplotlib.pyplot as plt
14from shutil import copy2
15from collections import deque
16from scipy import stats
17from scipy import signal
18from scipy import optimize
19from scipy.interpolate import interp1d
20from sklearn.preprocessing import StandardScaler
21from sklearn.decomposition import PCA
22from sklearn.cluster import DBSCAN
23from sklearn.cluster import AgglomerativeClustering
24from thunderlab.dataloader import DataLoader
25from thunderlab.eventdetection import detect_peaks
def makeeventlist(main_event_positions, side_event_positions, data, event_width=20):
    """
    Generate an array of events that might be EODs of a pulse-type fish, using
    the locations of peaks and troughs, the data, and an optional width of a
    supposed EOD event. The generated event array contains the location and
    height of each event.

    The height of an event is calculated from its height difference to nearby
    troughs; main events that have no side event within a range closer than
    event_width are discarded and not considered as EOD events.

    Parameters
    ----------
    main_event_positions: array of int or float
        Positions of the detected main events in the data time series. Either peaks or troughs.
    side_event_positions: array of int or float
        Positions of the detected side events in the data time series. The complementary events to the main events.
    data: array of float
        The given data.
    event_width: int or float, optional
        Maximum allowed distance between a main event and its side events.

    Returns
    -------
    EOD_events: ndarray
        2D array containing data with 'float' type, size (number_of_properties = 3, number_of_events).
        Generated and combined data of the detected events: x, y, and height along the first axis.
    """
    # determine whether a main event (peak) or a side event (trough) comes first;
    # evaluates to 1 if there is a peak first
    mainfirst = int((min(main_event_positions[0], side_event_positions[0]) < side_event_positions[0]))
    main_x = main_event_positions
    main_y = data[main_event_positions]
    # empty placeholders, filled in the next step while iterating over the properties of the single main events
    main_h = np.zeros(len(main_event_positions))
    main_real = np.ones(len(main_event_positions))
    # iteration over the properties of the single main events
    for ind, (x, y, h, r) in enumerate(np.nditer([main_x, main_y, main_h, main_real], op_flags=[['readonly'], ['readonly'], ['readwrite'], ['readwrite']])):
        l_side_ind = ind - mainfirst
        r_side_ind = l_side_ind + 1
        try:
            r_side_x = side_event_positions[r_side_ind]
            r_distance = r_side_x - x
            r_side_y = data[r_side_x]
        except IndexError:
            pass
        try:
            l_side_x = side_event_positions[l_side_ind]
            l_distance = x - l_side_x
            l_side_y = data[l_side_x]
        except IndexError:
            pass  # ignore leftmost or rightmost events which throw an IndexError
        # calculate the distances to the two side events next to the main event and mark all events
        # whose nearest side events are further away than event_width as unreal.
        # If an event might be an EOD, calculate its height.
        if l_side_ind >= 0 and r_side_ind < len(side_event_positions):
            if min(l_distance, r_distance) > event_width:
                r[...] = False
            elif max(l_distance, r_distance) <= event_width:
                # calculated using absolute values in case of, for example, troughs instead of peaks as main events
                h[...] = max(abs(y - l_side_y), abs(y - r_side_y))
            else:
                # evaluated only when exactly one side event is out of reach of the event width;
                # then the closer side event is the correct one
                if l_distance < r_distance:
                    h[...] = abs(y - l_side_y)
                else:
                    h[...] = abs(y - r_side_y)
        # check corner cases
        elif l_side_ind == -1:
            if r_distance > event_width:
                r[...] = False
            else:
                h[...] = y - r_side_y
        elif r_side_ind == len(side_event_positions):
            if l_distance > event_width:
                r[...] = False
            else:
                h[...] = y - l_side_y
    # generate the return array and discard all events that are not marked as real
    EOD_events = np.array([main_x, main_y, main_h], dtype=float)[:, main_real == 1]
    return EOD_events

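# Illustrative sketch (not part of the original module): how detect_peaks output
# is typically fed into makeeventlist. The synthetic pulse train and the
# threshold of 0.5 are demonstration assumptions.
def _demo_makeeventlist():
    data = np.zeros(10000)
    data[100::1000] = 1.0     # positive spikes every 1000 samples
    data[105::1000] = -0.8    # matching troughs 5 samples later
    pk, tr = detect_peaks(data, 0.5)
    events = makeeventlist(pk, tr, data, event_width=20)
    print(events.shape)       # (3, n_events): x, y, height
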
def discardnearbyevents(event_locations, event_heights, min_distance):
    """
    Given a number of events with given locations and heights, returns a selection
    of these events where no event is closer than min_distance to the next event.
    Among neighboring events closer than min_distance, the event with the smaller
    height is discarded.

    Used to discard side peaks in detected multiple peaks of single EOD pulses and
    to only keep the largest event height and the corresponding location as
    representative of the whole EOD pulse.

    Parameters
    ----------
    event_locations: array of int or float
        Positions of the given events in the data time series.
    event_heights: array of int or float
        Heights of the given events; indices refer to the same events as in
        event_locations.
    min_distance: int or float
        Minimal distance between events before one of the events gets discarded.

    Returns
    -------
    event_indices: array of int
        Indices of the returned events in the original event arrays.
    event_locations: array of int or float
        Positions of the returned events in the data time series.
    event_heights: array of int or float
        Heights of the returned events; indices refer to the same events as in
        event_locations.
    """
    unchanged = False
    counter = 0
    event_indices = np.arange(len(event_locations))

    while not unchanged:
        x_diffs = np.diff(event_locations)
        events_delete = np.zeros(len(event_locations))

        for i, diff in enumerate(x_diffs):
            if diff < min_distance:
                if event_heights[i + 1] > event_heights[i]:
                    events_delete[i] = 1
                else:
                    events_delete[i + 1] = 1

        event_heights = event_heights[events_delete != 1]
        event_locations = event_locations[events_delete != 1]
        event_indices = event_indices[np.where(events_delete != 1)[0]]
        if np.count_nonzero(events_delete) == 0:
            unchanged = True
        counter += 1
        if counter > 2000:
            print('Warning: unusually many discarding steps needed; events are unusually dense')
    return event_indices, event_locations, event_heights

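# Illustrative sketch (synthetic numbers are demonstration assumptions):
# two events 5 samples apart get merged, keeping the higher one.
def _demo_discardnearbyevents():
    locs = np.array([100, 105, 400])
    heights = np.array([1.0, 2.0, 1.5])
    idx, locs2, heights2 = discardnearbyevents(locs, heights, min_distance=20)
    print(idx, locs2, heights2)   # [1 2] [105 400] [2.  1.5]
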
def crosscorrelation(sig, data):
    """Returns the cross-correlation of two arrays; the first array should be of equal or smaller length than the second array."""
    return signal.fftconvolve(data, sig[::-1], mode='valid')

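# Illustrative sketch: the argmax of the cross-correlation recovers the offset
# of a template within a longer signal (the values are demonstration assumptions).
def _demo_crosscorrelation():
    template = np.array([0.0, 1.0, 0.0])
    longer = np.zeros(10)
    longer[5] = 1.0               # template peak hidden at index 5
    cc = crosscorrelation(template, longer)
    print(np.argmax(cc))          # 4 == 5 - 1 (peak index minus template center)
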
def interpol(data, kind):
    """
    Interpolates the given data using the scipy interpolation package.

    Parameters
    ----------
    data: array

    kind: string or int
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer giving the order of the spline interpolation to be used

    Returns
    -------
    interpolation: function
    """
    width = len(data)
    x = np.linspace(0, width - 1, num=width, endpoint=True)
    #return interp1d(x, data[0:width], kind, assume_sorted=True)
    return interp1d(x, data[0:width], kind)

def interpolated_array(data, kind, int_fact):
    """
    Returns an interpolated array of the given data array.

    Parameters
    ----------
    data: array

    kind: string or int
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer giving the order of the spline interpolation to be used

    int_fact: int
        factor by which the interpolated array is larger than the original array

    Returns
    -------
    interpolated array: array
    """
    return interpol(data, kind)(np.arange(0, len(data) - 1, 1 / int_fact))

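# Illustrative sketch: upsampling a short ramp by a factor of 4 with linear
# interpolation (the input values are demonstration assumptions).
def _demo_interpolated_array():
    data = np.array([0.0, 1.0, 2.0])
    up = interpolated_array(data, 'linear', 4)
    print(len(up), up[:5])   # 8 samples on [0, 2): [0.   0.25 0.5  0.75 1.  ]
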
def cut_snippets(data, event_locations, cut_width, int_met="linear", int_fact=10, max_offset=1000000):
    """
    Cuts intervals from a data array, interpolates and aligns them, and returns them in a list.

    TODO: ALIGN THEM TO CAUSE LEAST SQUARE ERROR

    Parameters
    ----------
    data: array

    event_locations: array

    cut_width: int
        half width of the intervals relative to the event locations,
        e.g. 15 indicates an interval of 30 datapoints around each event location

    int_met: string or int
        method of interpolation: ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer giving the order of the spline interpolation to be used

    int_fact: int
        factor by which the interpolated array is larger than the original

    max_offset: float
        maximal offset by which the interpolated intervals can be moved to be aligned
        with each other; offset relative to the datapoints of the original data

    Returns
    -------
    aligned_snips: two-dimensional nparray
        the processed intervals (interval#, interval length)
    """
    snippets = []
    heights = np.zeros(len(event_locations))
    cut_width = [-cut_width, cut_width]
    #alignwidth = int(np.ceil((max_offset) * int_fact))
    alignwidth = 50

    for pos in event_locations.astype('int'):
        snippets.append(data[pos + cut_width[0]:pos + cut_width[1]])
    ipoled_snips = np.empty((len(snippets), (cut_width[1] - cut_width[0]) * int_fact - int_fact))

    for i, snip in enumerate(snippets):
        if len(snip) < (cut_width[1] - cut_width[0]):
            if i == 0:
                # pad the first snippet on the left if it was cut short at the start of the data
                snip = np.concatenate([np.zeros((cut_width[1] - cut_width[0]) - len(snip)), np.array(snip)])
            elif i == len(snippets) - 1:
                # pad the last snippet on the right if it was cut short at the end of the data
                snip = np.concatenate([snip, np.zeros((cut_width[1] - cut_width[0]) - len(snip))])
            else:
                snip = np.zeros(cut_width[1] - cut_width[0])

        interpoled_snip = interpolated_array(snip, int_met, int_fact)
        intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip)

        if intsnipheight == 0:
            intsnipheight = 1

        # normalize the snippet to its peak-to-peak height
        interpoled_snip = (interpoled_snip - max(interpoled_snip)) * 1 / intsnipheight
        ipoled_snips[i] = interpoled_snip

    mean = np.mean(ipoled_snips, axis=0)

    aligned_snips = np.empty((len(snippets), (cut_width[1] - cut_width[0]) * int_fact - (2 * alignwidth) - int_fact))

    for i, interpoled_snip in enumerate(ipoled_snips):
        # align each snippet to the mean snippet via cross-correlation
        cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean)
        offset = -alignwidth + np.argmax(cc)
        aligned_snip = interpoled_snip[alignwidth - offset:-alignwidth - offset] if offset != -alignwidth else interpoled_snip[2 * alignwidth:]
        if len(aligned_snip[~np.isnan(aligned_snip)]) > 0:
            aligned_snips[i] = aligned_snip
        try:
            heights[i] = np.max(interpoled_snip[alignwidth - offset:-alignwidth - offset]) - np.min(interpoled_snip[alignwidth - offset:-alignwidth - offset])
        except ValueError:
            heights[i] = np.max(interpoled_snip[2 * alignwidth:]) - np.min(interpoled_snip[2 * alignwidth:])
    return aligned_snips, heights

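# Illustrative sketch (pulse positions and widths are demonstration assumptions):
# cutting and aligning snippets around detected peak locations.
def _demo_cut_snippets():
    data = np.random.randn(10000) * 0.01
    locs = np.array([1000, 3000, 5000])
    data[locs] = 1.0              # plant identical single-sample pulses
    snips, heights = cut_snippets(data, locs, 30, int_met='cubic', int_fact=10, max_offset=20)
    print(snips.shape)            # (3, 60*10 - 2*50 - 10) = (3, 490)
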
def pc(dataset):
    """
    Calculates the principal components of a dataset using scikit-learn's principal component analysis.

    Parameters
    ----------
    dataset: ndarray
        dataset of which the principal components are to be calculated;
        two-dimensional array of shape (observations, features)

    Returns
    -------
    pc_comp: ndarray
        principal components of the dataset
    """
    pca = PCA(n_components=10)
    pc_comp = pca.fit_transform(dataset)
    return pc_comp  #, pca

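# Illustrative sketch: projecting random 20-dimensional observations onto their
# first 10 principal components (the data shape is a demonstration assumption;
# n_components=10 requires at least 10 features and 10 observations).
def _demo_pc():
    dataset = np.random.randn(100, 20)
    comps = pc(dataset)
    print(comps.shape)   # (100, 10)
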
def chebyshev(dataset):
    """Fits a Chebyshev polynomial of degree 5 to each row of the dataset and returns the coefficients."""
    x = range(len(dataset[0]))
    npol = 5
    p = np.zeros((len(dataset), npol + 1))
    for i, s in enumerate(dataset):
        cheb = np.polynomial.chebyshev.Chebyshev.fit(x, s, npol)
        p[i] = cheb.coef
    return p

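# Illustrative sketch: six Chebyshev coefficients summarize each waveform row
# (the input waveforms are demonstration assumptions).
def _demo_chebyshev():
    dataset = np.vstack([np.sin(np.linspace(0, np.pi, 50)),
                         np.cos(np.linspace(0, np.pi, 50))])
    coeffs = chebyshev(dataset)
    print(coeffs.shape)   # (2, 6)
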
def dbscan(pcs, events, eps, min_samples, takekm):
    """
    TODO: improve description, add parameters and returns.

    Calculates clusters of high spatial density of the given observations in their feature space.
    For example, the first few principal components of the data could be used as features for the classification.

    Parameters
    ----------
    pcs: ndarray
        %TODO
        shape(samples, features)
    ...

    Returns
    -------
    labels: ndarray
        labels of the clusters of each observation
    """
    # pcs (samples, features)
    # X (samples, features)
    X = pcs

    # #############################################################################
    # Compute DBSCAN
    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    labels = clusters.labels_
    comp = clusters.components_

    comp_means = np.zeros((len(np.unique(labels)) - 1, comp.shape[1]))

    for i in range(len(np.unique(labels)) - 1):
        comp_means[i] = np.mean(pcs[labels == i], axis=0)

    return labels, comp_means

def cluster_events(features, events, eps, min_samples, takekm, method='DBSCAN'):
    """
    Clusters the given events using the given feature space and the clustering algorithm
    of choice, and appends the assigned cluster number to the event's properties.

    Parameters
    ----------

    Returns
    -------

    """
    ######################## function maybe could be even more generic? (dependent on datatype of "events")
    if method == 'DBSCAN':
        labels, clusters = dbscan(features, events, eps, min_samples, takekm)
    elif method == 'kMean':
        # To be implemented
        #labels = kmeans([])
        pass
    return labels, clusters

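# Illustrative sketch: clustering two well-separated groups of feature vectors
# with DBSCAN (eps and min_samples values are demonstration assumptions).
def _demo_cluster_events():
    feats = np.vstack([np.random.randn(20, 3) * 0.01,
                       np.random.randn(20, 3) * 0.01 + 5.0])
    labels, clusters = cluster_events(feats, None, eps=0.5, min_samples=5, takekm=False)
    print(np.unique(labels))   # two cluster labels, e.g. [0 1]
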
class Peaklist(object):
    def __init__(self, peaklist):
        self.list = peaklist
        self.lastofclass = {}
        self.lastofclassx = {}
        self.classesnearby = []
        self.classesnearbyx = []
        self.classesnearbypccl = []
        self.classlist = []
        self.classamount = 0
        self.shapes = {}

def connect_blocks(oldblock):
    """
    Used to connect blocks:
    transfers data from the previous analysis block to the current block.
    """
    newblock = Peaklist([])
    newblock.lastofclass = oldblock.lastofclass
    newblock.lastofclassx = oldblock.lastofclassx
    newblock.classesnearby = oldblock.classesnearby
    newblock.classesnearbypccl = oldblock.classesnearbypccl
    newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx]
    newblock.classamount = oldblock.classamount
    newblock.len = oldblock.len
    return newblock

def alignclusterlabels(labels, peaklist, peaks, data='test'):
    """
    Used to connect blocks:
    changes the labels of clusters in the current block to fit with the labels of the previous block.
    """
    # take the first second of the new peak data (assuming 30 kHz sampling)
    overlapamount = len(peaks[0, peaks[0] < 30000])
    if overlapamount == 0:
        return None

    old_peaklist = copy.deepcopy(peaklist)  # redundant
    overlappeaks = copy.deepcopy(peaks[:, :overlapamount])
    overlap_peaklist = copy.deepcopy(old_peaklist)

    # delete cluster classifications of the overlap class
    overlappeaks[3] = [-1] * len(overlappeaks[0])

    # set nearby pc classes to -1
    overlap_peaklist.classesnearbypccl = [-1] * len(overlap_peaklist.classesnearbypccl)

    # create peak labels using the ampwalk classifier
    classified_overlap = ampwalkclassify3_refactor(overlappeaks, overlap_peaklist, glue=True)[0]

    # for each class
    for cl in np.unique(classified_overlap[4]):

        # indices of the peaks with the current class by ampwalk classification
        labelindex = np.where(classified_overlap[4] == cl)[0]

        # pc clustering labels that were originally given to those peaks
        label = labels[labelindex]

        # index of a peak with the most common translation from ampwalk class to pc clustering class
        labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]]

        # pc clustering label belonging to the class cl in the new block
        newlabel = labels[labelindex]

        try:
            peaklist.classesnearbypccl[old_peaklist.classesnearby.index(cl)] = newlabel
        except ValueError:
            # cl is not among the classes of the previous block
            pass

def ampwalkclassify3_refactor(peaks, peaklist, glue=False):
    """
    Classifies peaks/EOD events into different classes by their amplitude.

    Takes the list of peaks and the list of properties of the last analysis block.
    Classifies the single peaks in the direction of their occurrence in time, based on their amplitude and
    their previously assigned class based on their waveform (... using the method cluster_events on the
    principal components of the snippets around the single peaks).

    Method:
    Calculates differences in amplitude between the current peak and the different amplitude classes that
    are nearby. Creates a new amplitude class if no class is close enough. Creates no new class if the
    peak's waveform class is a noise class of the DBSCAN algorithm. Does not compare peaks of different
    waveform classes.

    Can be used without prior waveform classes, resulting in classification solely on the amplitude
    development; the pc classes need to be set to the same class for this. Not practical, but it should
    be possible to split this up into more general functions.
    """
    classamount = peaklist.classamount
    lastofclass = peaklist.lastofclass
    lastofclassx = peaklist.lastofclassx
    a = 0
    elem = 0
    thresholder = []
    comperr = 1
    classesnearby = peaklist.classesnearby
    classesnearbyx = peaklist.classesnearbyx
    classesnearbypccl = peaklist.classesnearbypccl
    classes = np.zeros((len(peaks[0])))
    pcclasses = peaks[3]
    positions = peaks[0]
    heights = peaks[2]
    cl = 0
    maxdistance = 30000  # max distance to possibly belong to the same class
    factor = 1.6  # factor by which a peak fits into a class, e.g. classheight = 1, factor = 2 => peaks accepted in the range (0.5, 2)
    c = 0
    # loop through all the new peaks
    for peaknum, p in enumerate(peaks.T):

        if len(lastofclass) == 0:
            lastofclass[1] = deque()
            lastofclassx[1] = deque()
            lastofclass[1].append(heights[peaknum])
            lastofclassx[1].append(positions[peaknum])
            classesnearby.append(1)
            classesnearbyx.append(-1)
            classesnearbypccl.append(pcclasses[peaknum])
            classes[peaknum] = 1
            classamount += 1
            continue
        time1 = time.time()

        # classes nearby only count if they are within maxdistance;
        # iterate in reverse so that popping does not skip entries
        for i in range(len(classesnearby) - 1, -1, -1):
            if (positions[peaknum] - classesnearbyx[i]) > maxdistance:
                classesnearby.pop(i)
                classesnearbyx.pop(i)
                classesnearbypccl.pop(i)

        # compute the mean isi of a class by taking the last 3 pulses in that class
        lastofclassisis = []
        for i in classesnearby:
            lastofclassisis.append(np.median(np.diff(lastofclassx[i])))

        meanisi = np.mean(lastofclassisis)

        # stop adding to a class if 40 isis have passed
        if 32000 > 40 * meanisi > 6000:
            maxdistance = 20 * meanisi
        cl = 0  # 'no class'
        comperr = 100
        clnrby = np.unique(classesnearby)

        last_err = 1000

        # TODO: this assigns peaks with no class to the last class if there are multiple candidates.
        # Can I fix this?
        for i in clnrby:

            # if the class of the current peak is equal to the currently evaluated class, or the current peak has no class
            if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum]: #or glue==True: #pcclasses[peaknum] == -1: or
                classmean = np.mean(lastofclass[i])  # mean height of the class

                # difference between the current peak height and the average class height
                logerror = np.abs(np.log2(heights[peaknum]) - np.log2(classmean))
                logthresh = np.log2(factor)
                relerror = logerror

                # if the peak heights are similar
                if logerror < logthresh:  ## same-class condition
                    # if the peaks are close together in distance (20*isi)
                    if relerror < comperr and (positions[peaknum] - classesnearbyx[classesnearby.index(i)]) < maxdistance:
                        # keep the same class (or, in case of no class, assign that class)
                        cl = i
                        comperr = relerror
        time2 = time.time()
        time1 = time.time()
        # if a pc class is assigned to the peak
        if pcclasses[peaknum] != -1:
            # if the class is kept
            if cl != 0:
                # append this peak to the peaklist for the right class (only keep the last 3 peaks)
                if len(lastofclass[cl]) >= 3:
                    lastofclass[cl].popleft()
                if len(lastofclassx[cl]) >= 3:
                    lastofclassx[cl].popleft()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
            else:
                # if the class is not the same as any of the existing classes, create a new class
                cl = classamount + 1
                classamount = cl
                lastofclass[cl] = deque()
                lastofclassx[cl] = deque()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])

            # if there are more than 12 classes, delete the class that is furthest away in proximity
            if len(classesnearby) >= 12:  # TODO: crude implementation?
                minind = classesnearbyx.index(min(classesnearbyx))
                del lastofclass[classesnearby[minind]]
                del lastofclassx[classesnearby[minind]]
                classesnearby.pop(minind)
                classesnearbyx.pop(minind)
                classesnearbypccl.pop(minind)

            # add position and class to the peaklist
            try:
                ind = classesnearby.index(cl)
                classesnearbyx[ind] = positions[peaknum]
            except ValueError:
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
        # if no pc class is assigned to the peak
        elif glue == True:
            # if a class is assigned through the peak-amplitude method
            if cl != 0:
                # add this class to the peak
                classes[peaknum] = cl
            else:
                # create a new class
                cl = classamount + 1
                classamount = cl
                lastofclass[cl] = deque()
                lastofclassx[cl] = deque()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
            if len(classesnearby) >= 12:  # TODO: crude implementation?
                minind = classesnearbyx.index(min(classesnearbyx))
                del lastofclass[classesnearby[minind]]
                del lastofclassx[classesnearby[minind]]
                classesnearby.pop(minind)
                classesnearbyx.pop(minind)
                classesnearbypccl.pop(minind)
            try:
                ind = classesnearby.index(cl)
                classesnearbyx[ind] = positions[peaknum]
            except ValueError:
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
    peaklist.lastofclass = lastofclass
    peaklist.lastofclassx = lastofclassx
    peaklist.classesnearby = classesnearby
    peaklist.classesnearbyx = classesnearbyx
    peaklist.classesnearbypccl = classesnearbypccl
    peaklist.classlist = classes  # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list)
    peaklist.classamount = classamount
    peaks = np.append(peaks, classes[None, :], axis=0)

    return peaks, peaklist

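# Illustrative sketch (synthetic numbers are demonstration assumptions):
# two interleaved pulse trains with distinct amplitudes get separate amplitude
# classes. The peaks rows are (x, y, height, pc class); the classifier appends
# a fifth row with the assigned amplitude class.
def _demo_ampwalkclassify():
    x = np.sort(np.concatenate([np.arange(0, 20000, 1000),
                                np.arange(500, 20500, 1000)]))
    h = np.where(np.arange(len(x)) % 2 == 0, 1.0, 3.0)
    peaks = np.vstack([x, h, h, np.zeros(len(x))])
    classified, _ = ampwalkclassify3_refactor(peaks, Peaklist([]))
    print(np.unique(classified[4]))   # two amplitude classes, e.g. [1. 2.]
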
def discard_wave_pulses(peaks, data):
    """
    Discards events from a pulse event list which are unusually wide (wider than a tenth of the
    inter-pulse interval), which indicates a wave-type EOD instead of a pulse type.
    """
    deleteclasses = []
    for cl in np.unique(peaks[3]):
        peaksofclass = peaks[:, peaks[3] == cl]
        isi = np.diff(peaksofclass[0])
        isi_mean = np.mean(isi)
        widepeaks = 0
        isi_tenth_area = lambda x, isi: np.arange(np.floor(x - 0.1 * isi), np.ceil(x + 0.1 * isi), 1, dtype=int)
        data = np.array(data)
        for p in peaksofclass.T:
            try:
                for dp_around in data[isi_tenth_area(p[0], isi_mean)]:
                    if dp_around <= p[1] - p[2]:
                        break
                else:
                    # the data never dropped below the peak base within a tenth of the isi
                    widepeaks += 1
            except (IndexError, ValueError):
                pass
        if widepeaks > len(peaksofclass) * 0.5:
            deleteclasses.append(cl)
    for cl in deleteclasses:
        peaks = peaks[:, peaks[3] != cl]
    return peaks

def plot_events_on_data(peaks, data, colors):
    """
    Plots the detected events onto the data time series. If the events are classified,
    the classes are plotted in different colors, and the class -1 (not belonging to a cluster)
    is plotted in black.
    """
    plt.plot(range(len(data)), data, color='black')
    if len(peaks) > 3:
        classlist = np.array(peaks[3], dtype=int)
        if len(peaks) > 4:
            classlist = np.array(peaks[4], dtype=int)
        #classlist=labels
        cmap = plt.get_cmap('jet')

        for cl in np.unique(classlist):

            if cl == -1:
                color = 'black'
            else:
                color = colors[cl]
            peaksofclass = peaks[:, classlist == cl]
            plt.plot(peaksofclass[0], peaksofclass[1], '.', color=color, ms=20, label=cl)
        plt.legend()
    else:
        plt.scatter(peaks[0], peaks[1], color='red')
    plt.show()
    plt.close()

def discard_short_classes(events, minlen):
    """
    Returns all events except those belonging to classes with fewer than minlen members.
    """
    u, c = np.unique(events[-1], return_counts=True)

    smallclasses = u[c < minlen]
    classlist = events[-1]

    delete = np.zeros(len(classlist))

    for cl in smallclasses:
        delete[classlist == cl] = 1
    events = events[:, delete != 1]

    return events

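# Illustrative sketch (synthetic numbers are demonstration assumptions):
# class 2 has only one member and is dropped.
def _demo_discard_short_classes():
    events = np.array([[0.0, 1.0, 2.0, 3.0],   # x
                       [1.0, 1.0, 1.0, 1.0],   # y
                       [1.0, 1.0, 1.0, 2.0]])  # class labels in the last row
    kept = discard_short_classes(events, minlen=2)
    print(kept[-1])   # [1. 1. 1.]
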
def path_leaf(path):
    """Returns the final component of a path, also when the path ends with a separator."""
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

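# Illustrative sketch: both forms yield the last path component.
def _demo_path_leaf():
    print(path_leaf('rec/2020/data.wav'))   # data.wav
    print(path_leaf('rec/2020/'))           # 2020
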
def save_EOD_events_to_npmmp(EOD_Events, eods_len, startblock, datasavepath, mmpname='eods.npmmp'):
    n_EOD_Events = len(EOD_Events[0])
    savepath = datasavepath + "/" + mmpname
    if startblock:
        eods = np.memmap(savepath,
                         dtype='float64', mode='w+',
                         shape=(4, n_EOD_Events), order='F')
    else:
        dtypesize = 8  # a float64 is 8 bytes long
        # offset in bytes: 4 rows of float64 per already saved event
        eods = np.memmap(savepath, dtype='float64', mode='r+',
                         offset=dtypesize * eods_len * 4,
                         shape=(4, n_EOD_Events), order='F')
    eods[:] = EOD_Events

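# Illustrative sketch (paths and numbers are demonstration assumptions):
# appending two blocks of events to one memory map and reading them back.
def _demo_save_npmmp(tmpdir='.'):
    block1 = np.arange(8.0).reshape(4, 2)
    block2 = np.arange(8.0, 16.0).reshape(4, 2)
    save_EOD_events_to_npmmp(block1, 0, True, tmpdir)
    save_EOD_events_to_npmmp(block2, len(block1[0]), False, tmpdir)
    eods = np.memmap(tmpdir + '/eods.npmmp', dtype='float64', mode='r',
                     shape=(4, 4), order='F')
    print(eods[:, :2], eods[:, 2:])   # the two blocks side by side
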
def create_threshold_array(data, window, threshold):
    """Computes a windowed threshold: four standard deviations per window, but at least the given threshold."""
    thr_array = np.zeros(data.shape)
    for i in range(int(len(data) / window)):
        thr_array[i * window:(i + 1) * window] = np.std(data[i * window:(i + 1) * window]) * 4
    thr_array[thr_array < threshold] = threshold
    return thr_array

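# Illustrative sketch (window and threshold values are demonstration assumptions):
# quiet windows fall back to the fixed minimum threshold.
def _demo_create_threshold_array():
    data = np.concatenate([np.zeros(100), np.random.randn(100)])
    thr = create_threshold_array(data, 100, 0.04)
    print(thr[0], thr[150])   # 0.04 in the quiet half, ~4*std in the noisy half
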
def alignlabels(labels, clusters, old_labels, old_clusters, maxlabel):
    """Matches the cluster labels of the current block to those of the previous block by comparing cluster means."""
    old_labels = old_labels[old_labels != -1]

    labels_new = -1 * np.ones(labels.shape)
    newclass = maxlabel

    for curlabel, cluster in enumerate(clusters):
        n = np.linalg.norm(old_clusters - cluster, axis=1)

        if np.min(n) < 0.1:
            labels_new[labels == curlabel] = old_labels[np.argmin(n)]
        else:
            labels_new[labels == curlabel] = newclass
            newclass = newclass + 1

    return labels_new

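# Illustrative sketch (numbers are demonstration assumptions): the first new
# cluster matches an old cluster mean and inherits its label; the second is new.
def _demo_alignlabels():
    old_clusters = np.array([[0.0, 0.0]])
    old_labels = np.array([3])
    clusters = np.array([[0.01, 0.0], [5.0, 5.0]])
    labels = np.array([0, 0, 1])
    print(alignlabels(labels, clusters, old_labels, old_clusters, maxlabel=4))
    # [3. 3. 4.]
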
def analyze_pulse_data(filepath, deltat=10, thresh=0.04, starttime=0, endtime=0, cluster_thresh=0.1, savepath=False, save=False, npmmp=False, plot_eods=False, plot_features=False, plot_steps=False, plot_result=False):
    """
    Analyzes the time series of a pulse fish EOD recording.

    Parameters
    ----------
    filepath: WAV file with the recorded time series

    deltat: int, optional
        duration of a single analysis block (recommended less than a minute, due to
        principal component clustering on the EOD waveforms)

    thresh: float, optional
        minimum threshold for the peak detection (if computing frequencies, a value slightly
        lower than the desired threshold is recommended; instead, discard the EODs below the
        desired threshold after computing the frequencies for each EOD)

    starttime: int or str of int, optional
        time into the data from where to start the analysis, in seconds.

    endtime: int or str of int, optional
        time into the data where to end the analysis, in seconds; larger than starttime.

    cluster_thresh: float, optional
        threshold that decides the cluster density of the EOD waveform features.

    savepath: Boolean or str, optional
        path to where to save results and intermediate results; only needed if save or npmmp is True.
        String to specify a relative path to the directory where results and intermediate results
        will be stored,
        or False to use the preset savepath, which is ~/filepath/,
        or True to specify the savepath as input while the script is running.

    save: Boolean, optional
        True to save the results into a npy file at the savepath.

    npmmp: Boolean, optional
        True to save intermediate results into a numpy memmap at the savepath;
        only recommended in case of memory overflow.

    plot_steps: Boolean, optional
        True to plot the results of each analysis block.

    plot_result: Boolean, optional
        True to plot the results of the final analysis. Not recommended for long recordings due to %TODO

    plot_eods: Boolean, optional
        True to plot the EOD waveforms for each analysis block.

    plot_features: Boolean, optional
        True to plot the EOD waveform features for each analysis block.

    Returns
    -------
    eods: numpy array
        2D numpy array. First axis: attributes of an EOD (x (datapoints), y (recorded voltage),
        height (difference from maximum to minimum), class); second axis: EODs in chronological order.
    """
    # parameters for the analysis
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderlab.dataloader.DataLoader
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    eods_len = 0
    if savepath == False:
        datasavepath = filename[:-4]
    elif savepath == True:
        datasavepath = input('Specify a relative path to save the results to: ').lower()
    else:
        datasavepath = savepath

    if save and (os.path.exists(datasavepath + "/eods8_" + filename[:-3] + "npy") or os.path.exists(datasavepath + "/eods5_" + filename[:-3] + "npy")):
        print('there already exists an analyzed file, aborting. Change the code if you don\'t want to abort')
        quit()
    if npmmp:
        #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
        proceed = 'y'
        if proceed != 'y':
            quit()
    # starting the analysis
    with DataLoader(filepath, deltat, 0.0, verbose) as data:
        rate = data.rate

        # selected time interval; keep the data two-dimensional, the channel is selected per block
        if timegiven == True:
            parttime1 = int(starttime * rate)
            parttime2 = int(endtime * rate)
            data = data[parttime1:parttime2]

        # split the data into blocks
        nblock = int(deltat * rate)
        if len(data) % nblock != 0:
            blockamount = len(data) // nblock + 1
        else:
            blockamount = len(data) // nblock
        #fish = ProgressFish(total = blockamount)

        pca_cur = 0
        progress = 0

        for idx in range(0, blockamount):
            print('BLOCK %i/%i' % (idx + 1, blockamount))

            blockdata = data[idx * nblock:(idx + 1) * nblock, channel]
            if progress < (idx * 100 // blockamount):
                progress = (idx * 100) // blockamount
                progressstr = ' Filestatus: '
                # fish.animate(amount = idx, dexextra = progressstr)
            # delete peaks under the absolute threshold
            #thresh_array = create_threshold_array(blockdata,30000,thresh)
            pk, tr = detect_peaks(blockdata, thresh)
            troughs = tr

            if len(pk) > 3:

                peaks = makeeventlist(pk, tr, blockdata, peakwidth)
                peakindices, peakx, peakh = discardnearbyevents(peaks[0], peaks[1], peakwidth)
                peaks = peaks[:, peakindices]

                if len(peaks[0]) > 0:

                    #if idx > startblock:
                    #    # adding a new block as copy of old list, only difference is peak indexing as it refers to last block
                    #    peaklist = connect_blocks(peaklist)
                    #else:
                    #    peaklist = Peaklist([])

                    aligned_snips, snip_heights = cut_snippets(blockdata, peaks[0], 30, int_met="cubic", int_fact=10, max_offset=20)

                    pols = chebyshev(aligned_snips)

                    feats = np.zeros((pols.shape[0], pols.shape[1] + 1))
                    feats[:, :6] = pols
                    feats[:, -1] = snip_heights * 0.1
                    #pcs, pca_cur = pc(aligned_snips) #pc_refactor(aligned_snips)

                    minpeaks = 3 if deltat < 2 else 10

                    labels, clusters = cluster_events(feats, peaks, cluster_thresh, minpeaks, False, method='DBSCAN')
                    peaks = np.append(peaks, [labels], axis=0)
                    if idx > startblock:
                        # instead of the peaklist I would have to add the previous cluster means
                        # alignclusterlabels(labels, peaklist, peaks,data=blockdata)
                        peaks[-1] = alignlabels(labels, clusters, old_labels, old_clusters, maxlabel)

                    old_labels = np.unique(peaks[-1])
                    old_clusters = clusters
                    # I would want peaks updated here to have the right pc classes as well...

                    #peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude

                    minlen = 5
                    peaks = discard_short_classes(peaks, minlen)

                    if len(peaks[0]) > 0:
                        peaks = discard_wave_pulses(peaks, blockdata)
                        # delete peaks under the absolute threshold
                        #thresh_array = create_threshold_array(blockdata,30000)
                        #peaks = peaks[:,peaks[1]>thresh_array[list(map(int,peaks[0]))]]

                    cmap = plt.get_cmap('jet')
                    colors = cmap(np.linspace(0, 1.0, 10))

                    if plot_steps == True:
                        plot_events_on_data(peaks, blockdata, colors)
                    for lab in np.unique(labels):

                        if lab == -1:
                            c = 'k'
                            z = -1
                        else:
                            c = colors[lab]
                            z = 1
                        if plot_eods == True:
                            plt.plot(range(aligned_snips.shape[1]), np.transpose(aligned_snips[labels == lab]), color=c, zorder=z, label=lab)

                    if plot_eods == True:
                        plt.title('Detected and classified EODs')
                        plt.xlabel('time [ms]')
                        plt.ylabel('signal (normalized)')

                        phandles, plabels = plt.gca().get_legend_handles_labels()
                        by_label = dict(zip(plabels, phandles))
                        plt.legend(by_label.values(), by_label.keys())
                        plt.show()

                    for lab in np.unique(labels):

                        if lab == -1:
                            c = 'k'
                            z = -1
                        else:
                            c = colors[lab]
                            z = 1
                        if plot_features == True:
                            plt.plot(np.squeeze(np.transpose(feats[labels == lab])), color=c, zorder=z, label=lab)

                    if plot_features == True:
                        plt.title('EOD Features')
                        plt.xlabel('feature [#]')
                        plt.ylabel('value [a.u.]')

                        phandles, plabels = plt.gca().get_legend_handles_labels()
                        by_label = dict(zip(plabels, phandles))
                        plt.legend(by_label.values(), by_label.keys())
                        plt.show()
                    #peaklist.len = nblock
                    worldpeaks = np.copy(peaks)
                    worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                    # delete the classification that only considers the wave shape.
                    #thisblock_eods = np.delete(worldpeaks,3,0)
                    thisblock_eods = worldpeaks

                    if idx == startblock:
                        maxlabel = np.max(peaks[-1]) + 1
                    else:
                        maxlabel = np.max([maxlabel, (np.max(peaks[-1]) + 1)])

                    if npmmp:
                        if idx == startblock:
                            if not os.path.exists(datasavepath):
                                os.makedirs(datasavepath)
                            mmpname = "eods_" + filename[:-3] + "npmmp"
                        # save the peaks of the current buffered part to a numpy memmap on the disk
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len, idx == startblock, datasavepath, mmpname)
                        eods_len += len(thisblock_eods[0])
                    else:
                        if idx > 0:
                            all_eods = np.concatenate((all_eods, thisblock_eods), axis=1)
                        else:
                            all_eods = thisblock_eods
        if plot_steps == True:
            print('FINAL RESULTS')
            plot_events_on_data(all_eods, data[:, channel], colors)
            #plot_events_on_data(all_eods,data)
    print('Returns the analyzed EODs. Calculate frequencies using all of these, but discard the data from the EODs within the lowest few percent of amplitude.')

    if npmmp:
        all_eods = np.memmap(datasavepath + '/' + mmpname, dtype='float64', mode='r+', shape=(4, eods_len), order='F')
    if save == 1:
        path = filename[:-4] + "/"
        if not os.path.exists(path):
            os.makedirs(path)
        if eods_len > 0:
            np.save(datasavepath + "/eods8_" + filename[:-3] + "npy", all_eods)
            print('Saved!')
        else:
            print('not saved')
    return all_eods

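# Illustrative usage sketch (the file name and time window are assumptions):
# analyze only the first 60 seconds of a recording without writing to disk.
#
#     eods = analyze_pulse_data('recording.wav', deltat=10, thresh=0.04,
#                               starttime=0, endtime=60)
#     print(eods.shape)   # (4, n_eods): x, y, height, class
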
def main():
    eods = analyze_pulse_data(sys.argv[1], save=True, npmmp=True)
    print(eods)


if __name__ == '__main__':
    main()