Coverage for src/thunderfish/pulsetracker.py: 0%
537 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-29 16:21 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-29 16:21 +0000
1"""
2by Dexter Frueh
3"""
5import os
6import sys
7import ntpath
8import time
9import copy
10from shutil import copy2
11from collections import deque
12import numpy as np
13from scipy import stats
14from scipy import signal
15from scipy import optimize
16import matplotlib
17#from fish import ProgressFish
18import matplotlib.pyplot as plt
19from scipy.interpolate import interp1d
20from sklearn.preprocessing import StandardScaler
21from sklearn.decomposition import PCA
22from sklearn.cluster import DBSCAN
23from sklearn.cluster import AgglomerativeClustering
24from thunderlab.dataloader import DataLoader
25from thunderlab.eventdetection import detect_peaks
27def makeeventlist(main_event_positions,side_event_positions,data,event_width=20):
28 """
29 Generate array of events that might be EODs of a pulse-type fish, using the location of peaks and troughs,
30 the data and an optional width of an supposed EOD-event.
31 The generated event-array contains location and height of such events.
32 The height of the events is calculated by its height-difference to nearby troughs and main events that have no side events in a range closer than event_width are discarded and not considered as EOD event.
34 Parameters
35 ----------
36 main_event_positions: array of int or float
37 Positions of the detected main events in the data time series. Either peaks or troughs.
38 side_event_positions: array of int or float
39 Positions of the detected side events in the data time series. The complimentary event to the main events.
40 data: array of float
41 The given data.
42 event_width: int or float, optional
44 Returns
45 -------
46 EOD_events: ndarray
47 2D array containing data with 'float' type, size (number_of_properties = 3, number_of_events).
48 Generated and combined data of the detected events in an array with arrays of x, y and height along the first axis.
50 """
51 mainfirst = int((min(main_event_positions[0],side_event_positions[0])<side_event_positions[0])) # determines if there is a peak or through first. Evaluates to 1 if there is a peak first.
52 main_x = main_event_positions
53 main_y = data[main_event_positions]
54 # empty placeholders, filled in the next step while iterating over the properties of single main
55 main_h = np.zeros(len(main_event_positions))
56 main_real = np.ones(len(main_event_positions))
57 # iteration over the properties of the single main
58 for ind,(x, y, h, r) in enumerate(np.nditer([main_x, main_y, main_h, main_real], op_flags=[["readonly"],['readonly'],['readwrite'],['readwrite']])):
59 l_side_ind = ind - mainfirst
60 r_side_ind = l_side_ind + 1
61 try:
62 r_side_x = side_event_positions[r_side_ind]
63 r_distance = r_side_x - x
64 r_side_y = data[r_side_x]
65 except:
66 pass
67 try:
68 l_side_x = side_event_positions[l_side_ind]
69 l_distance = x - l_side_x
70 l_side_y = data[l_side_x]
71 except:
72 pass # ignore left or rightmost events which throw IndexError
73 # calculate distances to the two side events next to the main event and mark all events where the next side events are not closer than maximum event_width as unreal. If an event might be an EOD, then calculate its height.
74 if l_side_ind >= 0 and r_side_ind < len(side_event_positions):
75 if min((l_distance),(r_distance)) > event_width:
76 r[...] = False
77 elif max((l_distance),(r_distance)) <= event_width:
78 h[...] = max(abs(y-l_side_y),abs(y-r_side_y)) #calculated using absolutes in case of for example troughs instead of peaks as main events
79 else:
80 if (l_distance)<(r_distance): # evaluated only when exactly one side event is out of reach of the event width. Then the closer event will be the correct event
81 h[...] = abs(y-l_side_y)
82 else:
83 h[...] = abs(y-r_side_y)
84 # check corner cases
85 elif l_side_ind == -1:
86 if r_distance > event_width:
87 r[...] = False
88 else:
89 h[...] = y-r_side_y
90 elif r_side_ind == len(side_event_positions):
91 if l_distance> event_width:
92 r[...] = False
93 else:
94 h[...] = y-l_side_y
95 # generate return array and discard all events that are not marked as real
96 EOD_events = np.array([main_x, main_y, main_h], dtype = float)[:,main_real==1]
97 return EOD_events
99def discardnearbyevents(event_locations, event_heights, min_distance):
100 """
101 Given a number of events with given location and heights, returns a selection
102 of these events where no event is closer than eventwidth to the next event.
103 Among neighboring events closer than eventwidth the event with smaller height
104 is discarded.
105 Used to discard sidepeaks in detected multiple peaks of single EOD-pulses and
106 only keep the largest event_height and the corresponding location as
107 representative of the whole EOD pulse.
109 Parameters
110 ----------
111 event_locations: array of int or float
112 Positions of the given events in the data time series.
113 event_heights: array of int or float
114 Heights of the given events, indices refer to the same events as in
115 event_locations.
116 min_distance: int or float
117 minimal distance between events before one of the events gets discarded.
119 Returns
120 -------
121 event_locations: array of int or float
122 Positions of the returned events in the data time series.
123 event_heights: array of int or float
124 Heights of the returned events, indices refer to the same events as in
125 event_locations.
127 """
128 unchanged = False
129 counter = 0
130 event_indices = np.arange(0,len(event_locations)+1,1)
132 while unchanged == False:# and counter<=200:
133 x_diffs = np.diff(event_locations)
134 events_delete = np.zeros(len(event_locations))
136 for i, diff in enumerate(x_diffs):
138 if diff < min_distance:
140 if event_heights[i+1] > event_heights[i] :
141 events_delete[i] = 1
142 else:
143 events_delete[i+1] = 1
145 event_heights = event_heights[events_delete!=1]
146 event_locations = event_locations[events_delete!=1]
147 event_indices = event_indices[np.where(events_delete!=1)[0]]
148 if np.count_nonzero(events_delete)==0:
149 unchanged = True
150 counter += 1
151 if counter > 2000:
152 print('Warning: unusual many discarding steps needed, unusually dense events')
153 pass
154 return event_indices, event_locations, event_heights
156def crosscorrelation(sig, data):
157 'returns crosscorrelation of two arrays, the first array should have a length equal to or smaller than the second array.'
158 return signal.fftconvolve(data, sig[::-1], mode='valid')
160def interpol(data, kind):
161 """
162 interpolates the given data using scipy interpolation python package
164 Parameters
165 ----------
166 data: array
168 kind: string or int
169 (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used
171 Returns
172 -------
173 interpolation: function
175 """
176 width = len(data)
177 x = np.linspace(0, width-1, num = width, endpoint = True)
178 #return interp1d(x, data[0:width], kind, assume_sorted=True)
179 return interp1d(x, data[0:width], kind)
181def interpolated_array(data, kind, int_fact):
182 """
183 returns an interpolated array of the given dataarray.
185 Parameters
186 ----------
187 data: array
189 kind: string or int
190 (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used
192 int_fact: int
193 factor by which the interpolated array is larger than the original array
195 Returns
196 -------
197 interpolated array: array
199 """
200 return interpol(data,kind)(np.arange(0, len(data)-1, 1/int_fact))
202def cut_snippets(data,event_locations,cut_width,int_met="linear",int_fact=10,max_offset = 1000000):
203 """
204 cuts intervals from a data array, interpolates and aligns them and returns them in a list
206 TODO: ALIGN THEM TO CAUSE LEAST SQUARE ERROR
208 Parameters
209 ----------
210 data: array
212 event_locations: array
214 cut_width: [int, int]
215 lower and upper limit of the intervals relative to the event locations.
216 f.e. [-15,15] indicates an interval of 30 datapoints around each event location
218 int_met: string or int
219 method of interpolation. (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used
221 int_fact: int
222 factor by which the interpolated array is larger than the original
224 max_offset: float
225 maximal offset by which the interpolated intervals can be moved to be aligned with each other. offset relative to the datapoints of the original data.
227 Returns
228 -------
229 aligned_snips: twodimensional nparray
230 the processed intervals (interval#,intervallen)
232 """
233 snippets = []
234 heights = np.zeros(len(event_locations))
235 cut_width = [-cut_width, cut_width]
236 #alignwidth = int(np.ceil((max_offset) * int_fact))
237 alignwidth = 50
239 for pos in event_locations.astype('int'):
240 snippets.append(data[pos+cut_width[0]:pos+cut_width[1]])
241 ipoled_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact-int_fact))
243 for i, snip in enumerate(snippets):
244 if len(snip) < ((cut_width[1]-cut_width[0])):
245 if i == 0:
246 snip = np.concatenate([np.zeros([((cut_width[1]-cut_width[0]) - len(snip))]),np.array(snip)])
247 if i == len(snippets):
248 snip = np.concatenate([snip, np.zeros([((cut_width[1]-cut_width[0])-len(snip))])])
249 else:
250 snip = np.zeros([(cut_width[1]-cut_width[0])])
252 #f_interpoled = interpol(snip, int_met) #if len(snip) > 0 else np.zeros([(cut_width[1]-cut_width[0]-1)*int_fact ])
253 interpoled_snip = interpolated_array(snip, int_met, 10)#f_interpoled(np.arange(0, len(snip)-1, 1/int_fact))
254 intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip)
257 if intsnipheight == 0:
258 intsnipheight = 1
260 interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight
261 ipoled_snips[i] = interpoled_snip
263 mean = np.mean(ipoled_snips, axis = 0)
265 aligned_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])* int_fact-(2*alignwidth)-int_fact))
267 for i, interpoled_snip in enumerate(ipoled_snips):
268 cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean)
269 #cc = crosscorrelation(interpoled_snip[15 + 10*-cut_width[0]-10*7:-15+ -10*cut_width[1]+ 31], mean[10*-cut_width[0]-10*7:-10*cut_width[1]+31])
270 offset = -alignwidth + np.argmax(cc)
271 aligned_snip = interpoled_snip[alignwidth-offset:-alignwidth-offset] if offset != -alignwidth else interpoled_snip[2*alignwidth:]
272 if len(aligned_snip[~np.isnan(aligned_snip)])>0:
273 aligned_snips[i] = aligned_snip
274 try:
275 heights[i] = np.max(interpoled_snip[alignwidth-offset:-alignwidth-offset]) - np.min(interpoled_snip[alignwidth-offset:-alignwidth-offset])
276 except:
277 heights[i] = np.max(interpoled_snip[2*alignwidth:]) - np.min(interpoled_snip[2*alignwidth:])
278 return aligned_snips, heights
280def pc(dataset):
281 """
282 Calculates the principal components of a dataset using the python module scikit-learn's principal component analysis
284 Parameters
285 ----------
286 dataset: ndarray
287 dataset of which the principal components are to be calculated.
288 twodimensional array of shape (observations, features)
290 Returns
291 -------
292 pc_comp: ndarray
293 principal components of the dataset
295 """
296 pca = PCA(n_components=10)
297 pc_comp = pca.fit_transform(dataset)
299 return pc_comp #, pca
301def chebyshev(dataset):
302 x = range(len(dataset[0]))
303 npol=5
304 p = np.zeros((len(dataset),npol+1))
305 for i,s in enumerate(dataset):
306 cheb = np.polynomial.chebyshev.Chebyshev.fit(x,s,npol)
307 p[i] = cheb.coef
309 return p #, pca
312def dbscan(pcs, events, eps, min_samples, takekm):
313 """
314 improve description, add parameter and returns
316 calculates clusters of high spatial density of the given observations in their feature space.
317 #For example, the first few principal components of the data could be used as features for the classification.
319 Parameters
320 ----------
321 pcs: ndarray
322 %TODO
323 shape(samples, features)
324 ...
326 Returns
327 -------
328 labels: ndarray
329 labels of the clusters of each observation
331 """
332 # pcs (samples, features)
333 # X (samples, features)
334 #try:
335 # X = pcs[:,:order]
336 #except:
337 # X = pcs[:,order]
338 X = pcs
340 # #############################################################################
341 # Compute DBSCAN
343 clusters = DBSCAN(eps, min_samples).fit(X)
344 labels = clusters.labels_
346 comp = clusters.components_
348 comp_means = np.zeros((len(np.unique(labels)) - 1,comp.shape[1]))
350 for i in range(len(np.unique(labels)) - 1):
351 comp_means[i] = np.mean(pcs[labels==i],axis=0)
353 return labels, comp_means
355def cluster_events(features, events, eps, min_samples, takekm, method='DBSCAN'):
356 """F
357 clusters the given events using the given feature space and the clustering algorithm of choice and appends the assigned cluster number to the event's properties.
359 Parameters
360 ----------
362 Returns
363 -------
365 """
366 ######################## function maybe could be even more generic, ? (dependant on datatype of "events" )
367 if method == 'DBSCAN':
368 labels, clusters = dbscan(features,events, eps, min_samples, takekm)
369 elif method == 'kMean':
370 pass
371 # To be implemented
372 #labels = kmeans([])
373 return labels, clusters
375class Peaklist(object):
376 def __init__(self, peaklist):
377 self.list = peaklist
378 self.lastofclass = {}
379 self.lastofclassx = {}
380 self.classesnearby = []
381 self.classesnearbyx = []
382 self.classesnearbypccl = []
383 self.classlist = []
384 self.classamount = 0
385 self.shapes = {}
387def connect_blocks(oldblock):
388 """
389 used to connect blocks.
390 transfers data from the previous analysis block to the current block
391 """
392 newblock = Peaklist([])
393 newblock.lastofclass = oldblock.lastofclass
394 newblock.lastofclassx = oldblock.lastofclassx
395 newblock.classesnearby = oldblock.classesnearby
396 newblock.classesnearbypccl = oldblock.classesnearbypccl
397 newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx]
398 newblock.classamount = oldblock.classamount
399 newblock.len = oldblock.len
400 return newblock
402def alignclusterlabels(labels, peaklist, peaks, data='test'):
403 """
404 used to connect blocks.
405 changes the labels of clusters in the current block to fit with the labels of the previous block
406 """
408 # take first second of new peak data
409 overlapamount = len(peaks[0,peaks[0]<30000])
410 if overlapamount == 0:
411 return None
413 old_peaklist = copy.deepcopy(peaklist) #redundant
414 overlappeaks = copy.deepcopy(peaks[:,:overlapamount])
415 overlap_peaklist = copy.deepcopy(old_peaklist)
417 # delete cluster classifications of the overlap class
418 overlappeaks[3]=[-1]*len(overlappeaks[0])
420 # set nearby pc classes to -1
421 overlap_peaklist.classesnearbypccl = [-1]*len(overlap_peaklist.classesnearbypccl)
423 # create peak labels using ampwalk classifier
424 classified_overlap = ampwalkclassify3_refactor(overlappeaks,overlap_peaklist,glue=True)[0]
426 # for each class
427 for cl in np.unique(classified_overlap[4]):
429 # indexes of the peaks with current class by ampwalk classification
430 labelindex = np.where(classified_overlap[4] == cl)[0]
432 # pc clustering labels that were originally given to those peaks
433 label = labels[labelindex]
435 # index of a peak with the most common translation from ampwalk class to pc clustering class
436 labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]]
438 # pc clustering label belonging to the class cl in the new block
439 newlabel = labels[labelindex]
441 try:
442 peaklist.classesnearbypccl[old_peaklist.classesnearby.index(cl)] = newlabel
443 except:
444 pass
446def ampwalkclassify3_refactor(peaks,peaklist,glue=False):
447 """
449 classifies peaks/EOD_events into different classes by their amplitude.
451 Takes list of peaks and list of properties of the list of the last analysis block
452 Classifies the single peaks in the direction of their occurence in time, based on their amplitude and
453 their previously assigned class based on their waveform (... using the method cluster_events on the
454 principal components of the snippets around the single peaks)
456 Method:
457 calculates differences in amplitude between the current peak and different amplitudeclasses that are nearby. creates new amplitudeclass if no class is close enough. creates no new class if the peaks's waveformclass is a noiseclass of the DBSCAN algorithm. Does not compare peaks of different Waveformclasses.
459 --can be used without prior waveformclasses, resulting in classification solely on the amplitude development
460 pcclclasses need to be set to the same class herefore, .... . not practical, but should be possible to
461 split up into more general functions
462 """
464 classamount = peaklist.classamount
465 lastofclass = peaklist.lastofclass
466 lastofclassx = peaklist.lastofclassx
467 a=0
468 elem = 0
469 thresholder = []
470 comperr = 1
471 classesnearby = peaklist.classesnearby
472 classesnearbyx = peaklist.classesnearbyx
473 classesnearbypccl = peaklist.classesnearbypccl
474 classes = np.zeros((len(peaks[0])))
475 pcclasses = peaks[3]
476 positions = peaks[0]
477 heights = peaks[2]
478 cl = 0
479 maxdistance = 30000 # Max distance to possibly belong to the same class
480 factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2)
481 c=0
483 # loop through all the new peaks
484 for peaknum, p in enumerate(peaks.T):
486 if len(lastofclass) == 0:
487 lastofclass[1] = deque()
488 lastofclassx[1] = deque()
489 lastofclass[1].append(heights[peaknum])
490 lastofclassx[1].append(positions[peaknum])
491 classesnearby.append(1)
492 classesnearbyx.append(-1)
493 classesnearbypccl.append(pcclasses[peaknum])
494 classes[peaknum] = 1
495 classamount += 1
496 continue
498 time1 = time.time()
500 # classes nearby only count if they are within maxdistance
501 for i, cl in enumerate(classesnearby):
502 if (positions[peaknum] - classesnearbyx[i]) > maxdistance:
503 classesnearby.pop(i)
504 classesnearbyx.pop(i)
505 classesnearbypccl.pop(i)
507 # compute mean isi of a class by taking the last 3 pulses in that class
508 lastofclassisis = []
509 for i in classesnearby:
510 lastofclassisis.append(np.median(np.diff(lastofclassx[i])))
512 meanisi = np.mean(lastofclassisis)
514 # stop adding to a class if 40 isis have passed
515 if 32000 > 40*meanisi> 6000:
516 maxdistance = 20*meanisi
518 cl = 0 # 'No class'
519 comperr = 100
520 clnrby = np.unique(classesnearby)
522 last_err = 1000
524 # TODO this assigns peaks with no class to the last clase if there are multiple candidates.
525 # can I fix this?
526 for i in clnrby:
528 # if the class of the current peak is equal to the current evaluated class or current peak has no class
529 if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum]: #or glue==True: #pcclasses[peaknum] == -1: or
530 classmean = np.mean(lastofclass[i]) #mean hight of class
532 # difference between current peak hight and average class hight
533 logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean))
534 logthresh = np.log2(factor)
535 relerror = logerror
537 # if the peak hights are similar
538 if logerror < logthresh: ## SameClass-Condition
539 # if the peaks are close together in distance (20*isi)
540 if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)])<maxdistance:
541 # keep the same class (or in case of no class assign that class)
542 cl = i
543 comperr = relerror
544 time2 = time.time()
545 time1 = time.time()
547 # if a pc class is assigned to the peak
548 if pcclasses[peaknum] != -1:
549 # if the class is kept
550 if cl != 0 :
551 # append this peak to the peaklist for the right class (only keep last 3 peaks)
552 if len(lastofclass[cl]) >= 3:
553 lastofclass[cl].popleft()
554 if len(lastofclassx[cl]) >= 3:
555 lastofclassx[cl].popleft()
556 lastofclass[cl].append(heights[peaknum])
557 lastofclassx[cl].append(positions[peaknum])
558 classes[peaknum] = cl
559 else:
560 # if the class if not the same as any of the existing classes, create new class
561 cl = classamount+1
562 classamount = cl
563 lastofclass[cl] = deque()
564 lastofclassx[cl] = deque()
565 lastofclass[cl].append(heights[peaknum])
566 lastofclassx[cl].append(positions[peaknum])
567 classes[peaknum] = cl
568 classesnearby.append(cl)
569 classesnearbyx.append(positions[peaknum])
570 classesnearbypccl.append(pcclasses[peaknum])
572 # if there are more than 12 classes, delete the class that is furthest away in proximity
573 if len(classesnearby) >= 12: #kacke implementiert?
574 minind = classesnearbyx.index(min(classesnearbyx))
575 del lastofclass[classesnearby[minind]]
576 del lastofclassx[classesnearby[minind]]
577 classesnearby.pop(minind)
578 classesnearbyx.pop(minind)
579 classesnearbypccl.pop(minind)
581 # add position and class to peaklist
582 try:
583 ind=classesnearby.index(cl)
584 classesnearbyx[ind] = positions[peaknum]
585 except ValueError:
586 classesnearby.append(cl)
587 classesnearbyx.append(positions[peaknum])
588 classesnearbypccl.append(pcclasses[peaknum])
590 # if no pc class is assigned to the peak
591 elif glue == True:
592 # if a class is assigned through the peak amp method
593 if cl != 0:
594 # add this class to the peak
595 classes[peaknum] = cl
596 else:
597 # create new class
598 cl = classamount+1
599 classamount = cl
600 lastofclass[cl] = deque()
601 lastofclassx[cl] = deque()
602 lastofclass[cl].append(heights[peaknum])
603 lastofclassx[cl].append(positions[peaknum])
604 classes[peaknum] = cl
605 classesnearby.append(cl)
606 classesnearbyx.append(positions[peaknum])
607 classesnearbypccl.append(pcclasses[peaknum])
608 if len(classesnearby) >= 12: #kacke implementiert?
609 minind = classesnearbyx.index(min(classesnearbyx))
610 del lastofclass[classesnearby[minind]]
611 del lastofclassx[classesnearby[minind]]
612 classesnearby.pop(minind)
613 classesnearbyx.pop(minind)
614 classesnearbypccl.pop(minind)
615 try:
616 ind=classesnearby.index(cl)
617 classesnearbyx[ind] = positions[peaknum]
618 except ValueError:
619 classesnearby.append(cl)
620 classesnearbyx.append(positions[peaknum])
621 classesnearbypccl.append(pcclasses[peaknum])
622 peaklist.lastofclass = lastofclass
623 peaklist.lastofclassx = lastofclassx
624 peaklist.classesnearby = classesnearby
625 peaklist.classesnearbyx = classesnearbyx
626 peaklist.classesnearbypccl = classesnearbypccl
627 peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list)
628 peaklist.classamount = classamount
629 peaks = np.append(peaks,classes[None,:], axis = 0)
631 return peaks, peaklist
633def discard_wave_pulses(peaks, data):
634 """
635 discards events from a pulse_event list which are unusally wide (wider than a tenth of the inter pulse interval), which indicates a wave-type EOD instead of a pulse type
636 """
637 deleteclasses = []
638 for cl in np.unique(peaks[3]):
639 peaksofclass = peaks[:,peaks[3] == cl]
640 isi = np.diff(peaksofclass[0])
641 isi_mean = np.mean(isi)
642 widepeaks = 0
643 isi_tenth_area = lambda x, isi : np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype=int)
644 for p in peaksofclass.T:
645 data = np.array(data)
646 try:
647 for dp_around in data[isi_tenth_area(p[0],isi_mean)]:
648 if dp_around <= p[1]-p[2]:
649 break
650 except (IndexError,ValueError) as e:
651 pass
652 else:
653 widepeaks+=1
654 if widepeaks > len(peaksofclass)*0.5:
655 deleteclasses.append(cl)
656 for cl in deleteclasses:
657 peaks = peaks[:,peaks[3]!=cl]
658 return peaks
660def plot_events_on_data(peaks, data, colors):
661 """
662 plots the detected events onto the data timeseries. If the events are classified, the classes are plotted in different colors and the class -1 (not belonging to a cluster) is plotted in black
663 """
664 plt.plot(range(len(data)),data, color = 'black')
665 if len(peaks)>3:
666 classlist = np.array(peaks[3],dtype=int)
667 if len(peaks) > 4:
668 classlist = np.array(peaks[4],dtype=int)
669 #classlist=labels
670 cmap = plt.get_cmap('jet')
672 for cl in np.unique(classlist):
674 if cl == -1:
675 color = 'black'
676 else:
677 color = colors[cl]
678 peaksofclass = peaks[:,classlist == cl]
679 plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20, label=cl)
680 plt.legend()
681 else:
682 plt.scatter(peaks[0],peaks[1], color = 'red')
683 plt.show()
684 plt.close()
686def discard_short_classes(events, minlen):
687 """
688 returns all events despite events which are in classes with less than minlen members
689 """
691 u, c = np.unique(events[-1],return_counts=True)
693 smallclasses = u[c<minlen]
694 classlist = events[-1]
696 delete = np.zeros(len(classlist))
698 for cl in smallclasses:
699 delete[classlist == cl] = 1
700 events = events[:,delete != 1]
702 return events
704def path_leaf(path):
705 ntpath.basename("a/b/c")
706 head, tail = ntpath.split(path)
707 return tail or ntpath.basename(head)
709def save_EOD_events_to_npmmp(EOD_Events,eods_len,startblock,datasavepath,mmpname='eods.npmmp'):
710 n_EOD_Events = len(EOD_Events[0])
711 savepath = datasavepath+"/"+mmpname
712 if startblock:
713 eods = np.memmap(savepath,
714 dtype='float64', mode='w+',
715 shape=(4,n_EOD_Events), order = 'F')
716 else:
717 dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit
718 eods = np.memmap(savepath, dtype=
719 'float64', mode='r+', offset = dtypesize*eods_len*4,
720 shape=(4,n_EOD_Events), order = 'F')
721 eods[:] = EOD_Events
724def create_threshold_array(data,window,threshold):
725 thr_array = np.zeros(data.shape)
726 for i in range(int(len(data)/window)):
727 thr_array[i*window:(i+1)*window] = np.std(data[i*window:(i+1)*window])*4
728 thr_array[thr_array<threshold] = threshold
729 return thr_array
731def alignlabels(labels,clusters,old_labels,old_clusters,maxlabel):
733 old_labels = old_labels[old_labels!=-1]
735 labels_new = -1*np.ones(labels.shape)
736 newclass = maxlabel
738 for curlabel, cluster in enumerate(clusters):
739 n = np.linalg.norm(old_clusters-cluster,axis=1)
741 if np.min(n) < 0.1:
742 labels_new[labels==curlabel] = old_labels[np.argmin(n)]
743 else:
744 labels_new[labels==curlabel] = newclass
745 newclass = newclass + 1
747 return labels_new
750def analyze_pulse_data(filepath, deltat=10, thresh=0.04, starttime = 0, endtime = 0, cluster_thresh = 0.1, savepath = False,save=False, npmmp = False, plot_eods=False,plot_features=False,plot_steps=False, plot_result=False):
752 """
753 analyzes timeseries of a pulse fish EOD recording
755 Parameters
756 ----------
757 filepath: WAV-file with the recorded timeseries
759 deltat: int, optional
760 time for a single analysisblock (recommended less than a minute, due to principal component clustering on the EOD-waveforms)
762 thresh: float, optional
763 minimum threshold for the peakdetection (if computing frequencies recommended a tiny bit lower than the wished threshold, and instead discard the EOD below the wished threshold after computing the frequencies for each EOD.)
765 starttime: int or, str of int, optional
766 time into the data from where to start the analysis, seconds.
768 endtime: int or str of int, optional
769 time into the data where to end the analysis, seconds, larger than starttime.
771 cluster_thresh: float, optional
772 threshold that decides the cluster density of the EOD waveform features.
774 savepath = Boolean or str, optional
775 path to where to save results and intermediate result, only needed if save or npmmp is True.
776 string to specify a relative path to the directory where results and intermediate results will bed
777 or False to use preset savepath, which is ~/filepath/
778 or True to specify savepath as input when the script is running
780 save: Boolean, optional
781 True to save the results into a npy file at the savepath
783 npmmp: Boolean, optional
784 True to save intermediate results into a npmmp at the savepath, only recommended in case of memory overflow
786 plot_steps: Boolean, optional
787 True to plot the results of each analysis block
789 plot_results: Boolean, optional
790 True to plot the results of the final analysis. Not recommended for long recordings due to %TODO
792 plot_eods: Boolean, optional
793 True to plot the EOD waveforms for each analysis block
795 plot_features: Boolean, optional
796 True to plot the EOD waveform features for each analysis block
798 Returns
799 -------
800 eods: numpy array
801 2D numpy array. first axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class), second axis: EODs in chronological order.
802 """
804 # parameters for the analysis
805 thresh = 0.04 # minimal threshold for peakdetection
806 peakwidth = 20 # width of a peak and minimal distance between two EODs
807 # basic parameters for thunderfish.dataloader.DataLoader
808 verbose = 0
809 channel = 0
810 ultimate_threshold = thresh+0.01
811 startblock = 0
812 starttime = int(starttime)
813 endtime = int(endtime)
814 timegiven = False
815 if endtime > starttime>=0:
816 timegiven = True
817 peaks = np.array([])
818 troughs = np.array([])
819 filename = path_leaf(filepath)
820 eods_len = 0
821 if savepath==False:
822 datasavepath = filename[:-4]
823 elif savepath==True:
824 datasavepath = input('With the option npmmp enabled, a numpy memmap will be saved to: ').lower()
825 else: datasavepath=savepath
827 if save and (os.path.exists(datasavepath+"/eods8_"+filename[:-3]+"npy") or os.path.exists(datasavepath+"/eods5_"+filename[:-3]+"npy")):
828 print('there already exists an analyzed file, aborting. Change the code if you don\'t want to abort')
829 quit()
830 if npmmp:
831 #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
832 proceed = 'y'
833 if proceed != 'y':
834 quit()
835 # starting analysis
836 with DataLoader(filepath, deltat, 0.0, verbose) as data:
838 samplerate = data.samplerate
840 # selected time interval
841 if timegiven == True:
842 parttime1 = starttime*samplerate
843 parttime2 = endtime*samplerate
844 data = data[parttime1:parttime2,channel]
846 #split data into blocks
847 nblock = int(deltat*samplerate)
848 if len(data)%nblock != 0:
849 blockamount = len(data)//nblock + 1
850 else:
851 blockamount = len(data)//nblock
852 #fish = ProgressFish(total = blockamount)
854 pca_cur = 0
855 progress = 0
857 for idx in range(0, blockamount):
858 print('BLOCK %i/%i'%(idx+1,blockamount))
860 blockdata = data[idx*nblock:(idx+1)*nblock,channel]
861 if progress < (idx*100 //blockamount):
862 progress = (idx*100)//blockamount
863 progressstr = ' Filestatus: '
865 # fish.animate(amount = idx, dexextra = progressstr)
866 # delete peaks under absolute threshold
869 #thresh_array = create_threshold_array(blockdata,30000,thresh)
870 pk, tr = detect_peaks(blockdata, thresh)
871 troughs = tr
873 if len(pk) > 3:
875 peaks = makeeventlist(pk,tr,blockdata,peakwidth)
876 peakindices, peakx, peakh = discardnearbyevents(peaks[0],peaks[1],peakwidth)
877 peaks = peaks[:,peakindices]
879 if len(peaks) > 0:
881 #if idx > startblock:
882 # # adding a new block as copy of old list, only difference is peak indexing as it refers to last block
883 # peaklist = connect_blocks(peaklist)
884 #else:
885 # peaklist = Peaklist([])
887 aligned_snips, snip_heights = cut_snippets(blockdata,peaks[0], 30, int_met = "cubic", int_fact = 10,max_offset = 20)
889 pols = chebyshev(aligned_snips)
891 feats = np.zeros((pols.shape[0],pols.shape[1]+1))
892 feats[:,:6] = pols
893 feats[:,-1] = snip_heights*0.1
894 #pcs, pca_cur = pc(aligned_snips) #pc_refactor(aligned_snips)
896 minpeaks = 3 if deltat < 2 else 10
898 labels, clusters = cluster_events(feats, peaks, cluster_thresh, minpeaks, False, method = 'DBSCAN')
899 peaks = np.append(peaks,[labels], axis = 0)
901 if idx > startblock:
902 # instead of the peaklist I would have to add the previous cluster means
903 # alignclusterlabels(labels, peaklist, peaks,data=blockdata)
904 peaks[-1] = alignlabels(labels,clusters,old_labels,old_clusters,maxlabel)
906 old_labels = np.unique(peaks[-1])
907 old_clusters = clusters
908 #I would want peaks updated here to have the right pc classes as well..
910 #peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude
912 minlen = 5
913 peaks = discard_short_classes(peaks, minlen)
915 if len(peaks[0]) > 0:
916 peaks = discard_wave_pulses(peaks, blockdata)
917 # delete peaks under absolute threshold
918 #thresh_array = create_threshold_array(blockdata,30000)
919 #peaks = peaks[:,peaks[1]>thresh_array[list(map(int,peaks[0]))]]
921 cmap = plt.get_cmap('jet')
922 colors = cmap(np.linspace(0, 1.0, 10))
924 if plot_steps == True:
925 plot_events_on_data(peaks, blockdata, colors)
926 pass
928 for lab in np.unique(labels):
930 if lab == -1:
931 c = 'k'
932 z=-1
933 else:
934 c=colors[lab]
935 z=1
936 if plot_eods==True:
937 plt.plot(range(aligned_snips.shape[1]),np.transpose(aligned_snips[labels == lab]),color=c,zorder=z,label=lab)
939 if plot_eods==True:
940 plt.title('Detected and classified EODs')
941 plt.xlabel('time [ms]')
942 plt.ylabel('signal (normalized)')
944 phandles, plabels = plt.gca().get_legend_handles_labels()
945 by_label = dict(zip(plabels, phandles))
946 plt.legend(by_label.values(), by_label.keys())
947 plt.show()
949 for lab in np.unique(labels):
951 if lab == -1:
952 c = 'k'
953 z = -1
954 else:
955 c = colors[lab]
956 z=1
958 if plot_features==True:
959 plt.plot(np.squeeze(np.transpose(feats[labels == lab])),color=c,zorder=z,label=lab)
961 if plot_features==True:
962 plt.title('EOD Features')
963 plt.xlabel('feature [#]')
964 plt.ylabel('value [a.u.]')
966 phandles, plabels = plt.gca().get_legend_handles_labels()
967 by_label = dict(zip(plabels, phandles))
968 plt.legend(by_label.values(), by_label.keys())
969 plt.show()
971 #peaklist.len = nblock
972 worldpeaks = np.copy(peaks)
973 worldpeaks[0] = worldpeaks[0] + (idx*nblock)
974 # delete the classification that only considers wave shape.
975 #thisblock_eods = np.delete(worldpeaks,3,0)
976 thisblock_eods = worldpeaks
978 if idx == startblock:
979 maxlabel = np.max(peaks[-1]) + 1
980 else:
981 maxlabel = np.max([maxlabel, (np.max(peaks[-1]) + 1)])
983 if npmmp:
984 if idx == startblock:
985 if not os.path.exists(datasavepath):
986 os.makedirs(datasavepath)
987 mmpname = "eods_"+filename[:-3]+"npmmp"
988 # save the peaks of the current buffered part to a numpy-memmap on the disk
989 save_EOD_events_to_npmmp(thisblock_eods,eods_len,idx==startblock,datasavepath,mmpname)
990 eods_len += len(thisblock_eods[0])
991 else:
992 if idx > 0:
993 all_eods = np.concatenate((all_eods,thisblock_eods),axis = 1)
994 else:
995 all_eods = thisblock_eods
996 if plot_steps == True:
997 print('FINAL RESULTS')
998 plot_events_on_data(all_eods, data[:,channel], colors)
999 #plot_events_on_data(all_eods,data)
1000 print('returnes analyzed EODS. Calculate frequencies using all of these but discard the data from the EODS within the lowest few percent of amplitude')
1002 if npmmp:
1003 all_eods = np.memmap(datasavepath+'/'+mmpname, dtype='float64', mode='r+', shape=(4,eods_len), order = 'F')
1004 if save == 1:
1005 path = filename[:-4]+"/"
1006 if not os.path.exists(path):
1007 os.makedirs(path)
1008 if eods_len > 0:
1009 np.save(datasavepath+"/eods8_"+filename[:-3]+"npy", all_eods)
1010 print('Saved!')
1011 else:
1012 print('not saved')
1013 return all_eods
1016def main():
1018 eods = analyze_pulse_data(sys.argv[1], save=True, npmmp=True)
1019 print(eods)
1021if __name__ == '__main__':
1022 main()