Coverage for src / thunderfish / pulsetracker.py: 0%

536 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-15 17:50 +0000

1""" 

2by Dexter Frueh 

3""" 

4 

5import os 

6import sys 

7import ntpath 

8import time 

9import copy 

10import numpy as np 

11#from fish import ProgressFish 

12import matplotlib.pyplot as plt 

13 

14from shutil import copy2 

15from collections import deque 

16from scipy import stats 

17from scipy import signal 

18from scipy import optimize 

19from scipy.interpolate import interp1d 

20from sklearn.preprocessing import StandardScaler 

21from sklearn.decomposition import PCA 

22from sklearn.cluster import DBSCAN 

23from sklearn.cluster import AgglomerativeClustering 

24from thunderlab.dataloader import DataLoader 

25from thunderlab.eventdetection import detect_peaks 

26 

27 

28def makeeventlist(main_event_positions,side_event_positions,data,event_width=20): 

29 """ 

30 Generate array of events that might be EODs of a pulse-type fish, using the location of peaks and troughs, 

31 the data and an optional width of an supposed EOD-event. 

32 The generated event-array contains location and height of such events. 

33 The height of the events is calculated by its height-difference to nearby troughs and main events that have no side events in a range closer than event_width are discarded and not considered as EOD event. 

34 

35 Parameters 

36 ---------- 

37 main_event_positions: array of int or float 

38 Positions of the detected main events in the data time series. Either peaks or troughs. 

39 side_event_positions: array of int or float 

40 Positions of the detected side events in the data time series. The complimentary event to the main events. 

41 data: array of float 

42 The given data. 

43 event_width: int or float, optional 

44 

45 Returns 

46 ------- 

47 EOD_events: ndarray 

48 2D array containing data with 'float' type, size (number_of_properties = 3, number_of_events). 

49 Generated and combined data of the detected events in an array with arrays of x, y and height along the first axis. 

50 

51 """ 

52 mainfirst = int((min(main_event_positions[0],side_event_positions[0])<side_event_positions[0])) # determines if there is a peak or through first. Evaluates to 1 if there is a peak first. 

53 main_x = main_event_positions 

54 main_y = data[main_event_positions] 

55 # empty placeholders, filled in the next step while iterating over the properties of single main 

56 main_h = np.zeros(len(main_event_positions)) 

57 main_real = np.ones(len(main_event_positions)) 

58 # iteration over the properties of the single main 

59 for ind,(x, y, h, r) in enumerate(np.nditer([main_x, main_y, main_h, main_real], op_flags=[["readonly"],['readonly'],['readwrite'],['readwrite']])): 

60 l_side_ind = ind - mainfirst 

61 r_side_ind = l_side_ind + 1 

62 try: 

63 r_side_x = side_event_positions[r_side_ind] 

64 r_distance = r_side_x - x 

65 r_side_y = data[r_side_x] 

66 except: 

67 pass 

68 try: 

69 l_side_x = side_event_positions[l_side_ind] 

70 l_distance = x - l_side_x 

71 l_side_y = data[l_side_x] 

72 except: 

73 pass # ignore left or rightmost events which throw IndexError 

74 # calculate distances to the two side events next to the main event and mark all events where the next side events are not closer than maximum event_width as unreal. If an event might be an EOD, then calculate its height. 

75 if l_side_ind >= 0 and r_side_ind < len(side_event_positions): 

76 if min((l_distance),(r_distance)) > event_width: 

77 r[...] = False 

78 elif max((l_distance),(r_distance)) <= event_width: 

79 h[...] = max(abs(y-l_side_y),abs(y-r_side_y)) #calculated using absolutes in case of for example troughs instead of peaks as main events  

80 else: 

81 if (l_distance)<(r_distance): # evaluated only when exactly one side event is out of reach of the event width. Then the closer event will be the correct event 

82 h[...] = abs(y-l_side_y) 

83 else: 

84 h[...] = abs(y-r_side_y) 

85 # check corner cases 

86 elif l_side_ind == -1: 

87 if r_distance > event_width: 

88 r[...] = False 

89 else: 

90 h[...] = y-r_side_y 

91 elif r_side_ind == len(side_event_positions): 

92 if l_distance> event_width: 

93 r[...] = False 

94 else: 

95 h[...] = y-l_side_y 

96 # generate return array and discard all events that are not marked as real 

97 EOD_events = np.array([main_x, main_y, main_h], dtype = float)[:,main_real==1] 

98 return EOD_events 

99 

100def discardnearbyevents(event_locations, event_heights, min_distance): 

101 """ 

102 Given a number of events with given location and heights, returns a selection 

103 of these events where no event is closer than eventwidth to the next event. 

104 Among neighboring events closer than eventwidth the event with smaller height 

105 is discarded. 

106 Used to discard sidepeaks in detected multiple peaks of single EOD-pulses and 

107 only keep the largest event_height and the corresponding location as 

108 representative of the whole EOD pulse. 

109 

110 Parameters 

111 ---------- 

112 event_locations: array of int or float 

113 Positions of the given events in the data time series. 

114 event_heights: array of int or float 

115 Heights of the given events, indices refer to the same events as in 

116 event_locations. 

117 min_distance: int or float 

118 minimal distance between events before one of the events gets discarded. 

119 

120 Returns 

121 ------- 

122 event_locations: array of int or float 

123 Positions of the returned events in the data time series. 

124 event_heights: array of int or float 

125 Heights of the returned events, indices refer to the same events as in 

126 event_locations. 

127 

128 """ 

129 unchanged = False 

130 counter = 0 

131 event_indices = np.arange(0,len(event_locations)+1,1) 

132 

133 while unchanged == False:# and counter<=200: 

134 x_diffs = np.diff(event_locations) 

135 events_delete = np.zeros(len(event_locations)) 

136 

137 for i, diff in enumerate(x_diffs): 

138 

139 if diff < min_distance: 

140 

141 if event_heights[i+1] > event_heights[i] : 

142 events_delete[i] = 1 

143 else: 

144 events_delete[i+1] = 1 

145 

146 event_heights = event_heights[events_delete!=1] 

147 event_locations = event_locations[events_delete!=1] 

148 event_indices = event_indices[np.where(events_delete!=1)[0]] 

149 if np.count_nonzero(events_delete)==0: 

150 unchanged = True 

151 counter += 1 

152 if counter > 2000: 

153 print('Warning: unusual many discarding steps needed, unusually dense events') 

154 pass 

155 return event_indices, event_locations, event_heights 

156 

157def crosscorrelation(sig, data): 

158 'returns crosscorrelation of two arrays, the first array should have a length equal to or smaller than the second array.' 

159 return signal.fftconvolve(data, sig[::-1], mode='valid') 

160 

161def interpol(data, kind): 

162 """ 

163 interpolates the given data using scipy interpolation python package 

164 

165 Parameters 

166 ---------- 

167 data: array 

168 

169 kind: string or int 

170 (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used 

171 

172 Returns 

173 ------- 

174 interpolation: function 

175 

176 """ 

177 width = len(data) 

178 x = np.linspace(0, width-1, num = width, endpoint = True) 

179 #return interp1d(x, data[0:width], kind, assume_sorted=True) 

180 return interp1d(x, data[0:width], kind) 

181 

182def interpolated_array(data, kind, int_fact): 

183 """ 

184 returns an interpolated array of the given dataarray. 

185 

186 Parameters 

187 ---------- 

188 data: array 

189 

190 kind: string or int 

191 (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used 

192 

193 int_fact: int 

194 factor by which the interpolated array is larger than the original array 

195 

196 Returns 

197 ------- 

198 interpolated array: array 

199 

200 """ 

201 return interpol(data,kind)(np.arange(0, len(data)-1, 1/int_fact)) 

202 

203def cut_snippets(data,event_locations,cut_width,int_met="linear",int_fact=10,max_offset = 1000000): 

204 """ 

205 cuts intervals from a data array, interpolates and aligns them and returns them in a list 

206 

207 TODO: ALIGN THEM TO CAUSE LEAST SQUARE ERROR 

208 

209 Parameters 

210 ---------- 

211 data: array 

212 

213 event_locations: array 

214 

215 cut_width: [int, int] 

216 lower and upper limit of the intervals relative to the event locations. 

217 f.e. [-15,15] indicates an interval of 30 datapoints around each event location 

218 

219 int_met: string or int 

220 method of interpolation. (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used 

221 

222 int_fact: int 

223 factor by which the interpolated array is larger than the original 

224 

225 max_offset: float 

226 maximal offset by which the interpolated intervals can be moved to be aligned with each other. offset relative to the datapoints of the original data. 

227 

228 Returns 

229 ------- 

230 aligned_snips: twodimensional nparray 

231 the processed intervals (interval#,intervallen) 

232 

233 """ 

234 snippets = [] 

235 heights = np.zeros(len(event_locations)) 

236 cut_width = [-cut_width, cut_width] 

237 #alignwidth = int(np.ceil((max_offset) * int_fact)) 

238 alignwidth = 50 

239 

240 for pos in event_locations.astype('int'): 

241 snippets.append(data[pos+cut_width[0]:pos+cut_width[1]]) 

242 ipoled_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact-int_fact)) 

243 

244 for i, snip in enumerate(snippets): 

245 if len(snip) < ((cut_width[1]-cut_width[0])): 

246 if i == 0: 

247 snip = np.concatenate([np.zeros([((cut_width[1]-cut_width[0]) - len(snip))]),np.array(snip)]) 

248 if i == len(snippets): 

249 snip = np.concatenate([snip, np.zeros([((cut_width[1]-cut_width[0])-len(snip))])]) 

250 else: 

251 snip = np.zeros([(cut_width[1]-cut_width[0])]) 

252 

253 #f_interpoled = interpol(snip, int_met) #if len(snip) > 0 else np.zeros([(cut_width[1]-cut_width[0]-1)*int_fact ]) 

254 interpoled_snip = interpolated_array(snip, int_met, 10)#f_interpoled(np.arange(0, len(snip)-1, 1/int_fact)) 

255 intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) 

256 

257 

258 if intsnipheight == 0: 

259 intsnipheight = 1 

260 

261 interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight 

262 ipoled_snips[i] = interpoled_snip 

263 

264 mean = np.mean(ipoled_snips, axis = 0) 

265 

266 aligned_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])* int_fact-(2*alignwidth)-int_fact)) 

267 

268 for i, interpoled_snip in enumerate(ipoled_snips): 

269 cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean) 

270 #cc = crosscorrelation(interpoled_snip[15 + 10*-cut_width[0]-10*7:-15+ -10*cut_width[1]+ 31], mean[10*-cut_width[0]-10*7:-10*cut_width[1]+31]) 

271 offset = -alignwidth + np.argmax(cc) 

272 aligned_snip = interpoled_snip[alignwidth-offset:-alignwidth-offset] if offset != -alignwidth else interpoled_snip[2*alignwidth:] 

273 if len(aligned_snip[~np.isnan(aligned_snip)])>0: 

274 aligned_snips[i] = aligned_snip 

275 try: 

276 heights[i] = np.max(interpoled_snip[alignwidth-offset:-alignwidth-offset]) - np.min(interpoled_snip[alignwidth-offset:-alignwidth-offset]) 

277 except: 

278 heights[i] = np.max(interpoled_snip[2*alignwidth:]) - np.min(interpoled_snip[2*alignwidth:]) 

279 return aligned_snips, heights 

280 

281def pc(dataset): 

282 """ 

283 Calculates the principal components of a dataset using the python module scikit-learn's principal component analysis 

284 

285 Parameters 

286 ---------- 

287 dataset: ndarray 

288 dataset of which the principal components are to be calculated. 

289 twodimensional array of shape (observations, features) 

290 

291 Returns 

292 ------- 

293 pc_comp: ndarray 

294 principal components of the dataset 

295 

296 """ 

297 pca = PCA(n_components=10) 

298 pc_comp = pca.fit_transform(dataset) 

299 

300 return pc_comp #, pca 

301 

302def chebyshev(dataset): 

303 x = range(len(dataset[0])) 

304 npol=5 

305 p = np.zeros((len(dataset),npol+1)) 

306 for i,s in enumerate(dataset): 

307 cheb = np.polynomial.chebyshev.Chebyshev.fit(x,s,npol) 

308 p[i] = cheb.coef 

309 

310 return p #, pca 

311 

312 

313def dbscan(pcs, events, eps, min_samples, takekm): 

314 """ 

315 improve description, add parameter and returns 

316 

317 calculates clusters of high spatial density of the given observations in their feature space.  

318 #For example, the first few principal components of the data could be used as features for the classification. 

319 

320 Parameters 

321 ---------- 

322 pcs: ndarray 

323 %TODO 

324 shape(samples, features) 

325 ... 

326 

327 Returns 

328 ------- 

329 labels: ndarray 

330 labels of the clusters of each observation 

331 

332 """ 

333 # pcs (samples, features) 

334 # X (samples, features) 

335 #try: 

336 # X = pcs[:,:order] 

337 #except: 

338 # X = pcs[:,order] 

339 X = pcs 

340 

341 # ############################################################################# 

342 # Compute DBSCAN 

343 

344 clusters = DBSCAN(eps, min_samples).fit(X) 

345 labels = clusters.labels_ 

346 

347 comp = clusters.components_ 

348 

349 comp_means = np.zeros((len(np.unique(labels)) - 1,comp.shape[1])) 

350 

351 for i in range(len(np.unique(labels)) - 1): 

352 comp_means[i] = np.mean(pcs[labels==i],axis=0) 

353 

354 return labels, comp_means 

355 

356def cluster_events(features, events, eps, min_samples, takekm, method='DBSCAN'): 

357 """F 

358 clusters the given events using the given feature space and the clustering algorithm of choice and appends the assigned cluster number to the event's properties. 

359 

360 Parameters 

361 ---------- 

362 

363 Returns 

364 ------- 

365 

366 """ 

367 ######################## function maybe could be even more generic, ? (dependant on datatype of "events" ) 

368 if method == 'DBSCAN': 

369 labels, clusters = dbscan(features,events, eps, min_samples, takekm) 

370 elif method == 'kMean': 

371 pass 

372 # To be implemented 

373 #labels = kmeans([]) 

374 return labels, clusters 

375 

376class Peaklist(object): 

377 def __init__(self, peaklist): 

378 self.list = peaklist 

379 self.lastofclass = {} 

380 self.lastofclassx = {} 

381 self.classesnearby = [] 

382 self.classesnearbyx = [] 

383 self.classesnearbypccl = [] 

384 self.classlist = [] 

385 self.classamount = 0 

386 self.shapes = {} 

387 

388def connect_blocks(oldblock): 

389 """ 

390 used to connect blocks. 

391 transfers data from the previous analysis block to the current block 

392 """ 

393 newblock = Peaklist([]) 

394 newblock.lastofclass = oldblock.lastofclass 

395 newblock.lastofclassx = oldblock.lastofclassx 

396 newblock.classesnearby = oldblock.classesnearby 

397 newblock.classesnearbypccl = oldblock.classesnearbypccl 

398 newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] 

399 newblock.classamount = oldblock.classamount 

400 newblock.len = oldblock.len 

401 return newblock 

402 

403def alignclusterlabels(labels, peaklist, peaks, data='test'): 

404 """ 

405 used to connect blocks. 

406 changes the labels of clusters in the current block to fit with the labels of the previous block 

407 """ 

408 

409 # take first second of new peak data 

410 overlapamount = len(peaks[0,peaks[0]<30000]) 

411 if overlapamount == 0: 

412 return None 

413 

414 old_peaklist = copy.deepcopy(peaklist) #redundant  

415 overlappeaks = copy.deepcopy(peaks[:,:overlapamount]) 

416 overlap_peaklist = copy.deepcopy(old_peaklist) 

417 

418 # delete cluster classifications of the overlap class 

419 overlappeaks[3]=[-1]*len(overlappeaks[0]) 

420 

421 # set nearby pc classes to -1 

422 overlap_peaklist.classesnearbypccl = [-1]*len(overlap_peaklist.classesnearbypccl) 

423 

424 # create peak labels using ampwalk classifier 

425 classified_overlap = ampwalkclassify3_refactor(overlappeaks,overlap_peaklist,glue=True)[0] 

426 

427 # for each class 

428 for cl in np.unique(classified_overlap[4]): 

429 

430 # indexes of the peaks with current class by ampwalk classification 

431 labelindex = np.where(classified_overlap[4] == cl)[0] 

432 

433 # pc clustering labels that were originally given to those peaks 

434 label = labels[labelindex] 

435 

436 # index of a peak with the most common translation from ampwalk class to pc clustering class 

437 labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]] 

438 

439 # pc clustering label belonging to the class cl in the new block 

440 newlabel = labels[labelindex] 

441 

442 try: 

443 peaklist.classesnearbypccl[old_peaklist.classesnearby.index(cl)] = newlabel 

444 except: 

445 pass 

446 

447def ampwalkclassify3_refactor(peaks,peaklist,glue=False): 

448 """ 

449 

450 classifies peaks/EOD_events into different classes by their amplitude. 

451 

452 Takes list of peaks and list of properties of the list of the last analysis block 

453 Classifies the single peaks in the direction of their occurence in time, based on their amplitude and 

454 their previously assigned class based on their waveform (... using the method cluster_events on the 

455 principal components of the snippets around the single peaks) 

456 

457 Method: 

458 calculates differences in amplitude between the current peak and different amplitudeclasses that are nearby. creates new amplitudeclass if no class is close enough. creates no new class if the peaks's waveformclass is a noiseclass of the DBSCAN algorithm. Does not compare peaks of different Waveformclasses. 

459 

460 --can be used without prior waveformclasses, resulting in classification solely on the amplitude development 

461 pcclclasses need to be set to the same class herefore, .... . not practical, but should be possible to 

462 split up into more general functions 

463 """ 

464 

465 classamount = peaklist.classamount 

466 lastofclass = peaklist.lastofclass 

467 lastofclassx = peaklist.lastofclassx 

468 a=0 

469 elem = 0 

470 thresholder = [] 

471 comperr = 1 

472 classesnearby = peaklist.classesnearby 

473 classesnearbyx = peaklist.classesnearbyx 

474 classesnearbypccl = peaklist.classesnearbypccl 

475 classes = np.zeros((len(peaks[0]))) 

476 pcclasses = peaks[3] 

477 positions = peaks[0] 

478 heights = peaks[2] 

479 cl = 0 

480 maxdistance = 30000 # Max distance to possibly belong to the same class 

481 factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) 

482 c=0 

483 

484 # loop through all the new peaks 

485 for peaknum, p in enumerate(peaks.T): 

486 

487 if len(lastofclass) == 0: 

488 lastofclass[1] = deque() 

489 lastofclassx[1] = deque() 

490 lastofclass[1].append(heights[peaknum]) 

491 lastofclassx[1].append(positions[peaknum]) 

492 classesnearby.append(1) 

493 classesnearbyx.append(-1) 

494 classesnearbypccl.append(pcclasses[peaknum]) 

495 classes[peaknum] = 1 

496 classamount += 1 

497 continue 

498 

499 time1 = time.time() 

500 

501 # classes nearby only count if they are within maxdistance 

502 for i, cl in enumerate(classesnearby): 

503 if (positions[peaknum] - classesnearbyx[i]) > maxdistance: 

504 classesnearby.pop(i) 

505 classesnearbyx.pop(i) 

506 classesnearbypccl.pop(i) 

507 

508 # compute mean isi of a class by taking the last 3 pulses in that class 

509 lastofclassisis = [] 

510 for i in classesnearby: 

511 lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) 

512 

513 meanisi = np.mean(lastofclassisis) 

514 

515 # stop adding to a class if 40 isis have passed 

516 if 32000 > 40*meanisi> 6000: 

517 maxdistance = 20*meanisi 

518 

519 cl = 0 # 'No class' 

520 comperr = 100 

521 clnrby = np.unique(classesnearby) 

522 

523 last_err = 1000 

524 

525 # TODO this assigns peaks with no class to the last clase if there are multiple candidates.  

526 # can I fix this? 

527 for i in clnrby: 

528 

529 # if the class of the current peak is equal to the current evaluated class or current peak has no class 

530 if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum]: #or glue==True: #pcclasses[peaknum] == -1: or  

531 classmean = np.mean(lastofclass[i]) #mean hight of class 

532 

533 # difference between current peak hight and average class hight 

534 logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) 

535 logthresh = np.log2(factor) 

536 relerror = logerror 

537 

538 # if the peak hights are similar 

539 if logerror < logthresh: ## SameClass-Condition 

540 # if the peaks are close together in distance (20*isi) 

541 if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)])<maxdistance: 

542 # keep the same class (or in case of no class assign that class) 

543 cl = i 

544 comperr = relerror 

545 time2 = time.time() 

546 time1 = time.time() 

547 

548 # if a pc class is assigned to the peak 

549 if pcclasses[peaknum] != -1: 

550 # if the class is kept 

551 if cl != 0 : 

552 # append this peak to the peaklist for the right class (only keep last 3 peaks) 

553 if len(lastofclass[cl]) >= 3: 

554 lastofclass[cl].popleft() 

555 if len(lastofclassx[cl]) >= 3: 

556 lastofclassx[cl].popleft() 

557 lastofclass[cl].append(heights[peaknum]) 

558 lastofclassx[cl].append(positions[peaknum]) 

559 classes[peaknum] = cl 

560 else: 

561 # if the class if not the same as any of the existing classes, create new class 

562 cl = classamount+1 

563 classamount = cl 

564 lastofclass[cl] = deque() 

565 lastofclassx[cl] = deque() 

566 lastofclass[cl].append(heights[peaknum]) 

567 lastofclassx[cl].append(positions[peaknum]) 

568 classes[peaknum] = cl 

569 classesnearby.append(cl) 

570 classesnearbyx.append(positions[peaknum]) 

571 classesnearbypccl.append(pcclasses[peaknum]) 

572 

573 # if there are more than 12 classes, delete the class that is furthest away in proximity 

574 if len(classesnearby) >= 12: #kacke implementiert?  

575 minind = classesnearbyx.index(min(classesnearbyx)) 

576 del lastofclass[classesnearby[minind]] 

577 del lastofclassx[classesnearby[minind]] 

578 classesnearby.pop(minind) 

579 classesnearbyx.pop(minind) 

580 classesnearbypccl.pop(minind) 

581 

582 # add position and class to peaklist 

583 try: 

584 ind=classesnearby.index(cl) 

585 classesnearbyx[ind] = positions[peaknum] 

586 except ValueError: 

587 classesnearby.append(cl) 

588 classesnearbyx.append(positions[peaknum]) 

589 classesnearbypccl.append(pcclasses[peaknum]) 

590 

591 # if no pc class is assigned to the peak 

592 elif glue == True: 

593 # if a class is assigned through the peak amp method 

594 if cl != 0: 

595 # add this class to the peak 

596 classes[peaknum] = cl 

597 else: 

598 # create new class 

599 cl = classamount+1 

600 classamount = cl 

601 lastofclass[cl] = deque() 

602 lastofclassx[cl] = deque() 

603 lastofclass[cl].append(heights[peaknum]) 

604 lastofclassx[cl].append(positions[peaknum]) 

605 classes[peaknum] = cl 

606 classesnearby.append(cl) 

607 classesnearbyx.append(positions[peaknum]) 

608 classesnearbypccl.append(pcclasses[peaknum]) 

609 if len(classesnearby) >= 12: #kacke implementiert?  

610 minind = classesnearbyx.index(min(classesnearbyx)) 

611 del lastofclass[classesnearby[minind]] 

612 del lastofclassx[classesnearby[minind]] 

613 classesnearby.pop(minind) 

614 classesnearbyx.pop(minind) 

615 classesnearbypccl.pop(minind) 

616 try: 

617 ind=classesnearby.index(cl) 

618 classesnearbyx[ind] = positions[peaknum] 

619 except ValueError: 

620 classesnearby.append(cl) 

621 classesnearbyx.append(positions[peaknum]) 

622 classesnearbypccl.append(pcclasses[peaknum]) 

623 peaklist.lastofclass = lastofclass 

624 peaklist.lastofclassx = lastofclassx 

625 peaklist.classesnearby = classesnearby 

626 peaklist.classesnearbyx = classesnearbyx 

627 peaklist.classesnearbypccl = classesnearbypccl 

628 peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) 

629 peaklist.classamount = classamount 

630 peaks = np.append(peaks,classes[None,:], axis = 0) 

631 

632 return peaks, peaklist 

633 

634def discard_wave_pulses(peaks, data): 

635 """ 

636 discards events from a pulse_event list which are unusally wide (wider than a tenth of the inter pulse interval), which indicates a wave-type EOD instead of a pulse type 

637 """ 

638 deleteclasses = [] 

639 for cl in np.unique(peaks[3]): 

640 peaksofclass = peaks[:,peaks[3] == cl] 

641 isi = np.diff(peaksofclass[0]) 

642 isi_mean = np.mean(isi) 

643 widepeaks = 0 

644 isi_tenth_area = lambda x, isi : np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype=int) 

645 for p in peaksofclass.T: 

646 data = np.array(data) 

647 try: 

648 for dp_around in data[isi_tenth_area(p[0],isi_mean)]: 

649 if dp_around <= p[1]-p[2]: 

650 break 

651 except (IndexError,ValueError) as e: 

652 pass 

653 else: 

654 widepeaks+=1 

655 if widepeaks > len(peaksofclass)*0.5: 

656 deleteclasses.append(cl) 

657 for cl in deleteclasses: 

658 peaks = peaks[:,peaks[3]!=cl] 

659 return peaks 

660 

661def plot_events_on_data(peaks, data, colors): 

662 """ 

663 plots the detected events onto the data timeseries. If the events are classified, the classes are plotted in different colors and the class -1 (not belonging to a cluster) is plotted in black 

664 """ 

665 plt.plot(range(len(data)),data, color = 'black') 

666 if len(peaks)>3: 

667 classlist = np.array(peaks[3],dtype=int) 

668 if len(peaks) > 4: 

669 classlist = np.array(peaks[4],dtype=int) 

670 #classlist=labels 

671 cmap = plt.get_cmap('jet') 

672 

673 for cl in np.unique(classlist): 

674 

675 if cl == -1: 

676 color = 'black' 

677 else: 

678 color = colors[cl] 

679 peaksofclass = peaks[:,classlist == cl] 

680 plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20, label=cl) 

681 plt.legend() 

682 else: 

683 plt.scatter(peaks[0],peaks[1], color = 'red') 

684 plt.show() 

685 plt.close() 

686 

687def discard_short_classes(events, minlen): 

688 """  

689 returns all events despite events which are in classes with less than minlen members 

690 """ 

691 

692 u, c = np.unique(events[-1],return_counts=True) 

693 

694 smallclasses = u[c<minlen] 

695 classlist = events[-1] 

696 

697 delete = np.zeros(len(classlist)) 

698 

699 for cl in smallclasses: 

700 delete[classlist == cl] = 1 

701 events = events[:,delete != 1] 

702 

703 return events 

704 

705def path_leaf(path): 

706 ntpath.basename("a/b/c") 

707 head, tail = ntpath.split(path) 

708 return tail or ntpath.basename(head) 

709 

710def save_EOD_events_to_npmmp(EOD_Events,eods_len,startblock,datasavepath,mmpname='eods.npmmp'): 

711 n_EOD_Events = len(EOD_Events[0]) 

712 savepath = datasavepath+"/"+mmpname 

713 if startblock: 

714 eods = np.memmap(savepath, 

715 dtype='float64', mode='w+', 

716 shape=(4,n_EOD_Events), order = 'F') 

717 else: 

718 dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit 

719 eods = np.memmap(savepath, dtype= 

720 'float64', mode='r+', offset = dtypesize*eods_len*4, 

721 shape=(4,n_EOD_Events), order = 'F') 

722 eods[:] = EOD_Events 

723 

724 

725def create_threshold_array(data,window,threshold): 

726 thr_array = np.zeros(data.shape) 

727 for i in range(int(len(data)/window)): 

728 thr_array[i*window:(i+1)*window] = np.std(data[i*window:(i+1)*window])*4 

729 thr_array[thr_array<threshold] = threshold 

730 return thr_array 

731 

732def alignlabels(labels,clusters,old_labels,old_clusters,maxlabel): 

733 

734 old_labels = old_labels[old_labels!=-1] 

735 

736 labels_new = -1*np.ones(labels.shape) 

737 newclass = maxlabel 

738 

739 for curlabel, cluster in enumerate(clusters): 

740 n = np.linalg.norm(old_clusters-cluster,axis=1) 

741 

742 if np.min(n) < 0.1: 

743 labels_new[labels==curlabel] = old_labels[np.argmin(n)] 

744 else: 

745 labels_new[labels==curlabel] = newclass 

746 newclass = newclass + 1 

747 

748 return labels_new 

749 

750 

751def analyze_pulse_data(filepath, deltat=10, thresh=0.04, starttime = 0, endtime = 0, cluster_thresh = 0.1, savepath = False,save=False, npmmp = False, plot_eods=False,plot_features=False,plot_steps=False, plot_result=False): 

752 

753 """ 

754 analyzes timeseries of a pulse fish EOD recording 

755 

756 Parameters 

757 ---------- 

758 filepath: WAV-file with the recorded timeseries 

759 

760 deltat: int, optional 

761 time for a single analysisblock (recommended less than a minute, due to principal component clustering on the EOD-waveforms) 

762 

763 thresh: float, optional 

764 minimum threshold for the peakdetection (if computing frequencies recommended a tiny bit lower than the wished threshold, and instead discard the EOD below the wished threshold after computing the frequencies for each EOD.) 

765 

766 starttime: int or, str of int, optional 

767 time into the data from where to start the analysis, seconds. 

768 

769 endtime: int or str of int, optional 

770 time into the data where to end the analysis, seconds, larger than starttime. 

771  

772 cluster_thresh: float, optional 

773 threshold that decides the cluster density of the EOD waveform features. 

774 

775 savepath = Boolean or str, optional 

776 path to where to save results and intermediate result, only needed if save or npmmp is True. 

777 string to specify a relative path to the directory where results and intermediate results will bed 

778 or False to use preset savepath, which is ~/filepath/ 

779 or True to specify savepath as input when the script is running 

780 

781 save: Boolean, optional 

782 True to save the results into a npy file at the savepath 

783 

784 npmmp: Boolean, optional 

785 True to save intermediate results into a npmmp at the savepath, only recommended in case of memory overflow 

786 

787 plot_steps: Boolean, optional 

788 True to plot the results of each analysis block 

789 

790 plot_results: Boolean, optional 

791 True to plot the results of the final analysis. Not recommended for long recordings due to %TODO 

792 

793 plot_eods: Boolean, optional 

794 True to plot the EOD waveforms for each analysis block 

795 

796 plot_features: Boolean, optional 

797 True to plot the EOD waveform features for each analysis block 

798 

799 Returns 

800 ------- 

801 eods: numpy array 

802 2D numpy array. first axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class), second axis: EODs in chronological order. 

803 """ 

804 

805 # parameters for the analysis 

806 thresh = 0.04 # minimal threshold for peakdetection 

807 peakwidth = 20 # width of a peak and minimal distance between two EODs 

808 # basic parameters for thunderfish.dataloader.DataLoader 

809 verbose = 0 

810 channel = 0 

811 ultimate_threshold = thresh+0.01 

812 startblock = 0 

813 starttime = int(starttime) 

814 endtime = int(endtime) 

815 timegiven = False 

816 if endtime > starttime>=0: 

817 timegiven = True 

818 peaks = np.array([]) 

819 troughs = np.array([]) 

820 filename = path_leaf(filepath) 

821 eods_len = 0 

822 if savepath==False: 

823 datasavepath = filename[:-4] 

824 elif savepath==True: 

825 datasavepath = input('With the option npmmp enabled, a numpy memmap will be saved to: ').lower() 

826 else: datasavepath=savepath 

827 

828 if save and (os.path.exists(datasavepath+"/eods8_"+filename[:-3]+"npy") or os.path.exists(datasavepath+"/eods5_"+filename[:-3]+"npy")): 

829 print('there already exists an analyzed file, aborting. Change the code if you don\'t want to abort') 

830 quit() 

831 if npmmp: 

832 #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower() 

833 proceed = 'y' 

834 if proceed != 'y': 

835 quit() 

836 # starting analysis 

837 with DataLoader(filepath, deltat, 0.0, verbose) as data: 

838 

839 rate = data.rate 

840 

841 # selected time interval 

842 if timegiven == True: 

843 parttime1 = starttime*rate 

844 parttime2 = endtime*rate 

845 data = data[parttime1:parttime2,channel] 

846 

847 #split data into blocks 

848 nblock = int(deltat*rate) 

849 if len(data)%nblock != 0: 

850 blockamount = len(data)//nblock + 1 

851 else: 

852 blockamount = len(data)//nblock 

853 #fish = ProgressFish(total = blockamount) 

854 

855 pca_cur = 0 

856 progress = 0 

857 

858 for idx in range(0, blockamount): 

859 print('BLOCK %i/%i'%(idx+1,blockamount)) 

860 

861 blockdata = data[idx*nblock:(idx+1)*nblock,channel] 

862 if progress < (idx*100 //blockamount): 

863 progress = (idx*100)//blockamount 

864 progressstr = ' Filestatus: ' 

865 

866 # fish.animate(amount = idx, dexextra = progressstr) 

867 # delete peaks under absolute threshold 

868 

869 

870 #thresh_array = create_threshold_array(blockdata,30000,thresh)  

871 pk, tr = detect_peaks(blockdata, thresh) 

872 troughs = tr 

873 

874 if len(pk) > 3: 

875 

876 peaks = makeeventlist(pk,tr,blockdata,peakwidth) 

877 peakindices, peakx, peakh = discardnearbyevents(peaks[0],peaks[1],peakwidth) 

878 peaks = peaks[:,peakindices] 

879 

880 if len(peaks) > 0: 

881 

882 #if idx > startblock: 

883 # # adding a new block as copy of old list, only difference is peak indexing as it refers to last block 

884 # peaklist = connect_blocks(peaklist) 

885 #else: 

886 # peaklist = Peaklist([]) 

887 

888 aligned_snips, snip_heights = cut_snippets(blockdata,peaks[0], 30, int_met = "cubic", int_fact = 10,max_offset = 20) 

889 

890 pols = chebyshev(aligned_snips) 

891 

892 feats = np.zeros((pols.shape[0],pols.shape[1]+1)) 

893 feats[:,:6] = pols 

894 feats[:,-1] = snip_heights*0.1 

895 #pcs, pca_cur = pc(aligned_snips) #pc_refactor(aligned_snips) 

896 

897 minpeaks = 3 if deltat < 2 else 10 

898 

899 labels, clusters = cluster_events(feats, peaks, cluster_thresh, minpeaks, False, method = 'DBSCAN') 

900 peaks = np.append(peaks,[labels], axis = 0) 

901 

902 if idx > startblock: 

903 # instead of the peaklist I would have to add the previous cluster means 

904 # alignclusterlabels(labels, peaklist, peaks,data=blockdata) 

905 peaks[-1] = alignlabels(labels,clusters,old_labels,old_clusters,maxlabel) 

906 

907 old_labels = np.unique(peaks[-1]) 

908 old_clusters = clusters 

909 #I would want peaks updated here to have the right pc classes as well.. 

910 

911 #peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude 

912 

913 minlen = 5 

914 peaks = discard_short_classes(peaks, minlen) 

915 

916 if len(peaks[0]) > 0: 

917 peaks = discard_wave_pulses(peaks, blockdata) 

918 # delete peaks under absolute threshold 

919 #thresh_array = create_threshold_array(blockdata,30000) 

920 #peaks = peaks[:,peaks[1]>thresh_array[list(map(int,peaks[0]))]] 

921 

922 cmap = plt.get_cmap('jet') 

923 colors = cmap(np.linspace(0, 1.0, 10)) 

924 

925 if plot_steps == True: 

926 plot_events_on_data(peaks, blockdata, colors) 

927 pass 

928 

929 for lab in np.unique(labels): 

930 

931 if lab == -1: 

932 c = 'k' 

933 z=-1 

934 else: 

935 c=colors[lab] 

936 z=1 

937 if plot_eods==True: 

938 plt.plot(range(aligned_snips.shape[1]),np.transpose(aligned_snips[labels == lab]),color=c,zorder=z,label=lab) 

939 

940 if plot_eods==True: 

941 plt.title('Detected and classified EODs') 

942 plt.xlabel('time [ms]') 

943 plt.ylabel('signal (normalized)') 

944 

945 phandles, plabels = plt.gca().get_legend_handles_labels() 

946 by_label = dict(zip(plabels, phandles)) 

947 plt.legend(by_label.values(), by_label.keys()) 

948 plt.show() 

949 

950 for lab in np.unique(labels): 

951 

952 if lab == -1: 

953 c = 'k' 

954 z = -1 

955 else: 

956 c = colors[lab] 

957 z=1 

958 

959 if plot_features==True: 

960 plt.plot(np.squeeze(np.transpose(feats[labels == lab])),color=c,zorder=z,label=lab) 

961 

962 if plot_features==True: 

963 plt.title('EOD Features') 

964 plt.xlabel('feature [#]') 

965 plt.ylabel('value [a.u.]') 

966 

967 phandles, plabels = plt.gca().get_legend_handles_labels() 

968 by_label = dict(zip(plabels, phandles)) 

969 plt.legend(by_label.values(), by_label.keys()) 

970 plt.show() 

971 

972 #peaklist.len = nblock 

973 worldpeaks = np.copy(peaks) 

974 worldpeaks[0] = worldpeaks[0] + (idx*nblock) 

975 # delete the classification that only considers wave shape. 

976 #thisblock_eods = np.delete(worldpeaks,3,0) 

977 thisblock_eods = worldpeaks 

978 

979 if idx == startblock: 

980 maxlabel = np.max(peaks[-1]) + 1 

981 else: 

982 maxlabel = np.max([maxlabel, (np.max(peaks[-1]) + 1)]) 

983 

984 if npmmp: 

985 if idx == startblock: 

986 if not os.path.exists(datasavepath): 

987 os.makedirs(datasavepath) 

988 mmpname = "eods_"+filename[:-3]+"npmmp" 

989 # save the peaks of the current buffered part to a numpy-memmap on the disk 

990 save_EOD_events_to_npmmp(thisblock_eods,eods_len,idx==startblock,datasavepath,mmpname) 

991 eods_len += len(thisblock_eods[0]) 

992 else: 

993 if idx > 0: 

994 all_eods = np.concatenate((all_eods,thisblock_eods),axis = 1) 

995 else: 

996 all_eods = thisblock_eods 

997 if plot_steps == True: 

998 print('FINAL RESULTS') 

999 plot_events_on_data(all_eods, data[:,channel], colors) 

1000 #plot_events_on_data(all_eods,data) 

1001 print('returnes analyzed EODS. Calculate frequencies using all of these but discard the data from the EODS within the lowest few percent of amplitude') 

1002 

1003 if npmmp: 

1004 all_eods = np.memmap(datasavepath+'/'+mmpname, dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') 

1005 if save == 1: 

1006 path = filename[:-4]+"/" 

1007 if not os.path.exists(path): 

1008 os.makedirs(path) 

1009 if eods_len > 0: 

1010 np.save(datasavepath+"/eods8_"+filename[:-3]+"npy", all_eods) 

1011 print('Saved!') 

1012 else: 

1013 print('not saved') 

1014 return all_eods 

1015 

1016 

1017def main(): 

1018 

1019 eods = analyze_pulse_data(sys.argv[1], save=True, npmmp=True) 

1020 print(eods) 

1021 

1022if __name__ == '__main__': 

1023 main() 

1024