1""" 

2by Dexter Frueh 

3""" 

4 

5import os 

6import sys 

7import ntpath 

8import time 

9import copy 

10from shutil import copy2 

11from collections import deque 

12import numpy as np 

13from scipy import stats 

14from scipy import signal 

15from scipy import optimize 

16import matplotlib 

17#from fish import ProgressFish 

18import matplotlib.pyplot as plt 

19from scipy.interpolate import interp1d 

20from sklearn.preprocessing import StandardScaler 

21from sklearn.decomposition import PCA 

22from sklearn.cluster import DBSCAN 

23from sklearn.cluster import AgglomerativeClustering 

24from thunderlab.dataloader import DataLoader 

25from thunderlab.eventdetection import detect_peaks 

26 

def makeeventlist(main_event_positions, side_event_positions, data, event_width=20):
    """
    Generate an array of events that might be EODs of a pulse-type fish, using
    the locations of peaks and troughs, the data, and an optional width of a
    supposed EOD event.

    The generated event array contains the location and height of each event.
    The height of an event is its amplitude difference to nearby troughs; main
    events that have no side event within event_width are discarded and not
    considered as EOD events.

    Parameters
    ----------
    main_event_positions: array of int or float
        Positions of the detected main events in the data time series. Either peaks or troughs.
    side_event_positions: array of int or float
        Positions of the detected side events in the data time series. The complementary events to the main events.
    data: array of float
        The given data.
    event_width: int or float, optional
        Maximum distance between a main event and its side events.

    Returns
    -------
    EOD_events: ndarray
        2D array containing data with 'float' type, size (number_of_properties = 3, number_of_events).
        Generated and combined data of the detected events in an array with arrays of x, y and height along the first axis.
    """
    mainfirst = int((min(main_event_positions[0], side_event_positions[0]) < side_event_positions[0]))  # 1 if the first main event comes before the first side event
    main_x = main_event_positions
    main_y = data[main_event_positions]
    # empty placeholders, filled in the next step while iterating over the properties of the single main events
    main_h = np.zeros(len(main_event_positions))
    main_real = np.ones(len(main_event_positions))
    # iterate over the properties of the single main events
    for ind, (x, y, h, r) in enumerate(np.nditer([main_x, main_y, main_h, main_real], op_flags=[['readonly'], ['readonly'], ['readwrite'], ['readwrite']])):
        l_side_ind = ind - mainfirst
        r_side_ind = l_side_ind + 1
        try:
            r_side_x = side_event_positions[r_side_ind]
            r_distance = r_side_x - x
            r_side_y = data[r_side_x]
        except IndexError:
            pass
        try:
            l_side_x = side_event_positions[l_side_ind]
            l_distance = x - l_side_x
            l_side_y = data[l_side_x]
        except IndexError:
            pass  # ignore the left- or rightmost events, which raise an IndexError
        # calculate the distances to the two side events next to the main event and mark all
        # events whose nearest side events are further away than event_width as unreal.
        # If an event might be an EOD, calculate its height.
        if l_side_ind >= 0 and r_side_ind < len(side_event_positions):
            if min(l_distance, r_distance) > event_width:
                r[...] = False
            elif max(l_distance, r_distance) <= event_width:
                h[...] = max(abs(y-l_side_y), abs(y-r_side_y))  # calculated using absolutes, in case troughs instead of peaks are the main events
            else:
                if l_distance < r_distance:  # reached only when exactly one side event is within event_width; the closer event is the correct one
                    h[...] = abs(y-l_side_y)
                else:
                    h[...] = abs(y-r_side_y)
        # check corner cases
        elif l_side_ind == -1:
            if r_distance > event_width:
                r[...] = False
            else:
                h[...] = y-r_side_y
        elif r_side_ind == len(side_event_positions):
            if l_distance > event_width:
                r[...] = False
            else:
                h[...] = y-l_side_y
    # generate the return array and discard all events that are not marked as real
    EOD_events = np.array([main_x, main_y, main_h], dtype=float)[:, main_real == 1]
    return EOD_events
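
# Example (hedged sketch): pair detected peaks with their neighboring troughs.
# `data` is assumed to be a 1-D numpy array of a recording; detect_peaks is the
# detector imported above from thunderlab.eventdetection.
#
#   pk, tr = detect_peaks(data, 0.04)
#   eods = makeeventlist(pk, tr, data, event_width=20)
#   # eods[0]: positions, eods[1]: amplitudes, eods[2]: peak-trough heights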

def discardnearbyevents(event_locations, event_heights, min_distance):
    """
    Given a number of events with locations and heights, returns a selection
    of these events where no event is closer than min_distance to the next
    event. Among neighboring events closer than min_distance, the event with
    the smaller height is discarded.

    Used to discard side peaks in detected multiple peaks of single EOD pulses
    and to only keep the largest event_height and the corresponding location
    as representative of the whole EOD pulse.

    Parameters
    ----------
    event_locations: array of int or float
        Positions of the given events in the data time series.
    event_heights: array of int or float
        Heights of the given events, indices refer to the same events as in
        event_locations.
    min_distance: int or float
        Minimal distance between events before one of the events gets discarded.

    Returns
    -------
    event_indices: array of int
        Indices of the returned events in the original event_locations array.
    event_locations: array of int or float
        Positions of the returned events in the data time series.
    event_heights: array of int or float
        Heights of the returned events, indices refer to the same events as in
        event_locations.
    """
    unchanged = False
    counter = 0
    event_indices = np.arange(len(event_locations))  # was len(...)+1, which added a spurious trailing index

    while unchanged == False:
        x_diffs = np.diff(event_locations)
        events_delete = np.zeros(len(event_locations))

        for i, diff in enumerate(x_diffs):
            if diff < min_distance:
                # of the two events that are too close, mark the smaller one for deletion
                if event_heights[i+1] > event_heights[i]:
                    events_delete[i] = 1
                else:
                    events_delete[i+1] = 1

        event_heights = event_heights[events_delete != 1]
        event_locations = event_locations[events_delete != 1]
        event_indices = event_indices[np.where(events_delete != 1)[0]]
        if np.count_nonzero(events_delete) == 0:
            unchanged = True
        counter += 1
        if counter > 2000:
            print('Warning: unusually many discarding steps needed; events are unusually dense')
    return event_indices, event_locations, event_heights
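
# Example (hedged sketch, reusing `eods` from the makeeventlist example above);
# this uses the event heights eods[2], while the pipeline in analyze_pulse_data
# below passes the raw amplitudes eods[1] instead:
#
#   idx, x, h = discardnearbyevents(eods[0], eods[2], min_distance=20)
#   eods = eods[:, idx]   # keep only the surviving events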

def crosscorrelation(sig, data):
    """Returns the cross-correlation of two arrays; the first array should be at most as long as the second array."""
    return signal.fftconvolve(data, sig[::-1], mode='valid')
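
# Example (hedged sketch): the lag of the correlation peak locates the template.
#
#   sig = np.array([0., 1., 0.])
#   series = np.array([0., 0., 0., 1., 0., 0.])
#   lag = np.argmax(crosscorrelation(sig, series))   # -> 2; sig matches series[2:5]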

def interpol(data, kind):
    """
    Interpolates the given data using the scipy interp1d function.

    Parameters
    ----------
    data: array
        Data to be interpolated.

    kind: string or int
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer specifying the order of the spline interpolator to be used.

    Returns
    -------
    interpolation: function
    """
    width = len(data)
    x = np.linspace(0, width-1, num=width, endpoint=True)
    #return interp1d(x, data[0:width], kind, assume_sorted=True)
    return interp1d(x, data[0:width], kind)

def interpolated_array(data, kind, int_fact):
    """
    Returns an interpolated array of the given data array.

    Parameters
    ----------
    data: array

    kind: string or int
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer specifying the order of the spline interpolator to be used.

    int_fact: int
        Factor by which the interpolated array is larger than the original array.

    Returns
    -------
    interpolated array: array
    """
    return interpol(data, kind)(np.arange(0, len(data)-1, 1/int_fact))
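
# Example (hedged sketch): upsample a short snippet by a factor of 10.
#
#   snip = np.sin(np.linspace(0, np.pi, 30))
#   up = interpolated_array(snip, 'cubic', 10)   # len(up) == (len(snip)-1)*10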

def cut_snippets(data, event_locations, cut_width, int_met="linear", int_fact=10, max_offset=1000000):
    """
    Cuts intervals from a data array, interpolates and aligns them, and returns them in an array.

    TODO: align them to minimize the least-square error.

    Parameters
    ----------
    data: array

    event_locations: array

    cut_width: int
        Half-width of the intervals relative to the event locations,
        e.g. 15 indicates an interval of 30 datapoints around each event location.

    int_met: string or int
        Method of interpolation. ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer specifying the order of the spline interpolator to be used.

    int_fact: int
        Factor by which the interpolated array is larger than the original.

    max_offset: float
        Maximal offset by which the interpolated intervals can be moved to be aligned with each other. Offset relative to the datapoints of the original data.

    Returns
    -------
    aligned_snips: two-dimensional nparray
        The processed intervals, shape (interval#, interval length).
    heights: array
        Peak-to-peak heights of the aligned snippets.
    """
    snippets = []
    heights = np.zeros(len(event_locations))
    cut_width = [-cut_width, cut_width]
    #alignwidth = int(np.ceil((max_offset) * int_fact))
    alignwidth = 50

    for pos in event_locations.astype('int'):
        snippets.append(data[pos+cut_width[0]:pos+cut_width[1]])
    ipoled_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact-int_fact))

    for i, snip in enumerate(snippets):
        if len(snip) < (cut_width[1]-cut_width[0]):
            # pad snippets at the block borders with zeros (was an if/if/else chain
            # that overwrote the padded first snippet and never matched the last one)
            if i == 0:
                snip = np.concatenate([np.zeros((cut_width[1]-cut_width[0]) - len(snip)), np.array(snip)])
            elif i == len(snippets)-1:
                snip = np.concatenate([snip, np.zeros((cut_width[1]-cut_width[0]) - len(snip))])
            else:
                snip = np.zeros(cut_width[1]-cut_width[0])

        interpoled_snip = interpolated_array(snip, int_met, int_fact)  # was hardcoded to a factor of 10
        intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip)
        if intsnipheight == 0:
            intsnipheight = 1
        # normalize to a maximum of 0 and a height of 1
        interpoled_snip = (interpoled_snip - np.max(interpoled_snip)) / intsnipheight
        ipoled_snips[i] = interpoled_snip

    mean = np.mean(ipoled_snips, axis=0)

    aligned_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact - 2*alignwidth - int_fact))

    for i, interpoled_snip in enumerate(ipoled_snips):
        # align each snippet to the mean snippet via the peak of their cross-correlation
        cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean)
        offset = -alignwidth + np.argmax(cc)
        aligned_snip = interpoled_snip[alignwidth-offset:-alignwidth-offset] if offset != -alignwidth else interpoled_snip[2*alignwidth:]
        if len(aligned_snip[~np.isnan(aligned_snip)]) > 0:
            aligned_snips[i] = aligned_snip
        try:
            heights[i] = np.max(interpoled_snip[alignwidth-offset:-alignwidth-offset]) - np.min(interpoled_snip[alignwidth-offset:-alignwidth-offset])
        except ValueError:
            heights[i] = np.max(interpoled_snip[2*alignwidth:]) - np.min(interpoled_snip[2*alignwidth:])
    return aligned_snips, heights
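
# Example (hedged sketch, reusing `data` and `eods` from above): cut, upsample
# and align 60-sample windows around each event.
#
#   snips, snip_heights = cut_snippets(data, eods[0], 30, int_met='cubic',
#                                      int_fact=10, max_offset=20)
#   # snips.shape == (len(eods[0]), 60*10 - 2*50 - 10)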

def pc(dataset):
    """
    Calculates the first 10 principal components of a dataset using
    scikit-learn's principal component analysis.

    Parameters
    ----------
    dataset: ndarray
        Dataset of which the principal components are to be calculated.
        Two-dimensional array of shape (observations, features).

    Returns
    -------
    pc_comp: ndarray
        Principal components of the dataset, shape (observations, 10).
    """
    pca = PCA(n_components=10)
    pc_comp = pca.fit_transform(dataset)

    return pc_comp  #, pca
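
# Example (hedged sketch, reusing `snips` from the cut_snippets example above):
#
#   pcs = pc(snips)   # shape (len(snips), 10); requires at least 10 snippets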

def chebyshev(dataset):
    """
    Fits a Chebyshev polynomial of degree 5 to each observation of the dataset
    and returns the 6 polynomial coefficients per observation as features,
    shape (observations, 6).
    """
    x = range(len(dataset[0]))
    npol = 5
    p = np.zeros((len(dataset), npol+1))
    for i, s in enumerate(dataset):
        cheb = np.polynomial.chebyshev.Chebyshev.fit(x, s, npol)
        p[i] = cheb.coef

    return p  #, pca
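
# Example (hedged sketch): Chebyshev coefficients as a compact waveform feature,
# used instead of the principal components in analyze_pulse_data below.
#
#   feats = chebyshev(snips)   # shape (len(snips), 6)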

def dbscan(pcs, events, eps, min_samples, takekm):
    """
    Calculates clusters of high spatial density of the given observations in
    their feature space. For example, the first few principal components of
    the data could be used as features for the classification.

    Parameters
    ----------
    pcs: ndarray
        Features of the observations, shape (samples, features).
    events: ndarray
        The events the features belong to (unused here; kept for interface
        compatibility with cluster_events).
    eps: float
        Maximum distance between two samples for them to be considered neighbors.
    min_samples: int
        Number of samples in a neighborhood for a point to be considered a core point.
    takekm:
        Unused.

    Returns
    -------
    labels: ndarray
        Cluster labels of each observation (-1 for noise).
    comp_means: ndarray
        Mean feature vector of each cluster.
    """
    X = pcs

    # compute DBSCAN; keyword arguments, since min_samples is keyword-only in current scikit-learn
    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    labels = clusters.labels_

    comp = clusters.components_

    comp_means = np.zeros((len(np.unique(labels)) - 1, comp.shape[1]))

    for i in range(len(np.unique(labels)) - 1):
        comp_means[i] = np.mean(pcs[labels == i], axis=0)

    return labels, comp_means

def cluster_events(features, events, eps, min_samples, takekm, method='DBSCAN'):
    """
    Clusters the given events using the given feature space and the clustering
    algorithm of choice, and returns the assigned cluster labels.

    Parameters
    ----------
    features: ndarray
        Features of the events, shape (samples, features).
    events: ndarray
        The events to be clustered.
    eps, min_samples, takekm:
        Passed on to dbscan().
    method: string
        Clustering algorithm to use; only 'DBSCAN' is implemented so far.

    Returns
    -------
    labels: ndarray
        Cluster labels of each event.
    clusters: ndarray
        Mean feature vector of each cluster.
    """
    ######################## function maybe could be even more generic? (dependent on the datatype of "events")
    if method == 'DBSCAN':
        labels, clusters = dbscan(features, events, eps, min_samples, takekm)
    elif method == 'kMean':
        # to be implemented
        #labels = kmeans([])
        raise NotImplementedError('kMean clustering is not implemented yet')
    return labels, clusters
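
# Example (hedged sketch, mirroring the call in analyze_pulse_data below):
#
#   labels, clusters = cluster_events(feats, eods, eps=0.1, min_samples=10,
#                                     takekm=False, method='DBSCAN')
#   # labels[i] == -1 marks events that DBSCAN considers noise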

class Peaklist(object):
    def __init__(self, peaklist):
        self.list = peaklist
        self.lastofclass = {}
        self.lastofclassx = {}
        self.classesnearby = []
        self.classesnearbyx = []
        self.classesnearbypccl = []
        self.classlist = []
        self.classamount = 0
        self.shapes = {}
        self.len = 0  # length of the corresponding analysis block; read by connect_blocks()

def connect_blocks(oldblock):
    """
    Used to connect analysis blocks:
    transfers data from the previous analysis block to the current block.
    """
    newblock = Peaklist([])
    newblock.lastofclass = oldblock.lastofclass
    newblock.lastofclassx = oldblock.lastofclassx
    newblock.classesnearby = oldblock.classesnearby
    newblock.classesnearbypccl = oldblock.classesnearbypccl
    newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx]
    newblock.classamount = oldblock.classamount
    newblock.len = oldblock.len
    return newblock

def alignclusterlabels(labels, peaklist, peaks, data='test'):
    """
    Used to connect analysis blocks:
    changes the labels of the clusters in the current block to fit with the labels of the previous block.
    """

    # take the first second of the new peak data
    overlapamount = len(peaks[0, peaks[0] < 30000])
    if overlapamount == 0:
        return None

    old_peaklist = copy.deepcopy(peaklist)  # redundant
    overlappeaks = copy.deepcopy(peaks[:, :overlapamount])
    overlap_peaklist = copy.deepcopy(old_peaklist)

    # delete the cluster classifications of the overlap class
    overlappeaks[3] = [-1]*len(overlappeaks[0])

    # set nearby pc classes to -1
    overlap_peaklist.classesnearbypccl = [-1]*len(overlap_peaklist.classesnearbypccl)

    # create peak labels using the ampwalk classifier
    classified_overlap = ampwalkclassify3_refactor(overlappeaks, overlap_peaklist, glue=True)[0]

    # for each class
    for cl in np.unique(classified_overlap[4]):

        # indices of the peaks assigned the current class by the ampwalk classification
        labelindex = np.where(classified_overlap[4] == cl)[0]

        # pc clustering labels that were originally given to those peaks
        label = labels[labelindex]

        # index of a peak with the most common translation from ampwalk class to pc clustering class
        labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]]

        # pc clustering label belonging to the class cl in the new block
        newlabel = labels[labelindex]

        try:
            peaklist.classesnearbypccl[old_peaklist.classesnearby.index(cl)] = newlabel
        except ValueError:
            pass  # cl is not among the nearby classes of the previous block

def ampwalkclassify3_refactor(peaks, peaklist, glue=False):
    """
    Classifies peaks/EOD events into different classes by their amplitude.

    Takes the list of peaks and the list of properties of the last analysis block.
    Classifies the single peaks in the direction of their occurrence in time,
    based on their amplitude and their previously assigned class based on their
    waveform (... using the method cluster_events on the principal components of
    the snippets around the single peaks).

    Method:
    Calculates the difference in amplitude between the current peak and the
    different amplitude classes that are nearby. Creates a new amplitude class
    if no class is close enough. Creates no new class if the peak's waveform
    class is a noise class of the DBSCAN algorithm. Does not compare peaks of
    different waveform classes.

    --can be used without prior waveform classes, resulting in a classification
    solely based on the amplitude development; the pc classes need to be set to
    the same class for this, .... . Not practical, but it should be possible to
    split this up into more general functions.
    """

    classamount = peaklist.classamount
    lastofclass = peaklist.lastofclass
    lastofclassx = peaklist.lastofclassx
    comperr = 1
    classesnearby = peaklist.classesnearby
    classesnearbyx = peaklist.classesnearbyx
    classesnearbypccl = peaklist.classesnearbypccl
    classes = np.zeros((len(peaks[0])))
    pcclasses = peaks[3]
    positions = peaks[0]
    heights = peaks[2]
    cl = 0
    maxdistance = 30000  # maximum distance to possibly belong to the same class
    factor = 1.6  # factor by which a peak fits into a class, e.g.: classheight = 1, factor = 2 => peaks accepted in the range (0.5, 2)

    # loop through all the new peaks
    for peaknum, p in enumerate(peaks.T):

        if len(lastofclass) == 0:
            lastofclass[1] = deque()
            lastofclassx[1] = deque()
            lastofclass[1].append(heights[peaknum])
            lastofclassx[1].append(positions[peaknum])
            classesnearby.append(1)
            classesnearbyx.append(-1)
            classesnearbypccl.append(pcclasses[peaknum])
            classes[peaknum] = 1
            classamount += 1
            continue

        # nearby classes only count if they are within maxdistance;
        # iterate backwards, so that pop() does not skip any elements
        for i in range(len(classesnearby) - 1, -1, -1):
            if (positions[peaknum] - classesnearbyx[i]) > maxdistance:
                classesnearby.pop(i)
                classesnearbyx.pop(i)
                classesnearbypccl.pop(i)

        # compute the mean isi of each class by taking the last 3 pulses in that class
        lastofclassisis = []
        for i in classesnearby:
            lastofclassisis.append(np.median(np.diff(lastofclassx[i])))

        meanisi = np.mean(lastofclassisis)

        # adapt the maximum class distance to 20 mean isis, if the mean isi is in a plausible range
        if 32000 > 40*meanisi > 6000:
            maxdistance = 20*meanisi

        cl = 0  # 'no class'
        comperr = 100
        clnrby = np.unique(classesnearby)

        # TODO this assigns peaks with no class to the last class if there are multiple candidates.
        # can I fix this?
        for i in clnrby:

            # if the class of the current peak is equal to the currently evaluated class, or the current peak has no class
            if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum]:
                classmean = np.mean(lastofclass[i])  # mean height of the class

                # difference between the current peak height and the average class height
                logerror = np.abs(np.log2(heights[peaknum]) - np.log2(classmean))
                logthresh = np.log2(factor)
                relerror = logerror

                # if the peak heights are similar
                if logerror < logthresh:  ## same-class condition
                    # if the peaks are close together in distance (20*isi)
                    if relerror < comperr and (positions[peaknum] - classesnearbyx[classesnearby.index(i)]) < maxdistance:
                        # keep the same class (or, in case of no class, assign that class)
                        cl = i
                        comperr = relerror

        # if a pc class is assigned to the peak
        if pcclasses[peaknum] != -1:
            # if an existing class was kept
            if cl != 0:
                # append this peak to the peaklist of the right class (only keep the last 3 peaks)
                if len(lastofclass[cl]) >= 3:
                    lastofclass[cl].popleft()
                if len(lastofclassx[cl]) >= 3:
                    lastofclassx[cl].popleft()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
            else:
                # if the peak fits none of the existing classes, create a new class
                cl = classamount+1
                classamount = cl
                lastofclass[cl] = deque()
                lastofclassx[cl] = deque()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])

            # if there are 12 or more nearby classes, delete the class that is furthest away
            if len(classesnearby) >= 12:  # poorly implemented?
                minind = classesnearbyx.index(min(classesnearbyx))
                del lastofclass[classesnearby[minind]]
                del lastofclassx[classesnearby[minind]]
                classesnearby.pop(minind)
                classesnearbyx.pop(minind)
                classesnearbypccl.pop(minind)

            # update the position of the class in the nearby lists
            try:
                ind = classesnearby.index(cl)
                classesnearbyx[ind] = positions[peaknum]
            except ValueError:
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])

        # if no pc class is assigned to the peak
        elif glue == True:
            # if a class was assigned through the peak amplitude method
            if cl != 0:
                # assign that class to the peak
                classes[peaknum] = cl
            else:
                # create a new class
                cl = classamount+1
                classamount = cl
                lastofclass[cl] = deque()
                lastofclassx[cl] = deque()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
            if len(classesnearby) >= 12:  # poorly implemented?
                minind = classesnearbyx.index(min(classesnearbyx))
                del lastofclass[classesnearby[minind]]
                del lastofclassx[classesnearby[minind]]
                classesnearby.pop(minind)
                classesnearbyx.pop(minind)
                classesnearbypccl.pop(minind)
            try:
                ind = classesnearby.index(cl)
                classesnearbyx[ind] = positions[peaknum]
            except ValueError:
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
    peaklist.lastofclass = lastofclass
    peaklist.lastofclassx = lastofclassx
    peaklist.classesnearby = classesnearby
    peaklist.classesnearbyx = classesnearbyx
    peaklist.classesnearbypccl = classesnearbypccl
    peaklist.classlist = classes  # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list)
    peaklist.classamount = classamount
    peaks = np.append(peaks, classes[None, :], axis=0)

    return peaks, peaklist

def discard_wave_pulses(peaks, data):
    """
    Discards events from a pulse event list which are unusually wide (wider than a tenth of the inter-pulse interval), which indicates a wave-type EOD instead of a pulse-type EOD.
    """
    deleteclasses = []
    for cl in np.unique(peaks[3]):
        peaksofclass = peaks[:, peaks[3] == cl]
        isi = np.diff(peaksofclass[0])
        isi_mean = np.mean(isi)
        widepeaks = 0
        isi_tenth_area = lambda x, isi: np.arange(np.floor(x-0.1*isi), np.ceil(x+0.1*isi), 1, dtype=int)
        for p in peaksofclass.T:
            data = np.array(data)
            try:
                for dp_around in data[isi_tenth_area(p[0], isi_mean)]:
                    if dp_around <= p[1]-p[2]:
                        break
                else:
                    # the for-else fires only when no sample around the peak drops below
                    # the peak amplitude minus the event height, i.e. the peak is wide
                    # (the else was previously attached to the try instead of the for)
                    widepeaks += 1
            except (IndexError, ValueError):
                pass
        if widepeaks > len(peaksofclass)*0.5:
            deleteclasses.append(cl)
    for cl in deleteclasses:
        peaks = peaks[:, peaks[3] != cl]
    return peaks

def plot_events_on_data(peaks, data, colors):
    """
    Plots the detected events onto the data timeseries. If the events are classified, the classes are plotted in different colors and the class -1 (not belonging to a cluster) is plotted in black.
    """
    plt.plot(range(len(data)), data, color='black')
    if len(peaks) > 3:
        classlist = np.array(peaks[3], dtype=int)
        if len(peaks) > 4:
            classlist = np.array(peaks[4], dtype=int)

        for cl in np.unique(classlist):
            if cl == -1:
                color = 'black'
            else:
                color = colors[cl]
            peaksofclass = peaks[:, classlist == cl]
            plt.plot(peaksofclass[0], peaksofclass[1], '.', color=color, ms=20, label=cl)
        plt.legend()
    else:
        plt.scatter(peaks[0], peaks[1], color='red')
    plt.show()
    plt.close()

def discard_short_classes(events, minlen):
    """
    Returns all events except those which are in classes with fewer than minlen members.
    """

    u, c = np.unique(events[-1], return_counts=True)

    smallclasses = u[c < minlen]
    classlist = events[-1]

    delete = np.zeros(len(classlist))

    for cl in smallclasses:
        delete[classlist == cl] = 1
    events = events[:, delete != 1]

    return events

def path_leaf(path):
    """Returns the file name of a path, regardless of a trailing path separator."""
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

def save_EOD_events_to_npmmp(EOD_Events, eods_len, startblock, datasavepath, mmpname='eods.npmmp'):
    """Appends the given EOD events to a numpy memmap on disk; creates the memmap on the first block."""
    n_EOD_Events = len(EOD_Events[0])
    savepath = datasavepath+"/"+mmpname
    if startblock:
        eods = np.memmap(savepath,
                         dtype='float64', mode='w+',
                         shape=(4, n_EOD_Events), order='F')
    else:
        dtypesize = 8  # a float64 is 8 bytes long (was 4 for float32)
        eods = np.memmap(savepath, dtype='float64', mode='r+',
                         offset=dtypesize*eods_len*4,
                         shape=(4, n_EOD_Events), order='F')
    eods[:] = EOD_Events
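
# Example (hedged sketch): read the accumulated events back from disk.
# `total_len` stands for the running total of saved events (eods_len in
# analyze_pulse_data below).
#
#   eods = np.memmap(datasavepath + '/eods.npmmp', dtype='float64',
#                    mode='r', shape=(4, total_len), order='F')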

def create_threshold_array(data, window, threshold):
    """Computes a windowed threshold array: four standard deviations of each window, but at least the given threshold."""
    thr_array = np.zeros(data.shape)
    for i in range(int(len(data)/window)):
        thr_array[i*window:(i+1)*window] = np.std(data[i*window:(i+1)*window])*4
    thr_array[thr_array < threshold] = threshold
    return thr_array
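
# Example (hedged sketch): a per-window threshold, four standard deviations of
# each 30000-sample window, clipped from below at 0.04. Peaks could then be
# filtered as in the commented-out line in analyze_pulse_data below:
#
#   thr = create_threshold_array(data, 30000, 0.04)
#   peaks = peaks[:, peaks[1] > thr[peaks[0].astype(int)]]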

def alignlabels(labels, clusters, old_labels, old_clusters, maxlabel):
    """
    Maps the cluster labels of the current block onto the labels of the
    previous block by matching cluster means; clusters without a close match
    (Euclidean distance >= 0.1) get a new label, starting at maxlabel.
    """

    old_labels = old_labels[old_labels != -1]

    labels_new = -1*np.ones(labels.shape)
    newclass = maxlabel

    for curlabel, cluster in enumerate(clusters):
        n = np.linalg.norm(old_clusters - cluster, axis=1)

        if np.min(n) < 0.1:
            labels_new[labels == curlabel] = old_labels[np.argmin(n)]
        else:
            labels_new[labels == curlabel] = newclass
            newclass = newclass + 1

    return labels_new
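
# Example (hedged sketch): relabel the second block so that clusters matching
# the first block keep their old labels.
#
#   labels2 = alignlabels(labels2, clusters2, np.unique(labels1), clusters1,
#                         maxlabel=np.max(labels1) + 1)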

def analyze_pulse_data(filepath, deltat=10, thresh=0.04, starttime=0, endtime=0, cluster_thresh=0.1, savepath=False, save=False, npmmp=False, plot_eods=False, plot_features=False, plot_steps=False, plot_result=False):

    """
    Analyzes the timeseries of a pulse fish EOD recording.

    Parameters
    ----------
    filepath: string
        Path to a WAV file with the recorded timeseries.

    deltat: int, optional
        Duration of a single analysis block in seconds (less than a minute is recommended, due to the principal component clustering of the EOD waveforms).

    thresh: float, optional
        Minimum threshold for the peak detection (if computing frequencies, a tiny bit lower than the desired threshold is recommended; then discard the EODs below the desired threshold after computing the frequencies for each EOD).

    starttime: int or str of int, optional
        Time into the data from where to start the analysis, in seconds.

    endtime: int or str of int, optional
        Time into the data where to end the analysis, in seconds; must be larger than starttime.

    cluster_thresh: float, optional
        Threshold that decides the cluster density of the EOD waveform features.

    savepath: Boolean or str, optional
        Path where results and intermediate results are saved; only needed if save or npmmp is True.
        A string specifies a relative path to the directory where results and intermediate results will be stored,
        False uses the preset savepath, which is ~/filepath/,
        and True asks for the savepath as input while the script is running.

    save: Boolean, optional
        True to save the results into a npy file at the savepath.

    npmmp: Boolean, optional
        True to save intermediate results into a numpy memmap at the savepath; only recommended in case of memory overflow.

    plot_steps: Boolean, optional
        True to plot the results of each analysis block.

    plot_result: Boolean, optional
        True to plot the results of the final analysis. Not recommended for long recordings due to %TODO

    plot_eods: Boolean, optional
        True to plot the EOD waveforms for each analysis block.

    plot_features: Boolean, optional
        True to plot the EOD waveform features for each analysis block.

    Returns
    -------
    eods: numpy array
        2D numpy array. First axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class); second axis: EODs in chronological order.
    """

    # parameters for the analysis
    # thresh is taken from the function arguments (a hardcoded thresh = 0.04
    # here previously overrode whatever the caller passed in)
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderlab.dataloader.DataLoader
    verbose = 0
    channel = 0
    ultimate_threshold = thresh+0.01
    startblock = 0
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    eods_len = 0
    if savepath == False:
        datasavepath = filename[:-4]
    elif savepath == True:
        datasavepath = input('Where should the results be saved? Specify a path to a directory: ')
    else:
        datasavepath = savepath

    if save and (os.path.exists(datasavepath+"/eods8_"+filename[:-3]+"npy") or os.path.exists(datasavepath+"/eods5_"+filename[:-3]+"npy")):
        print('There already exists an analyzed file, aborting. Change the code if you don\'t want to abort.')
        quit()
    if npmmp:
        #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
        proceed = 'y'
        if proceed != 'y':
            quit()
    # starting analysis

    with DataLoader(filepath, deltat, 0.0, verbose) as data:

        samplerate = data.samplerate

        # selected time interval
        if timegiven == True:
            parttime1 = int(starttime*samplerate)
            parttime2 = int(endtime*samplerate)
            data = data[parttime1:parttime2]  # keep the channel dimension; it is selected per block below

        # split the data into blocks
        nblock = int(deltat*samplerate)
        if len(data) % nblock != 0:
            blockamount = len(data)//nblock + 1
        else:
            blockamount = len(data)//nblock
        #fish = ProgressFish(total=blockamount)

        pca_cur = 0
        progress = 0

        for idx in range(0, blockamount):
            print('BLOCK %i/%i' % (idx+1, blockamount))

            blockdata = data[idx*nblock:(idx+1)*nblock, channel]
            if progress < (idx*100 // blockamount):
                progress = (idx*100)//blockamount
                progressstr = ' Filestatus: '
                # fish.animate(amount=idx, dexextra=progressstr)

            # detect peaks and troughs above the absolute threshold
            #thresh_array = create_threshold_array(blockdata, 30000, thresh)
            pk, tr = detect_peaks(blockdata, thresh)
            troughs = tr

            if len(pk) > 3:

                peaks = makeeventlist(pk, tr, blockdata, peakwidth)
                peakindices, peakx, peakh = discardnearbyevents(peaks[0], peaks[1], peakwidth)
                peaks = peaks[:, peakindices]

                if len(peaks[0]) > 0:  # was len(peaks) > 0, which is always true for the (3, n) event array

                    #if idx > startblock:
                    #    # add a new block as a copy of the old list; the only difference is the peak indexing, as it refers to the last block
                    #    peaklist = connect_blocks(peaklist)
                    #else:
                    #    peaklist = Peaklist([])

                    aligned_snips, snip_heights = cut_snippets(blockdata, peaks[0], 30, int_met="cubic", int_fact=10, max_offset=20)

                    pols = chebyshev(aligned_snips)

                    feats = np.zeros((pols.shape[0], pols.shape[1]+1))
                    feats[:, :6] = pols
                    feats[:, -1] = snip_heights*0.1
                    #pcs, pca_cur = pc(aligned_snips)  #pc_refactor(aligned_snips)

                    minpeaks = 3 if deltat < 2 else 10

                    labels, clusters = cluster_events(feats, peaks, cluster_thresh, minpeaks, False, method='DBSCAN')
                    peaks = np.append(peaks, [labels], axis=0)

                    if idx > startblock:
                        # instead of the peaklist I would have to add the previous cluster means
                        # alignclusterlabels(labels, peaklist, peaks, data=blockdata)
                        peaks[-1] = alignlabels(labels, clusters, old_labels, old_clusters, maxlabel)

                    old_labels = np.unique(peaks[-1])
                    old_clusters = clusters
                    # I would want peaks updated here to have the right pc classes as well..

                    #peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist)  # classification by amplitude

                    minlen = 5
                    peaks = discard_short_classes(peaks, minlen)

                    if len(peaks[0]) > 0:
                        peaks = discard_wave_pulses(peaks, blockdata)
                        # delete peaks under the absolute threshold
                        #thresh_array = create_threshold_array(blockdata, 30000)
                        #peaks = peaks[:, peaks[1] > thresh_array[list(map(int, peaks[0]))]]

                    cmap = plt.get_cmap('jet')
                    colors = cmap(np.linspace(0, 1.0, 10))

                    if plot_steps == True:
                        plot_events_on_data(peaks, blockdata, colors)

                    for lab in np.unique(labels):
                        if lab == -1:
                            c = 'k'
                            z = -1
                        else:
                            c = colors[lab]
                            z = 1
                        if plot_eods == True:
                            plt.plot(range(aligned_snips.shape[1]), np.transpose(aligned_snips[labels == lab]), color=c, zorder=z, label=lab)

                    if plot_eods == True:
                        plt.title('Detected and classified EODs')
                        plt.xlabel('time [ms]')
                        plt.ylabel('signal (normalized)')

                        phandles, plabels = plt.gca().get_legend_handles_labels()
                        by_label = dict(zip(plabels, phandles))
                        plt.legend(by_label.values(), by_label.keys())
                        plt.show()

                    for lab in np.unique(labels):
                        if lab == -1:
                            c = 'k'
                            z = -1
                        else:
                            c = colors[lab]
                            z = 1
                        if plot_features == True:
                            plt.plot(np.squeeze(np.transpose(feats[labels == lab])), color=c, zorder=z, label=lab)

                    if plot_features == True:
                        plt.title('EOD Features')
                        plt.xlabel('feature [#]')
                        plt.ylabel('value [a.u.]')

                        phandles, plabels = plt.gca().get_legend_handles_labels()
                        by_label = dict(zip(plabels, phandles))
                        plt.legend(by_label.values(), by_label.keys())
                        plt.show()

                    #peaklist.len = nblock
                    worldpeaks = np.copy(peaks)
                    worldpeaks[0] = worldpeaks[0] + (idx*nblock)
                    # delete the classification that only considers the wave shape
                    #thisblock_eods = np.delete(worldpeaks, 3, 0)
                    thisblock_eods = worldpeaks

                    if idx == startblock:
                        maxlabel = np.max(peaks[-1]) + 1
                    else:
                        maxlabel = np.max([maxlabel, (np.max(peaks[-1]) + 1)])

                    if npmmp:
                        if idx == startblock:
                            if not os.path.exists(datasavepath):
                                os.makedirs(datasavepath)
                            mmpname = "eods_"+filename[:-3]+"npmmp"
                        # save the peaks of the currently buffered part to a numpy memmap on disk
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len, idx == startblock, datasavepath, mmpname)
                    else:
                        if idx > 0:
                            all_eods = np.concatenate((all_eods, thisblock_eods), axis=1)
                        else:
                            all_eods = thisblock_eods
                    # count the saved events in both modes, so that the save branch below also works without npmmp
                    eods_len += len(thisblock_eods[0])

        if plot_result == True:  # the final plot; was gated by plot_steps, while the docstring describes plot_result
            print('FINAL RESULTS')
            plot_events_on_data(all_eods, data[:, channel], colors)
        #plot_events_on_data(all_eods, data)
        print('Returning the analyzed EODs. Calculate frequencies using all of these, but discard the data from the EODs within the lowest few percent of amplitude.')

    if npmmp:
        all_eods = np.memmap(datasavepath+'/'+mmpname, dtype='float64', mode='r+', shape=(4, eods_len), order='F')
    if save == 1:
        if not os.path.exists(datasavepath):  # was a stray path derived from the file name, while np.save below targets datasavepath
            os.makedirs(datasavepath)
        if eods_len > 0:
            np.save(datasavepath+"/eods8_"+filename[:-3]+"npy", all_eods)
            print('Saved!')
        else:
            print('not saved')
    return all_eods

def main():
    eods = analyze_pulse_data(sys.argv[1], save=True, npmmp=True)
    print(eods)


if __name__ == '__main__':
    main()