1""" 

2by Dexter Frueh 

3""" 

4 

5import os 

6import sys 

7import ntpath 

8import time 

9import copy 

10from shutil import copy2 

11from collections import deque 

12import numpy as np 

13from scipy import stats 

14from scipy import signal 

15from scipy import optimize 

16import matplotlib 

17#from fish import ProgressFish 

18import matplotlib.pyplot as plt 

19from scipy.interpolate import interp1d 

20from sklearn.preprocessing import StandardScaler 

21from sklearn.decomposition import PCA 

22from sklearn.cluster import DBSCAN 

23from sklearn.cluster import AgglomerativeClustering 

24from thunderlab.dataloader import DataLoader 

25from thunderlab.eventdetection import detect_peaks 

26 

def makeeventlist(main_event_positions, side_event_positions, data, event_width=20):
    """
    Generate an array of events that might be EODs of a pulse-type fish, using
    the locations of peaks and troughs, the data, and an optional width of a
    supposed EOD event.

    The generated event array contains the location and height of each event.
    The height of an event is its amplitude difference to nearby troughs; main
    events that have no side event within event_width are discarded and not
    considered as EOD events.

    Parameters
    ----------
    main_event_positions: array of int or float
        Positions of the detected main events in the data time series. Either peaks or troughs.
    side_event_positions: array of int or float
        Positions of the detected side events in the data time series. The complementary events to the main events.
    data: array of float
        The given data.
    event_width: int or float, optional
        Maximum distance between a main event and its side events.

    Returns
    -------
    EOD_events: ndarray
        2D array containing data with 'float' type, size (number_of_properties = 3, number_of_events).
        Generated and combined data of the detected events in an array with arrays of x, y and height along the first axis.
    """
    mainfirst = int((min(main_event_positions[0], side_event_positions[0]) < side_event_positions[0]))  # 1 if the first main event comes before the first side event
    main_x = main_event_positions
    main_y = data[main_event_positions]
    # empty placeholders, filled in the next step while iterating over the properties of the single main events
    main_h = np.zeros(len(main_event_positions))
    main_real = np.ones(len(main_event_positions))
    # iterate over the properties of the single main events
    for ind, (x, y, h, r) in enumerate(np.nditer([main_x, main_y, main_h, main_real], op_flags=[['readonly'], ['readonly'], ['readwrite'], ['readwrite']])):
        l_side_ind = ind - mainfirst
        r_side_ind = l_side_ind + 1
        try:
            r_side_x = side_event_positions[r_side_ind]
            r_distance = r_side_x - x
            r_side_y = data[r_side_x]
        except IndexError:
            pass
        try:
            l_side_x = side_event_positions[l_side_ind]
            l_distance = x - l_side_x
            l_side_y = data[l_side_x]
        except IndexError:
            pass  # ignore the left- or rightmost events, which raise an IndexError
        # calculate the distances to the two side events next to the main event and mark all
        # events whose nearest side events are further away than event_width as unreal.
        # If an event might be an EOD, calculate its height.
        if l_side_ind >= 0 and r_side_ind < len(side_event_positions):
            if min(l_distance, r_distance) > event_width:
                r[...] = False
            elif max(l_distance, r_distance) <= event_width:
                h[...] = max(abs(y-l_side_y), abs(y-r_side_y))  # calculated using absolutes, in case troughs instead of peaks are the main events
            else:
                if l_distance < r_distance:  # reached only when exactly one side event is within event_width; the closer event is the correct one
                    h[...] = abs(y-l_side_y)
                else:
                    h[...] = abs(y-r_side_y)
        # check corner cases
        elif l_side_ind == -1:
            if r_distance > event_width:
                r[...] = False
            else:
                h[...] = y-r_side_y
        elif r_side_ind == len(side_event_positions):
            if l_distance > event_width:
                r[...] = False
            else:
                h[...] = y-l_side_y
    # generate the return array and discard all events that are not marked as real
    EOD_events = np.array([main_x, main_y, main_h], dtype=float)[:, main_real == 1]
    return EOD_events
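
# Example (hedged sketch): pair detected peaks with their neighboring troughs.
# `data` is assumed to be a 1-D numpy array of a recording; detect_peaks is the
# detector imported above from thunderlab.eventdetection.
#
#   pk, tr = detect_peaks(data, 0.04)
#   eods = makeeventlist(pk, tr, data, event_width=20)
#   # eods[0]: positions, eods[1]: amplitudes, eods[2]: peak-trough heights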

def discardnearbyevents(event_locations, event_heights, min_distance):
    """
    Given a number of events with locations and heights, returns a selection
    of these events where no event is closer than min_distance to the next
    event. Among neighboring events closer than min_distance, the event with
    the smaller height is discarded.

    Used to discard side peaks in detected multiple peaks of single EOD pulses
    and to only keep the largest event_height and the corresponding location
    as representative of the whole EOD pulse.

    Parameters
    ----------
    event_locations: array of int or float
        Positions of the given events in the data time series.
    event_heights: array of int or float
        Heights of the given events, indices refer to the same events as in
        event_locations.
    min_distance: int or float
        Minimal distance between events before one of the events gets discarded.

    Returns
    -------
    event_indices: array of int
        Indices of the returned events in the original event_locations array.
    event_locations: array of int or float
        Positions of the returned events in the data time series.
    event_heights: array of int or float
        Heights of the returned events, indices refer to the same events as in
        event_locations.
    """
    unchanged = False
    counter = 0
    event_indices = np.arange(len(event_locations))  # was len(...)+1, which added a spurious trailing index

    while unchanged == False:
        x_diffs = np.diff(event_locations)
        events_delete = np.zeros(len(event_locations))

        for i, diff in enumerate(x_diffs):
            if diff < min_distance:
                # of the two events that are too close, mark the smaller one for deletion
                if event_heights[i+1] > event_heights[i]:
                    events_delete[i] = 1
                else:
                    events_delete[i+1] = 1

        event_heights = event_heights[events_delete != 1]
        event_locations = event_locations[events_delete != 1]
        event_indices = event_indices[np.where(events_delete != 1)[0]]
        if np.count_nonzero(events_delete) == 0:
            unchanged = True
        counter += 1
        if counter > 2000:
            print('Warning: unusually many discarding steps needed; events are unusually dense')
    return event_indices, event_locations, event_heights
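
# Example (hedged sketch, reusing `eods` from the makeeventlist example above);
# this uses the event heights eods[2], while the pipeline in analyze_pulse_data
# below passes the raw amplitudes eods[1] instead:
#
#   idx, x, h = discardnearbyevents(eods[0], eods[2], min_distance=20)
#   eods = eods[:, idx]   # keep only the surviving events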

def crosscorrelation(sig, data):
    """Returns the cross-correlation of two arrays; the first array should be at most as long as the second array."""
    return signal.fftconvolve(data, sig[::-1], mode='valid')
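
# Example (hedged sketch): the lag of the correlation peak locates the template.
#
#   sig = np.array([0., 1., 0.])
#   series = np.array([0., 0., 0., 1., 0., 0.])
#   lag = np.argmax(crosscorrelation(sig, series))   # -> 2; sig matches series[2:5]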

def interpol(data, kind):
    """
    Interpolates the given data using the scipy interp1d function.

    Parameters
    ----------
    data: array
        Data to be interpolated.

    kind: string or int
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer specifying the order of the spline interpolator to be used.

    Returns
    -------
    interpolation: function
    """
    width = len(data)
    x = np.linspace(0, width-1, num=width, endpoint=True)
    #return interp1d(x, data[0:width], kind, assume_sorted=True)
    return interp1d(x, data[0:width], kind)

def interpolated_array(data, kind, int_fact):
    """
    Returns an interpolated array of the given data array.

    Parameters
    ----------
    data: array

    kind: string or int
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer specifying the order of the spline interpolator to be used.

    int_fact: int
        Factor by which the interpolated array is larger than the original array.

    Returns
    -------
    interpolated array: array
    """
    return interpol(data, kind)(np.arange(0, len(data)-1, 1/int_fact))
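
# Example (hedged sketch): upsample a short snippet by a factor of 10.
#
#   snip = np.sin(np.linspace(0, np.pi, 30))
#   up = interpolated_array(snip, 'cubic', 10)   # len(up) == (len(snip)-1)*10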

def cut_snippets(data, event_locations, cut_width, int_met="linear", int_fact=10, max_offset=1000000):
    """
    Cuts intervals from a data array, interpolates and aligns them, and returns them in an array.

    TODO: align them to minimize the least-square error.

    Parameters
    ----------
    data: array

    event_locations: array

    cut_width: int
        Half-width of the intervals relative to the event locations,
        e.g. 15 indicates an interval of 30 datapoints around each event location.

    int_met: string or int
        Method of interpolation. ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next'),
        or an integer specifying the order of the spline interpolator to be used.

    int_fact: int
        Factor by which the interpolated array is larger than the original.

    max_offset: float
        Maximal offset by which the interpolated intervals can be moved to be aligned with each other. Offset relative to the datapoints of the original data.

    Returns
    -------
    aligned_snips: two-dimensional nparray
        The processed intervals, shape (interval#, interval length).
    heights: array
        Peak-to-peak heights of the aligned snippets.
    """
    snippets = []
    heights = np.zeros(len(event_locations))
    cut_width = [-cut_width, cut_width]
    #alignwidth = int(np.ceil((max_offset) * int_fact))
    alignwidth = 50

    for pos in event_locations.astype('int'):
        snippets.append(data[pos+cut_width[0]:pos+cut_width[1]])
    ipoled_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact-int_fact))

    for i, snip in enumerate(snippets):
        if len(snip) < (cut_width[1]-cut_width[0]):
            # pad snippets at the block borders with zeros (was an if/if/else chain
            # that overwrote the padded first snippet and never matched the last one)
            if i == 0:
                snip = np.concatenate([np.zeros((cut_width[1]-cut_width[0]) - len(snip)), np.array(snip)])
            elif i == len(snippets)-1:
                snip = np.concatenate([snip, np.zeros((cut_width[1]-cut_width[0]) - len(snip))])
            else:
                snip = np.zeros(cut_width[1]-cut_width[0])

        interpoled_snip = interpolated_array(snip, int_met, int_fact)  # was hardcoded to a factor of 10
        intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip)
        if intsnipheight == 0:
            intsnipheight = 1
        # normalize to a maximum of 0 and a height of 1
        interpoled_snip = (interpoled_snip - np.max(interpoled_snip)) / intsnipheight
        ipoled_snips[i] = interpoled_snip

    mean = np.mean(ipoled_snips, axis=0)

    aligned_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact - 2*alignwidth - int_fact))

    for i, interpoled_snip in enumerate(ipoled_snips):
        # align each snippet to the mean snippet via the peak of their cross-correlation
        cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean)
        offset = -alignwidth + np.argmax(cc)
        aligned_snip = interpoled_snip[alignwidth-offset:-alignwidth-offset] if offset != -alignwidth else interpoled_snip[2*alignwidth:]
        if len(aligned_snip[~np.isnan(aligned_snip)]) > 0:
            aligned_snips[i] = aligned_snip
        try:
            heights[i] = np.max(interpoled_snip[alignwidth-offset:-alignwidth-offset]) - np.min(interpoled_snip[alignwidth-offset:-alignwidth-offset])
        except ValueError:
            heights[i] = np.max(interpoled_snip[2*alignwidth:]) - np.min(interpoled_snip[2*alignwidth:])
    return aligned_snips, heights
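
# Example (hedged sketch, reusing `data` and `eods` from above): cut, upsample
# and align 60-sample windows around each event.
#
#   snips, snip_heights = cut_snippets(data, eods[0], 30, int_met='cubic',
#                                      int_fact=10, max_offset=20)
#   # snips.shape == (len(eods[0]), 60*10 - 2*50 - 10)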

def pc(dataset):
    """
    Calculates the first 10 principal components of a dataset using
    scikit-learn's principal component analysis.

    Parameters
    ----------
    dataset: ndarray
        Dataset of which the principal components are to be calculated.
        Two-dimensional array of shape (observations, features).

    Returns
    -------
    pc_comp: ndarray
        Principal components of the dataset, shape (observations, 10).
    """
    pca = PCA(n_components=10)
    pc_comp = pca.fit_transform(dataset)

    return pc_comp  #, pca
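
# Example (hedged sketch, reusing `snips` from the cut_snippets example above):
#
#   pcs = pc(snips)   # shape (len(snips), 10); requires at least 10 snippets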

def chebyshev(dataset):
    """
    Fits a Chebyshev polynomial of degree 5 to each observation of the dataset
    and returns the 6 polynomial coefficients per observation as features,
    shape (observations, 6).
    """
    x = range(len(dataset[0]))
    npol = 5
    p = np.zeros((len(dataset), npol+1))
    for i, s in enumerate(dataset):
        cheb = np.polynomial.chebyshev.Chebyshev.fit(x, s, npol)
        p[i] = cheb.coef

    return p  #, pca
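
# Example (hedged sketch): Chebyshev coefficients as a compact waveform feature,
# used instead of the principal components in analyze_pulse_data below.
#
#   feats = chebyshev(snips)   # shape (len(snips), 6)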

def dbscan(pcs, events, eps, min_samples, takekm):
    """
    Calculates clusters of high spatial density of the given observations in
    their feature space. For example, the first few principal components of
    the data could be used as features for the classification.

    Parameters
    ----------
    pcs: ndarray
        Features of the observations, shape (samples, features).
    events: ndarray
        The events the features belong to (unused here; kept for interface
        compatibility with cluster_events).
    eps: float
        Maximum distance between two samples for them to be considered neighbors.
    min_samples: int
        Number of samples in a neighborhood for a point to be considered a core point.
    takekm:
        Unused.

    Returns
    -------
    labels: ndarray
        Cluster labels of each observation (-1 for noise).
    comp_means: ndarray
        Mean feature vector of each cluster.
    """
    X = pcs

    # compute DBSCAN; keyword arguments, since min_samples is keyword-only in current scikit-learn
    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    labels = clusters.labels_

    comp = clusters.components_

    comp_means = np.zeros((len(np.unique(labels)) - 1, comp.shape[1]))

    for i in range(len(np.unique(labels)) - 1):
        comp_means[i] = np.mean(pcs[labels == i], axis=0)

    return labels, comp_means

def cluster_events(features, events, eps, min_samples, takekm, method='DBSCAN'):
    """
    Clusters the given events using the given feature space and the clustering
    algorithm of choice, and returns the assigned cluster labels.

    Parameters
    ----------
    features: ndarray
        Features of the events, shape (samples, features).
    events: ndarray
        The events to be clustered.
    eps, min_samples, takekm:
        Passed on to dbscan().
    method: string
        Clustering algorithm to use; only 'DBSCAN' is implemented so far.

    Returns
    -------
    labels: ndarray
        Cluster labels of each event.
    clusters: ndarray
        Mean feature vector of each cluster.
    """
    ######################## function maybe could be even more generic? (dependent on the datatype of "events")
    if method == 'DBSCAN':
        labels, clusters = dbscan(features, events, eps, min_samples, takekm)
    elif method == 'kMean':
        # to be implemented
        #labels = kmeans([])
        raise NotImplementedError('kMean clustering is not implemented yet')
    return labels, clusters
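
# Example (hedged sketch, mirroring the call in analyze_pulse_data below):
#
#   labels, clusters = cluster_events(feats, eods, eps=0.1, min_samples=10,
#                                     takekm=False, method='DBSCAN')
#   # labels[i] == -1 marks events that DBSCAN considers noise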

class Peaklist(object):
    def __init__(self, peaklist):
        self.list = peaklist
        self.lastofclass = {}
        self.lastofclassx = {}
        self.classesnearby = []
        self.classesnearbyx = []
        self.classesnearbypccl = []
        self.classlist = []
        self.classamount = 0
        self.shapes = {}
        self.len = 0  # length of the corresponding analysis block; read by connect_blocks()

def connect_blocks(oldblock):
    """
    Used to connect analysis blocks:
    transfers data from the previous analysis block to the current block.
    """
    newblock = Peaklist([])
    newblock.lastofclass = oldblock.lastofclass
    newblock.lastofclassx = oldblock.lastofclassx
    newblock.classesnearby = oldblock.classesnearby
    newblock.classesnearbypccl = oldblock.classesnearbypccl
    newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx]
    newblock.classamount = oldblock.classamount
    newblock.len = oldblock.len
    return newblock

def alignclusterlabels(labels, peaklist, peaks, data='test'):
    """
    Used to connect analysis blocks:
    changes the labels of the clusters in the current block to fit with the labels of the previous block.
    """

    # take the first second of the new peak data
    overlapamount = len(peaks[0, peaks[0] < 30000])
    if overlapamount == 0:
        return None

    old_peaklist = copy.deepcopy(peaklist)  # redundant
    overlappeaks = copy.deepcopy(peaks[:, :overlapamount])
    overlap_peaklist = copy.deepcopy(old_peaklist)

    # delete the cluster classifications of the overlap class
    overlappeaks[3] = [-1]*len(overlappeaks[0])

    # set nearby pc classes to -1
    overlap_peaklist.classesnearbypccl = [-1]*len(overlap_peaklist.classesnearbypccl)

    # create peak labels using the ampwalk classifier
    classified_overlap = ampwalkclassify3_refactor(overlappeaks, overlap_peaklist, glue=True)[0]

    # for each class
    for cl in np.unique(classified_overlap[4]):

        # indices of the peaks assigned the current class by the ampwalk classification
        labelindex = np.where(classified_overlap[4] == cl)[0]

        # pc clustering labels that were originally given to those peaks
        label = labels[labelindex]

        # index of a peak with the most common translation from ampwalk class to pc clustering class
        labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]]

        # pc clustering label belonging to the class cl in the new block
        newlabel = labels[labelindex]

        try:
            peaklist.classesnearbypccl[old_peaklist.classesnearby.index(cl)] = newlabel
        except ValueError:
            pass  # cl is not among the nearby classes of the previous block

def ampwalkclassify3_refactor(peaks, peaklist, glue=False):
    """
    Classifies peaks/EOD events into different classes by their amplitude.

    Takes the list of peaks and the list of properties of the last analysis block.
    Classifies the single peaks in the direction of their occurrence in time,
    based on their amplitude and their previously assigned class based on their
    waveform (... using the method cluster_events on the principal components of
    the snippets around the single peaks).

    Method:
    Calculates the difference in amplitude between the current peak and the
    different amplitude classes that are nearby. Creates a new amplitude class
    if no class is close enough. Creates no new class if the peak's waveform
    class is a noise class of the DBSCAN algorithm. Does not compare peaks of
    different waveform classes.

    --can be used without prior waveform classes, resulting in a classification
    solely based on the amplitude development; the pc classes need to be set to
    the same class for this, .... . Not practical, but it should be possible to
    split this up into more general functions.
    """

    classamount = peaklist.classamount
    lastofclass = peaklist.lastofclass
    lastofclassx = peaklist.lastofclassx
    comperr = 1
    classesnearby = peaklist.classesnearby
    classesnearbyx = peaklist.classesnearbyx
    classesnearbypccl = peaklist.classesnearbypccl
    classes = np.zeros((len(peaks[0])))
    pcclasses = peaks[3]
    positions = peaks[0]
    heights = peaks[2]
    cl = 0
    maxdistance = 30000  # maximum distance to possibly belong to the same class
    factor = 1.6  # factor by which a peak fits into a class, e.g.: classheight = 1, factor = 2 => peaks accepted in the range (0.5, 2)

    # loop through all the new peaks
    for peaknum, p in enumerate(peaks.T):

        if len(lastofclass) == 0:
            lastofclass[1] = deque()
            lastofclassx[1] = deque()
            lastofclass[1].append(heights[peaknum])
            lastofclassx[1].append(positions[peaknum])
            classesnearby.append(1)
            classesnearbyx.append(-1)
            classesnearbypccl.append(pcclasses[peaknum])
            classes[peaknum] = 1
            classamount += 1
            continue

        # nearby classes only count if they are within maxdistance;
        # iterate backwards, so that pop() does not skip any elements
        for i in range(len(classesnearby) - 1, -1, -1):
            if (positions[peaknum] - classesnearbyx[i]) > maxdistance:
                classesnearby.pop(i)
                classesnearbyx.pop(i)
                classesnearbypccl.pop(i)

        # compute the mean isi of each class by taking the last 3 pulses in that class
        lastofclassisis = []
        for i in classesnearby:
            lastofclassisis.append(np.median(np.diff(lastofclassx[i])))

        meanisi = np.mean(lastofclassisis)

        # adapt the maximum class distance to 20 mean isis, if the mean isi is in a plausible range
        if 32000 > 40*meanisi > 6000:
            maxdistance = 20*meanisi

        cl = 0  # 'no class'
        comperr = 100
        clnrby = np.unique(classesnearby)

        # TODO this assigns peaks with no class to the last class if there are multiple candidates.
        # can I fix this?
        for i in clnrby:

            # if the class of the current peak is equal to the currently evaluated class, or the current peak has no class
            if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum]:
                classmean = np.mean(lastofclass[i])  # mean height of the class

                # difference between the current peak height and the average class height
                logerror = np.abs(np.log2(heights[peaknum]) - np.log2(classmean))
                logthresh = np.log2(factor)
                relerror = logerror

                # if the peak heights are similar
                if logerror < logthresh:  ## same-class condition
                    # if the peaks are close together in distance (20*isi)
                    if relerror < comperr and (positions[peaknum] - classesnearbyx[classesnearby.index(i)]) < maxdistance:
                        # keep the same class (or, in case of no class, assign that class)
                        cl = i
                        comperr = relerror

        # if a pc class is assigned to the peak
        if pcclasses[peaknum] != -1:
            # if an existing class was kept
            if cl != 0:
                # append this peak to the peaklist of the right class (only keep the last 3 peaks)
                if len(lastofclass[cl]) >= 3:
                    lastofclass[cl].popleft()
                if len(lastofclassx[cl]) >= 3:
                    lastofclassx[cl].popleft()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
            else:
                # if the peak fits none of the existing classes, create a new class
                cl = classamount+1
                classamount = cl
                lastofclass[cl] = deque()
                lastofclassx[cl] = deque()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])

            # if there are 12 or more nearby classes, delete the class that is furthest away
            if len(classesnearby) >= 12:  # poorly implemented?
                minind = classesnearbyx.index(min(classesnearbyx))
                del lastofclass[classesnearby[minind]]
                del lastofclassx[classesnearby[minind]]
                classesnearby.pop(minind)
                classesnearbyx.pop(minind)
                classesnearbypccl.pop(minind)

            # update the position of the class in the nearby lists
            try:
                ind = classesnearby.index(cl)
                classesnearbyx[ind] = positions[peaknum]
            except ValueError:
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])

        # if no pc class is assigned to the peak
        elif glue == True:
            # if a class was assigned through the peak amplitude method
            if cl != 0:
                # assign that class to the peak
                classes[peaknum] = cl
            else:
                # create a new class
                cl = classamount+1
                classamount = cl
                lastofclass[cl] = deque()
                lastofclassx[cl] = deque()
                lastofclass[cl].append(heights[peaknum])
                lastofclassx[cl].append(positions[peaknum])
                classes[peaknum] = cl
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
            if len(classesnearby) >= 12:  # poorly implemented?
                minind = classesnearbyx.index(min(classesnearbyx))
                del lastofclass[classesnearby[minind]]
                del lastofclassx[classesnearby[minind]]
                classesnearby.pop(minind)
                classesnearbyx.pop(minind)
                classesnearbypccl.pop(minind)
            try:
                ind = classesnearby.index(cl)
                classesnearbyx[ind] = positions[peaknum]
            except ValueError:
                classesnearby.append(cl)
                classesnearbyx.append(positions[peaknum])
                classesnearbypccl.append(pcclasses[peaknum])
    peaklist.lastofclass = lastofclass
    peaklist.lastofclassx = lastofclassx
    peaklist.classesnearby = classesnearby
    peaklist.classesnearbyx = classesnearbyx
    peaklist.classesnearbypccl = classesnearbypccl
    peaklist.classlist = classes  # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list)
    peaklist.classamount = classamount
    peaks = np.append(peaks, classes[None, :], axis=0)

    return peaks, peaklist

def discard_wave_pulses(peaks, data):
    """
    Discards events from a pulse event list which are unusually wide (wider than a tenth of the inter-pulse interval), which indicates a wave-type EOD instead of a pulse-type EOD.
    """
    deleteclasses = []
    for cl in np.unique(peaks[3]):
        peaksofclass = peaks[:, peaks[3] == cl]
        isi = np.diff(peaksofclass[0])
        isi_mean = np.mean(isi)
        widepeaks = 0
        isi_tenth_area = lambda x, isi: np.arange(np.floor(x-0.1*isi), np.ceil(x+0.1*isi), 1, dtype=int)
        for p in peaksofclass.T:
            data = np.array(data)
            try:
                for dp_around in data[isi_tenth_area(p[0], isi_mean)]:
                    if dp_around <= p[1]-p[2]:
                        break
                else:
                    # the for-else fires only when no sample around the peak drops below
                    # the peak amplitude minus the event height, i.e. the peak is wide
                    # (the else was previously attached to the try instead of the for)
                    widepeaks += 1
            except (IndexError, ValueError):
                pass
        if widepeaks > len(peaksofclass)*0.5:
            deleteclasses.append(cl)
    for cl in deleteclasses:
        peaks = peaks[:, peaks[3] != cl]
    return peaks

def plot_events_on_data(peaks, data, colors):
    """
    Plots the detected events onto the data timeseries. If the events are classified, the classes are plotted in different colors and the class -1 (not belonging to a cluster) is plotted in black.
    """
    plt.plot(range(len(data)), data, color='black')
    if len(peaks) > 3:
        classlist = np.array(peaks[3], dtype=int)
        if len(peaks) > 4:
            classlist = np.array(peaks[4], dtype=int)

        for cl in np.unique(classlist):
            if cl == -1:
                color = 'black'
            else:
                color = colors[cl]
            peaksofclass = peaks[:, classlist == cl]
            plt.plot(peaksofclass[0], peaksofclass[1], '.', color=color, ms=20, label=cl)
        plt.legend()
    else:
        plt.scatter(peaks[0], peaks[1], color='red')
    plt.show()
    plt.close()

def discard_short_classes(events, minlen):
    """
    Returns all events except those which are in classes with fewer than minlen members.
    """

    u, c = np.unique(events[-1], return_counts=True)

    smallclasses = u[c < minlen]
    classlist = events[-1]

    delete = np.zeros(len(classlist))

    for cl in smallclasses:
        delete[classlist == cl] = 1
    events = events[:, delete != 1]

    return events

def path_leaf(path):
    """Returns the file name of a path, regardless of a trailing path separator."""
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

def save_EOD_events_to_npmmp(EOD_Events, eods_len, startblock, datasavepath, mmpname='eods.npmmp'):
    """Appends the given EOD events to a numpy memmap on disk; creates the memmap on the first block."""
    n_EOD_Events = len(EOD_Events[0])
    savepath = datasavepath+"/"+mmpname
    if startblock:
        eods = np.memmap(savepath,
                         dtype='float64', mode='w+',
                         shape=(4, n_EOD_Events), order='F')
    else:
        dtypesize = 8  # a float64 is 8 bytes long (was 4 for float32)
        eods = np.memmap(savepath, dtype='float64', mode='r+',
                         offset=dtypesize*eods_len*4,
                         shape=(4, n_EOD_Events), order='F')
    eods[:] = EOD_Events
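
# Example (hedged sketch): read the accumulated events back from disk.
# `total_len` stands for the running total of saved events (eods_len in
# analyze_pulse_data below).
#
#   eods = np.memmap(datasavepath + '/eods.npmmp', dtype='float64',
#                    mode='r', shape=(4, total_len), order='F')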

def create_threshold_array(data, window, threshold):
    """Computes a windowed threshold array: four standard deviations of each window, but at least the given threshold."""
    thr_array = np.zeros(data.shape)
    for i in range(int(len(data)/window)):
        thr_array[i*window:(i+1)*window] = np.std(data[i*window:(i+1)*window])*4
    thr_array[thr_array < threshold] = threshold
    return thr_array
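
# Example (hedged sketch): a per-window threshold, four standard deviations of
# each 30000-sample window, clipped from below at 0.04. Peaks could then be
# filtered as in the commented-out line in analyze_pulse_data below:
#
#   thr = create_threshold_array(data, 30000, 0.04)
#   peaks = peaks[:, peaks[1] > thr[peaks[0].astype(int)]]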

def alignlabels(labels, clusters, old_labels, old_clusters, maxlabel):
    """
    Maps the cluster labels of the current block onto the labels of the
    previous block by matching cluster means; clusters without a close match
    (Euclidean distance >= 0.1) get a new label, starting at maxlabel.
    """

    old_labels = old_labels[old_labels != -1]

    labels_new = -1*np.ones(labels.shape)
    newclass = maxlabel

    for curlabel, cluster in enumerate(clusters):
        n = np.linalg.norm(old_clusters - cluster, axis=1)

        if np.min(n) < 0.1:
            labels_new[labels == curlabel] = old_labels[np.argmin(n)]
        else:
            labels_new[labels == curlabel] = newclass
            newclass = newclass + 1

    return labels_new
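
# Example (hedged sketch): relabel the second block so that clusters matching
# the first block keep their old labels.
#
#   labels2 = alignlabels(labels2, clusters2, np.unique(labels1), clusters1,
#                         maxlabel=np.max(labels1) + 1)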

def analyze_pulse_data(filepath, deltat=10, thresh=0.04, starttime=0, endtime=0, cluster_thresh=0.1, savepath=False, save=False, npmmp=False, plot_eods=False, plot_features=False, plot_steps=False, plot_result=False):

    """
    Analyzes the timeseries of a pulse fish EOD recording.

    Parameters
    ----------
    filepath: string
        Path to a WAV file with the recorded timeseries.

    deltat: int, optional
        Duration of a single analysis block in seconds (less than a minute is recommended, due to the principal component clustering of the EOD waveforms).

    thresh: float, optional
        Minimum threshold for the peak detection (if computing frequencies, a tiny bit lower than the desired threshold is recommended; then discard the EODs below the desired threshold after computing the frequencies for each EOD).

    starttime: int or str of int, optional
        Time into the data from where to start the analysis, in seconds.

    endtime: int or str of int, optional
        Time into the data where to end the analysis, in seconds; must be larger than starttime.

    cluster_thresh: float, optional
        Threshold that decides the cluster density of the EOD waveform features.

    savepath: Boolean or str, optional
        Path where results and intermediate results are saved; only needed if save or npmmp is True.
        A string specifies a relative path to the directory where results and intermediate results will be stored,
        False uses the preset savepath, which is ~/filepath/,
        and True asks for the savepath as input while the script is running.

    save: Boolean, optional
        True to save the results into a npy file at the savepath.

    npmmp: Boolean, optional
        True to save intermediate results into a numpy memmap at the savepath; only recommended in case of memory overflow.

    plot_steps: Boolean, optional
        True to plot the results of each analysis block.

    plot_result: Boolean, optional
        True to plot the results of the final analysis. Not recommended for long recordings due to %TODO

    plot_eods: Boolean, optional
        True to plot the EOD waveforms for each analysis block.

    plot_features: Boolean, optional
        True to plot the EOD waveform features for each analysis block.

    Returns
    -------
    eods: numpy array
        2D numpy array. First axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class); second axis: EODs in chronological order.
    """

    # parameters for the analysis
    # thresh is taken from the function arguments (a hardcoded thresh = 0.04
    # here previously overrode whatever the caller passed in)
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderlab.dataloader.DataLoader
    verbose = 0
    channel = 0
    ultimate_threshold = thresh+0.01
    startblock = 0
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    eods_len = 0
    if savepath == False:
        datasavepath = filename[:-4]
    elif savepath == True:
        datasavepath = input('Where should the results be saved? Specify a path to a directory: ')
    else:
        datasavepath = savepath

    if save and (os.path.exists(datasavepath+"/eods8_"+filename[:-3]+"npy") or os.path.exists(datasavepath+"/eods5_"+filename[:-3]+"npy")):
        print('There already exists an analyzed file, aborting. Change the code if you don\'t want to abort.')
        quit()
    if npmmp:
        #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
        proceed = 'y'
        if proceed != 'y':
            quit()
    # starting analysis

    with DataLoader(filepath, deltat, 0.0, verbose) as data:

        samplerate = data.samplerate

        # selected time interval
        if timegiven == True:
            parttime1 = int(starttime*samplerate)
            parttime2 = int(endtime*samplerate)
            data = data[parttime1:parttime2]  # keep the channel dimension; it is selected per block below

        # split the data into blocks
        nblock = int(deltat*samplerate)
        if len(data) % nblock != 0:
            blockamount = len(data)//nblock + 1
        else:
            blockamount = len(data)//nblock
        #fish = ProgressFish(total=blockamount)

        pca_cur = 0
        progress = 0

        for idx in range(0, blockamount):
            print('BLOCK %i/%i' % (idx+1, blockamount))

            blockdata = data[idx*nblock:(idx+1)*nblock, channel]
            if progress < (idx*100 // blockamount):
                progress = (idx*100)//blockamount
                progressstr = ' Filestatus: '
                # fish.animate(amount=idx, dexextra=progressstr)

            # detect peaks and troughs above the absolute threshold
            #thresh_array = create_threshold_array(blockdata, 30000, thresh)
            pk, tr = detect_peaks(blockdata, thresh)
            troughs = tr

            if len(pk) > 3:

                peaks = makeeventlist(pk, tr, blockdata, peakwidth)
                peakindices, peakx, peakh = discardnearbyevents(peaks[0], peaks[1], peakwidth)
                peaks = peaks[:, peakindices]

                if len(peaks[0]) > 0:  # was len(peaks) > 0, which is always true for the (3, n) event array

                    #if idx > startblock:
                    #    # add a new block as a copy of the old list; the only difference is the peak indexing, as it refers to the last block
                    #    peaklist = connect_blocks(peaklist)
                    #else:
                    #    peaklist = Peaklist([])

                    aligned_snips, snip_heights = cut_snippets(blockdata, peaks[0], 30, int_met="cubic", int_fact=10, max_offset=20)

                    pols = chebyshev(aligned_snips)

                    feats = np.zeros((pols.shape[0], pols.shape[1]+1))
                    feats[:, :6] = pols
                    feats[:, -1] = snip_heights*0.1
                    #pcs, pca_cur = pc(aligned_snips)  #pc_refactor(aligned_snips)

                    minpeaks = 3 if deltat < 2 else 10

                    labels, clusters = cluster_events(feats, peaks, cluster_thresh, minpeaks, False, method='DBSCAN')
                    peaks = np.append(peaks, [labels], axis=0)

                    if idx > startblock:
                        # instead of the peaklist I would have to add the previous cluster means
                        # alignclusterlabels(labels, peaklist, peaks, data=blockdata)
                        peaks[-1] = alignlabels(labels, clusters, old_labels, old_clusters, maxlabel)

                    old_labels = np.unique(peaks[-1])
                    old_clusters = clusters
                    # I would want peaks updated here to have the right pc classes as well..

                    #peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist)  # classification by amplitude

                    minlen = 5
                    peaks = discard_short_classes(peaks, minlen)

                    if len(peaks[0]) > 0:
                        peaks = discard_wave_pulses(peaks, blockdata)
                        # delete peaks under the absolute threshold
                        #thresh_array = create_threshold_array(blockdata, 30000)
                        #peaks = peaks[:, peaks[1] > thresh_array[list(map(int, peaks[0]))]]

                    cmap = plt.get_cmap('jet')
                    colors = cmap(np.linspace(0, 1.0, 10))

                    if plot_steps == True:
                        plot_events_on_data(peaks, blockdata, colors)

                    for lab in np.unique(labels):
                        if lab == -1:
                            c = 'k'
                            z = -1
                        else:
                            c = colors[lab]
                            z = 1
                        if plot_eods == True:
                            plt.plot(range(aligned_snips.shape[1]), np.transpose(aligned_snips[labels == lab]), color=c, zorder=z, label=lab)

                    if plot_eods == True:
                        plt.title('Detected and classified EODs')
                        plt.xlabel('time [ms]')
                        plt.ylabel('signal (normalized)')

                        phandles, plabels = plt.gca().get_legend_handles_labels()
                        by_label = dict(zip(plabels, phandles))
                        plt.legend(by_label.values(), by_label.keys())
                        plt.show()

                    for lab in np.unique(labels):
                        if lab == -1:
                            c = 'k'
                            z = -1
                        else:
                            c = colors[lab]
                            z = 1
                        if plot_features == True:
                            plt.plot(np.squeeze(np.transpose(feats[labels == lab])), color=c, zorder=z, label=lab)

                    if plot_features == True:
                        plt.title('EOD Features')
                        plt.xlabel('feature [#]')
                        plt.ylabel('value [a.u.]')

                        phandles, plabels = plt.gca().get_legend_handles_labels()
                        by_label = dict(zip(plabels, phandles))
                        plt.legend(by_label.values(), by_label.keys())
                        plt.show()

                    #peaklist.len = nblock
                    worldpeaks = np.copy(peaks)
                    worldpeaks[0] = worldpeaks[0] + (idx*nblock)
                    # delete the classification that only considers the wave shape
                    #thisblock_eods = np.delete(worldpeaks, 3, 0)
                    thisblock_eods = worldpeaks

                    if idx == startblock:
                        maxlabel = np.max(peaks[-1]) + 1
                    else:
                        maxlabel = np.max([maxlabel, (np.max(peaks[-1]) + 1)])

                    if npmmp:
                        if idx == startblock:
                            if not os.path.exists(datasavepath):
                                os.makedirs(datasavepath)
                            mmpname = "eods_"+filename[:-3]+"npmmp"
                        # save the peaks of the currently buffered part to a numpy memmap on disk
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len, idx == startblock, datasavepath, mmpname)
                    else:
                        if idx > 0:
                            all_eods = np.concatenate((all_eods, thisblock_eods), axis=1)
                        else:
                            all_eods = thisblock_eods
                    # count the saved events in both modes, so that the save branch below also works without npmmp
                    eods_len += len(thisblock_eods[0])

        if plot_result == True:  # the final plot; was gated by plot_steps, while the docstring describes plot_result
            print('FINAL RESULTS')
            plot_events_on_data(all_eods, data[:, channel], colors)
        #plot_events_on_data(all_eods, data)
        print('Returning the analyzed EODs. Calculate frequencies using all of these, but discard the data from the EODs within the lowest few percent of amplitude.')

    if npmmp:
        all_eods = np.memmap(datasavepath+'/'+mmpname, dtype='float64', mode='r+', shape=(4, eods_len), order='F')
    if save == 1:
        if not os.path.exists(datasavepath):  # was a stray path derived from the file name, while np.save below targets datasavepath
            os.makedirs(datasavepath)
        if eods_len > 0:
            np.save(datasavepath+"/eods8_"+filename[:-3]+"npy", all_eods)
            print('Saved!')
        else:
            print('not saved')
    return all_eods

def main():
    eods = analyze_pulse_data(sys.argv[1], save=True, npmmp=True)
    print(eods)


if __name__ == '__main__':
    main()