Module thunderfish.thunderlogger
thunderlogger
Detect segments of interest in large data files and extract EOD waveforms.
Expand source code
"""# thunderlogger
Detect segments of interest in large data files and extract EOD waveforms.
"""
import sys
import os
import glob
import argparse
import traceback
import datetime as dt
import numpy as np
import pandas as pd
from scipy.signal import butter, lfilter
from types import SimpleNamespace
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from thunderlab.configfile import ConfigFile
from thunderlab.dataloader import DataLoader
from thunderlab.tabledata import TableData, write_table_args
from thunderlab.eventdetection import hist_threshold
from .version import __version__, __year__
from .eodanalysis import save_eod_waveform, save_wave_fish, save_pulse_fish
from .eodanalysis import save_wave_spectrum, save_pulse_spectrum, save_pulse_peaks
from .eodanalysis import load_eod_waveform, load_wave_fish, load_pulse_fish
from .eodanalysis import load_wave_spectrum, load_pulse_spectrum, load_pulse_peaks
from .eodanalysis import wave_similarity, pulse_similarity
from .thunderfish import configuration, save_configuration
from .thunderfish import detect_eods, remove_eod_files
def add_thunderlogger_config(cfg, detection_thresh='auto',
default_thresh=0.002, thresh_fac=3.0,
thresh_nbins=500):
"""Add parameters needed for by thunderlogger.
Parameters
----------
cfg: ConfigFile
The configuration.
detection_thresh: float or 'auto'
Only data segments with standard deviation larger than this value
are analyzed for EODs. If set to 'auto' a threshold is computed
from all the data segments of a recording channel.
default_thresh: float
Threshold that is used if "detection_thresh" is set to "auto" and
no data are available.
thresh_fac: float
The threshold for analysing data segments is set to the mean of the
most-likely standard deviations plus this factor times the corresponding
standard deviation.
thresh_nbins: int
The number of bins used to compute a histogram of the standard
deviations of the data segments, from which the mean and standard
deviation are estimated for automatically computing a threshold.
"""
cfg.add_section('Thunderlogger:')
cfg.add('detectionThreshold', detection_thresh, '', 'Only analyse data segements with a standard deviation that is larger than this threshold. If set to "auto" compute threshold from all the standard deviations of a recording channel.')
cfg.add('detectionThresholdDefault', default_thresh, '', 'Threshold that is used if "detectionThreshold" is set to "auto" and no data are available.')
cfg.add('detectionThresholdStdFac', thresh_fac, '', 'An automatically computed threshold for analysing data segments is set to the mean of the most-likely standard deviations plus this factor times the corresponding standard deviation.')
cfg.add('detectionThresholdNBins', thresh_nbins, '', 'The number of bins used to compute a histogram of the standard deviations of the data segments, from which the mean and standard deviation are estimated for automatically computing a threshold.')
cfg.add('startTime', 'none', '', 'Provide a start time for the recordings overwriting the meta data of the data files (YYYY-mm-ddTHH:MM:SS format).')
def extract_eods(files, thresholds, stds_only, cfg, verbose, plot_level,
thresh=0.002, max_deltaf=1.0, max_dist=0.00005,
deltat_max=dt.timedelta(minutes=5), start_time=None):
t0s = []
stds = None
supra_thresh = None
wave_fishes = None
pulse_fishes = None
if start_time is None:
# XXX we should read this from the meta data:
filename = os.path.splitext(os.path.basename(files[0]))[0]
times = filename.split('-')[1]
start_time = dt.datetime.strptime(times, '%Y%m%dT%H%M%S')
toffs = start_time
t1 = start_time
unit = None
for file in files:
try:
with DataLoader(file) as sf:
# analyze:
sys.stdout.write(file + ': ')
unit = sf.unit
if max_dist < 1.1/sf.samplerate:
max_dist = 1.1/sf.samplerate
window_size = cfg.value('windowSize')
ndata = int(window_size * sf.samplerate)
step = ndata//2
b, a = butter(1, 10.0, 'hp', fs=sf.samplerate, output='ba')
if stds is None:
stds = [[] for c in range(sf.channels)]
supra_thresh = [[] for c in range(sf.channels)]
if wave_fishes is None:
wave_fishes = [[] for c in range(sf.channels)]
if pulse_fishes is None:
pulse_fishes = [[] for c in range(sf.channels)]
for k, data in enumerate(sf.blocks(ndata, step)):
sys.stdout.write('.')
sys.stdout.flush()
t0 = toffs + dt.timedelta(seconds=k*step/sf.samplerate)
t1 = t0 + dt.timedelta(seconds=ndata/sf.samplerate)
t0s.append(t0)
for channel in range(sf.channels):
if thresholds:
thresh = thresholds[channel]
fdata = lfilter(b, a, data[:,channel] - np.mean(data[:ndata//20,channel]))
sd = np.std(fdata)
stds[channel].append(sd)
supra_thresh[channel].append(1 if sd > thresh else 0)
if stds_only:
continue
if sd > thresh:
# clipping:
min_clip = cfg.value('minClipAmplitude')
if min_clip == 0.0:
min_clip = cfg.value('minDataAmplitude')
max_clip = cfg.value('maxClipAmplitude')
if max_clip == 0.0:
max_clip = cfg.value('maxDataAmplitude')
name = file
# detect EODs in the data:
_, _, _, eod_props, mean_eods, spec_data, peak_data, _, _, _ = \
detect_eods(data[:,channel], sf.samplerate,
min_clip, max_clip,
name, verbose, plot_level, cfg)
first_fish = True
for props, eod, spec, peaks in zip(eod_props, mean_eods,
spec_data, peak_data):
fish = None
fish_deltaf = 100000.0
if props['type'] == 'wave':
for wfish in wave_fishes[channel]:
deltaf = np.abs(wfish.props['EODf'] - props['EODf'])
if deltaf < fish_deltaf:
fish_deltaf = deltaf
fish = wfish
if fish_deltaf > max_deltaf:
fish = None
peaks = None
else:
fish_dist = 10000.0
for pfish in pulse_fishes[channel]:
ddist = np.abs(pfish.props['P1-P1-dist'] -
props['P1-P1-dist'])
if ddist < fish_dist:
fish_dist = ddist
fish_deltaf = np.abs(pfish.props['EODf'] -
props['EODf'])
fish = pfish
if fish_dist > max_dist or \
fish_deltaf > max_deltaf:
fish = None
spec = None
if fish is not None and \
t0 - fish.times[-1][1] < deltat_max:
if fish.times[-1][1] >= t0 and \
np.abs(fish.times[-1][2] - props['EODf']) < 0.5 and \
fish.times[-1][3] == channel and \
fish.times[-1][4] == file:
fish.times[-1][1] = t1
else:
fish.times.append([t0, t1, props['EODf'], channel, file])
if props['p-p-amplitude'] > fish.props['p-p-amplitude']:
fish.props = props
fish.waveform = eod
fish.spec = spec
fish.peaks = peaks
else:
new_fish = SimpleNamespace(props=props,
waveform=eod,
spec=spec,
peaks=peaks,
times=[[t0, t1, props['EODf'], channel, file]])
if props['type'] == 'pulse':
pulse_fishes[channel].append(new_fish)
else:
wave_fishes[channel].append(new_fish)
if first_fish:
sys.stdout.write('\n ')
first_fish = False
sys.stdout.write('%6.1fHz %5s-fish @ %s\n ' %
(props['EODf'], props['type'],
t0.strftime('%Y-%m-%dT%H:%M:%S')))
toffs += dt.timedelta(seconds=len(sf)/sf.samplerate)
sys.stdout.write('\n')
sys.stdout.flush()
except EOFError as error:
# XXX we need to update toffs by means of the metadata of the next file!
sys.stdout.write(file + ': ' + str(error) + '\n')
if pulse_fishes is not None and len(pulse_fishes) > 0:
pulse_fishes = [[pulse_fishes[c][i] for i in
np.argsort([fish.props['EODf'] for fish in pulse_fishes[c]])]
for c in range(len(pulse_fishes))]
if wave_fishes is not None and len(wave_fishes) > 0:
wave_fishes = [[wave_fishes[c][i] for i in
np.argsort([fish.props['EODf'] for fish in wave_fishes[c]])]
for c in range(len(wave_fishes))]
return pulse_fishes, wave_fishes, start_time, toffs, t0s, stds, supra_thresh, unit
def save_times(times, idx, output_basename, name, **kwargs):
td = TableData()
td.append('index', '', '%d', [idx] * len(times))
td.append('tstart', '', '%s',
[t[0].strftime('%Y-%m-%dT%H:%M:%S') for t in times])
td.append('tend', '', '%s',
[t[1].strftime('%Y-%m-%dT%H:%M:%S') for t in times])
if len(times[0]) > 2:
td.append('EODf', 'Hz', '%.1f', [t[2] for t in times])
td.append('device', '', '%s',
[name for t in times])
if len(times[0]) > 2:
td.append('channel', '', '%d', [t[3] for t in times])
td.append('file', '', '%s', [t[4] for t in times])
fp = output_basename + '-times'
if idx is not None:
fp += '-%d' % idx
td.write(fp, **kwargs)
def load_times(file_path):
data = TableData(file_path).data_frame()
data['index'] = data['index'].astype(int)
data['tstart'] = pd.to_datetime(data['tstart'])
data['tstart'] = pd.Series(data['tstart'].dt.to_pydatetime(), dtype=object)
data['tend'] = pd.to_datetime(data['tend'])
data['tend'] = pd.Series(data['tend'].dt.to_pydatetime(), dtype=object)
if 'channel' in data:
data['channel'] = data['channel'].astype(int)
return data
def save_power(times, stds, supra_thresh, unit, output_basename, **kwargs):
td = TableData()
td.append('index', '', '%d', list(range(len(times))))
td.append('time', '', '%s',
[t.strftime('%Y-%m-%dT%H:%M:%S') for t in times])
for c, (std, thresh) in enumerate(zip(stds, supra_thresh)):
td.append('channel%d'%c, unit, '%g', std)
td.append('thresh%d'%c, '', '%d', thresh)
fp = output_basename + '-stdevs'
td.write(fp, **kwargs)
def load_power(file_path, start_time=None):
base = os.path.basename(file_path)
device = base[0:base.find('-stdevs')]
data = TableData(file_path)
times = []
for row in range(data.rows()):
times.append(dt.datetime.strptime(data[row,'time'],
'%Y-%m-%dT%H:%M:%S'))
if start_time is not None:
deltat = start_time - times[0]
for k in range(len(times)):
times[k] += deltat
channels = (data.columns()-2)//2
stds = np.zeros((len(times), channels))
supra_thresh = np.zeros((len(times), channels), dtype=int)
for c in range(channels):
stds[:,c] = data[:,'channel%d'%c]
supra_thresh[:,c] = data[:,'thresh%d'%c]
return np.array(times), stds, supra_thresh, device
def save_data(output_folder, name, pulse_fishes, wave_fishes,
tstart, tend, t0s, stds, supra_thresh, unit, cfg):
output_basename = os.path.join(output_folder, name)
if pulse_fishes is not None:
for c in range(len(pulse_fishes)):
out_path = output_basename + '-c%d' % c
idx = 0
# pulse fish:
pulse_props = []
for fish in pulse_fishes[c]:
save_eod_waveform(fish.waveform, unit, idx, out_path,
**write_table_args(cfg))
if fish.peaks is not None:
save_pulse_peaks(fish.peaks, unit, idx, out_path,
**write_table_args(cfg))
save_times(fish.times, idx, out_path, name,
**write_table_args(cfg))
pulse_props.append(fish.props)
pulse_props[-1]['index'] = idx
idx += 1
save_pulse_fish(pulse_props, unit, out_path,
**write_table_args(cfg))
# wave fish:
wave_props = []
if wave_fishes is not None:
for fish in wave_fishes[c]:
save_eod_waveform(fish.waveform, unit, idx, out_path,
**write_table_args(cfg))
if fish.spec is not None:
save_wave_spectrum(fish.spec, unit, idx, out_path,
**write_table_args(cfg))
save_times(fish.times, idx, out_path, name,
**write_table_args(cfg))
wave_props.append(fish.props)
wave_props[-1]['index'] = idx
idx += 1
save_wave_fish(wave_props, unit, out_path,
**write_table_args(cfg))
# recording time window:
save_times([(tstart, tend)], None, output_basename, name,
**write_table_args(cfg))
# signal power:
if stds is not None and len(stds) > 0:
save_power(t0s, stds, supra_thresh, unit, output_basename,
**write_table_args(cfg))
def load_data(files, start_time=None):
all_files = []
for file in files:
if os.path.isdir(file):
all_files.extend(glob.glob(os.path.join(file, '*fish*')))
else:
all_files.append(file)
pulse_fishes = []
wave_fishes = []
channels = set()
for file in all_files:
if 'pulse' in os.path.basename(file):
pulse_props = load_pulse_fish(file)
base_file, ext = os.path.splitext(file)
base_file = base_file[:base_file.rfind('pulse')]
count = 0
for props in pulse_props:
idx = props['index']
waveform, unit = \
load_eod_waveform(base_file + 'eodwaveform-%d'%idx + ext)
times = load_times(base_file + 'times-%d'%idx + ext)
times['index'] = idx
fish = SimpleNamespace(props=props,
waveform=waveform,
unit=unit,
times=times)
for i, t in times.iterrows():
channels.add((t['device'], t['channel']))
try:
peaks, unit = \
load_pulse_peaks(base_file + 'pulsepeaks-%d'%idx + ext)
fish.peaks = peaks
except FileNotFoundError:
fish.peaks = None
pulse_fishes.append(fish)
count += 1
#if count > 300: # XXX REMOVE
# break
elif 'wave' in os.path.basename(file):
wave_props = load_wave_fish(file)
base_file, ext = os.path.splitext(file)
base_file = base_file[:base_file.rfind('wave')]
count = 0
for props in wave_props:
idx = props['index']
waveform, unit = \
load_eod_waveform(base_file + 'eodwaveform-%d'%idx + ext)
times = load_times(base_file + 'times-%d'%idx + ext)
times['index'] = idx
fish = SimpleNamespace(props=props,
waveform=waveform,
unit=unit,
times=times)
for i, t in times.iterrows():
channels.add((t['device'], t['channel']))
try:
spec, unit = \
load_wave_spectrum(base_file + 'wavespectrum-%d'%idx + ext)
fish.spec = spec
except FileNotFoundError:
fish.spec = None
wave_fishes.append(fish)
count += 1
#if count > 300: # XXX REMOVE
# break
base_file = base_file[:base_file.rfind('-c')+1]
times = load_times(base_file + 'times' + ext)
tstart = times.tstart[0]
tend = times.tend[0]
if start_time is not None:
deltat = start_time - tstart
tstart += deltat
tend += deltat
for fish in pulse_fishes + wave_fishes:
fish.times.tstart += deltat
fish.times.tend += deltat
return pulse_fishes, wave_fishes, tstart, tend, sorted(channels)
def plot_signal_power(times, stds, supra_threshs, devices, thresholds,
title, output_folder):
plt.rcParams['axes.xmargin'] = 0
plt.rcParams['axes.ymargin'] = 0
n = 0
for s in stds:
n += s.shape[1]
h = n*4.0
t = 0.8 if title else 0.1
fig, axs = plt.subplots(n, 1, figsize=(16/2.54, h/2.54),
sharex=True, sharey=True)
fig.subplots_adjust(left=0.1, right=0.99, top=1-t/h, bottom=1.6/h,
hspace=0)
i = 0
for time, cstds, threshs, device in zip(times, stds, supra_threshs, devices):
for c, (std, thresh) in enumerate(zip(cstds.T, threshs.T)):
ax = axs[i]
ax.plot(time, std)
if thresholds:
ax.axhline(thresholds[i], color='k', lw=0.5)
elif len(std[thresh<1]) > 0:
thresh = np.max(std[thresh<1])
ax.axhline(thresh, color='k', lw=0.5)
#stdm = np.ma.masked_where(thresh < 1, std)
#ax.plot(time, stdm)
ax.set_yscale('log')
#ax.set_ylim(bottom=0)
ax.set_ylabel('%s-c%d' % (device, c))
i += 1
if title:
axs[0].set_title(title)
axs[-1].xaxis.set_major_formatter(mdates.DateFormatter('%b %d %Hh'))
plt.setp(axs[-1].get_xticklabels(), ha='right',
rotation=30, rotation_mode='anchor')
fig.savefig(os.path.join(output_folder, 'signalpowers.pdf'))
plt.show()
def merge_fish(pulse_fishes, wave_fishes,
max_noise=0.1, max_deltaf=10.0, max_dist=0.0002, max_rms=0.3):
pulse_eods = []
for i in np.argsort([fish.props['P2-P1-dist'] for fish in pulse_fishes]):
if pulse_fishes[i].props['noise'] > max_noise:
continue
if pulse_fishes[i].props['P2-P1-dist'] <= 0.0:
continue
if len(pulse_eods) > 0 and \
np.abs(pulse_fishes[i].props['P2-P1-dist'] - pulse_eods[-1].props['P2-P1-dist']) <= max_dist and \
pulse_similarity(pulse_fishes[i].waveform, pulse_eods[-1].waveform, 4) < max_rms:
pulse_eods[-1].times.append(pulse_fishes[i].times)
if not hasattr(pulse_eods[-1], 'othereods'):
pulse_eods[-1].othereods = [pulse_eods[-1].waveform]
pulse_eods[-1].othereods.append(pulse_fishes[i].waveform)
if pulse_fishes[i].props['p-p-amplitude'] > pulse_eods[-1].props['p-p-amplitude']:
pulse_eods[-1].waveform = pulse_fishes[i].waveform
pulse_eods[-1].props = pulse_fishes[i].props
continue
pulse_eods.append(pulse_fishes[i])
pulse_eods = [pulse_eods[i] for i in np.argsort([fish.props['EODf'] for fish in pulse_eods])]
wave_eods = []
for i in np.argsort([fish.props['EODf'] for fish in wave_fishes]):
if wave_fishes[i].props['noise'] > max_noise:
continue
if len(wave_eods) > 0 and \
np.abs(wave_fishes[i].props['EODf'] - wave_eods[-1].props['EODf']) < max_deltaf and \
wave_similarity(wave_fishes[i].waveform, wave_eods[-1].waveform,
wave_fishes[i].props['EODf'], wave_eods[-1].props['EODf']) < max_rms:
wave_eods[-1].times.append(wave_fishes[i].times)
if not hasattr(wave_eods[-1], 'othereods'):
wave_eods[-1].othereods = []
wave_eods[-1].othereods.append(wave_fishes[i].waveform)
if wave_fishes[i].props['p-p-amplitude'] > wave_eods[-1].props['p-p-amplitude']:
wave_eods[-1].waveform = wave_fishes[i].waveform
wave_eods[-1].props = wave_fishes[i].props
continue
wave_eods.append(wave_fishes[i])
return pulse_eods, wave_eods
def plot_eod_occurances(pulse_fishes, wave_fishes, tstart, tend,
channels, output_folder):
channel_colors = ['#2060A7', '#40A787', '#478010', '#F0D730',
'#C02717', '#873770', '#008797', '#007030',
'#AAB71B', '#F78017', '#D03050', '#53379B']
plt.rcParams['axes.facecolor'] = 'none'
plt.rcParams['axes.xmargin'] = 0
plt.rcParams['axes.ymargin'] = 0.05
plt.rcParams['font.family'] = 'sans-serif'
n = len(pulse_fishes) + len(wave_fishes)
h = n*2.5 + 2.5 + 0.3
fig, axs = plt.subplots(n, 2, squeeze=False, figsize=(16/2.54, h/2.54),
gridspec_kw=dict(width_ratios=(1,2)))
fig.subplots_adjust(left=0.02, right=0.97, top=1-0.3/h, bottom=2.2/h,
hspace=0.2)
pi = 0
prev_xscale = 0.0
for ax, fish in zip(axs, pulse_fishes + wave_fishes):
# EOD waveform:
ax[0].spines['left'].set_visible(False)
ax[0].spines['right'].set_visible(False)
ax[0].spines['top'].set_visible(False)
ax[0].spines['bottom'].set_visible(False)
ax[0].xaxis.set_visible(False)
ax[0].yaxis.set_visible(False)
time = 1000.0 * fish.waveform[:,0]
#ax[0].plot([time[0], time[-1]], [0.0, 0.0],
# zorder=-10, lw=1, color='#AAAAAA')
if hasattr(fish, 'othereods'):
for eod in fish.othereods:
ax[0].plot(1000.0*eod[:,0], eod[:,1],
zorder=5, lw=1, color='#AAAAAA')
ax[0].plot(time, fish.waveform[:,1],
zorder=10, lw=2, color='#C02717')
ax[0].text(0.0, 1.0, '%.0f\u2009Hz' % fish.props['EODf'],
transform=ax[0].transAxes, va='baseline', zorder=20)
if fish.props['type'] == 'wave':
lim = 750.0/fish.props['EODf']
ax[0].set_xlim([-lim, +lim])
tmax = lim
else:
ax[0].set_xlim(time[0], time[-1])
tmax = time[-1]
xscale = 1.0
if tmax < 1.0:
xscale = 0.5
elif tmax > 10.0:
xscale = 5.0
ymin = np.min(fish.waveform[:,1])
ymax = np.max(fish.waveform[:,1])
ax[0].plot((tmax-xscale, tmax), (ymin - 0.04*(ymax-ymin),)*2,
'k', lw=3, clip_on=False, zorder=0)
if ax[0] is axs[-1,0] or xscale != prev_xscale:
if xscale < 1.0:
ax[0].text(tmax-0.5*xscale, ymin - 0.1*(ymax-ymin),
'%.0f\u2009\u00b5s' % (1000.0*xscale),
ha='center', va='top', zorder=0)
else:
ax[0].text(tmax-0.5*xscale, ymin - 0.1*(ymax-ymin),
'%.0f\u2009ms' % xscale,
ha='center', va='top', zorder=0)
prev_xscale = xscale
# time bar:
ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%b %d %Hh'))
min_eodf = 10000
max_eodf = 0
for index, time in fish.times.iterrows():
if time['EODf'] < min_eodf:
min_eodf = time['EODf']
if time['EODf'] > max_eodf:
max_eodf = time['EODf']
ax[1].plot([time['tstart'], time['tend']],
[time['EODf'], time['EODf']],
lw=5, color=channel_colors[channels.index((time['device'], time['channel']))%len(channel_colors)])
if max_eodf > min_eodf + 10.0:
ax[1].text(0.0, 1.0, '%.0f \u2013 %.0f\u2009Hz' % (min_eodf, max_eodf),
transform=ax[1].transAxes, va='baseline', zorder=20)
ax[1].set_xlim(tstart, tend)
ax[1].spines['left'].set_visible(False)
ax[1].spines['right'].set_visible(False)
ax[1].yaxis.set_visible(False)
ax[1].spines['top'].set_visible(False)
if ax[1] is not axs[-1,1]:
ax[1].spines['bottom'].set_visible(False)
ax[1].xaxis.set_visible(False)
else:
plt.setp(ax[1].get_xticklabels(), ha='right',
rotation=30, rotation_mode='anchor')
fig.savefig(os.path.join(output_folder, 'eodwaveforms.pdf'))
def main(cargs=None):
# config file name:
cfgfile = __package__ + '.cfg'
# command line arguments:
if cargs is None:
cargs = sys.argv[1:]
parser = argparse.ArgumentParser(add_help=False,
description='Extract EOD waveforms of weakly electric fish from logger data.',
epilog='version %s by Benda-Lab (2015-%s)' % (__version__, __year__))
parser.add_argument('-h', '--help', action='store_true',
help='show this help message and exit')
parser.add_argument('--version', action='version', version=__version__)
parser.add_argument('-v', action='count', dest='verbose', default=0,
help='verbosity level. Increase by specifying -v multiple times, or like -vvv')
parser.add_argument('-V', action='count', dest='plot_level', default=0,
help='level for debugging plots. Increase by specifying -V multiple times, or like -VVV')
parser.add_argument('-c', dest='save_config', action='store_true',
help='save configuration to file {0} after reading all configuration files'.format(cfgfile))
parser.add_argument('-f', dest='format', default='auto', type=str,
choices=TableData.formats + ['py'],
help='file format used for saving analysis results, defaults to the format specified in the configuration file or "csv"')
parser.add_argument('-p', dest='save_plot', action='store_true',
help='plot analyzed data')
parser.add_argument('-m', dest='merge', action='store_true',
help='merge similar EODs before plotting')
parser.add_argument('-s', dest='stds_only', action='store_true',
help='analyze or plot standard deviation of data only')
parser.add_argument('-o', dest='outpath', default='.', type=str,
help='path where to store results and figures (defaults to current working directory)')
parser.add_argument('-k', dest='keep_path', action='store_true',
help='keep path of input file when saving analysis files, i.e. append path of input file to OUTPATH')
parser.add_argument('-n', dest='name', default='', type=str,
help='base name of all output files or title of plots')
parser.add_argument('file', nargs='*', default='', type=str,
help='name of a file with time series data of an EOD recording, may include wildcards')
args = parser.parse_args(cargs)
# help:
if args.help:
parser.print_help()
print('')
print('examples:')
print('- write configuration file:')
print(' > thunderlogger -c')
print('- compute standard deviations of data segments:')
print(' > thunderlogger -o results -k -n logger1 -s river1/logger1-*.wav')
print('- plot the standard deviations and the computed threshold:')
print(' > thunderlogger -o plots -k -s -p -n river1 results/river1/logger1-stdevs.*')
print(' you may adapt the settings in the configureation file "thunderfish.cfg"')
print('- extract EODs from the data:')
print(' > thunderlogger -o results -k river1/logger1-*.wav')
print('- merge and plot extracted EODs:')
print(' > thunderlogger -o plots -k -p -m results/river*/*fish.*')
parser.exit()
# set verbosity level from command line:
verbose = args.verbose
plot_level = args.plot_level
if verbose < plot_level:
verbose = plot_level
# expand wildcard patterns:
files = []
if os.name == 'nt':
for fn in args.file:
files.extend(glob.glob(fn))
else:
files = args.file
if args.save_config:
# save configuration:
file_name = files[0] if len(files) else ''
cfg = configuration()
add_thunderlogger_config(cfg)
cfg.load_files(cfgfile, file_name, 4, verbose)
save_configuration(cfg, cfgfile)
exit()
elif len(files) == 0:
parser.error('you need to specify at least one file for the analysis')
# configure:
cfg = configuration()
add_thunderlogger_config(cfg)
cfg.load_files(cfgfile, files[0], 4, verbose-1)
if args.format != 'auto':
cfg.set('fileFormat', args.format)
# create output folder for data and plots:
output_folder = args.outpath
if args.keep_path:
output_folder = os.path.join(output_folder,
os.path.split(files[0])[0])
if not os.path.exists(output_folder):
if verbose > 1:
print('mkdir %s' % output_folder)
os.makedirs(output_folder)
# start time:
start_time = None
if cfg.value('startTime') != 'none':
cfg.value('startTime')
start_time = dt.datetime.strptime(cfg.value('startTime'), '%Y-%m-%dT%H:%M:%S')
# analyze and save data:
plt.ioff()
if not args.save_plot:
# assemble device name and output file:
if len(args.name) > 0:
device_name = args.name
else:
device_name = os.path.basename(files[0])
device_name = device_name[:device_name.find('-')]
output_basename = os.path.join(output_folder, device_name)
# compute thresholds:
thresholds = []
power_file = output_basename + '-stdevs.csv'
thresh = cfg.value('detectionThreshold')
if thresh == 'auto':
thresh = cfg.value('detectionThresholdDefault')
if os.path.isfile(power_file):
_, powers, _, _ = load_power(power_file)
for std in powers.T:
ss, cc = hist_threshold(std,
thresh_fac=cfg.value('detectionThresholdStdFac'),
nbins=cfg.value('detectionThresholdNBins'))
thresholds.append(cc + ss)
else:
thresh = float(thresh)
pulse_fishes, wave_fishes, tstart, tend, t0s, \
stds, supra_thresh, unit = \
extract_eods(files, thresholds,
args.stds_only, cfg, verbose, plot_level,
thresh=thresh, start_time=start_time)
remove_eod_files(output_basename, verbose, cfg)
save_data(output_folder, device_name, pulse_fishes, wave_fishes,
tstart, tend, t0s, stds, supra_thresh, unit, cfg)
sys.stdout.write('DONE!\n')
if args.stds_only:
sys.stdout.write('Signal powers saved in %s\n' % (output_folder+'-stdevs.csv'))
sys.stdout.write('To generate plots run thunderlogger with the -p and -s flags on the generated file:\n')
sys.stdout.write('> thunderlogger -p -s %s\n' % (output_folder+'-stdevs.csv'))
else:
sys.stdout.write('Extracted EOD waveforms saved in %s\n' % output_folder)
sys.stdout.write('To generate plots run thunderlogger with the -p flag on the generated files:\n')
sys.stdout.write('> thunderlogger -p -o %s%s %s\n' %
(args.outpath, ' -k' if args.keep_path else '',
output_basename))
else:
if args.stds_only:
times = []
stds = []
supra_threshs = []
devices = []
thresholds = []
for file in files:
t, p, s, d = load_power(file, start_time)
times.append(t)
stds.append(p)
supra_threshs.append(s)
devices.append(d)
# compute detection thresholds:
for std in p.T:
if cfg.value('detectionThreshold') == 'auto':
ss, cc = hist_threshold(std,
thresh_fac=cfg.value('detectionThresholdStdFac'),
nbins=cfg.value('detectionThresholdNBins'))
thresholds.append(cc + ss)
else:
thresholds.append(float(cfg.value('detectionThreshold')))
plot_signal_power(times, stds, supra_threshs, devices, thresholds,
args.name, output_folder)
else:
pulse_fishes, wave_fishes, tstart, tend, channels = \
load_data(files, start_time)
if args.merge:
pulse_fishes, wave_fishes = merge_fish(pulse_fishes, wave_fishes)
plot_eod_occurances(pulse_fishes, wave_fishes, tstart, tend,
channels, output_folder)
if __name__ == '__main__':
main()
Functions
def add_thunderlogger_config(cfg, detection_thresh='auto', default_thresh=0.002, thresh_fac=3.0, thresh_nbins=500)
-
Add parameters needed for by thunderlogger.
Parameters
cfg
:ConfigFile
- The configuration.
detection_thresh
:float
or'auto'
- Only data segments with standard deviation larger than this value are analyzed for EODs. If set to 'auto' a threshold is computed from all the data segments of a recording channel.
default_thresh
:float
- Threshold that is used if "detection_thresh" is set to "auto" and no data are available.
thresh_fac
:float
- The threshold for analysing data segments is set to the mean of the most-likely standard deviations plus this factor times the corresponding standard deviation.
thresh_nbins
:int
- The number of bins used to compute a histogram of the standard deviations of the data segments, from which the mean and standard deviation are estimated for automatically computing a threshold.
Expand source code
def add_thunderlogger_config(cfg, detection_thresh='auto', default_thresh=0.002, thresh_fac=3.0, thresh_nbins=500): """Add parameters needed for by thunderlogger. Parameters ---------- cfg: ConfigFile The configuration. detection_thresh: float or 'auto' Only data segments with standard deviation larger than this value are analyzed for EODs. If set to 'auto' a threshold is computed from all the data segments of a recording channel. default_thresh: float Threshold that is used if "detection_thresh" is set to "auto" and no data are available. thresh_fac: float The threshold for analysing data segments is set to the mean of the most-likely standard deviations plus this factor times the corresponding standard deviation. thresh_nbins: int The number of bins used to compute a histogram of the standard deviations of the data segments, from which the mean and standard deviation are estimated for automatically computing a threshold. """ cfg.add_section('Thunderlogger:') cfg.add('detectionThreshold', detection_thresh, '', 'Only analyse data segements with a standard deviation that is larger than this threshold. If set to "auto" compute threshold from all the standard deviations of a recording channel.') cfg.add('detectionThresholdDefault', default_thresh, '', 'Threshold that is used if "detectionThreshold" is set to "auto" and no data are available.') cfg.add('detectionThresholdStdFac', thresh_fac, '', 'An automatically computed threshold for analysing data segments is set to the mean of the most-likely standard deviations plus this factor times the corresponding standard deviation.') cfg.add('detectionThresholdNBins', thresh_nbins, '', 'The number of bins used to compute a histogram of the standard deviations of the data segments, from which the mean and standard deviation are estimated for automatically computing a threshold.') cfg.add('startTime', 'none', '', 'Provide a start time for the recordings overwriting the meta data of the data files (YYYY-mm-ddTHH:MM:SS format).')
def extract_eods(files, thresholds, stds_only, cfg, verbose, plot_level, thresh=0.002, max_deltaf=1.0, max_dist=5e-05, deltat_max=datetime.timedelta(seconds=300), start_time=None)
-
Expand source code
def extract_eods(files, thresholds, stds_only, cfg, verbose, plot_level, thresh=0.002, max_deltaf=1.0, max_dist=0.00005, deltat_max=dt.timedelta(minutes=5), start_time=None): t0s = [] stds = None supra_thresh = None wave_fishes = None pulse_fishes = None if start_time is None: # XXX we should read this from the meta data: filename = os.path.splitext(os.path.basename(files[0]))[0] times = filename.split('-')[1] start_time = dt.datetime.strptime(times, '%Y%m%dT%H%M%S') toffs = start_time t1 = start_time unit = None for file in files: try: with DataLoader(file) as sf: # analyze: sys.stdout.write(file + ': ') unit = sf.unit if max_dist < 1.1/sf.samplerate: max_dist = 1.1/sf.samplerate window_size = cfg.value('windowSize') ndata = int(window_size * sf.samplerate) step = ndata//2 b, a = butter(1, 10.0, 'hp', fs=sf.samplerate, output='ba') if stds is None: stds = [[] for c in range(sf.channels)] supra_thresh = [[] for c in range(sf.channels)] if wave_fishes is None: wave_fishes = [[] for c in range(sf.channels)] if pulse_fishes is None: pulse_fishes = [[] for c in range(sf.channels)] for k, data in enumerate(sf.blocks(ndata, step)): sys.stdout.write('.') sys.stdout.flush() t0 = toffs + dt.timedelta(seconds=k*step/sf.samplerate) t1 = t0 + dt.timedelta(seconds=ndata/sf.samplerate) t0s.append(t0) for channel in range(sf.channels): if thresholds: thresh = thresholds[channel] fdata = lfilter(b, a, data[:,channel] - np.mean(data[:ndata//20,channel])) sd = np.std(fdata) stds[channel].append(sd) supra_thresh[channel].append(1 if sd > thresh else 0) if stds_only: continue if sd > thresh: # clipping: min_clip = cfg.value('minClipAmplitude') if min_clip == 0.0: min_clip = cfg.value('minDataAmplitude') max_clip = cfg.value('maxClipAmplitude') if max_clip == 0.0: max_clip = cfg.value('maxDataAmplitude') name = file # detect EODs in the data: _, _, _, eod_props, mean_eods, spec_data, peak_data, _, _, _ = \ detect_eods(data[:,channel], sf.samplerate, min_clip, max_clip, name, verbose, plot_level, cfg) first_fish = True for props, eod, spec, peaks in zip(eod_props, mean_eods, spec_data, peak_data): fish = None fish_deltaf = 100000.0 if props['type'] == 'wave': for wfish in wave_fishes[channel]: deltaf = np.abs(wfish.props['EODf'] - props['EODf']) if deltaf < fish_deltaf: fish_deltaf = deltaf fish = wfish if fish_deltaf > max_deltaf: fish = None peaks = None else: fish_dist = 10000.0 for pfish in pulse_fishes[channel]: ddist = np.abs(pfish.props['P1-P1-dist'] - props['P1-P1-dist']) if ddist < fish_dist: fish_dist = ddist fish_deltaf = np.abs(pfish.props['EODf'] - props['EODf']) fish = pfish if fish_dist > max_dist or \ fish_deltaf > max_deltaf: fish = None spec = None if fish is not None and \ t0 - fish.times[-1][1] < deltat_max: if fish.times[-1][1] >= t0 and \ np.abs(fish.times[-1][2] - props['EODf']) < 0.5 and \ fish.times[-1][3] == channel and \ fish.times[-1][4] == file: fish.times[-1][1] = t1 else: fish.times.append([t0, t1, props['EODf'], channel, file]) if props['p-p-amplitude'] > fish.props['p-p-amplitude']: fish.props = props fish.waveform = eod fish.spec = spec fish.peaks = peaks else: new_fish = SimpleNamespace(props=props, waveform=eod, spec=spec, peaks=peaks, times=[[t0, t1, props['EODf'], channel, file]]) if props['type'] == 'pulse': pulse_fishes[channel].append(new_fish) else: wave_fishes[channel].append(new_fish) if first_fish: sys.stdout.write('\n ') first_fish = False sys.stdout.write('%6.1fHz %5s-fish @ %s\n ' % (props['EODf'], props['type'], t0.strftime('%Y-%m-%dT%H:%M:%S'))) toffs += dt.timedelta(seconds=len(sf)/sf.samplerate) sys.stdout.write('\n') sys.stdout.flush() except EOFError as error: # XXX we need to update toffs by means of the metadata of the next file! sys.stdout.write(file + ': ' + str(error) + '\n') if pulse_fishes is not None and len(pulse_fishes) > 0: pulse_fishes = [[pulse_fishes[c][i] for i in np.argsort([fish.props['EODf'] for fish in pulse_fishes[c]])] for c in range(len(pulse_fishes))] if wave_fishes is not None and len(wave_fishes) > 0: wave_fishes = [[wave_fishes[c][i] for i in np.argsort([fish.props['EODf'] for fish in wave_fishes[c]])] for c in range(len(wave_fishes))] return pulse_fishes, wave_fishes, start_time, toffs, t0s, stds, supra_thresh, unit
def save_times(times, idx, output_basename, name, **kwargs)
-
Expand source code
def save_times(times, idx, output_basename, name, **kwargs): td = TableData() td.append('index', '', '%d', [idx] * len(times)) td.append('tstart', '', '%s', [t[0].strftime('%Y-%m-%dT%H:%M:%S') for t in times]) td.append('tend', '', '%s', [t[1].strftime('%Y-%m-%dT%H:%M:%S') for t in times]) if len(times[0]) > 2: td.append('EODf', 'Hz', '%.1f', [t[2] for t in times]) td.append('device', '', '%s', [name for t in times]) if len(times[0]) > 2: td.append('channel', '', '%d', [t[3] for t in times]) td.append('file', '', '%s', [t[4] for t in times]) fp = output_basename + '-times' if idx is not None: fp += '-%d' % idx td.write(fp, **kwargs)
def load_times(file_path)
-
Expand source code
def load_times(file_path): data = TableData(file_path).data_frame() data['index'] = data['index'].astype(int) data['tstart'] = pd.to_datetime(data['tstart']) data['tstart'] = pd.Series(data['tstart'].dt.to_pydatetime(), dtype=object) data['tend'] = pd.to_datetime(data['tend']) data['tend'] = pd.Series(data['tend'].dt.to_pydatetime(), dtype=object) if 'channel' in data: data['channel'] = data['channel'].astype(int) return data
def save_power(times, stds, supra_thresh, unit, output_basename, **kwargs)
-
Expand source code
def save_power(times, stds, supra_thresh, unit, output_basename, **kwargs): td = TableData() td.append('index', '', '%d', list(range(len(times)))) td.append('time', '', '%s', [t.strftime('%Y-%m-%dT%H:%M:%S') for t in times]) for c, (std, thresh) in enumerate(zip(stds, supra_thresh)): td.append('channel%d'%c, unit, '%g', std) td.append('thresh%d'%c, '', '%d', thresh) fp = output_basename + '-stdevs' td.write(fp, **kwargs)
def load_power(file_path, start_time=None)
-
Expand source code
def load_power(file_path, start_time=None): base = os.path.basename(file_path) device = base[0:base.find('-stdevs')] data = TableData(file_path) times = [] for row in range(data.rows()): times.append(dt.datetime.strptime(data[row,'time'], '%Y-%m-%dT%H:%M:%S')) if start_time is not None: deltat = start_time - times[0] for k in range(len(times)): times[k] += deltat channels = (data.columns()-2)//2 stds = np.zeros((len(times), channels)) supra_thresh = np.zeros((len(times), channels), dtype=int) for c in range(channels): stds[:,c] = data[:,'channel%d'%c] supra_thresh[:,c] = data[:,'thresh%d'%c] return np.array(times), stds, supra_thresh, device
def save_data(output_folder, name, pulse_fishes, wave_fishes, tstart, tend, t0s, stds, supra_thresh, unit, cfg)
-
Expand source code
def save_data(output_folder, name, pulse_fishes, wave_fishes, tstart, tend, t0s, stds, supra_thresh, unit, cfg): output_basename = os.path.join(output_folder, name) if pulse_fishes is not None: for c in range(len(pulse_fishes)): out_path = output_basename + '-c%d' % c idx = 0 # pulse fish: pulse_props = [] for fish in pulse_fishes[c]: save_eod_waveform(fish.waveform, unit, idx, out_path, **write_table_args(cfg)) if fish.peaks is not None: save_pulse_peaks(fish.peaks, unit, idx, out_path, **write_table_args(cfg)) save_times(fish.times, idx, out_path, name, **write_table_args(cfg)) pulse_props.append(fish.props) pulse_props[-1]['index'] = idx idx += 1 save_pulse_fish(pulse_props, unit, out_path, **write_table_args(cfg)) # wave fish: wave_props = [] if wave_fishes is not None: for fish in wave_fishes[c]: save_eod_waveform(fish.waveform, unit, idx, out_path, **write_table_args(cfg)) if fish.spec is not None: save_wave_spectrum(fish.spec, unit, idx, out_path, **write_table_args(cfg)) save_times(fish.times, idx, out_path, name, **write_table_args(cfg)) wave_props.append(fish.props) wave_props[-1]['index'] = idx idx += 1 save_wave_fish(wave_props, unit, out_path, **write_table_args(cfg)) # recording time window: save_times([(tstart, tend)], None, output_basename, name, **write_table_args(cfg)) # signal power: if stds is not None and len(stds) > 0: save_power(t0s, stds, supra_thresh, unit, output_basename, **write_table_args(cfg))
def load_data(files, start_time=None)
-
Expand source code
def load_data(files, start_time=None): all_files = [] for file in files: if os.path.isdir(file): all_files.extend(glob.glob(os.path.join(file, '*fish*'))) else: all_files.append(file) pulse_fishes = [] wave_fishes = [] channels = set() for file in all_files: if 'pulse' in os.path.basename(file): pulse_props = load_pulse_fish(file) base_file, ext = os.path.splitext(file) base_file = base_file[:base_file.rfind('pulse')] count = 0 for props in pulse_props: idx = props['index'] waveform, unit = \ load_eod_waveform(base_file + 'eodwaveform-%d'%idx + ext) times = load_times(base_file + 'times-%d'%idx + ext) times['index'] = idx fish = SimpleNamespace(props=props, waveform=waveform, unit=unit, times=times) for i, t in times.iterrows(): channels.add((t['device'], t['channel'])) try: peaks, unit = \ load_pulse_peaks(base_file + 'pulsepeaks-%d'%idx + ext) fish.peaks = peaks except FileNotFoundError: fish.peaks = None pulse_fishes.append(fish) count += 1 #if count > 300: # XXX REMOVE # break elif 'wave' in os.path.basename(file): wave_props = load_wave_fish(file) base_file, ext = os.path.splitext(file) base_file = base_file[:base_file.rfind('wave')] count = 0 for props in wave_props: idx = props['index'] waveform, unit = \ load_eod_waveform(base_file + 'eodwaveform-%d'%idx + ext) times = load_times(base_file + 'times-%d'%idx + ext) times['index'] = idx fish = SimpleNamespace(props=props, waveform=waveform, unit=unit, times=times) for i, t in times.iterrows(): channels.add((t['device'], t['channel'])) try: spec, unit = \ load_wave_spectrum(base_file + 'wavespectrum-%d'%idx + ext) fish.spec = spec except FileNotFoundError: fish.spec = None wave_fishes.append(fish) count += 1 #if count > 300: # XXX REMOVE # break base_file = base_file[:base_file.rfind('-c')+1] times = load_times(base_file + 'times' + ext) tstart = times.tstart[0] tend = times.tend[0] if start_time is not None: deltat = start_time - tstart tstart += deltat tend += deltat for fish in pulse_fishes + wave_fishes: fish.times.tstart += deltat fish.times.tend += deltat return pulse_fishes, wave_fishes, tstart, tend, sorted(channels)
def plot_signal_power(times, stds, supra_threshs, devices, thresholds, title, output_folder)
-
Expand source code
def plot_signal_power(times, stds, supra_threshs, devices, thresholds, title, output_folder): plt.rcParams['axes.xmargin'] = 0 plt.rcParams['axes.ymargin'] = 0 n = 0 for s in stds: n += s.shape[1] h = n*4.0 t = 0.8 if title else 0.1 fig, axs = plt.subplots(n, 1, figsize=(16/2.54, h/2.54), sharex=True, sharey=True) fig.subplots_adjust(left=0.1, right=0.99, top=1-t/h, bottom=1.6/h, hspace=0) i = 0 for time, cstds, threshs, device in zip(times, stds, supra_threshs, devices): for c, (std, thresh) in enumerate(zip(cstds.T, threshs.T)): ax = axs[i] ax.plot(time, std) if thresholds: ax.axhline(thresholds[i], color='k', lw=0.5) elif len(std[thresh<1]) > 0: thresh = np.max(std[thresh<1]) ax.axhline(thresh, color='k', lw=0.5) #stdm = np.ma.masked_where(thresh < 1, std) #ax.plot(time, stdm) ax.set_yscale('log') #ax.set_ylim(bottom=0) ax.set_ylabel('%s-c%d' % (device, c)) i += 1 if title: axs[0].set_title(title) axs[-1].xaxis.set_major_formatter(mdates.DateFormatter('%b %d %Hh')) plt.setp(axs[-1].get_xticklabels(), ha='right', rotation=30, rotation_mode='anchor') fig.savefig(os.path.join(output_folder, 'signalpowers.pdf')) plt.show()
def merge_fish(pulse_fishes, wave_fishes, max_noise=0.1, max_deltaf=10.0, max_dist=0.0002, max_rms=0.3)
-
Expand source code
def merge_fish(pulse_fishes, wave_fishes, max_noise=0.1, max_deltaf=10.0, max_dist=0.0002, max_rms=0.3): pulse_eods = [] for i in np.argsort([fish.props['P2-P1-dist'] for fish in pulse_fishes]): if pulse_fishes[i].props['noise'] > max_noise: continue if pulse_fishes[i].props['P2-P1-dist'] <= 0.0: continue if len(pulse_eods) > 0 and \ np.abs(pulse_fishes[i].props['P2-P1-dist'] - pulse_eods[-1].props['P2-P1-dist']) <= max_dist and \ pulse_similarity(pulse_fishes[i].waveform, pulse_eods[-1].waveform, 4) < max_rms: pulse_eods[-1].times.append(pulse_fishes[i].times) if not hasattr(pulse_eods[-1], 'othereods'): pulse_eods[-1].othereods = [pulse_eods[-1].waveform] pulse_eods[-1].othereods.append(pulse_fishes[i].waveform) if pulse_fishes[i].props['p-p-amplitude'] > pulse_eods[-1].props['p-p-amplitude']: pulse_eods[-1].waveform = pulse_fishes[i].waveform pulse_eods[-1].props = pulse_fishes[i].props continue pulse_eods.append(pulse_fishes[i]) pulse_eods = [pulse_eods[i] for i in np.argsort([fish.props['EODf'] for fish in pulse_eods])] wave_eods = [] for i in np.argsort([fish.props['EODf'] for fish in wave_fishes]): if wave_fishes[i].props['noise'] > max_noise: continue if len(wave_eods) > 0 and \ np.abs(wave_fishes[i].props['EODf'] - wave_eods[-1].props['EODf']) < max_deltaf and \ wave_similarity(wave_fishes[i].waveform, wave_eods[-1].waveform, wave_fishes[i].props['EODf'], wave_eods[-1].props['EODf']) < max_rms: wave_eods[-1].times.append(wave_fishes[i].times) if not hasattr(wave_eods[-1], 'othereods'): wave_eods[-1].othereods = [] wave_eods[-1].othereods.append(wave_fishes[i].waveform) if wave_fishes[i].props['p-p-amplitude'] > wave_eods[-1].props['p-p-amplitude']: wave_eods[-1].waveform = wave_fishes[i].waveform wave_eods[-1].props = wave_fishes[i].props continue wave_eods.append(wave_fishes[i]) return pulse_eods, wave_eods
def plot_eod_occurances(pulse_fishes, wave_fishes, tstart, tend, channels, output_folder)
-
Expand source code
def plot_eod_occurances(pulse_fishes, wave_fishes, tstart, tend, channels, output_folder): channel_colors = ['#2060A7', '#40A787', '#478010', '#F0D730', '#C02717', '#873770', '#008797', '#007030', '#AAB71B', '#F78017', '#D03050', '#53379B'] plt.rcParams['axes.facecolor'] = 'none' plt.rcParams['axes.xmargin'] = 0 plt.rcParams['axes.ymargin'] = 0.05 plt.rcParams['font.family'] = 'sans-serif' n = len(pulse_fishes) + len(wave_fishes) h = n*2.5 + 2.5 + 0.3 fig, axs = plt.subplots(n, 2, squeeze=False, figsize=(16/2.54, h/2.54), gridspec_kw=dict(width_ratios=(1,2))) fig.subplots_adjust(left=0.02, right=0.97, top=1-0.3/h, bottom=2.2/h, hspace=0.2) pi = 0 prev_xscale = 0.0 for ax, fish in zip(axs, pulse_fishes + wave_fishes): # EOD waveform: ax[0].spines['left'].set_visible(False) ax[0].spines['right'].set_visible(False) ax[0].spines['top'].set_visible(False) ax[0].spines['bottom'].set_visible(False) ax[0].xaxis.set_visible(False) ax[0].yaxis.set_visible(False) time = 1000.0 * fish.waveform[:,0] #ax[0].plot([time[0], time[-1]], [0.0, 0.0], # zorder=-10, lw=1, color='#AAAAAA') if hasattr(fish, 'othereods'): for eod in fish.othereods: ax[0].plot(1000.0*eod[:,0], eod[:,1], zorder=5, lw=1, color='#AAAAAA') ax[0].plot(time, fish.waveform[:,1], zorder=10, lw=2, color='#C02717') ax[0].text(0.0, 1.0, '%.0f\u2009Hz' % fish.props['EODf'], transform=ax[0].transAxes, va='baseline', zorder=20) if fish.props['type'] == 'wave': lim = 750.0/fish.props['EODf'] ax[0].set_xlim([-lim, +lim]) tmax = lim else: ax[0].set_xlim(time[0], time[-1]) tmax = time[-1] xscale = 1.0 if tmax < 1.0: xscale = 0.5 elif tmax > 10.0: xscale = 5.0 ymin = np.min(fish.waveform[:,1]) ymax = np.max(fish.waveform[:,1]) ax[0].plot((tmax-xscale, tmax), (ymin - 0.04*(ymax-ymin),)*2, 'k', lw=3, clip_on=False, zorder=0) if ax[0] is axs[-1,0] or xscale != prev_xscale: if xscale < 1.0: ax[0].text(tmax-0.5*xscale, ymin - 0.1*(ymax-ymin), '%.0f\u2009\u00b5s' % (1000.0*xscale), ha='center', va='top', zorder=0) else: ax[0].text(tmax-0.5*xscale, ymin - 0.1*(ymax-ymin), '%.0f\u2009ms' % xscale, ha='center', va='top', zorder=0) prev_xscale = xscale # time bar: ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%b %d %Hh')) min_eodf = 10000 max_eodf = 0 for index, time in fish.times.iterrows(): if time['EODf'] < min_eodf: min_eodf = time['EODf'] if time['EODf'] > max_eodf: max_eodf = time['EODf'] ax[1].plot([time['tstart'], time['tend']], [time['EODf'], time['EODf']], lw=5, color=channel_colors[channels.index((time['device'], time['channel']))%len(channel_colors)]) if max_eodf > min_eodf + 10.0: ax[1].text(0.0, 1.0, '%.0f \u2013 %.0f\u2009Hz' % (min_eodf, max_eodf), transform=ax[1].transAxes, va='baseline', zorder=20) ax[1].set_xlim(tstart, tend) ax[1].spines['left'].set_visible(False) ax[1].spines['right'].set_visible(False) ax[1].yaxis.set_visible(False) ax[1].spines['top'].set_visible(False) if ax[1] is not axs[-1,1]: ax[1].spines['bottom'].set_visible(False) ax[1].xaxis.set_visible(False) else: plt.setp(ax[1].get_xticklabels(), ha='right', rotation=30, rotation_mode='anchor') fig.savefig(os.path.join(output_folder, 'eodwaveforms.pdf'))
def main(cargs=None)
-
Expand source code
def main(cargs=None): # config file name: cfgfile = __package__ + '.cfg' # command line arguments: if cargs is None: cargs = sys.argv[1:] parser = argparse.ArgumentParser(add_help=False, description='Extract EOD waveforms of weakly electric fish from logger data.', epilog='version %s by Benda-Lab (2015-%s)' % (__version__, __year__)) parser.add_argument('-h', '--help', action='store_true', help='show this help message and exit') parser.add_argument('--version', action='version', version=__version__) parser.add_argument('-v', action='count', dest='verbose', default=0, help='verbosity level. Increase by specifying -v multiple times, or like -vvv') parser.add_argument('-V', action='count', dest='plot_level', default=0, help='level for debugging plots. Increase by specifying -V multiple times, or like -VVV') parser.add_argument('-c', dest='save_config', action='store_true', help='save configuration to file {0} after reading all configuration files'.format(cfgfile)) parser.add_argument('-f', dest='format', default='auto', type=str, choices=TableData.formats + ['py'], help='file format used for saving analysis results, defaults to the format specified in the configuration file or "csv"') parser.add_argument('-p', dest='save_plot', action='store_true', help='plot analyzed data') parser.add_argument('-m', dest='merge', action='store_true', help='merge similar EODs before plotting') parser.add_argument('-s', dest='stds_only', action='store_true', help='analyze or plot standard deviation of data only') parser.add_argument('-o', dest='outpath', default='.', type=str, help='path where to store results and figures (defaults to current working directory)') parser.add_argument('-k', dest='keep_path', action='store_true', help='keep path of input file when saving analysis files, i.e. append path of input file to OUTPATH') parser.add_argument('-n', dest='name', default='', type=str, help='base name of all output files or title of plots') parser.add_argument('file', nargs='*', default='', type=str, help='name of a file with time series data of an EOD recording, may include wildcards') args = parser.parse_args(cargs) # help: if args.help: parser.print_help() print('') print('examples:') print('- write configuration file:') print(' > thunderlogger -c') print('- compute standard deviations of data segments:') print(' > thunderlogger -o results -k -n logger1 -s river1/logger1-*.wav') print('- plot the standard deviations and the computed threshold:') print(' > thunderlogger -o plots -k -s -p -n river1 results/river1/logger1-stdevs.*') print(' you may adapt the settings in the configureation file "thunderfish.cfg"') print('- extract EODs from the data:') print(' > thunderlogger -o results -k river1/logger1-*.wav') print('- merge and plot extracted EODs:') print(' > thunderlogger -o plots -k -p -m results/river*/*fish.*') parser.exit() # set verbosity level from command line: verbose = args.verbose plot_level = args.plot_level if verbose < plot_level: verbose = plot_level # expand wildcard patterns: files = [] if os.name == 'nt': for fn in args.file: files.extend(glob.glob(fn)) else: files = args.file if args.save_config: # save configuration: file_name = files[0] if len(files) else '' cfg = configuration() add_thunderlogger_config(cfg) cfg.load_files(cfgfile, file_name, 4, verbose) save_configuration(cfg, cfgfile) exit() elif len(files) == 0: parser.error('you need to specify at least one file for the analysis') # configure: cfg = configuration() add_thunderlogger_config(cfg) cfg.load_files(cfgfile, files[0], 4, verbose-1) if args.format != 'auto': cfg.set('fileFormat', args.format) # create output folder for data and plots: output_folder = args.outpath if args.keep_path: output_folder = os.path.join(output_folder, os.path.split(files[0])[0]) if not os.path.exists(output_folder): if verbose > 1: print('mkdir %s' % output_folder) os.makedirs(output_folder) # start time: start_time = None if cfg.value('startTime') != 'none': cfg.value('startTime') start_time = dt.datetime.strptime(cfg.value('startTime'), '%Y-%m-%dT%H:%M:%S') # analyze and save data: plt.ioff() if not args.save_plot: # assemble device name and output file: if len(args.name) > 0: device_name = args.name else: device_name = os.path.basename(files[0]) device_name = device_name[:device_name.find('-')] output_basename = os.path.join(output_folder, device_name) # compute thresholds: thresholds = [] power_file = output_basename + '-stdevs.csv' thresh = cfg.value('detectionThreshold') if thresh == 'auto': thresh = cfg.value('detectionThresholdDefault') if os.path.isfile(power_file): _, powers, _, _ = load_power(power_file) for std in powers.T: ss, cc = hist_threshold(std, thresh_fac=cfg.value('detectionThresholdStdFac'), nbins=cfg.value('detectionThresholdNBins')) thresholds.append(cc + ss) else: thresh = float(thresh) pulse_fishes, wave_fishes, tstart, tend, t0s, \ stds, supra_thresh, unit = \ extract_eods(files, thresholds, args.stds_only, cfg, verbose, plot_level, thresh=thresh, start_time=start_time) remove_eod_files(output_basename, verbose, cfg) save_data(output_folder, device_name, pulse_fishes, wave_fishes, tstart, tend, t0s, stds, supra_thresh, unit, cfg) sys.stdout.write('DONE!\n') if args.stds_only: sys.stdout.write('Signal powers saved in %s\n' % (output_folder+'-stdevs.csv')) sys.stdout.write('To generate plots run thunderlogger with the -p and -s flags on the generated file:\n') sys.stdout.write('> thunderlogger -p -s %s\n' % (output_folder+'-stdevs.csv')) else: sys.stdout.write('Extracted EOD waveforms saved in %s\n' % output_folder) sys.stdout.write('To generate plots run thunderlogger with the -p flag on the generated files:\n') sys.stdout.write('> thunderlogger -p -o %s%s %s\n' % (args.outpath, ' -k' if args.keep_path else '', output_basename)) else: if args.stds_only: times = [] stds = [] supra_threshs = [] devices = [] thresholds = [] for file in files: t, p, s, d = load_power(file, start_time) times.append(t) stds.append(p) supra_threshs.append(s) devices.append(d) # compute detection thresholds: for std in p.T: if cfg.value('detectionThreshold') == 'auto': ss, cc = hist_threshold(std, thresh_fac=cfg.value('detectionThresholdStdFac'), nbins=cfg.value('detectionThresholdNBins')) thresholds.append(cc + ss) else: thresholds.append(float(cfg.value('detectionThreshold'))) plot_signal_power(times, stds, supra_threshs, devices, thresholds, args.name, output_folder) else: pulse_fishes, wave_fishes, tstart, tend, channels = \ load_data(files, start_time) if args.merge: pulse_fishes, wave_fishes = merge_fish(pulse_fishes, wave_fishes) plot_eod_occurances(pulse_fishes, wave_fishes, tstart, tend, channels, output_folder)