Module thunderfish.dataloader
Load time-series data from files.
data, samplingrate, unit = load_data('data/file.wav')
Loads the whole time-series from the file as a numpy array of floats.
data = DataLoader('data/file.wav', 0, 60.0)
or
with open_data('data/file.wav', 0, 60.0) as data:
Create a DataLoader
object that loads chunks of 60 seconds long data
on demand. data
can be used like a read-only numpy array of floats.
Additional functions
relacs_metadata()
: reads key-value pairs from relacs *.dat file headers.
fishgrid_grids()
: retrieve grid sizes from a fishgrid.cfg file.
fishgrid_spacings()
: spacing between grid electrodes.
Expand source code
"""Load time-series data from files.
```
data, samplingrate, unit = load_data('data/file.wav')
```
Loads the whole time-series from the file as a numpy array of floats.
```
data = DataLoader('data/file.wav', 0, 60.0)
```
or
```
with open_data('data/file.wav', 0, 60.0) as data:
```
Create a `DataLoader` object that loads chunks of 60 seconds long data
on demand. `data` can be used like a read-only numpy array of floats.
## Additional functions
- `relacs_metadata()` reads key-value pairs from relacs *.dat file headers.
- `fishgrid_grids()`: retrieve grid sizes from a fishgrid.cfg file.
- `fishgrid_spacings()`: spacing between grid electrodes.
"""
import os
import glob
import numpy as np
from audioio.audioloader import load_audio, AudioLoader
def relacs_samplerate_unit(filepath, channel=0):
    """Retrieve sampling rate and unit from a relacs stimuli.dat file.

    Parameters
    ----------
    filepath: string
        Path to a relacs data directory, a file in a relacs data directory,
        or a relacs trace-*.raw file.
    channel: int
        Channel (trace) number, if `filepath` does not specify a
        trace-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz
    unit: string
        Unit of the trace, can be empty if not found

    Raises
    ------
    IOError/FileNotFoundError:
        If the stimuli.dat file does not exist.
    ValueError:
        stimuli.dat file does not contain sampling rate.
    """
    import re

    trace = channel + 1
    relacs_dir = filepath
    # check for relacs data directory:
    if not os.path.isdir(filepath):
        relacs_dir = os.path.dirname(filepath)
        bn = os.path.basename(filepath).lower()
        if len(bn) > 5 and bn[0:5] == 'trace' and bn[-4:] == '.raw':
            # a trace-N.raw file name overrides the channel argument:
            trace = int(bn[6:].replace('.raw', ''))

    # The trace number must not be followed by another digit, otherwise
    # the keys of trace 1 would also match the lines of trace 10, 11, ...
    unit_key = re.compile(r'unit%d(?!\d)' % trace)
    rate_key = re.compile(r'sampling rate%d(?!\d)' % trace)

    # retrieve sampling rate and unit from stimuli.dat file:
    samplerate = None
    unit = ""
    stimuli_file = os.path.join(relacs_dir, 'stimuli.dat')
    with open(stimuli_file, 'r') as sf:
        for line in sf:
            # only the leading block of '#' comment lines is the header:
            if not line.startswith('#'):
                break
            if unit_key.search(line):
                unit = line.split(':')[1].strip()
            if rate_key.search(line):
                value = line.split(':')[1].strip()
                samplerate = float(value.replace('Hz', ''))

    if samplerate is not None:
        return samplerate, unit
    raise ValueError('could not retrieve sampling rate from ' + stimuli_file)
def relacs_metadata(filepath):
    """Read the key-value pairs from the header of a relacs *.dat file.

    The header is the leading block of lines starting with '#'.
    Header lines without a colon are skipped.

    Parameters
    ----------
    filepath: string
        A relacs *.dat file.

    Returns
    -------
    data: dict
        Dictionary with key-value pairs of the file header.

    Raises
    ------
    IOError/FileNotFoundError:
        If `filepath` cannot be opened.
    """
    header = {}
    with open(filepath, 'r') as sf:
        for line in sf:
            # the header ends at the first line that is not a comment:
            if not line.startswith('#'):
                break
            # everything after the first colon is the value:
            key, colon, value = line.partition(':')
            if colon:
                header[key.strip('# ')] = value.strip()
    return header
def check_relacs(filepathes):
    """Check whether filepathes are relacs files.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a relacs data directory, a file in a relacs data directory,
        or relacs trace-*.raw files.

    Returns
    -------
    is_relacs: boolean
        For a single path: `True` if it is, or is a file in, a directory
        that contains both 'stimuli.dat' and 'trace-1.raw'.
        For more than one path: `True` if in addition all of them are
        named 'trace*.raw'.
    """
    path = filepathes
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        if len(filepathes) > 1:
            # several pathes must all be trace-*.raw files:
            names = [os.path.basename(f) for f in filepathes]
            if not all(len(bn) > 5 and bn[:5] == 'trace' and bn[-4:] == '.raw'
                       for bn in names):
                return False
        path = filepathes[0]
    # the relacs data directory is the path itself or its parent:
    relacs_dir = path if os.path.isdir(path) else os.path.dirname(path)
    # a valid relacs data directory holds these two files:
    has_stimuli = os.path.isfile(os.path.join(relacs_dir, 'stimuli.dat'))
    has_trace = os.path.isfile(os.path.join(relacs_dir, 'trace-1.raw'))
    return has_stimuli and has_trace
def _is_relacs_trace(path):
    """Return True if the base name of `path` looks like 'trace*.raw'."""
    bn = os.path.basename(path)
    return len(bn) > 5 and bn[0:5] == 'trace' and bn[-4:] == '.raw'


def _relacs_trace_files(relacs_dir):
    """List the consecutively numbered trace-N.raw files in `relacs_dir`, starting at N=1."""
    files = []
    for k in range(1, 10001):
        file = os.path.join(relacs_dir, 'trace-%d.raw' % k)
        if not os.path.isfile(file):
            break
        files.append(file)
    return files


def relacs_files(filepathes, channel):
    """Expand file pathes for relacs data to appropriate trace*.raw file names.

    A single path that is a directory, or a file not named like a trace
    file, is replaced by the trace file of the requested channel
    (`channel` >= 0) or by all consecutively numbered trace files found
    in that directory (`channel` < 0). Several pathes are taken as given.
    The passed-in sequence is never modified.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a relacs data directory, a file in a relacs data directory,
        or relacs trace-*.raw files.
    channel: int
        The data channel. If negative all channels are selected.

    Returns
    -------
    filepathes: list of strings
        List of relacs trace*.raw files.

    Raises
    ------
    ValueError: invalid name of relacs trace file
    """
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        # work on a copy, so that the caller's sequence stays untouched
        # and tuples are supported as well:
        filepathes = list(filepathes)
    else:
        filepathes = [filepathes]
    if len(filepathes) == 1:
        path = filepathes[0]
        if os.path.isdir(path):
            relacs_dir = path
            expand = True
        else:
            relacs_dir = os.path.dirname(path)
            expand = not _is_relacs_trace(path)
        if expand:
            if channel < 0:
                filepathes = _relacs_trace_files(relacs_dir)
            else:
                filepathes = [os.path.join(relacs_dir,
                                           'trace-%d.raw' % (channel + 1))]
    for path in filepathes:
        if not _is_relacs_trace(path):
            raise ValueError('invalid name %s of relacs trace file' % path)
    return filepathes
def load_relacs(filepathes, channel=-1, verbose=0):
    """Load traces (trace-*.raw files) that have been recorded with relacs (www.relacs.net).

    Each trace file is read as a flat array of 32-bit floats. The number
    of frames is taken from the first trace, minus two frames.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a relacs data directory, a file in a relacs data directory,
        or relacs trace-*.raw files.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        if > 0 show detailed error/warning messages

    Returns
    -------
    data: 1-D or 2-D array
        If `channel` is negative or more than one trace file is specified,
        a 2-D array with data of all channels is returned,
        where first dimension is time and second dimension is channel number.
        Otherwise an 1-D array with the data of that channel is returned.
    samplerate: float
        Sampling rate of the data in Hz
    unit: string
        Unit of the data

    Raises
    ------
    ValueError:
        - Invalid name for relacs trace-*.raw file.
        - Sampling rates of traces differ.
        - Unit of traces differ.
    """
    trace_files = relacs_files(filepathes, channel)
    if len(trace_files) > 1:
        # several traces are always returned as a 2-D array:
        channel = -1

    n_traces = len(trace_files)
    data = None
    n_frames = 0
    samplerate = None
    unit = ""
    for column, trace_file in enumerate(trace_files):
        samples = np.fromfile(trace_file, np.float32)
        if verbose > 0:
            print( 'loaded %s' % trace_file)
        if data is None:
            # the first trace determines the size of the data array:
            n_frames = len(samples) - 2
            data = np.empty((n_frames, n_traces))
        data[:, column] = samples[:n_frames]
        # all traces have to agree in sampling rate and unit:
        rate, trace_unit = relacs_samplerate_unit(trace_file)
        if samplerate is None:
            samplerate = rate
        elif rate != samplerate:
            raise ValueError('sampling rates of traces differ')
        if len(unit) == 0:
            unit = trace_unit
        elif trace_unit != unit:
            raise ValueError('unit of traces differ')

    if channel >= 0:
        return data[:, 0], samplerate, unit
    return data, samplerate, unit
def fishgrid_samplerate(filepath):
    """Retrieve the sampling rate from a fishgrid.cfg file.

    The value of the 'AISampleRate' entry is read as kHz and
    converted to Hertz.

    Parameters
    ----------
    filepath: string
        Path to a fishgrid data directory, a file in a fishgrid data
        directory, or a fishgrid traces-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz

    Raises
    ------
    IOError/FileNotFoundError:
        If the fishgrid.cfg file does not exist.
    ValueError:
        fishgrid.cfg file does not contain sampling rate.
    """
    # the fishgrid data directory is the path itself or its parent:
    if os.path.isdir(filepath):
        fishgrid_dir = filepath
    else:
        fishgrid_dir = os.path.dirname(filepath)
    cfg_file = os.path.join(fishgrid_dir, 'fishgrid.cfg')

    rate = None
    with open(cfg_file, 'r') as sf:
        for line in sf:
            if "AISampleRate" not in line:
                continue
            text = line.split(':')[1].strip()
            rate = float(text.replace('kHz', ''))*1000.0

    if rate is None:
        raise ValueError('could not retrieve sampling rate from ' + cfg_file)
    return rate
def fishgrid_spacings(filepath):
    """Spacing between grid electrodes.

    Reads the 'RowDistance1' and 'ColumnDistance1' entries of the
    fishgrid.cfg file. Every line containing 'Grid' starts a new grid.
    Only the integer part in front of the decimal point is kept.

    Parameters
    ----------
    filepath: string
        Path to a fishgrid data directory, a file in a fishgrid data
        directory, or a fishgrid traces-*.raw file.

    Returns
    -------
    grid_dist: list of tuples of ints
        For each grid the distances between rows and columns.
    """
    def whole_number(line):
        # integer part of the value following the first colon
        return int(line.split(':')[1].strip().split('.')[0])

    if os.path.isdir(filepath):
        fishgrid_dir = filepath
    else:
        fishgrid_dir = os.path.dirname(filepath)
    cfg_file = os.path.join(fishgrid_dir, 'fishgrid.cfg')

    # retrieve grid spacings from fishgrid.cfg file:
    spacings = []
    row_dist = col_dist = None
    with open(cfg_file, 'r') as sf:
        for line in sf:
            if "Grid" in line:
                # a new grid section begins, store a completed previous one:
                if not (row_dist is None or col_dist is None):
                    spacings.append((row_dist, col_dist))
                row_dist = col_dist = None
            elif "ColumnDistance1" in line:
                col_dist = whole_number(line)
            elif "RowDistance1" in line:
                row_dist = whole_number(line)
    # store the last grid:
    if not (row_dist is None or col_dist is None):
        spacings.append((row_dist, col_dist))
    return spacings
def fishgrid_grids(filepath):
    """Retrieve grid sizes from a fishgrid.cfg file.

    Every line containing 'Grid' starts a new grid; the 'Rows' and
    'Columns' entries that follow give its size.

    Parameters
    ----------
    filepath: string
        path to a fishgrid data directory, a file in a fishgrid data directory,
        or a fishgrid traces-*.raw file.

    Returns
    -------
    grids: list of tuples of ints
        For each grid the number of rows and columns.

    Raises
    ------
    IOError/FileNotFoundError:
        If the fishgrid.cfg file does not exist.
    """
    def count(line):
        # integer value following the first colon
        return int(line.split(':')[1].strip())

    # the fishgrid data directory is the path itself or its parent:
    if os.path.isdir(filepath):
        fishgrid_dir = filepath
    else:
        fishgrid_dir = os.path.dirname(filepath)
    cfg_file = os.path.join(fishgrid_dir, 'fishgrid.cfg')

    # retrieve grids from fishgrid.cfg file:
    sizes = []
    n_rows = n_cols = None
    with open(cfg_file, 'r') as sf:
        for line in sf:
            if "Grid" in line:
                # a new grid section begins, store a completed previous one:
                if not (n_rows is None or n_cols is None):
                    sizes.append((n_rows, n_cols))
                n_rows = n_cols = None
            elif "Columns" in line:
                n_cols = count(line)
            elif "Rows" in line:
                n_rows = count(line)
    # store the last grid:
    if not (n_rows is None or n_cols is None):
        sizes.append((n_rows, n_cols))
    return sizes
def check_fishgrid(filepathes):
    """Check whether filepathes are valid fishgrid files (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a fishgrid data directory, a file in a fishgrid data directory,
        or fishgrid traces-*.raw files.

    Returns
    -------
    is_fishgrid: bool
        For a single path: `True` if it is, or is a file in, a directory
        that contains both 'fishgrid.cfg' and 'traces-grid1.raw'.
        For more than one path: `True` if in addition all of them are
        named 'traces-*.raw' (case insensitive).
    """
    path = filepathes
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        if len(filepathes) > 1:
            # several pathes must all be traces-*.raw files:
            names = [os.path.basename(f).lower() for f in filepathes]
            if not all(len(bn) > 7 and bn[:7] == 'traces-' and bn[-4:] == '.raw'
                       for bn in names):
                return False
        path = filepathes[0]
    # the fishgrid data directory is the path itself or its parent:
    fishgrid_dir = path if os.path.isdir(path) else os.path.dirname(path)
    # a valid fishgrid data directory holds these two files:
    cfg_file = os.path.join(fishgrid_dir, 'fishgrid.cfg')
    first_grid = os.path.join(fishgrid_dir, 'traces-grid1.raw')
    return os.path.isfile(cfg_file) and os.path.isfile(first_grid)
def _is_fishgrid_traces(path):
    """Return True if the base name of `path` looks like 'traces-*.raw'."""
    bn = os.path.basename(path)
    return len(bn) > 7 and bn[0:7] == 'traces-' and bn[-4:] == '.raw'


def _fishgrid_traces_files(fishgrid_dir):
    """List the consecutively numbered traces-gridN.raw files in `fishgrid_dir`, starting at N=1."""
    files = []
    for k in range(1, 10001):
        file = os.path.join(fishgrid_dir, 'traces-grid%d.raw' % k)
        if not os.path.isfile(file):
            break
        files.append(file)
    return files


def fishgrid_files(filepathes, channel, grid_sizes):
    """Expand file pathes for fishgrid data to appropriate traces-*.raw file names.

    A single path that is a directory, or a file not named like a traces
    file, is replaced by the traces file of the grid containing the
    requested channel (`channel` >= 0) or by all consecutively numbered
    traces-grid*.raw files found in that directory (`channel` < 0).
    Several pathes are taken as given.
    The passed-in sequence is never modified.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a fishgrid data directory, a file in a fishgrid data directory,
        or fishgrid traces-*.raw files.
    channel: int
        The data channel. If negative all channels are selected.
    grid_sizes: list of int
        The number of channels of each grid.

    Returns
    -------
    filepathes: list of strings
        List of fishgrid traces-*.raw files.

    Raises
    ------
    IndexError:
        Invalid channel.
    ValueError:
        Invalid name of fishgrid traces file.
    """
    # find the grid that contains the requested channel:
    grid = -1
    if channel >= 0:
        n_channels = 0
        for g, size in enumerate(grid_sizes):
            n_channels += size
            if channel < n_channels:
                grid = g
                break
        if grid < 0:
            raise IndexError("invalid channel")

    if isinstance(filepathes, (list, tuple, np.ndarray)):
        # work on a copy, so that the caller's sequence stays untouched
        # and tuples are supported as well:
        filepathes = list(filepathes)
    else:
        filepathes = [filepathes]
    if len(filepathes) == 1:
        path = filepathes[0]
        if os.path.isdir(path):
            fishgrid_dir = path
            expand = True
        else:
            fishgrid_dir = os.path.dirname(path)
            expand = not _is_fishgrid_traces(path)
        if expand:
            if grid < 0:
                filepathes = _fishgrid_traces_files(fishgrid_dir)
            else:
                filepathes = [os.path.join(fishgrid_dir,
                                           'traces-grid%d.raw' % (grid + 1))]
    for path in filepathes:
        if not _is_fishgrid_traces(path):
            raise ValueError('invalid name %s of fishgrid traces file' % path)
    return filepathes
def load_fishgrid(filepathes, channel=-1, verbose=0):
    """Load traces (traces-grid*.raw files) that have been recorded with fishgrid (https://github.com/bendalab/fishgrid).

    Each traces file is read as 32-bit floats with the channels of the
    grid interleaved. The number of frames is taken from the first file,
    minus two frames. The data of several grids are placed side by side
    in the order of the files.

    Parameters
    ----------
    filepathes: string or list of string
        Path to a fishgrid data directory, a fishgrid.cfg file,
        or fishgrid traces-grid*.raw files.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    data: 1-D or 2-D array
        If `channel` is negative or more than one trace file is specified,
        a 2-D array with data of all channels is returned,
        where first dimension is time and second dimension is channel number.
        Otherwise an 1-D array with the data of that channel is returned.
    samplerate: float
        Sampling rate of the data in Hz.
    unit: string
        Unit of the data.
    """
    if not isinstance(filepathes, (list, tuple, np.ndarray)):
        filepathes = [filepathes]
    grids = fishgrid_grids(filepathes[0])
    grid_sizes = [r*c for r, c in grids]
    filepathes = fishgrid_files(filepathes, channel, grid_sizes)
    if len(filepathes) > 1:
        # several grids are always returned as a 2-D array:
        channel = -1

    # number of channels of each file, from the grid number in its name
    # ('traces-grid' has 11 characters):
    grid_channels = []
    for path in filepathes:
        g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
        grid_channels.append(grid_sizes[g])
    nchannels = sum(grid_channels)

    # load traces-grid*.raw files:
    data = None
    nrows = 0
    samplerate = None
    if len(filepathes) > 0:
        samplerate = fishgrid_samplerate(filepathes[0])
    unit = "V"
    n = 0  # first column of the current grid in `data`
    for path, channels in zip(filepathes, grid_channels):
        x = np.fromfile(path, np.float32).reshape((-1, channels))
        if verbose > 0:
            print( 'loaded %s' % path)
        if data is None:
            nrows = len(x)-2
            data = np.empty((nrows, nchannels))
        data[:, n:n+channels] = x[:nrows, :]
        # advance to the columns of the next grid, otherwise every
        # grid would overwrite the columns of the first one:
        n += channels

    if channel < 0:
        return data, samplerate, unit
    # a single grid was loaded, make the channel index relative to it:
    gs = 0
    for s in grid_sizes:
        if channel < gs + s:
            break
        gs += s
    return data[:, channel-gs], samplerate, unit
def check_container(filepath):
    """Check if file is a generic container file.

    Only the file extension is inspected, ignoring case.

    Supported file formats are:
    - python pickle files (.pkl, .pickle)
    - numpy files (.npz)
    - matlab files (.mat)

    Parameters
    ----------
    filepath: string
        Path of the file to check.

    Returns
    -------
    is_container: bool
        `True`, if `filepath` is a supported container format.
    """
    supported = ('.pkl', '.pickle', '.npz', '.mat')
    suffix = os.path.splitext(filepath)[1].lower()
    return suffix in supported
def load_container(filepath, channel=-1, verbose=0, datakey=None,
                   samplekey=('rate', 'Fs', 'fs'),
                   timekey=('time',), unitkey='unit'):
    """Load data from a generic container file.

    Supported file formats are:
    - python pickle files (.pkl, .pickle)
    - numpy files (.npz)
    - matlab files (.mat)

    The format is selected by the file extension, ignoring case.

    Parameters
    ----------
    filepath: string
        Path of the file to load.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        if > 0 show detailed error/warning messages
    datakey: None, string, or list of strings
        Name of the variable holding the data. If several names are
        given, the first one found is used. If `None` take the
        variable that is an 1D or 2D array and has the largest number of
        elements.
    samplekey: string or list of strings
        Name of the variable holding the sampling rate.
    timekey: string or list of strings
        Name of the variable holding sampling times.
        If no sampling rate is available, the samplingrate is retrieved
        from the first two sampling times.
    unitkey: string
        Name of the variable holding the unit of the data.
        If `unitkey` is not a valid key and differs from 'unit',
        then return `unitkey` as the `unit`.

    Returns
    -------
    data: 1-D or 2-D array of floats
        If `channel` is negative, a 2-D array with data of all channels
        is returned, where first dimension is time and second
        dimension is channel number. Otherwise an 1-D array with the
        data of that channel is returned.
    samplerate: float
        Sampling rate of the data in Hz.
    unit: string
        Unit of the data.

    Raises
    ------
    IndexError:
        Invalid channel requested
    ValueError:
        Invalid key requested.
    """
    # load data:
    data = {}
    # compare case-insensitively, in the same way as check_container():
    ext = os.path.splitext(filepath)[1].lower()
    if ext in ('.pkl', '.pickle'):
        import pickle
        # NOTE: unpickling can execute arbitrary code,
        # only load pickle files from trusted sources.
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
    elif ext == '.npz':
        data = np.load(filepath)
    elif ext == '.mat':
        from scipy.io import loadmat
        data = loadmat(filepath, squeeze_me=True)
    if verbose > 0:
        print( 'loaded %s' % filepath)
    try:
        return _extract_container_data(data, channel, datakey,
                                       samplekey, timekey, unitkey)
    finally:
        # np.load() keeps the npz archive open for lazy loading of arrays:
        if ext == '.npz':
            data.close()


def _extract_container_data(data, channel, datakey, samplekey, timekey, unitkey):
    """Pick data array, sampling rate and unit from the mapping `data` of a container file."""
    # extract metadata:
    if not isinstance(samplekey, (list, tuple, np.ndarray)):
        samplekey = (samplekey,)
    if not isinstance(timekey, (list, tuple, np.ndarray)):
        timekey = (timekey,)
    samplerate = 0.0
    for skey in samplekey:
        if skey in data:
            samplerate = float(data[skey])
            break
    if samplerate == 0.0:
        # fall back to the interval between the first two sampling times:
        for tkey in timekey:
            if tkey in data:
                samplerate = 1.0/(data[tkey][1] - data[tkey][0])
                break
    if samplerate == 0.0:
        raise ValueError('invalid keys %s and %s for requesting sampling rate or sampling times'
                         % (', '.join(samplekey), ', '.join(timekey)))
    unit = ''
    if unitkey in data:
        unit = data[unitkey]
    elif unitkey != 'unit':
        unit = unitkey
    unit = str(unit)
    # get data array:
    raw_data = np.array([])
    if datakey:
        # try data keys:
        if not isinstance(datakey, (list, tuple, np.ndarray)):
            datakey = (datakey,)
        for dkey in datakey:
            if dkey in data:
                # pickled data might be plain lists:
                raw_data = np.asarray(data[dkey])
                break
        if np.prod(raw_data.shape) == 0:
            raise ValueError('invalid key(s) %s for requesting data'
                             % ', '.join(datakey))
    else:
        # find largest 1D or 2D array:
        for d in data:
            if hasattr(data[d], 'shape'):
                if 1 <= len(data[d].shape) <= 2 and \
                   np.prod(data[d].shape) > np.prod(raw_data.shape):
                    raw_data = data[d]
    if np.prod(raw_data.shape) == 0:
        raise ValueError('no data found')
    # make 2D:
    if len(raw_data.shape) == 1:
        raw_data = raw_data.reshape(-1, 1)
    # the longer dimension is taken to be time, transpose if necessary:
    if np.argmax(raw_data.shape) > 0:
        raw_data = raw_data.T
    if channel >= 0:
        if channel >= raw_data.shape[1]:
            raise IndexError('invalid channel number %d requested' % channel)
        raw_data = raw_data[:, channel]
    return raw_data.astype(float), samplerate, unit
def load_data(filepath, channel=-1, verbose=0, **kwargs):
    """Load time-series data from a file of arbitrary format.

    The file is tried as relacs data, fishgrid data, a generic
    container file, and finally as an audio file, in that order.
    Data from audio files get the unit 'a.u.'.

    Parameters
    ----------
    filepath: string or list of strings
        The full path and name of the file to load. For some file
        formats several files can be provided in a list.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        If > 0 show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        loading function for container files.

    Returns
    -------
    data: 1-D or 2-D array
        If `channel` is negative, a 2-D array with data of all
        channels is returned, where first dimension is time and second
        dimension is channel number. Otherwise an 1-D array with the
        data of that channel is returned.
    samplerate: float
        Sampling rate of the data in Hz.
    unit: string
        Unit of the data.

    Raises
    ------
    ValueError:
        Input argument `filepath` is empty string or list.
    IndexError:
        Invalid channel requested.
    """
    # check values:
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string or list.')

    # formats that support several files:
    if check_relacs(filepath):
        return load_relacs(filepath, channel, verbose)
    if check_fishgrid(filepath):
        return load_fishgrid(filepath, channel, verbose)

    # all other formats take a single file only:
    if isinstance(filepath, (list, tuple, np.ndarray)):
        filepath = filepath[0]
    if check_container(filepath):
        return load_container(filepath, channel, verbose=verbose, **kwargs)

    # everything else is handed to the audio loader:
    data, samplerate = load_audio(filepath, verbose)
    if channel >= 0:
        if channel >= data.shape[1]:
            raise IndexError('invalid channel number %d requested' % channel)
        data = data[:, channel]
    return data, samplerate, 'a.u.'
class DataLoader(AudioLoader):
    """Buffered reading of time-series data for random access of the data in the file.

    This allows for reading very large data files that do not fit into memory.
    A `DataLoader` instance can be used like a huge read-only numpy array, i.e.
    ```
    data = DataLoader('path/to/data/file.dat')
    x = data[10000:20000,0]
    ```
    The first index specifies the frame, the second one the channel.

    `DataLoader` first determines the format of the data file and then opens
    the file (first line). It then reads data from the file
    as necessary for the requested data (second line).

    Supported file formats are relacs trace*.raw files (www.relacs.net),
    fishgrid traces-*.raw files, and audio files via `audioio.AudioLoader`.

    Reading sequentially through the file is always possible. If previous data
    are requested, then the file is read from the beginning. This might slow down access
    to previous data considerably. Use the `backsize` argument to the open functions to
    make sure some data are loaded before the requested frame. Then a subsequent access
    to the data within backsize seconds before that frame can still be handled without
    the need to reread the file from the beginning.

    Usage:
    ------
    ```
    import thunderfish.dataloader as dl
    with dl.open_data(filepath, -1, 60.0, 10.0) as data:
        # do something with the content of the file:
        x = data[0:10000,0]
        y = data[10000:20000,0]
        z = x + y
    ```

    Normal open and close:
    ```
    data = dl.DataLoader(filepath, 0, 60.0)
    x = data[:]  # read the whole file
    data.close()
    ```
    that is the same as:
    ```
    data = dl.DataLoader()
    data.open(filepath, 0, 60.0)
    ```

    Member variables:
    -----------------
    samplerate (float): the sampling rate of the data in Hertz.
    channels (int): the number of channels that are read in.
    channel (int): the column of the internal buffer of which the trace is returned.
                   If negative, all channels are returned.
    frames (int): the number of frames in the file.
    shape (tuple): number of frames and channels of the data.
    unit (string): unit of the data.

    Some member functions:
    ----------------------
    len(): the number of frames
    open(): open a data file.
    open_*(): open a data file of a specific format.
    close(): close the file.
    """

    def __init__(self, filepath=None, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Initialize the DataLoader instance. If filepath is not None open the file.

        Parameters
        ----------
        filepath: string
            Name of the file.
        channel: int
            The single channel to be worked on.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        """
        super(DataLoader, self).__init__(None, buffersize, backsize, verbose)
        if filepath is not None:
            self.open(filepath, channel, buffersize, backsize, verbose)

    def __getitem__(self, key):
        """Access frames of the data.

        If a single channel was selected on opening, `key` indexes the
        frames only and the data of that channel are returned.
        Otherwise `key` is passed on unchanged to the base class.

        Raises
        ------
        IndexError:
            A tuple was used as index although a single channel is selected.
        """
        if self.channel >= 0:
            if isinstance(key, tuple):
                raise IndexError('only frame indices are allowed if a single channel is selected')
            return super(DataLoader, self).__getitem__((key, self.channel))
        else:
            return super(DataLoader, self).__getitem__(key)

    def __next__(self):
        """Return the next frame, restricted to the selected channel if there is one."""
        if self.channel >= 0:
            return super(DataLoader, self).__next__()[self.channel]
        else:
            return super(DataLoader, self).__next__()

    # relacs interface:
    def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Open relacs data files (www.relacs.net) for reading.

        Parameters
        ----------
        filepathes: string or list of string
            Path to a relacs data directory, a relacs stimuli.dat file, a relacs info.dat file,
            or relacs trace-*.raw files.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            - Invalid name for relacs trace-*.raw file.
            - Number of frames, sampling rates, or units of traces differ.
        """
        self.verbose = verbose
        if self.sf is not None:
            # close whatever is currently open with the matching close
            # function - this need not be relacs data.
            # NOTE(review): relies on the base class providing close() - confirm.
            self.close()

        filepathes = relacs_files(filepathes, channel)

        # open trace files:
        self.sf = []
        self.frames = None
        self.samplerate = None
        self.unit = ""
        for path in filepathes:
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # file size in 4-byte floats:
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # small differences in length are tolerated:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            file.seek(0)
            # retrieve sampling rate and unit:
            rate, us = relacs_samplerate_unit(path)
            if self.samplerate is None:
                self.samplerate = rate
            elif rate != self.samplerate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
        self.channels = len(self.sf)
        if channel >= 0 and self.channels == 1:
            # Only the file of the requested channel was opened, it fills
            # the one and only column of the buffer.
            self.channel = 0
        else:
            self.channel = channel
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        self.buffersize = int(buffersize*self.samplerate)
        self.backsize = int(backsize*self.samplerate)
        self._init_buffer()
        self.offset = 0
        self.close = self._close_relacs
        self._update_buffer = self._update_buffer_relacs
        return self

    def _close_relacs(self):
        """Close the relacs data files.
        """
        if self.sf is not None:
            for file in self.sf:
                file.close()
            self.sf = None

    def _update_buffer_relacs(self, start, stop):
        """Make sure that the buffer contains the data between
        start and stop for relacs files.
        """
        if start < self.offset or stop > self.offset + self.buffer.shape[0]:
            offset, size = self._read_indices(start, stop)
            r_offset, r_size = self._recycle_buffer(offset, size)
            # read buffer, each file provides one column:
            for i, file in enumerate(self.sf):
                file.seek(r_offset*4)
                buffer = file.read(r_size*4)
                # np.frombuffer replaces the deprecated binary mode of np.fromstring:
                self.buffer[r_offset-offset:r_offset+r_size-offset, i] = np.frombuffer(buffer, dtype=np.float32)
            self.offset = offset
            if self.verbose > 1:
                print(' read %6d frames at %d' % (r_size, r_offset))
            if self.verbose > 0:
                print(' loaded %d frames from %d up to %d'
                      % (self.buffer.shape[0], self.offset, self.offset+self.buffer.shape[0]))

    # fishgrid interface:
    def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

        Parameters
        ----------
        filepathes: string or list of string
            Path to a fishgrid data directory, a fishgrid.cfg file,
            or fishgrid traces-*.raw files.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        IndexError:
            Invalid channel.
        ValueError:
            - Invalid name of fishgrid traces file.
            - Number of frames of traces differ.
        """
        self.verbose = verbose
        if self.sf is not None:
            # close whatever is currently open with the matching close
            # function - this need not be fishgrid data.
            # NOTE(review): relies on the base class providing close() - confirm.
            self.close()

        if not isinstance(filepathes, (list, tuple, np.ndarray)):
            filepathes = [filepathes]
        grids = fishgrid_grids(filepathes[0])
        grid_sizes = [r*c for r, c in grids]
        filepathes = fishgrid_files(filepathes, channel, grid_sizes)

        # open grid files:
        self.sf = []
        self.grid_channels = []
        self.grid_offs = []
        self.channels = 0
        self.frames = None
        self.samplerate = None
        if len(filepathes) > 0:
            self.samplerate = fishgrid_samplerate(filepathes[0])
        self.unit = "V"
        for path in filepathes:
            # number of channels of the grid, from the grid number in the
            # file name ('traces-grid' has 11 characters):
            g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
            gsize = grid_sizes[g]
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # columns of this grid within the buffer:
            self.grid_channels.append(gsize)
            self.grid_offs.append(self.channels)
            self.channels += gsize
            # file size in frames of 4-byte floats:
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4//gsize
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # small differences in length are tolerated:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            file.seek(0)
        # make the channel index relative to its grid
        # (a negative channel is left unchanged):
        gs = 0
        for s in grid_sizes:
            if channel < gs + s:
                break
            gs += s
        self.channel = channel - gs
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        self.buffersize = int(buffersize*self.samplerate)
        self.backsize = int(backsize*self.samplerate)
        self._init_buffer()
        self.offset = 0
        self.close = self._close_fishgrid
        self._update_buffer = self._update_buffer_fishgrid
        return self

    def _close_fishgrid(self):
        """Close the fishgrid data files.
        """
        if self.sf is not None:
            for file in self.sf:
                file.close()
            self.sf = None

    def _update_buffer_fishgrid(self, start, stop):
        """Make sure that the buffer contains the data between
        start and stop for fishgrid files.
        """
        if start < self.offset or stop > self.offset + self.buffer.shape[0]:
            offset, size = self._read_indices(start, stop)
            r_offset, r_size = self._recycle_buffer(offset, size)
            # read buffer, each file provides the columns of one grid:
            for file, gchannels, goffset in zip(self.sf, self.grid_channels, self.grid_offs):
                file.seek(r_offset*4*gchannels)
                buffer = file.read(r_size*4*gchannels)
                # np.frombuffer replaces the deprecated binary mode of np.fromstring:
                self.buffer[r_offset-offset:r_offset+r_size-offset, goffset:goffset+gchannels] = np.frombuffer(buffer, dtype=np.float32).reshape((-1, gchannels))
            self.offset = offset
            if self.verbose > 1:
                print(' read %6d frames at %d' % (r_size, r_offset))
            if self.verbose > 0:
                print(' loaded %d frames from %d up to %d'
                      % (self.buffer.shape[0], self.offset, self.offset+self.buffer.shape[0]))

    def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0,
             verbose=0):
        """Open file with time-series data for reading.

        The file is tried as relacs data, fishgrid data, and finally as
        an audio file. Data from audio files get the unit 'a.u.'.

        Parameters
        ----------
        filepath: string or list of string
            Path to a data files or directory.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            `filepath` is a container file (pickle, numpy, matlab).
        IndexError:
            Invalid channel requested.
        """
        if check_relacs(filepath):
            self.open_relacs(filepath, channel, buffersize, backsize, verbose)
        elif check_fishgrid(filepath):
            self.open_fishgrid(filepath, channel, buffersize, backsize, verbose)
        else:
            if isinstance(filepath, (list, tuple, np.ndarray)):
                filepath = filepath[0]
            if check_container(filepath):
                raise ValueError('file format not supported')
            super(DataLoader, self).open(filepath, buffersize, backsize, verbose)
            # valid channels are 0 to self.channels-1:
            if channel >= self.channels:
                raise IndexError('invalid channel number %d' % channel)
            self.channel = channel
            if self.channel >= 0:
                self.shape = (self.frames,)
            else:
                self.shape = (self.frames, self.channels)
            self.unit = 'a.u.'
        return self
# `open_data` is an alias of the `DataLoader` class,
# for use as `with open_data(filepath, ...) as data:`.
open_data = DataLoader
def demo(filepath, plot=False, channel=-1):
    """Demonstrate `load_data()` and `DataLoader` on a data file.

    Parameters
    ----------
    filepath: string
        Path of the data file to load.
    plot: bool
        If `True`, plot the data returned by `load_data()` and return
        without running the `DataLoader` part.
    channel: int
        The data channel. If negative all channels are selected.
    """
    print("try load_data:")
    data, samplerate, unit = load_data(filepath, channel, verbose=2)
    if plot:
        # import locally, so that plotting also works when this module
        # is imported and not run as a script:
        import matplotlib.pyplot as plt
        time = np.arange(len(data))/samplerate
        if channel < 0:
            for c in range(data.shape[1]):
                plt.plot(time, data[:,c])
        else:
            plt.plot(time, data)
        plt.xlabel('Time [s]')
        plt.ylabel('[' + unit + ']')
        plt.show()
        return
    print('')
    print("try DataLoader for channel=%d:" % channel)
    with open_data(filepath, channel, 2.0, 1.0, 1) as data:
        print('samplerate: %g' % data.samplerate)
        print('frames: %d %d' % (len(data), data.shape[0]))
        nframes = int(1.0 * data.samplerate)
        starts = range(0, len(data), nframes)
        # Read through the data forward and then backwards. No plotting
        # is needed here: with plot=True the function returned above.
        for label, indices in (('forward', starts),
                               ('backward', reversed(starts))):
            for i in indices:
                print('%s %d-%d' % (label, i, i + nframes))
                # accessing the slice makes the loader read the frames:
                if channel < 0:
                    data[i:i + nframes, 0]
                else:
                    data[i:i + nframes]
def main(cargs):
    """Parse the command line and run `demo()` with the result.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as provided by sys.argv[1:]
    """
    from argparse import ArgumentParser
    cli = ArgumentParser(
        description='Checking thunderfish.dataloader module.')
    cli.add_argument('-p', action='store_true', dest='plot',
                     help='plot loaded data')
    cli.add_argument('-c', type=int, default=-1, dest='channel',
                     help='channel to be loaded')
    cli.add_argument('file', type=str, nargs=1, default='',
                     help='name of data file')
    options = cli.parse_args(cargs)
    # nargs=1 stores the file name in a list with one element:
    datafile = options.file[0]
    demo(datafile, options.plot, options.channel)
if __name__ == "__main__":
    # plotting library for the demo, bound at module level:
    import matplotlib.pyplot as plt
    import sys
    # hand over the command line arguments without the program name:
    main(sys.argv[1:])
Functions
def check_container(filepath)
-
Check if file is a generic container file.
Supported file formats are: - python pickle files (.pkl, .pickle) - numpy files (.npz) - matlab files (.mat)
Parameters
filepath
:string
- Path of the file to check.
Returns
is_container
:bool
True, if filepath is a supported container format.
Expand source code
def check_container(filepath):
    """Check if file is a generic container file.

    Supported file formats are:

    - python pickle files (.pkl, .pickle)
    - numpy files (.npz)
    - matlab files (.mat)

    Only the file extension is inspected, ignoring case.

    Parameters
    ----------
    filepath: string
        Path of the file to check.

    Returns
    -------
    is_container: bool
        `True`, if `filepath` is a supported container format.
    """
    container_extensions = ('.pkl', '.pickle', '.npz', '.mat')
    _, extension = os.path.splitext(filepath)
    return extension.lower() in container_extensions
def check_fishgrid(filepathes)
-
Check whether filepathes are valid fishgrid files (https://github.com/bendalab/fishgrid).
Parameters
filepathes
:string
orlist
ofstrings
- Path to a fishgrid data directory, a file in a fishgrid data directory, or fishgrid traces-*.raw files.
Returns
is_fishgrid
:bool
- If filepathes is a single path, then returns True if it is a file in a valid fishgrid data directory. If filepathes are more than one path, then returns True if filepathes are 'traces-*.raw' files in a valid fishgrid data directory.
Expand source code
def check_fishgrid(filepathes):
    """Check whether filepathes are valid fishgrid files (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a fishgrid data directory, a file in a fishgrid data directory,
        or fishgrid traces-*.raw files.

    Returns
    -------
    is_fishgrid: bool
        If `filepathes` is a single path, then returns `True` if it is a file in
        a valid fishgrid data directory.
        If `filepathes` are more than one path, then returns `True` if
        `filepathes` are 'traces-*.raw' files in a valid fishgrid data directory.
    """
    first = filepathes
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        # several pathes must all be named traces-*.raw (ignoring case):
        if len(filepathes) > 1:
            for candidate in filepathes:
                name = os.path.basename(candidate).lower()
                is_traces = (len(name) > 7 and name[0:7] == 'traces-'
                             and name[-4:] == '.raw')
                if not is_traces:
                    return False
        first = filepathes[0]
    # the directory that should hold the fishgrid data:
    directory = first if os.path.isdir(first) else os.path.dirname(first)
    # a valid directory has a configuration file and the first grid:
    has_config = os.path.isfile(os.path.join(directory, 'fishgrid.cfg'))
    return has_config and os.path.isfile(os.path.join(directory, 'traces-grid1.raw'))
def check_relacs(filepathes)
-
Check whether filepathes are relacs files.
Parameters
filepathes
:string
orlist
ofstrings
- Path to a relacs data directory, a file in a relacs data directory, or relacs trace-*.raw files.
Returns
is_relacs
:boolean
If filepathes is a single path, then returns True if it is a valid relacs data directory or a file in a valid relacs data directory. If filepathes are more than one path, then returns True if filepathes are 'trace-*.raw' files in a valid relacs data directory.
Expand source code
def check_relacs(filepathes):
    """Check whether filepathes are relacs files.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a relacs data directory, a file in a relacs data directory,
        or relacs trace-*.raw files.

    Returns
    -------
    is_relacs: boolean
        If `filepathes` is a single path, then returns `True` if it is a
        valid relacs data directory or a file in a valid relacs data directory.
        If `filepathes` are more than one path, then returns `True` if
        `filepathes` are 'trace-*.raw' files in a valid relacs data directory.
    """
    first = filepathes
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        # several pathes must all be named trace*.raw:
        if len(filepathes) > 1:
            for candidate in filepathes:
                name = os.path.basename(candidate)
                is_trace = (len(name) > 5 and name[0:5] == 'trace'
                            and name[-4:] == '.raw')
                if not is_trace:
                    return False
        first = filepathes[0]
    # the directory that should hold the relacs data:
    directory = first if os.path.isdir(first) else os.path.dirname(first)
    # a valid directory has a stimuli file and the first trace:
    has_stimuli = os.path.isfile(os.path.join(directory, 'stimuli.dat'))
    return has_stimuli and os.path.isfile(os.path.join(directory, 'trace-1.raw'))
def demo(filepath, plot=False, channel=-1)
-
Expand source code
def demo(filepath, plot=False, channel=-1):
    """Demonstrate `load_data()` and `DataLoader` on a data file.

    Parameters
    ----------
    filepath: string
        Path of the data file to load.
    plot: bool
        If `True`, plot the data returned by `load_data()` and return
        without running the `DataLoader` part.
    channel: int
        The data channel. If negative all channels are selected.
    """
    print("try load_data:")
    data, samplerate, unit = load_data(filepath, channel, verbose=2)
    if plot:
        # import locally, so that plotting also works when this module
        # is imported and not run as a script:
        import matplotlib.pyplot as plt
        time = np.arange(len(data))/samplerate
        if channel < 0:
            for c in range(data.shape[1]):
                plt.plot(time, data[:,c])
        else:
            plt.plot(time, data)
        plt.xlabel('Time [s]')
        plt.ylabel('[' + unit + ']')
        plt.show()
        return
    print('')
    print("try DataLoader for channel=%d:" % channel)
    with open_data(filepath, channel, 2.0, 1.0, 1) as data:
        print('samplerate: %g' % data.samplerate)
        print('frames: %d %d' % (len(data), data.shape[0]))
        nframes = int(1.0 * data.samplerate)
        starts = range(0, len(data), nframes)
        # Read through the data forward and then backwards. No plotting
        # is needed here: with plot=True the function returned above.
        for label, indices in (('forward', starts),
                               ('backward', reversed(starts))):
            for i in indices:
                print('%s %d-%d' % (label, i, i + nframes))
                # accessing the slice makes the loader read the frames:
                if channel < 0:
                    data[i:i + nframes, 0]
                else:
                    data[i:i + nframes]
def fishgrid_files(filepathes, channel, grid_sizes)
-
Expand file pathes for fishgrid data to appropriate traces-*.raw file names.
Parameters
filepathes
:string
orlist
ofstrings
- Path to a fishgrid data directory, a file in a fishgrid data directory, or fishgrid traces-*.raw files.
channel
:int
- The data channel. If negative all channels are selected.
grid_sizes
:list
ofint
- The number of channels of each grid.
Returns
filepathes
:list
ofstrings
- List of fishgrid traces-*.raw files.
Raises
IndexError
Invalid channel.
Expand source code
def _is_fishgrid_traces_name(path):
    """Return True if the basename of `path` has the form 'traces-*.raw'."""
    bn = os.path.basename(path)
    return len(bn) > 7 and bn[0:7] == 'traces-' and bn[-4:] == '.raw'


def _find_fishgrid_traces(fishgrid_dir):
    """List traces-grid1.raw, traces-grid2.raw, ... in `fishgrid_dir` up to the first missing one."""
    files = []
    for k in range(10000):
        file = os.path.join(fishgrid_dir, 'traces-grid%d.raw' % (k+1))
        if not os.path.isfile(file):
            break
        files.append(file)
    return files


def fishgrid_files(filepathes, channel, grid_sizes):
    """Expand file pathes for fishgrid data to appropriate traces-*.raw file names.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a fishgrid data directory, a file in a fishgrid data directory,
        or fishgrid traces-*.raw files.
    channel: int
        The data channel. If negative all channels are selected.
    grid_sizes: list of int
        The number of channels of each grid.

    Returns
    -------
    filepathes: list of strings
        List of fishgrid traces-*.raw files. This is always a new list,
        the `filepathes` argument is not modified.

    Raises
    ------
    IndexError:
        Invalid channel.
    ValueError:
        Invalid name of a fishgrid traces file.
    """
    # find the grid that holds the requested channel:
    grid = -1
    if channel >= 0:
        gs = 0
        for g, s in enumerate(grid_sizes):
            gs += s
            if channel < gs:
                grid = g
                break
        if grid < 0:
            raise IndexError("invalid channel")
    # work on a copy, so that the caller's sequence is left untouched
    # and tuples or arrays can be handled as well:
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        filepathes = list(filepathes)
    else:
        filepathes = [filepathes]
    if len(filepathes) == 1:
        path = filepathes[0]
        # a directory or some other file in it needs to be expanded;
        # a traces-*.raw file is taken as it is:
        fishgrid_dir = None
        if os.path.isdir(path):
            fishgrid_dir = path
        elif not _is_fishgrid_traces_name(path):
            fishgrid_dir = os.path.dirname(path)
        if fishgrid_dir is not None:
            if grid < 0:
                filepathes = _find_fishgrid_traces(fishgrid_dir)
            else:
                filepathes = [os.path.join(fishgrid_dir,
                                           'traces-grid%d.raw' % (grid+1))]
    for path in filepathes:
        if not _is_fishgrid_traces_name(path):
            raise ValueError('invalid name %s of fishgrid traces file' % path)
    return filepathes
def fishgrid_grids(filepath)
-
Retrieve grid sizes from a fishgrid.cfg file.
Parameters
filepath
:string
- path to a fishgrid data directory, a file in a fishgrid data directory, or a fishgrid traces-*.raw file.
Returns
grids
:list
oftuples
ofints
- For each grid the number of rows and columns.
Raises
IOError/FileNotFoundError:
- If the fishgrid.cfg file does not exist.
Expand source code
def fishgrid_grids(filepath):
    """Retrieve grid sizes from a fishgrid.cfg file.

    Parameters
    ----------
    filepath: string
        path to a fishgrid data directory, a file in a fishgrid data directory,
        or a fishgrid traces-*.raw file.

    Returns
    -------
    grids: list of tuples of ints
        For each grid the number of rows and columns.

    Raises
    ------
    IOError/FileNotFoundError:
        If the fishgrid.cfg file does not exist.
    """
    # the configuration file is expected next to the data:
    config_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    config_path = os.path.join(config_dir, 'fishgrid.cfg')
    layouts = []
    nrows = None
    ncols = None
    with open(config_path, 'r') as cfg:
        for entry in cfg:
            if "Grid" in entry:
                # a new section starts: keep the previous grid if complete
                if nrows is not None and ncols is not None:
                    layouts.append((nrows, ncols))
                nrows = None
                ncols = None
            elif "Columns" in entry:
                ncols = int(entry.split(':')[1].strip())
            elif "Rows" in entry:
                nrows = int(entry.split(':')[1].strip())
    # the last grid is not followed by another section:
    if nrows is not None and ncols is not None:
        layouts.append((nrows, ncols))
    return layouts
def fishgrid_samplerate(filepath)
-
Retrieve the sampling rate from a fishgrid.cfg file.
Parameters
filepath
:string
- Path to a fishgrid data directory, a file in a fishgrid data directory, or a fishgrid traces-*.raw file.
Returns
samplerate
:float
- Sampling rate in Hertz
Raises
IOError/FileNotFoundError:
- If the fishgrid.cfg file does not exist.
ValueError
fishgrid.cfg file does not contain sampling rate.
Expand source code
def fishgrid_samplerate(filepath):
    """Retrieve the sampling rate from a fishgrid.cfg file.

    Parameters
    ----------
    filepath: string
        Path to a fishgrid data directory, a file in a fishgrid data directory,
        or a fishgrid traces-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz

    Raises
    ------
    IOError/FileNotFoundError:
        If the fishgrid.cfg file does not exist.
    ValueError:
        fishgrid.cfg file does not contain sampling rate.
    """
    # the configuration file is expected next to the data:
    config_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    config_path = os.path.join(config_dir, 'fishgrid.cfg')
    rate = None
    with open(config_path, 'r') as cfg:
        for entry in cfg:
            if "AISampleRate" not in entry:
                continue
            # the value is given in kHz; the last matching line wins:
            khz = entry.split(':')[1].strip().replace('kHz', '')
            rate = float(khz)*1000.0
    if rate is None:
        raise ValueError('could not retrieve sampling rate from ' + config_path)
    return rate
def fishgrid_spacings(filepath)
-
Spacing between grid electrodes.
Parameters
filepath
:string
- Path to a fishgrid data directory, a file in a fishgrid data directory, or a fishgrid traces-*.raw file.
Returns
grid_dist
:list
oftuples
offloats
- For each grid the distances between rows and columns.
Expand source code
def fishgrid_spacings(filepath):
    """Spacing between grid electrodes.

    Parameters
    ----------
    filepath: string
        Path to a fishgrid data directory, a file in a fishgrid data directory,
        or a fishgrid traces-*.raw file.

    Returns
    -------
    grid_dist: list of tuples of ints
        For each grid the distances between rows and columns. Only the
        digits before the first '.' of each value are used.

    Raises
    ------
    IOError/FileNotFoundError:
        If the fishgrid.cfg file does not exist.
    """
    # the configuration file is expected next to the data:
    config_dir = filepath if os.path.isdir(filepath) else os.path.dirname(filepath)
    config_path = os.path.join(config_dir, 'fishgrid.cfg')
    spacings = []
    row_spacing = None
    col_spacing = None
    with open(config_path, 'r') as cfg:
        for entry in cfg:
            if "Grid" in entry:
                # a new section starts: keep the previous grid if complete
                if row_spacing is not None and col_spacing is not None:
                    spacings.append((row_spacing, col_spacing))
                row_spacing = None
                col_spacing = None
            elif "ColumnDistance1" in entry:
                whole = entry.split(':')[1].strip().split('.')[0]
                col_spacing = int(whole)
            elif "RowDistance1" in entry:
                whole = entry.split(':')[1].strip().split('.')[0]
                row_spacing = int(whole)
    # the last grid is not followed by another section:
    if row_spacing is not None and col_spacing is not None:
        spacings.append((row_spacing, col_spacing))
    return spacings
def load_container(filepath, channel=-1, verbose=0, datakey=None, samplekey=['rate', 'Fs', 'fs'], timekey=['time'], unitkey='unit')
-
Load data from a generic container file.
Supported file formats are: - python pickle files (.pkl, .pickle) - numpy files (.npz) - matlab files (.mat)
Parameters
filepath
:string
- Path of the file to load.
channel
:int
- The data channel. If negative all channels are selected.
verbose
:int
- if > 0 show detailed error/warning messages
datakey
:None
orstring
- Name of the variable holding the data.
If None take the variable that is a 1-D or 2-D array and has the largest number of elements.
samplekey
:string
- Name of the variable holding the sampling rate.
timekey
:string
- Name of the variable holding sampling times. If no sampling rate is available, the samplingrate is retrieved from the sampling times.
unitkey
:string
- Name of the variable holding the unit of the data.
If
unitkey
is not a valid key, then returnunitkey
as theunit
.
Returns
data
:1-D
or2-D array
offloats
- If
channel
is negative, a 2-D array with data of all channels is returned, where first dimension is time and second dimension is channel number. Otherwise an 1-D array with the data of that channel is returned. samplerate
:float
- Sampling rate of the data in Hz.
unit
:string
- Unit of the data.
Raises
IndexError
Invalid channel requested.
ValueError
Invalid key requested.
Expand source code
def load_container(filepath, channel=-1, verbose=0, datakey=None,
                   samplekey=['rate', 'Fs', 'fs'],
                   timekey=['time'], unitkey='unit'):
    """Load data from a generic container file.

    Supported file formats are:

    - python pickle files (.pkl, .pickle)
    - numpy files (.npz)
    - matlab files (.mat)

    The format is selected by the file extension, ignoring case.

    Parameters
    ----------
    filepath: string
        Path of the file to load.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        if > 0 show detailed error/warning messages
    datakey: None, string, or list of strings
        Name of the variable holding the data. If `None` take the
        variable that is a 1-D or 2-D array and has the largest number
        of elements.
    samplekey: string or list of strings
        Name of the variable holding the sampling rate.
    timekey: string or list of strings
        Name of the variable holding sampling times.
        If no sampling rate is available, the samplingrate is retrieved
        from the sampling times.
    unitkey: string
        Name of the variable holding the unit of the data.
        If `unitkey` is not a valid key, then return `unitkey` as the `unit`.

    Returns
    -------
    data: 1-D or 2-D array of floats
        If `channel` is negative, a 2-D array with data of all channels is
        returned, where first dimension is time and second dimension is
        channel number. Otherwise an 1-D array with the data of that
        channel is returned.
    samplerate: float
        Sampling rate of the data in Hz.
    unit: string
        Unit of the data.

    Raises
    ------
    IndexError:
        Invalid channel requested
    ValueError:
        Invalid key requested.
    """
    # load data:
    data = {}
    # compare the extension case-insensitively, as check_container() does:
    ext = os.path.splitext(filepath)[1].lower()
    if ext in ('.pkl', '.pickle'):
        import pickle
        # NOTE(review): unpickling can execute arbitrary code,
        # only load files from trusted sources.
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
    elif ext == '.npz':
        data = np.load(filepath)
    elif ext == '.mat':
        from scipy.io import loadmat
        data = loadmat(filepath, squeeze_me=True)
    if verbose > 0:
        print( 'loaded %s' % filepath)
    # extract metadata:
    if not isinstance(samplekey, (list, tuple, np.ndarray)):
        samplekey = (samplekey,)
    if not isinstance(timekey, (list, tuple, np.ndarray)):
        timekey = (timekey,)
    samplerate = 0.0
    for skey in samplekey:
        if skey in data:
            samplerate = float(data[skey])
            break
    if samplerate == 0.0:
        # fall back to the interval between the first two sampling times:
        for tkey in timekey:
            if tkey in data:
                samplerate = 1.0/(data[tkey][1] - data[tkey][0])
                break
    if samplerate == 0.0:
        raise ValueError('invalid keys %s and %s for requesting sampling rate or sampling times' % (', '.join(samplekey), ', '.join(timekey)))
    unit = ''
    if unitkey in data:
        unit = data[unitkey]
    elif unitkey != 'unit':
        # not a key and not the default: take it as the unit itself
        unit = unitkey
    unit = str(unit)
    # get data array:
    raw_data = np.array([])
    if datakey:
        # try data keys:
        if not isinstance(datakey, (list, tuple, np.ndarray)):
            datakey = (datakey,)
        for dkey in datakey:
            if dkey in data:
                raw_data = data[dkey]
                break
        if np.prod(raw_data.shape) == 0:
            raise ValueError('invalid key(s) %s for requesting data' % ', '.join(datakey))
    else:
        # find largest 1D or 2D array:
        for d in data:
            if hasattr(data[d], 'shape'):
                if 1 <= len(data[d].shape) <= 2 and \
                   np.prod(data[d].shape) > np.prod(raw_data.shape):
                    raw_data = data[d]
    if np.prod(raw_data.shape) == 0:
        raise ValueError('no data found')
    # make 2D:
    if len(raw_data.shape) == 1:
        raw_data = raw_data.reshape(-1, 1)
    # transpose if necessary, the longer dimension is taken as time:
    if np.argmax(raw_data.shape) > 0:
        raw_data = raw_data.T
    if channel >= 0:
        if channel >= raw_data.shape[1]:
            raise IndexError('invalid channel number %d requested' % channel)
        raw_data = raw_data[:,channel]
    return raw_data.astype(float), samplerate, unit
def load_data(filepath, channel=-1, verbose=0, **kwargs)
-
Load time-series data from a file of arbitrary format.
Parameters
filepath
:string
orlist
ofstrings
- The full path and name of the file to load. For some file formats several files can be provided in a list.
channel
:int
- The data channel. If negative all channels are selected.
verbose
:int
- If > 0 show detailed error/warning messages.
**kwargs
:dict
- Further keyword arguments that are passed on to the format specific loading functions.
Returns
data
:1-D
or2-D array
- If
channel
is negative, a 2-D array with data of all channels is returned, where first dimension is time and second dimension is channel number. Otherwise an 1-D array with the data of that channel is returned. samplerate
:float
- Sampling rate of the data in Hz.
unit
:string
- Unit of the data.
Raises
ValueError
Input argument filepath is empty string or list.
IndexError
Invalid channel requested.
Expand source code
def load_data(filepath, channel=-1, verbose=0, **kwargs):
    """Load time-series data from a file of arbitrary format.

    The format is detected in this order: relacs data, fishgrid data,
    container files, and finally audio files.

    Parameters
    ----------
    filepath: string or list of strings
        The full path and name of the file to load. For some file
        formats several files can be provided in a list.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        If > 0 show detailed error/warning messages.
    **kwargs: dict
        Further keyword arguments that are passed on to the
        container loading function.

    Returns
    -------
    data: 1-D or 2-D array
        If `channel` is negative, a 2-D array with data of all channels is
        returned, where first dimension is time and second dimension is
        channel number. Otherwise an 1-D array with the data of that
        channel is returned.
    samplerate: float
        Sampling rate of the data in Hz.
    unit: string
        Unit of the data.

    Raises
    ------
    ValueError:
        Input argument `filepath` is empty string or list.
    IndexError:
        Invalid channel requested.
    """
    if len(filepath) == 0:
        raise ValueError('input argument filepath is empty string or list.')
    if check_relacs(filepath):
        return load_relacs(filepath, channel, verbose)
    if check_fishgrid(filepath):
        return load_fishgrid(filepath, channel, verbose)
    # all remaining formats are read from a single file:
    if isinstance(filepath, (list, tuple, np.ndarray)):
        filepath = filepath[0]
    if check_container(filepath):
        return load_container(filepath, channel, verbose=verbose, **kwargs)
    samples, rate = load_audio(filepath, verbose)
    if channel >= 0:
        if channel >= samples.shape[1]:
            raise IndexError('invalid channel number %d requested' % channel)
        samples = samples[:, channel]
    return samples, rate, 'a.u.'
def load_fishgrid(filepathes, channel=-1, verbose=0)
-
Load traces (traces-grid*.raw files) that have been recorded with fishgrid (https://github.com/bendalab/fishgrid).
Parameters
filepathes
:string
orlist
ofstring
- Path to a fishgrid data directory, a fishgrid.cfg file, or fishgrid traces-grid*.raw files.
- channel: int
- The data channel. If negative all channels are selected.
verbose
:int
- If > 0 show detailed error/warning messages.
Returns
data
:1-D
or2-D array
- If
channel
is negative or more than one trace file is specified, a 2-D array with data of all channels is returned, where first dimension is time and second dimension is channel number. Otherwise an 1-D array with the data of that channel is returned. samplerate
:float
- Sampling rate of the data in Hz.
unit
:string
- Unit of the data.
Expand source code
def load_fishgrid(filepathes, channel=-1, verbose=0):
    """Load traces (traces-grid*.raw files) that have been recorded with fishgrid (https://github.com/bendalab/fishgrid).

    Parameters
    ----------
    filepathes: string or list of string
        Path to a fishgrid data directory, a fishgrid.cfg file,
        or fishgrid traces-grid*.raw files.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    data: 1-D or 2-D array
        If `channel` is negative or more than one trace file is specified,
        a 2-D array with data of all channels is returned,
        where first dimension is time and second dimension is channel number.
        Otherwise an 1-D array with the data of that channel is returned.
    samplerate: float
        Sampling rate of the data in Hz.
    unit: string
        Unit of the data.
    """
    if not isinstance(filepathes, (list, tuple, np.ndarray)):
        filepathes = [filepathes]
    grids = fishgrid_grids(filepathes[0])
    grid_sizes = [r*c for r,c in grids]
    filepathes = fishgrid_files(filepathes, channel, grid_sizes)
    if len(filepathes) > 1:
        channel = -1
    # number of channels of each traces-grid*.raw file:
    grid_channels = []
    for path in filepathes:
        # the grid number follows the 11 characters of 'traces-grid':
        g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
        grid_channels.append(grid_sizes[g])
    nchannels = sum(grid_channels)
    samplerate = None
    if len(filepathes) > 0:
        samplerate = fishgrid_samplerate(filepathes[0])
    unit = "V"
    # load traces-grid*.raw files:
    data = None
    nrows = 0
    n = 0
    for path, channels in zip(filepathes, grid_channels):
        x = np.fromfile(path, np.float32).reshape((-1, channels))
        if verbose > 0:
            print( 'loaded %s' % path)
        if data is None:
            # NOTE(review): the last two frames are dropped, the reason
            # is not evident from this function.
            nrows = len(x)-2
            data = np.empty((nrows, nchannels))
        data[:,n:n+channels] = x[:nrows,:]
        # advance to the columns of the next grid, otherwise every grid
        # would overwrite the first one:
        n += channels
    if channel < 0:
        return data, samplerate, unit
    # a single grid was loaded: make the channel relative to that grid
    gs = 0
    for s in grid_sizes:
        if channel < gs + s:
            break
        gs += s
    return data[:, channel-gs], samplerate, unit
def load_relacs(filepathes, channel=-1, verbose=0)
-
Load traces (trace-*.raw files) that have been recorded with relacs (www.relacs.net).
Parameters
filepathes
:string
orlist
ofstrings
- Path to a relacs data directory, a file in a relacs data directory, or relacs trace-*.raw files.
channel
:int
- The data channel. If negative all channels are selected.
verbose
:int
- if > 0 show detailed error/warning messages
Returns
data
:1-D
or2-D array
- If
channel
is negative or more than one trace file is specified, a 2-D array with data of all channels is returned, where first dimension is time and second dimension is channel number. Otherwise an 1-D array with the data of that channel is returned. samplerate
:float
- Sampling rate of the data in Hz
unit
:string
- Unit of the data
Raises
ValueError
- Invalid name for relacs trace-*.raw file.
- Sampling rates of traces differ.
- Unit of traces differ.
Expand source code
def load_relacs(filepathes, channel=-1, verbose=0):
    """Load traces (trace-*.raw files) that have been recorded with relacs (www.relacs.net).

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a relacs data directory, a file in a relacs data directory,
        or relacs trace-*.raw files.
    channel: int
        The data channel. If negative all channels are selected.
    verbose: int
        if > 0 show detailed error/warning messages

    Returns
    -------
    data: 1-D or 2-D array
        If `channel` is negative or more than one trace file is specified,
        a 2-D array with data of all channels is returned,
        where first dimension is time and second dimension is channel number.
        Otherwise an 1-D array with the data of that channel is returned.
    samplerate: float
        Sampling rate of the data in Hz
    unit: string
        Unit of the data

    Raises
    ------
    ValueError:
        - Invalid name for relacs trace-*.raw file.
        - Sampling rates of traces differ.
        - Unit of traces differ.
    """
    trace_files = relacs_files(filepathes, channel)
    # several files are always returned as a 2-D array:
    if len(trace_files) > 1:
        channel = -1
    ntraces = len(trace_files)
    traces = None
    nframes = 0
    samplerate = None
    unit = ""
    # load trace*.raw files, one column per file:
    for column, trace_file in enumerate(trace_files):
        samples = np.fromfile(trace_file, np.float32)
        if verbose > 0:
            print( 'loaded %s' % trace_file)
        if traces is None:
            # the first file sets the number of frames (its length minus two):
            nframes = len(samples)-2
            traces = np.empty((nframes, ntraces))
        traces[:,column] = samples[:nframes]
        # sampling rate and unit must agree between all traces:
        trace_rate, trace_unit = relacs_samplerate_unit(trace_file)
        if samplerate is None:
            samplerate = trace_rate
        elif trace_rate != samplerate:
            raise ValueError('sampling rates of traces differ')
        if len(unit) == 0:
            unit = trace_unit
        elif trace_unit != unit:
            raise ValueError('unit of traces differ')
    if channel >= 0:
        return traces[:, 0], samplerate, unit
    return traces, samplerate, unit
def main(cargs)
-
Call demo with command line arguments.
Parameters
cargs
:list
ofstrings
- Command line arguments as provided by sys.argv[1:]
Expand source code
def main(cargs):
    """Parse the command line and run `demo()` with the result.

    Parameters
    ----------
    cargs: list of strings
        Command line arguments as provided by sys.argv[1:]
    """
    from argparse import ArgumentParser
    cli = ArgumentParser(
        description='Checking thunderfish.dataloader module.')
    cli.add_argument('-p', action='store_true', dest='plot',
                     help='plot loaded data')
    cli.add_argument('-c', type=int, default=-1, dest='channel',
                     help='channel to be loaded')
    cli.add_argument('file', type=str, nargs=1, default='',
                     help='name of data file')
    options = cli.parse_args(cargs)
    # nargs=1 stores the file name in a list with one element:
    datafile = options.file[0]
    demo(datafile, options.plot, options.channel)
def relacs_files(filepathes, channel)
-
Expand file pathes for relacs data to appropriate trace*.raw file names.
Parameters
filepathes
:string
orlist
ofstrings
- Path to a relacs data directory, a file in a relacs data directory, or relacs trace-*.raw files.
channel
:int
- The data channel. If negative all channels are selected.
Returns
filepathes
:list
ofstrings
- List of relacs trace*.raw files.
Raises
ValueError
Invalid name of relacs trace file.
Expand source code
def _is_relacs_trace_name(path):
    """Return True if the basename of `path` has the form 'trace*.raw'."""
    bn = os.path.basename(path)
    return len(bn) > 5 and bn[0:5] == 'trace' and bn[-4:] == '.raw'


def _find_relacs_traces(relacs_dir):
    """List trace-1.raw, trace-2.raw, ... in `relacs_dir` up to the first missing one."""
    files = []
    for k in range(10000):
        file = os.path.join(relacs_dir, 'trace-%d.raw' % (k+1))
        if not os.path.isfile(file):
            break
        files.append(file)
    return files


def relacs_files(filepathes, channel):
    """Expand file pathes for relacs data to appropriate trace*.raw file names.

    Parameters
    ----------
    filepathes: string or list of strings
        Path to a relacs data directory, a file in a relacs data directory,
        or relacs trace-*.raw files.
    channel: int
        The data channel. If negative all channels are selected.

    Returns
    -------
    filepathes: list of strings
        List of relacs trace*.raw files. This is always a new list,
        the `filepathes` argument is not modified.

    Raises
    ------
    ValueError:
        Invalid name of relacs trace file.
    """
    # work on a copy, so that the caller's sequence is left untouched
    # and tuples or arrays can be handled as well:
    if isinstance(filepathes, (list, tuple, np.ndarray)):
        filepathes = list(filepathes)
    else:
        filepathes = [filepathes]
    if len(filepathes) == 1:
        path = filepathes[0]
        # a directory or some other file in it needs to be expanded;
        # a trace*.raw file is taken as it is:
        relacs_dir = None
        if os.path.isdir(path):
            relacs_dir = path
        elif not _is_relacs_trace_name(path):
            relacs_dir = os.path.dirname(path)
        if relacs_dir is not None:
            if channel < 0:
                filepathes = _find_relacs_traces(relacs_dir)
            else:
                filepathes = [os.path.join(relacs_dir,
                                           'trace-%d.raw' % (channel+1))]
    for path in filepathes:
        if not _is_relacs_trace_name(path):
            raise ValueError('invalid name %s of relacs trace file' % path)
    return filepathes
def relacs_metadata(filepath)
-
Reads header of a relacs *.dat file.
Parameters
filepath
:string
- A relacs *.dat file.
Returns
data
:dict
- Dictionary with key-value pairs of the file header.
Raises
IOError/FileNotFoundError:
- If
filepath
cannot be opened.
Expand source code
def relacs_metadata(filepath):
    """Reads header of a relacs *.dat file.

    The header are the leading lines that start with '#'. Each header
    line containing a colon contributes one key-value pair.

    Parameters
    ----------
    filepath: string
        A relacs *.dat file.

    Returns
    -------
    data: dict
        Dictionary with key-value pairs of the file header.

    Raises
    ------
    IOError/FileNotFoundError:
        If `filepath` cannot be opened.
    """
    header = {}
    with open(filepath, 'r') as datfile:
        for entry in datfile:
            # the header ends with the first line not starting with '#':
            if not entry.startswith('#'):
                break
            # everything after the first colon is the value:
            name, colon, content = entry.partition(':')
            if colon:
                header[name.strip('# ')] = content.strip()
    return header
def relacs_samplerate_unit(filepath, channel=0)
-
Retrieve sampling rate and unit from a relacs stimuli.dat file.
Parameters
filepath
:string
- Path to a relacs data directory, a file in a relacs data directory, or a relacs trace-*.raw file.
channel
:int
- Channel (trace) number, if
filepath
does not specify a trace-*.raw file.
Returns
samplerate
:float
- Sampling rate in Hertz
unit
:string
- Unit of the trace, can be empty if not found
Raises
IOError/FileNotFoundError:
- If the stimuli.dat file does not exist.
ValueError
stimuli.dat file does not contain sampling rate.
Expand source code
def relacs_samplerate_unit(filepath, channel=0):
    """Retrieve sampling rate and unit from a relacs stimuli.dat file.

    Parameters
    ----------
    filepath: string
        Path to a relacs data directory, a file in a relacs data directory,
        or a relacs trace-*.raw file.
    channel: int
        Channel (trace) number, if `filepath` does not specify a
        trace-*.raw file.

    Returns
    -------
    samplerate: float
        Sampling rate in Hertz
    unit: string
        Unit of the trace, can be empty if not found

    Raises
    ------
    IOError/FileNotFoundError:
        If the stimuli.dat file does not exist.
    ValueError:
        stimuli.dat file does not contain sampling rate.
    """
    # trace numbers in file names and header keys start at 1:
    trace = channel+1
    relacs_dir = filepath
    # check for relacs data directory:
    if not os.path.isdir(filepath):
        relacs_dir = os.path.dirname(filepath)
        bn = os.path.basename(filepath).lower()
        if len(bn) > 5 and bn[0:5] == 'trace' and bn[-4:] == '.raw':
            # a trace file name overrides `channel`:
            trace = int(bn[6:].replace('.raw', ''))

    # retrieve sampling rate and unit from stimuli.dat file:
    unit_key = 'unit%d' % trace
    rate_key = 'sampling rate%d' % trace
    samplerate = None
    unit = ""
    stimuli_file = os.path.join(relacs_dir, 'stimuli.dat')
    with open(stimuli_file, 'r') as sf:
        for line in sf:
            # the header ends at the first line that is not a '#' comment:
            if not line.startswith('#'):
                break
            words = line.split(':')
            if len(words) < 2:
                continue
            # Compare against the end of the key only: a plain substring
            # test would let 'unit1' also match 'unit10', 'unit11', ...
            key = words[0].rstrip()
            value = words[1].strip()
            if key.endswith(unit_key):
                unit = value
            elif key.endswith(rate_key):
                samplerate = float(value.replace('Hz', ''))

    if samplerate is not None:
        return samplerate, unit
    raise ValueError('could not retrieve sampling rate from ' + stimuli_file)
Classes
class DataLoader (filepath=None, channel=-1, buffersize=10.0, backsize=0.0, verbose=0)
-
Buffered reading of time-series data for random access of the data in the file.
This allows for reading very large data files that do not fit into memory. A
DataLoader
instance can be used like a huge read-only numpy array, i.e.: data = DataLoader('path/to/data/file.dat'); x = data[10000:20000,0]
The first index specifies the frame, the second one the channel.
DataLoader
first determines the format of the data file and then opens the file (first line). It then reads data from the file as necessary for the requested data (second line). Supported file formats are relacs trace*.raw files (www.relacs.net), fishgrid traces-*.raw files, and audio files via
audioio.AudioLoader
.Reading sequentially through the file is always possible. If previous data are requested, then the file is read from the beginning. This might slow down access to previous data considerably. Use the
backsize
argument to the open functions to make sure some data are loaded before the requested frame. Then a subsequent access to the data within backsize seconds before that frame can still be handled without the need to reread the file from the beginning.Usage:
import thunderfish.dataloader as dl with dl.open_data(filepath, -1, 60.0, 10.0) as data: # do something with the content of the file: x = data[0:10000,0] y = data[10000:20000,0] z = x + y
Normal open and close:
data = dl.DataLoader(filepath, 0, 60.0) x = data[:] # read the whole file data.close()
that is the same as:
data = dl.DataLoader() data.open(filepath, 0, 60.0)
Member variables:
samplerate (float): the sampling rate of the data in Hertz. channels (int): the number of channels that are read in. channel (int): the channel of which the trace is returned. If negative, all channels are returned. frames (int): the number of frames in the file. shape (tuple): number of frames and channels of the data. unit (string): unit of the data.
Some member functions:
len(): the number of frames open(): open a data file. open_*(): open a data file of a specific format. close(): close the file.
Initialize the DataLoader instance. If filepath is not None open the file.
Parameters
filepath
:string
- Name of the file.
channel
:int
- The single channel to be worked on.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
class DataLoader(AudioLoader):
    """Buffered reading of time-series data for random access of the data in the file.

    This allows for reading very large data files that do not fit into memory.
    A `DataLoader` instance can be used like a huge read-only numpy array, i.e.
    ```
    data = DataLoader('path/to/data/file.dat')
    x = data[10000:20000,0]
    ```
    The first index specifies the frame, the second one the channel.

    `DataLoader` first determines the format of the data file and then opens
    the file (first line). It then reads data from the file as necessary for
    the requested data (second line).

    Supported file formats are relacs trace*.raw files (www.relacs.net),
    fishgrid traces-*.raw files, and audio files via `audioio.AudioLoader`.

    Reading sequentially through the file is always possible. If previous data
    are requested, then the file is read from the beginning. This might slow
    down access to previous data considerably. Use the `backsize` argument to
    the open functions to make sure some data are loaded before the requested
    frame. Then a subsequent access to the data within backsize seconds before
    that frame can still be handled without the need to reread the file from
    the beginning.

    Usage:
    ------
    ```
    import thunderfish.dataloader as dl
    with dl.open_data(filepath, -1, 60.0, 10.0) as data:
        # do something with the content of the file:
        x = data[0:10000,0]
        y = data[10000:20000,0]
        z = x + y
    ```

    Normal open and close:
    ```
    data = dl.DataLoader(filepath, 0, 60.0)
    x = data[:]  # read the whole file
    data.close()
    ```
    that is the same as:
    ```
    data = dl.DataLoader()
    data.open(filepath, 0, 60.0)
    ```

    Member variables:
    -----------------
    samplerate (float): the sampling rate of the data in Hertz.
    channels (int): the number of channels that are read in.
    channel (int): the channel of which the trace is returned.
                   If negative, all channels are returned.
    frames (int): the number of frames in the file.
    shape (tuple): number of frames and channels of the data.
    unit (string): unit of the data.

    Some member functions:
    ----------------------
    len(): the number of frames
    open(): open a data file.
    open_*(): open a data file of a specific format.
    close(): close the file.
    """

    def __init__(self, filepath=None, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Initialize the DataLoader instance. If filepath is not None open the file.

        Parameters
        ----------
        filepath: string
            Name of the file.
        channel: int
            The single channel to be worked on.
            If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        """
        # the base class is initialized without a file; opening is done
        # by our own open() that knows about the additional file formats:
        super(DataLoader, self).__init__(None, buffersize, backsize, verbose)
        if filepath is not None:
            self.open(filepath, channel, buffersize, backsize, verbose)

    def __getitem__(self, key):
        """Return the requested frames, restricted to `self.channel` if that is not negative.

        Raises
        ------
        IndexError:
            A tuple index is used although a single channel was selected.
        """
        if self.channel >= 0:
            # with a single selected channel the data are one-dimensional:
            if type(key) is tuple:
                raise IndexError
            return super(DataLoader, self).__getitem__((key, self.channel))
        else:
            return super(DataLoader, self).__getitem__(key)

    def __next__(self):
        """Return the next frame, restricted to `self.channel` if that is not negative."""
        if self.channel >= 0:
            return super(DataLoader, self).__next__()[self.channel]
        else:
            return super(DataLoader, self).__next__()

    # relacs interface:
    def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Open relacs data files (www.relacs.net) for reading.

        Parameters
        ----------
        filepathes: string or list of string
            Path to a relacs data directory, a relacs stimuli.dat file,
            a relacs info.dat file, or relacs trace-*.raw files.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            The traces differ in number of frames, sampling rate, or unit.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_relacs()

        filepathes = relacs_files(filepathes, channel)

        # open trace files:
        self.sf = []
        self.frames = None
        self.samplerate = None
        self.unit = ""
        for path in filepathes:
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # file size (each sample is a 4-byte float):
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate slightly differing lengths and keep the smaller one:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            file.seek(0)
            # retrieve sampling rate and unit:
            rate, us = relacs_samplerate_unit(path)
            if self.samplerate is None:
                self.samplerate = rate
            elif rate != self.samplerate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
        self.channels = len(self.sf)
        self.channel = channel
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        # buffer sizes are given in seconds but stored in frames:
        self.buffersize = int(buffersize*self.samplerate)
        self.backsize = int(backsize*self.samplerate)
        self._init_buffer()
        self.offset = 0
        # route closing and buffer updates to the relacs implementations:
        self.close = self._close_relacs
        self._update_buffer = self._update_buffer_relacs
        return self

    def _close_relacs(self):
        """Close the relacs data files.
        """
        if self.sf is not None:
            for file in self.sf:
                file.close()
            self.sf = None

    def _update_buffer_relacs(self, start, stop):
        """Make sure that the buffer contains the data between
        start and stop for relacs files.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index one past the last requested frame.
        """
        if start < self.offset or stop > self.offset + self.buffer.shape[0]:
            offset, size = self._read_indices(start, stop)
            r_offset, r_size = self._recycle_buffer(offset, size)
            # read buffer (one file per channel, 4 bytes per sample):
            for i, file in enumerate(self.sf):
                file.seek(r_offset*4)
                buffer = file.read(r_size*4)
                # np.frombuffer replaces the deprecated binary mode of np.fromstring:
                self.buffer[r_offset-offset:r_offset+r_size-offset, i] = np.frombuffer(buffer, dtype=np.float32)
            self.offset = offset
            if self.verbose > 1:
                print(' read %6d frames at %d' % (r_size, r_offset))
            if self.verbose > 0:
                print(' loaded %d frames from %d up to %d'
                      % (self.buffer.shape[0],
                         self.offset, self.offset+self.buffer.shape[0]))

    # fishgrid interface:
    def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

        Parameters
        ----------
        filepathes: string or list of string
            Path to a fishgrid data directory, a fishgrid.cfg file,
            or fishgrid trace-*.raw files.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            The number of frames of the grid files differ.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_fishgrid()

        if not isinstance(filepathes, (list, tuple, np.ndarray)):
            filepathes = [filepathes]
        grids = fishgrid_grids(filepathes[0])
        # number of channels of each grid:
        grid_sizes = [r*c for r,c in grids]
        filepathes = fishgrid_files(filepathes, channel, grid_sizes)

        # open grid files:
        self.channels = 0
        for path in filepathes:
            # the grid number follows the first 11 characters of the file name:
            g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
            self.channels += grid_sizes[g]
        self.sf = []
        self.grid_channels = []
        self.grid_offs = []
        offs = 0
        self.frames = None
        self.samplerate = None
        if len(filepathes) > 0:
            self.samplerate = fishgrid_samplerate(filepathes[0])
        self.unit = "V"
        for path in filepathes:
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # grid channels:
            g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
            self.grid_channels.append(grid_sizes[g])
            self.grid_offs.append(offs)
            offs += grid_sizes[g]
            # file size (4-byte floats, channels of a grid interleaved):
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4//grid_sizes[g]
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate slightly differing lengths and keep the smaller one:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            file.seek(0)
        # make the channel index relative to the first channel of its grid:
        gs = 0
        for s in grid_sizes:
            if channel < gs + s:
                break
            gs += s
        self.channel = channel - gs
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        # buffer sizes are given in seconds but stored in frames:
        self.buffersize = int(buffersize*self.samplerate)
        self.backsize = int(backsize*self.samplerate)
        self._init_buffer()
        self.offset = 0
        # route closing and buffer updates to the fishgrid implementations:
        self.close = self._close_fishgrid
        self._update_buffer = self._update_buffer_fishgrid
        return self

    def _close_fishgrid(self):
        """Close the fishgrid data files.
        """
        if self.sf is not None:
            for file in self.sf:
                file.close()
            self.sf = None

    def _update_buffer_fishgrid(self, start, stop):
        """Make sure that the buffer contains the data between
        start and stop for fishgrid files.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index one past the last requested frame.
        """
        if start < self.offset or stop > self.offset + self.buffer.shape[0]:
            offset, size = self._read_indices(start, stop)
            r_offset, r_size = self._recycle_buffer(offset, size)
            # read buffer (each file holds the interleaved channels of one grid):
            for file, gchannels, goffset in zip(self.sf, self.grid_channels, self.grid_offs):
                file.seek(r_offset*4*gchannels)
                buffer = file.read(r_size*4*gchannels)
                # np.frombuffer replaces the deprecated binary mode of np.fromstring:
                self.buffer[r_offset-offset:r_offset+r_size-offset, goffset:goffset+gchannels] = np.frombuffer(buffer, dtype=np.float32).reshape((-1, gchannels))
            self.offset = offset
            if self.verbose > 1:
                print(' read %6d frames at %d' % (r_size, r_offset))
            if self.verbose > 0:
                print(' loaded %d frames from %d up to %d'
                      % (self.buffer.shape[0],
                         self.offset, self.offset+self.buffer.shape[0]))

    def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0, verbose=0):
        """Open file with time-series data for reading.

        The file is opened as relacs data if `check_relacs()` accepts it,
        as fishgrid data if `check_fishgrid()` accepts it, and otherwise
        as an audio file via the base class.

        Parameters
        ----------
        filepath: string or list of string
            Path to a data file or directory.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            The file is a container format that is not supported.
        IndexError:
            `channel` is not smaller than the number of channels of an audio file.
        """
        if check_relacs(filepath):
            self.open_relacs(filepath, channel, buffersize, backsize, verbose)
        elif check_fishgrid(filepath):
            self.open_fishgrid(filepath, channel, buffersize, backsize, verbose)
        else:
            if isinstance(filepath, (list, tuple, np.ndarray)):
                filepath = filepath[0]
            if check_container(filepath):
                raise ValueError('file format not supported')
            super(DataLoader, self).open(filepath, buffersize, backsize, verbose)
            # valid channel indices are 0 .. self.channels-1:
            if channel >= self.channels:
                raise IndexError('invalid channel number %d' % channel)
            self.channel = channel
            if self.channel >= 0:
                self.shape = (self.frames,)
            else:
                self.shape = (self.frames, self.channels)
            self.unit = 'a.u.'
        return self
Ancestors
- audioio.audioloader.AudioLoader
Methods
def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0, verbose=0)
-
Open file with time-series data for reading.
Parameters
filepath
:string
or list
of string
- Path to a data file or directory.
channel
:int
- The requested data channel. If negative all channels are selected.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0, verbose=0):
    """Open file with time-series data for reading.

    The file is opened as relacs data if `check_relacs()` accepts it,
    as fishgrid data if `check_fishgrid()` accepts it, and otherwise
    as an audio file via the base class.

    Parameters
    ----------
    filepath: string or list of string
        Path to a data file or directory.
    channel: int
        The requested data channel. If negative all channels are selected.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If > 0 show detailed error/warning messages.

    Raises
    ------
    ValueError:
        The file is a container format that is not supported.
    IndexError:
        `channel` is not smaller than the number of channels of an audio file.
    """
    if check_relacs(filepath):
        self.open_relacs(filepath, channel, buffersize, backsize, verbose)
    elif check_fishgrid(filepath):
        self.open_fishgrid(filepath, channel, buffersize, backsize, verbose)
    else:
        if isinstance(filepath, (list, tuple, np.ndarray)):
            filepath = filepath[0]
        if check_container(filepath):
            raise ValueError('file format not supported')
        super(DataLoader, self).open(filepath, buffersize, backsize, verbose)
        # valid channel indices are 0 .. self.channels-1:
        if channel >= self.channels:
            raise IndexError('invalid channel number %d' % channel)
        self.channel = channel
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        self.unit = 'a.u.'
    return self
def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0)
-
Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.
Parameters
filepathes
:string
or list
of string
- Path to a fishgrid data directory, a fishgrid.cfg file, or fishgrid trace-*.raw files.
channel
:int
- The requested data channel. If negative all channels are selected.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
    """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

    Opens one binary file per grid, determines the common number of frames,
    sets up the internal buffer, and installs the fishgrid specific
    `close()` and `_update_buffer()` implementations.

    Parameters
    ----------
    filepathes: string or list of string
        Path to a fishgrid data directory, a fishgrid.cfg file,
        or fishgrid trace-*.raw files.
    channel: int
        The requested data channel. If negative all channels are selected.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    self: DataLoader
        The instance itself.

    Raises
    ------
    ValueError:
        The number of frames of the grid files differ by more than
        the tolerated one or two frames.
    """
    self.verbose = verbose
    # close files left open by a previous call:
    if self.sf is not None:
        self._close_fishgrid()
    if not isinstance(filepathes, (list, tuple, np.ndarray)):
        filepathes = [filepathes]
    grids = fishgrid_grids(filepathes[0])
    # number of channels of each grid:
    grid_sizes = [r*c for r,c in grids]
    filepathes = fishgrid_files(filepathes, channel, grid_sizes)
    # open grid files:
    self.channels = 0
    for path in filepathes:
        # The 1-based grid number is parsed from what follows the first
        # 11 characters of the file name (presumably a 'traces-grid'
        # prefix - verify against fishgrid_files()):
        g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
        self.channels += grid_sizes[g]
    self.sf = []
    self.grid_channels = []
    self.grid_offs = []
    # index of the first buffer column of the current grid:
    offs = 0
    self.frames = None
    self.samplerate = None
    if len(filepathes) > 0:
        self.samplerate = fishgrid_samplerate(filepathes[0])
    self.unit = "V"
    for path in filepathes:
        file = open(path, 'rb')
        self.sf.append(file)
        if verbose > 0:
            print( 'opened %s' % path)
        # grid channels:
        g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
        self.grid_channels.append(grid_sizes[g])
        self.grid_offs.append(offs)
        offs += grid_sizes[g]
        # file size (4 bytes per sample and grid_sizes[g] samples per frame):
        file.seek(0, os.SEEK_END)
        frames = file.tell()//4//grid_sizes[g]
        if self.frames is None:
            self.frames = frames
        elif self.frames != frames:
            # tolerate slightly differing lengths and keep the smaller one:
            diff = self.frames - frames
            if diff > 1 or diff < -2:
                raise ValueError('number of frames of traces differ')
            elif diff >= 0:
                self.frames = frames
        # rewind for subsequent reading:
        file.seek(0)
    # Make the channel index relative to the first channel of the grid
    # that contains it; a negative channel is left unchanged:
    gs = 0
    for s in grid_sizes:
        if channel < gs + s:
            break
        gs += s
    self.channel = channel - gs
    if self.channel >= 0:
        self.shape = (self.frames,)
    else:
        self.shape = (self.frames, self.channels)
    # buffer sizes are given in seconds but stored in frames:
    self.buffersize = int(buffersize*self.samplerate)
    self.backsize = int(backsize*self.samplerate)
    self._init_buffer()
    self.offset = 0
    # route closing and buffer updates to the fishgrid implementations:
    self.close = self._close_fishgrid
    self._update_buffer = self._update_buffer_fishgrid
    return self
def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0)
-
Open relacs data files (www.relacs.net) for reading.
Parameters
filepathes
:string
or list
of string
- Path to a relacs data directory, a relacs stimuli.dat file, a relacs info.dat file, or relacs trace-*.raw files.
channel
:int
- The requested data channel. If negative all channels are selected.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
    """Open relacs data files (www.relacs.net) for reading.

    Opens one binary trace file per channel, checks that all traces agree
    in length, sampling rate and unit, sets up the internal buffer, and
    installs the relacs specific `close()` and `_update_buffer()`
    implementations.

    Parameters
    ----------
    filepathes: string or list of string
        Path to a relacs data directory, a relacs stimuli.dat file,
        a relacs info.dat file, or relacs trace-*.raw files.
    channel: int
        The requested data channel. If negative all channels are selected.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    self: DataLoader
        The instance itself.

    Raises
    ------
    ValueError:
        The traces differ in number of frames (by more than the tolerated
        one or two frames), in sampling rate, or in unit.
    """
    self.verbose = verbose
    # close files left open by a previous call:
    if self.sf is not None:
        self._close_relacs()
    filepathes = relacs_files(filepathes, channel)
    # open trace files:
    self.sf = []
    self.frames = None
    self.samplerate = None
    self.unit = ""
    for path in filepathes:
        file = open(path, 'rb')
        self.sf.append(file)
        if verbose > 0:
            print( 'opened %s' % path)
        # file size (4 bytes per sample, one sample per frame):
        file.seek(0, os.SEEK_END)
        frames = file.tell()//4
        if self.frames is None:
            self.frames = frames
        elif self.frames != frames:
            # tolerate slightly differing lengths and keep the smaller one:
            diff = self.frames - frames
            if diff > 1 or diff < -2:
                raise ValueError('number of frames of traces differ')
            elif diff >= 0:
                self.frames = frames
        # rewind for subsequent reading:
        file.seek(0)
        # retrieve sampling rate and unit:
        rate, us = relacs_samplerate_unit(path)
        if self.samplerate is None:
            self.samplerate = rate
        elif rate != self.samplerate:
            raise ValueError('sampling rates of traces differ')
        # an empty unit counts as not yet known:
        if len(self.unit) == 0:
            self.unit = us
        elif us != self.unit:
            raise ValueError('unit of traces differ')
    self.channels = len(self.sf)
    self.channel = channel
    if self.channel >= 0:
        self.shape = (self.frames,)
    else:
        self.shape = (self.frames, self.channels)
    # buffer sizes are given in seconds but stored in frames:
    self.buffersize = int(buffersize*self.samplerate)
    self.backsize = int(backsize*self.samplerate)
    self._init_buffer()
    self.offset = 0
    # route closing and buffer updates to the relacs implementations:
    self.close = self._close_relacs
    self._update_buffer = self._update_buffer_relacs
    return self
class open_data (filepath=None, channel=-1, buffersize=10.0, backsize=0.0, verbose=0)
-
Buffered reading of time-series data for random access of the data in the file.
This allows for reading very large data files that do not fit into memory. A
DataLoader
instance can be used like a huge read-only numpy array, i.e.: data = DataLoader('path/to/data/file.dat'); x = data[10000:20000,0]
The first index specifies the frame, the second one the channel.
DataLoader
first determines the format of the data file and then opens the file (first line). It then reads data from the file as necessary for the requested data (second line). Supported file formats are relacs trace*.raw files (www.relacs.net), fishgrid traces-*.raw files, and audio files via
audioio.AudioLoader
.Reading sequentially through the file is always possible. If previous data are requested, then the file is read from the beginning. This might slow down access to previous data considerably. Use the
backsize
argument to the open functions to make sure some data are loaded before the requested frame. Then a subsequent access to the data within backsize seconds before that frame can still be handled without the need to reread the file from the beginning.Usage:
import thunderfish.dataloader as dl with dl.open_data(filepath, -1, 60.0, 10.0) as data: # do something with the content of the file: x = data[0:10000,0] y = data[10000:20000,0] z = x + y
Normal open and close:
data = dl.DataLoader(filepath, 0, 60.0) x = data[:] # read the whole file data.close()
that is the same as:
data = dl.DataLoader() data.open(filepath, 0, 60.0)
Member variables:
samplerate (float): the sampling rate of the data in Hertz. channels (int): the number of channels that are read in. channel (int): the channel of which the trace is returned. If negative, all channels are returned. frames (int): the number of frames in the file. shape (tuple): number of frames and channels of the data. unit (string): unit of the data.
Some member functions:
len(): the number of frames open(): open a data file. open_*(): open a data file of a specific format. close(): close the file.
Initialize the DataLoader instance. If filepath is not None open the file.
Parameters
filepath
:string
- Name of the file.
channel
:int
- The single channel to be worked on.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
class DataLoader(AudioLoader):
    """Buffered reading of time-series data for random access of the data in the file.

    This allows for reading very large data files that do not fit into memory.
    A `DataLoader` instance can be used like a huge read-only numpy array, i.e.
    ```
    data = DataLoader('path/to/data/file.dat')
    x = data[10000:20000,0]
    ```
    The first index specifies the frame, the second one the channel.

    `DataLoader` first determines the format of the data file and then opens
    the file (first line). It then reads data from the file as necessary for
    the requested data (second line).

    Supported file formats are relacs trace*.raw files (www.relacs.net),
    fishgrid traces-*.raw files, and audio files via `audioio.AudioLoader`.

    Reading sequentially through the file is always possible. If previous data
    are requested, then the file is read from the beginning. This might slow
    down access to previous data considerably. Use the `backsize` argument to
    the open functions to make sure some data are loaded before the requested
    frame. Then a subsequent access to the data within backsize seconds before
    that frame can still be handled without the need to reread the file from
    the beginning.

    Usage:
    ------
    ```
    import thunderfish.dataloader as dl
    with dl.open_data(filepath, -1, 60.0, 10.0) as data:
        # do something with the content of the file:
        x = data[0:10000,0]
        y = data[10000:20000,0]
        z = x + y
    ```

    Normal open and close:
    ```
    data = dl.DataLoader(filepath, 0, 60.0)
    x = data[:]  # read the whole file
    data.close()
    ```
    that is the same as:
    ```
    data = dl.DataLoader()
    data.open(filepath, 0, 60.0)
    ```

    Member variables:
    -----------------
    samplerate (float): the sampling rate of the data in Hertz.
    channels (int): the number of channels that are read in.
    channel (int): the channel of which the trace is returned.
                   If negative, all channels are returned.
    frames (int): the number of frames in the file.
    shape (tuple): number of frames and channels of the data.
    unit (string): unit of the data.

    Some member functions:
    ----------------------
    len(): the number of frames
    open(): open a data file.
    open_*(): open a data file of a specific format.
    close(): close the file.
    """

    def __init__(self, filepath=None, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Initialize the DataLoader instance. If filepath is not None open the file.

        Parameters
        ----------
        filepath: string
            Name of the file.
        channel: int
            The single channel to be worked on.
            If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.
        """
        # the base class is initialized without a file; opening is done
        # by our own open() that knows about the additional file formats:
        super(DataLoader, self).__init__(None, buffersize, backsize, verbose)
        if filepath is not None:
            self.open(filepath, channel, buffersize, backsize, verbose)

    def __getitem__(self, key):
        """Return the requested frames, restricted to `self.channel` if that is not negative.

        Raises
        ------
        IndexError:
            A tuple index is used although a single channel was selected.
        """
        if self.channel >= 0:
            # with a single selected channel the data are one-dimensional:
            if type(key) is tuple:
                raise IndexError
            return super(DataLoader, self).__getitem__((key, self.channel))
        else:
            return super(DataLoader, self).__getitem__(key)

    def __next__(self):
        """Return the next frame, restricted to `self.channel` if that is not negative."""
        if self.channel >= 0:
            return super(DataLoader, self).__next__()[self.channel]
        else:
            return super(DataLoader, self).__next__()

    # relacs interface:
    def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Open relacs data files (www.relacs.net) for reading.

        Parameters
        ----------
        filepathes: string or list of string
            Path to a relacs data directory, a relacs stimuli.dat file,
            a relacs info.dat file, or relacs trace-*.raw files.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            The traces differ in number of frames, sampling rate, or unit.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_relacs()

        filepathes = relacs_files(filepathes, channel)

        # open trace files:
        self.sf = []
        self.frames = None
        self.samplerate = None
        self.unit = ""
        for path in filepathes:
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # file size (each sample is a 4-byte float):
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate slightly differing lengths and keep the smaller one:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            file.seek(0)
            # retrieve sampling rate and unit:
            rate, us = relacs_samplerate_unit(path)
            if self.samplerate is None:
                self.samplerate = rate
            elif rate != self.samplerate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
        self.channels = len(self.sf)
        self.channel = channel
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        # buffer sizes are given in seconds but stored in frames:
        self.buffersize = int(buffersize*self.samplerate)
        self.backsize = int(backsize*self.samplerate)
        self._init_buffer()
        self.offset = 0
        # route closing and buffer updates to the relacs implementations:
        self.close = self._close_relacs
        self._update_buffer = self._update_buffer_relacs
        return self

    def _close_relacs(self):
        """Close the relacs data files.
        """
        if self.sf is not None:
            for file in self.sf:
                file.close()
            self.sf = None

    def _update_buffer_relacs(self, start, stop):
        """Make sure that the buffer contains the data between
        start and stop for relacs files.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index one past the last requested frame.
        """
        if start < self.offset or stop > self.offset + self.buffer.shape[0]:
            offset, size = self._read_indices(start, stop)
            r_offset, r_size = self._recycle_buffer(offset, size)
            # read buffer (one file per channel, 4 bytes per sample):
            for i, file in enumerate(self.sf):
                file.seek(r_offset*4)
                buffer = file.read(r_size*4)
                # np.frombuffer replaces the deprecated binary mode of np.fromstring:
                self.buffer[r_offset-offset:r_offset+r_size-offset, i] = np.frombuffer(buffer, dtype=np.float32)
            self.offset = offset
            if self.verbose > 1:
                print(' read %6d frames at %d' % (r_size, r_offset))
            if self.verbose > 0:
                print(' loaded %d frames from %d up to %d'
                      % (self.buffer.shape[0],
                         self.offset, self.offset+self.buffer.shape[0]))

    # fishgrid interface:
    def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
        """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

        Parameters
        ----------
        filepathes: string or list of string
            Path to a fishgrid data directory, a fishgrid.cfg file,
            or fishgrid trace-*.raw files.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            The number of frames of the grid files differ.
        """
        self.verbose = verbose

        if self.sf is not None:
            self._close_fishgrid()

        if not isinstance(filepathes, (list, tuple, np.ndarray)):
            filepathes = [filepathes]
        grids = fishgrid_grids(filepathes[0])
        # number of channels of each grid:
        grid_sizes = [r*c for r,c in grids]
        filepathes = fishgrid_files(filepathes, channel, grid_sizes)

        # open grid files:
        self.channels = 0
        for path in filepathes:
            # the grid number follows the first 11 characters of the file name:
            g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
            self.channels += grid_sizes[g]
        self.sf = []
        self.grid_channels = []
        self.grid_offs = []
        offs = 0
        self.frames = None
        self.samplerate = None
        if len(filepathes) > 0:
            self.samplerate = fishgrid_samplerate(filepathes[0])
        self.unit = "V"
        for path in filepathes:
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # grid channels:
            g = int(os.path.basename(path)[11:].replace('.raw', '')) - 1
            self.grid_channels.append(grid_sizes[g])
            self.grid_offs.append(offs)
            offs += grid_sizes[g]
            # file size (4-byte floats, channels of a grid interleaved):
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4//grid_sizes[g]
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                # tolerate slightly differing lengths and keep the smaller one:
                diff = self.frames - frames
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    self.frames = frames
            file.seek(0)
        # make the channel index relative to the first channel of its grid:
        gs = 0
        for s in grid_sizes:
            if channel < gs + s:
                break
            gs += s
        self.channel = channel - gs
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        # buffer sizes are given in seconds but stored in frames:
        self.buffersize = int(buffersize*self.samplerate)
        self.backsize = int(backsize*self.samplerate)
        self._init_buffer()
        self.offset = 0
        # route closing and buffer updates to the fishgrid implementations:
        self.close = self._close_fishgrid
        self._update_buffer = self._update_buffer_fishgrid
        return self

    def _close_fishgrid(self):
        """Close the fishgrid data files.
        """
        if self.sf is not None:
            for file in self.sf:
                file.close()
            self.sf = None

    def _update_buffer_fishgrid(self, start, stop):
        """Make sure that the buffer contains the data between
        start and stop for fishgrid files.

        Parameters
        ----------
        start: int
            Index of the first requested frame.
        stop: int
            Index one past the last requested frame.
        """
        if start < self.offset or stop > self.offset + self.buffer.shape[0]:
            offset, size = self._read_indices(start, stop)
            r_offset, r_size = self._recycle_buffer(offset, size)
            # read buffer (each file holds the interleaved channels of one grid):
            for file, gchannels, goffset in zip(self.sf, self.grid_channels, self.grid_offs):
                file.seek(r_offset*4*gchannels)
                buffer = file.read(r_size*4*gchannels)
                # np.frombuffer replaces the deprecated binary mode of np.fromstring:
                self.buffer[r_offset-offset:r_offset+r_size-offset, goffset:goffset+gchannels] = np.frombuffer(buffer, dtype=np.float32).reshape((-1, gchannels))
            self.offset = offset
            if self.verbose > 1:
                print(' read %6d frames at %d' % (r_size, r_offset))
            if self.verbose > 0:
                print(' loaded %d frames from %d up to %d'
                      % (self.buffer.shape[0],
                         self.offset, self.offset+self.buffer.shape[0]))

    def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0, verbose=0):
        """Open file with time-series data for reading.

        The file is opened as relacs data if `check_relacs()` accepts it,
        as fishgrid data if `check_fishgrid()` accepts it, and otherwise
        as an audio file via the base class.

        Parameters
        ----------
        filepath: string or list of string
            Path to a data file or directory.
        channel: int
            The requested data channel. If negative all channels are selected.
        buffersize: float
            Size of internal buffer in seconds.
        backsize: float
            Part of the buffer to be loaded before the requested start index in seconds.
        verbose: int
            If > 0 show detailed error/warning messages.

        Raises
        ------
        ValueError:
            The file is a container format that is not supported.
        IndexError:
            `channel` is not smaller than the number of channels of an audio file.
        """
        if check_relacs(filepath):
            self.open_relacs(filepath, channel, buffersize, backsize, verbose)
        elif check_fishgrid(filepath):
            self.open_fishgrid(filepath, channel, buffersize, backsize, verbose)
        else:
            if isinstance(filepath, (list, tuple, np.ndarray)):
                filepath = filepath[0]
            if check_container(filepath):
                raise ValueError('file format not supported')
            super(DataLoader, self).open(filepath, buffersize, backsize, verbose)
            # valid channel indices are 0 .. self.channels-1:
            if channel >= self.channels:
                raise IndexError('invalid channel number %d' % channel)
            self.channel = channel
            if self.channel >= 0:
                self.shape = (self.frames,)
            else:
                self.shape = (self.frames, self.channels)
            self.unit = 'a.u.'
        return self
Ancestors
- audioio.audioloader.AudioLoader
Methods
def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0, verbose=0)
-
Open file with time-series data for reading.
Parameters
filepath
:string or list of string
- Path to a data file or directory.
channel
:int
- The requested data channel. If negative all channels are selected.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
def open(self, filepath, channel=0, buffersize=10.0, backsize=0.0, verbose=0):
    """Open file with time-series data for reading.

    Relacs and fishgrid data are handed over to `open_relacs()` and
    `open_fishgrid()`, respectively. All other files are opened by the
    `open()` method of the parent class.

    Parameters
    ----------
    filepath: string or list of string
        Path to a data file or directory.
    channel: int
        The requested data channel. If negative all channels are selected.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    self:
        This loader, so that calls can be chained.

    Raises
    ------
    ValueError:
        `check_container()` reports the file as a container format
        ('file format not supported').
    IndexError:
        `channel` is not smaller than the number of channels of the file.
    """
    if check_relacs(filepath):
        self.open_relacs(filepath, channel, buffersize, backsize, verbose)
    elif check_fishgrid(filepath):
        self.open_fishgrid(filepath, channel, buffersize, backsize, verbose)
    else:
        # the parent class opens a single file only:
        if isinstance(filepath, (list, tuple, np.ndarray)):
            filepath = filepath[0]
        if check_container(filepath):
            raise ValueError('file format not supported')
        super(DataLoader, self).open(filepath, buffersize, backsize, verbose)
        # valid channel indices are 0 ... self.channels-1,
        # so channel == self.channels is already out of range:
        if channel >= self.channels:
            raise IndexError('invalid channel number %d' % channel)
        self.channel = channel
        # a single channel is presented as 1-D data, all channels as 2-D:
        if self.channel >= 0:
            self.shape = (self.frames,)
        else:
            self.shape = (self.frames, self.channels)
        self.unit = 'a.u.'
    return self
def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0)
-
Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.
Parameters
filepathes
:string or list of string
- Path to a fishgrid data directory, a fishgrid.cfg file, or fishgrid trace-*.raw files.
channel
:int
- The requested data channel. If negative all channels are selected.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
def open_fishgrid(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
    """Open fishgrid data files (https://github.com/bendalab/fishgrid) for reading.

    Parameters
    ----------
    filepathes: string or list of string
        Path to a fishgrid data directory, a fishgrid.cfg file,
        or fishgrid trace-*.raw files.
    channel: int
        The requested data channel. If negative all channels are selected.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    self:
        This loader, so that calls can be chained.

    Raises
    ------
    ValueError:
        The number of frames of the grid files differ by more than
        the tolerated amount. All files opened so far are closed
        before the exception is passed on.
    """
    self.verbose = verbose
    if self.sf is not None:
        self._close_fishgrid()
    if not isinstance(filepathes, (list, tuple, np.ndarray)):
        filepathes = [filepathes]
    grids = fishgrid_grids(filepathes[0])
    grid_sizes = [r*c for r, c in grids]
    filepathes = fishgrid_files(filepathes, channel, grid_sizes)
    # Number of channels stored in each selected grid file.
    # The one-based grid number follows the first 11 characters
    # of the file name and precedes the '.raw' extension:
    file_channels = [grid_sizes[int(os.path.basename(path)[11:].replace('.raw', '')) - 1]
                     for path in filepathes]
    self.channels = sum(file_channels)
    # open grid files:
    self.sf = []
    self.grid_channels = []
    self.grid_offs = []
    self.frames = None
    self.samplerate = None
    if len(filepathes) > 0:
        self.samplerate = fishgrid_samplerate(filepathes[0])
    self.unit = "V"
    offs = 0
    try:
        for path, nchannels in zip(filepathes, file_channels):
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # grid channels and their column offset in the buffer:
            self.grid_channels.append(nchannels)
            self.grid_offs.append(offs)
            offs += nchannels
            # file size, 4 bytes per sample:
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4//nchannels
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                diff = self.frames - frames
                # a file may be one frame shorter or up to two frames
                # longer than the count found so far:
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    # keep the smaller number of frames:
                    self.frames = frames
            file.seek(0)
    except Exception:
        # do not leave already opened files dangling on failure:
        self._close_fishgrid()
        raise
    # translate the requested channel into an index relative to
    # the first channel of the grid that contains it:
    gs = 0
    for s in grid_sizes:
        if channel < gs + s:
            break
        gs += s
    self.channel = channel - gs
    # a single channel is presented as 1-D data, all channels as 2-D:
    if self.channel >= 0:
        self.shape = (self.frames,)
    else:
        self.shape = (self.frames, self.channels)
    # buffer sizes are given in seconds and stored in frames:
    self.buffersize = int(buffersize*self.samplerate)
    self.backsize = int(backsize*self.samplerate)
    self._init_buffer()
    self.offset = 0
    # redirect closing and buffer updates to the fishgrid implementations:
    self.close = self._close_fishgrid
    self._update_buffer = self._update_buffer_fishgrid
    return self
def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0)
-
Open relacs data files (www.relacs.net) for reading.
Parameters
filepathes
:string or list of string
- Path to a relacs data directory, a relacs stimuli.dat file, a relacs info.dat file, or relacs trace-*.raw files.
channel
:int
- The requested data channel. If negative all channels are selected.
buffersize
:float
- Size of internal buffer in seconds.
backsize
:float
- Part of the buffer to be loaded before the requested start index in seconds.
verbose
:int
- If > 0 show detailed error/warning messages.
Expand source code
def open_relacs(self, filepathes, channel=-1, buffersize=10.0, backsize=0.0, verbose=0):
    """Open relacs data files (www.relacs.net) for reading.

    Parameters
    ----------
    filepathes: string or list of string
        Path to a relacs data directory, a relacs stimuli.dat file,
        a relacs info.dat file, or relacs trace-*.raw files.
    channel: int
        The requested data channel. If negative all channels are selected.
    buffersize: float
        Size of internal buffer in seconds.
    backsize: float
        Part of the buffer to be loaded before the requested start index in seconds.
    verbose: int
        If > 0 show detailed error/warning messages.

    Returns
    -------
    self:
        This loader, so that calls can be chained.

    Raises
    ------
    ValueError:
        The number of frames of the traces differ by more than the
        tolerated amount, or sampling rates or units of the traces
        differ. All files opened so far are closed before the
        exception is passed on.
    """
    self.verbose = verbose
    if self.sf is not None:
        self._close_relacs()
    filepathes = relacs_files(filepathes, channel)
    # open trace files:
    self.sf = []
    self.frames = None
    self.samplerate = None
    self.unit = ""
    try:
        for path in filepathes:
            file = open(path, 'rb')
            self.sf.append(file)
            if verbose > 0:
                print( 'opened %s' % path)
            # file size, 4 bytes per sample:
            file.seek(0, os.SEEK_END)
            frames = file.tell()//4
            if self.frames is None:
                self.frames = frames
            elif self.frames != frames:
                diff = self.frames - frames
                # a trace may be one frame shorter or up to two frames
                # longer than the count found so far:
                if diff > 1 or diff < -2:
                    raise ValueError('number of frames of traces differ')
                elif diff >= 0:
                    # keep the smaller number of frames:
                    self.frames = frames
            file.seek(0)
            # retrieve sampling rate and unit, all traces have to agree:
            rate, us = relacs_samplerate_unit(path)
            if self.samplerate is None:
                self.samplerate = rate
            elif rate != self.samplerate:
                raise ValueError('sampling rates of traces differ')
            if len(self.unit) == 0:
                self.unit = us
            elif us != self.unit:
                raise ValueError('unit of traces differ')
    except Exception:
        # do not leave already opened files dangling on failure:
        self._close_relacs()
        raise
    # one trace file per channel:
    self.channels = len(self.sf)
    self.channel = channel
    # a single channel is presented as 1-D data, all channels as 2-D:
    if self.channel >= 0:
        self.shape = (self.frames,)
    else:
        self.shape = (self.frames, self.channels)
    # buffer sizes are given in seconds and stored in frames:
    self.buffersize = int(buffersize*self.samplerate)
    self.backsize = int(backsize*self.samplerate)
    self._init_buffer()
    self.offset = 0
    # redirect closing and buffer updates to the relacs implementations:
    self.close = self._close_relacs
    self._update_buffer = self._update_buffer_relacs
    return self