Module thunderfish.hopkinsloader

Load EODs from Hopkins files.

Carl Hopkins and John Sullivan stored only a few cut-out waveforms of Mormyrid EODs per recording in mat files with a specific layout. These recordings are available at the Macaulay Library.

Functions

Expand source code
"""Load EODs from Hopkins files.

Carl Hopkins and John Sullivan stored only a few cut-out waveforms of
Mormyrid EODs per recording in mat files with a specific layout. These
recordings are available at the Macaulay Library.

## Functions

- `load_hopkins()`: load a Hopkins file containing a few EOD pulses.
- `analyse_hopkins()`: analyze the content of Hopkins files.

"""

import sys
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
from audioio import print_metadata


def load_hopkins(file_path):
    """ Load a Hopkins file containing a few EOD pulses.

    The mat file is expected to contain a struct array `eod` with at
    least the fields `wave` and `time`. All other fields that do not
    hold arrays are returned as metadata.

    Parameters
    ----------
    file_path: str
        The mat file with the data.

    Returns
    -------
    data: list of 2-D ndarrays
        A list of single EOD pulses.
        First column is time in seconds, second column EOD waveform.
        Empty list if the file does not contain a non-empty `eod`
        struct with `wave` and `time` fields.
    md: nested dict
        Metadata. Fields with the same value for all EODs are stored
        directly under their field name. Fields that differ between
        EODs are stored for each EOD in sub-dictionaries with keys
        'EOD0', 'EOD1', ... Empty dict if no EODs were found.

    Raises
    ------
    FileNotFoundError
        `file_path` does not exist.
    ValueError
        `file_path` is not a valid mat file.
    """
    x = loadmat(file_path, squeeze_me=True)
    # scalars and strings are squeezed to plain python objects without `size`:
    if 'eod' not in x or not hasattr(x['eod'], 'size'):
        return [], {}
    y = x['eod'].reshape(x['eod'].size)
    # `names` is None if `eod` is a plain array and not a struct:
    names = y.dtype.names
    if names is None or 'wave' not in names or 'time' not in names:
        return [], {}
    # nothing to load from an empty struct array:
    if y.size == 0:
        return [], {}
    # assemble data:
    data = []
    for time, wave in zip(y['time'], y['wave']):
        eod = np.zeros((len(wave), 2))
        eod[:, 0] = time
        eod[:, 1] = wave.astype(float)
        data.append(eod)
    # assemble metadata:
    md = {}
    eod_md = []
    for n in names:
        values = y[n]
        first = values[0]
        # arrays (e.g. `wave` and `time`) are data, not metadata:
        if isinstance(first, np.ndarray):
            continue
        # some metadata may or may not differ between EODs.
        # `eodnum` always differs and is therefore not considered:
        differs = n != 'eodnum' and any(v and v != first for v in values)
        if differs:
            # store the values of this field separately for each EOD:
            while len(eod_md) < len(values):
                eod_md.append({})
            for k, v in enumerate(values):
                if isinstance(v, str):
                    v = v.replace('Date:', '')
                    v = v.replace('Time:', '')
                    v = v.replace('Time', '')
                    v = v.strip()
                eod_md[k][n] = v
        else:
            # same value for all EODs, store it only once:
            v = first
            if isinstance(v, str):
                v = v.replace('Date:', '')
                v = v.replace('Time:', '')
                v = v.replace('Time', '')
                v = v.replace(' T: ', 'T')
                v = v.strip()
            md[n] = v
    for k, per_eod in enumerate(eod_md):
        md[f'EOD{k}'] = per_eod
    return data, md


def analyse_hopkins(pathes):
    """ Analyze the content of Hopkins files.

    Counts over all files how often each field name of the `eod`
    struct occurs, which python types the first entry of each field
    has, and which data type the first `wave` array has. The three
    tables are printed to standard output.

    Parameters
    ----------
    pathes: list of str
        Files to be analyzed.
    """
    field_counts = {}
    type_counts = {}
    wave_dtype_counts = {}
    for mat_path in pathes:
        eods = loadmat(mat_path, squeeze_me=True)['eod']
        eods = eods.reshape(eods.size)
        for name in eods.dtype.names:
            field_counts[name] = field_counts.get(name, 0) + 1
            # only the first EOD of a file is inspected:
            field_type = type(eods[name][0])
            type_counts[field_type] = type_counts.get(field_type, 0) + 1
        wave_dtype = eods['wave'][0].dtype
        wave_dtype_counts[wave_dtype] = wave_dtype_counts.get(wave_dtype, 0) + 1

    # each file contains several "wave" and "time" arrays for plotting the EODs.
    # within a file they might differ in size!

    # percentage of files in which each field name was found:
    print('keys:')
    for name, count in field_counts.items():
        print(f'  {100*count/len(pathes):3.0f}%', name)
    print()

    # number of files for each data type of the wave arrays:
    print('data types:')
    for wave_dtype, count in wave_dtype_counts.items():
        print(f'  {count:5d}', wave_dtype)
    print()
    #  226 float64
    #   57 int16

    # number of fields for each python type:
    print('field types:')
    for field_type, count in type_counts.items():
        print(f'  {count:5d}', field_type)
    print()
    # 2845 <class 'int'>
    # 4002 <class 'numpy.ndarray'>
    #14681 <class 'str'>
    # 1478 <class 'float'>


if __name__ == '__main__':

    mat_files = sys.argv[1:]

    # summary statistics over all files given on the command line:
    analyse_hopkins(mat_files)

    # then print the metadata and plot the EOD pulses of each file:
    for mat_file in mat_files:
        print(mat_file)
        pulses, metadata = load_hopkins(mat_file)
        print_metadata(metadata, '  ')
        fig, ax = plt.subplots()
        ax.set_title(metadata.get('speciesIDweb', ''))
        for pulse in pulses:
            # time is stored in seconds, plot it in milliseconds:
            ax.plot(1000*pulse[:, 0], pulse[:, 1])
            ax.set_xlabel('Time [ms]')
        plt.show()
        print()

Functions

def load_hopkins(file_path)

Load a Hopkins file containing a few EOD pulses.

Parameters

file_path : str
The mat file with the data.

Returns

data : list of 2-D ndarrays
A list of single EOD pulses. First column is time in seconds, second column EOD waveform.
md : nested dict
Metadata.

Raises

FileNotFoundError
file_path does not exist.
ValueError
file_path is not a valid mat file.
Expand source code
def load_hopkins(file_path):
    """ Load a Hopkins file containing a few EOD pulses.

    The mat file is expected to contain a struct array `eod` with at
    least the fields `wave` and `time`. All other fields that do not
    hold arrays are returned as metadata.

    Parameters
    ----------
    file_path: str
        The mat file with the data.

    Returns
    -------
    data: list of 2-D ndarrays
        A list of single EOD pulses.
        First column is time in seconds, second column EOD waveform.
        Empty list if the file does not contain a non-empty `eod`
        struct with `wave` and `time` fields.
    md: nested dict
        Metadata. Fields with the same value for all EODs are stored
        directly under their field name. Fields that differ between
        EODs are stored for each EOD in sub-dictionaries with keys
        'EOD0', 'EOD1', ... Empty dict if no EODs were found.

    Raises
    ------
    FileNotFoundError
        `file_path` does not exist.
    ValueError
        `file_path` is not a valid mat file.
    """
    x = loadmat(file_path, squeeze_me=True)
    # scalars and strings are squeezed to plain python objects without `size`:
    if 'eod' not in x or not hasattr(x['eod'], 'size'):
        return [], {}
    y = x['eod'].reshape(x['eod'].size)
    # `names` is None if `eod` is a plain array and not a struct:
    names = y.dtype.names
    if names is None or 'wave' not in names or 'time' not in names:
        return [], {}
    # nothing to load from an empty struct array:
    if y.size == 0:
        return [], {}
    # assemble data:
    data = []
    for time, wave in zip(y['time'], y['wave']):
        eod = np.zeros((len(wave), 2))
        eod[:, 0] = time
        eod[:, 1] = wave.astype(float)
        data.append(eod)
    # assemble metadata:
    md = {}
    eod_md = []
    for n in names:
        values = y[n]
        first = values[0]
        # arrays (e.g. `wave` and `time`) are data, not metadata:
        if isinstance(first, np.ndarray):
            continue
        # some metadata may or may not differ between EODs.
        # `eodnum` always differs and is therefore not considered:
        differs = n != 'eodnum' and any(v and v != first for v in values)
        if differs:
            # store the values of this field separately for each EOD:
            while len(eod_md) < len(values):
                eod_md.append({})
            for k, v in enumerate(values):
                if isinstance(v, str):
                    v = v.replace('Date:', '')
                    v = v.replace('Time:', '')
                    v = v.replace('Time', '')
                    v = v.strip()
                eod_md[k][n] = v
        else:
            # same value for all EODs, store it only once:
            v = first
            if isinstance(v, str):
                v = v.replace('Date:', '')
                v = v.replace('Time:', '')
                v = v.replace('Time', '')
                v = v.replace(' T: ', 'T')
                v = v.strip()
            md[n] = v
    for k, per_eod in enumerate(eod_md):
        md[f'EOD{k}'] = per_eod
    return data, md
def analyse_hopkins(pathes)

Analyze the content of Hopkins files.

Prints out some statistics about the field names and types.

Parameters

pathes : list of str
Files to be analyzed.
Expand source code
def analyse_hopkins(pathes):
    """ Analyze the content of Hopkins files.

    Counts over all files how often each field name of the `eod`
    struct occurs, which python types the first entry of each field
    has, and which data type the first `wave` array has. The three
    tables are printed to standard output.

    Parameters
    ----------
    pathes: list of str
        Files to be analyzed.
    """
    field_counts = {}
    type_counts = {}
    wave_dtype_counts = {}
    for mat_path in pathes:
        eods = loadmat(mat_path, squeeze_me=True)['eod']
        eods = eods.reshape(eods.size)
        for name in eods.dtype.names:
            field_counts[name] = field_counts.get(name, 0) + 1
            # only the first EOD of a file is inspected:
            field_type = type(eods[name][0])
            type_counts[field_type] = type_counts.get(field_type, 0) + 1
        wave_dtype = eods['wave'][0].dtype
        wave_dtype_counts[wave_dtype] = wave_dtype_counts.get(wave_dtype, 0) + 1

    # each file contains several "wave" and "time" arrays for plotting the EODs.
    # within a file they might differ in size!

    # percentage of files in which each field name was found:
    print('keys:')
    for name, count in field_counts.items():
        print(f'  {100*count/len(pathes):3.0f}%', name)
    print()

    # number of files for each data type of the wave arrays:
    print('data types:')
    for wave_dtype, count in wave_dtype_counts.items():
        print(f'  {count:5d}', wave_dtype)
    print()
    #  226 float64
    #   57 int16

    # number of fields for each python type:
    print('field types:')
    for field_type, count in type_counts.items():
        print(f'  {count:5d}', field_type)
    print()
    # 2845 <class 'int'>
    # 4002 <class 'numpy.ndarray'>
    #14681 <class 'str'>
    # 1478 <class 'float'>