Source code for sefef.labeling

# -*- coding: utf-8 -*-
"""
sefef.labeling
--------------

This module contains functions to automatically label samples according to the desired pre-ictal duration and prediction latency.

:copyright: (c) 2024 by Ana Sofia Carmo
:license: BSD 3-clause License, see LICENSE for more details.
"""


# built-in
import warnings

# third-party
import numpy as np


def add_annotations(h5dataset, sz_onsets_ts, preictal_duration=3600, prediction_latency=600):
    '''
    Add "annotations", with shape (#samples, ) and dtype "bool", to HDF5 file object according to the variables "preictal_duration" and "prediction_latency". Annotations are either 0 (inter-ictal), or 1 (pre-ictal).

    A sample is labeled as pre-ictal when its start timestamp falls within the half-open interval
    [onset - (preictal_duration + prediction_latency), onset - prediction_latency) of at least one seizure onset.

    Parameters
    ----------
    h5dataset : HDF5 file
        HDF5 file object with the following datasets:
        - "data": each entry corresponds to a sample with shape (embedding shape), e.g. (#features, ) or (sample duration, #channels).
        - "timestamps": contains the start timestamp (unix in seconds) of each sample in the "data" dataset, with shape (#samples, ).
        - "sz_onsets": contains the Unix timestamps of the onsets of seizures (#sz_onsets, ). (optional)
    sz_onsets_ts : array-like, shape (#sz onsets, )
        Contains the unix timestamps (in seconds) of the onsets of seizures.
    preictal_duration : int, defaults to 3600 (60min)
        Duration of the period (in seconds) that will be labeled as preictal, i.e. that we expect to contain useful information for the forecast.
    prediction_latency : int, defaults to 600 (10min)
        Latency (in seconds) of the preictal period with regards to seizure onset.

    Returns
    -------
    None, but adds a dataset instance to the h5dataset file object.

    Raises
    ------
    KeyError :
        Raised when "h5dataset" does not contain a "timestamps" dataset.

    Warns
    -----
    UserWarning :
        Issued when "h5dataset" already contains "annotations"; in that case the existing dataset is left untouched.
    '''
    if 'timestamps' not in h5dataset:
        raise KeyError('HDF5 file does not contain a "timestamps" dataset, which should contain the start timestamp (unix in seconds) of each sample in the "data" dataset, with shape (#samples, ).')

    if 'annotations' in h5dataset:
        # stacklevel=2 attributes the warning to the caller rather than to this module.
        warnings.warn('Dataset already contains annotations. Skipping this step.', stacklevel=2)
        return None

    # "[()]" reads the whole dataset into memory.
    timestamps = h5dataset['timestamps'][()]
    labels = np.zeros(timestamps.shape, dtype='bool')

    for sz_ts in sz_onsets_ts:
        preictal_start = sz_ts - (preictal_duration + prediction_latency)
        preictal_end = sz_ts - prediction_latency
        # Windows of different seizures may overlap; a sample only needs to fall within one of them.
        labels[(timestamps >= preictal_start) & (timestamps < preictal_end)] = True

    h5dataset.create_dataset("annotations", data=labels, dtype='bool')
def add_sz_onsets(h5dataset, sz_onsets_ts):
    '''
    Add "sz_onsets", with shape (#seizures, ) and dtype "int64", to HDF5 file object, corresponding to the Unix timestamps of each seizure onset.

    Parameters
    ----------
    h5dataset : HDF5 file
        HDF5 file object with the following datasets:
        - "data": each entry corresponds to a sample with shape (embedding shape), e.g. (#features, ) or (sample duration, #channels).
        - "timestamps": contains the start timestamp (unix in seconds) of each sample in the "data" dataset, with shape (#samples, ).
        - "annotations": contains the annotations (aka labels) of each sample. (optional)
    sz_onsets_ts : array-like, shape (#sz onsets, )
        Contains the unix timestamps (in seconds) of the onsets of seizures.

    Returns
    -------
    None, but adds a dataset instance to the h5dataset file object.

    Warns
    -----
    UserWarning :
        Issued when "h5dataset" already contains "sz_onsets"; in that case the existing dataset is left untouched.
    '''
    if 'sz_onsets' in h5dataset:
        # stacklevel=2 attributes the warning to the caller rather than to this module.
        warnings.warn('Dataset already contains the onsets of seizures. Skipping this step.', stacklevel=2)
        return None

    h5dataset.create_dataset("sz_onsets", data=sz_onsets_ts, dtype='int64')