Skip to content

Commit

Permalink
[ENH] remove aeon bound and Transformers notebook (#101)
Browse files Browse the repository at this point in the history
* remove aeon bound

* notebook

* notebook

* notebook

* notebook

* tests

* notebook

* import

* rocket

* tests

* tests

* tests

* tests

* formatting

* notebook

* notebook

* notebook

* BP test
  • Loading branch information
TonyBagnall authored Dec 16, 2024
1 parent effc961 commit 8089c93
Show file tree
Hide file tree
Showing 55 changed files with 723 additions and 539 deletions.
9 changes: 9 additions & 0 deletions aeon_neuro/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Utilities for loading datasets."""

__maintainer__ = ["TonyBagnall"]
__all__ = ["load_kdd_example", "load_kdd_full_example"]

from aeon_neuro.datasets._single_problem_loaders import (
load_kdd_example,
load_kdd_full_example,
)
160 changes: 160 additions & 0 deletions aeon_neuro/datasets/_single_problem_loaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""Utilities for loading datasets."""

__maintainer__ = ["TonyBagnall"]
# Public API of this module: both loaders defined below are exported, which
# matches what aeon_neuro/datasets/__init__.py imports from here.
__all__ = ["load_kdd_example", "load_kdd_full_example"]

import mne
import numpy as np
from aeon.datasets import load_classification


def load_kdd_example(split=None, return_metadata=False):
    """Load the small, preprocessed EEG example from the 2024 SIGKDD tutorial.

    The tutorial covered time series classification and regression; slides and
    code are available at
    https://github.com/aeon-toolkit/aeon-tutorials/tree/main/KDD-2024.
    This version of the data contains four channels and is downsampled to 100
    time points. The full recordings can be read directly from the BIDS files
    with ``load_kdd_full_example``.

    Parameters
    ----------
    split : None or one of "TRAIN", "TEST", default=None
        Whether to load the train or test instances of the problem. By default
        it loads both train and test instances into a single array.
    return_metadata : bool, default=False
        Forwarded unchanged to ``aeon.datasets.load_classification``.
        NOTE(review): presumably this makes the loader return the dataset
        metadata in addition to ``X`` and ``y`` - confirm against the aeon
        documentation.

    Returns
    -------
    X : np.ndarray
        Collection of EEG recordings.
    y : np.ndarray
        The class labels for each EEG in X.

    Notes
    -----
    The call is delegated entirely to ``load_classification`` using the
    relative path ``data/KDD_Example/KDD_Example/`` and the problem name
    ``KDD_MTSC``; any exception raised there propagates to the caller.

    Number of time points: 100
    Number of channels: 4
    Train cases: 20
    Test cases: 20
    Number of classes: 2 ("task", "rest")
    Details: TBC
    """
    # Collect the loader arguments first so the fixed dataset location and the
    # caller-supplied options are visible side by side.
    loader_kwargs = {
        "extract_path": "data/KDD_Example/KDD_Example/",
        "name": "KDD_MTSC",
        "split": split,
        "return_metadata": return_metadata,
    }
    return load_classification(**loader_kwargs)


def load_kdd_full_example(split=None, verbose=False):
    """Load the raw example EEG recordings and format them for classification.

    Toy EEG recorded by Aiden Rushbrooke and stored in BIDS format to use as an
    example for the aeon_neuro package. The function loads the BrainVision
    files, pre-processes them and segments them into labelled instances. It is
    the full-size counterpart of ``load_kdd_example``, used in the 2024 SIGKDD
    tutorial on time series classification and regression
    (https://github.com/aeon-toolkit/aeon-tutorials/tree/main/KDD-2024).

    Processing steps, per recording (tasks "task" and "rest", runs 01-06 of
    subject 01, session 01):

    1. Read the ``.vhdr`` file and load the data into memory.
    2. Drop the channels "ACC_X", "ACC_Y", "ACC_Z" and "DeviceTrigger".
    3. Band-pass filter with ``l_freq=0.5`` and ``h_freq=100``.
    4. Cut windows: for "task" recordings one window per annotation, from 0.2
       before to 0.8 after the annotation onset; for "rest" recordings 40
       windows of length 1 starting at 30, 31.5, ..., 88.5.
    5. Keep at most the first 1000 samples of each window.

    The instances are then taken in consecutive blocks of 40: the first 20 of
    each block go to the train set and the remaining 20 to the test set.
    Instances left over after the last complete block are discarded.

    Parameters
    ----------
    split : None or one of "TRAIN", "TEST", default=None
        Whether to return the train or test instances of the problem. Any
        other value (including the default ``None``) returns the train
        instances followed by the test instances in a single array.
    verbose : bool, default=False
        Passed to the MNE reading, loading and filtering calls.

    Returns
    -------
    X : np.ndarray
        3D array of shape (n_cases, n_channels, n_timepoints) holding the
        segmented EEG windows.
        NOTE(review): the previous docstring mentioned shape (240, 32, 1000)
        for the train split - confirm against the data.
    y : np.ndarray
        1D array of class labels ("task" or "rest"), one per instance in X.

    Notes
    -----
    The data is read from ``../../example_raw_eeg/basic_classification_task``,
    which is a relative path.
    NOTE(review): it is therefore resolved against the current working
    directory, not against this module - confirm callers run from the
    expected location.

    Number of classes: 2 ("task", "rest")
    Details: TBC
    """
    data_path = "../../example_raw_eeg/basic_classification_task"
    tasks = ["task", "rest"]
    runs = ["01", "02", "03", "04", "05", "06"]

    n_samples = 1000  # samples kept from every window
    block_size = 40  # consecutive instances that are split together
    n_train_per_block = 20  # leading instances of each block used for training

    X = []
    y = []

    # Load each data file from BIDS format
    for task in tasks:
        for run in runs:
            header_file = (
                f"{data_path}/sub-01/ses-01/eeg/"
                f"sub-01_ses-01_task-{task}_run-{run}_eeg.vhdr"
            )
            raw = mne.io.read_raw_brainvision(header_file, verbose=verbose)
            raw.load_data(verbose=verbose)
            raw.drop_channels(["ACC_X", "ACC_Y", "ACC_Z", "DeviceTrigger"])
            # The data is already in memory and filter() works in place, so the
            # recording does not need to be copied before filtering.
            raw.filter(l_freq=0.5, h_freq=100, verbose=verbose)

            if task == "task":
                # One window around every annotated trial onset.
                windows = [
                    (trial["onset"] - 0.2, trial["onset"] + 0.800)
                    for trial in raw.annotations
                ]
            else:
                # Evenly spaced windows taken from the middle of the recording.
                windows = [(start, start + 1) for start in np.arange(30, 90, 1.5)]

            for tmin, tmax in windows:
                # crop() modifies the object it is called on, hence the copy.
                segment = raw.copy().crop(tmin=tmin, tmax=tmax)
                X.append(segment.get_data()[:, :n_samples])
                y.append(task)

    X = np.asarray(X)
    y = np.asarray(y)

    # Reformat data into a classification problem: only complete blocks are
    # used, and within each block the leading instances form the train set.
    n_usable = (len(X) // block_size) * block_size
    in_train = (np.arange(n_usable) % block_size) < n_train_per_block
    X_train, X_test = X[:n_usable][in_train], X[:n_usable][~in_train]
    y_train, y_test = y[:n_usable][in_train], y[:n_usable][~in_train]

    if split == "TRAIN":
        return X_train, y_train
    if split == "TEST":
        return X_test, y_test
    return (
        np.concatenate((X_train, X_test), axis=0),
        np.concatenate((y_train, y_test)),
    )
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
11 changes: 0 additions & 11 deletions aeon_neuro/transformations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1 @@
"""EEG related transformations."""

__all__ = [
"BandPowerSeriesTransformer",
"EpochSeriesTransformer",
"DownsampleCollectionTransformer",
]


from aeon_neuro.transformations._bandpower import BandPowerSeriesTransformer
from aeon_neuro.transformations._downsample import DownsampleCollectionTransformer
from aeon_neuro.transformations._epoching import EpochSeriesTransformer
9 changes: 9 additions & 0 deletions aeon_neuro/transformations/collection/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
"""Collection transformers."""

__all__ = [
"DownsampleCollectionTransformer",
]


from aeon_neuro.transformations.collection._downsample import (
DownsampleCollectionTransformer,
)
File renamed without changes.
1 change: 1 addition & 0 deletions aeon_neuro/transformations/collection/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests."""
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from aeon_neuro.transformations import DownsampleCollectionTransformer
from aeon_neuro.transformations.collection import DownsampleCollectionTransformer

# list of 2D numpy arrays, unequal lengths
X = [
Expand Down
10 changes: 10 additions & 0 deletions aeon_neuro/transformations/series/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Init file."""

__all__ = [
"BandPowerSeriesTransformer",
"EpochSeriesTransformer",
]


from aeon_neuro.transformations.series._bandpower import BandPowerSeriesTransformer
from aeon_neuro.transformations.series._epoching import EpochSeriesTransformer
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ class BandPowerSeriesTransformer(BaseSeriesTransformer):
EEG signals occupy the frequency range of 0 - 60Hz,
which is roughly divided into five constituent physiological EEG sub bands:
δ (0 - 4Hz), θ (4 - 7Hz), α (8 - 12Hz), β (13 - 30Hz), and γ (30 - 60Hz).
Power within each frequency band is estimated over time using windowed FFTs,
then averaged across channels.
delta (δ): 0 - 4Hz, theta (θ): 4 - 7Hz, alpha (α): 8 - 12Hz, beta (β): 13 - 30Hz
and gamma (γ): 30 - 60Hz.
Power within each frequency band is estimated over time using windowed FFTs.
The transformer uses psd_array_welch from MNE to calculate power spectral
densities for each window for the given sampling frequency. Band powers are then
found through summing over band widths and channels.
Parameters
----------
Expand All @@ -42,6 +46,7 @@ class BandPowerSeriesTransformer(BaseSeriesTransformer):
If `sfreq` is too low to capture the highest frequency band.
If `window_size` is too small to capture the lowest frequency band.
If `stride` is not between 1 and `window_size`.
"""

_tags = {
Expand All @@ -66,28 +71,13 @@ def __init__(
relative=True,
n_jobs=1,
):
super().__init__(axis=1) # (n_channels, n_timepoints)

# checks
nyquist_freq = 2 * self.FREQ_BANDS["gamma"][1]
if sfreq < nyquist_freq:
raise ValueError(f"sfreq must be at least {nyquist_freq} Hz.")

min_n = sfreq // 2
if window_size < min_n:
raise ValueError(f"window_size must be at least {min_n} for lowest freqs.")

if stride is None:
stride = window_size
elif not (1 <= stride <= window_size):
raise ValueError(f"stride must be between 1 and {window_size}.")

self.sfreq = sfreq
self.window_size = window_size
self.window_function = window_function
self.stride = stride
self.relative = relative
self.n_jobs = check_n_jobs(n_jobs)
self.n_jobs = n_jobs
super().__init__(axis=1) # (n_channels, n_timepoints)

def _transform(self, X, y=None):
"""Transform the input series to extract band power series.
Expand All @@ -104,7 +94,23 @@ def _transform(self, X, y=None):
np.ndarray of shape (5_bands, (n_timepoints - window_size) // stride + 1)
Power within δ, θ, α, β, and γ bands over time.
"""
n_overlap = self.window_size - self.stride
# checks
n_jobs = check_n_jobs(self.n_jobs)
nyquist_freq = 2 * self.FREQ_BANDS["gamma"][1]
if self.sfreq < nyquist_freq:
raise ValueError(f"sfreq must be at least {nyquist_freq} Hz.")

min_n = self.sfreq // 2
if self.window_size < min_n:
raise ValueError(f"window_size must be at least {min_n} for lowest freqs.")

stride = self.stride
if self.stride is None:
stride = self.window_size
elif not (1 <= stride <= self.window_size):
raise ValueError(f"stride must be between 1 and {self.window_size}.")

n_overlap = self.window_size - stride
# next power of 2, for FFT efficiency
n_fft = int(2 ** np.ceil(np.log2(self.window_size)))
powers, freqs = psd_array_welch(
Expand All @@ -115,7 +121,7 @@ def _transform(self, X, y=None):
n_fft=n_fft,
n_overlap=n_overlap,
n_per_seg=self.window_size,
n_jobs=self.n_jobs,
n_jobs=n_jobs,
average=None,
window=self.window_function,
verbose="error",
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions aeon_neuro/transformations/series/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests."""
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,17 @@
import numpy as np
import pytest

from aeon_neuro.transformations import BandPowerSeriesTransformer
from aeon_neuro.transformations.series import BandPowerSeriesTransformer

# set parameters, assuming X ~ iid = flat PSD
n_channels, n_timepoints, window_size, stride = 3, 30000, 1024, 100
n_windows = (n_timepoints - window_size) // stride + 1


@pytest.fixture
def sim_X():
"""Simulate X ~ iid = flat PSD."""
rng = np.random.default_rng(seed=0)
return rng.standard_normal(size=(n_channels, n_timepoints)) # axis=1


def test_transform(sim_X):
def test_transform():
"""Test BandPowerSeriesTransformer."""
X = sim_X
rng = np.random.default_rng(seed=0)
X = rng.standard_normal(size=(n_channels, n_timepoints)) # axis=1
transformer = BandPowerSeriesTransformer(
sfreq=256, window_size=window_size, stride=stride, relative=True
)
Expand Down Expand Up @@ -54,16 +48,18 @@ def test_transform_nyquist():
transformer.fit_transform(X)

with pytest.raises(ValueError, match="sfreq must be at least .* Hz."):
BandPowerSeriesTransformer(sfreq=119)
bp = BandPowerSeriesTransformer(sfreq=119)
bp.fit_transform(X)

with pytest.raises(
ValueError,
match="window_size must be at least .* for lowest freqs.",
):
BandPowerSeriesTransformer(sfreq=120, window_size=59)

bp = BandPowerSeriesTransformer(sfreq=120, window_size=59)
bp.fit_transform(X)
with pytest.raises(ValueError, match="stride must be between 1 and .*"):
BandPowerSeriesTransformer(window_size=100, stride=101)

bp = BandPowerSeriesTransformer(window_size=100, stride=101)
bp.fit_transform(X)
with pytest.raises(ValueError, match="stride must be between 1 and .*"):
BandPowerSeriesTransformer(window_size=100, stride=0)
bp = BandPowerSeriesTransformer(window_size=100, stride=0)
bp.fit_transform(X)
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import numpy as np

from aeon_neuro.transformations import EpochSeriesTransformer
from aeon_neuro.transformations._epoching import epoch_dataset
from aeon_neuro.transformations.series import EpochSeriesTransformer
from aeon_neuro.transformations.series._epoching import epoch_dataset

# set parameters
n_cases, n_channels, n_timepoints, sfreq = 3, 2, 12, 1
Expand Down
1 change: 0 additions & 1 deletion aeon_neuro/transformations/tests/__init__.py

This file was deleted.

Loading

0 comments on commit 8089c93

Please sign in to comment.