From 737772ffd2745cb36f741f54677acbc2b203b711 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sat, 7 May 2022 08:42:59 -0500 Subject: [PATCH 01/15] wip: documentation on signal pre-processing --- .../miv.signal.filter.FilterCollection.rst | 7 ---- docs/api/signal.rst | 37 +++---------------- miv/signal/filter/butter_bandpass_filter.py | 4 +- miv/signal/filter/filter_collection.py | 21 ++++++++++- miv/signal/filter/protocol.py | 4 ++ miv/signal/spike/detection.py | 18 ++++++++- 6 files changed, 50 insertions(+), 41 deletions(-) delete mode 100644 docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst diff --git a/docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst b/docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst deleted file mode 100644 index b21e210b..00000000 --- a/docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst +++ /dev/null @@ -1,7 +0,0 @@ -miv.signal.filter.FilterCollection -================================== - -.. currentmodule:: miv.signal.filter - -.. autoclass:: FilterCollection - :members: append, insert diff --git a/docs/api/signal.rst b/docs/api/signal.rst index b8461213..2f1ad546 100644 --- a/docs/api/signal.rst +++ b/docs/api/signal.rst @@ -1,34 +1,9 @@ -************************* -Signal Processing Modules -************************* +********************* +Signal Pre-Processing +********************* + -Filter -###### +.. automodule:: miv.signal.filter.filter_collection -.. currentmodule:: miv.signal.filter - -.. automodule:: miv.signal.filter - - .. autosummary:: - :nosignatures: - :toctree: _toctree/FilterAPI - - FilterProtocol - ButterBandpass - FilterCollection - -Spike Detection -############### - -.. automodule:: miv.signal.spike - - .. autosummary:: - :nosignatures: - :toctree: _toctree/DetectionAPI - - SpikeDetectionProtocol - ThresholdCutoff - -Spike Sorting -############# +.. 
automodule:: miv.signal.spike.detection diff --git a/miv/signal/filter/butter_bandpass_filter.py b/miv/signal/filter/butter_bandpass_filter.py index 3d882f8f..d0d4a923 100644 --- a/miv/signal/filter/butter_bandpass_filter.py +++ b/miv/signal/filter/butter_bandpass_filter.py @@ -32,7 +32,9 @@ class ButterBandpass: order: int = 5 tag: str = "" - def __call__(self, signal: SignalType, sampling_rate: float) -> SignalType: + def __call__( + self, signal: SignalType, sampling_rate: float, **kwargs + ) -> SignalType: b, a = self._butter_bandpass(sampling_rate) y = sps.lfilter(b, a, signal) return y diff --git a/miv/signal/filter/filter_collection.py b/miv/signal/filter/filter_collection.py index 26ea0f7d..35338479 100644 --- a/miv/signal/filter/filter_collection.py +++ b/miv/signal/filter/filter_collection.py @@ -1,4 +1,23 @@ -__doc__ = "" +__doc__ = """ + +Signal Filter +############# + + + +.. currentmodule:: miv.signal.filter + +.. autoclass:: FilterCollection + :members: append, insert + +.. autosummary:: + :nosignatures: + :toctree: _toctree/FilterAPI + + FilterProtocol + ButterBandpass + +""" __all__ = ["FilterCollection"] from typing import Union, List diff --git a/miv/signal/filter/protocol.py b/miv/signal/filter/protocol.py index ece88bca..9393f508 100644 --- a/miv/signal/filter/protocol.py +++ b/miv/signal/filter/protocol.py @@ -24,3 +24,7 @@ def __call__(self, array: SignalType, sampling_rate: float, **kwargs) -> SignalT samping_rate : float """ ... + + def __repr__(self) -> str: + """String representation for interactive debugging.""" + ... diff --git a/miv/signal/spike/detection.py b/miv/signal/spike/detection.py index 2e9657cc..d179148e 100644 --- a/miv/signal/spike/detection.py +++ b/miv/signal/spike/detection.py @@ -1,4 +1,20 @@ -__doc__ = "" +__doc__ = """ + +Spike Detection +############### + + + +.. currentmodule:: miv.signal.spike + +.. 
autosummary:: + :nosignatures: + :toctree: _toctree/DetectionAPI + + SpikeDetectionProtocol + ThresholdCutoff + +""" __all__ = ["ThresholdCutoff"] from typing import Union, List, Iterable From 633f10f266bd4e4c8f0d544cc5a40f63b4133386 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sat, 7 May 2022 08:43:30 -0500 Subject: [PATCH 02/15] update: fix link for FilterCollection --- docs/guide/signal_processing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/signal_processing.md b/docs/guide/signal_processing.md index 25ebe1fb..e61f4a6c 100644 --- a/docs/guide/signal_processing.md +++ b/docs/guide/signal_processing.md @@ -104,7 +104,7 @@ from miv.signal.filter import FilterCollection, ButterBandpass [Here](../api/signal.html#filter) is the list of provided filters. All filters are `Callable`, taking `signal` and `sampling_rate` as parameters. -To define a multiple filters together, we provide [`FilterCollection`](../api/_toctree/FilterAPI/miv.signal.filter.FilterCollection) that execute multiple filters in a series. +To define a multiple filters together, we provide [`FilterCollection`](miv.signal.filter.FilterCollection) that execute multiple filters in a series. 
```{code-cell} ipython3 # Butter bandpass filter From cb20afa77f03b0e2098efd2e5f28263f14646d14 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sun, 8 May 2022 09:22:13 -0600 Subject: [PATCH 03/15] add: coverage check in make commmand --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 81021c25..e5d5a257 100644 --- a/Makefile +++ b/Makefile @@ -7,5 +7,5 @@ mypy: coverage: @pytest --cov=miv tests/ -all:test mypy +all:test mypy coverage ci: test mypy From 090490ba7e818024f6f029d3663315497ff24afd Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sun, 8 May 2022 11:15:27 -0600 Subject: [PATCH 04/15] doc: Add data_management guide and rename sections --- docs/api/io.rst | 9 ++-- docs/api/signal.rst | 2 - docs/guide/data_management.md | 86 +++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 4 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 docs/guide/data_management.md diff --git a/docs/api/io.rst b/docs/api/io.rst index da6820c9..c47f8dee 100644 --- a/docs/api/io.rst +++ b/docs/api/io.rst @@ -1,6 +1,9 @@ -********************* -Input / Output Module -********************* +******************** +Data Managing Module +******************** + +.. automodule:: miv.io.data + :members: .. automodule:: miv.io.binary :members: diff --git a/docs/api/signal.rst b/docs/api/signal.rst index 2f1ad546..2f096736 100644 --- a/docs/api/signal.rst +++ b/docs/api/signal.rst @@ -2,8 +2,6 @@ Signal Pre-Processing ********************* - - .. automodule:: miv.signal.filter.filter_collection .. 
automodule:: miv.signal.spike.detection diff --git a/docs/guide/data_management.md b/docs/guide/data_management.md new file mode 100644 index 00000000..0735c012 --- /dev/null +++ b/docs/guide/data_management.md @@ -0,0 +1,86 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.13.8 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Data Management + +```{code-cell} ipython3 +:tags: [hide-cell] + +import os +import numpy as np +import quantities as pq +import matplotlib.pyplot as plt + +``` + +## 1. Data Load + +```{code-cell} ipython3 +:tags: [hide-cell] + +from miv.io import load_data +from miv.io.data import Data, Dataset +``` + +```{code-cell} ipython3 +# Load dataset from OpenEphys recording +folder_path: str = "~/Open Ephys/2022-03-10-16-19-09" # Data Path +# Provide the path of experimental recording tree to the DataSet class +# Data set class will load the data and create a list of objects for each data +# dataset = load_data(folder_path, device="OpenEphys") +dataset = Dataset(data_folder_path=folder_path, + device="OpenEphys", + channels=32, + sampling_rate=30E3, + timestamps_npy="", # We can read similar to continuous.dat + + ) +#TODO: synchornized_timestamp what for shifted ?? +# Masking channels for data set. Channels can be a list. +# Show user the tree. Implement representation method. filter_collection.html#FilterCollection.insert +# An example code to get the tree https://github.com/skim0119/mindinvitro/blob/master/utility/common.py +# Trimming the tree?? +``` + +### 1.1. Meta Data Structure + +```{code-cell} ipython3 +# Get signal and rate(hz) +record_node: int = dataset.get_nodes[0] +recording = dataset[record_node]["experiment1"]["recording1"] # Returns the object for recording 1 +# TODO: does openephys returns the timestamp?? +timestamp = recording.timestamp # returns the time stamp for the recording. 
+ +signal, _, rate = recording.continuous["100"] +# time = recording.continuous["100"].timestamp / rate +num_channels = signal.shape[1] +``` + +### 1.2 Raw Data + ++++ + +If the data is provided in single `continuous.dat` instead of meta-data, user must provide number of channels and sampling rate in order to import data accurately. + +> **WARNING** The size of the raw datafile can be _large_ depending on sampling rate and the amount of recorded duration. We highly recommend using meta-data structure to handle datafiles, since it only loads the data during the processing and unloads once the processing is done. + +```{code-cell} ipython3 +from miv.io import load_continuous_data_file + +datapath = 'continuous.dat' +rate = 30_000 +num_channel = 64 +timestamps, signal = load_continuous_data_file(datapath, num_channel, rate) +``` + +## 2. Instant Visualization diff --git a/docs/index.rst b/docs/index.rst index 0900f672..faa40ce7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,7 @@ You can also download the source code from `GitHub Date: Mon, 9 May 2022 05:45:22 -0600 Subject: [PATCH 05/15] wip: doc update --- docs/api/io.rst | 1 - miv/io/__init__.py | 1 + miv/io/binary.py | 2 ++ miv/io/data.py | 54 ++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/docs/api/io.rst b/docs/api/io.rst index c47f8dee..4c67339e 100644 --- a/docs/api/io.rst +++ b/docs/api/io.rst @@ -3,7 +3,6 @@ Data Managing Module ******************** .. automodule:: miv.io.data - :members: .. 
automodule:: miv.io.binary :members: diff --git a/miv/io/__init__.py b/miv/io/__init__.py index e49ec5ed..0ec7ac30 100644 --- a/miv/io/__init__.py +++ b/miv/io/__init__.py @@ -1 +1,2 @@ +from miv.io.data import * from miv.io.binary import * diff --git a/miv/io/binary.py b/miv/io/binary.py index ce9cf107..89136381 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -1,5 +1,7 @@ __doc__ = """ +------------------------------------- + We expect the data structure to follow the default format exported from OpenEphys system: `format `_. Original Author diff --git a/miv/io/data.py b/miv/io/data.py index 5486b2c9..377c3fde 100644 --- a/miv/io/data.py +++ b/miv/io/data.py @@ -1,7 +1,27 @@ +__doc__ = """ + +Data Manager +############ + +.. currentmodule:: miv.io.data + +.. autoclass:: Data + :members: + +.. autoclass:: DataManager + :members: + +""" +__all__ = ["Data", "DataManager"] + +from typing import Any, Optional, Iterable, Callable + from collections.abc import MutableSequence -from typing import Optional + import os +import glob import numpy as np + from miv.signal.filter import FilterProtocol from miv.typing import SignalType @@ -32,13 +52,13 @@ def load( Parameters ---------- - data_file: continuous.dat file from Open_Ethys recording - channels: number of recording channels recorded from + data_file: continuous.dat file from Open_Ethys recording + channels: number of recording channels recorded from Returns ------- - raw_data: - timestamps: + raw_data: + timestamps: """ @@ -81,12 +101,12 @@ def save(self, tag: str, format: str): ) -class Dataset(MutableSequence): +class DataManager(MutableSequence): def __init__( self, data_folder_path: str, channels: int, - sampling_rate: float = 30000, + sampling_rate: float, timestamps_npy: Optional[str] = "", device="", ): @@ -159,3 +179,23 @@ def __setitem__(self, idx, system): def __call__(self, *args, **kwargs): pass + + +def get_experiments_recordings(data_paths: str) -> Iterable[str]: + # fmt: off + 
list_of_experiments_to_process = [] + for path in data_paths: + path_list = [path for path in glob.glob(os.path.join(path, "*", "*", "*")) if "Record Node" in path and "recording" in path and os.path.isdir(path)] + list_of_experiments_to_process.extend(path_list) + # fmt: on + return list_of_experiments_to_process + + +def get_analysis_paths(data_paths: str, output_folder_name: str) -> Iterable[str]: + # fmt: off + list_of_analysis_paths = [] + for path in data_paths: + path_list = [path for path in glob.glob(os.path.join(path, "*", "*", "*", "*")) if ("Record Node" in path) and ("recording" in path) and (output_folder_name in path) and os.path.isdir(path)] + list_of_analysis_paths.extend(path_list) + # fmt: on + return list_of_analysis_paths From d655bd74b5d5042130f8fdff80a7ea42b84e4e10 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Mon, 9 May 2022 07:23:58 -0600 Subject: [PATCH 06/15] update: raw load module --- miv/io/binary.py | 63 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/miv/io/binary.py b/miv/io/binary.py index 89136381..bf38481b 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -12,8 +12,10 @@ - last modified by: skim449 - date: 2022-04-11 """ -__all__ = ["Load", "load_data", "load_continuous_data_file"] +__all__ = ["load_continuous_data"] from typing import Optional + +import os import numpy as np from ast import literal_eval from glob import glob @@ -177,30 +179,67 @@ def Load( return (Data, Rate) -def load_data(): - raise NotImplementedError - - -def load_continuous_data_file(data_file: str, num_channels: int, sampling_rate: float): +def load_continuous_data( + data_path: str, + num_channels: int, + sampling_rate: float, + timestamps_path: Optional[str] = None, + start_at_zero: bool = True, +): """ Describe function Parameters ---------- - data_file: continuous.dat file from Open_Ethys recording - num_channels: number of recording channels recorded from + data_path : str + 
continuous.dat file path from Open_Ethys recording. + num_channels : int + number of recording channels recorded. Note, this method will not throw an error + if you don't provide the correct number of channels. + sampling_rate : float + data sampling rate. + timestamps_path : Optional[str] + If None, first check if the file "timestamps.npy" exists on the same directory. + If the file doesn't exist, we deduce the timestamps based on the sampling rate + and the length of the data. + start_at_zero : bool + If True, the timestamps is adjusted to start at zero. + Note, recorded timestamps might not start at zero for some reason. Returns ------- - timestamps: TimestampsType - raw_data: SignalType + timestamps: TimestampsType + raw_data: SignalType + + Raises + ------ + FileNotFoundError + If data_path is invalid. + ValueError + If the error message shows the array cannot be reshaped due to shape, + make sure the num_channels is set accurately. """ - raw_data: np.ndarray = np.memmap(data_file, dtype="int16") + # Read raw data signal + raw_data: np.ndarray = np.memmap(data_path, dtype="int16") length = raw_data.size // num_channels raw_data = np.reshape(raw_data, (length, num_channels)) - timestamps = np.array(range(0, length)) / sampling_rate + # Get timestamps_path + if timestamps_path is None: + dirname = os.path.dirname(data_path) + timestamps_path = os.path.join(dirname, "timestamps.npy") + + # Get timestamps + if os.path.exists(timestamps_path): + timestamps = np.load(timestamps_path) + timestamps /= sampling_rate + else: # If timestamps_path doesn't exist, deduce the stamps + timestamps = np.array(range(0, length)) / sampling_rate + + # Adjust timestamps to start from zero + if start_at_zero and not np.isclose(timestamps[0], 0.0): + timestamps -= timestamps[0] return timestamps, raw_data From b5fb966c2def208942f45b9f5f7aa6418ed1b11b Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Mon, 9 May 2022 07:32:31 -0600 Subject: [PATCH 07/15] update: raw load module doc 
--- miv/io/binary.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/miv/io/binary.py b/miv/io/binary.py index bf38481b..1a0f5b14 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -187,7 +187,8 @@ def load_continuous_data( start_at_zero: bool = True, ): """ - Describe function + Load single continuous data file and return timestamps and raw data in numpy array. + Typical `data_path` from OpenEphys has a name `continuous.dat`. Parameters ---------- @@ -208,8 +209,8 @@ def load_continuous_data( Returns ------- - timestamps: TimestampsType - raw_data: SignalType + timestamps: TimestampsType, numpy array + raw_data: SignalType, numpy array Raises ------ From e266b54063235ee9727d941a0cc06561637b19ae Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Tue, 10 May 2022 06:19:13 -0600 Subject: [PATCH 08/15] doc: add contribution guideline as page --- CONTRIBUTING.md | 11 ++++++++--- docs/index.rst | 11 ++++++++--- docs/overview/contribution.md | 3 +++ 3 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 docs/overview/contribution.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c34b9254..f64d7411 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ Thanks for your interest in contributing MiV-OS project. The following is a set of guidelines how to contributes. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. -#### Table Of Contents +**Table Of Contents** [TLTR! I need three-line summary!!](#three-line-summary) @@ -36,10 +36,15 @@ The following is a set of guidelines how to contributes. These are mostly guidel ### Installation and packages First **create the fork repository and clone** to your local machine. -We provide [requirements.txt](requirements.txt) to include all the dependencies. 
+We provide [requirements.txt](https://github.com/GazzolaLab/MiV-OS/blob/main/requirements.txt) to include all the dependencies that is required to develop. You can either install using `pip install -r requirements.txt` or ```bash -$ pip install -r requirements.txt +$ pip install miv-os[dev] ``` +If you are more interested in working for documentation, use +```bash +$ pip install miv-os[docs] +``` +More details are included [here](https://github.com/GazzolaLab/MiV-OS/blob/main/docs/README.md). ### Pre-Commit diff --git a/docs/index.rst b/docs/index.rst index faa40ce7..2d2e15b5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,5 @@ .. MiV-OS documentation master file, created by sphinx-quickstart on Thu Mar 24 23:35:49 2022. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. MiV-OS documentation! ===================== @@ -17,14 +15,21 @@ Installation Instruction You can also download the source code from `GitHub `_ directly. +Contribution +------------ + +Any contribution to this project is welcome! If you are interested or have any questions, please don't hesitate to contact us. +If you are interested in contributing to this project, we prepared contribution guideline :ref:`here `. + .. toctree:: :maxdepth: 2 - :caption: Contents: + :caption: Overview overview/about overview/dependencies overview/references + overview/contribution .. 
toctree:: :maxdepth: 2 diff --git a/docs/overview/contribution.md b/docs/overview/contribution.md new file mode 100644 index 00000000..5b272509 --- /dev/null +++ b/docs/overview/contribution.md @@ -0,0 +1,3 @@ +```{include} ../../CONTRIBUTING.md +:relative-images: +``` From a062975b5a21f3dce18c7a0aae6c3d69317d01f4 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Wed, 11 May 2022 09:52:41 -0600 Subject: [PATCH 09/15] update: single data module and documentation --- miv/io/binary.py | 4 +++ miv/io/data.py | 87 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 73 insertions(+), 18 deletions(-) diff --git a/miv/io/binary.py b/miv/io/binary.py index 1a0f5b14..0cb716dd 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -11,6 +11,10 @@ - date: 2019-07-27 - last modified by: skim449 - date: 2022-04-11 + +Raw Data Loader +############### + """ __all__ = ["load_continuous_data"] from typing import Optional diff --git a/miv/io/data.py b/miv/io/data.py index 377c3fde..72b529cb 100644 --- a/miv/io/data.py +++ b/miv/io/data.py @@ -1,5 +1,11 @@ __doc__ = """ +.. Note:: + For simple experiments, you may prefer to use :ref:`api/io:Raw Data Loader`. + However, we generally recommend to use ``Data`` or ``DataManager`` for + handling data, especially when you want to avoid storing raw signal in + the memory space. + Data Manager ############ @@ -19,33 +25,84 @@ from collections.abc import MutableSequence import os -import glob +from glob import glob import numpy as np +from contextlib import contextmanager +from miv.io.binary import load_continuous_data from miv.signal.filter import FilterProtocol from miv.typing import SignalType class Data: - """ - For each continues.dat file, there will be one Data object + """Single data unit handler. + + Each data unit contains a single recording. This class provides useful tools, + such as masking channel, export data, interface with other packages, etc. 
+ If you have multiple recordings you would like to handle at the same time, use + `DataManager` instead. + + By default, the following directory structure is expected in ``data_path``:: + + recording1 # <- recording data_path + ├── continuous + │   └── Rhythm_FPGA-100.0 + │   ├── continuous.dat + │   ├── synchronized_timestamps.npy + │   └── timestamps.npy + ├── events + │   ├── Message_Center-904.0 + │   │   └── TEXT_group_1 + │   │   ├── channels.npy + │   │   ├── text.npy + │   │   └── timestamps.npy + │   └── Rhythm_FPGA-100.0 + │   └── TTL_1 + │   ├── channel_states.npy + │   ├── channels.npy + │   ├── full_words.npy + │   └── timestamps.npy + ├── structure.oebin + ├── sync_messages.txt + ├── structure.oebin + └── analysis # <- post-processing result + ├── spike_data.npz + ├── plot + ├── spike + └── mea_overlay + + + Parameters + ---------- + data_path : str """ def __init__( self, data_path: str, - channels: int, - sampling_rate: float = 30000, - timestamps_npy: Optional[str] = "", ): self.data_path = data_path - self.channels = channels - self.sampling_rate = sampling_rate - self.timestamps_npy = timestamps_npy - def load( - self, - ): + @contextmanager + def load_data(self): + """ + Context manager for loading data instantly. + + Examples + -------- + >>> data = Data(data_path) + >>> with data.load() as (timestamps, raw_signal): + ... ... + + """ + try: + pass + # yield data + finally: + pass + # del data + + def load(self): """ Describe function @@ -78,12 +135,6 @@ def load( # TODO: do we want timestaps a member of the class? 
return np.array(timestamps), np.array(raw_data) - def unload( - self, - ): - # TODO: remove the data from memory - pass - def save(self, tag: str, format: str): assert tag == "continuous", "You cannot alter raw data, change the data tag" # save_path = os.path.join(self.data_path, tag) From 79daf4f57463a104f877a552d025a2fa2facf548 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Thu, 12 May 2022 05:19:13 -0600 Subject: [PATCH 10/15] rm doc: dependency page, merged to reference page --- docs/index.rst | 1 - docs/overview/dependencies.rst | 3 --- 2 files changed, 4 deletions(-) delete mode 100644 docs/overview/dependencies.rst diff --git a/docs/index.rst b/docs/index.rst index 2d2e15b5..3ebebdc6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -27,7 +27,6 @@ If you are interested in contributing to this project, we prepared contribution :caption: Overview overview/about - overview/dependencies overview/references overview/contribution diff --git a/docs/overview/dependencies.rst b/docs/overview/dependencies.rst deleted file mode 100644 index 2dcb9b93..00000000 --- a/docs/overview/dependencies.rst +++ /dev/null @@ -1,3 +0,0 @@ -************ -Dependencies -************ From ffed4bd3a6ed3ca74b6f1920243878196ef7f730 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Fri, 13 May 2022 02:01:41 -0600 Subject: [PATCH 11/15] doc: add references for neo and elephant --- docs/overview/references.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/overview/references.rst b/docs/overview/references.rst index b0d98898..680151c6 100644 --- a/docs/overview/references.rst +++ b/docs/overview/references.rst @@ -1,3 +1,15 @@ ********** References ********** + +Neural Ensemble +############### + +- Python-Neo [1]_ +- Elephant/Viziphant [2]_ + +--------------- + +.. [1] Garcia S., Guarino D., Jaillet F., Jennings T.R., Pröpper R., Rautenberg P.L., Rodgers C., Sobolev A.,Wachtler T., Yger P. and Davison A.P. 
(2014) Neo: an object model for handling electrophysiology data in multiple formats. Frontiers in Neuroinformatics 8:10: doi:10.3389/fninf.2014.00010 + +.. [2] Denker M, Yegenoglu A, Grün S (2018) Collaborative HPC-enabled workflows on the HBP Collaboratory using the Elephant framework. Neuroinformatics 2018, P19. doi:10.12751/incf.ni2018.0019 From 111c51550bd57a93787c426c467f682f37ba8c48 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Fri, 13 May 2022 02:35:21 -0600 Subject: [PATCH 12/15] update: add channel masking method --- miv/io/binary.py | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/miv/io/binary.py b/miv/io/binary.py index 0cb716dd..65b278e9 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -16,31 +16,58 @@ ############### """ -__all__ = ["load_continuous_data"] -from typing import Optional +__all__ = ["load_continuous_data", "load_recording", "oebin_read", "apply_channel_mask"] + +from typing import Any, Dict, Optional, Union, List import os import numpy as np from ast import literal_eval from glob import glob +import quantities as pq +import neo from miv.typing import SignalType, TimestampsType -def ApplyChannelMap(Data, ChannelMap): - print("Retrieving channels according to ChannelMap... ", end="") - for R, Rec in Data.items(): - if Rec.shape[1] < len(ChannelMap) or max(ChannelMap) > Rec.shape[1] - 1: +def apply_channel_mask(signal: np.ndarray, channel_mask: List[int]): + """Apply channel mask on the given signal. + + Parameters + ---------- + signal : np.ndarray + Shape of the signal is expected to be (num_data_point, num_channels). + channel_mask : List[int] + + Returns + ------- + output signal : SignalType + + """ + print("Retrieving channels according to channel_mask... ", end="") + for R, Rec in signal.items(): + if Rec.shape[1] < len(channel_mask) or max(channel_mask) > Rec.shape[1] - 1: print("") print("Not enough channels in data to apply channel map. 
Skipping...") continue - Data[R] = Data[R][:, ChannelMap] + signal[R] = signal[R][:, channel_mask] return Data -def BitsToVolts(Data, ChInfo, Unit): + signal, timestamps = load_continuous_data(file_path, num_channels, sampling_rate) + if channel_mask is not None: + signal = apply_channel_mask(signal, channel_mask) + + # TODO in the future: check inside the channel_info, + # and convert mismatch unit (mV->uV) + + signal = neo.core.AnalogSignal(signal.T, unit=unit, sampling_rate=sampling_rate) + return signal, timestamps, sampling_rate + + +def _bitsToVolts(Data, ChInfo, Unit): # TODO: need refactor print("Converting to uV... ", end="") Data = {R: Rec.astype("float32") for R, Rec in Data.items()} From 99d3f85964d03c7cc410c878a448659be27fba5f Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sat, 14 May 2022 07:27:37 -0600 Subject: [PATCH 13/15] update: binary oe readout module --- miv/io/binary.py | 198 ++++++++++++++++++++++++++++++----------------- 1 file changed, 127 insertions(+), 71 deletions(-) diff --git a/miv/io/binary.py b/miv/io/binary.py index 65b278e9..1b77eb8d 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -2,16 +2,6 @@ ------------------------------------- -We expect the data structure to follow the default format exported from OpenEphys system: `format `_. - -Original Author - -- open-ephys/analysis-tools/Python3/Binary.py (commit: 871e003) -- original author: malfatti - - date: 2019-07-27 -- last modified by: skim449 - - date: 2022-04-11 - Raw Data Loader ############### @@ -53,8 +43,68 @@ def apply_channel_mask(signal: np.ndarray, channel_mask: List[int]): signal[R] = signal[R][:, channel_mask] - return Data + return signal + + +def oebin_read(file_path: str): + """ + Oebin file reader in dictionary form + + Parameters + ---------- + file_path : str + Returns + ------- + info : Dict[str, any] + recording information stored in oebin file. + """ + # TODO: may need fix for multiple continuous data. 
+ # TODO: may need to include processor name/id + info = literal_eval(open(file_path).read()) + return info + + +def load_recording( + folder: str, + channel_mask: Optional[List[int]] = None, + unit: Union[str, pq.Quantity] = "uV", +): + """ + Loads data recorded by Open Ephys in Binary format as numpy memmap. + The path should contain + + - continuous//continuous.dat: signal (cannot have multiple file) + - continuous//timestamps.dat: timestamps + - structure.oebin: number of channels and sampling rate. + + Parameters + ---------- + folder: str + folder containing at least the subfolder 'experiment1'. + channel_mask: List[int], optional + Channel index list to ignore in import (default=None) + unit: str or pq.Quantity + Unit to return the data, either 'uV' or 'mV', case insensitive. (Default='uV') + + Returns + ------- + signal : SignalType, neo.core.AnalogSignal + sampling_rate : float + + """ + + file_path: str = glob(os.path.join(folder, "**", "*.dat", recursive=True)) + assert ( + len(file_path) == 1 + ), f"There should be only one 'continuous.dat' file. (There exists {file_path}" + + # load structure information dictionary + info_file: str = os.path.join(folder, "structure.oebin") + info: Dict[str, Any] = oebin_read(info_file) + num_channels: int = info["continuous"][0]["num_channels"] + sampling_rate: float = info["continuous"][0]["sample_rate"] + # channel_info: Dict[str, Any] = info["continuous"][0]["channels"] signal, timestamps = load_continuous_data(file_path, num_channels, sampling_rate) if channel_mask is not None: @@ -85,70 +135,76 @@ def _bitsToVolts(Data, ChInfo, Unit): # TODO: need refactor return Data -def Load( - Folder, Processor=None, Experiment=None, Recording=None, Unit="uV", ChannelMap=[] +def _load( # TODO: Need refactor + folder, processor=None, experiment=None, recording=None, unit="uV", channel_map=[] ): """ Loads data recorded by Open Ephys in Binary format as numpy memmap. 
- Load(Folder, Processor=None, Experiment=None, Recording=None, Unit='uV', ChannelMap=[]) + Here is example usage:: - Parameters - ---------- - Folder: str - Folder containing at least the subfolder 'experiment1'. - - Processor: str or None, optional - Processor number to load, according to subsubsubfolders under - Folder>experimentX/recordingY/continuous . The number used is the one - after the processor name. For example, to load data from the folder - 'Channel_Map-109_100.0' the value used should be '109'. - If not set, load all processors. - - Experiment: int or None, optional - Experiment number to load, according to subfolders under Folder. - If not set, load all experiments. - - Recording: int or None, optional - Recording number to load, according to subsubfolders under Folder>experimentX . - If not set, load all recordings. - - Unit: str or None, optional - Unit to return the data, either 'uV' or 'mV' (case insensitive). In - both cases, return data in float32. Defaults to 'uV'. - If anything else, return data in int16. - - ChannelMap: list, optional - If empty (default), load all channels. - If not empty, return only channels in ChannelMap, in the provided order. - CHANNELS ARE COUNTED STARTING AT 0. + from miv.io.Binary import load - Returns - ------- - Data: dict - Dictionary with data in the structure Data[Processor][Experiment][Recording]. + folder = '/home/user//2019-07-27_00-00-00' + Data, Rate = load(folder) + + channel_map = [0,15,1,14] + recording = 3 + Data2, Rate2 = load(folder, recording=recording, channel_map=channel_map, unit='Bits') - Rate: dict - Dictionary with sampling rates in the structure Rate[Processor][Experiment]. + Original Author: + - open-ephys/analysis-tools/Python3/Binary.py (commit: 871e003) + - original author: malfatti + - date: 2019-07-27 + - last modified by: skim449 + - date: 2022-04-11 - Example + Parameters + ---------- + folder: str + folder containing at least the subfolder 'experiment1'. 
+ + processor: str or None, optional + Processor number to load, according to subsubsubfolders under + folder>experimentX/recordingY/continuous . The number used is the one + after the processor name. For example, to load data from the folder + 'Channel_Map-109_100.0' the value used should be '109'. + If not set, load all processors. + + experiment: int or None, optional + Experiment number to load, according to subfolders under folder. + If not set, load all experiments. + + recording: int or None, optional + Recording number to load, according to subsubfolders under folder>experimentX . + If not set, load all recordings. + + unit: str or None, optional + Unit to return the data, either 'uV' or 'mV' (case insensitive). In + both cases, return data in float32. Defaults to 'uV'. + If anything else, return data in int16. + + channel_map: list, optional + If empty (default), load all channels. + If not empty, return only channels in channel_map, in the provided order. + CHANNELS ARE COUNTED STARTING AT 0. + + Returns ------- - import Binary + Data: dict + Dictionary with data in the structure Data[processor][experiment][recording]. + Rate: dict + Dictionary with sampling rates in the structure Rate[processor][experiment]. 
- Folder = '/home/user/PathToData/2019-07-27_00-00-00' - Data, Rate = Binary.Load(Folder) - ChannelMap = [0,15,1,14] - Recording = 3 - Data2, Rate2 = Binary.Load(Folder, Recording=Recording, ChannelMap=ChannelMap, Unit='Bits') """ - Files = sorted(glob(Folder + "/**/*.dat", recursive=True)) - InfoFiles = sorted(glob(Folder + "/*/*/structure.oebin")) + files = sorted(glob(folder + "/**/*.dat", recursive=True)) + info_file = sorted(glob(folder + "/*/*/structure.oebin")) Data, Rate = {}, {} - for F, File in enumerate(Files): + for F, File in enumerate(files): File = File.replace("\\", "/") # Replace windows file delims Exp, Rec, _, Proc = File.split("/")[-5:-1] Exp = str(int(Exp[10:]) - 1) @@ -160,16 +216,16 @@ def Load( if Proc not in Data.keys(): Data[Proc], Rate[Proc] = {}, {} - if Experiment: - if int(Exp) != Experiment - 1: + if experiment: + if int(Exp) != experiment - 1: continue - if Recording: - if int(Rec) != Recording - 1: + if recording: + if int(Rec) != recording - 1: continue - if Processor: - if Proc != Processor: + if processor: + if Proc != processor: continue print("Loading recording", int(Rec) + 1, "...") @@ -177,7 +233,7 @@ def Load( Data[Proc][Exp] = {} Data[Proc][Exp][Rec] = np.memmap(File, dtype="int16", mode="c") - Info = literal_eval(open(InfoFiles[F]).read()) + Info = literal_eval(open(info_file[F]).read()) ProcIndex = [ Info["continuous"].index(_) for _ in Info["continuous"] @@ -198,16 +254,16 @@ def Load( for Proc in Data.keys(): for Exp in Data[Proc].keys(): - if Unit.lower() in ["uv", "mv"]: + if unit.lower() in ["uv", "mv"]: ChInfo = Info["continuous"][ProcIndex]["channels"] - Data[Proc][Exp] = BitsToVolts(Data[Proc][Exp], ChInfo, Unit) + Data[Proc][Exp] = _bitsToVolts(Data[Proc][Exp], ChInfo, unit) - if ChannelMap: - Data[Proc][Exp] = ApplyChannelMap(Data[Proc][Exp], ChannelMap) + if channel_map: + Data[Proc][Exp] = apply_channel_mask(Data[Proc][Exp], channel_map) print("Done.") - return (Data, Rate) + return Data, Rate def 
load_continuous_data( From 42ffe75a9cd140407350e4a252cfbe52ab3717c8 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sat, 14 May 2022 07:53:33 -0600 Subject: [PATCH 14/15] update: single-data module add masking --- miv/io/data.py | 87 +++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/miv/io/data.py b/miv/io/data.py index 72b529cb..63af561d 100644 --- a/miv/io/data.py +++ b/miv/io/data.py @@ -1,13 +1,17 @@ __doc__ = """ +.. Note:: + We expect the data structure to follow the default format + exported from OpenEphys system: + `format `_. + .. Note:: For simple experiments, you may prefer to use :ref:`api/io:Raw Data Loader`. However, we generally recommend to use ``Data`` or ``DataManager`` for - handling data, especially when you want to avoid storing raw signal in - the memory space. + handling data, especially when the size of the raw data is large. -Data Manager -############ +Module +###### .. currentmodule:: miv.io.data @@ -20,16 +24,17 @@ """ __all__ = ["Data", "DataManager"] -from typing import Any, Optional, Iterable, Callable +from typing import Any, Optional, Iterable, Callable, List from collections.abc import MutableSequence +import logging import os from glob import glob import numpy as np from contextlib import contextmanager -from miv.io.binary import load_continuous_data +from miv.io.binary import load_continuous_data, load_recording from miv.signal.filter import FilterProtocol from miv.typing import SignalType @@ -42,7 +47,7 @@ class Data: If you have multiple recordings you would like to handle at the same time, use `DataManager` instead. 
- By default, the following directory structure is expected in ``data_path``:: + By default recording setup, the following directory structure is expected in ``data_path``:: recording1 # <- recording data_path ├── continuous @@ -62,7 +67,6 @@ class Data: │   ├── channels.npy │   ├── full_words.npy │   └── timestamps.npy - ├── structure.oebin ├── sync_messages.txt ├── structure.oebin └── analysis # <- post-processing result @@ -82,9 +86,10 @@ def __init__( data_path: str, ): self.data_path = data_path + self.channel_mask = [] @contextmanager - def load_data(self): + def load(self): """ Context manager for loading data instantly. @@ -94,46 +99,48 @@ def load_data(self): >>> with data.load() as (timestamps, raw_signal): ... ... + Returns + ------- + signal : SignalType, neo.core.AnalogSignal + timestamps : TimestampsType, numpy array + sampling_rate : float + """ + # TODO: Not sure this is safe implementation try: - pass - # yield data + signal, timestamps, sampling_rate = load_recording( + self.data_path, self.masking_channel_list + ) + yield signal, timestamps, sampling_rate + except FileNotFoundError as e: + logging.error( + f"The file could not be loaded because the file {self.data_path} does not exist." + ) + logging.error(e.strerror) + except ValueError as e: + logging.error( + "The data size does not match the number of channel. Check if oebin or continuous.dat file is corrupted." + ) + logging.error(e.strerror) finally: - pass - # del data - - def load(self): + del timestamps + del signal + def set_channel_mask(self, channel_id: List[int]): """ - Describe function - Parameters - ---------- - data_file: continuous.dat file from Open_Ethys recording - channels: number of recording channels recorded from + Set the channel masking. - Returns - ------- - raw_data: - timestamps: + Notes + ----- + If the index exceed the number of channels, it will be ignored. + Parameters + ---------- + channel_id : List[int] + List of channel id that will be ignored. 
""" - - raw_data: np.ndarray = np.memmap(self.data_path, dtype="int16") - length = raw_data.size // self.channels - raw_data = np.reshape(raw_data, (length, self.channels)) - - timestamps_zeroed = np.array(range(0, length)) / self.sampling_rate - if self.timestamps_npy == "": - timestamps = timestamps_zeroed - else: - timestamps = np.load(self.timestamps_npy) / self.sampling_rate - - # only take first 32 channels - raw_data = raw_data[:, 0 : self.channels] - - # TODO: do we want timestaps a member of the class? - return np.array(timestamps), np.array(raw_data) + self.masking_channel_list = channel_id def save(self, tag: str, format: str): assert tag == "continuous", "You cannot alter raw data, change the data tag" From ccd84ebe1320236141a1c310d11dff5b000dfd44 Mon Sep 17 00:00:00 2001 From: Seung Hyun Kim Date: Sun, 15 May 2022 09:26:51 -0500 Subject: [PATCH 15/15] update: finish channel masking --- miv/io/binary.py | 24 ++++++++++-------------- miv/io/data.py | 23 ++++++++++++++--------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/miv/io/binary.py b/miv/io/binary.py index 1b77eb8d..8e66ade8 100644 --- a/miv/io/binary.py +++ b/miv/io/binary.py @@ -8,7 +8,7 @@ """ __all__ = ["load_continuous_data", "load_recording", "oebin_read", "apply_channel_mask"] -from typing import Any, Dict, Optional, Union, List +from typing import Any, Dict, Optional, Union, List, Set import os import numpy as np @@ -20,29 +20,25 @@ from miv.typing import SignalType, TimestampsType -def apply_channel_mask(signal: np.ndarray, channel_mask: List[int]): +def apply_channel_mask(signal: np.ndarray, channel_mask: Set[int]): """Apply channel mask on the given signal. Parameters ---------- signal : np.ndarray Shape of the signal is expected to be (num_data_point, num_channels). - channel_mask : List[int] + channel_mask : Set[int] Returns ------- output signal : SignalType """ - print("Retrieving channels according to channel_mask... 
", end="") - for R, Rec in signal.items(): - if Rec.shape[1] < len(channel_mask) or max(channel_mask) > Rec.shape[1] - 1: - print("") - print("Not enough channels in data to apply channel map. Skipping...") - continue - - signal[R] = signal[R][:, channel_mask] + num_channels = signal.shape[1] + channel_index = set(range(num_channels)) - channel_mask + channel_index = np.array(np.sort(list(channel_index))) + signal = signal[:, channel_index] return signal @@ -67,7 +63,7 @@ def oebin_read(file_path: str): def load_recording( folder: str, - channel_mask: Optional[List[int]] = None, + channel_mask: Optional[Set[int]] = None, unit: Union[str, pq.Quantity] = "uV", ): """ @@ -82,7 +78,7 @@ def load_recording( ---------- folder: str folder containing at least the subfolder 'experiment1'. - channel_mask: List[int], optional + channel_mask: Set[int], optional Channel index list to ignore in import (default=None) unit: str or pq.Quantity Unit to return the data, either 'uV' or 'mV', case insensitive. 
(Default='uV') @@ -113,7 +109,7 @@ def load_recording( # TODO in the future: check inside the channel_info, # and convert mismatch unit (mV->uV) - signal = neo.core.AnalogSignal(signal.T, unit=unit, sampling_rate=sampling_rate) + signal = neo.core.AnalogSignal(signal, unit=unit, sampling_rate=sampling_rate) return signal, timestamps, sampling_rate diff --git a/miv/io/data.py b/miv/io/data.py index 63af561d..8a1ee9b8 100644 --- a/miv/io/data.py +++ b/miv/io/data.py @@ -86,7 +86,7 @@ def __init__( data_path: str, ): self.data_path = data_path - self.channel_mask = [] + self.masking_channel_set = set() @contextmanager def load(self): @@ -109,7 +109,7 @@ def load(self): # TODO: Not sure this is safe implementation try: signal, timestamps, sampling_rate = load_recording( - self.data_path, self.masking_channel_list + self.data_path, self.masking_channel_set ) yield signal, timestamps, sampling_rate except FileNotFoundError as e: @@ -126,21 +126,26 @@ def load(self): del timestamps del signal - def set_channel_mask(self, channel_id: List[int]): + def set_channel_mask(self, channel_id: Iterable[int]): """ - Set the channel masking. + Parameters + ---------- + channel_id : Iterable[int], list + List of channel id that will be ignored. + Notes ----- If the index exceed the number of channels, it will be ignored. - Parameters - ---------- - channel_id : List[int] - List of channel id that will be ignored. + Examples + -------- + >>> data = Data(data_path) + >>> data.set_channel_mask(range(12,23)) + """ - self.masking_channel_list = channel_id + self.masking_channel_set.update(channel_id) def save(self, tag: str, format: str): assert tag == "continuous", "You cannot alter raw data, change the data tag"