From 737772ffd2745cb36f741f54677acbc2b203b711 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sat, 7 May 2022 08:42:59 -0500
Subject: [PATCH 01/15] wip: documentation on signal pre-processing

---
 .../miv.signal.filter.FilterCollection.rst    |  7 ----
 docs/api/signal.rst                           | 37 +++----------------
 miv/signal/filter/butter_bandpass_filter.py   |  4 +-
 miv/signal/filter/filter_collection.py        | 21 ++++++++++-
 miv/signal/filter/protocol.py                 |  4 ++
 miv/signal/spike/detection.py                 | 18 ++++++++-
 6 files changed, 50 insertions(+), 41 deletions(-)
 delete mode 100644 docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst
diff --git a/docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst b/docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst
deleted file mode 100644
index b21e210b..00000000
--- a/docs/api/_toctree/FilterAPI/miv.signal.filter.FilterCollection.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-﻿miv.signal.filter.FilterCollection
-==================================
-
-.. currentmodule:: miv.signal.filter
-
-.. autoclass:: FilterCollection
-   :members: append, insert
diff --git a/docs/api/signal.rst b/docs/api/signal.rst
index b8461213..2f1ad546 100644
--- a/docs/api/signal.rst
+++ b/docs/api/signal.rst
@@ -1,34 +1,9 @@
-*************************
-Signal Processing Modules
-*************************
+*********************
+Signal Pre-Processing
+*********************
 
+<Preprocessing text>
 
-Filter
-######
+.. automodule:: miv.signal.filter.filter_collection
 
-.. currentmodule:: miv.signal.filter
-
-.. automodule:: miv.signal.filter
-
-   .. autosummary::
-       :nosignatures:
-       :toctree: _toctree/FilterAPI
-
-       FilterProtocol
-       ButterBandpass
-       FilterCollection
-
-Spike Detection
-###############
-
-.. automodule:: miv.signal.spike
-
-   .. autosummary::
-       :nosignatures:
-       :toctree: _toctree/DetectionAPI
-
-       SpikeDetectionProtocol
-       ThresholdCutoff
-
-Spike Sorting
-#############
+.. automodule:: miv.signal.spike.detection
diff --git a/miv/signal/filter/butter_bandpass_filter.py b/miv/signal/filter/butter_bandpass_filter.py
index 3d882f8f..d0d4a923 100644
--- a/miv/signal/filter/butter_bandpass_filter.py
+++ b/miv/signal/filter/butter_bandpass_filter.py
@@ -32,7 +32,9 @@ class ButterBandpass:
     order: int = 5
     tag: str = ""
 
-    def __call__(self, signal: SignalType, sampling_rate: float) -> SignalType:
+    def __call__(
+        self, signal: SignalType, sampling_rate: float, **kwargs
+    ) -> SignalType:
         b, a = self._butter_bandpass(sampling_rate)
         y = sps.lfilter(b, a, signal)
         return y
diff --git a/miv/signal/filter/filter_collection.py b/miv/signal/filter/filter_collection.py
index 26ea0f7d..35338479 100644
--- a/miv/signal/filter/filter_collection.py
+++ b/miv/signal/filter/filter_collection.py
@@ -1,4 +1,23 @@
-__doc__ = ""
+__doc__ = """
+
+Signal Filter
+#############
+
+<filter doc string>
+
+.. currentmodule:: miv.signal.filter
+
+.. autoclass:: FilterCollection
+   :members: append, insert
+
+.. autosummary::
+   :nosignatures:
+   :toctree: _toctree/FilterAPI
+
+   FilterProtocol
+   ButterBandpass
+
+"""
 __all__ = ["FilterCollection"]
 
 from typing import Union, List
diff --git a/miv/signal/filter/protocol.py b/miv/signal/filter/protocol.py
index ece88bca..9393f508 100644
--- a/miv/signal/filter/protocol.py
+++ b/miv/signal/filter/protocol.py
@@ -24,3 +24,7 @@ def __call__(self, array: SignalType, sampling_rate: float, **kwargs) -> SignalT
         samping_rate : float
         """
         ...
+
+    def __repr__(self) -> str:
+        """String representation for interactive debugging."""
+        ...
diff --git a/miv/signal/spike/detection.py b/miv/signal/spike/detection.py
index 2e9657cc..d179148e 100644
--- a/miv/signal/spike/detection.py
+++ b/miv/signal/spike/detection.py
@@ -1,4 +1,20 @@
-__doc__ = ""
+__doc__ = """
+
+Spike Detection
+###############
+
+<detection doc string>
+
+.. currentmodule:: miv.signal.spike
+
+.. autosummary::
+   :nosignatures:
+   :toctree: _toctree/DetectionAPI
+
+   SpikeDetectionProtocol
+   ThresholdCutoff
+
+"""
 __all__ = ["ThresholdCutoff"]
 
 from typing import Union, List, Iterable

From 633f10f266bd4e4c8f0d544cc5a40f63b4133386 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sat, 7 May 2022 08:43:30 -0500
Subject: [PATCH 02/15] update: fix link for FilterCollection

---
 docs/guide/signal_processing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guide/signal_processing.md b/docs/guide/signal_processing.md
index 25ebe1fb..e61f4a6c 100644
--- a/docs/guide/signal_processing.md
+++ b/docs/guide/signal_processing.md
@@ -104,7 +104,7 @@ from miv.signal.filter import FilterCollection, ButterBandpass
 
 [Here](../api/signal.html#filter) is the list of provided filters.
 All filters are `Callable`, taking `signal` and `sampling_rate` as parameters.
-To define a multiple filters together, we provide [`FilterCollection`](../api/_toctree/FilterAPI/miv.signal.filter.FilterCollection) that execute multiple filters in a series.
+To define multiple filters together, we provide [`FilterCollection`](miv.signal.filter.FilterCollection) that executes multiple filters in series.
 
 ```{code-cell} ipython3
 # Butter bandpass filter

From cb20afa77f03b0e2098efd2e5f28263f14646d14 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sun, 8 May 2022 09:22:13 -0600
Subject: [PATCH 03/15] add: coverage check in make command

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 81021c25..e5d5a257 100644
--- a/Makefile
+++ b/Makefile
@@ -7,5 +7,5 @@ mypy:
 coverage:
 	@pytest --cov=miv tests/
 
-all:test mypy
+all:test mypy coverage
 ci: test mypy

From 090490ba7e818024f6f029d3663315497ff24afd Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sun, 8 May 2022 11:15:27 -0600
Subject: [PATCH 04/15] doc: Add data_management guide and rename sections

---
 docs/api/io.rst               |  9 ++--
 docs/api/signal.rst           |  2 -
 docs/guide/data_management.md | 86 +++++++++++++++++++++++++++++++++++
 docs/index.rst                |  1 +
 4 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 docs/guide/data_management.md

diff --git a/docs/api/io.rst b/docs/api/io.rst
index da6820c9..c47f8dee 100644
--- a/docs/api/io.rst
+++ b/docs/api/io.rst
@@ -1,6 +1,9 @@
-*********************
-Input / Output Module
-*********************
+********************
+Data Managing Module
+********************
+
+.. automodule:: miv.io.data
+   :members:
 
 .. automodule:: miv.io.binary
    :members:
diff --git a/docs/api/signal.rst b/docs/api/signal.rst
index 2f1ad546..2f096736 100644
--- a/docs/api/signal.rst
+++ b/docs/api/signal.rst
@@ -2,8 +2,6 @@
 Signal Pre-Processing
 *********************
 
-<Preprocessing text>
-
 .. automodule:: miv.signal.filter.filter_collection
 
 .. automodule:: miv.signal.spike.detection
diff --git a/docs/guide/data_management.md b/docs/guide/data_management.md
new file mode 100644
index 00000000..0735c012
--- /dev/null
+++ b/docs/guide/data_management.md
@@ -0,0 +1,86 @@
+---
+jupytext:
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.13
+    jupytext_version: 1.13.8
+kernelspec:
+  display_name: Python 3 (ipykernel)
+  language: python
+  name: python3
+---
+
+# Data Management
+
+```{code-cell} ipython3
+:tags: [hide-cell]
+
+import os
+import numpy as np
+import quantities as pq
+import matplotlib.pyplot as plt
+
+```
+
+## 1. Data Load
+
+```{code-cell} ipython3
+:tags: [hide-cell]
+
+from miv.io import load_data
+from miv.io.data import Data, Dataset
+```
+
+```{code-cell} ipython3
+# Load dataset from OpenEphys recording
+folder_path: str = "~/Open Ephys/2022-03-10-16-19-09"  # Data Path
+# Provide the path of experimental recording tree to the DataSet class
+# Data set class will load the data and create a list of objects for each data
+# dataset = load_data(folder_path, device="OpenEphys")
+dataset = Dataset(data_folder_path=folder_path,
+                  device="OpenEphys",
+                  channels=32,
+                  sampling_rate=30E3,
+                  timestamps_npy="", # We can read similar to continuous.dat
+
+                  )
+#TODO: synchornized_timestamp what for shifted ??
+# Masking channels for data set. Channels can be a list.
+# Show user the tree. Implement representation method. filter_collection.html#FilterCollection.insert
+# An example code to get the tree https://github.com/skim0119/mindinvitro/blob/master/utility/common.py
+# Trimming the tree??
+```
+
+### 1.1. Meta Data Structure
+
+```{code-cell} ipython3
+# Get signal and rate(hz)
+record_node: int = dataset.get_nodes[0]
+recording = dataset[record_node]["experiment1"]["recording1"]   # Returns the object for recording 1
+# TODO: does openephys returns the timestamp??
+timestamp = recording.timestamp # returns the time stamp for the recording.
+
+signal, _, rate = recording.continuous["100"]
+# time = recording.continuous["100"].timestamp / rate
+num_channels = signal.shape[1]
+```
+
+### 1.2 Raw Data
+
++++
+
+If the data is provided as a single `continuous.dat` file instead of the meta-data structure, the user must provide the number of channels and the sampling rate in order to import the data accurately.
+
+> **WARNING** The size of the raw data file can be _large_ depending on the sampling rate and the recording duration. We highly recommend using the meta-data structure to handle data files, since it only loads the data during processing and unloads it once the processing is done.
+
+```{code-cell} ipython3
+from miv.io import load_continuous_data_file
+
+datapath = 'continuous.dat'
+rate = 30_000
+num_channel = 64
+timestamps, signal = load_continuous_data_file(datapath, num_channel, rate)
+```
+
+## 2. Instant Visualization
diff --git a/docs/index.rst b/docs/index.rst
index 0900f672..faa40ce7 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -30,6 +30,7 @@ You can also download the source code from `GitHub <https://github.com/GazzolaLa
    :maxdepth: 2
    :caption: User Guide
 
+   guide/data_management
    guide/signal_processing
    guide/spike_cutout
    guide/spike_sorting

From e0a201c9ec0e5f6b7f5b6eaa82f2f4bd5db3e624 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Mon, 9 May 2022 05:45:22 -0600
Subject: [PATCH 05/15] wip: doc update

---
 docs/api/io.rst    |  1 -
 miv/io/__init__.py |  1 +
 miv/io/binary.py   |  2 ++
 miv/io/data.py     | 54 ++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/docs/api/io.rst b/docs/api/io.rst
index c47f8dee..4c67339e 100644
--- a/docs/api/io.rst
+++ b/docs/api/io.rst
@@ -3,7 +3,6 @@ Data Managing Module
 ********************
 
 .. automodule:: miv.io.data
-   :members:
 
 .. automodule:: miv.io.binary
    :members:
diff --git a/miv/io/__init__.py b/miv/io/__init__.py
index e49ec5ed..0ec7ac30 100644
--- a/miv/io/__init__.py
+++ b/miv/io/__init__.py
@@ -1 +1,2 @@
+from miv.io.data import *
 from miv.io.binary import *
diff --git a/miv/io/binary.py b/miv/io/binary.py
index ce9cf107..89136381 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -1,5 +1,7 @@
 __doc__ = """
 
+-------------------------------------
+
 We expect the data structure to follow the default format exported from OpenEphys system: `format <https://open-ephys.atlassian.net/wiki/spaces/OEW/pages/491632/Data+format>`_.
 
 Original Author
diff --git a/miv/io/data.py b/miv/io/data.py
index 5486b2c9..377c3fde 100644
--- a/miv/io/data.py
+++ b/miv/io/data.py
@@ -1,7 +1,27 @@
+__doc__ = """
+
+Data Manager
+############
+
+.. currentmodule:: miv.io.data
+
+.. autoclass:: Data
+   :members:
+
+.. autoclass:: DataManager
+   :members:
+
+"""
+__all__ = ["Data", "DataManager"]
+
+from typing import Any, Optional, Iterable, Callable
+
 from collections.abc import MutableSequence
-from typing import Optional
+
 import os
+import glob
 import numpy as np
+
 from miv.signal.filter import FilterProtocol
 from miv.typing import SignalType
 
@@ -32,13 +52,13 @@ def load(
 
         Parameters
         ----------
-            data_file: continuous.dat file from Open_Ethys recording
-            channels: number of recording channels recorded from
+        data_file: continuous.dat file from Open_Ethys recording
+        channels: number of recording channels recorded from
 
         Returns
         -------
-            raw_data:
-            timestamps:
+        raw_data:
+        timestamps:
 
         """
 
@@ -81,12 +101,12 @@ def save(self, tag: str, format: str):
             )
 
 
-class Dataset(MutableSequence):
+class DataManager(MutableSequence):
     def __init__(
         self,
         data_folder_path: str,
         channels: int,
-        sampling_rate: float = 30000,
+        sampling_rate: float,
         timestamps_npy: Optional[str] = "",
         device="",
     ):
@@ -159,3 +179,23 @@ def __setitem__(self, idx, system):
 
     def __call__(self, *args, **kwargs):
         pass
+
+
+def get_experiments_recordings(data_paths: str) -> Iterable[str]:
+    # fmt: off
+    list_of_experiments_to_process = []
+    for path in data_paths:
+        path_list = [path for path in glob.glob(os.path.join(path, "*", "*", "*")) if "Record Node" in path and "recording" in path and os.path.isdir(path)]
+        list_of_experiments_to_process.extend(path_list)
+    # fmt: on
+    return list_of_experiments_to_process
+
+
+def get_analysis_paths(data_paths: str, output_folder_name: str) -> Iterable[str]:
+    # fmt: off
+    list_of_analysis_paths = []
+    for path in data_paths:
+        path_list = [path for path in glob.glob(os.path.join(path, "*", "*", "*", "*")) if ("Record Node" in path) and ("recording" in path) and (output_folder_name in path) and os.path.isdir(path)]
+        list_of_analysis_paths.extend(path_list)
+    # fmt: on
+    return list_of_analysis_paths

From d655bd74b5d5042130f8fdff80a7ea42b84e4e10 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Mon, 9 May 2022 07:23:58 -0600
Subject: [PATCH 06/15] update: raw load module

---
 miv/io/binary.py | 63 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 12 deletions(-)

diff --git a/miv/io/binary.py b/miv/io/binary.py
index 89136381..bf38481b 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -12,8 +12,10 @@
 - last modified by: skim449
   - date: 2022-04-11
 """
-__all__ = ["Load", "load_data", "load_continuous_data_file"]
+__all__ = ["load_continuous_data"]
 from typing import Optional
+
+import os
 import numpy as np
 from ast import literal_eval
 from glob import glob
@@ -177,30 +179,67 @@ def Load(
     return (Data, Rate)
 
 
-def load_data():
-    raise NotImplementedError
-
-
-def load_continuous_data_file(data_file: str, num_channels: int, sampling_rate: float):
+def load_continuous_data(
+    data_path: str,
+    num_channels: int,
+    sampling_rate: float,
+    timestamps_path: Optional[str] = None,
+    start_at_zero: bool = True,
+):
     """
     Describe function
 
     Parameters
     ----------
-        data_file: continuous.dat file from Open_Ethys recording
-        num_channels: number of recording channels recorded from
+    data_path : str
+        continuous.dat file path from Open_Ethys recording.
+    num_channels : int
+        number of recording channels recorded. Note, this method will not throw an error
+        if you don't provide the correct number of channels.
+    sampling_rate : float
+        data sampling rate.
+    timestamps_path : Optional[str]
+        If None, first check if the file "timestamps.npy" exists on the same directory.
+        If the file doesn't exist, we deduce the timestamps based on the sampling rate
+        and the length of the data.
+    start_at_zero : bool
+        If True, the timestamps is adjusted to start at zero.
+        Note, recorded timestamps might not start at zero for some reason.
 
     Returns
     -------
-        timestamps: TimestampsType
-        raw_data: SignalType
+    timestamps: TimestampsType
+    raw_data: SignalType
+
+    Raises
+    ------
+    FileNotFoundError
+        If data_path is invalid.
+    ValueError
+        If the error message shows the array cannot be reshaped due to shape,
+        make sure the num_channels is set accurately.
 
     """
 
-    raw_data: np.ndarray = np.memmap(data_file, dtype="int16")
+    # Read raw data signal
+    raw_data: np.ndarray = np.memmap(data_path, dtype="int16")
     length = raw_data.size // num_channels
     raw_data = np.reshape(raw_data, (length, num_channels))
 
-    timestamps = np.array(range(0, length)) / sampling_rate
+    # Get timestamps_path
+    if timestamps_path is None:
+        dirname = os.path.dirname(data_path)
+        timestamps_path = os.path.join(dirname, "timestamps.npy")
+
+    # Get timestamps
+    if os.path.exists(timestamps_path):
+        timestamps = np.load(timestamps_path)
+        timestamps /= sampling_rate
+    else:  # If timestamps_path doesn't exist, deduce the stamps
+        timestamps = np.array(range(0, length)) / sampling_rate
+
+    # Adjust timestamps to start from zero
+    if start_at_zero and not np.isclose(timestamps[0], 0.0):
+        timestamps -= timestamps[0]
 
     return timestamps, raw_data

From b5fb966c2def208942f45b9f5f7aa6418ed1b11b Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Mon, 9 May 2022 07:32:31 -0600
Subject: [PATCH 07/15] update: raw load module doc

---
 miv/io/binary.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/miv/io/binary.py b/miv/io/binary.py
index bf38481b..1a0f5b14 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -187,7 +187,8 @@ def load_continuous_data(
     start_at_zero: bool = True,
 ):
     """
-    Describe function
+    Load single continuous data file and return timestamps and raw data in numpy array.
+    Typical `data_path` from OpenEphys has a name `continuous.dat`.
 
     Parameters
     ----------
@@ -208,8 +209,8 @@ def load_continuous_data(
 
     Returns
     -------
-    timestamps: TimestampsType
-    raw_data: SignalType
+    timestamps: TimestampsType, numpy array
+    raw_data: SignalType, numpy array
 
     Raises
     ------

From e266b54063235ee9727d941a0cc06561637b19ae Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Tue, 10 May 2022 06:19:13 -0600
Subject: [PATCH 08/15] doc: add contribution guideline as page

---
 CONTRIBUTING.md               | 11 ++++++++---
 docs/index.rst                | 11 ++++++++---
 docs/overview/contribution.md |  3 +++
 3 files changed, 19 insertions(+), 6 deletions(-)
 create mode 100644 docs/overview/contribution.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c34b9254..f64d7411 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,7 +4,7 @@ Thanks for your interest in contributing MiV-OS project.
 
 The following is a set of guidelines how to contributes. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request.
 
-#### Table Of Contents
+**Table Of Contents**
 
 [TLTR! I need three-line summary!!](#three-line-summary)
 
@@ -36,10 +36,15 @@ The following is a set of guidelines how to contributes. These are mostly guidel
 ### Installation and packages
 
 First **create the fork repository and clone** to your local machine.
-We provide [requirements.txt](requirements.txt) to include all the dependencies.
+We provide [requirements.txt](https://github.com/GazzolaLab/MiV-OS/blob/main/requirements.txt), which lists all the dependencies required for development. You can install them either using `pip install -r requirements.txt` or
 ```bash
-$ pip install -r requirements.txt
+$ pip install miv-os[dev]
 ```
+If you are more interested in working on the documentation, use
+```bash
+$ pip install miv-os[docs]
+```
+More details are included [here](https://github.com/GazzolaLab/MiV-OS/blob/main/docs/README.md).
 
 ### Pre-Commit
 
diff --git a/docs/index.rst b/docs/index.rst
index faa40ce7..2d2e15b5 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,7 +1,5 @@
 .. MiV-OS documentation master file, created by
    sphinx-quickstart on Thu Mar 24 23:35:49 2022.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
 
 MiV-OS documentation!
 =====================
@@ -17,14 +15,21 @@ Installation Instruction
 
 You can also download the source code from `GitHub <https://github.com/GazzolaLab/MiV-OS>`_ directly.
 
+Contribution
+------------
+
+Any contribution to this project is welcome! If you are interested or have any questions, please don't hesitate to contact us.
+If you are interested in contributing to this project, we prepared contribution guideline :ref:`here <overview/contribution:Contributing to MiV-OS>`.
+
 
 .. toctree::
    :maxdepth: 2
-   :caption: Contents:
+   :caption: Overview
 
    overview/about
    overview/dependencies
    overview/references
+   overview/contribution
 
 .. toctree::
    :maxdepth: 2
diff --git a/docs/overview/contribution.md b/docs/overview/contribution.md
new file mode 100644
index 00000000..5b272509
--- /dev/null
+++ b/docs/overview/contribution.md
@@ -0,0 +1,3 @@
+```{include} ../../CONTRIBUTING.md
+:relative-images:
+```

From a062975b5a21f3dce18c7a0aae6c3d69317d01f4 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Wed, 11 May 2022 09:52:41 -0600
Subject: [PATCH 09/15] update: single data module and documentation

---
 miv/io/binary.py |  4 +++
 miv/io/data.py   | 87 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/miv/io/binary.py b/miv/io/binary.py
index 1a0f5b14..0cb716dd 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -11,6 +11,10 @@
   - date: 2019-07-27
 - last modified by: skim449
   - date: 2022-04-11
+
+Raw Data Loader
+###############
+
 """
 __all__ = ["load_continuous_data"]
 from typing import Optional
diff --git a/miv/io/data.py b/miv/io/data.py
index 377c3fde..72b529cb 100644
--- a/miv/io/data.py
+++ b/miv/io/data.py
@@ -1,5 +1,11 @@
 __doc__ = """
 
+.. Note::
+    For simple experiments, you may prefer to use :ref:`api/io:Raw Data Loader`.
+    However, we generally recommend to use ``Data`` or ``DataManager`` for
+    handling data, especially when you want to avoid storing raw signal in
+    the memory space.
+
 Data Manager
 ############
 
@@ -19,33 +25,84 @@
 from collections.abc import MutableSequence
 
 import os
-import glob
+from glob import glob
 import numpy as np
+from contextlib import contextmanager
 
+from miv.io.binary import load_continuous_data
 from miv.signal.filter import FilterProtocol
 from miv.typing import SignalType
 
 
 class Data:
-    """
-    For each continues.dat file, there will be one Data object
+    """Single data unit handler.
+
+    Each data unit contains a single recording. This class provides useful tools,
+    such as masking channel, export data, interface with other packages, etc.
+    If you have multiple recordings you would like to handle at the same time, use
+    `DataManager` instead.
+
+    By default, the following directory structure is expected in ``data_path``::
+
+        recording1                              # <- recording data_path
+        ├── continuous
+        │   └── Rhythm_FPGA-100.0
+        │       ├── continuous.dat
+        │       ├── synchronized_timestamps.npy
+        │       └── timestamps.npy
+        ├── events
+        │   ├── Message_Center-904.0
+        │   │   └── TEXT_group_1
+        │   │       ├── channels.npy
+        │   │       ├── text.npy
+        │   │       └── timestamps.npy
+        │   └── Rhythm_FPGA-100.0
+        │       └── TTL_1
+        │           ├── channel_states.npy
+        │           ├── channels.npy
+        │           ├── full_words.npy
+        │           └── timestamps.npy
+        ├── structure.oebin
+        ├── sync_messages.txt
+        ├── structure.oebin
+        └── analysis                            # <- post-processing result
+            ├── spike_data.npz
+            ├── plot
+            ├── spike
+            └── mea_overlay
+
+
+        Parameters
+        ----------
+        data_path : str
     """
 
     def __init__(
         self,
         data_path: str,
-        channels: int,
-        sampling_rate: float = 30000,
-        timestamps_npy: Optional[str] = "",
     ):
         self.data_path = data_path
-        self.channels = channels
-        self.sampling_rate = sampling_rate
-        self.timestamps_npy = timestamps_npy
 
-    def load(
-        self,
-    ):
+    @contextmanager
+    def load_data(self):
+        """
+        Context manager for loading data instantly.
+
+        Examples
+        --------
+            >>> data = Data(data_path)
+            >>> with data.load_data() as (timestamps, raw_signal):
+            ...     ...
+
+        """
+        try:
+            pass
+            # yield data
+        finally:
+            pass
+            # del data
+
+    def load(self):
 
         """
         Describe function
@@ -78,12 +135,6 @@ def load(
         # TODO: do we want timestaps a member of the class?
         return np.array(timestamps), np.array(raw_data)
 
-    def unload(
-        self,
-    ):
-        # TODO: remove the data from memory
-        pass
-
     def save(self, tag: str, format: str):
         assert tag == "continuous", "You cannot alter raw data, change the data tag"
         # save_path = os.path.join(self.data_path, tag)

From 79daf4f57463a104f877a552d025a2fa2facf548 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Thu, 12 May 2022 05:19:13 -0600
Subject: [PATCH 10/15] rm doc: dependency page, merged to reference page

---
 docs/index.rst                 | 1 -
 docs/overview/dependencies.rst | 3 ---
 2 files changed, 4 deletions(-)
 delete mode 100644 docs/overview/dependencies.rst

diff --git a/docs/index.rst b/docs/index.rst
index 2d2e15b5..3ebebdc6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -27,7 +27,6 @@ If you are interested in contributing to this project, we prepared contribution
    :caption: Overview
 
    overview/about
-   overview/dependencies
    overview/references
    overview/contribution
 
diff --git a/docs/overview/dependencies.rst b/docs/overview/dependencies.rst
deleted file mode 100644
index 2dcb9b93..00000000
--- a/docs/overview/dependencies.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-************
-Dependencies
-************

From ffed4bd3a6ed3ca74b6f1920243878196ef7f730 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Fri, 13 May 2022 02:01:41 -0600
Subject: [PATCH 11/15] doc: add references for neo and elephant

---
 docs/overview/references.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/overview/references.rst b/docs/overview/references.rst
index b0d98898..680151c6 100644
--- a/docs/overview/references.rst
+++ b/docs/overview/references.rst
@@ -1,3 +1,15 @@
 **********
 References
 **********
+
+Neural Ensemble
+###############
+
+- Python-Neo [1]_
+- Elephant/Viziphant [2]_
+
+---------------
+
+.. [1] Garcia S., Guarino D., Jaillet F., Jennings T.R., Pröpper R., Rautenberg P.L., Rodgers C., Sobolev A.,Wachtler T., Yger P. and Davison A.P. (2014) Neo: an object model for handling electrophysiology data in multiple formats. Frontiers in Neuroinformatics 8:10: doi:10.3389/fninf.2014.00010
+
+.. [2] Denker M, Yegenoglu A, Grün S (2018) Collaborative HPC-enabled workflows on the HBP Collaboratory using the Elephant framework. Neuroinformatics 2018, P19. doi:10.12751/incf.ni2018.0019

From 111c51550bd57a93787c426c467f682f37ba8c48 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Fri, 13 May 2022 02:35:21 -0600
Subject: [PATCH 12/15] update: add channel masking method

---
 miv/io/binary.py | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/miv/io/binary.py b/miv/io/binary.py
index 0cb716dd..65b278e9 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -16,31 +16,58 @@
 ###############
 
 """
-__all__ = ["load_continuous_data"]
-from typing import Optional
+__all__ = ["load_continuous_data", "load_recording", "oebin_read", "apply_channel_mask"]
+
+from typing import Any, Dict, Optional, Union, List
 
 import os
 import numpy as np
 from ast import literal_eval
 from glob import glob
+import quantities as pq
+import neo
 
 from miv.typing import SignalType, TimestampsType
 
 
-def ApplyChannelMap(Data, ChannelMap):
-    print("Retrieving channels according to ChannelMap... ", end="")
-    for R, Rec in Data.items():
-        if Rec.shape[1] < len(ChannelMap) or max(ChannelMap) > Rec.shape[1] - 1:
+def apply_channel_mask(signal: np.ndarray, channel_mask: List[int]):
+    """Apply channel mask on the given signal.
+
+    Parameters
+    ----------
+    signal : np.ndarray
+        Shape of the signal is expected to be (num_data_point, num_channels).
+    channel_mask : List[int]
+
+    Returns
+    -------
+    output signal : SignalType
+
+    """
+    print("Retrieving channels according to channel_mask... ", end="")
+    for R, Rec in signal.items():
+        if Rec.shape[1] < len(channel_mask) or max(channel_mask) > Rec.shape[1] - 1:
             print("")
             print("Not enough channels in data to apply channel map. Skipping...")
             continue
 
-        Data[R] = Data[R][:, ChannelMap]
+        signal[R] = signal[R][:, channel_mask]
 
     return Data
 
 
-def BitsToVolts(Data, ChInfo, Unit):
+    signal, timestamps = load_continuous_data(file_path, num_channels, sampling_rate)
+    if channel_mask is not None:
+        signal = apply_channel_mask(signal, channel_mask)
+
+    # TODO in the future: check inside the channel_info,
+    #       and convert mismatch unit (mV->uV)
+
+    signal = neo.core.AnalogSignal(signal.T, unit=unit, sampling_rate=sampling_rate)
+    return signal, timestamps, sampling_rate
+
+
+def _bitsToVolts(Data, ChInfo, Unit):  # TODO: need refactor
     print("Converting to uV... ", end="")
     Data = {R: Rec.astype("float32") for R, Rec in Data.items()}
 

From 99d3f85964d03c7cc410c878a448659be27fba5f Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sat, 14 May 2022 07:27:37 -0600
Subject: [PATCH 13/15] update: binary oe readout module

---
 miv/io/binary.py | 198 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 127 insertions(+), 71 deletions(-)

diff --git a/miv/io/binary.py b/miv/io/binary.py
index 65b278e9..1b77eb8d 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -2,16 +2,6 @@
 
 -------------------------------------
 
-We expect the data structure to follow the default format exported from OpenEphys system: `format <https://open-ephys.atlassian.net/wiki/spaces/OEW/pages/491632/Data+format>`_.
-
-Original Author
-
-- open-ephys/analysis-tools/Python3/Binary.py (commit: 871e003)
-- original author: malfatti
-  - date: 2019-07-27
-- last modified by: skim449
-  - date: 2022-04-11
-
 Raw Data Loader
 ###############
 
@@ -53,8 +43,68 @@ def apply_channel_mask(signal: np.ndarray, channel_mask: List[int]):
 
         signal[R] = signal[R][:, channel_mask]
 
-    return Data
+    return signal
+
+
+def oebin_read(file_path: str):
+    """
+    Oebin file reader in dictionary form
+
+    Parameters
+    ----------
+    file_path : str
 
+    Returns
+    -------
+    info : Dict[str, Any]
+        recording information stored in oebin file.
+    """
+    # TODO: may need fix for multiple continuous data.
+    # TODO: may need to include processor name/id
+    info = literal_eval(open(file_path).read())
+    return info
+
+
+def load_recording(
+    folder: str,
+    channel_mask: Optional[List[int]] = None,
+    unit: Union[str, pq.Quantity] = "uV",
+):
+    """
+    Loads data recorded by Open Ephys in Binary format as numpy memmap.
+    The path should contain
+
+    - continuous/<processor name>/continuous.dat: signal (only a single file is supported)
+    - continuous/<processor name>/timestamps.dat: timestamps
+    - structure.oebin: number of channels and sampling rate.
+
+    Parameters
+    ----------
+    folder: str
+        folder containing at least the subfolder 'experiment1'.
+    channel_mask: List[int], optional
+        Channel index list to ignore in import (default=None)
+    unit: str or pq.Quantity
+        Unit to return the data, either 'uV' or 'mV', case insensitive. (Default='uV')
+
+    Returns
+    -------
+    signal : SignalType, neo.core.AnalogSignal
+    timestamps : TimestampsType
+    sampling_rate : float
+    """
+
+    file_path: str = glob(os.path.join(folder, "**", "*.dat", recursive=True))
+    assert (
+        len(file_path) == 1
+    ), f"There should be only one 'continuous.dat' file. (There exists {file_path}"
+
+    # load structure information dictionary
+    info_file: str = os.path.join(folder, "structure.oebin")
+    info: Dict[str, Any] = oebin_read(info_file)
+    num_channels: int = info["continuous"][0]["num_channels"]
+    sampling_rate: float = info["continuous"][0]["sample_rate"]
+    # channel_info: Dict[str, Any] = info["continuous"][0]["channels"]
 
     signal, timestamps = load_continuous_data(file_path, num_channels, sampling_rate)
     if channel_mask is not None:
@@ -85,70 +135,76 @@ def _bitsToVolts(Data, ChInfo, Unit):  # TODO: need refactor
     return Data
 
 
-def Load(
-    Folder, Processor=None, Experiment=None, Recording=None, Unit="uV", ChannelMap=[]
+def _load(  # TODO: Need refactor
+    folder, processor=None, experiment=None, recording=None, unit="uV", channel_map=[]
 ):
     """
     Loads data recorded by Open Ephys in Binary format as numpy memmap.
 
-        Load(Folder, Processor=None, Experiment=None, Recording=None, Unit='uV', ChannelMap=[])
+    Here is example usage::
 
-    Parameters
-    ----------
-        Folder: str
-            Folder containing at least the subfolder 'experiment1'.
-
-        Processor: str or None, optional
-            Processor number to load, according to subsubsubfolders under
-            Folder>experimentX/recordingY/continuous . The number used is the one
-            after the processor name. For example, to load data from the folder
-            'Channel_Map-109_100.0' the value used should be '109'.
-            If not set, load all processors.
-
-        Experiment: int or None, optional
-            Experiment number to load, according to subfolders under Folder.
-            If not set, load all experiments.
-
-        Recording: int or None, optional
-            Recording number to load, according to subsubfolders under Folder>experimentX .
-            If not set, load all recordings.
-
-        Unit: str or None, optional
-            Unit to return the data, either 'uV' or 'mV' (case insensitive). In
-            both cases, return data in float32. Defaults to 'uV'.
-            If anything else, return data in int16.
-
-        ChannelMap: list, optional
-            If empty (default), load all channels.
-            If not empty, return only channels in ChannelMap, in the provided order.
-            CHANNELS ARE COUNTED STARTING AT 0.
+        from miv.io.binary import _load
 
-    Returns
-    -------
-        Data: dict
-            Dictionary with data in the structure Data[Processor][Experiment][Recording].
+        folder = '/home/user/<PathToData>/2019-07-27_00-00-00'
+        Data, Rate = _load(folder)
+
+        channel_map = [0,15,1,14]
+        recording = 3
+        Data2, Rate2 = _load(folder, recording=recording, channel_map=channel_map, unit='Bits')
 
-        Rate: dict
-            Dictionary with sampling rates in the structure Rate[Processor][Experiment].
+    Original Author:
 
+    - open-ephys/analysis-tools/Python3/Binary.py (commit: 871e003)
+    - original author: malfatti
+        - date: 2019-07-27
+    - last modified by: skim449
+        - date: 2022-04-11
 
-    Example
+    Parameters
+    ----------
+    folder: str
+        folder containing at least the subfolder 'experiment1'.
+
+    processor: str or None, optional
+        Processor number to load, according to subsubsubfolders under
+        folder>experimentX/recordingY/continuous . The number used is the one
+        after the processor name. For example, to load data from the folder
+        'Channel_Map-109_100.0' the value used should be '109'.
+        If not set, load all processors.
+
+    experiment: int or None, optional
+        Experiment number to load, according to subfolders under folder.
+        If not set, load all experiments.
+
+    recording: int or None, optional
+        Recording number to load, according to subsubfolders under folder>experimentX .
+        If not set, load all recordings.
+
+    unit: str or None, optional
+        Unit to return the data, either 'uV' or 'mV' (case insensitive). In
+        both cases, return data in float32. Defaults to 'uV'.
+        If anything else, return data in int16.
+
+    channel_map: list, optional
+        If empty (default), load all channels.
+        If not empty, return only channels in channel_map, in the provided order.
+        CHANNELS ARE COUNTED STARTING AT 0.
+
+    Returns
     -------
-        import Binary
+    Data: dict
+        Dictionary with data in the structure Data[processor][experiment][recording].
+    Rate: dict
+        Dictionary with sampling rates in the structure Rate[processor][experiment].
 
-        Folder = '/home/user/PathToData/2019-07-27_00-00-00'
-        Data, Rate = Binary.Load(Folder)
 
-        ChannelMap = [0,15,1,14]
-        Recording = 3
-        Data2, Rate2 = Binary.Load(Folder, Recording=Recording, ChannelMap=ChannelMap, Unit='Bits')
     """
 
-    Files = sorted(glob(Folder + "/**/*.dat", recursive=True))
-    InfoFiles = sorted(glob(Folder + "/*/*/structure.oebin"))
+    files = sorted(glob(folder + "/**/*.dat", recursive=True))
+    info_file = sorted(glob(folder + "/*/*/structure.oebin"))
 
     Data, Rate = {}, {}
-    for F, File in enumerate(Files):
+    for F, File in enumerate(files):
         File = File.replace("\\", "/")  # Replace windows file delims
         Exp, Rec, _, Proc = File.split("/")[-5:-1]
         Exp = str(int(Exp[10:]) - 1)
@@ -160,16 +216,16 @@ def Load(
         if Proc not in Data.keys():
             Data[Proc], Rate[Proc] = {}, {}
 
-        if Experiment:
-            if int(Exp) != Experiment - 1:
+        if experiment:
+            if int(Exp) != experiment - 1:
                 continue
 
-        if Recording:
-            if int(Rec) != Recording - 1:
+        if recording:
+            if int(Rec) != recording - 1:
                 continue
 
-        if Processor:
-            if Proc != Processor:
+        if processor:
+            if Proc != processor:
                 continue
 
         print("Loading recording", int(Rec) + 1, "...")
@@ -177,7 +233,7 @@ def Load(
             Data[Proc][Exp] = {}
         Data[Proc][Exp][Rec] = np.memmap(File, dtype="int16", mode="c")
 
-        Info = literal_eval(open(InfoFiles[F]).read())
+        Info = literal_eval(open(info_file[F]).read())
         ProcIndex = [
             Info["continuous"].index(_)
             for _ in Info["continuous"]
@@ -198,16 +254,16 @@ def Load(
 
     for Proc in Data.keys():
         for Exp in Data[Proc].keys():
-            if Unit.lower() in ["uv", "mv"]:
+            if unit.lower() in ["uv", "mv"]:
                 ChInfo = Info["continuous"][ProcIndex]["channels"]
-                Data[Proc][Exp] = BitsToVolts(Data[Proc][Exp], ChInfo, Unit)
+                Data[Proc][Exp] = _bitsToVolts(Data[Proc][Exp], ChInfo, unit)
 
-            if ChannelMap:
-                Data[Proc][Exp] = ApplyChannelMap(Data[Proc][Exp], ChannelMap)
+            if channel_map:
+                Data[Proc][Exp] = apply_channel_mask(Data[Proc][Exp], channel_map)
 
     print("Done.")
 
-    return (Data, Rate)
+    return Data, Rate
 
 
 def load_continuous_data(

From 42ffe75a9cd140407350e4a252cfbe52ab3717c8 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sat, 14 May 2022 07:53:33 -0600
Subject: [PATCH 14/15] update: single-data module add masking

---
 miv/io/data.py | 87 +++++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 40 deletions(-)

diff --git a/miv/io/data.py b/miv/io/data.py
index 72b529cb..63af561d 100644
--- a/miv/io/data.py
+++ b/miv/io/data.py
@@ -1,13 +1,17 @@
 __doc__ = """
 
+.. Note::
+    We expect the data structure to follow the default format
+    exported from OpenEphys system:
+    `format <https://open-ephys.atlassian.net/wiki/spaces/OEW/pages/491632/Data+format>`_.
+
 .. Note::
     For simple experiments, you may prefer to use :ref:`api/io:Raw Data Loader`.
     However, we generally recommend to use ``Data`` or ``DataManager`` for
-    handling data, especially when you want to avoid storing raw signal in
-    the memory space.
+    handling data, especially when the size of the raw data is large.
 
-Data Manager
-############
+Module
+######
 
 .. currentmodule:: miv.io.data
 
@@ -20,16 +24,17 @@
 """
 __all__ = ["Data", "DataManager"]
 
-from typing import Any, Optional, Iterable, Callable
+from typing import Any, Optional, Iterable, Callable, List
 
 from collections.abc import MutableSequence
+import logging
 
 import os
 from glob import glob
 import numpy as np
 from contextlib import contextmanager
 
-from miv.io.binary import load_continuous_data
+from miv.io.binary import load_continuous_data, load_recording
 from miv.signal.filter import FilterProtocol
 from miv.typing import SignalType
 
@@ -42,7 +47,7 @@ class Data:
     If you have multiple recordings you would like to handle at the same time, use
     `DataManager` instead.
 
-    By default, the following directory structure is expected in ``data_path``::
+    With the default recording setup, the following directory structure is expected in ``data_path``::
 
         recording1                              # <- recording data_path
         ├── continuous
@@ -62,7 +67,6 @@ class Data:
         │           ├── channels.npy
         │           ├── full_words.npy
         │           └── timestamps.npy
-        ├── structure.oebin
         ├── sync_messages.txt
         ├── structure.oebin
         └── analysis                            # <- post-processing result
@@ -82,9 +86,10 @@ def __init__(
         data_path: str,
     ):
         self.data_path = data_path
+        self.channel_mask = []
 
     @contextmanager
-    def load_data(self):
+    def load(self):
         """
         Context manager for loading data instantly.
 
@@ -94,46 +99,48 @@ def load_data(self):
             >>> with data.load() as (timestamps, raw_signal):
             ...     ...
 
+        Yields
+        ------
+        signal : SignalType, neo.core.AnalogSignal
+        timestamps : TimestampsType, numpy array
+        sampling_rate : float
+
         """
+        # TODO: Not sure this is safe implementation
         try:
-            pass
-            # yield data
+            signal, timestamps, sampling_rate = load_recording(
+                self.data_path, self.masking_channel_list
+            )
+            yield signal, timestamps, sampling_rate
+        except FileNotFoundError as e:
+            logging.error(
+                f"The file could not be loaded because the file {self.data_path} does not exist."
+            )
+            logging.error(e.strerror)
+        except ValueError as e:
+            logging.error(
+                "The data size does not match the number of channel. Check if oebin or continuous.dat file is corrupted."
+            )
+            logging.error(e.strerror)
         finally:
-            pass
-            # del data
-
-    def load(self):
+            del timestamps
+            del signal
 
+    def set_channel_mask(self, channel_id: List[int]):
         """
-        Describe function
 
-        Parameters
-        ----------
-        data_file: continuous.dat file from Open_Ethys recording
-        channels: number of recording channels recorded from
+        Set the channel masking.
 
-        Returns
-        -------
-        raw_data:
-        timestamps:
+        Notes
+        -----
+        If the index exceed the number of channels, it will be ignored.
 
+        Parameters
+        ----------
+        channel_id : List[int]
+            List of channel id that will be ignored.
         """
-
-        raw_data: np.ndarray = np.memmap(self.data_path, dtype="int16")
-        length = raw_data.size // self.channels
-        raw_data = np.reshape(raw_data, (length, self.channels))
-
-        timestamps_zeroed = np.array(range(0, length)) / self.sampling_rate
-        if self.timestamps_npy == "":
-            timestamps = timestamps_zeroed
-        else:
-            timestamps = np.load(self.timestamps_npy) / self.sampling_rate
-
-        # only take first 32 channels
-        raw_data = raw_data[:, 0 : self.channels]
-
-        # TODO: do we want timestaps a member of the class?
-        return np.array(timestamps), np.array(raw_data)
+        self.masking_channel_list = channel_id
 
     def save(self, tag: str, format: str):
         assert tag == "continuous", "You cannot alter raw data, change the data tag"

From ccd84ebe1320236141a1c310d11dff5b000dfd44 Mon Sep 17 00:00:00 2001
From: Seung Hyun Kim <skim449@illinois.edu>
Date: Sun, 15 May 2022 09:26:51 -0500
Subject: [PATCH 15/15] update: finish channel masking

---
 miv/io/binary.py | 24 ++++++++++--------------
 miv/io/data.py   | 23 ++++++++++++++---------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/miv/io/binary.py b/miv/io/binary.py
index 1b77eb8d..8e66ade8 100644
--- a/miv/io/binary.py
+++ b/miv/io/binary.py
@@ -8,7 +8,7 @@
 """
 __all__ = ["load_continuous_data", "load_recording", "oebin_read", "apply_channel_mask"]
 
-from typing import Any, Dict, Optional, Union, List
+from typing import Any, Dict, Optional, Union, List, Set
 
 import os
 import numpy as np
@@ -20,29 +20,25 @@
 from miv.typing import SignalType, TimestampsType
 
 
-def apply_channel_mask(signal: np.ndarray, channel_mask: List[int]):
+def apply_channel_mask(signal: np.ndarray, channel_mask: Set[int]):
     """Apply channel mask on the given signal.
 
     Parameters
     ----------
     signal : np.ndarray
         Shape of the signal is expected to be (num_data_point, num_channels).
-    channel_mask : List[int]
+    channel_mask : Set[int]
 
     Returns
     -------
     output signal : SignalType
 
     """
-    print("Retrieving channels according to channel_mask... ", end="")
-    for R, Rec in signal.items():
-        if Rec.shape[1] < len(channel_mask) or max(channel_mask) > Rec.shape[1] - 1:
-            print("")
-            print("Not enough channels in data to apply channel map. Skipping...")
-            continue
-
-        signal[R] = signal[R][:, channel_mask]
 
+    num_channels = signal.shape[1]
+    channel_index = set(range(num_channels)) - channel_mask
+    channel_index = np.array(np.sort(list(channel_index)))
+    signal = signal[:, channel_index]
     return signal
 
 
@@ -67,7 +63,7 @@ def oebin_read(file_path: str):
 
 def load_recording(
     folder: str,
-    channel_mask: Optional[List[int]] = None,
+    channel_mask: Optional[Set[int]] = None,
     unit: Union[str, pq.Quantity] = "uV",
 ):
     """
@@ -82,7 +78,7 @@ def load_recording(
     ----------
     folder: str
         folder containing at least the subfolder 'experiment1'.
-    channel_mask: List[int], optional
+    channel_mask: Set[int], optional
         Channel index list to ignore in import (default=None)
     unit: str or pq.Quantity
         Unit to return the data, either 'uV' or 'mV', case insensitive. (Default='uV')
@@ -113,7 +109,7 @@ def load_recording(
     # TODO in the future: check inside the channel_info,
     #       and convert mismatch unit (mV->uV)
 
-    signal = neo.core.AnalogSignal(signal.T, unit=unit, sampling_rate=sampling_rate)
+    signal = neo.core.AnalogSignal(signal, unit=unit, sampling_rate=sampling_rate)
     return signal, timestamps, sampling_rate
 
 
diff --git a/miv/io/data.py b/miv/io/data.py
index 63af561d..8a1ee9b8 100644
--- a/miv/io/data.py
+++ b/miv/io/data.py
@@ -86,7 +86,7 @@ def __init__(
         data_path: str,
     ):
         self.data_path = data_path
-        self.channel_mask = []
+        self.masking_channel_set = set()
 
     @contextmanager
     def load(self):
@@ -109,7 +109,7 @@ def load(self):
         # TODO: Not sure this is safe implementation
         try:
             signal, timestamps, sampling_rate = load_recording(
-                self.data_path, self.masking_channel_list
+                self.data_path, self.masking_channel_set
             )
             yield signal, timestamps, sampling_rate
         except FileNotFoundError as e:
@@ -126,21 +126,26 @@ def load(self):
             del timestamps
             del signal
 
-    def set_channel_mask(self, channel_id: List[int]):
+    def set_channel_mask(self, channel_id: Iterable[int]):
         """
-
         Set the channel masking.
 
+        Parameters
+        ----------
+        channel_id : Iterable[int]
+            Channel ids to add to the mask; masked channels are dropped when loading.
+
         Notes
         -----
         If the index exceed the number of channels, it will be ignored.
 
-        Parameters
-        ----------
-        channel_id : List[int]
-            List of channel id that will be ignored.
+        Examples
+        --------
+        >>> data = Data(data_path)
+        >>> data.set_channel_mask(range(12,23))
+
         """
-        self.masking_channel_list = channel_id
+        self.masking_channel_set.update(channel_id)
 
     def save(self, tag: str, format: str):
         assert tag == "continuous", "You cannot alter raw data, change the data tag"