GazzolaLab · skim0119 · May 16, 2022 · May 15, 2022 · May 15, 2022 · May 15, 2022
diff --git a/docs/guide/data_management.md b/docs/guide/data_management.md
@@ -13,25 +13,33 @@ kernelspec:
 
 # Data Management
 
+- Data
+- DataManager
+- load_continuous_data (raw)
+
 ```{code-cell} ipython3
 :tags: [hide-cell]
 
 import os
 import numpy as np
 import quantities as pq
 import matplotlib.pyplot as plt
-
+from glob import glob
+from miv.io import *
 ```
 
-## 1. Data Load
-
 ```{code-cell} ipython3
-:tags: [hide-cell]
+datapath = './2022-03-10_16-19-09'
+os.path.exists(datapath)
+```
 
-from miv.io import load_data
-from miv.io.data import Data, Dataset
+```{code-cell} ipython3
+filepath = './2022-03-10_16-19-09/Record Node 104/experiment1/recording1/continuous/Rhythm_FPGA-100.0/continuous.dat'
+os.path.exists(filepath)
 ```
 
+## 1. Data Load
+
 ```{code-cell} ipython3
 # Load dataset from OpenEphys recording
 folder_path: str = "~/Open Ephys/2022-03-10-16-19-09"  # Data Path

diff --git a/docs/overview/references.rst b/docs/overview/references.rst
@@ -8,8 +8,15 @@ Neural Ensemble
 - Python-Neo [1]_
 - Elephant/Viziphant [2]_
 
+Algorithm
+#########
+
+- PyWavelets [3]_
+
 ---------------
 
 .. [1] Garcia S., Guarino D., Jaillet F., Jennings T.R., Pröpper R., Rautenberg P.L., Rodgers C., Sobolev A.,Wachtler T., Yger P. and Davison A.P. (2014) Neo: an object model for handling electrophysiology data in multiple formats. Frontiers in Neuroinformatics 8:10: doi:10.3389/fninf.2014.00010
 
 .. [2] Denker M, Yegenoglu A, Grün S (2018) Collaborative HPC-enabled workflows on the HBP Collaboratory using the Elephant framework. Neuroinformatics 2018, P19. doi:10.12751/incf.ni2018.0019
+
+.. [3] Gregory R. Lee, Ralf Gommers, Filip Wasilewski, Kai Wohlfahrt, Aaron O’Leary (2019). PyWavelets: A Python package for wavelet analysis. Journal of Open Source Software, 4(36), 1237, https://doi.org/10.21105/joss.01237.
diff --git a/miv/io/binary.py b/miv/io/binary.py
@@ -33,11 +33,18 @@ def apply_channel_mask(signal: np.ndarray, channel_mask: Set[int]):
     -------
     output signal : SignalType
 
+    Raises
+    ------
+    IndexError
+        Typically raised when the signal has fewer than 2 dimensions.
+    AttributeError
+        If signal is not a numpy array.
+
     """
 
     num_channels = signal.shape[1]
-    channel_index = set(range(num_channels)) - channel_mask
-    channel_index = np.array(np.sort(list(channel_index)))
+    channel_index_set = set(range(num_channels)) - channel_mask
+    channel_index = np.array(np.sort(list(channel_index_set)))
     signal = signal[:, channel_index]
     return signal
 
@@ -88,12 +95,17 @@ def load_recording(
     signal : SignalType, neo.core.AnalogSignal
     sampling_rate : float
 
+    Raises
+    ------
+    AssertionError
+        If the directory does not contain exactly one "continuous.dat" file.
+
     """
 
-    file_path: str = glob(os.path.join(folder, "**", "*.dat", recursive=True))
+    file_path: List[str] = glob(os.path.join(folder, "**", "*.dat"), recursive=True)
     assert (
         len(file_path) == 1
-    ), f"There should be only one 'continuous.dat' file. (There exists {file_path}"
+    ), f"There should be only one 'continuous.dat' file. (There exists {file_path})"
 
     # load structure information dictionary
     info_file: str = os.path.join(folder, "structure.oebin")
@@ -102,7 +114,8 @@ def load_recording(
     sampling_rate: float = info["continuous"][0]["sample_rate"]
     # channel_info: Dict[str, Any] = info["continuous"][0]["channels"]
 
-    signal, timestamps = load_continuous_data(file_path, num_channels, sampling_rate)
+    # TODO: maybe need to support multiple continuous.dat files
+    signal, timestamps = load_continuous_data(file_path[0], num_channels, sampling_rate)
     if channel_mask is not None:
         signal = apply_channel_mask(signal, channel_mask)
 
@@ -113,155 +126,6 @@ def load_recording(
     return signal, timestamps, sampling_rate
 
 
-def _bitsToVolts(Data, ChInfo, Unit):  # TODO: need refactor
-    print("Converting to uV... ", end="")
-    Data = {R: Rec.astype("float32") for R, Rec in Data.items()}
-
-    if Unit.lower() == "uv":
-        U = 1
-    elif Unit.lower() == "mv":
-        U = 10 ** -3
-
-    for R in Data.keys():
-        for C in range(len(ChInfo)):
-            Data[R][:, C] = Data[R][:, C] * ChInfo[C]["bit_volts"] * U
-            if "ADC" in ChInfo[C]["channel_name"]:
-                Data[R][:, C] *= 10 ** 6
-
-    return Data
-
-
-def _load(  # TODO: Need refactor
-    folder, processor=None, experiment=None, recording=None, unit="uV", channel_map=[]
-):
-    """
-    Loads data recorded by Open Ephys in Binary format as numpy memmap.
-
-    Here is example usage::
-
-        from miv.io.Binary import load
-
-        folder = '/home/user/<PathToData>/2019-07-27_00-00-00'
-        Data, Rate = load(folder)
-
-        channel_map = [0,15,1,14]
-        recording = 3
-        Data2, Rate2 = load(folder, recording=recording, channel_map=channel_map, unit='Bits')
-
-    Original Author:
-
-    - open-ephys/analysis-tools/Python3/Binary.py (commit: 871e003)
-    - original author: malfatti
-        - date: 2019-07-27
-    - last modified by: skim449
-        - date: 2022-04-11
-
-    Parameters
-    ----------
-    folder: str
-        folder containing at least the subfolder 'experiment1'.
-
-    processor: str or None, optional
-        Processor number to load, according to subsubsubfolders under
-        folder>experimentX/recordingY/continuous . The number used is the one
-        after the processor name. For example, to load data from the folder
-        'Channel_Map-109_100.0' the value used should be '109'.
-        If not set, load all processors.
-
-    experiment: int or None, optional
-        Experiment number to load, according to subfolders under folder.
-        If not set, load all experiments.
-
-    recording: int or None, optional
-        Recording number to load, according to subsubfolders under folder>experimentX .
-        If not set, load all recordings.
-
-    unit: str or None, optional
-        Unit to return the data, either 'uV' or 'mV' (case insensitive). In
-        both cases, return data in float32. Defaults to 'uV'.
-        If anything else, return data in int16.
-
-    channel_map: list, optional
-        If empty (default), load all channels.
-        If not empty, return only channels in channel_map, in the provided order.
-        CHANNELS ARE COUNTED STARTING AT 0.
-
-    Returns
-    -------
-    Data: dict
-        Dictionary with data in the structure Data[processor][experiment][recording].
-    Rate: dict
-        Dictionary with sampling rates in the structure Rate[processor][experiment].
-
-
-    """
-
-    files = sorted(glob(folder + "/**/*.dat", recursive=True))
-    info_file = sorted(glob(folder + "/*/*/structure.oebin"))
-
-    Data, Rate = {}, {}
-    for F, File in enumerate(files):
-        File = File.replace("\\", "/")  # Replace windows file delims
-        Exp, Rec, _, Proc = File.split("/")[-5:-1]
-        Exp = str(int(Exp[10:]) - 1)
-        Rec = str(int(Rec[9:]) - 1)
-        Proc = Proc.split(".")[0].split("-")[-1]
-        if "_" in Proc:
-            Proc = Proc.split("_")[0]
-
-        if Proc not in Data.keys():
-            Data[Proc], Rate[Proc] = {}, {}
-
-        if experiment:
-            if int(Exp) != experiment - 1:
-                continue
-
-        if recording:
-            if int(Rec) != recording - 1:
-                continue
-
-        if processor:
-            if Proc != processor:
-                continue
-
-        print("Loading recording", int(Rec) + 1, "...")
-        if Exp not in Data[Proc]:
-            Data[Proc][Exp] = {}
-        Data[Proc][Exp][Rec] = np.memmap(File, dtype="int16", mode="c")
-
-        Info = literal_eval(open(info_file[F]).read())
-        ProcIndex = [
-            Info["continuous"].index(_)
-            for _ in Info["continuous"]
-            if str(_["source_processor_id"]) == Proc
-        ][
-            0
-        ]  # Changed to source_processor_id from recorded_processor_id
-
-        ChNo = Info["continuous"][ProcIndex]["num_channels"]
-        if Data[Proc][Exp][Rec].shape[0] % ChNo:
-            print("Rec", Rec, "is broken")
-            del Data[Proc][Exp][Rec]
-            continue
-
-        SamplesPerCh = Data[Proc][Exp][Rec].shape[0] // ChNo
-        Data[Proc][Exp][Rec] = Data[Proc][Exp][Rec].reshape((SamplesPerCh, ChNo))
-        Rate[Proc][Exp] = Info["continuous"][ProcIndex]["sample_rate"]
-
-    for Proc in Data.keys():
-        for Exp in Data[Proc].keys():
-            if unit.lower() in ["uv", "mv"]:
-                ChInfo = Info["continuous"][ProcIndex]["channels"]
-                Data[Proc][Exp] = _bitsToVolts(Data[Proc][Exp], ChInfo, unit)
-
-            if channel_map:
-                Data[Proc][Exp] = apply_channel_mask(Data[Proc][Exp], channel_map)
-
-    print("Done.")
-
-    return Data, Rate
-
-
 def load_continuous_data(
     data_path: str,
     num_channels: int,
@@ -326,4 +190,4 @@ def load_continuous_data(
     if start_at_zero and not np.isclose(timestamps[0], 0.0):
         timestamps -= timestamps[0]
 
-    return timestamps, raw_data
+    return timestamps, np.array(raw_data)
diff --git a/miv/io/data.py b/miv/io/data.py
@@ -24,7 +24,7 @@
 """
 __all__ = ["Data", "DataManager"]
 
-from typing import Any, Optional, Iterable, Callable, List
+from typing import Any, Optional, Iterable, Callable, List, Set
 
 from collections.abc import MutableSequence
 import logging
@@ -85,8 +85,8 @@ def __init__(
         self,
         data_path: str,
     ):
-        self.data_path = data_path
-        self.masking_channel_set = set()
+        self.data_path: str = data_path
+        self.masking_channel_set: Set[int] = set()
 
     @contextmanager
     def load(self):
@@ -248,7 +248,7 @@ def get_experiments_recordings(data_paths: str) -> Iterable[str]:
     # fmt: off
     list_of_experiments_to_process = []
     for path in data_paths:
-        path_list = [path for path in glob.glob(os.path.join(path, "*", "*", "*")) if "Record Node" in path and "recording" in path and os.path.isdir(path)]
+        path_list = [path for path in glob(os.path.join(path, "*", "*", "*")) if "Record Node" in path and "recording" in path and os.path.isdir(path)]
         list_of_experiments_to_process.extend(path_list)
     # fmt: on
     return list_of_experiments_to_process
@@ -258,7 +258,7 @@ def get_analysis_paths(data_paths: str, output_folder_name: str) -> Iterable[str
     # fmt: off
     list_of_analysis_paths = []
     for path in data_paths:
-        path_list = [path for path in glob.glob(os.path.join(path, "*", "*", "*", "*")) if ("Record Node" in path) and ("recording" in path) and (output_folder_name in path) and os.path.isdir(path)]
+        path_list = [path for path in glob(os.path.join(path, "*", "*", "*", "*")) if ("Record Node" in path) and ("recording" in path) and (output_folder_name in path) and os.path.isdir(path)]
         list_of_analysis_paths.extend(path_list)
     # fmt: on
     return list_of_analysis_paths
diff --git a/requirements.txt b/requirements.txt
@@ -8,6 +8,7 @@ numpy>=1.19.2
 omegaconf
 pandas
 Pillow
+PyWavelets
 quantities
 scikit-learn
 scipy>=1.5.2

diff --git a/tests/io/__init__.py b/tests/io/__init__.py
diff --git a/tests/io/mock_continuous_signal.py b/tests/io/mock_continuous_signal.py