Skip to content

Commit

Permalink
one_offline prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
oliche committed Jul 31, 2020
1 parent 5ed59a9 commit 40dad3b
Show file tree
Hide file tree
Showing 12 changed files with 354 additions and 135 deletions.
25 changes: 0 additions & 25 deletions brainbox/brainbox_env.yml

This file was deleted.

5 changes: 2 additions & 3 deletions brainbox/core/core.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
'''
"""
Creates core data types and functions which support all of brainbox.
'''
"""
from pathlib import Path

import numpy as np
import pandas as pd

Expand Down
99 changes: 98 additions & 1 deletion brainbox/io/parquet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
import numpy as np
import uuid
import numpy as np

from numba import jit
import pyarrow.parquet as pq
import pyarrow as pa
import pandas as pd

from brainbox.core import Bunch


def load(file):
    """
    Read a parquet file from disk into a pandas dataframe.

    :param file: path to the parquet file
    :return: pandas.DataFrame with the file contents
    """
    table = pq.read_table(file)
    return table.to_pandas()


def save(file, table):
    """
    Write a pandas dataframe to disk as a parquet file.

    :param file: destination path for the parquet file
    :param table: pandas.DataFrame to serialize
    :return: None
    """
    arrow_table = pa.Table.from_pandas(table)
    pq.write_table(arrow_table, file)


def uuid2np(eids_uuid):
Expand All @@ -8,12 +34,83 @@ def uuid2np(eids_uuid):


def str2np(eids_str):
    """
    Converts uuid string(s) to a packed int64 numpy representation (via uuid2np).

    :param eids_str: a single uuid string or an iterable of uuid strings
    :return: numpy int64 array as produced by uuid2np
    """
    # promote a lone string to a one-element list so the comprehension below works
    eids = [eids_str] if isinstance(eids_str, str) else eids_str
    return uuid2np([uuid.UUID(eid) for eid in eids])


def np2uuid(eids_np):
    """
    Converts packed int64 rows back into uuid.UUID objects.

    :param eids_np: numpy array (or pandas DataFrame) where each row holds the
     16 raw bytes of one uuid
    :return: list of uuid.UUID
    """
    if isinstance(eids_np, pd.DataFrame):
        eids_np = eids_np.to_numpy()
    out = []
    for row in eids_np:
        # each row's raw bytes reconstruct one 128-bit uuid
        out.append(uuid.UUID(bytes=row.tobytes()))
    return out


def np2str(eids_np):
    """
    Converts packed int64 uuid rows to their canonical string form.

    :param eids_np: numpy array or pandas DataFrame (same input as np2uuid)
    :return: list of uuid strings
    """
    return list(map(str, np2uuid(eids_np)))


def rec2col(rec, join=None, include=None, exclude=None, uuid_fields=None):
    """
    Change a record list (usually from a REST API endpoint) to a column based dictionary
    (pandas dataframe).

    :param rec: list of dictionaries with consistent keys (a single dict is also accepted)
    :param join: dictionary of scalar keys that will be replicated over the full
     array (join operation)
    :param include: list of strings representing dictionary keys: if specified will only include
     the keys specified here
    :param exclude: list of strings representing dictionary keys: if specified will exclude the
     keys specified here
    :param uuid_fields: if the field is a UUID, will split it into 2 distinct int64 columns for
     efficient lookups and intersections
    :return: a Bunch
    """
    if isinstance(rec, dict):
        rec = [rec]
    if len(rec) == 0:
        return Bunch()
    if include is None:
        # bug fix: `rec` is a list at this point, so keys must come from the first
        # record (`rec.keys()` raised AttributeError whenever include was omitted)
        include = rec[0].keys()
    if exclude is None:
        exclude = []
    if uuid_fields is None:
        uuid_fields = []
    if join is None:
        join = {}

    # first loop over the records and create each column as a numpy array
    nrecs = len(rec)
    col = {}
    keys = [k for k in rec[0] if k in include and k not in exclude]
    for key in keys:
        if key in uuid_fields:
            # split each 128-bit uuid into two int64 columns for fast lookups
            npuuid = str2np(np.array([c[key] for c in rec]))
            col[f"{key}_0"] = npuuid[:, 0]
            col[f"{key}_1"] = npuuid[:, 1]
        else:
            col[key] = np.array([c[key] for c in rec])

    # then perform the joins if any: each scalar is tiled to the number of records
    for key in join:
        if key in uuid_fields:
            npuuid = str2np([join[key]])
            col[f"{key}_0"] = np.tile(npuuid[0, 0], (nrecs,))
            col[f"{key}_1"] = np.tile(npuuid[0, 1], (nrecs,))
        else:
            col[key] = np.tile(np.array(join[key]), (nrecs,))

    return Bunch(col)


@jit(nopython=True)
def find_first_2d(mat, val):
    """
    Returns the index of the first row of `mat` that equals `val` element-wise.
    The purpose of this function is performance: uses low level numba and avoids looping
    through the full array once a match is found
    :param mat: np.array (2d)
    :param val: row of values to search for, compared against each row of `mat`
    :return: int index of the first matching row, or None when no row matches
     (the loop falls through without returning)
    """
    for i in np.arange(mat.shape[0]):
        if np.all(mat[i] == val):
            return i
File renamed without changes.
1 change: 1 addition & 0 deletions brainbox/tests/fixtures/parquet_records.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "490dbe0e-8d96-44ef-bb3f-3443285376e4", "name": "_ibl_trials.intervals.npy", "dataset_type": "trials.intervals", "data_url": "http://ibl.flatironinstitute.org/churchlandlab/Subjects/CSHL046/2020-06-20/002/alf/_ibl_trials.intervals.490dbe0e-8d96-44ef-bb3f-3443285376e4.npy", "url": "https://alyx.internationalbrainlab.org/datasets/490dbe0e-8d96-44ef-bb3f-3443285376e4", "file_size": 5824, "hash": "a9d372ce849439c12243dd4c1bbf29d5", "version": "1.4.14", "collection": "alf"}, {"id": "6b6aeba4-a6c2-4a42-aa8c-b2f12e790623", "name": "_ibl_log.info.register_v1.4.11.log", "dataset_type": "_ibl_log.info", "data_url": "http://ibl.flatironinstitute.org/churchlandlab/Subjects/CSHL046/2020-06-20/002/logs/_ibl_log.info.register_v1.4.11.6b6aeba4-a6c2-4a42-aa8c-b2f12e790623.log", "url": "https://alyx.internationalbrainlab.org/datasets/6b6aeba4-a6c2-4a42-aa8c-b2f12e790623", "file_size": 146, "hash": "fe2dd9fee9720049b02fe318165222bf", "version": "1.4.14", "collection": "logs"}, {"id": "1c633c8c-7b04-458f-b7a8-c1ac62ce9dcb", "name": "_iblrig_bodyCamera.raw.mp4", "dataset_type": "_iblrig_Camera.raw", "data_url": "http://ibl.flatironinstitute.org/churchlandlab/Subjects/CSHL046/2020-06-20/002/raw_video_data/_iblrig_bodyCamera.raw.1c633c8c-7b04-458f-b7a8-c1ac62ce9dcb.mp4", "url": "https://alyx.internationalbrainlab.org/datasets/1c633c8c-7b04-458f-b7a8-c1ac62ce9dcb", "file_size": 741255803, "hash": "c8e605c3112c639ac7fd7a1a1e1e782e", "version": "1.4.14", "collection": "raw_video_data"}, {"id": "88ca7cad-6bba-49f3-af69-1056a4099474", "name": "_spikeglx_ephysData_g1_t0.imec.lf.cbin", "dataset_type": "ephysData.raw.lf", "data_url": "http://ibl.flatironinstitute.org/churchlandlab/Subjects/CSHL046/2020-06-20/002/raw_ephys_data/probe01/_spikeglx_ephysData_g1_t0.imec.lf.88ca7cad-6bba-49f3-af69-1056a4099474.cbin", "url": "https://alyx.internationalbrainlab.org/datasets/88ca7cad-6bba-49f3-af69-1056a4099474", "file_size": 3276657550, "hash": null, "version": "1.4.14", "collection": 
"raw_ephys_data/probe01"}, {"id": "3e7f3a3d-5992-4d3e-86b2-69016464beae", "name": "_spikeglx_sync.polarities.probe00.npy", "dataset_type": "_spikeglx_sync.polarities", "data_url": "http://ibl.flatironinstitute.org/churchlandlab/Subjects/CSHL046/2020-06-20/002/raw_ephys_data/probe00/_spikeglx_sync.polarities.probe00.3e7f3a3d-5992-4d3e-86b2-69016464beae.npy", "url": "https://alyx.internationalbrainlab.org/datasets/3e7f3a3d-5992-4d3e-86b2-69016464beae", "file_size": 20475896, "hash": "22ee1f879df4c0705474586b64412ea3", "version": "1.4.14", "collection": "raw_ephys_data/probe00"}]
23 changes: 22 additions & 1 deletion brainbox/tests/test_io.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
import json
from pathlib import Path
import unittest
import uuid

import numpy as np

from brainbox.core import intersect2d, ismember2d, ismember
from brainbox.io.parquet import uuid2np, np2uuid
from brainbox.io.parquet import uuid2np, np2uuid, rec2col


class TestParquet(unittest.TestCase):

def test_rec2col(self):
json_fixture = Path(__file__).parent.joinpath('fixtures', 'parquet_records.json')
with open(json_fixture, 'r') as fid:
datasets = json.loads(fid.read())
# test with includes / joins and uuid fields in both join and includes
include = ['id', 'hash', 'dataset_type', 'name', 'file_size', 'collection']
uuid_fields = ['id', 'eid']
join = {'subject': 'Bernard', 'lab': 'thelab',
'eid': '150f92bc-e755-4f54-96c1-84e1eaf832b4'}
arr = rec2col(datasets, include=include, uuid_fields=uuid_fields, join=join)
self.assertTrue(np.all(np.array([arr[k].size for k in arr]) == 5))
self.assertTrue(len(arr.keys()) == len(include) + len(uuid_fields) + len(join.keys()))
# test single dictionary
arr_single = rec2col(datasets[0], include=include, uuid_fields=uuid_fields, join=join)
self.assertTrue(np.all(arr.to_df().iloc[0] == arr_single.to_df()))
# test empty
arr_empty = rec2col([], include=include, uuid_fields=uuid_fields, join=join)
self.assertTrue(arr_empty.to_df().size == 0)

def test_uuids_intersections(self):
ntotal = 500
nsub = 17
Expand Down
2 changes: 1 addition & 1 deletion brainbox/tests/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class TestTask(unittest.TestCase):

def setUp(self):
# Test data is a dictionary of spike times and clusters and event times and groups
pickle_file = Path(__file__).parent.joinpath('ephys_test.p')
pickle_file = Path(__file__).parent.joinpath('fixtures', 'ephys_test.p')
if not pickle_file.exists():
self.test_data = None
else:
Expand Down
18 changes: 10 additions & 8 deletions ibllib/io/extractors/ephys_fpga.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,14 +417,10 @@ def extract_behaviour_sync(sync, chmap=None, display=False, tmax=np.inf):
else:
ax = display
r0 = _get_sync_fronts(sync, chmap['rotary_encoder_0'])
plots.squares(bpod['times'], bpod['polarities'] * 0.4 + 1,
ax=ax, color='k')
plots.squares(frame2ttl['times'], frame2ttl['polarities'] * 0.4 + 2,
ax=ax, color='k')
plots.squares(audio['times'], audio['polarities'] * 0.4 + 3,
ax=ax, color='k')
plots.squares(r0['times'], r0['polarities'] * 0.4 + 4,
ax=ax, color='k')
plots.squares(bpod['times'], bpod['polarities'] * 0.4 + 1, ax=ax, color='k')
plots.squares(frame2ttl['times'], frame2ttl['polarities'] * 0.4 + 2, ax=ax, color='k')
plots.squares(audio['times'], audio['polarities'] * 0.4 + 3, ax=ax, color='k')
plots.squares(r0['times'], r0['polarities'] * 0.4 + 4, ax=ax, color='k')
plots.vertical_lines(t_ready_tone_in, ymin=0, ymax=ymax,
ax=ax, label='goCue_times', color='b', linewidth=width)
plots.vertical_lines(t_trial_start, ymin=0, ymax=ymax,
Expand All @@ -439,6 +435,12 @@ def extract_behaviour_sync(sync, chmap=None, display=False, tmax=np.inf):
ax=ax, label='stim off', color='c', linewidth=width)
plots.vertical_lines(trials['stimOn_times'], ymin=0, ymax=ymax,
ax=ax, label='stimOn_times', color='tab:orange', linewidth=width)
c = _get_sync_fronts(sync, chmap['left_camera'])
plots.squares(c['times'], c['polarities'] * 0.4 + 5, ax=ax, color='k')
c = _get_sync_fronts(sync, chmap['right_camera'])
plots.squares(c['times'], c['polarities'] * 0.4 + 6, ax=ax, color='k')
c = _get_sync_fronts(sync, chmap['body_camera'])
plots.squares(c['times'], c['polarities'] * 0.4 + 7, ax=ax, color='k')
ax.legend()
ax.set_yticklabels(['', 'bpod', 'f2ttl', 'audio', 're_0', ''])
ax.set_yticks([0, 1, 2, 3, 4, 5])
Expand Down
16 changes: 0 additions & 16 deletions ibllib/io/one.py

This file was deleted.

2 changes: 1 addition & 1 deletion ibllib/qc/oneutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

log = logging.getLogger("ibllib")

one = ONE(printout=False)
one = ONE()


def download_bpodqc_raw_data(eid, one=None):
Expand Down
22 changes: 20 additions & 2 deletions oneibl/dataclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass, field, fields
from pathlib import Path

from brainbox.io import parquet
from ibllib.misc import flatten


Expand Down Expand Up @@ -64,9 +65,9 @@ def from_datasets(dsets, dataset_types=None, eid=None):
dataset_type=[d['dataset_type'] for d in dsets],
dataset_id=[d['id'] for d in dsets],
local_path=[None for d in dsets],
eid=[eid for d in dsets], # [ses_info['url'][-36:] for d in dsets],
eid=[eid for _ in dsets], # [ses_info['url'][-36:] for d in dsets],
url=[d['data_url'] for d in dsets],
data=[None for d in dsets],
data=[None for _ in dsets],
hash=[d['hash'] for d in dsets],
file_size=[d['file_size'] for d in dsets]
)
Expand All @@ -75,6 +76,23 @@ def from_datasets(dsets, dataset_types=None, eid=None):
def from_session_details(ses_info, **kwargs):
return _session_details_to_dataclasses(ses_info, **kwargs)

@staticmethod
def from_pandas(df, cache_dir):
fcn_local_path = lambda rec: Path(cache_dir).joinpath( # noqa
rec['lab'], 'Subjects', rec['subject'], rec['start_time'][:10],
str(rec['number']).zfill(3), rec['collection'], rec['name'])
nrecs = df.shape[0]
return SessionDataInfo(
dataset_type=df.dataset_type.to_list(),
dataset_id=list(parquet.np2str(df[['id_0', 'id_1']])),
local_path=df.apply(fcn_local_path, axis=1).to_list(),
eid=list(parquet.np2str(df[['eid_0', 'eid_1']])),
url=[None for _ in range(nrecs)],
data=[None for _ in range(nrecs)],
hash=df.hash.to_list(),
file_size=df.file_size.to_list()
)


@singledispatch
def _session_details_to_dataclasses(ses_info, **kwargs):
Expand Down
Loading

0 comments on commit 40dad3b

Please sign in to comment.