Skip to content

Commit

Permalink
load_object fix, tests and flake
Browse files Browse the repository at this point in the history
  • Loading branch information
k1o0 committed Jul 21, 2020
1 parent 664c0bd commit 71f0325
Show file tree
Hide file tree
Showing 52 changed files with 658 additions and 154 deletions.
56 changes: 35 additions & 21 deletions alf/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
An ALF file has the following components (those in brackets are optional):
(_namespace_)object.attribute(_timescale)(.extra.parts).ext
Note the following:
Object attributes may not contain an underscore unless followed by 'times' or 'intervals'.
A namespace must not contain extra underscores (i.e. `name_space` and `__namespace__` are not
valid)
ALF files must always have an extension
For more information, see the following documentation:
https://docs.internationalbrainlab.org/en/latest/04_reference.html#alf
Expand All @@ -16,12 +22,13 @@
from fnmatch import fnmatch

# to include underscores: r'(?P<namespace>(?:^_)\w+(?:_))?'
# to treat _times and _intervals as timescale: (?P<attribute>[a-zA-Z]+)_?
ALF_EXP = re.compile(
r'^_?(?P<namespace>(?<=_)[a-zA-Z]+)?_?'
r'^_?(?P<namespace>(?<=_)[a-zA-Z0-9]+)?_?'
r'(?P<object>\w+)\.'
r'(?P<attribute>[a-zA-Z]+)_?'
r'(?P<attribute>[a-zA-Z0-9]+(?:_times(?=[_\b.])|_intervals(?=[_\b.]))?)_?'
r'(?P<timescale>(?:_?)\w+)*\.?'
r'(?P<extra>[.\w]+)*\.'
r'(?P<extra>[.\w-]+)*\.'
r'(?P<extension>\w+$)')


Expand All @@ -32,7 +39,9 @@ def is_valid(filename):
Examples:
>>> is_valid('trials.feedbackType.npy')
True
>>> is_valid('spike_trian.npy')
>>> is_valid('_ns_obj.attr1.2622b17c-9408-4910-99cb-abf16d9225b9.metadata.json')
True
>>> is_valid('spike_train.npy')
False
>>> is_valid('channels._phy_ids.csv')
False
Expand Down Expand Up @@ -66,8 +75,10 @@ def alf_parts(filename, as_dict=False):
('iblmic', 'audioSpectrogram', 'frequencies', None, None, 'npy')
>>> alf_parts('_spikeglx_ephysData_g0_t0.imec.wiring.json')
('spikeglx', 'ephysData_g0_t0', 'imec', None, 'wiring', 'json')
>>> alf_parts('_spikeglx_ephysData_g0_t0.imec0.lf.bin')
('spikeglx', 'ephysData_g0_t0', 'imec0', None, 'lf', 'bin')
>>> alf_parts('_ibl_trials.goCue_times_bpod.csv')
('ibl', 'trials', 'goCue', 'times_bpod', None, 'csv')
('ibl', 'trials', 'goCue_times', 'bpod', None, 'csv')
Args:
filename (str): The name of the file
Expand All @@ -89,7 +100,8 @@ def alf_parts(filename, as_dict=False):

def to_alf(object, attribute, extension, namespace=None, timescale=None, extra=None):
"""
Given a set of ALF file parts, return a valid ALF file name
Given a set of ALF file parts, return a valid ALF file name. Essential periods and
underscores are added by the function.
Args:
object (str): The ALF object name
Expand All @@ -114,10 +126,26 @@ def to_alf(object, attribute, extension, namespace=None, timescale=None, extra=N
>>> to_alf('wheel', 'timestamps', 'npy', 'ibl', 'bpod', ('raw', 'v12'))
'_ibl_wheel.timestamps_bpod.raw.v12.npy'
"""
# Validate inputs
if not extension:
raise TypeError('An extension must be provided')
elif extension.startswith('.'):
extension = extension[1:]
if re.search('_(?!times$|intervals)', attribute):
raise ValueError('Object attributes must not contain underscores')
if any(pt is not None and '.' in pt for pt in
(object, attribute, namespace, extension, timescale)):
raise ValueError('ALF parts must not contain a period (`.`)')
if '_' in (namespace or ''):
raise ValueError('Namespace must not contain extra underscores')

# Optional extras may be provided as string or tuple of strings
if not extra:
extra = ()
elif isinstance(extra, str):
extra = extra.split('.')

# Construct ALF file
parts = (('_%s_' % namespace if namespace else '') + object,
attribute + ('_%s' % timescale if timescale else ''),
*extra,
Expand Down Expand Up @@ -179,7 +207,7 @@ def filter_by(alf_path, **kwargs):
match = v is attr[k]
elif k == 'extra':
# Check all provided extra fields match those in ALF
match = all(elem in attr[k].split('.') for elem in v)
match = all(elem in attr[k].split('.') for elem in v if elem)
else:
# Check given attribute matches, allowing wildcards
match = fnmatch(attr[k], v)
Expand All @@ -191,20 +219,6 @@ def filter_by(alf_path, **kwargs):

return alf_files, [tuple(attr.values()) for attr in attributes]

# def attributes_as_keys(parts):
# """
# parts = [('ibl', 'trials', 'goCue', 'times', 'bpod', 'raw', 'npy'),
# (None, 'trials', 'pLeft', None, None, 'npy'),
# ('ibl', 'trials', 'goCue', 'times', 'pbod', 'raw', 'csv')]
# :param parts:
# :return:
# """
# attributes = [p[2] if not p[3] else '_'.join(p[2:4]) for p in parts]
# seen = set()
# dupes = [x for x in attributes if ((x in seen) is (seen.add(x) is None))]
# for dup in dupes:
# if dup:


if __name__ == "__main__":
import doctest
Expand Down
48 changes: 33 additions & 15 deletions alf/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,18 @@ def read_ts(filename):
filename = Path(filename)

# alf format is object.attribute.extension, for example '_ibl_wheel.position.npy'
_, obj, attr, *_, ext = files.alf_parts(filename)
_, obj, attr, *_, ext = files.alf_parts(filename.parts[-1])

# looking for matching object with attribute timestamps: '_ibl_wheel.timestamps.npy'
time_file = files.filter_by(filename.parent, object=obj, attribute='timestamps', extension=ext)
(time_file,), _ = files.filter_by(filename.parent, object=obj,
attribute='timestamps', extension=ext)

if not time_file:
name = files.to_alf(obj, attr, ext)
_logger.error(name + ' not found! no time-scale for' + str(filename))
raise FileNotFoundError(name + ' not found! no time-scale for' + str(filename))

return np.load(time_file), np.load(filename)
return np.load(filename.parent / time_file), np.load(filename)


def load_file_content(fil):
Expand Down Expand Up @@ -185,7 +186,7 @@ def load_file_content(fil):
return Path(fil)


def _ls(alfpath, object, **kwargs):
def _ls(alfpath, object=None, **kwargs):
"""
Given a path, an object and a filter, returns all files and associated attributes
:param alfpath: containing folder
Expand Down Expand Up @@ -218,7 +219,7 @@ def exists(alfpath, object, attributes=None, **kwargs):
:param alfpath: str or pathlib.Path of the folder to look into
:param object: str ALF object name
:param attributes: str or list of strings for wanted attributes
:return: Bool. For multiple attributes, returns True only if all attributes are found
:return: bool. For multiple attributes, returns True only if all attributes are found
"""

# if the object is not found, return False
Expand All @@ -238,7 +239,7 @@ def exists(alfpath, object, attributes=None, **kwargs):
return set(attributes).issubset(attributes_found)


def load_object(alfpath, object=None, **kwargs):
def load_object(alfpath, object=None, short_keys=False, **kwargs):
"""
Reads all files (ie. attributes) sharing the same object.
For example, if the file provided to the function is `spikes.times`, the function will
Expand All @@ -249,14 +250,28 @@ def load_object(alfpath, object=None, **kwargs):
:param alfpath: any alf file pertaining to the object OR directory containing files
:param object: if a directory is provided and object is None, all valid ALF files returned
:param short_keys: by default, the output dictionary keys will be compounds of attributes,
timescale and any extra parts, separated by a dot. Use True to shorten the keys to the
attribute and timescale only.
:return: a dictionary of all attributes pertaining to the object
example: spikes = ibllib.io.alf.load_object('/path/to/my/alffolder/', 'spikes')
FIXME Overwrites if two files with same object and attribute
Examples:
# Load `spikes` object
spikes = ibllib.io.alf.load_object('/path/to/my/alffolder/', 'spikes')
# Load `trials` object under the `ibl` namespace
trials = ibllib.io.alf.load_object(session_path, 'trials', namespace='ibl')
"""
# prepare the glob input argument if it's a list
if Path(alfpath).is_dir() and object is None:
raise ValueError('If a directory is provided, the object name should be provided too')
files_alf, parts = _ls(alfpath, object, **kwargs)
attributes = [part[2] + '_' + part[3] if part[3] else part[2] for part in parts]
# Take attribute and timescale from parts list
attributes = [p[2] if not p[3] else '_'.join(p[2:4]) for p in parts]
if not short_keys: # Include extra parts in the keys
attributes = [attr + ('.' + p[4] if p[4] else '') for attr, p in zip(attributes, parts)]
assert len(set(attributes)) == len(attributes), \
'multiple object files with the same attribute found, please restrict on namespace etc.'
out = AlfBunch({})
# load content for each file
for fil, att in zip(files_alf, attributes):
Expand All @@ -283,7 +298,7 @@ def load_object(alfpath, object=None, **kwargs):
return out


def save_object_npy(alfpath, dico, object, parts=None, namespace=None):
def save_object_npy(alfpath, dico, object, parts=None, namespace=None, timescale=None):
"""
Saves a dictionary in alf format using object as object name and dictionary keys as attribute
names. Dimensions have to be consistent.
Expand All @@ -293,8 +308,9 @@ def save_object_npy(alfpath, dico, object, parts=None, namespace=None):
:param alfpath: path of the folder to save data to
:param dico: dictionary to save to npy; keys correspond to ALF attributes
:param object: name of the object to save
:param namespace: the optional namespace of the object
:param parts: extra parts to the ALF name
:param namespace: the optional namespace of the object
:param timescale: the optional timescale of the object
:return: List of written files
example: ibllib.io.alf.save_object_npy('/path/to/my/alffolder/', spikes, 'spikes')
Expand All @@ -306,7 +322,8 @@ def save_object_npy(alfpath, dico, object, parts=None, namespace=None):
str([(k, v.shape) for k, v in dico.items()]))
out_files = []
for k, v in dico.items():
out_file = alfpath / files.to_alf(object, k, 'npy', extra=parts, namespace=namespace)
out_file = alfpath / files.to_alf(object, k, 'npy',
extra=parts, namespace=namespace, timescale=timescale)
np.save(out_file, v)
out_files.append(out_file)
return out_files
Expand All @@ -316,8 +333,8 @@ def save_metadata(file_alf, dico):
"""
Writes a meta data file matching a current alf file object.
For example given an alf file
`clusters.ccf_location.ssv` this will write a dictionary in json format in
`clusters.ccf_location.metadata.json`
`clusters.ccfLocation.ssv` this will write a dictionary in json format in
`clusters.ccfLocation.metadata.json`
Reserved keywords:
- **columns**: column names for binary tables.
- **row**: row names for binary tables.
Expand All @@ -327,6 +344,7 @@ def save_metadata(file_alf, dico):
:param dico: dictionary containing meta-data.
:return: None
"""
assert files.is_valid(file_alf.parts[-1]), 'ALF filename not valid'
file_meta_data = file_alf.parent / (file_alf.stem + '.metadata.json')
with open(file_meta_data, 'w+') as fid:
fid.write(json.dumps(dico, indent=1))
Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/DLC_pupil_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def plot_mean_std_around_event(event, diameter, times, eid):
dclass_output=True)
alf_path = Path(D.local_path[0]).parent.parent / 'alf'

trials = alf.io.load_object(alf_path, '_ibl_trials')
trials = alf.io.load_object(alf_path, 'trials')
add_stim_off_times(trials)

times = np.load(alf_path / '_ibl_leftCamera.times.npy')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@
alf_probe_dir = Path.joinpath(alf_dir, probe)

# get trials bunch
trials = aio.load_object(alf_dir, '_ibl_trials')
trials = aio.load_object(alf_dir, 'trials')

# plot peth without raster (spike times, all cluster ids, event times, cluster id)
bb.plot.peri_event_time_histogram(spks_b.times, spks_b.clusters, trials.goCue_times, 1)
Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/brainbox_plot_peth_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
ses_path = datasets[0].local_path.parent # local path where the data has been downloaded

spikes = alf.io.load_object(ses_path, 'spikes')
trials = alf.io.load_object(ses_path, '_ibl_trials')
trials = alf.io.load_object(ses_path, 'trials')

# For a simple peth plot without a raster, all we need to input is spike times, clusters, event
# times, and the identity of the cluster we want to plot, e.g. in this case cluster 121
Expand Down
4 changes: 2 additions & 2 deletions brainbox/examples/count_wheel_time_impossibilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def check_wheel_angle(eid):
D = one.load(eid, dataset_types=Dataset_types, clobber=False, download_only=True)
session_path = Path(D[0]).parent

wheel = alf.io.load_object(session_path, '_ibl_wheel')
trials = alf.io.load_object(session_path, '_ibl_trials')
wheel = alf.io.load_object(session_path, 'wheel')
trials = alf.io.load_object(session_path, 'trials')
reward_success = trials['feedback_times'][trials['feedbackType'] == 1]
reward_failure = trials['feedback_times'][trials['feedbackType'] == -1]

Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/dim_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def color_3D_projection(
spikes = alf.io.load_object(alf_path, 'spikes')
clusters = alf.io.load_object(alf_path, 'clusters')
channels = alf.io.load_object(alf_path, 'channels')
trials = alf.io.load_object(alf_path, '_ibl_trials')
trials = alf.io.load_object(alf_path, 'trials')

# Print number of clusters for each brain region
locDict_bothProbes = clusters['brainAcronyms']['brainAcronyms'].to_dict()
Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/plot_all_peths.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def plot_multi_peths(
# load objects
spikes = ioalf.load_object(alf_path, 'spikes')
clusters = ioalf.load_object(alf_path, 'clusters')
trials = ioalf.load_object(alf_path, '_ibl_trials')
trials = ioalf.load_object(alf_path, 'trials')

# containers to store results
align_events = ['stimOn', 'stimOff', 'feedback']
Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/raster_cluster_ordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
spikes = ioalf.load_object(session_path, 'spikes')
clusters = ioalf.load_object(session_path, 'clusters')
channels = ioalf.load_object(session_path, 'channels')
trials = ioalf.load_object(session_path, '_ibl_trials')
trials = ioalf.load_object(session_path, 'trials')

# compute raster map as a function of cluster number
R, times, clusters = bincount2D(spikes['times'], spikes['clusters'], T_BIN)
Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/raster_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
spikes = ioalf.load_object(session_path, 'spikes')
clusters = ioalf.load_object(session_path, 'clusters')
channels = ioalf.load_object(session_path, 'channels')
trials = ioalf.load_object(session_path, '_ibl_trials')
trials = ioalf.load_object(session_path, 'trials')

# compute raster map as a function of cluster number
R, times, clusters = bincount2D(spikes['times'], spikes['clusters'], T_BIN)
Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/raster_per_trial.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
spikes = alf.io.load_object(alf_path, 'spikes')
clusters = alf.io.load_object(alf_path, 'clusters')
channels = alf.io.load_object(alf_path, 'channels')
trials = alf.io.load_object(alf_path, '_ibl_trials')
trials = alf.io.load_object(alf_path, 'trials')

T_BIN = 0.01 # time bin in sec

Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/simplest_peth_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ses_path = datasets[0].local_path.parent

spikes = alf.io.load_object(ses_path, 'spikes')
trials = alf.io.load_object(ses_path, '_ibl_trials')
trials = alf.io.load_object(ses_path, 'trials')

peth, bs = calculate_peths(spikes.times, spikes.clusters, [225, 52], trials.goCue_times)

Expand Down
2 changes: 1 addition & 1 deletion brainbox/examples/simplest_peth_plot_aligned_to_StimON.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ses_path = datasets[0].local_path.parent #local path where the data has been downloaded

spikes = alf.io.load_object(ses_path, 'spikes')
trials = alf.io.load_object(ses_path, '_ibl_trials')
trials = alf.io.load_object(ses_path, 'trials')

# check which neurons are responsive
#are_neurons_responsive(spike_times,spike_clusters,stimulus_intervals=None,spontaneous_period=None,p_value_threshold=.05):
Expand Down
2 changes: 1 addition & 1 deletion brainbox/population/cca.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def get_event_bin_indexes(event_times, bin_times, window):
spikes = ioalf.load_object(session_path, 'spikes')
clusters = ioalf.load_object(session_path, 'clusters')
# channels = ioalf.load_object(session_path, 'channels')
trials = ioalf.load_object(session_path, '_ibl_trials')
trials = ioalf.load_object(session_path, 'trials')

# bin spikes and get trial IDs associated with them
binned_spikes, binned_trialIDs, _ = bin_spikes_trials(spikes, trials, bin_size=0.01)
Expand Down
4 changes: 2 additions & 2 deletions brainbox/quality/lfp_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _plot_spectra(outpath, typ, savefig=True):
TODO document this function
'''

spec = alf.io.load_object(outpath, '_spikeglx_ephysQcFreq' + typ.upper())
spec = alf.io.load_object(outpath, 'ephysQcFreq' + typ.upper(), namespace='spikeglx')

# hack to ensure a single key name
if 'power.probe_00' in spec.keys():
Expand Down Expand Up @@ -62,7 +62,7 @@ def _plot_rmsmap(outpath, typ, savefig=True):
TODO document this function
'''

rmsmap = alf.io.load_object(outpath, '_spikeglx_ephysQcTime' + typ.upper())
rmsmap = alf.io.load_object(outpath, 'ephysQcTime' + typ.upper(), namespace='spikeglx')

# hack to ensure a single key name
if 'times.probe_00' in rmsmap.keys():
Expand Down
3 changes: 2 additions & 1 deletion examples/WIP/bpod-qc-sound.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
n_trial = len(c)

# -- Get spectrogram
TF = alf.io.load_object(session_path.joinpath('raw_behavior_data'), '_iblmic_audioSpectrogram')
TF = alf.io.load_object(session_path.joinpath('raw_behavior_data'), 'audioSpectrogram',
namespace='iblmic')

# -- Detect goCue
# Assume quietness before goCue is played > use diff to detect onset
Expand Down
Loading

0 comments on commit 71f0325

Please sign in to comment.