load_object fix, tests and flake

Yiman00 · Jul 21, 2020 · 71f0325 · 71f0325
1 parent 664c0bd
commit 71f0325
Show file tree

Hide file tree

Showing 52 changed files with 658 additions and 154 deletions.
diff --git a/alf/files.py b/alf/files.py
@@ -4,6 +4,12 @@
 An ALF file has the following components (those in brackets are optional):
     (_namespace_)object.attribute(_timescale)(.extra.parts).ext
 
+Note the following:
+    Object attributes may not contain an underscore unless followed by 'times' or 'intervals'.
+    A namespace must not contain extra underscores (e.g. `name_space` and `__namespace__` are not
+    valid).
+    ALF files must always have an extension.
+
 For more information, see the following documentation:
     https://docs.internationalbrainlab.org/en/latest/04_reference.html#alf
 
@@ -16,12 +22,13 @@
 from fnmatch import fnmatch
 
 # to include underscores: r'(?P<namespace>(?:^_)\w+(?:_))?'
+# to treat _times and _intervals as timescale: (?P<attribute>[a-zA-Z]+)_?
 ALF_EXP = re.compile(
-    r'^_?(?P<namespace>(?<=_)[a-zA-Z]+)?_?'
+    r'^_?(?P<namespace>(?<=_)[a-zA-Z0-9]+)?_?'
     r'(?P<object>\w+)\.'
-    r'(?P<attribute>[a-zA-Z]+)_?'
+    r'(?P<attribute>[a-zA-Z0-9]+(?:_times(?=[_\b.])|_intervals(?=[_\b.]))?)_?'
     r'(?P<timescale>(?:_?)\w+)*\.?'
-    r'(?P<extra>[.\w]+)*\.'
+    r'(?P<extra>[.\w-]+)*\.'
     r'(?P<extension>\w+$)')
 
 
@@ -32,7 +39,9 @@ def is_valid(filename):
     Examples:
         >>> is_valid('trials.feedbackType.npy')
         True
-        >>> is_valid('spike_trian.npy')
+        >>> is_valid('_ns_obj.attr1.2622b17c-9408-4910-99cb-abf16d9225b9.metadata.json')
+        True
+        >>> is_valid('spike_train.npy')
         False
         >>> is_valid('channels._phy_ids.csv')
         False
@@ -66,8 +75,10 @@ def alf_parts(filename, as_dict=False):
         ('iblmic', 'audioSpectrogram', 'frequencies', None, None, 'npy')
         >>> alf_parts('_spikeglx_ephysData_g0_t0.imec.wiring.json')
         ('spikeglx', 'ephysData_g0_t0', 'imec', None, 'wiring', 'json')
+        >>> alf_parts('_spikeglx_ephysData_g0_t0.imec0.lf.bin')
+        ('spikeglx', 'ephysData_g0_t0', 'imec0', None, 'lf', 'bin')
         >>> alf_parts('_ibl_trials.goCue_times_bpod.csv')
-        ('ibl', 'trials', 'goCue', 'times_bpod', None, 'csv')
+        ('ibl', 'trials', 'goCue_times', 'bpod', None, 'csv')
 
     Args:
         filename (str): The name of the file
@@ -89,7 +100,8 @@ def alf_parts(filename, as_dict=False):
 
 def to_alf(object, attribute, extension, namespace=None, timescale=None, extra=None):
     """
-    Given a set of ALF file parts, return a valid ALF file name
+    Given a set of ALF file parts, return a valid ALF file name.  Essential periods and
+    underscores are added by the function.
 
     Args:
         object (str): The ALF object name
@@ -114,10 +126,26 @@ def to_alf(object, attribute, extension, namespace=None, timescale=None, extra=N
     >>> to_alf('wheel', 'timestamps', 'npy', 'ibl', 'bpod', ('raw', 'v12'))
     '_ibl_wheel.timestamps_bpod.raw.v12.npy'
     """
+    # Validate inputs
+    if not extension:
+        raise TypeError('An extension must be provided')
+    elif extension.startswith('.'):
+        extension = extension[1:]
+    if re.search('_(?!times$|intervals)', attribute):
+        raise ValueError('Object attributes must not contain underscores')
+    if any(pt is not None and '.' in pt for pt in
+           (object, attribute, namespace, extension, timescale)):
+        raise ValueError('ALF parts must not contain a period (`.`)')
+    if '_' in (namespace or ''):
+        raise ValueError('Namespace must not contain extra underscores')
+
+    # Optional extras may be provided as string or tuple of strings
     if not extra:
         extra = ()
     elif isinstance(extra, str):
         extra = extra.split('.')
+
+    # Construct ALF file
     parts = (('_%s_' % namespace if namespace else '') + object,
              attribute + ('_%s' % timescale if timescale else ''),
              *extra,
@@ -179,7 +207,7 @@ def filter_by(alf_path, **kwargs):
                     match = v is attr[k]
                 elif k == 'extra':
                     # Check all provided extra fields match those in ALF
-                    match = all(elem in attr[k].split('.') for elem in v)
+                    match = all(elem in attr[k].split('.') for elem in v if elem)
                 else:
                     # Check given attribute matches, allowing wildcards
                     match = fnmatch(attr[k], v)
@@ -191,20 +219,6 @@ def filter_by(alf_path, **kwargs):
 
     return alf_files, [tuple(attr.values()) for attr in attributes]
 
-# def attributes_as_keys(parts):
-#     """
-#     parts = [('ibl', 'trials', 'goCue', 'times', 'bpod', 'raw', 'npy'),
-#          (None, 'trials', 'pLeft', None, None, 'npy'),
-#          ('ibl', 'trials', 'goCue', 'times', 'pbod', 'raw', 'csv')]
-#     :param parts:
-#     :return:
-#     """
-#     attributes = [p[2] if not p[3] else '_'.join(p[2:4]) for p in parts]
-#     seen = set()
-#     dupes = [x for x in attributes if ((x in seen) is (seen.add(x) is None))]
-#     for dup in dupes:
-#         if dup:
-
 
 if __name__ == "__main__":
     import doctest

diff --git a/alf/io.py b/alf/io.py
@@ -139,17 +139,18 @@ def read_ts(filename):
         filename = Path(filename)
 
     # alf format is object.attribute.extension, for example '_ibl_wheel.position.npy'
-    _, obj, attr, *_, ext = files.alf_parts(filename)
+    _, obj, attr, *_, ext = files.alf_parts(filename.parts[-1])
 
     # looking for matching object with attribute timestamps: '_ibl_wheel.timestamps.npy'
-    time_file = files.filter_by(filename.parent, object=obj, attribute='timestamps', extension=ext)
+    (time_file,), _ = files.filter_by(filename.parent, object=obj,
+                                      attribute='timestamps', extension=ext)
 
     if not time_file:
         name = files.to_alf(obj, attr, ext)
         _logger.error(name + ' not found! no time-scale for' + str(filename))
         raise FileNotFoundError(name + ' not found! no time-scale for' + str(filename))
 
-    return np.load(time_file), np.load(filename)
+    return np.load(filename.parent / time_file), np.load(filename)
 
 
 def load_file_content(fil):
@@ -185,7 +186,7 @@ def load_file_content(fil):
     return Path(fil)
 
 
-def _ls(alfpath, object, **kwargs):
+def _ls(alfpath, object=None, **kwargs):
     """
     Given a path, an object and a filter, returns all files and associated attributes
     :param alfpath: containing folder
@@ -218,7 +219,7 @@ def exists(alfpath, object, attributes=None, **kwargs):
     :param alfpath: str or pathlib.Path of the folder to look into
     :param object: str ALF object name
     :param attributes: list or list of strings for wanted attributes
-    :return: Bool. For multiple attributes, returns True only if all attributes are found
+    :return: bool. For multiple attributes, returns True only if all attributes are found
     """
 
     # if the object is not found, return False
@@ -238,7 +239,7 @@ def exists(alfpath, object, attributes=None, **kwargs):
     return set(attributes).issubset(attributes_found)
 
 
-def load_object(alfpath, object=None, **kwargs):
+def load_object(alfpath, object=None, short_keys=False, **kwargs):
     """
     Reads all files (ie. attributes) sharing the same object.
     For example, if the file provided to the function is `spikes.times`, the function will
@@ -249,14 +250,28 @@ def load_object(alfpath, object=None, **kwargs):
 
     :param alfpath: any alf file pertaining to the object OR directory containing files
     :param object: if a directory is provided and object is None, all valid ALF files returned
+    :param short_keys: by default, the output dictionary keys are the attribute and timescale
+     (joined by an underscore) followed by any extra parts (appended after a dot). Use True to
+     shorten the keys to the attribute and timescale only.
     :return: a dictionary of all attributes pertaining to the object
 
-    example: spikes = ibllib.io.alf.load_object('/path/to/my/alffolder/', 'spikes')
-    FIXME Overwrites if two files with same object and attribute
+    Examples:
+        # Load `spikes` object
+        spikes = ibllib.io.alf.load_object('/path/to/my/alffolder/', 'spikes')
+
+        # Load `trials` object under the `ibl` namespace
+        trials = ibllib.io.alf.load_object(session_path, 'trials', namespace='ibl')
+
     """
-    # prepare the glob input argument if it's a list
+    if Path(alfpath).is_dir() and object is None:
+        raise ValueError('If a directory is provided, the object name should be provided too')
     files_alf, parts = _ls(alfpath, object, **kwargs)
-    attributes = [part[2] + '_' + part[3] if part[3] else part[2] for part in parts]
+    # Take attribute and timescale from parts list
+    attributes = [p[2] if not p[3] else '_'.join(p[2:4]) for p in parts]
+    if not short_keys:  # Include extra parts in the keys
+        attributes = [attr + ('.' + p[4] if p[4] else '') for attr, p in zip(attributes, parts)]
+    assert len(set(attributes)) == len(attributes), \
+        'multiple object files with the same attribute found, please restrict on namespace etc.'
     out = AlfBunch({})
     # load content for each file
     for fil, att in zip(files_alf, attributes):
@@ -283,7 +298,7 @@ def load_object(alfpath, object=None, **kwargs):
     return out
 
 
-def save_object_npy(alfpath, dico, object, parts=None, namespace=None):
+def save_object_npy(alfpath, dico, object, parts=None, namespace=None, timescale=None):
     """
     Saves a dictionary in alf format using object as object name and dictionary keys as attribute
     names. Dimensions have to be consistent.
@@ -293,8 +308,9 @@ def save_object_npy(alfpath, dico, object, parts=None, namespace=None):
     :param alfpath: path of the folder to save data to
     :param dico: dictionary to save to npy; keys correspond to ALF attributes
     :param object: name of the object to save
-    :param namespace: the optional namespace of the object
     :param parts: extra parts to the ALF name
+    :param namespace: the optional namespace of the object
+    :param timescale: the optional timescale of the object
     :return: List of written files
 
     example: ibllib.io.alf.save_object_npy('/path/to/my/alffolder/', spikes, 'spikes')
@@ -306,7 +322,8 @@ def save_object_npy(alfpath, dico, object, parts=None, namespace=None):
                          str([(k, v.shape) for k, v in dico.items()]))
     out_files = []
     for k, v in dico.items():
-        out_file = alfpath / files.to_alf(object, k, 'npy', extra=parts, namespace=namespace)
+        out_file = alfpath / files.to_alf(object, k, 'npy',
+                                          extra=parts, namespace=namespace, timescale=timescale)
         np.save(out_file, v)
         out_files.append(out_file)
     return out_files
@@ -316,8 +333,8 @@ def save_metadata(file_alf, dico):
     """
     Writes a meta data file matching a current alf file object.
     For example given an alf file
-    `clusters.ccf_location.ssv` this will write a dictionary in json format in
-    `clusters.ccf_location.metadata.json`
+    `clusters.ccfLocation.ssv` this will write a dictionary in json format in
+    `clusters.ccfLocation.metadata.json`
     Reserved keywords:
      - **columns**: column names for binary tables.
      - **row**: row names for binary tables.
@@ -327,6 +344,7 @@ def save_metadata(file_alf, dico):
     :param dico: dictionary containing meta-data.
     :return: None
     """
+    assert files.is_valid(file_alf.parts[-1]), 'ALF filename not valid'
     file_meta_data = file_alf.parent / (file_alf.stem + '.metadata.json')
     with open(file_meta_data, 'w+') as fid:
         fid.write(json.dumps(dico, indent=1))

diff --git a/brainbox/examples/DLC_pupil_event.py b/brainbox/examples/DLC_pupil_event.py
@@ -187,7 +187,7 @@ def plot_mean_std_around_event(event, diameter, times, eid):
         dclass_output=True)
     alf_path = Path(D.local_path[0]).parent.parent / 'alf'
 
-    trials = alf.io.load_object(alf_path, '_ibl_trials')
+    trials = alf.io.load_object(alf_path, 'trials')
     add_stim_off_times(trials)
 
     times = np.load(alf_path / '_ibl_leftCamera.times.npy')

diff --git a/brainbox/examples/Loading_from_ONE_and_running_functions.py b/brainbox/examples/Loading_from_ONE_and_running_functions.py
@@ -171,7 +171,7 @@
 alf_probe_dir = Path.joinpath(alf_dir, probe)
 
 # get trials bunch
-trials = aio.load_object(alf_dir, '_ibl_trials')
+trials = aio.load_object(alf_dir, 'trials')
 
 # plot peth without raster (spike times, all cluster ids, event times, cluster id)
 bb.plot.peri_event_time_histogram(spks_b.times, spks_b.clusters, trials.goCue_times, 1)

diff --git a/brainbox/examples/brainbox_plot_peth_func.py b/brainbox/examples/brainbox_plot_peth_func.py
@@ -11,7 +11,7 @@
 ses_path = datasets[0].local_path.parent  # local path where the data has been downloaded
 
 spikes = alf.io.load_object(ses_path, 'spikes')
-trials = alf.io.load_object(ses_path, '_ibl_trials')
+trials = alf.io.load_object(ses_path, 'trials')
 
 # For a simple peth plot without a raster, all we need to input is spike times, clusters, event
 # times, and the identity of the cluster we want to plot, e.g. in this case cluster 121

diff --git a/brainbox/examples/count_wheel_time_impossibilities.py b/brainbox/examples/count_wheel_time_impossibilities.py
@@ -25,8 +25,8 @@ def check_wheel_angle(eid):
     D = one.load(eid, dataset_types=Dataset_types, clobber=False, download_only=True)
     session_path = Path(D[0]).parent
 
-    wheel = alf.io.load_object(session_path, '_ibl_wheel')
-    trials = alf.io.load_object(session_path, '_ibl_trials')
+    wheel = alf.io.load_object(session_path, 'wheel')
+    trials = alf.io.load_object(session_path, 'trials')
     reward_success = trials['feedback_times'][trials['feedbackType'] == 1]
     reward_failure = trials['feedback_times'][trials['feedbackType'] == -1]
 

diff --git a/brainbox/examples/dim_reduction.py b/brainbox/examples/dim_reduction.py
@@ -146,7 +146,7 @@ def color_3D_projection(
     spikes = alf.io.load_object(alf_path, 'spikes')
     clusters = alf.io.load_object(alf_path, 'clusters')
     channels = alf.io.load_object(alf_path, 'channels')
-    trials = alf.io.load_object(alf_path, '_ibl_trials')
+    trials = alf.io.load_object(alf_path, 'trials')
 
     # Print number of clusters for each brain region
     locDict_bothProbes = clusters['brainAcronyms']['brainAcronyms'].to_dict()

diff --git a/brainbox/examples/plot_all_peths.py b/brainbox/examples/plot_all_peths.py
@@ -259,7 +259,7 @@ def plot_multi_peths(
     # load objects
     spikes = ioalf.load_object(alf_path, 'spikes')
     clusters = ioalf.load_object(alf_path, 'clusters')
-    trials = ioalf.load_object(alf_path, '_ibl_trials')
+    trials = ioalf.load_object(alf_path, 'trials')
 
     # containers to store results
     align_events = ['stimOn', 'stimOff', 'feedback']

diff --git a/brainbox/examples/raster_cluster_ordered.py b/brainbox/examples/raster_cluster_ordered.py
@@ -19,7 +19,7 @@
 spikes = ioalf.load_object(session_path, 'spikes')
 clusters = ioalf.load_object(session_path, 'clusters')
 channels = ioalf.load_object(session_path, 'channels')
-trials = ioalf.load_object(session_path, '_ibl_trials')
+trials = ioalf.load_object(session_path, 'trials')
 
 # compute raster map as a function of cluster number
 R, times, clusters = bincount2D(spikes['times'], spikes['clusters'], T_BIN)

diff --git a/brainbox/examples/raster_clusters.py b/brainbox/examples/raster_clusters.py
@@ -20,7 +20,7 @@
 spikes = ioalf.load_object(session_path, 'spikes')
 clusters = ioalf.load_object(session_path, 'clusters')
 channels = ioalf.load_object(session_path, 'channels')
-trials = ioalf.load_object(session_path, '_ibl_trials')
+trials = ioalf.load_object(session_path, 'trials')
 
 # compute raster map as a function of cluster number
 R, times, clusters = bincount2D(spikes['times'], spikes['clusters'], T_BIN)

diff --git a/brainbox/examples/raster_per_trial.py b/brainbox/examples/raster_per_trial.py
@@ -9,7 +9,7 @@
 spikes = alf.io.load_object(alf_path, 'spikes')
 clusters = alf.io.load_object(alf_path, 'clusters')
 channels = alf.io.load_object(alf_path, 'channels')
-trials = alf.io.load_object(alf_path, '_ibl_trials')
+trials = alf.io.load_object(alf_path, 'trials')
 
 T_BIN = 0.01  # time bin in sec
 

diff --git a/brainbox/examples/simplest_peth_plot.py b/brainbox/examples/simplest_peth_plot.py
@@ -12,7 +12,7 @@
 ses_path = datasets[0].local_path.parent
 
 spikes = alf.io.load_object(ses_path, 'spikes')
-trials = alf.io.load_object(ses_path, '_ibl_trials')
+trials = alf.io.load_object(ses_path, 'trials')
 
 peth, bs = calculate_peths(spikes.times, spikes.clusters, [225, 52], trials.goCue_times)
 

diff --git a/brainbox/examples/simplest_peth_plot_aligned_to_StimON.py b/brainbox/examples/simplest_peth_plot_aligned_to_StimON.py
@@ -16,7 +16,7 @@
 ses_path = datasets[0].local_path.parent #local path where the data has been downloaded
 
 spikes = alf.io.load_object(ses_path, 'spikes')
-trials = alf.io.load_object(ses_path, '_ibl_trials')
+trials = alf.io.load_object(ses_path, 'trials')
 
 # check which neurons are responsive
 #are_neurons_responsive(spike_times,spike_clusters,stimulus_intervals=None,spontaneous_period=None,p_value_threshold=.05):

diff --git a/brainbox/population/cca.py b/brainbox/population/cca.py
@@ -393,7 +393,7 @@ def get_event_bin_indexes(event_times, bin_times, window):
     spikes = ioalf.load_object(session_path, 'spikes')
     clusters = ioalf.load_object(session_path, 'clusters')
     # channels = ioalf.load_object(session_path, 'channels')
-    trials = ioalf.load_object(session_path, '_ibl_trials')
+    trials = ioalf.load_object(session_path, 'trials')
 
     # bin spikes and get trial IDs associated with them
     binned_spikes, binned_trialIDs, _ = bin_spikes_trials(spikes, trials, bin_size=0.01)

diff --git a/brainbox/quality/lfp_qc.py b/brainbox/quality/lfp_qc.py
@@ -27,7 +27,7 @@ def _plot_spectra(outpath, typ, savefig=True):
     TODO document this function
     '''
 
-    spec = alf.io.load_object(outpath, '_spikeglx_ephysQcFreq' + typ.upper())
+    spec = alf.io.load_object(outpath, 'ephysQcFreq' + typ.upper(), namespace='spikeglx')
 
     # hack to ensure a single key name
     if 'power.probe_00' in spec.keys():
@@ -62,7 +62,7 @@ def _plot_rmsmap(outpath, typ, savefig=True):
     TODO document this function
     '''
 
-    rmsmap = alf.io.load_object(outpath, '_spikeglx_ephysQcTime' + typ.upper())
+    rmsmap = alf.io.load_object(outpath, 'ephysQcTime' + typ.upper(), namespace='spikeglx')
 
     # hack to ensure a single key name
     if 'times.probe_00' in rmsmap.keys():

diff --git a/examples/WIP/bpod-qc-sound.py b/examples/WIP/bpod-qc-sound.py
@@ -45,7 +45,8 @@
     n_trial = len(c)
 
     # -- Get spectrogram
-    TF = alf.io.load_object(session_path.joinpath('raw_behavior_data'), '_iblmic_audioSpectrogram')
+    TF = alf.io.load_object(session_path.joinpath('raw_behavior_data'), 'audioSpectrogram',
+                            namespace='iblmic')
 
     # -- Detect goCue
     # Assume quietness before goCue isplayed > use diff to detect onset