Source code for alf.files

"""
Module for identifying and parsing ALF file names.

An ALF file has the following components (those in brackets are optional):
    (_namespace_)object.attribute(_timescale)(.extra.parts).ext

Note the following:
    Object attributes may not contain an underscore unless followed by 'times' or 'intervals'.
    A namespace must not contain extra underscores (i.e. `name_space` and `__namespace__` are not
    valid)
    ALF files must always have an extension

For more information, see the following documentation:
    https://docs.internationalbrainlab.org/en/latest/04_reference.html#alf

Created on Tue Sep 11 18:06:21 2018

@author: Miles
"""
import re
import os
from fnmatch import fnmatch

# to include underscores: r'(?P<namespace>(?:^_)\w+(?:_))?'
# to treat _times and _intervals as timescale: (?P<attribute>[a-zA-Z]+)_?
# NB: Inside a character class `\b` denotes a backspace character, not a word boundary, so the
# lookaheads that follow `_times` / `_intervals` only test for an underscore or a period.
ALF_EXP = re.compile(
    # Optional namespace, which must be wrapped in single underscores
    r'^_?(?P<namespace>(?<=_)[a-zA-Z0-9]+)?_?'
    # Object name, terminated by the first period
    r'(?P<object>\w+)\.'
    # Attribute; the only underscores permitted are those preceding 'times' or 'intervals'
    r'(?P<attribute>[a-zA-Z0-9]+(?:_times(?=[_.])|_intervals(?=[_.]))?)_?'
    # Optional timescale, separated from the attribute by an underscore
    r'(?P<timescale>(?:_?)\w+)*\.?'
    # Optional period-separated extra parts
    r'(?P<extra>[.\w-]+)*\.'
    # Mandatory file extension
    r'(?P<extension>\w+$)')
# Human-readable summary of the ALF path specification (bracketed parts are optional)
STR_SPEC = 'collection/(_namespace_)object.attribute(_timescale)(.extra.parts).ext'


def is_valid(filename):
    """
    Check whether a file name conforms to the ALF naming specification.

    Examples:
        >>> is_valid('trials.feedbackType.npy')
        True
        >>> is_valid('_ns_obj.attr1.2622b17c-9408-4910-99cb-abf16d9225b9.metadata.json')
        True
        >>> is_valid('spike_train.npy')
        False
        >>> is_valid('channels._phy_ids.csv')
        False

    Args:
        filename (str): The name of the file

    Returns:
        bool: True if the name matches the ALF pattern, otherwise False
    """
    # A match object is always truthy, so the absence of a match is the only falsy outcome
    matched = ALF_EXP.match(filename)
    return bool(matched)
def alf_parts(filename, as_dict=False):
    """
    Split an ALF file name into its constituent parts.

    Examples:
        >>> alf_parts('_namespace_obj.times_timescale.extra.foo.ext')
        ('namespace', 'obj', 'times', 'timescale', 'extra.foo', 'ext')
        >>> alf_parts('spikes.clusters.npy', as_dict=True)
        {'namespace': None, 'object': 'spikes', 'attribute': 'clusters',
         'timescale': None, 'extra': None, 'extension': 'npy'}
        >>> alf_parts('spikes.times_ephysClock.npy')
        (None, 'spikes', 'times', 'ephysClock', None, 'npy')
        >>> alf_parts('_iblmic_audioSpectrogram.frequencies.npy')
        ('iblmic', 'audioSpectrogram', 'frequencies', None, None, 'npy')
        >>> alf_parts('_spikeglx_ephysData_g0_t0.imec.wiring.json')
        ('spikeglx', 'ephysData_g0_t0', 'imec', None, 'wiring', 'json')
        >>> alf_parts('_spikeglx_ephysData_g0_t0.imec0.lf.bin')
        ('spikeglx', 'ephysData_g0_t0', 'imec0', None, 'lf', 'bin')
        >>> alf_parts('_ibl_trials.goCue_times_bpod.csv')
        ('ibl', 'trials', 'goCue_times', 'bpod', None, 'csv')

    Args:
        filename (str): The name of the file
        as_dict (bool): when True a dict of matches is returned instead of a tuple

    Returns:
        namespace (str): The _namespace_ or None if not present
        object (str): ALF object
        attribute (str): The ALF attribute
        timescale (str): The ALF _timescale or None if not present
        extra (str): Any extra parts to the filename, or None if not present
        extension (str): The file extension

    Raises:
        ValueError: the file name does not match the ALF pattern
    """
    parsed = ALF_EXP.match(filename)
    if parsed is None:
        raise ValueError('Invalid ALF filename')
    if as_dict:
        return parsed.groupdict()
    return parsed.groups()
def to_alf(object, attribute, extension, namespace=None, timescale=None, extra=None):
    """
    Given a set of ALF file parts, return a valid ALF file name.  Essential periods and
    underscores are added by the function.

    Args:
        object (str): The ALF object name
        attribute (str): The ALF object attribute name
        extension (str): The file extension; a single leading period is ignored
        namespace (str): An optional namespace
        timescale (str): An optional timescale
        extra (str, tuple): One or more optional extra ALF attributes

    Returns:
        filename (str): a file name string built from the ALF parts

    Raises:
        TypeError: no extension was provided, or it consisted solely of a period
        ValueError: the attribute contains an underscore that is not followed by 'times' or
            'intervals'; any of object, attribute, namespace, extension or timescale contains
            a period; or the namespace contains an underscore

    Examples:
        >>> to_alf('spikes', 'times', 'ssv')
        'spikes.times.ssv'
        >>> to_alf('spikes', 'times', '.ssv')
        'spikes.times.ssv'
        >>> to_alf('spikes', 'times', 'ssv', namespace='ibl')
        '_ibl_spikes.times.ssv'
        >>> to_alf('spikes', 'times', 'ssv', namespace='ibl', timescale='ephysClock')
        '_ibl_spikes.times_ephysClock.ssv'
        >>> to_alf('spikes', 'times', 'npy', namespace='ibl', timescale='ephysClock', extra='raw')
        '_ibl_spikes.times_ephysClock.raw.npy'
        >>> to_alf('wheel', 'timestamps', 'npy', 'ibl', 'bpod', ('raw', 'v12'))
        '_ibl_wheel.timestamps_bpod.raw.v12.npy'
    """
    # Validate inputs
    # Strip the leading period before checking for emptiness, otherwise an extension of '.'
    # would slip through and produce a file name with no extension
    if extension and extension.startswith('.'):
        extension = extension[1:]
    if not extension:
        raise TypeError('An extension must be provided')
    if re.search('_(?!times$|intervals)', attribute):
        raise ValueError('Object attributes must not contain underscores')
    if any(pt is not None and '.' in pt for pt in
           (object, attribute, namespace, extension, timescale)):
        raise ValueError('ALF parts must not contain a period (`.`)')
    if '_' in (namespace or ''):
        raise ValueError('Namespace must not contain extra underscores')

    # Optional extras may be provided as string or tuple of strings
    if not extra:
        extra = ()
    elif isinstance(extra, str):
        extra = extra.split('.')

    # Construct ALF file
    parts = (('_%s_' % namespace if namespace else '') + object,
             attribute + ('_%s' % timescale if timescale else ''),
             *extra,
             extension)
    return '.'.join(parts)
def _parts_match(parts, filters):
    """Return True if the parsed ALF parts satisfy every filter (logical AND)."""
    for key, pattern in filters.items():
        value = parts[key]
        if pattern is None or value is None:
            # If either is None, both should be None to match
            if pattern is not value:
                return False
        elif key == 'extra':
            # Every non-empty extra field requested must be present in the file's extra parts
            present = value.split('.')
            if not all(elem in present for elem in pattern if elem):
                return False
        elif not fnmatch(value, pattern):
            # Any other part is compared as a string, allowing wildcards
            return False
    return True


def filter_by(alf_path, **kwargs):
    """
    Given a path and optional filters, returns all ALF files and their associated parts. The
    filters constitute a logical AND.

    Args:
        alf_path (str): A path to a directory containing ALF files
        object (str): filter by a given object (e.g. 'spikes')
        attribute (str): filter by a given attribute (e.g. 'intervals')
        extension (str): filter by extension (e.g. 'npy')
        namespace (str): filter by a given namespace (e.g. 'ibl') or None for files without one
        timescale (str): filter by a given timescale (e.g. 'bpod') or None for files without one
        extra (str, list): filter by extra parameters (e.g. 'raw') or None for files without
            extra parts.  NB: Wild cards not permitted here.

    Returns:
        alf_files (list): list of the ALF file names that satisfy every filter
        attributes (list of tuples): the parsed parts of each returned file, in the order
            (namespace, object, attribute, timescale, extra, extension)

    Raises:
        TypeError: a keyword argument is not one of the ALF part names

    Examples:
        # Filter files with universal timescale
        filter_by(alf_path, timescale=None)

        # Filter files by a given ALF object
        filter_by(alf_path, object='wheel')

        # Filter using wildcard, e.g. 'wheel' and 'wheelMoves' ALF objects
        filter_by(alf_path, object='wh*')

        # Filter all intervals that are in bpod time
        filter_by(alf_path, attribute='intervals', timescale='bpod')
    """
    alf_files = [f for f in os.listdir(alf_path) if is_valid(f)]
    attributes = [alf_parts(f, as_dict=True) for f in alf_files]

    if kwargs:
        # Validate keyword arguments against regex group names
        invalid = kwargs.keys() - ALF_EXP.groupindex.keys()
        if invalid:
            # Report the function name (not the module name); sort for a deterministic message
            raise TypeError("filter_by() got an unexpected keyword argument '%s'"
                            % sorted(invalid)[0])

        # Ensure 'extra' input is a list; if str split on dot
        filters = dict(kwargs)
        if isinstance(filters.get('extra'), str):
            filters['extra'] = filters['extra'].split('.')

        # Keep file names and parts paired while filtering.  Removing entries by value could
        # drop the wrong element when two files parse to equal parts, leaving the two lists
        # out of step.
        kept = [(file, parts) for file, parts in zip(alf_files, attributes)
                if _parts_match(parts, filters)]
        alf_files = [file for file, _ in kept]
        attributes = [parts for _, parts in kept]

    return alf_files, [tuple(attr.values()) for attr in attributes]
if __name__ == "__main__":
    # Run the examples embedded in this module's docstrings as tests
    import doctest

    _doctest_flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
    doctest.testmod(optionflags=_doctest_flags)