Source code for one.alf.files

"""
Module for identifying and parsing ALF file names.

An ALF file has the following components (those in brackets are optional):
    (_namespace_)object.attribute(_timescale)(.extra.parts).ext

Note the following:
    Object attributes may not contain an underscore unless followed by 'times' or 'intervals'.
    A namespace must not contain extra underscores (i.e. `name_space` and `__namespace__` are not
    valid)
    ALF files must always have an extension

For more information, see the following documentation:
    https://int-brain-lab.github.io/iblenv/one_docs/one_reference.html#alf  # FIXME Change link

Created on Tue Sep 11 18:06:21 2018

@author: Miles
"""
from collections import OrderedDict
from datetime import datetime
from typing import Union, Optional
from pathlib import Path
import logging

from . import spec
from .spec import SESSION_SPEC, COLLECTION_SPEC, FILE_SPEC, REL_PATH_SPEC

_logger = logging.getLogger(__name__)


def rel_path_parts(rel_path, as_dict=False, assert_valid=True):
    """Split a relative ALF path into its named components.

    A relative path follows the pattern
    (collection/)(#revision#/)_namespace_object.attribute_timescale.extra.extension

    Parameters
    ----------
    rel_path : str, pathlib.Path
        A relative path.  Any object exposing an ``as_posix`` method is first converted to a
        forward-slash separated string.
    as_dict : bool
        If true, an OrderedDict keyed by the named groups of the relative path pattern is
        returned, otherwise a tuple of the group values is returned
    assert_valid : bool
        If true a ValueError is raised when the relative path cannot be parsed, otherwise an
        OrderedDict or tuple filled with None is returned

    Returns
    -------
    An OrderedDict if as_dict is true, or a tuple of parsed values

    Raises
    ------
    ValueError
        The path does not match the pattern and assert_valid is true
    """
    pattern = spec.regex(REL_PATH_SPEC)
    if hasattr(rel_path, 'as_posix'):
        rel_path = rel_path.as_posix()
    found = pattern.match(rel_path)

    if found is None:
        if assert_valid:
            raise ValueError('Invalid relative path')
        # No match: return one None per named group so the output shape is unchanged
        names = pattern.groupindex.keys()
        return OrderedDict.fromkeys(names) if as_dict else (None,) * len(names)

    groups = found.groupdict()
    if as_dict:
        return OrderedDict(**groups)
    return tuple(groups.values())
def session_path_parts(session_path: Union[str, Path], as_dict=False, assert_valid=True):
    """Parse a session path into the relevant parts.

    Parameters
    ----------
    session_path : str, pathlib.Path
        A session path.  Any object exposing an ``as_posix`` method is first converted to a
        forward-slash separated string, so path objects are accepted as well as strings.
    as_dict : bool
        If true, an OrderedDict of parts is returned with the keys ('lab', 'subject', 'date',
        'number'), otherwise a tuple of values is returned
    assert_valid : bool
        If true a ValueError is raised when the session cannot be parsed, otherwise an empty
        dict or tuple of Nones is returned

    Returns
    -------
    An OrderedDict if as_dict is true, or a tuple of parsed values

    Raises
    ------
    ValueError
        The session pattern is not found in the path and assert_valid is true
    """
    # Normalise path objects to a posix string; the regex search below requires a str
    if hasattr(session_path, 'as_posix'):
        session_path = session_path.as_posix()
    pattern = spec.regex(SESSION_SPEC)
    parsed = pattern.search(session_path)
    if parsed:
        groups = parsed.groupdict()
        return OrderedDict(**groups) if as_dict else tuple(groups.values())
    if assert_valid:
        raise ValueError('Invalid session path')
    # No match: return one None per named group so the output shape is unchanged
    empty = pattern.groupindex.keys()
    return OrderedDict.fromkeys(empty) if as_dict else (None,) * len(empty)
def filename_parts(filename, as_dict=False, assert_valid=True):
    """
    Return the parsed elements of a given ALF filename.

    Parameters
    ----------
    filename : str
        The name of the file
    as_dict : bool
        When true a dict of matches is returned
    assert_valid : bool
        When true an exception is raised when the filename cannot be parsed; when false a
        dict or tuple of Nones is returned instead

    Returns
    -------
    namespace : str
        The _namespace_ or None if not present
    object : str
        ALF object
    attribute : str
        The ALF attribute
    timescale : str
        The ALF _timescale or None if not present
    extra : str
        Any extra parts to the filename, or None if not present
    extension : str
        The file extension

    Raises
    ------
    ValueError
        The filename cannot be parsed and assert_valid is true

    Examples
    --------
    >>> filename_parts('_namespace_obj.times_timescale.extra.foo.ext')
    ('namespace', 'obj', 'times', 'timescale', 'extra.foo', 'ext')
    >>> filename_parts('spikes.clusters.npy', as_dict=True)
    {'namespace': None,
     'object': 'spikes',
     'attribute': 'clusters',
     'timescale': None,
     'extra': None,
     'extension': 'npy'}
    >>> filename_parts('spikes.times_ephysClock.npy')
    (None, 'spikes', 'times', 'ephysClock', None, 'npy')
    >>> filename_parts('_iblmic_audioSpectrogram.frequencies.npy')
    ('iblmic', 'audioSpectrogram', 'frequencies', None, None, 'npy')
    >>> filename_parts('_spikeglx_ephysData_g0_t0.imec.wiring.json')
    ('spikeglx', 'ephysData_g0_t0', 'imec', None, 'wiring', 'json')
    >>> filename_parts('_spikeglx_ephysData_g0_t0.imec0.lf.bin')
    ('spikeglx', 'ephysData_g0_t0', 'imec0', None, 'lf', 'bin')
    >>> filename_parts('_ibl_trials.goCue_times_bpod.csv')
    ('ibl', 'trials', 'goCue_times', 'bpod', None, 'csv')
    """
    pattern = spec.regex(FILE_SPEC)
    m = pattern.match(str(filename))
    if m:
        return OrderedDict(m.groupdict()) if as_dict else m.groups()
    if assert_valid:
        raise ValueError(f'Invalid ALF filename: "{filename}"')
    # No match: one None per named group.  Materialise the values as a tuple so the return
    # type matches the successful branch instead of leaking a dict view.
    empty = OrderedDict.fromkeys(pattern.groupindex.keys())
    return empty if as_dict else tuple(empty.values())
def path_parts(file_path: str) -> dict:
    """Placeholder: not yet implemented.

    No parsing is performed; the input is ignored and None is returned.
    NOTE(review): presumably intended to parse a full file path into its parts - confirm
    the intended contract before relying on the annotated `dict` return type.
    """
    return None
def folder_parts(folder_path: str) -> dict:
    """Placeholder: not yet implemented.

    No parsing is performed; the input is ignored and None is returned.
    NOTE(review): presumably intended to parse a folder path into its parts - confirm the
    intended contract before relying on the annotated `dict` return type.
    """
    return None
def _isdatetime(s: str) -> bool: try: datetime.strptime(s, '%Y-%m-%d') return True except ValueError: return False
def get_session_path(path: Union[str, Path]) -> Optional[Path]:
    """Returns the session path from any filepath if the date/number pattern is found.

    A session folder is recognised as an all-digit path part that directly follows a part
    formatted as a date (YYYY-MM-DD).  If the pattern occurs more than once, the last
    occurrence determines the returned path.

    Parameters
    ----------
    path : str, pathlib.Path
        A file or folder path; may be None

    Returns
    -------
    The path up to and including the session number folder, or None if `path` is None or no
    date/number pair is found
    """
    if path is None:
        return None
    if isinstance(path, str):
        path = Path(path)
    parts = path.parts
    sess = None
    for i, part in enumerate(parts):
        # The first part has no predecessor; without the i > 0 guard, parts[i - 1] would wrap
        # around to the final part and could produce a false match.
        if i > 0 and part.isdigit() and _isdatetime(parts[i - 1]):
            sess = Path().joinpath(*parts[:i + 1])
    return sess
def get_alf_path(path: Union[str, Path]) -> Optional[str]:
    """Returns the ALF part of a path or filename.

    Attempts to return the first valid part of the path, first searching for a session path,
    then relative path (collection/revision/filename), then just the filename.  If all
    invalid, None is returned.

    NB: There is no way to discern between lab/Subjects/subject/date/number and
    irrelevant/subject/date/number

    Parameters
    ----------
    path : str, pathlib.Path
        A path to parse

    Returns
    -------
    A string containing the full ALF path, session path, relative path or filename, or None
    if no part of the path is valid.  A returned string is always a trailing portion of the
    input (after leading and trailing slashes are stripped).

    Examples
    --------
    NOTE(review): the outputs shown assume the session pattern matches subject/date/number
    and the file pattern matches object.attribute.extension - confirm against `spec`.

    get_alf_path('etc/etc/subj/2021-01-21/001/collection/file.attr.ext')
    'subj/2021-01-21/001/collection/file.attr.ext'

    get_alf_path('collection/file.attr.ext')
    'collection/file.attr.ext'

    get_alf_path('file.attr.ext')
    'file.attr.ext'
    """
    if not isinstance(path, str):
        path = Path(path).as_posix()
    path = path.strip('/')

    # Check if session path: keep everything from the start of the session match onwards
    match_session = spec.regex(SESSION_SPEC).search(path)
    if match_session:
        return path[match_session.start():]

    # Check if filename / relative path (i.e. collection + filename)
    filename = path.rsplit('/', 1)[-1]
    if spec.regex(FILE_SPEC).match(filename):
        is_relative = spec.regex(f'{COLLECTION_SPEC}{FILE_SPEC}').match(path)
        return path if is_relative else filename

    # Nothing in the path is a valid ALF component
    return None
def add_uuid_string(file_path, uuid):
    """
    Insert a UUID as an extra part of an ALF filename, directly before the extension.

    Parameters
    ----------
    file_path : str, pathlib.Path
        An ALF path to add the UUID to
    uuid : str, uuid.UUID
        The UUID to add

    Returns
    -------
    A new Path object with a UUID in the filename.  If the last part of the file stem is
    already this UUID, a warning is logged and the path is returned without modification.

    Raises
    ------
    ValueError
        `uuid` is a string that is not a valid UUID
    """
    uuid_is_text = isinstance(uuid, str)
    if uuid_is_text and not spec.is_uuid_string(uuid):
        raise ValueError('Should provide a valid UUID v4')
    uuid = str(uuid)

    # Strings are wrapped in a Path; existing path objects are kept as they are so that
    # their class is not changed.
    target = Path(file_path) if isinstance(file_path, str) else file_path
    stem = target.stem

    if stem.split('.')[-1] == uuid:
        _logger.warning(f'UUID already found in file name: {target.name}: IGNORE')
        return target

    return target.parent / f'{stem}.{uuid}{target.suffix}'