"""
Module for identifying and parsing ALF file names.
An ALF file has the following components (those in brackets are optional):
(_namespace_)object.attribute(_timescale)(.extra.parts).ext
Note the following:
Object attributes may not contain an underscore unless followed by 'times' or 'intervals'.
A namespace must not contain extra underscores (i.e. `name_space` and `__namespace__` are not
valid)
ALF files must always have an extension
For more information, see the following documentation:
https://int-brain-lab.github.io/iblenv/one_docs/one_reference.html#alf # FIXME Change link
Created on Tue Sep 11 18:06:21 2018
@author: Miles
"""
from collections import OrderedDict
from datetime import datetime
from typing import Union, Optional
from pathlib import Path
import logging
from . import spec
from .spec import SESSION_SPEC, COLLECTION_SPEC, FILE_SPEC, REL_PATH_SPEC
_logger = logging.getLogger(__name__)
[docs]def rel_path_parts(rel_path, as_dict=False, assert_valid=True):
"""Parse a relative path into the relevant parts. A relative path follows the pattern
(collection/)(#revision#/)_namespace_object.attribute_timescale.extra.extension
Parameters
----------
rel_path : str
A relative path string
as_dict : bool
If true, an OrderedDict of parts are returned with the keys ('lab', 'subject', 'date',
'number'), otherwise a tuple of values are returned
assert_valid : bool
If true a ValueError is raised when the session cannot be parsed, otherwise an empty
dict of tuple of Nones is returned
Returns
-------
An OrderedDict if as_dict is true, or a tuple of parsed values
"""
compiled = spec.regex(REL_PATH_SPEC)
if hasattr(rel_path, 'as_posix'):
rel_path = rel_path.as_posix()
match = compiled.match(rel_path) # py 3.8
if match:
return OrderedDict(**match.groupdict()) if as_dict else tuple(match.groupdict().values())
elif assert_valid:
raise ValueError('Invalid relative path')
else:
parts = compiled.groupindex.keys()
return OrderedDict.fromkeys(parts) if as_dict else tuple([None] * len(parts))
[docs]def session_path_parts(session_path: str, as_dict=False, assert_valid=True):
"""Parse a session path into the relevant parts
Parameters
----------
session_path : str
A session path string
as_dict : bool
If true, an OrderedDict of parts are returned with the keys ('lab', 'subject', 'date',
'number'), otherwise a tuple of values are returned
assert_valid : bool
If true a ValueError is raised when the session cannot be parsed, otherwise an empty
dict of tuple of Nones is returned
Returns
-------
An OrderedDict if as_dict is true, or a tuple of parsed values
"""
parsed = spec.regex(SESSION_SPEC).search(session_path)
if parsed:
return OrderedDict(**parsed.groupdict()) if as_dict else (*parsed.groupdict().values(),)
elif assert_valid:
raise ValueError('Invalid session path')
empty = spec.regex(SESSION_SPEC).groupindex.keys()
return OrderedDict.fromkeys(empty) if as_dict else tuple([None] * len(empty))
[docs]def filename_parts(filename, as_dict=False, assert_valid=True):
"""
Return the parsed elements of a given ALF filename.
Parameters
----------
filename : str
The name of the file
as_dict : bool
When true a dict of matches is returned
assert_valid : bool
When true an exception is raised when the filename cannot be parsed
Returns
-------
namespace : str
The _namespace_ or None if not present
object : str
ALF object
attribute : str
The ALF attribute
timescale : str
The ALF _timescale or None if not present
extra : str
Any extra parts to the filename, or None if not present
extension : str
The file extension
Examples
--------
>>> filename_parts('_namespace_obj.times_timescale.extra.foo.ext')
('namespace', 'obj', 'times', 'timescale', 'extra.foo', 'ext')
>>> filename_parts('spikes.clusters.npy', as_dict=True)
{'namespace': None,
'object': 'spikes',
'attribute': 'clusters',
'timescale': None,
'extra': None,
'extension': 'npy'}
>>> filename_parts('spikes.times_ephysClock.npy')
(None, 'spikes', 'times', 'ephysClock', None, 'npy')
>>> filename_parts('_iblmic_audioSpectrogram.frequencies.npy')
('iblmic', 'audioSpectrogram', 'frequencies', None, None, 'npy')
>>> filename_parts('_spikeglx_ephysData_g0_t0.imec.wiring.json')
('spikeglx', 'ephysData_g0_t0', 'imec', None, 'wiring', 'json')
>>> filename_parts('_spikeglx_ephysData_g0_t0.imec0.lf.bin')
('spikeglx', 'ephysData_g0_t0', 'imec0', None, 'lf', 'bin')
>>> filename_parts('_ibl_trials.goCue_times_bpod.csv')
('ibl', 'trials', 'goCue_times', 'bpod', None, 'csv')
"""
pattern = spec.regex(FILE_SPEC)
empty = OrderedDict.fromkeys(pattern.groupindex.keys())
m = pattern.match(str(filename))
if m: # py3.8
return OrderedDict(m.groupdict()) if as_dict else m.groups()
elif assert_valid:
raise ValueError(f'Invalid ALF filename: "{filename}"')
else:
return empty if as_dict else empty.values()
[docs]def path_parts(file_path: str) -> dict:
pass
[docs]def folder_parts(folder_path: str) -> dict:
pass
def _isdatetime(s: str) -> bool:
try:
datetime.strptime(s, '%Y-%m-%d')
return True
except ValueError:
return False
[docs]def get_session_path(path: Union[str, Path]) -> Optional[Path]:
"""Returns the session path from any filepath if the date/number
pattern is found"""
if path is None:
return
if isinstance(path, str):
path = Path(path)
sess = None
for i, p in enumerate(path.parts):
if p.isdigit() and _isdatetime(path.parts[i - 1]):
sess = Path().joinpath(*path.parts[:i + 1])
return sess
[docs]def get_alf_path(path: Union[str, Path]) -> str:
"""Returns the ALF part of a path or filename
Attempts to return the first valid part of the path, first searching for a session path,
then relative path (collection/revision/filename), then just the filename. If all invalid,
None is returned.
NB: There is no way to discern between lab/Subjects/subject/date/number and
irrelevant/subject/date/number
Parameters
----------
path : str, pathlib.Path
A path to parse
Returns
-------
A string containing the full ALF path, session path, relative path or filename
Examples
--------
get_alf_path('etc/etc/lab/subj/2021-01-21/001')
'lab/subj/2021-01-21/001/collection/file.attr.ext'
get_alf_path('subj/2021-01-21/001/collection/file.attr.ext')
'file.attr.ext'
get_alf_path('collection/file.attr.ext')
'collection/file.attr.ext'
"""
if not isinstance(path, str):
path = Path(path).as_posix()
path = path.strip('/')
# Check if session path
match_session = spec.regex(SESSION_SPEC).search(path)
if match_session:
return path[match_session.start():]
# Check if filename / relative path (i.e. collection + filename)
parts = path.rsplit('/', 1)
match_filename = spec.regex(FILE_SPEC).match(parts[-1])
if match_filename:
return path if spec.regex(f'{COLLECTION_SPEC}{FILE_SPEC}').match(path) else parts[-1]
[docs]def add_uuid_string(file_path, uuid):
"""
Add a UUID and an extra part to the filename of an ALF path
Parameters
----------
file_path : str, pathlib.Path
An ALF path to add the UUID to
uuid : str, uuid.UUID
The UUID to add
Returns
-------
A new Path object with a UUID in the filename
"""
if isinstance(uuid, str) and not spec.is_uuid_string(uuid):
raise ValueError('Should provide a valid UUID v4')
uuid = str(uuid)
# NB: Only instantiate as Path if not already a Path, otherwise we risk changing the class
if isinstance(file_path, str):
file_path = Path(file_path)
name_parts = file_path.stem.split('.')
if uuid == name_parts[-1]:
_logger.warning(f'UUID already found in file name: {file_path.name}: IGNORE')
return file_path
return file_path.parent.joinpath(f"{'.'.join(name_parts)}.{uuid}{file_path.suffix}")