"""
Module for identifying and parsing ALF file names.
An ALF file has the following components (those in brackets are optional):
(_namespace_)object.attribute(_timescale)(.extra.parts).ext
Note the following:
Object attributes may not contain an underscore unless followed by 'times' or 'intervals'.
A namespace must not contain extra underscores (i.e. `name_space` and `__namespace__` are not
valid).
ALF files must always have an extension.
For more information, see the following documentation:
https://int-brain-lab.github.io/ONE/alf_intro.html
"""
from collections import OrderedDict
from datetime import datetime
from typing import Union, Optional
from pathlib import Path
import logging
from . import spec
from .spec import SESSION_SPEC, COLLECTION_SPEC, FILE_SPEC, REL_PATH_SPEC
_logger = logging.getLogger(__name__)
[docs]
def rel_path_parts(rel_path, as_dict=False, assert_valid=True):
    """Split a relative ALF path into its constituent parts.

    The expected form of a relative path is
    (collection/)(#revision#/)_namespace_object.attribute_timescale.extra.extension

    Parameters
    ----------
    rel_path : str, pathlib.Path
        The relative path to parse.
    as_dict : bool
        If true, an OrderedDict is returned whose keys are the named groups of the relative
        path spec (REL_PATH_SPEC); otherwise a tuple of the parsed values is returned.
    assert_valid : bool
        If true a ValueError is raised when the path cannot be parsed, otherwise a dict or
        tuple of Nones is returned.

    Returns
    -------
    OrderedDict, tuple
        A dict if as_dict is true, or a tuple of parsed values.

    Raises
    ------
    ValueError
        The relative path could not be parsed (assert_valid is True).
    """
    # The whole string is parsed with the pattern's `match` method rather than `search`
    return _path_parts(
        rel_path, REL_PATH_SPEC, match=True, as_dict=as_dict, assert_valid=assert_valid)
[docs]
def session_path_parts(session_path, as_dict=False, assert_valid=True):
    """Split a session path into lab, subject, date and number.

    The session pattern is searched for anywhere within the input, so the session may be
    preceded by other folders.

    Parameters
    ----------
    session_path : str, pathlib.Path
        The session path to parse.
    as_dict : bool
        If true, an OrderedDict of parts is returned with the keys ('lab', 'subject', 'date',
        'number'), otherwise a tuple of values is returned.
    assert_valid : bool
        If true a ValueError is raised when the session cannot be parsed, otherwise a dict or
        tuple of Nones is returned.

    Returns
    -------
    OrderedDict, tuple
        A dict if as_dict is true, or a tuple of parsed values.

    Raises
    ------
    ValueError
        Invalid ALF session path (assert_valid is True).
    """
    # match=False: use the pattern's `search` method instead of `match`
    return _path_parts(
        session_path, SESSION_SPEC, match=False, as_dict=as_dict, assert_valid=assert_valid)
def _path_parts(path, spec_str, match=True, as_dict=False, assert_valid=True):
    """Parse an ALF path into its parts using a given spec string.

    Parameters
    ----------
    path : str, pathlib.Path
        An ALF path or dataset. Objects with an `as_posix` method are converted to a
        forward-slash string before parsing.
    spec_str : str
        The spec string, passed to `spec.regex` to obtain the pattern used for parsing.
    match : bool
        If True the pattern's `match` method is used, otherwise its `search` method is used
        to look for the expression within the path.
    as_dict : bool
        When true an OrderedDict of the pattern's named groups is returned.
    assert_valid : bool
        When true an exception is raised when the path cannot be parsed.

    Returns
    -------
    OrderedDict, tuple
        A dict if as_dict is true, or a tuple of parsed values. When parsing fails and
        assert_valid is false, every value is None.

    Raises
    ------
    ValueError
        Invalid ALF path (assert_valid is True).
    """
    path_str = path.as_posix() if hasattr(path, 'as_posix') else path
    regex = spec.regex(spec_str)
    find = regex.match if match else regex.search
    result = find(path_str)

    if not result:
        if assert_valid:
            raise ValueError(f'Invalid ALF: "{path_str}"')
        # One None per named group so the output shape matches that of a successful parse
        blank = OrderedDict.fromkeys(regex.groupindex.keys())
        return blank if as_dict else tuple(blank.values())

    groups = result.groupdict()
    if as_dict:
        return OrderedDict(groups)
    return tuple(groups.values())
[docs]
def filename_parts(filename, as_dict=False, assert_valid=True) -> Union[dict, tuple]:
    """Parse an ALF filename into its elements.

    Parameters
    ----------
    filename : str
        The name of the file.
    as_dict : bool
        When true a dict of matches is returned.
    assert_valid : bool
        When true an exception is raised when the filename cannot be parsed.

    Returns
    -------
    namespace : str
        The _namespace_ or None if not present.
    object : str
        ALF object.
    attribute : str
        The ALF attribute.
    timescale : str
        The ALF _timescale or None if not present.
    extra : str
        Any extra parts to the filename, or None if not present.
    extension : str
        The file extension.

    Examples
    --------
    >>> filename_parts('_namespace_obj.times_timescale.extra.foo.ext')
    ('namespace', 'obj', 'times', 'timescale', 'extra.foo', 'ext')
    >>> filename_parts('spikes.clusters.npy', as_dict=True)
    {'namespace': None,
     'object': 'spikes',
     'attribute': 'clusters',
     'timescale': None,
     'extra': None,
     'extension': 'npy'}
    >>> filename_parts('spikes.times_ephysClock.npy')
    (None, 'spikes', 'times', 'ephysClock', None, 'npy')
    >>> filename_parts('_iblmic_audioSpectrogram.frequencies.npy')
    ('iblmic', 'audioSpectrogram', 'frequencies', None, None, 'npy')
    >>> filename_parts('_spikeglx_ephysData_g0_t0.imec.wiring.json')
    ('spikeglx', 'ephysData_g0_t0', 'imec', None, 'wiring', 'json')
    >>> filename_parts('_spikeglx_ephysData_g0_t0.imec0.lf.bin')
    ('spikeglx', 'ephysData_g0_t0', 'imec0', None, 'lf', 'bin')
    >>> filename_parts('_ibl_trials.goCue_times_bpod.csv')
    ('ibl', 'trials', 'goCue_times', 'bpod', None, 'csv')

    Raises
    ------
    ValueError
        Invalid ALF dataset (assert_valid is True).
    """
    # The filename is parsed with the pattern's `match` method rather than `search`
    return _path_parts(
        filename, FILE_SPEC, match=True, as_dict=as_dict, assert_valid=assert_valid)
[docs]
def full_path_parts(path, as_dict=False, assert_valid=True) -> Union[dict, tuple]:
    """Parse both the folder parts and the filename parts of an ALF path.

    Parameters
    ----------
    path : str, pathlib.Path
        The ALF path.
    as_dict : bool
        When true a dict of matches is returned.
    assert_valid : bool
        When true an exception is raised when the path cannot be parsed.

    Returns
    -------
    OrderedDict, tuple
        A dict if as_dict is true, or a tuple of parsed values. Folder parts come first,
        followed by the filename parts.

    Examples
    --------
    >>> full_path_parts(
    ...     'lab/Subjects/subject/2020-01-01/001/collection/#revision#/'
    ...     '_namespace_obj.times_timescale.extra.foo.ext')
    ('lab', 'subject', '2020-01-01', '001', 'collection', 'revision',
     'namespace', 'obj', 'times', 'timescale', 'extra.foo', 'ext')
    >>> full_path_parts('spikes.clusters.npy', as_dict=True)
    {'lab': None,
     'subject': None,
     'date': None,
     'number': None,
     'collection': None,
     'revision': None,
     'namespace': None,
     'object': 'spikes',
     'attribute': 'clusters',
     'timescale': None,
     'extra': None,
     'extension': 'npy'}

    Raises
    ------
    ValueError
        Invalid ALF path (assert_valid is True).
    """
    path = Path(path)
    # NB: Whether the path ends in a folder or a filename is a guess. Filenames contain at
    # least two periods, but a collection may currently contain any number of periods, which
    # makes the ALF path ambiguous.
    looks_like_file = path.name.count('.') >= 2
    if not looks_like_file:
        # Folder only: parse the whole path as folders and fill the dataset parts with Nones
        folders = folder_parts(path, as_dict, assert_valid)
        dataset = filename_parts('', as_dict, assert_valid=False)
    else:
        if '/' in path.as_posix():
            # Full file path: everything before the filename is parsed as folders
            folders = folder_parts(path.parent, as_dict, assert_valid)
        else:
            # Filename only: fill the folder parts with Nones
            folders = folder_parts('', as_dict, assert_valid=False)
        dataset = filename_parts(path.name, as_dict, assert_valid)

    return OrderedDict(**folders, **dataset) if as_dict else folders + dataset
[docs]
def folder_parts(folder_path, as_dict=False, assert_valid=True) -> Union[dict, tuple]:
    """Parse the session, collection and revision parts of an ALF folder path.

    Parameters
    ----------
    folder_path : str, pathlib.Path
        The ALF folder path.
    as_dict : bool
        When true a dict of matches is returned.
    assert_valid : bool
        When true an exception is raised when the folder path cannot be parsed.

    Returns
    -------
    OrderedDict, tuple
        A dict if as_dict is true, or a tuple of parsed values.

    Examples
    --------
    >>> folder_parts('lab/Subjects/subject/2020-01-01/001/collection/#revision#')
    ('lab', 'subject', '2020-01-01', '001', 'collection', 'revision')
    >>> folder_parts(Path('lab/Subjects/subject/2020-01-01/001'), as_dict=True)
    {'lab': 'lab',
     'subject': 'subject',
     'date': '2020-01-01',
     'number': '001',
     'collection': None,
     'revision': None}

    Raises
    ------
    ValueError
        Invalid ALF path (assert_valid is True).
    """
    folder = folder_path.as_posix() if hasattr(folder_path, 'as_posix') else folder_path
    # The regex pattern requires a trailing slash; an empty input is left untouched
    if folder and not folder.endswith('/'):
        folder += '/'
    session_and_collection = f'{SESSION_SPEC}/{COLLECTION_SPEC}'
    return _path_parts(
        folder, session_and_collection, match=False, as_dict=as_dict, assert_valid=assert_valid)
def _isdatetime(s: str) -> bool:
    """Return True if the input can be parsed as a YYYY-MM-DD date string."""
    try:
        datetime.strptime(s, '%Y-%m-%d')
    except ValueError:
        # Not in the expected format, or not a real calendar date
        return False
    else:
        return True
[docs]
def get_session_path(path: Union[str, Path]) -> Optional[Path]:
    """Return the session path part of any file path, including the root directory.

    A session is identified by a numeric folder (the session number) that directly follows a
    YYYY-MM-DD date folder. If the pattern occurs more than once, the deepest occurrence is
    used.

    Parameters
    ----------
    path : str, pathlib.Path
        A file or folder path that may contain a session path.

    Returns
    -------
    pathlib.Path
        The session path part of the input path, or None if the input is None or no
        date/number pattern was found.

    Examples
    --------
    >>> get_session_path('/mnt/sd0/Data/lab/Subjects/subject/2020-01-01/001')
    Path('/mnt/sd0/Data/lab/Subjects/subject/2020-01-01/001')
    >>> get_session_path('C:\\Data\\subject\\2020-01-01\\1\\trials.intervals.npy')
    Path('C:/Data/subject/2020-01-01/1')
    """
    if path is None:
        return None
    if isinstance(path, str):
        path = Path(path)

    parts = path.parts
    sess = None
    # Pair each part with the one that truly precedes it. Indexing `parts[i - 1]` from i == 0
    # would wrap around to the last part, so a path such as '1/2020-01-01' would wrongly be
    # reported as containing a session.
    for i, (date, number) in enumerate(zip(parts, parts[1:]), start=1):
        if number.isdigit() and _isdatetime(date):
            # No early return: a later (deeper) match replaces an earlier one
            sess = Path().joinpath(*parts[:i + 1])
    return sess
[docs]
def get_alf_path(path: Union[str, Path]) -> Optional[str]:
    """Return the ALF part of a path or filename.

    Attempts to return the first valid part of the path, first searching for a session path,
    then relative path (collection/revision/filename), then just the filename. If all invalid,
    None is returned.

    Parameters
    ----------
    path : str, pathlib.Path
        A path to parse.

    Returns
    -------
    str, None
        A string containing the full ALF path, session path, relative path or filename, or
        None if no valid ALF part was found.

    Examples
    --------
    >>> get_alf_path('etc/etc/lab/Subjects/subj/2021-01-21/001/collection/file.attr.ext')
    'lab/Subjects/subj/2021-01-21/001/collection/file.attr.ext'
    >>> get_alf_path('etc/etc/subj/2021-01-21/001/collection/file.attr.ext')
    'subj/2021-01-21/001/collection/file.attr.ext'
    >>> get_alf_path('collection/file.attr.ext')
    'collection/file.attr.ext'
    """
    if not isinstance(path, str):
        path = Path(path).as_posix()
    path = path.strip('/')

    # Check if session path: return everything from the start of the session match onwards
    match_session = spec.regex(SESSION_SPEC).search(path)
    if match_session:
        return path[match_session.start():]

    # Check if filename / relative path (i.e. collection + filename)
    filename = path.rsplit('/', 1)[-1]
    if not spec.regex(FILE_SPEC).match(filename):
        return None  # neither a session path nor a valid ALF filename
    # Keep the folders only when the whole string parses as a relative ALF path
    is_relative_path = spec.regex(f'{COLLECTION_SPEC}{FILE_SPEC}').match(path)
    return path if is_relative_path else filename
[docs]
def add_uuid_string(file_path, uuid):
    """Insert a UUID into the filename of an ALF path.

    The UUID is placed as the last part before the file extension, e.g.
    'obj.attr.ext' -> 'obj.attr.a976e418-c8b8-4d24-be47-d05120b18341.ext'.
    If the filename already ends with a different UUID, that UUID is replaced; if it already
    ends with the same UUID, the path is returned unchanged and a warning is logged.

    Parameters
    ----------
    file_path : str, pathlib.Path, pathlib.PurePath
        An ALF path to add the UUID to.
    uuid : str, uuid.UUID
        The UUID to add.

    Returns
    -------
    pathlib.Path, pathlib.PurePath
        A new Path or PurePath object with a UUID in the filename.

    Examples
    --------
    >>> add_uuid_string('/path/to/trials.intervals.npy', 'a976e418-c8b8-4d24-be47-d05120b18341')
    Path('/path/to/trials.intervals.a976e418-c8b8-4d24-be47-d05120b18341.npy')

    Raises
    ------
    ValueError
        `uuid` was given as a string that `spec.is_uuid_string` does not accept.

    See Also
    --------
    one.alf.files.remove_uuid_string
    one.alf.spec.is_uuid
    """
    # Only string input is validated here; other types are simply converted with str()
    if isinstance(uuid, str) and not spec.is_uuid_string(uuid):
        raise ValueError('Should provide a valid UUID v4')
    uuid = str(uuid)

    # NB: Only instantiate as Path if not already a Path, otherwise we risk changing the class
    if isinstance(file_path, str):
        file_path = Path(file_path)

    stem_parts = file_path.stem.split('.')
    if spec.is_uuid(stem_parts[-1]):
        existing_uuid = stem_parts.pop()  # drop the UUID already present in the name
        if existing_uuid == uuid:
            _logger.warning(f'UUID already found in file name: {file_path.name}: IGNORE')
            return file_path
        _logger.debug('Replacing %s with %s in %s', existing_uuid, uuid, file_path)

    new_name = f"{'.'.join(stem_parts)}.{uuid}{file_path.suffix}"
    return file_path.parent.joinpath(new_name)
[docs]
def remove_uuid_string(file_path):
    """Strip a UUID from the filename of an ALF path.

    The UUID is only removed when it is the last part of the filename before the extension.

    Parameters
    ----------
    file_path : str, pathlib.Path, pathlib.PurePath
        An ALF path to remove the UUID from.

    Returns
    -------
    pathlib.Path, pathlib.PurePath
        A new Path or PurePath object without a UUID in the filename. If no UUID is found the
        path is returned unchanged (string input is returned as a Path).

    Examples
    --------
    >>> remove_uuid_string('/path/to/trials.intervals.a976e418-c8b8-4d24-be47-d05120b18341.npy')
    Path('/path/to/trials.intervals.npy')
    >>> remove_uuid_string('/path/to/trials.intervals.npy')
    Path('/path/to/trials.intervals.npy')

    See Also
    --------
    one.alf.files.add_uuid_string
    """
    path = Path(file_path) if isinstance(file_path, str) else file_path
    *leading_parts, last_part = path.stem.split('.')
    if not spec.is_uuid_string(last_part):
        return path
    # Rebuild the filename from the remaining stem parts plus the original extension
    return path.with_name('.'.join(leading_parts) + path.suffix)
[docs]
def padded_sequence(file_path):
    """Return the path with its experiment sequence folder zero-padded to three digits.

    Parameters
    ----------
    file_path : str, pathlib.Path, pathlib.PurePath
        A session or file path to convert.

    Returns
    -------
    pathlib.Path, pathlib.PurePath
        The same path but with the experiment sequence folder zero-padded. If a PurePath was
        passed, a PurePath will be returned, otherwise a Path object is returned.

    Examples
    --------
    >>> file_path = '/iblrigdata/subject/2023-01-01/1/_ibl_experiment.description.yaml'
    >>> padded_sequence(file_path)
    pathlib.Path('/iblrigdata/subject/2023-01-01/001/_ibl_experiment.description.yaml')

    Supports folders and will not affect already padded paths

    >>> session_path = pathlib.PurePosixPath('subject/2023-01-01/001')
    >>> padded_sequence(session_path)
    pathlib.PurePosixPath('subject/2023-01-01/001')

    Raises
    ------
    ValueError
        The path does not contain a valid ALF session path.
    """
    if isinstance(file_path, str):
        file_path = Path(file_path)
    session_path = get_session_path(file_path)
    if session_path is None:
        raise ValueError('path must include a valid ALF session path, e.g. subject/YYYY-MM-DD/N')

    # Number of parts below the session folder; parents[depth] is therefore the date folder
    depth = len(file_path.parts) - len(session_path.parts)
    date_folder = file_path.parents[depth]
    padded = f'{int(session_path.parts[-1]):03d}'  # zero-pad if necessary
    below_session = file_path.relative_to(session_path)
    return date_folder.joinpath(padded, below_session)
[docs]
def without_revision(file_path):
    """Return the dataset path with any revision folder removed.

    The result is rebuilt from the session path, the collection (if any) and the filename.

    Parameters
    ----------
    file_path : str, pathlib.Path
        A valid ALF dataset path.

    Returns
    -------
    pathlib.Path
        The input file path without a revision folder.

    Examples
    --------
    >>> without_revision('/lab/Subjects/subject/2023-01-01/001/collection/#revision#/obj.attr.ext')
    Path('/lab/Subjects/subject/2023-01-01/001/collection/obj.attr.ext')

    Raises
    ------
    ValueError
        The folder part of the path is not a valid ALF path.
    """
    path = Path(file_path) if isinstance(file_path, str) else file_path
    # The last two folder parts are the collection and revision; the revision is discarded
    *_, collection, _revision = folder_parts(path.parent)
    session_path = get_session_path(path)
    # Skip the collection when the dataset sits directly in the session folder
    tail = [part for part in (collection, path.name) if part]
    return session_path.joinpath(*tail)