Source code for ibllib.io.session_params

"""A module for handling experiment description files.

Each device computer adds its piece of information and consolidates into the final acquisition
description.

The purpose is 3-fold:
    - provide modularity in the extraction: the acquisition description allows pipelines to be
    built dynamically.
    - assist the copying of the experimental data from device computers to the server computers, in
    a way that each device is independent from another.
    - assist the copying of the experimental data from device computers to the server computers, in
    a way that intermediate states (failed copies) are easily recoverable from and completion
    criteria (ie. session ready to extract) is objective and simple (all device files copied).

INGRESS
    - each device computer needs to know the session path on the server.
    - create a device file locally in a queue directory. This will serve as a copy flag.
    - copy the device file to the local server.

EGRESS
    - go through the queue and for each item:
        - if the device file is not on the server create it.
        - once copy is complete aggregate the qc from file.
"""
import yaml
import uuid
import logging
import socket
from pathlib import Path
from itertools import chain
from copy import deepcopy

from one.converters import ConversionMixin
from iblutil.util import flatten
from iblutil.io.params import FileLock
from packaging import version

_logger = logging.getLogger(__name__)
SPEC_VERSION = '1.0.0'



[docs]
def write_yaml(file_path, data):
    """
    Dump data to a yaml file, creating any missing parent folders beforehand.

    Parameters
    ----------
    file_path : pathlib.Path
        The full path of the yaml file to write.  The file is opened in write mode, so any
        existing content is replaced.
    data : dict
        The data to serialize with `yaml.safe_dump`.

    """
    folder = file_path.parent
    # No error is raised if the folder tree is already in place
    folder.mkdir(parents=True, exist_ok=True)
    with open(file_path, 'w') as stream:
        yaml.safe_dump(data, stream)



def _patch_file(data: dict) -> dict:
    """
    Bring description data written under an older specification up to date.

    Data that are empty, or whose 'version' already equals `SPEC_VERSION`, are returned
    untouched.  Otherwise the 'tasks' field is normalized and the 'version' field is set to
    `SPEC_VERSION`.  The input is modified in place.

    Parameters
    ----------
    data : dict
        The description yaml data.  A missing 'version' key is treated as version '0'.

    Returns
    -------
    dict
        The patched description data (the same object that was passed in).
    """
    if not data:
        return data
    file_version = data.get('version', '0')
    if file_version == SPEC_VERSION:
        return data

    parsed = version.parse(file_version)
    if parsed > version.parse(SPEC_VERSION):
        _logger.warning('Description file generated by more recent code')
    elif parsed <= version.parse('0.1.0') and isinstance(data.get('tasks'), dict):
        # Old files stored tasks as one protocol -> parameters map: split into a list of dicts
        data['tasks'] = [{protocol: pars} for protocol, pars in list(data['tasks'].items())]
    # NB: the version field is overwritten in every case, including for more recent files
    data['version'] = SPEC_VERSION
    # Ensure all items in tasks list are single value dicts
    if 'tasks' in data:
        data['tasks'] = [
            {protocol: pars} for task in data['tasks'] for protocol, pars in dict.items(task)
        ]
    return data



[docs]
def write_params(session_path, data) -> Path:
    """
    Save acquisition description data into a session folder.

    The data are written to a file named '_ibl_experiment.description.yaml' directly within
    the session path.

    Parameters
    ----------
    session_path : str, pathlib.Path
        The session path in which to save the _ibl_experiment.description.yaml file.
    data : dict
        The acquisition description data to save.

    Returns
    -------
    pathlib.Path
        The full path to the saved acquisition description.
    """
    destination = Path(session_path) / '_ibl_experiment.description.yaml'
    write_yaml(destination, data)
    return destination




[docs]
def read_params(path) -> dict:
    """
    Load an experiment description file.

    The loaded yaml data are passed through `_patch_file` so that they conform to the most
    recent specification.  If the file is missing None is returned.  If the file yields no data
    (e.g. it is empty) an empty dict is returned.

    Parameters
    ----------
    path : pathlib.Path, str
        The path to the description yaml file (or its containing folder) to load.  When a folder
        is given, the first file matching '_ibl_experiment.description*.yaml' is loaded.

    Returns
    -------
    dict, None
        The parsed yaml data, or None if the file was not found.

    Examples
    --------
    # Load a session's _ibl_experiment.description.yaml file

    >>> data = read_params('/home/data/subject/2020-01-01/001')

    # Load a specific device's description file

    >>> data = read_params('/home/data/subject/2020-01-01/001/_devices/behaviour.yaml')

    """
    path = Path(path)
    if path.is_dir():
        candidates = path.glob('_ibl_experiment.description*.yaml')
        yaml_file = next(candidates, None)
    elif path.exists():
        yaml_file = path
    else:
        yaml_file = None

    if not yaml_file:
        _logger.debug('Experiment description not found: %s', path)
        return None

    with open(yaml_file, 'r') as fp:
        loaded = yaml.safe_load(fp)
    # An empty yaml document loads as None, hence the fallback to an empty dict
    return _patch_file(loaded or {})




[docs]
def merge_params(a, b, copy=False):
    """
    Given two experiment descriptions, update first with fields in second.

    List fields are extended with the items of `b` that are not already present (the order of
    `a` is kept, new items follow in the order they appear in `b`); dict fields are shallow
    merged with the keys of `b` taking precedence; any other field of `b` replaces the one
    in `a`.

    Parameters
    ----------
    a : dict
        An experiment description dictionary to be updated with fields from `b`.
    b : dict
        An experiment description dictionary to update `a` with
    copy : bool
        If true, return a deep copy of `a` instead of updating directly.

    Returns
    -------
    dict
        A merged dictionary consisting of fields from `a` and `b`.

    Raises
    ------
    AssertionError
        Both descriptions contain a 'sync' field and the two differ, or one of the 'tasks' lists
        contains an item that is not a single value dict.
    """
    def to_hashable(dict_item):
        """Convert protocol -> dict map to hashable tuple of protocol + sorted key value pairs."""
        protocol, pars = dict_item
        # A protocol without parameters (None) is treated as having an empty parameter map
        hashable = (protocol, *chain.from_iterable(sorted((pars or {}).items())))
        return tuple(tuple(x) if isinstance(x, list) else x for x in hashable)

    if copy:
        a = deepcopy(a)
    for k in b:
        if k == 'sync' and k in a and a[k] != b[k]:
            raise AssertionError('multiple sync fields defined')
        if isinstance(b[k], list):
            prev = list(a.get(k) or [])
            if k == 'tasks':
                # For tasks, keep order and skip duplicates
                # Tasks must be a list of single value dicts (an empty list is valid)
                if any(len(task) != 1 for task in chain(prev, b[k])):
                    raise AssertionError('tasks must be a list of single value dicts')
                # Get the set of previous tasks
                prev_tasks = set(map(to_hashable, chain.from_iterable(map(dict.items, prev))))
                tasks = chain.from_iterable(map(dict.items, b[k]))
                to_add = [dict([itm]) for itm in tasks if to_hashable(itm) not in prev_tasks]
            else:
                # For procedures and projects, remove duplicates while keeping a stable order
                seen = set(prev)
                to_add = []
                for item in b[k]:
                    if item not in seen:
                        seen.add(item)
                        to_add.append(item)
            a[k] = prev + to_add
        elif isinstance(b[k], dict):
            a[k] = {**(a.get(k) or {}), **b[k]}
        else:  # A string
            a[k] = b[k]
    return a




[docs]
def aggregate_device(file_device, file_acquisition_description, unlink=False):
    """
    Add the contents of a device file to the main acquisition description file.

    The device data are merged into the (possibly not yet existing) main description within a
    `FileLock` context on the main description file, and the result is written back to it.

    Parameters
    ----------
    file_device : pathlib.Path
        The full path to the device yaml file to add to the main description file.
    file_acquisition_description : pathlib.Path
        The full path to the main acquisition description yaml file to add the device file to.
    unlink : bool
        If True, the device file is removed after successful aggregation.  The '_devices' folder
        next to the main description file is also removed if it is left empty.

    Returns
    -------
    dict, None
        The aggregated experiment description data, or None if the device file was missing or
        contained no data (in which case nothing is written).

    Raises
    ------
    AssertionError
        Device file contains a main 'sync' key that is already present in the main description
        file.  For an experiment only one main sync device is allowed.
    """
    # reads in the partial device data
    data_device = read_params(file_device)

    if not data_device:
        _logger.warning('empty device file "%s"', file_device)
        return None

    with FileLock(file_acquisition_description, log=_logger, timeout_action='delete'):
        # if the acquisition description file already exists, read in the yaml content
        if file_acquisition_description.exists():
            acq_desc = read_params(file_acquisition_description)
        else:
            acq_desc = {}

        # merge the dictionaries (NB: acq_desc modified in place)
        acq_desc = merge_params(acq_desc, data_device)

        with open(file_acquisition_description, 'w') as fp:
            yaml.safe_dump(acq_desc, fp)

    # delete the original file if necessary
    if unlink:
        file_device.unlink()
        stub_folder = file_acquisition_description.with_name('_devices')
        # Only remove the stub folder when it holds nothing at all: rmdir raises on a non-empty
        # folder, and a '*.*' glob would overlook entries without a dot in their name
        if stub_folder.is_dir() and not any(stub_folder.iterdir()):
            stub_folder.rmdir()

    return acq_desc




[docs]
def get_cameras(sess_params):
    """
    Return the names of the cameras defined in an experiment description.

    Parameters
    ----------
    sess_params : dict, None
        The loaded experiment description map.

    Returns
    -------
    list of str, None
        The keys of the devices -> cameras map, or None if the description is empty or has no
        (or empty) 'devices' or 'cameras' fields.
    """
    # A yaml key without a value loads as None, hence `or {}` instead of a `get` default
    devices = (sess_params or {}).get('devices') or {}
    cameras = devices.get('cameras')
    return list(cameras) if cameras else None




[docs]
def get_sync_label(sess_params):
    """
    Return the label (first key) of the 'sync' field of an experiment description.

    Parameters
    ----------
    sess_params : dict, None
        The loaded experiment description map.

    Returns
    -------
    str, None
        The first key of the 'sync' map, or None if the description or its 'sync' field is
        empty or missing.  A warning is logged when more than one key is present.
    """
    if not sess_params:
        return None
    labels = [*(sess_params.get('sync') or {})]
    if not labels:
        return None
    if len(labels) > 1:
        _logger.warning('Multiple sync keys found in experiment description: %s', labels)
    return labels[0]




[docs]
def get_sync(sess_params):
    """
    Return the sync label together with the parameters stored under it.

    Parameters
    ----------
    sess_params : dict, None
        The loaded experiment description map.

    Returns
    -------
    str, None
        The sync label as returned by `get_sync_label`, or None if there is none.
    dict
        The value stored under that label in the 'sync' field; an empty dict if that value is
        empty or if there is no sync label.
    """
    label = get_sync_label(sess_params)
    if not label:
        return None, {}
    values = sess_params['sync'][label]
    return label, values or {}




[docs]
def get_sync_values(sess_params):
    """
    Return the value stored under the sync label of an experiment description.

    Parameters
    ----------
    sess_params : dict, None
        The loaded experiment description map.

    Returns
    -------
    dict, None
        The value of the 'sync' field under the label returned by `get_sync_label`, or None if
        there is no sync label.
    """
    label = get_sync_label(sess_params)
    return sess_params['sync'][label] if label else None




[docs]
def get_sync_collection(sess_params):
    """Return the 'collection' field of the sync values, or None if it is unavailable."""
    sync_values = get_sync_values(sess_params)
    if not sync_values:
        return None
    return sync_values.get('collection')




[docs]
def get_sync_extension(sess_params):
    """Return the 'extension' field of the sync values, or None if it is unavailable."""
    sync_values = get_sync_values(sess_params)
    if not sync_values:
        return None
    return sync_values.get('extension')




[docs]
def get_sync_namespace(sess_params):
    """Return the 'acquisition_software' field of the sync values, or None if unavailable."""
    sync_values = get_sync_values(sess_params)
    if not sync_values:
        return None
    return sync_values.get('acquisition_software')




[docs]
def get_task_protocol(sess_params, task_collection=None):
    """
    Fetch the task protocol from an experiment description dict.

    Parameters
    ----------
    sess_params : dict
        The loaded experiment.description file.
    task_collection : str, optional
        Return the protocol that corresponds to this collection (returns the first matching
        protocol in the list). If None, all protocols are returned.

    Returns
    -------
    str, set, None
        If task_collection is None, returns the set of task protocols (None if there are no
        protocols), otherwise returns the first protocol that corresponds to the collection, or
        None if collection not present.
    """
    collections = get_collections({'tasks': sess_params.get('tasks')})
    if task_collection is None:
        return set(collections) or None  # Return all protocols
    for protocol, collection in collections.items():
        # A protocol occurring several times in the tasks list is mapped to a list of
        # collections by get_collections, so a plain equality test would never match it
        matches = collection if isinstance(collection, list) else [collection]
        if task_collection in matches:
            return protocol
    return None




[docs]
def get_task_collection(sess_params, task_protocol=None):
    """
    Fetch the task collection from an experiment description dict.

    Parameters
    ----------
    sess_params : dict
        The loaded experiment.description file.
    task_protocol : str, optional
        Return the collection that corresponds to this protocol (returns the collection of the
        first matching protocol in the list). If None, all collections are returned.

    Returns
    -------
    str, set, None
        If task_protocol is None, returns the set of collections (a single str if there is only
        one unique collection, None if there are none), otherwise returns the collection of the
        first task that corresponds to the protocol, or None if protocol not present.

    Notes
    -----
    - The order of the set may not be the same as the descriptions tasks order when iterating.
    """
    # A 'tasks' key without a value loads as None, hence `or []` instead of a `get` default
    protocols = sess_params.get('tasks') or []
    if task_protocol is not None:
        # Default to an empty map so that an unknown protocol yields None instead of raising
        task = next((x for x in protocols if task_protocol in x), {})
        return (task.get(task_protocol) or {}).get('collection')
    # Return set of all task collections; tasks without parameters have no collection
    collections = ((next(iter(x.values()), None) or {}).get('collection') for x in protocols)
    cset = set(filter(None, collections))
    if not cset:
        return None
    return next(iter(cset)) if len(cset) == 1 else cset




[docs]
def get_task_protocol_number(sess_params, task_protocol=None):
    """
    Fetch the task protocol number from an experiment description dict.

    Parameters
    ----------
    sess_params : dict
        The loaded experiment.description file.
    task_protocol : str, optional
        Return the number that corresponds to this protocol (returns the number of the first
        matching protocol in the list). If None, all numbers are returned.

    Returns
    -------
    int, list of int, None
        If task_protocol is None, returns list of all numbers (a single int if there is only
        one, None if there are none), otherwise returns the first number that corresponds to
        the protocol, or None if protocol not present.
    """
    # A 'tasks' key without a value loads as None, hence `or []` instead of a `get` default
    protocols = sess_params.get('tasks') or []
    if task_protocol is None:  # Return all task numbers
        raw = ((next(iter(x.values()), None) or {}).get('protocol_number') for x in protocols)
        numbers = [int(number) for number in raw if number is not None]
        # NB: test the list length rather than the value's truthiness: 0 is a valid number
        if not numbers:
            return None
        return numbers[0] if len(numbers) == 1 else numbers
    task = next((x for x in protocols if task_protocol in x), {})
    number = (task.get(task_protocol) or {}).get('protocol_number')
    return int(number) if isinstance(number, str) else number




[docs]
def get_collections(sess_params, flat=False):
    """
    Find all collections associated with the session.

    The description is walked recursively: each key whose value is a dict containing a
    'collection' field is recorded, other dicts and the dicts within lists are searched further.

    Parameters
    ----------
    sess_params : dict
        The loaded experiment description map.
    flat : bool (False)
        If True, return a flat set of collections, otherwise return a map of device/sync/task

    Returns
    -------
    dict[str, str | list[str]]
        A map of device/sync/task and the corresponding collection name.  When the same key is
        encountered more than once, its value is the list of unique collection names in the
        order they were encountered.

    set[str]
        A set of unique collection names.

    Notes
    -----
    - Assumes only the following data types contained: list, dict, None, str.
    """
    collection_map = {}

    def iter_dict(d):
        for k, v in d.items():
            if isinstance(v, list):
                for item in v:
                    if isinstance(item, dict):
                        iter_dict(item)
            elif isinstance(v, dict) and 'collection' in v:
                # if the key already exists, append the collection name to the list
                if k in collection_map:
                    existing = collection_map[k]
                    clist = list(existing) if isinstance(existing, list) else [existing]
                    # Append only unseen names so that the list order is reproducible
                    if v['collection'] not in clist:
                        clist.append(v['collection'])
                    collection_map[k] = clist
                else:
                    collection_map[k] = v['collection']
            elif isinstance(v, dict):
                iter_dict(v)

    iter_dict(sess_params or {})
    return set(flatten(collection_map.values())) if flat else collection_map




[docs]
def get_video_compressed(sess_params):
    """
    Return the 'compressed' field of the cameras in an experiment description.

    Parameters
    ----------
    sess_params : dict, None
        The loaded experiment description map.

    Returns
    -------
    bool, None
        The 'compressed' value of the last camera in the devices -> cameras map (False if the
        field is absent), or None if no cameras are defined.
    """
    # A yaml key without a value loads as None, hence `or {}` instead of a `get` default
    devices = (sess_params or {}).get('devices') or {}
    videos = devices.get('cameras')
    if not videos:
        return None

    # This is all or nothing, assumes either all videos or not compressed: only the last
    # camera entry is therefore inspected
    last_camera = list(videos.values())[-1]
    return (last_camera or {}).get('compressed', False)




[docs]
def get_remote_stub_name(session_path, device_id=None):
    """
    Build the device specific file path of the remote experiment.description stub.

    The file name has the form '<date>_<sequence>_<subject>@<device_id>.yaml' and is located in
    the '_devices' folder of the session.  No file or folder is created by this function.

    Parameters
    ----------
    session_path : pathlib.Path
        A remote session path.
    device_id : str, optional
        A device name, if None (or empty) this device's hostname followed by an underscore and
        the numeric ID returned by `uuid.getnode` is used.

    Returns
    -------
    pathlib.Path
        The full file path to the remote experiment description stub.

    Example
    -------
    >>> get_remote_stub_name(Path.home().joinpath('subject', '2020-01-01', '001'), 'host-123')
    Path.home() / 'subject/2020-01-01/001/_devices/2020-01-01_1_subject@host-123.yaml'
    """
    if not device_id:
        device_id = f'{socket.gethostname()}_{uuid.getnode()}'
    # NOTE(review): path2ref is expected to provide the date, sequence (int) and subject fields
    ref = ConversionMixin.path2ref(session_path)
    exp_ref = '{date}_{sequence:d}_{subject:s}'.format(**ref)
    stub_folder = session_path / '_devices'
    return stub_folder / f'{exp_ref}@{device_id}.yaml'