Source code for ibl_alignment_gui.utils.parse_yaml

from collections import defaultdict
from pathlib import Path

import yaml
from pydantic import BaseModel

# -------------------------------
# Pydantic Models
# -------------------------------



[docs]
class DatasetPaths(BaseModel):
    """
    Container for resolved dataset paths for a single probe.

    Every field is optional and defaults to ``None``, so an instance can be
    created empty and filled in one dataset at a time.

    Attributes
    ----------
    spike_sorting : Path | None
        Path to spike sorting output directory
    processed_ephys : Path | None
        Path to processed electrophysiology data directory
    raw_ephys : Path | None
        Path to raw electrophysiology recordings directory
    task : Path | None
        Path to task data directory
    raw_task : Path | None
        Path to raw task data directory
    picks : Path | None
        Path to probe trajectory pick files directory
    histology : Path | None
        Path to histology volume directory
    output : Path | None
        Path to alignment output directory
    """

    # One optional path per dataset kind; None means "not configured".
    spike_sorting: Path | None = None
    processed_ephys: Path | None = None
    raw_ephys: Path | None = None
    task: Path | None = None
    raw_task: Path | None = None
    picks: Path | None = None
    histology: Path | None = None
    output: Path | None = None




[docs]
class Datasets(BaseModel):
    """
    Dataset configuration with optional path and backend specification.

    Both fields are optional and default to ``None``.

    Attributes
    ----------
    path : Path | None
        Relative or absolute path to the dataset directory
    backend : str | None
        Data format backend (e.g., 'phylib', 'spikeglx')
    """

    path: Path | None = None
    backend: str | None = None




[docs]
class Probe(BaseModel):
    """
    Configuration for a single probe.

    Both fields are optional and default to ``None``.

    Attributes
    ----------
    datasets : dict[str, Datasets] | None
        Dictionary mapping dataset names to their configurations.
        ``None`` when the probe declares no datasets of its own.
    path : Path | None
        Probe-level base path for resolving relative dataset paths
    """

    datasets: dict[str, Datasets] | None = None
    path: Path | None = None  # Probe-level root




[docs]
class Configuration(BaseModel):
    """
    Settings for one named configuration: its probes and an optional base path.

    Attributes
    ----------
    probes : dict[str, Probe]
        Dictionary mapping probe names to their configurations. Required
        (the field has no default).
    path : Path | None
        Configuration-level base path for resolving relative probe paths
    """

    probes: dict[str, Probe]
    path: Path | None = None  # Config-level root




[docs]
class AlignmentYAML(BaseModel):
    """
    Root-level YAML configuration structure.

    Attributes
    ----------
    defaults : dict[str, Datasets] | None
        Default dataset configurations applied to all probes.
        ``None`` when the file has no defaults section.
    configurations : dict[str, Configuration]
        Dictionary mapping configuration names to their configurations.
        Required (the field has no default).
    path : Path | None
        Global root path for resolving all relative paths
    """

    defaults: dict[str, Datasets] | None = None
    configurations: dict[str, Configuration]
    path: Path | None = None  # Global root



# -------------------------------
# Path resolution logic
# -------------------------------



[docs]
def resolve_path(
    dataset_path: Path | None = None,
    probe_path: Path | None = None,
    config_path: Path | None = None,
    global_path: Path | None = None,
    default_path: Path | None = None,
) -> Path | None:
    """
    Resolve dataset path using hierarchical path resolution.

    If path is absolute at any stage, it is returned immediately. Otherwise path
    is resolved progressively through the provided paths.

    Resolution order:
        dataset
        probe / dataset
        config / probe/ dataset
        global / config / probe / dataset

    If that path is still relative at the end, an error is raised.
    """
    # Pick value or default
    path = dataset_path if dataset_path is not None else default_path
    if path is None:
        return None

    resolved_path = Path(path).expanduser()

    # Absolute value wins immediately
    if resolved_path.is_absolute():
        return resolved_path.resolve()

    # Helper to prepend a root if present
    def prepend(root: Path | None, p: Path) -> Path:
        return root / p if root is not None else p

    # Progressive buildup
    resolved_path = prepend(probe_path, resolved_path)
    if resolved_path.is_absolute():
        return resolved_path.resolve()

    resolved_path = prepend(config_path, resolved_path)
    if resolved_path.is_absolute():
        return resolved_path.resolve()

    resolved_path = prepend(global_path, resolved_path)
    if resolved_path.is_absolute():
        return resolved_path.resolve()

    # Still relative → cannot resolve fully
    raise ValueError('No absolute root provided to resolve relative path.')



# -------------------------------
# Loader
# -------------------------------



[docs]
# Dataset keys that are resolved for every probe.
# NOTE(review): DatasetPaths also declares 'task' and 'raw_task', which are not
# resolved here -- confirm whether that omission is intentional.
_RESOLVED_DATASETS = (
    'spike_sorting',
    'processed_ephys',
    'raw_ephys',
    'picks',
    'histology',
    'output',
)


def _resolve_probe_paths(
    probe: Probe,
    defaults: dict[str, Datasets] | None,
    config_path: Path | None,
    global_path: Path | None,
) -> DatasetPaths:
    """Resolve the dataset paths of one probe and apply the fallback rules."""
    # A probe may omit 'datasets' entirely and rely on the defaults section,
    # and the defaults section itself is optional.
    datasets = probe.datasets or {}
    defaults = defaults or {}

    resolved_paths = DatasetPaths()
    for dataset_name in _RESOLVED_DATASETS:
        dataset = datasets.get(dataset_name)
        default_dataset = defaults.get(dataset_name)
        resolved_path = resolve_path(
            dataset.path if dataset is not None else None,
            probe.path,
            config_path,
            global_path,
            default_dataset.path if default_dataset is not None else None,
        )
        setattr(resolved_paths, dataset_name, resolved_path)

    if resolved_paths.processed_ephys is None:
        resolved_paths.processed_ephys = resolved_paths.raw_ephys

    if resolved_paths.output is None:
        resolved_paths.output = resolved_paths.spike_sorting

    return resolved_paths


def load_alignment_yaml(
    yaml_file: str,
) -> tuple[list[str], list[str], dict[str, dict[str, DatasetPaths]]]:
    """
    Load and parse alignment configuration YAML file.

    Resolves the dataset paths of every probe using hierarchical path
    resolution and applies the values of the 'defaults' section where a probe
    does not set its own.

    Parameters
    ----------
    yaml_file : str
        Path to the YAML configuration file

    Returns
    -------
    configs : list of str
        List of configuration names, in file order
    probes : list of str
        List of unique probe names across all configurations, in the order
        they are first encountered
    data_paths : A dict of dicts of DatasetPaths
        Nested dictionary of resolved paths:
        data_paths[config_name][probe_name] -> DatasetPaths

    Raises
    ------
    FileNotFoundError
        If `yaml_file` does not exist.
    ValueError
        If the file does not contain a mapping at the top level, if more than
        two configurations are defined, or if a relative dataset path cannot
        be made absolute.

    Notes
    -----
    - If no 'configurations' section exists, creates a 'default' configuration
      from the top-level 'probes' section
    - Falls back to raw_ephys path if processed_ephys is not specified
    - Falls back to spike_sorting path if output path is not specified
    - Output directories are not created by this function
    - The file content is validated by the AlignmentYAML model; errors raised
      by that validation propagate unchanged
    """
    yaml_file = Path(yaml_file)

    if not yaml_file.exists():
        raise FileNotFoundError(f'YAML file {yaml_file} does not exist')

    with open(yaml_file, encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # An empty file loads as None and a bare list/scalar is not a config;
    # fail with a clear message rather than an opaque TypeError below.
    if not isinstance(data, dict):
        raise ValueError(f'YAML file {yaml_file} must contain a mapping at the top level')

    # Support files without explicit 'configurations' section
    if 'configurations' not in data:
        top_level_probes = data.pop('probes', {})
        data['configurations'] = {'default': {'probes': top_level_probes}}

    alignment = AlignmentYAML(**data)

    # Raised explicitly (not asserted) so the check survives `python -O`.
    if len(alignment.configurations) > 2:
        raise ValueError(
            'More than two configurations found in YAML, alignment GUI supports up to two.'
        )

    data_paths = defaultdict(dict)
    configs = []
    probes = []

    for cname, config in alignment.configurations.items():
        configs.append(cname)

        for pname, probe in config.probes.items():
            probes.append(pname)
            data_paths[cname][pname] = _resolve_probe_paths(
                probe, alignment.defaults, config.path, alignment.path
            )

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # returned probe list is the same on every run (set order is not).
    return configs, list(dict.fromkeys(probes)), data_paths