Source code for ibllib.pipes.base_tasks

"""Abstract base classes for dynamic pipeline tasks."""
import logging
from pathlib import Path

from packaging import version
from one.webclient import no_cache
from iblutil.util import flatten, ensure_list
import matplotlib.image
from skimage.io import ImageCollection, imread

from ibllib.pipes.tasks import Task
import ibllib.io.session_params as sess_params
from ibllib.qc.base import sign_off_dict, SIGN_OFF_CATEGORIES
from ibllib.io.raw_daq_loaders import load_timeline_sync_and_chmap
from ibllib.oneibl.data_handlers import update_collections

_logger = logging.getLogger(__name__)


class DynamicTask(Task):

    def __init__(self, session_path, **kwargs):
        super().__init__(session_path, **kwargs)
        self.session_params = self.read_params_file()

        # TODO Which should be default?
        # Sync collection
        self.sync_collection = self.get_sync_collection(kwargs.get('sync_collection', None))
        # Sync type
        self.sync = self.get_sync(kwargs.get('sync', None))
        # Sync extension
        self.sync_ext = self.get_sync_extension(kwargs.get('sync_ext', None))
        # Sync namespace
        self.sync_namespace = self.get_sync_namespace(kwargs.get('sync_namespace', None))

    def get_sync_collection(self, sync_collection=None):
        return sync_collection if sync_collection else sess_params.get_sync_collection(self.session_params)

    def get_sync(self, sync=None):
        return sync if sync else sess_params.get_sync_label(self.session_params)

    def get_sync_extension(self, sync_ext=None):
        return sync_ext if sync_ext else sess_params.get_sync_extension(self.session_params)

    def get_sync_namespace(self, sync_namespace=None):
        return sync_namespace if sync_namespace else sess_params.get_sync_namespace(self.session_params)

    def get_protocol(self, protocol=None, task_collection=None):
        return protocol if protocol else sess_params.get_task_protocol(self.session_params, task_collection)

    def get_task_collection(self, collection=None):
        if not collection:
            collection = sess_params.get_task_collection(self.session_params)
        # If inferring the collection from the experiment description, assert only one returned
        assert collection is None or isinstance(collection, str) or len(collection) == 1
        return collection

    def get_device_collection(self, device, device_collection=None):
        if device_collection:
            return device_collection
        collection_map = sess_params.get_collections(self.session_params['devices'])
        return collection_map.get(device)

    def read_params_file(self):
        params = sess_params.read_params(self.session_path)

        if params is None:
            return {}

        # TODO figure out the best way
        # if params is None and self.one:
        #     # Try to read params from alyx or try to download params file
        #     params = self.one.load_dataset(self.one.path2eid(self.session_path), 'params.yml')
        #     params = self.one.alyx.rest()

        return params
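
# Usage sketch (illustrative, not part of the module): sync parameters passed as
# keyword arguments take precedence over values read from the experiment description
# file. The session path and collection names below are hypothetical.
#
# >>> task = DynamicTask('/data/subject/2023-01-01/001')
# >>> task.sync_collection  # inferred from the experiment description, e.g. 'raw_sync_data'
# >>> task = DynamicTask('/data/subject/2023-01-01/001', sync_collection='raw_ephys_data')
# >>> task.sync_collection  # the keyword argument wins
# 'raw_ephys_data'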


class BehaviourTask(DynamicTask):

    extractor = None
    """ibllib.io.extractors.base.BaseBpodExtractor: A trials extractor object."""

    def __init__(self, session_path, **kwargs):
        super().__init__(session_path, **kwargs)

        self.collection = self.get_task_collection(kwargs.get('collection', None))
        # Task type (protocol)
        self.protocol = self.get_protocol(kwargs.get('protocol', None), task_collection=self.collection)

        self.protocol_number = self.get_protocol_number(kwargs.get('protocol_number'), task_protocol=self.protocol)

        self.output_collection = 'alf'
        # Do not use kwargs.get('number', None) -- this will return None if number is 0
        if self.protocol_number is not None:
            self.output_collection += f'/task_{self.protocol_number:02}'

    def get_protocol(self, protocol=None, task_collection=None):
        """
        Return the task protocol name.

        This returns the task protocol based on the task collection. If `protocol` is not None,
        this acts as an identity function. If both `task_collection` and `protocol` are None,
        the protocol defined in the experiment description file is returned only if a single
        protocol was run. If `task_collection` is not None, the associated protocol name is
        returned.

        Parameters
        ----------
        protocol : str
            A task protocol name. If not None, the same value is returned.
        task_collection : str
            The task collection whose protocol name to return. May be None if only one protocol
            was run.

        Returns
        -------
        str, None
            The task protocol name, or None if no protocol found.

        Raises
        ------
        ValueError
            For a session with multiple task protocols, a task collection must be passed.
        """
        if protocol:
            return protocol
        protocol = sess_params.get_task_protocol(self.session_params, task_collection) or None
        if isinstance(protocol, set):
            if len(protocol) == 1:
                protocol = next(iter(protocol))
            else:
                raise ValueError('Multiple task protocols for session. Task collection must be explicitly defined.')
        return protocol

    def get_task_collection(self, collection=None):
        """
        Return the task collection.

        If `collection` is not None, this acts as an identity function. Otherwise loads it from
        the experiment description if only one protocol was run.

        Parameters
        ----------
        collection : str
            A task collection. If not None, the same value is returned.

        Returns
        -------
        str, None
            The task collection, or None if no task protocols were run.

        Raises
        ------
        AssertionError
            Raised if multiple protocols were run and collection is None, or if the experiment
            description file is improperly formatted.
        """
        if not collection:
            collection = sess_params.get_task_collection(self.session_params)
        # If inferring the collection from the experiment description, assert only one returned
        assert collection is None or isinstance(collection, str) or len(collection) == 1
        return collection

    def get_protocol_number(self, number=None, task_protocol=None):
        """
        Return the task protocol number.

        Numbering starts from 0. If the 'protocol_number' field is missing from the experiment
        description, None is returned. If `task_protocol` is None, the first protocol number is
        returned if exactly one protocol was run; otherwise an AssertionError is raised.

        NB: :func:`ibllib.pipes.dynamic_pipeline.make_pipeline` will determine the protocol
        number from the order of the tasks in the experiment description if the task collection
        follows the pattern 'raw_task_data_XX'. If the task collection does not follow this
        pattern, the experiment description file should explicitly define the number with the
        'protocol_number' field.

        Parameters
        ----------
        number : int
            The protocol number. If not None, the same value is returned.
        task_protocol : str
            The task protocol name.

        Returns
        -------
        int, None
            The task protocol number, if defined.
        """
        if number is None:  # Do not use "if not number" as that will return True if number is 0
            number = sess_params.get_task_protocol_number(self.session_params, task_protocol)
        elif not isinstance(number, int):
            number = int(number)
        # If inferring the number from the experiment description, assert only one returned (or something went wrong)
        assert number is None or isinstance(number, int), 'ambiguous protocol number; no task protocol defined'
        return number

    @staticmethod
    def _spacer_support(settings):
        """
        Spacer support was introduced in iblrig v7.1 (for the v7 line) and v8.0.1 (for the v8
        line).

        Parameters
        ----------
        settings : dict
            The task settings dict.

        Returns
        -------
        bool
            True if task spacers are to be expected.
        """
        v = version.parse
        ver = v(settings.get('IBLRIG_VERSION') or '100.0.0')
        return ver not in (v('100.0.0'), v('8.0.0')) and ver >= v('7.1.0')

    def extract_behaviour(self, save=True):
        """Extract trials data.

        This is an abstract method called by `_run` and `run_qc` methods. Subclasses should
        return the extracted trials data and a list of output files. This method should also
        save the trials extractor object to the :prop:`extractor` property for use by `run_qc`.

        Parameters
        ----------
        save : bool
            Whether to save the extracted data as ALF datasets.

        Returns
        -------
        dict
            A dictionary of trials data.
        list of pathlib.Path
            A list of output file paths if save == true.
        """
        return None, None

    def run_qc(self, trials_data=None, update=True):
        """Run task QC.

        Subclass method should return the QC object. This just validates that `trials_data` is
        not None.

        Parameters
        ----------
        trials_data : dict
            A dictionary of extracted trials data. The output of :meth:`extract_behaviour`.
        update : bool
            If true, update Alyx with the QC outcome.

        Returns
        -------
        ibllib.qc.task_metrics.TaskQC
            A TaskQC object replete with task data and computed metrics.
        """
        self._assert_trials_data(trials_data)
        return None

    def _assert_trials_data(self, trials_data=None):
        """Check trials data available.

        Called by :meth:`run_qc`, this extracts the trials data if `trials_data` is None, and
        raises if :meth:`extract_behaviour` returns None.

        Parameters
        ----------
        trials_data : dict, None
            A dictionary of extracted trials data or None.

        Returns
        -------
        trials_data : dict
            A dictionary of extracted trials data. The output of :meth:`extract_behaviour`.
        """
        if not self.extractor or trials_data is None:
            trials_data, _ = self.extract_behaviour(save=False)
        if not (trials_data and self.extractor):
            raise ValueError('No trials data and/or extractor found')
        return trials_data
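
# Quick check of the spacer-support version rule above (illustrative only;
# `_spacer_support` is a pure static method so these lines can be run as-is).
# Note also that the output collection is suffixed with the zero-padded protocol
# number, e.g. protocol_number=1 gives an `output_collection` of 'alf/task_01'.
#
# >>> BehaviourTask._spacer_support({'IBLRIG_VERSION': '7.2.3'})
# True
# >>> BehaviourTask._spacer_support({'IBLRIG_VERSION': '8.0.0'})
# False
# >>> BehaviourTask._spacer_support({})  # missing version falls back to the '100.0.0' placeholder
# False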


class VideoTask(DynamicTask):

    def __init__(self, session_path, cameras, **kwargs):
        super().__init__(session_path, cameras=cameras, **kwargs)
        self.cameras = cameras
        self.device_collection = self.get_device_collection('cameras', kwargs.get('device_collection', 'raw_video_data'))
        # self.collection = self.get_task_collection(kwargs.get('collection', None))

    def extract_camera(self, save=True):
        """Extract camera data.

        This is an abstract method called by `_run` and `run_qc` methods. Subclasses should
        return the extracted camera data and a list of output files. This method should also
        save the camera extractor object to the :prop:`extractor` property for use by `run_qc`.

        Parameters
        ----------
        save : bool
            Whether to save the extracted data as ALF datasets.

        Returns
        -------
        dict
            A dictionary of camera data.
        list of pathlib.Path
            A list of output file paths if save == true.
        """
        return None, None

    def run_qc(self, camera_data=None, update=True):
        """Run camera QC.

        Subclass method should return the QC object. This just validates that `camera_data` is
        not None.

        Parameters
        ----------
        camera_data : dict
            A dictionary of extracted camera data. The output of :meth:`extract_camera`.
        update : bool
            If true, update Alyx with the QC outcome.

        Returns
        -------
        ibllib.qc.camera.CameraQC
            A QC object replete with camera data and computed metrics.
        """
        self._assert_trials_data(camera_data)
        return None
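
# Usage sketch (illustrative): camera names are passed in explicitly, and the device
# collection defaults to 'raw_video_data'. The path and camera labels are hypothetical.
#
# >>> task = VideoTask('/data/subject/2023-01-01/001', cameras=['left', 'right', 'body'])
# >>> task.device_collection
# 'raw_video_data'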


class AudioTask(DynamicTask):

    def __init__(self, session_path, **kwargs):
        super().__init__(session_path, **kwargs)
        self.device_collection = self.get_device_collection('microphone', kwargs.get('device_collection', 'raw_behavior_data'))


class EphysTask(DynamicTask):

    def __init__(self, session_path, **kwargs):
        super().__init__(session_path, **kwargs)
        self.pname = self.get_pname(kwargs.get('pname', None))
        self.nshanks, self.pextra = self.get_nshanks(kwargs.get('nshanks', None))
        self.device_collection = self.get_device_collection('neuropixel', kwargs.get('device_collection', 'raw_ephys_data'))

    def get_pname(self, pname):
        # pname can be a list or a string
        pname = self.kwargs.get('pname', pname)
        return pname

    def get_nshanks(self, nshanks=None):
        nshanks = self.kwargs.get('nshanks', nshanks)
        if nshanks is not None:
            pextra = [chr(97 + int(shank)) for shank in range(nshanks)]
        else:
            pextra = []
        return nshanks, pextra
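
# Usage sketch (illustrative): for a multi-shank probe, `pextra` holds one letter per
# shank, generated from ASCII codes starting at 97 ('a'). The session path and probe
# name are hypothetical.
#
# >>> task = EphysTask('/data/subject/2023-01-01/001', pname='probe00', nshanks=4)
# >>> task.pextra
# ['a', 'b', 'c', 'd']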


class WidefieldTask(DynamicTask):

    def __init__(self, session_path, **kwargs):
        super().__init__(session_path, **kwargs)
        self.device_collection = self.get_device_collection('widefield', kwargs.get('device_collection', 'raw_widefield_data'))


class MesoscopeTask(DynamicTask):

    def __init__(self, session_path, **kwargs):
        super().__init__(session_path, **kwargs)
        self.device_collection = self.get_device_collection(
            'mesoscope', kwargs.get('device_collection', 'raw_imaging_data_[0-9]*'))

    def get_signatures(self, **kwargs):
        """
        From the template signature of the task, create the exact list of inputs and outputs to
        expect based on the available device collection folders.

        Necessary because we don't know in advance how many device collection folders
        ('imaging bouts') to expect.
        """
        self.session_path = Path(self.session_path)
        # Glob for all device collection (raw imaging data) folders
        raw_imaging_folders = [p.name for p in self.session_path.glob(self.device_collection)]
        super().get_signatures(**kwargs)  # Set inputs and outputs
        # For all inputs and outputs that are part of the device collection, expand to one file
        # per folder; all others remain unchanged
        self.input_files = [update_collections(x, raw_imaging_folders, self.device_collection) for x in self.input_files]
        self.output_files = [update_collections(x, raw_imaging_folders, self.device_collection) for x in self.output_files]

    def load_sync(self):
        """
        Load the sync and channel map.

        This method may be expanded to support other raw DAQ data formats.

        Returns
        -------
        one.alf.io.AlfBunch
            A dictionary with keys ('times', 'polarities', 'channels'), containing the sync
            pulses and the corresponding channel numbers.
        dict
            A map of channel names and their corresponding indices.
        """
        alf_path = self.session_path / self.sync_collection
        if self.get_sync_namespace() == 'timeline':
            # Load the sync and channel map from the raw DAQ data
            sync, chmap = load_timeline_sync_and_chmap(alf_path)
        else:
            raise NotImplementedError
        return sync, chmap
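
# Usage sketch (illustrative): when the sync namespace in the experiment description
# is 'timeline', `load_sync` delegates to `load_timeline_sync_and_chmap`. The session
# path is hypothetical.
#
# >>> task = MesoscopeTask('/data/subject/2023-01-01/001')
# >>> sync, chmap = task.load_sync()
# >>> sorted(sync.keys())  # ['channels', 'polarities', 'times']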


class RegisterRawDataTask(DynamicTask):
    """
    Base register raw task.

    To rename files:

    1. input and output must have the same length
    2. output files must have the full filename
    """

    priority = 100
    job_size = 'small'

    def rename_files(self, symlink_old=False):
        # If either no inputs or no outputs are given, we don't do any renaming
        if not all(map(len, (self.input_files, self.output_files))):
            return
        # Otherwise we need to make sure there is one to one correspondence for renaming files
        assert len(self.input_files) == len(self.output_files)
        for before, after in zip(self.input_files, self.output_files):
            ok, old_paths, missing = before.find_files(self.session_path)
            if not old_paths:
                if ok:  # if the file doesn't exist and it is not required we are okay to continue
                    continue
                else:
                    raise FileNotFoundError(f'file(s) {", ".join(missing)} not found')
            new_paths = list(map(self.session_path.joinpath, ensure_list(after.glob_pattern)))
            assert len(old_paths) == len(new_paths)
            for old_path, new_path in zip(old_paths, new_paths):
                if old_path == new_path:
                    continue
                new_path.parent.mkdir(parents=True, exist_ok=True)
                _logger.debug('%s -> %s', old_path.relative_to(self.session_path), new_path.relative_to(self.session_path))
                old_path.replace(new_path)
                if symlink_old:
                    old_path.symlink_to(new_path)

    @staticmethod
    def _is_animated_gif(snapshot: Path) -> bool:
        """
        Test if image is an animated GIF file.

        Parameters
        ----------
        snapshot : pathlib.Path
            An image filepath to test.

        Returns
        -------
        bool
            True if image is an animated GIF.

        Notes
        -----
        This could be achieved more succinctly with
        `from PIL import Image; Image.open(snapshot).is_animated`, however despite being an
        indirect dependency, the Pillow library is not in the requirements, whereas skimage is.
        """
        return snapshot.suffix == '.gif' and len(ImageCollection(str(snapshot))) > 1

    @staticmethod
    def _save_as_png(snapshot: Path) -> Path:
        """
        Save an image to PNG format.

        Parameters
        ----------
        snapshot : pathlib.Path
            An image filepath to convert.

        Returns
        -------
        pathlib.Path
            The new PNG image filepath.
        """
        img = imread(snapshot, as_gray=True)
        matplotlib.image.imsave(snapshot.with_suffix('.png'), img, cmap='gray')
        return snapshot.with_suffix('.png')

    def register_snapshots(self, unlink=False, collection=None):
        """
        Register any photos in the snapshots folder to the session. Typically imaging users will
        take numerous photos for reference.

        Supported extensions: .jpg, .jpeg, .png, .tif, .tiff, .gif

        If a .txt file with the same name exists in the same location, the contents will be
        added to the note text.

        Parameters
        ----------
        unlink : bool
            If true, files are deleted after upload.
        collection : str, list, optional
            Location of the 'snapshots' folder relative to the session path. If None, uses the
            'device_collection' attribute (if it exists), otherwise the root session path.

        Returns
        -------
        list of dict
            The newly registered Alyx notes.

        Notes
        -----
        - Animated GIF files are not resized and therefore may take up significant space on the
          database.
        - TIFF files are converted to PNG format before upload. The original file is not
          replaced.
        - JPEG and PNG files are resized by Alyx.
        """
        collection = getattr(self, 'device_collection', None) if collection is None else collection
        collection = collection or ''  # If not defined, use no collection
        if collection and '*' in collection:
            collection = [p.name for p in self.session_path.glob(collection)]
            # Check whether folders on disk contain '*'; this is to stop an infinite recursion
            assert not any('*' in c for c in collection), 'folders containing asterisks not supported'
        # If more than one collection exists, register snapshots in each collection
        if collection and not isinstance(collection, str):
            return flatten(filter(None, [self.register_snapshots(unlink, c) for c in collection]))
        snapshots_path = self.session_path.joinpath(*filter(None, (collection, 'snapshots')))
        if not snapshots_path.exists():
            return

        eid = self.one.path2eid(self.session_path, query_type='remote')
        if not eid:
            _logger.warning('Failed to upload snapshots: session not found on Alyx')
            return
        note = dict(user=self.one.alyx.user, content_type='session', object_id=eid, text='')
        notes = []
        exts = ('.jpg', '.jpeg', '.png', '.tif', '.tiff', '.gif')
        for snapshot in filter(lambda x: x.suffix.lower() in exts, snapshots_path.glob('*.*')):
            if snapshot.suffix in ('.tif', '.tiff') and not snapshot.with_suffix('.png').exists():
                _logger.debug('converting "%s" to png...', snapshot.relative_to(self.session_path))
                snapshot = self._save_as_png(snapshot_tif := snapshot)
                if unlink:
                    snapshot_tif.unlink()
            _logger.info('Uploading "%s"...', snapshot.relative_to(self.session_path))
            if snapshot.with_suffix('.txt').exists():
                with open(snapshot.with_suffix('.txt'), 'r') as txt_file:
                    note['text'] = txt_file.read().strip()
            else:
                note['text'] = ''
            note['width'] = 'orig' if self._is_animated_gif(snapshot) else None
            with open(snapshot, 'rb') as img_file:
                files = {'image': img_file}
                notes.append(self.one.alyx.rest('notes', 'create', data=note, files=files))
            if unlink:
                snapshot.unlink()
        # If nothing else is in the snapshots folder, delete the folder
        if unlink and next(snapshots_path.rglob('*'), None) is None:
            snapshots_path.rmdir()
        _logger.info('%i snapshots uploaded to Alyx', len(notes))
        return notes

    def _run(self, **kwargs):
        self.rename_files(**kwargs)
        if not self.output_files:
            return []

        # FIXME Can be done with Task.assert_expected_outputs
        ok, out_files, missing = map(flatten, zip(*map(lambda x: x.find_files(self.session_path), self.output_files)))
        if not all(ok):
            _logger.error('The following expected outputs are missing: %s', ', '.join(missing))
            self.status = -1
        return out_files
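
# Usage sketch (illustrative): registering reference photos found in
# <session>/snapshots as Alyx session notes, deleting local copies after upload.
# Requires a configured ONE instance; the session path is hypothetical.
#
# >>> from one.api import ONE
# >>> task = RegisterRawDataTask('/data/subject/2023-01-01/001', one=ONE())
# >>> notes = task.register_snapshots(unlink=True)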


class ExperimentDescriptionRegisterRaw(RegisterRawDataTask):
    """dict of list: custom sign off keys corresponding to specific devices"""
    sign_off_categories = SIGN_OFF_CATEGORIES

    @property
    def signature(self):
        signature = {
            'input_files': [],
            'output_files': [('*experiment.description.yaml', '', True)]
        }
        return signature

    def _run(self, **kwargs):
        # Register experiment description file
        out_files = super(ExperimentDescriptionRegisterRaw, self)._run(**kwargs)
        if not self.one.offline and self.status == 0:
            with no_cache(self.one.alyx):  # Ensure we don't load the cached JSON response
                eid = self.one.path2eid(self.session_path, query_type='remote')
            exp_dec = sess_params.read_params(out_files[0])
            data = sign_off_dict(exp_dec, sign_off_categories=self.sign_off_categories)
            self.one.alyx.json_field_update('sessions', eid, data=data)
        return out_files
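
# Usage sketch (illustrative): this task is normally instantiated by
# ibllib.pipes.dynamic_pipeline.make_pipeline, but may be run standalone to register
# an experiment description file and initialise the session's sign-off JSON field on
# Alyx. The session path is hypothetical.
#
# >>> from one.api import ONE
# >>> task = ExperimentDescriptionRegisterRaw('/data/subject/2023-01-01/001', one=ONE())
# >>> status = task.run()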