Source code for brainbox.modeling.utils

import numpy as np
import pandas as pd
from tqdm import tqdm
from numpy.matlib import repmat


def raised_cosine(duration, nbases, binfun):
    """
    Build a set of raised-cosine basis functions tiling a time window.

    Parameters
    ----------
    duration : float
        Length of the window, passed unchanged to ``binfun``.
    nbases : int
        Number of basis functions (columns of the result).
    binfun : callable
        Function mapping ``duration`` to the number of bins in the window.

    Returns
    -------
    numpy.ndarray
        Array of shape (nbins, nbases). Column ``k`` is a single raised-cosine
        bump centred at ``(k + 0.5) * nbins / nbases`` (in 1-based bin units)
        with a full width of ``4 * nbins / nbases`` bins; entries at or beyond
        half a width from the centre are zero.
    """
    nbins = binfun(duration)
    # 1-based bin positions as a column vector; broadcasting against the row
    # of centres below replaces the explicit repmat tiling.
    bin_pos = np.arange(1, nbins + 1).reshape(-1, 1)
    dbcenter = nbins / nbases  # spacing between neighbouring centres
    cwidth = 4 * dbcenter  # full support of one bump
    bcenters = 0.5 * dbcenter + dbcenter * np.arange(0, nbases)
    x = bin_pos - bcenters.reshape(1, -1)  # signed distance to each centre
    # The boolean factor zeroes everything outside the central cosine period.
    bases = (np.abs(x / cwidth) < 0.5) * (np.cos(x * np.pi * 2 / cwidth) * 0.5 + 0.5)
    return bases
def full_rcos(duration, nbases, binfun, n_before=1):
    """
    Build raised-cosine basis functions whose centres extend past the window.

    The centre spacing is ``nbins / (nbases - 2)`` and the first centre sits
    ``n_before`` spacings before the position ``0.5 * spacing``, so some bumps
    are centred outside the binned window and only partly overlap it.

    Parameters
    ----------
    duration : float
        Length of the window, passed unchanged to ``binfun``.
    nbases : int
        Number of basis functions (columns of the result). ``nbases == 2``
        makes the centre spacing a division by zero.
    binfun : callable
        Function mapping ``duration`` to the number of bins in the window.
    n_before : int, optional
        Number of centres shifted before the first in-window centre. Non-int
        values are coerced with ``int()``. By default 1.

    Returns
    -------
    numpy.ndarray
        Array of shape (nbins, nbases); each column is one raised-cosine bump
        with full width ``4 * nbins / (nbases - 2)`` bins, zero at or beyond
        half a width from its centre.
    """
    if not isinstance(n_before, int):
        n_before = int(n_before)
    nbins = binfun(duration)
    # 1-based bin positions as a column vector; broadcasting against the row
    # of centres below replaces the explicit repmat tiling.
    bin_pos = np.arange(1, nbins + 1).reshape(-1, 1)
    dbcenter = nbins / (nbases - 2)  # spacing between neighbouring centres
    cwidth = 4 * dbcenter  # full support of one bump
    bcenters = 0.5 * dbcenter + dbcenter * np.arange(-n_before, nbases - n_before)
    x = bin_pos - bcenters.reshape(1, -1)  # signed distance to each centre
    # The boolean factor zeroes everything outside the central cosine period.
    bases = (np.abs(x / cwidth) < 0.5) * (np.cos(x * np.pi * 2 / cwidth) * 0.5 + 0.5)
    return bases
def neglog(weights, x, y):
    """
    Evaluate ``-y . (x @ weights) + sum(exp(x @ weights))``.

    This has the form of a Poisson negative log-likelihood with an exponential
    link, without the terms that do not depend on ``weights``.

    Parameters
    ----------
    weights : numpy.ndarray
        Weight vector multiplied on the right of ``x``.
    x : numpy.ndarray
        Design matrix.
    y : numpy.ndarray
        Observed values, indexed with the same mask as ``x @ weights``.

    Returns
    -------
    float or numpy.ndarray
        ``np.inf`` when any observation is non-zero where the predicted rate
        is exactly zero; otherwise the objective value, carrying the leading
        length-1 axis produced by the row-vector product.
    """
    linear_pred = x @ weights
    rate = np.exp(linear_pred)
    positive = rate != 0
    # A non-zero observation under a zero rate makes the objective infinite.
    if np.any(y[~positive] != 0):
        return np.inf
    neg_counts = -y[positive].reshape(1, -1)
    return neg_counts @ linear_pred[positive] + np.sum(rate)
class SequentialSelector:
    """
    Greedy sequential selection of design-matrix covariates for a neural model.

    At every step each candidate covariate is fit (via ``model._fit``) and
    scored (via ``model._scorer``) per cell, and the candidate with the highest
    training score is recorded for that cell.
    """

    def __init__(self, model, n_features_to_select=None,
                 direction='forward', scoring=None, train=None, test=None):
        """
        Sequential feature selection for neural models

        Parameters
        ----------
        model : brainbox.modeling.neural_model.NeuralModel
            Any class which inherits NeuralModel and has already been instantiated.
        n_features_to_select : int, optional
            Number of covariates to select. When None, will sequentially fit all parameters and
            store the associated scores. By default None
        direction : str, optional
            Direction of sequential selection. 'forward' indicates model will be built from 1
            regressor up, while 'backward' indicates regressors will be removed one at a time until
            n_features_to_select is reached or 1 regressor remains. By default 'forward'
        scoring : str, optional
            Scoring function to use. Must be a valid argument to the subclass of NeuralModel passed
            to SequentialSelector. Stored on the instance; not otherwise used by the methods of
            this class. By default None
        train : array-like, optional
            Trial labels to use for training; rows of the design whose label (design.trlabels)
            is in this collection form the training set. When None, model.traininds is used.
        test : array-like, optional
            Trial labels to use for testing. When None, every row not in the training set is
            used.

        Raises
        ------
        ValueError
            If direction is neither 'forward' nor 'backward'.
        """
        # An unrecognised direction would otherwise get the 'backward' iteration count in fit()
        # while never inverting the mask in _get_best_new_feature(), i.e. an inconsistent mix.
        if direction not in ('forward', 'backward'):
            raise ValueError(f"direction must be 'forward' or 'backward', got {direction!r}")
        self.model = model
        self.design = model.design
        if n_features_to_select:
            self.n_features_to_select = int(n_features_to_select)
        else:
            self.n_features_to_select = len(self.design.covar)
        self.direction = direction
        self.scoring = scoring
        self.delta_scores = pd.DataFrame(index=self.model.clu_ids)
        self.trlabels = self.design.trlabels
        # Boolean row masks over the design matrix, derived from per-row trial labels.
        if train is None:
            self.train = np.isin(self.trlabels, self.model.traininds).flatten()
        else:
            self.train = np.isin(self.trlabels, train).flatten()
        if test is None:
            self.test = ~self.train
        else:
            self.test = np.isin(self.trlabels, test).flatten()
        self.features = np.array(list(self.design.covar.keys()))

    def fit(self, train_idx=None, full_scores=False, progress=False):
        """
        Fit the sequential feature selection

        Results are stored on the instance as ``support_`` (boolean cells x features mask),
        ``sequences_`` (feature name picked at each step), ``scores_train_`` and
        ``scores_test_`` (score of the picked feature at each step).

        Parameters
        ----------
        train_idx : array-like
            indices of trials to use in the training set. If the model passed to the SFS instance
            did not already have training indices, this must be specified. If it did have indices,
            then this will override those (and the test set becomes the complement).
        full_scores : bool, optional
            Whether to store the full set of submodel scores at each step. Produces additional
            attributes .full_scores_train_ and .full_scores_test_
        progress : bool, optional
            Whether to show progress bars, by default False

        Raises
        ------
        ValueError
            If no training set is available, or if n_features_to_select is not in
            ``1..len(features)``.
        """
        # __init__ always sets self.train, so this only fires if it was reset to None externally.
        if train_idx is None and self.train is None:
            raise ValueError('train_idx cannot be None if model used to create SFS did not have '
                             'any training indices')
        if train_idx is not None:
            self.train = np.isin(self.trlabels, train_idx).flatten()
            self.test = ~self.train
        n_features = len(self.features)
        # Validate before allocating any result frames.
        if not 0 < self.n_features_to_select <= n_features:
            raise ValueError('n_features_to_select is not a valid number in the context'
                             ' of the model.')
        clu_ids = self.model.clu_ids
        step_columns = range(self.n_features_to_select)
        # Per-cell mask of features picked so far.
        # NOTE(review): in 'backward' mode a picked feature is one that was *removed* (the mask
        # is inverted before fitting), so support_ then marks removed features -- confirm this is
        # what callers expect.
        maskdf = pd.DataFrame(False, index=clu_ids, columns=self.features, dtype=bool)
        seqdf = pd.DataFrame(index=clu_ids, columns=step_columns)
        trainscoredf = pd.DataFrame(index=clu_ids, columns=step_columns)
        testscoredf = pd.DataFrame(index=clu_ids, columns=step_columns)

        n_iterations = (self.n_features_to_select if self.direction == 'forward'
                        else n_features - self.n_features_to_select)
        if full_scores:
            fullindex = pd.MultiIndex.from_product([clu_ids, np.arange(n_iterations)],
                                                   names=['clu_id', 'feature_iter'])
            fulltrain = pd.DataFrame(index=fullindex, columns=range(len(self.design.covar)))
            fulltest = pd.DataFrame(index=fullindex, columns=range(len(self.design.covar)))

        for i in tqdm(range(n_iterations), desc='step', leave=False, disable=not progress):
            # Cells sharing the same current mask are fit together in one call.
            masks_set = maskdf.groupby(self.features.tolist()).groups
            # The inner bar honours `progress` too, so progress=False is fully silent.
            for current_mask in tqdm(masks_set, desc='feature subset', leave=False,
                                     disable=not progress):
                cells = masks_set[current_mask]
                outputs = self._get_best_new_feature(current_mask, cells, full_scores)
                if full_scores:
                    new_feature_idx, nf_train, nf_test, nf_fulltrain, nf_fulltest = outputs
                else:
                    new_feature_idx, nf_train, nf_test = outputs
                for cell in cells:
                    picked = self.features[new_feature_idx.loc[cell]]
                    maskdf.at[cell, picked] = True
                    seqdf.loc[cell, i] = picked
                    trainscoredf.loc[cell, i] = nf_train.loc[cell]
                    testscoredf.loc[cell, i] = nf_test.loc[cell]
                    if full_scores:
                        fulltest.loc[cell, i] = nf_fulltest.loc[cell]
                        fulltrain.loc[cell, i] = nf_fulltrain.loc[cell]
        self.support_ = maskdf
        self.sequences_ = seqdf
        self.scores_test_ = testscoredf
        self.scores_train_ = trainscoredf
        if full_scores:
            self.full_scores_train_ = fulltrain
            self.full_scores_test_ = fulltest

    def _get_best_new_feature(self, mask, cells, full_scores=False):
        """
        Score every not-yet-picked feature for a group of cells sharing one mask.

        Parameters
        ----------
        mask : sequence of bool
            Current per-feature mask (True = already picked), in self.features order.
        cells : array-like
            Cluster ids (entries of model.clu_ids) that share this mask.
        full_scores : bool, optional
            Whether to also return the full candidate score tables.

        Returns
        -------
        maxind : pandas.Series
            Per cell, the feature index whose candidate model had the highest training score.
        trainmax : pandas.Series
            Per cell, that highest training score.
        testmax : pandas.Series
            Per cell, the test score of the same candidate (chosen by training score).
        trainscores, testscores : pandas.DataFrame
            Only when full_scores is True: cells x candidate-feature score tables.
        """
        mask = np.array(mask)
        candidate_features = np.flatnonzero(~mask)
        # Column positions of these cells in model.binnedspikes (in model.clu_ids order).
        cell_idxs = np.argwhere(np.isin(self.model.clu_ids, cells)).flatten()
        my = self.model.binnedspikes[np.ix_(self.train, cell_idxs)]
        my_test = self.model.binnedspikes[np.ix_(self.test, cell_idxs)]
        trainscores = pd.DataFrame(index=cells, columns=candidate_features, dtype=float)
        testscores = pd.DataFrame(index=cells, columns=candidate_features, dtype=float)
        for feature_idx in candidate_features:
            candidate_mask = mask.copy()
            candidate_mask[feature_idx] = True
            # Backward selection fits on everything *except* the picked features.
            if self.direction == 'backward':
                candidate_mask = ~candidate_mask
            fitfeatures = self.features[candidate_mask]
            feat_idx = np.hstack([self.design.covar[feat]['dmcol_idx'] for feat in fitfeatures])
            mdm = self.design[np.ix_(self.train, feat_idx)]
            mdm_test = self.design[np.ix_(self.test, feat_idx)]
            # presumably coefs/intercepts are indexed by cell id -- they are read with .loc[cell]
            coefs, intercepts = self.model._fit(mdm, my, cells=cells)
            for i, cell in enumerate(cells):
                trainscores.at[cell, feature_idx] = self.model._scorer(coefs.loc[cell],
                                                                       intercepts.loc[cell],
                                                                       mdm, my[:, i])
                testscores.at[cell, feature_idx] = self.model._scorer(coefs.loc[cell],
                                                                      intercepts.loc[cell],
                                                                      mdm_test, my_test[:, i])

        maxind = trainscores.idxmax(axis=1)
        trainmax = trainscores.max(axis=1)
        # Ugly kludge to compensate for DataFrame.lookup being deprecated: pick, for each row,
        # the test score in the column selected by the training argmax.
        midx, cols = pd.factorize(maxind)
        testmax = pd.Series(testscores.reindex(cols, axis=1).to_numpy()[np.arange(len(testscores)),
                                                                        midx],
                            index=testscores.index)
        if full_scores:
            return maxind, trainmax, testmax, trainscores, testscores
        else:
            return maxind, trainmax, testmax