Source code for brainbox.numerical

from typing import TypeVar, Sequence, Union, Optional, Type
import logging
import warnings

import numpy as np
from numba import jit

D = TypeVar('D', bound=np.generic)
Array = Union[np.ndarray, Sequence]

logger = logging.getLogger('ibllib')
warnings.warn('brainbox.numerical is deprecated; use iblutil.numerical',
              category=DeprecationWarning, stacklevel=2)  # iblrplate


[docs]def between_sorted(sorted_v, bounds=None):
    """
    Given a vector of sorted values, returns a boolean vector indices True when the value
    is between bounds. If multiple bounds are given, returns the equivalent OR of individual
    bounds tuple
    Especially useful for spike times
      indices = between_sorted(spike_times, [tstart, tstop])
    :param sorted_v: vector containing sorted values (won't check)
    :param bounds: minimum included value and maximum included value
        can be a list[tstart, tstop] or an array of dimension (n, 2)
    :return:
    """
    bounds = np.array(bounds)
    starts, stops = (np.take(bounds, 0, axis=-1), np.take(bounds, 1, axis=-1))
    sbounds = np.logical_and(starts <= sorted_v[-1], stops >= sorted_v[0])
    starts = starts[sbounds]
    stops = stops[sbounds]
    sel = sorted_v * 0
    sel[np.searchsorted(sorted_v, starts)] = 1
    istops = np.searchsorted(sorted_v, stops, side='right')
    sel[istops[istops < sorted_v.size]] += -1
    return np.cumsum(sel).astype(bool)


[docs]def ismember(a, b):
    """
    equivalent of np.isin but returns indices as in the matlab ismember function
    returns an array containing logical 1 (true) where the data in A is B
    also returns the location of members in b such as a[lia] == b[locb]
    :param a: 1d - array
    :param b: 1d - array
    :return: isin, locb
    """
    lia = np.isin(a, b)
    aun, _, iuainv = np.unique(a[lia], return_index=True, return_inverse=True)
    _, ibu, iau = np.intersect1d(b, aun, return_indices=True)
    locb = ibu[iuainv]
    return lia, locb


[docs]def ismember2d(a, b):
    """
    Equivalent of np.isin but returns indices as in the matlab ismember function
    returns an array containing logical 1 (true) where the data in A is B
    also returns the location of members in b such as a[lia, :] == b[locb, :]
    :param a: 2d array
    :param b: 2d array
    :return: isin, locb
    """
    amask = np.ones(a.shape[0], dtype=bool)
    ia = np.zeros(a.shape, dtype=bool)
    ib = np.zeros(a.shape, dtype=np.int32) - 1
    ina = np.zeros(a.shape[0], dtype=bool)
    bind = np.zeros(a.shape[0], dtype=np.int32) - 1
    # get a 1d ismember first for each column
    for n in np.arange(a.shape[1]):
        iaa, ibb = ismember(a[amask, n], b[:, n])
        ia[amask, n] = iaa
        ib[np.where(amask)[0][iaa], n] = ibb
    # those that have at least one mismatch are not in
    amask[~np.all(ia, axis=1)] = False
    # those that get the same index for all column do not need further testing
    ifound = np.where(amask)[0][np.sum(np.abs(np.diff(ib[amask], axis=1)), axis=1) == 0]
    ina[ifound] = True
    amask[ifound] = False
    bind[ifound] = ib[ifound, 0]
    # the remaining ones have to be check manually (almost never happens for uuids)
    for iaa in np.where(amask)[0]:
        ibb = find_first_2d(b, a[iaa, :])
        if ibb:
            ina[iaa] = True
            bind[iaa] = ibb
    return ina, bind[ina]


[docs]def intersect2d(a0, a1, assume_unique=False):
    """
    Performs intersection on multiple columns arrays a0 and a1
    :param a0:
    :param a1:
    :param assume_unique: If True, the input arrays are both assumed to be unique,
    which can speed up the calculation.
    :return: intersection
    :return: index of a0 such as intersection = a0[ia, :]
    :return: index of b0 such as intersection = b0[ib, :]
    """
    _, i0, i1 = np.intersect1d(a0[:, 0], a1[:, 0],
                               return_indices=True, assume_unique=assume_unique)
    for n in np.arange(1, a0.shape[1]):
        _, ii0, ii1 = np.intersect1d(a0[i0, n], a1[i1, n],
                                     return_indices=True, assume_unique=assume_unique)
        i0 = i0[ii0]
        i1 = i1[ii1]
    return a0[i0, :], i0, i1


[docs]@jit(nopython=True)
def find_first_2d(mat, val):
    """
    Returns first index where
    The purpose of this function is performance: uses low level numba and avoids looping
    through the full array
    :param mat: np.array
    :param val: values to search for
    :return: index or empty array
    """
    for i in np.arange(mat.shape[0]):
        if np.all(mat[i] == val):
            return i


[docs]def within_ranges(x: np.ndarray, ranges: Array, labels: Optional[Array] = None,
                  mode: str = 'vector', dtype: Type[D] = 'int8') -> np.ndarray:
    """
    Detects which points of the input vector lie within one of the ranges specified in the ranges.
    Returns an array the size of x with a 1 if the corresponding point is within a range.

    The function uses a stable sort algorithm (timsort) to find the edges within the input array.
    Edge behaviour is inclusive.

    Ranges are [(start0, stop0), (start1, stop1), etc.] or n-by-2 numpy array.
    The ranges may be optionally assigned a row in 'matrix' mode or a numerical label in 'vector'
    mode. Labels must have a length of n.  Overlapping ranges have a value that is the sum of the
    relevant range labels (ones in 'matrix' mode).

    If mode is 'vector' (default) it will give a vector, specifying the range of each point.
    If mode is 'matrix' it will give a matrix output where each range is assigned a particular row
    index with 1 if the point belongs to that range label.  Multiple ranges can be assigned to a
    particular row, e.g. [0, 0,1] would give a 2-by-N matrix with the first two ranges in the
    first row.  Points within more than one range are given a value > 1

    Parameters
    ----------
    x : array_like
        An array whose points are tested against the ranges.  multi-dimensional arrays are
        flattened to 1D
    ranges : array_like
        A list of tuples or N-by-2 array of ranges to test, where N is the number of ranges,
        i.e. [[start0, stop0],
        [start1, stop1]]
    labels : vector, list
        If mode is 'vector'; a list of integer labels to demarcate which points lie within each
        range.  In 'matrix' mode; a list of column indices (ranges can share indices).
        The number of labels should match the number of ranges.  If None, ones are used for all
        ranges.
    mode : {'matrix', 'vector'}
        The type of output to return.  If 'matrix' (default), an N-by-M matrix is returned where N
        is the size of x and M corresponds to the max index in labels, e.g. with labels=[0,1,2],
        the output matrix would have 3 columns.  If 'vector' a vector the size of x is returned.
    dtype : str, numeric or boolean type
        The data type of the returned array.  If type is bool, the labels in vector mode will be
        ignored.  Default is int8.


    Returns
    -------
    A vector of size like x where zeros indicate that the points do not lie within ranges (
    'vector' mode) or a matrix where out.shape[0] == x.size and out.shape[1] == max(labels) + 1.

    Examples
    -------
    # Assert that points in ranges are mutually exclusive
    np.all(within_ranges(x, ranges) <= 1)

    Tests
    -----
    >>> import numpy as np
    >>> within_ranges(np.arange(11), [(1, 2), (5, 8)])
    array([0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0], dtype=int8)
    >>> ranges = np.array([[1, 2], [5, 8]])
    >>> within_ranges(np.arange(10) + 1, ranges, labels=np.array([0,1]), mode='matrix')
    array([[1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 1, 1, 1, 1, 0, 0]], dtype=int8)
    >>> within_ranges(np.arange(11), [(1,2), (5,8), (4,6)], labels=[0,1,1], mode='matrix')
    array([[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 1, 2, 2, 1, 1, 0, 0]], dtype=int8)
    >>> within_ranges(np.arange(10) + 1, ranges, np.array([3,1]), mode='vector')
    array([3, 3, 0, 0, 1, 1, 1, 1, 0, 0], dtype=int8)
    >>> within_ranges(np.arange(11), [(1,2), (5,8), (4,6)], dtype=bool)
    array([False,  True,  True, False,  True,  True,  True,  True,  True,
           False, False])
    """
    # Flatten
    x = x.ravel()

    # Ensure ranges are numpy
    ranges = np.array(ranges)

    # Get size info
    n_points = x.size
    n_ranges = ranges.shape[0]

    if labels is None:
        # In 'matrix' mode default row index is 0
        labels = np.zeros((n_ranges,), dtype='uint32')
        if mode == 'vector':  # Otherwise default numerical label is 1
            labels += 1
    assert len(labels) >= n_ranges, 'range labels do not match number of ranges'
    n_labels = np.unique(labels).size

    # If no ranges given, short circuit function and return zeros
    if n_ranges == 0:
        return np.zeros_like(x, dtype=dtype)

    # Check end comes after start in each case
    assert np.all(np.diff(ranges, axis=1) > 0), 'ranges ends must all be greater than starts'

    # Make array containing points, starts and finishes

    # This order means it will be inclusive
    to_sort = np.concatenate((ranges[:, 0], x, ranges[:, 1]))
    # worst case O(n*log(n)) but will be better than  this as most of the array is ordered;
    # memory overhead ~n/2
    idx = np.argsort(to_sort, kind='stable')

    # Make delta array containing 1 for every start and -1 for every stop
    # with one row for each range label
    if mode == 'matrix':
        delta_shape = (n_labels, n_points + 2 * n_ranges)
        delta = np.zeros(delta_shape, dtype='int8')

        delta[labels, np.arange(n_ranges)] = 1
        delta[labels, n_points + n_ranges + np.arange(n_ranges)] = -1

        # Arrange in order
        delta_sorted = delta[:, idx]

        # Take cumulative sums
        summed = np.cumsum(delta_sorted, axis=1)

        # Reorder back to original order
        reordered = np.zeros(delta_shape, dtype=dtype)
        reordered[:, idx] = summed.reshape(delta_shape[0], -1)
        return reordered[:, np.arange(n_ranges, n_points + n_ranges)]

    elif mode == 'vector':
        delta_shape = (n_points + 2 * n_ranges,)
        r_delta = np.zeros(delta_shape, dtype='int32')
        r_delta[np.arange(n_ranges)] = labels
        r_delta[n_points + n_ranges + np.arange(n_ranges)] = -labels

        # Arrange in order
        r_delta_sorted = r_delta[idx]

        # Take cumulative sum
        r_summed = np.cumsum(r_delta_sorted)

        # Reorder back to original
        r_reordered = np.zeros_like(r_summed, dtype=dtype)
        r_reordered[idx] = r_summed

        return r_reordered[np.arange(n_ranges, n_points + n_ranges)]
    else:
        raise ValueError('unknown mode type, options are "matrix" and "vector"')