Source code for ibllib.io.extractors.training_audio

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from pathlib import Path
import logging

import numpy as np
import scipy.signal
import scipy.ndimage
from scipy.io import wavfile


from ibldsp.utils import WindowGenerator
from ibldsp import fourier
import ibllib.io.raw_data_loaders as ioraw
from ibllib.io.extractors.training_trials import GoCueTimes


# module-level logger, named after this module
logger_ = logging.getLogger(__name__)

# default number of samples of the sliding window used by welchogram (nswin)
NS_WIN = 2 ** 18  # 2 ** np.ceil(np.log2(1 * fs))
# default overlap between consecutive windows, in samples: half a window.
# NB: true division makes this a float (131072.0), not an int
OVERLAP = NS_WIN / 2
# default number of samples per segment for the Welch PSD estimate in welchogram (nperseg)
NS_WELCH = 512
# default frequency of the tone searched by detect_ready_tone (ftone)
FTONE = 5000
# default unit for _get_conversion_factor; with 'dBFS' the conversion factor is 1.0
UNIT = 'dBFS'  # dBFS or dbSPL
# default ready tone level passed to _get_conversion_factor (ready_tone_spl)
READY_TONE_SPL = 85



[docs]
def detect_ready_tone(w, fs, ftone=FTONE, threshold=0.8):
    """
    Finds the onsets of a transient sinusoidal tone in an audio time series.

    The envelope (magnitude of the analytic signal) of the trace band-passed around `ftone`
    is divided by the envelope of the median-subtracted broadband trace. This ratio is smoothed
    by a uniform filter of int(fs * 0.1) samples and compared to `threshold`; an onset is
    reported wherever the comparison switches from False to True.

    :param w: audio time series
    :param fs: sampling frequency (Hz)
    :param ftone: frequency of the tone to detect
    :param threshold: minimum smoothed ratio of the band-passed envelope to the broadband
     envelope for a sample to count as tone, between 0 and 1 (defaults to 0.8)
    :return: array of sample indices i for which the detection is False at i and True at i + 1
    """
    # envelope of the broadband signal once its median (DC offset) is removed
    broadband_envelope = np.abs(scipy.signal.hilbert(w - np.median(w)))
    # envelope of the signal band-passed around the tone frequency
    # NOTE(review): the last two corners are not increasing (1.15 then 1.1) - confirm that this
    # is what fourier.bp expects
    corners = ftone * np.array([0.9, 0.95, 1.15, 1.1])
    narrowband = fourier.bp(w, si=1 / fs, b=corners)
    tone_envelope = np.abs(scipy.signal.hilbert(narrowband))
    # the small constant keeps the ratio finite when the broadband envelope is zero
    ratio = tone_envelope / (broadband_envelope + 1e-3)
    n_smooth = int(fs * 0.1)
    is_tone = scipy.ndimage.uniform_filter1d(ratio, n_smooth) > threshold
    # a +1 step in the boolean trace marks a transition from "no tone" to "tone"
    rising_edges = np.diff(is_tone.astype(int)) == 1
    return np.where(rising_edges)[0]

    # tone = np.sin(2 * np.pi * FTONE * np.arange(0, fs * 0.1) / fs)
    # tone = tone / np.sum(tone ** 2)
    # xc = np.abs(signal.hilbert(signal.correlate(w - np.mean(w), tone)))


def _get_conversion_factor(unit=UNIT, ready_tone_spl=READY_TONE_SPL):
    """
    Returns the factor by which raw audio samples are multiplied to express them in `unit`.

    Three (non exclusive) calibration approaches are possible:
    a- get the mic sensitivity, the preamp gain and DAC parameters and do the math
    b- treat the whole thing as a black box and do a calibration run
    c- use the calibrated ready tone
    This function implements c): the level of the ready tone at 5kHz is taken as known
    (`ready_tone_spl`), the spectrum is assumed flat between 1 and 5 kHz, and the peak value of
    the 5k tone is the one observed at the microphone.

    The reference of acoustic pressure is 0dBSPL @ 1kHz, the threshold of hearing (20 μPa);
    the usual calibration is 1 Pa (94 dBSPL) at 1 kHz.

    NOTE(review): an earlier comment stated the ready tone is 55dB SPL whereas the default
    READY_TONE_SPL is 85 - confirm which value is correct.

    :param unit: 'dBFS' leaves the samples unscaled; any other value applies the ready tone calibration
    :param ready_tone_spl: level of the ready tone used for the calibration
    :return: multiplicative conversion factor (1.0 for 'dBFS')
    """
    if unit != 'dBFS':
        # hard-coded observations of the ready tone at the microphone
        # NOTE(review): the unit of the distance is not stated here - presumably metres, confirm
        mic_distance = .155
        observed_peak = 60
        # peak to RMS amplitude of a sine wave
        observed_rms = np.sqrt(2) / 2 * observed_peak
        # difference in dB between the known tone level and the observed RMS level
        offset_db = ready_tone_spl - 20 * np.log10(observed_rms)
        return 10 ** (offset_db / 20) * mic_distance
    return 1.0



[docs]
def welchogram(fs, wav, nswin=NS_WIN, overlap=OVERLAP, nperseg=NS_WELCH, detect_kwargs=None):
    """
    Computes a spectrogram on a very large audio file, one sliding window at a time, and
    detects the ready tone onsets along the way.

    Each window yields one row of the spectrogram (a Welch PSD estimate) and is also scanned
    by detect_ready_tone.

    :param fs: sampling frequency (Hz)
    :param wav: wav signal (vector or memmap)
    :param nswin: n samples of the sliding window
    :param overlap: n samples of the overlap between windows
    :param nperseg: n samples for the computation of the spectrogram
    :param detect_kwargs: optional keyword arguments forwarded to detect_ready_tone
    :return: tscale, fscale, W, detect
     tscale: time scale given by the window generator
     fscale: one-sided frequency scale of the PSD
     W: array (n windows, n frequencies) of PSD estimates; rows of windows shorter than
      nperseg are left at zero
     detect: sorted tone onsets, as sample indices divided by fs, with onsets less than 0.1
      apart merged
    """
    # neither of those depend on the window: resolve them once
    tone_kwargs = detect_kwargs or {}
    scaling = _get_conversion_factor()
    wingen = WindowGenerator(ns=wav.shape[0], nswin=nswin, overlap=overlap)
    n_windows = wingen.nwin
    fscale = fourier.fscale(nperseg, 1 / fs, one_sided=True)
    W = np.zeros((n_windows, len(fscale)))
    tscale = wingen.tscale(fs=fs)
    onsets = []
    for first, last in wingen.firstlast:
        # only the current window gets loaded into memory
        chunk = np.float64(wav[first:last]) * scaling
        # tone detections are relative to the window: shift them to absolute sample indices
        onsets.extend(d + first for d in detect_ready_tone(chunk, fs, **tone_kwargs))
        # PSD estimate of the current window, skipped if it is too short for a pwelch
        if (last - first) >= nperseg:
            W[wingen.iw, :] = scipy.signal.welch(
                chunk, fs=fs, window='hann', nperseg=nperseg, axis=-1,
                detrend='constant', return_onesided=True, scaling='density')[1]
    # overlapping windows may detect the same onset twice: average the pairs of close
    # detections and drop the second of each pair
    detect = np.sort(np.array(onsets)) / fs
    close = np.where(np.diff(detect) < 0.1)[0]
    detect[close] = (detect[close] + detect[close + 1]) / 2
    detect = np.delete(detect, close + 1)
    return tscale, fscale, W, detect




[docs]
def extract_sound(ses_path, task_collection='raw_behavior_data', device_collection='raw_behavior_data', save=True, force=False,
                  delete=False):
    """
    Simple audio features extraction for ambient sound characterization.
    From the microphone wav file of a session, computes a spectrogram and the ready tone onsets
    and writes them as npy ALF files in the device collection folder. If the task data can be
    loaded and the detected onsets are consistent with the go cue times, a synchronized
    time scale for the spectrogram is written as well.

    :param ses_path: ALF full session path: (/mysubject001/YYYY-MM-DD/001)
    :param task_collection: collection passed on to load the task data and extract the go cue times
    :param device_collection: folder, relative to the session path, holding the wav file and
     receiving the output files
    :param save: if True, writes the npy files to disk
    :param force: not used by this function
    :param delete: if True, removes the wav file after processing
    :return: list of output files. If the wav file is missing, only the output files already
     on disk are listed. None is returned when an empty wav file could not be repaired, or
     when no task data could be loaded.
    """
    ses_path = Path(ses_path)
    out_folder = ses_path.joinpath(device_collection)
    wav_file = out_folder / '_iblrig_micData.raw.wav'
    file_names = (
        ('power', '_iblmic_audioSpectrogram.power.npy'),
        ('frequencies', '_iblmic_audioSpectrogram.frequencies.npy'),
        ('onset_times', '_iblmic_audioOnsetGoCue.times_mic.npy'),
        ('times_microphone', '_iblmic_audioSpectrogram.times_mic.npy'),
    )
    files_out = {key: out_folder / name for key, name in file_names}
    if not wav_file.exists():
        logger_.warning(f"Wav file doesn't exist: {wav_file}")
        return [file for file in files_out.values() if file.exists()]
    # crunch the wav file
    fs, wav = wavfile.read(wav_file, mmap=False)
    if len(wav) == 0:
        # no samples were read: try to repair the file and read it again
        if _fix_wav_file(wav_file) != 0:
            logger_.error(f"WAV Header empty. Sox couldn't fix it, Abort. {wav_file}")
            return
        fs, wav = wavfile.read(wav_file, mmap=False)
    tscale, fscale, W, detect = welchogram(fs, wav)
    if save:
        out_folder.mkdir(exist_ok=True)
        # frequencies are saved as a row vector, times as a column vector
        np.save(file=files_out['power'], arr=W.astype(np.single))
        np.save(file=files_out['frequencies'], arr=fscale[None, :].astype(np.single))
        np.save(file=files_out['onset_times'], arr=detect)
        np.save(file=files_out['times_microphone'], arr=tscale[:, None].astype(np.single))
    # for the time scale, attempt to synchronize using onset sound detection and task data
    bpod_data = ioraw.load_data(ses_path, task_collection=task_collection)
    if bpod_data is None:
        # without session data there is nothing to synchronize to, we're done
        if delete:
            wav_file.unlink()
        return
    go_cue_times, _ = GoCueTimes(ses_path).extract(
        task_collection=task_collection, save=False, bpod_trials=bpod_data)
    # compare go cues and detections pairwise, over the shortest of the two
    n_common = min(len(go_cue_times), len(detect))
    offsets = go_cue_times[:n_common] - detect[:n_common]
    # the offset is only trusted, and the time scale only saved, if it is consistent
    # for the whole session
    if np.std(offsets) < 0.2 and save:
        files_out['times'] = out_folder / '_iblmic_audioSpectrogram.times.npy'
        tscale += np.median(offsets)
        np.save(file=files_out['times'], arr=tscale[:, None].astype(np.single))
    if delete:
        wav_file.unlink()
    return list(files_out.values())



def _fix_wav_file(wav_file):
    """
    Attempts to repair a wav file by rewriting it with `sox --ignore-length`.

    The file is first renamed with a `.wav_` suffix, then sox is asked to write the repaired
    file at the original location. On success the renamed copy is removed; on failure it is
    renamed back so that the original file is left in place.
    Only attempted on Linux.

    :param wav_file: pathlib.Path of the wav file to repair
    :return: 0 if sox succeeded; -1 if the platform is not Linux or if sox could not be
     launched; the non-zero exit code of sox otherwise
    """
    import platform
    import subprocess
    status = -1
    if platform.system() != 'Linux':
        return status
    wav_file_tmp = wav_file.with_suffix('.wav_')
    wav_file.rename(wav_file_tmp)
    # the arguments are passed as a list, without a shell: paths containing spaces or shell
    # metacharacters reach sox verbatim instead of being split or interpreted
    command2run = ['sox', '--ignore-length', str(wav_file_tmp), str(wav_file)]
    try:
        process = subprocess.run(command2run, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # sox is not installed or cannot be executed: keep the failure status so that the
        # original file gets restored below
        pass
    else:
        status = process.returncode
    if status == 0:
        wav_file_tmp.unlink()
    else:
        wav_file_tmp.rename(wav_file)
    return status