Source code for iblutil.io.parquet

import uuid
import json
import logging

import numpy as np
import pyarrow.parquet as pq
import pyarrow as pa
import pandas as pd


_logger = logging.getLogger('ibllib')


def load(filename):
    """
    Loads parquet file into pandas dataframe

    :param filename: path of the parquet file, handed to pyarrow.parquet.read_table
    :return: tuple (df, metadata): the table converted to a pandas dataframe and the
     dictionary decoded from the 'one_metadata' schema entry ({} when the entry is absent)
    """
    table = pq.read_table(filename)
    # schema.metadata is None rather than an empty dict when the file carries no
    # key/value metadata at all; normalise so the lookup below raises KeyError only
    schema_metadata = table.schema.metadata or {}
    try:
        metadata = json.loads(schema_metadata[b'one_metadata'])
    except KeyError:
        _logger.debug("No parquet metadata in %s", filename)
        metadata = {}
    df = table.to_pandas()
    return df, metadata
def save(filename, table, metadata=None):
    """
    Save pandas dataframe to parquet

    :param filename: destination handed to pyarrow.parquet.write_table
    :param table: pandas dataframe to write
    :param metadata: optional json-serialisable object stored under the 'one_metadata'
     schema key; an empty dictionary is stored when omitted or falsy
    :return: None
    """
    # cf https://towardsdatascience.com/saving-metadata-with-dataframes-71f51f558d8e
    # convert the dataframe to an arrow table
    arrow_table = pa.Table.from_pandas(table)
    # user metadata is listed first, the metadata already held by the schema is unpacked after it
    schema_metadata = {
        'one_metadata': json.dumps(metadata or {}).encode(),
        **arrow_table.schema.metadata,
    }
    arrow_table = arrow_table.replace_schema_metadata(schema_metadata)
    # write the arrow table to disk
    pq.write_table(arrow_table, filename)
def uuid2np(eids_uuid):
    """
    Converts an iterable of uuid.UUID objects to a Fortran-ordered numpy array

    :param eids_uuid: iterable of uuid.UUID objects
    :return: numpy array with one row per uuid, each row being the uuid bytes
     reinterpreted as int64 values
    """
    rows = []
    for eid in eids_uuid:
        # view the raw uuid bytes as int64 without copying
        rows.append(np.frombuffer(eid.bytes, dtype=np.int64))
    stacked = np.array(rows)
    return np.asfortranarray(stacked)
def str2np(eids_str):
    """
    Converts uuid string or list of uuid strings to int64 numpy array with 2 cols
    Returns [0, 0] for None list entries

    :param eids_str: a single uuid string, or an iterable of uuid strings / falsy entries
    :return: the array built by uuid2np from the parsed uuids
    """
    # a lone string is handled as a one-element list
    if isinstance(eids_str, str):
        eids_str = [eids_str]
    null_hex = '0' * 32
    parsed = []
    for eid in eids_str:
        # falsy entries (None, empty string) map to the all-zero uuid
        parsed.append(uuid.UUID(eid or null_hex))
    return uuid2np(parsed)
def np2uuid(eids_np):
    """
    Converts numpy encoded uuid(s) back to uuid.UUID object(s)

    :param eids_np: numpy array, pandas DataFrame or pandas Series holding the uuid bytes
    :return: a single uuid.UUID when the array has fewer than 2 dimensions, otherwise a
     list with one uuid.UUID per row
    """
    # pandas containers are converted to their numpy representation first
    if isinstance(eids_np, (pd.DataFrame, pd.Series)):
        eids_np = eids_np.to_numpy()
    if eids_np.ndim < 2:
        return uuid.UUID(bytes=eids_np.tobytes())
    return [uuid.UUID(bytes=row.tobytes()) for row in eids_np]
def np2str(eids_np):
    """
    Converts numpy encoded uuid(s) to uuid string(s)

    :param eids_np: input forwarded unchanged to np2uuid
    :return: a single string when np2uuid returns one uuid.UUID, otherwise a list of strings
    """
    # convert once and reuse the result for both the scalar and the list case
    eids = np2uuid(eids_np)
    if isinstance(eids, uuid.UUID):
        return str(eids)
    return [str(u) for u in eids]
def is_np_id(id):
    """
    The purpose of this is to correctly identify ids even as object arrays

    :param id: array-like to test
    :return: True if the input converts to a 2 dimensional array with exactly 2 columns
     holding only integers (integer dtype, or object dtype whose elements are all
     integers), False otherwise
    """
    arr = np.asarray(id)
    # check the shape first so that 0-d and 1-d inputs return False instead of raising
    if arr.ndim != 2 or arr.shape[1] != 2:
        return False
    if np.issubdtype(arr.dtype, np.integer):
        return True
    if arr.dtype == object:
        # inspect every scalar: iterating the 2-d array directly would yield rows, not elements
        return all(isinstance(x, (int, np.integer)) for x in arr.flat)
    return False