Source code for heamy.cache

import hashlib

import numpy as np
import os
import json
import pandas as pd


class Cache(object):
    """Directory-backed cache for numpy arrays and pandas DataFrames.

    All entries of one cache live in the directory
    ``os.path.join(cache_dir, prefix + hashval)``.  Every entry is written as
    ``<key>.npy``; a DataFrame additionally gets a ``<key>.json`` file that
    holds its column labels.

    Parameters
    ----------
    hashval : str
        Identifier of the cached object; becomes the directory name.
    prefix : str, optional
        Text prepended to ``hashval`` when it is not empty.
    cache_dir : str, optional
        Root directory under which the cache directory is created.
    """

    def __init__(self, hashval, prefix='', cache_dir='.cache/heamy/'):
        if prefix != '':
            hashval = '%s%s' % (prefix, hashval)
        self._hash = hashval
        self._cache_dir = cache_dir
        self._hash_dir = os.path.join(cache_dir, hashval)

    def store(self, key, data):
        """Takes an array and stores it in the cache.

        Parameters
        ----------
        key : str
            Name of the entry inside this cache.
        data : numpy.ndarray or pandas.DataFrame
            Object to persist.  For a DataFrame only the values and the
            column labels are written; the index is not stored.
        """
        # Create the directory without an exists()/makedirs() race: a
        # concurrent creator is fine as long as the directory is there.
        try:
            os.makedirs(self._hash_dir)
        except OSError:
            if not os.path.isdir(self._hash_dir):
                raise

        # Spell out the '.npy' suffix so the file name always matches the one
        # `retrieve` looks for (np.save only appends the suffix when the name
        # does not already end with it).
        cache_file = os.path.join(self._hash_dir, '%s.npy' % key)
        column_file = os.path.join(self._hash_dir, '%s.json' % key)

        if isinstance(data, pd.DataFrame):
            np.save(cache_file, data.values)
            with open(column_file, 'w') as json_file:
                json.dump(data.columns.tolist(), json_file)
        else:
            np.save(cache_file, data)
            # Drop column labels left behind by a DataFrame previously stored
            # under the same key; otherwise `retrieve` would wrap this array
            # in a DataFrame with stale columns.
            if os.path.exists(column_file):
                os.remove(column_file)

    def retrieve(self, key):
        """Retrieves a cached array if possible.

        Parameters
        ----------
        key : str
            Name of the entry inside this cache.

        Returns
        -------
        numpy.ndarray, pandas.DataFrame or None
            ``None`` when no ``<key>.npy`` file exists.  A DataFrame (with a
            default index) when a ``<key>.json`` column file exists next to
            the array, otherwise the loaded array.
        """
        cache_file = os.path.join(self._hash_dir, '%s.npy' % key)
        if not os.path.exists(cache_file):
            return None

        data = np.load(cache_file)

        column_file = os.path.join(self._hash_dir, '%s.json' % key)
        if os.path.exists(column_file):
            with open(column_file, 'r') as json_file:
                columns = json.load(json_file)
            data = pd.DataFrame(data, columns=columns)
        return data

    @property
    def available(self):
        """bool: whether the directory of this cache exists on disk."""
        return os.path.exists(self._hash_dir)
def numpy_buffer(ndarray):
    """Creates a buffer from c_contiguous numpy ndarray.

    Parameters
    ----------
    ndarray : numpy.ndarray, pandas.Series, pandas.DataFrame or array-like
        Data to expose as raw bytes.  pandas objects are replaced by their
        ``.values``; anything else is passed through ``np.asanyarray``.

    Returns
    -------
    memoryview (or the result of ``np.getbuffer`` when numpy provides it)
        A ``uint8`` view of the data.  C-contiguous input is used as is,
        Fortran-contiguous input is viewed through its transpose, and any
        other layout is copied with ``flatten()``.
    """
    # Credits to: https://github.com/joblib/joblib/blob/04b001861e1dd03a857b7b419c336de64e05714c/joblib/hashing.py
    if isinstance(ndarray, (pd.Series, pd.DataFrame)):
        ndarray = ndarray.values

    # Accept plain array-likes (lists, scalars); ndarrays and their
    # subclasses pass through untouched, so their bytes are unchanged.
    ndarray = np.asanyarray(ndarray)

    # A 0-d array cannot be re-viewed with a different itemsize; a one
    # element 1-d array exposes the same bytes.
    if ndarray.ndim == 0:
        ndarray = ndarray.reshape(1)

    if ndarray.flags.c_contiguous:
        obj_c_contiguous = ndarray
    elif ndarray.flags.f_contiguous:
        # The transpose of a Fortran-contiguous array is C-contiguous and
        # shares the same memory, so no copy is needed.
        obj_c_contiguous = ndarray.T
    else:
        obj_c_contiguous = ndarray.flatten()

    obj_c_contiguous = obj_c_contiguous.view(np.uint8)

    # np.getbuffer is used when the installed numpy offers it; otherwise
    # fall back to the built-in memoryview.
    if hasattr(np, 'getbuffer'):
        return np.getbuffer(obj_c_contiguous)
    else:
        return memoryview(obj_c_contiguous)
def np_hash(x):
    """Return the hexadecimal MD5 digest of the bytes exposed by ``numpy_buffer(x)``.

    Only the raw buffer contents are fed to the hash.
    """
    digest = hashlib.md5()
    digest.update(numpy_buffer(x))
    return digest.hexdigest()