Source code for heamy.utils.main

# coding:utf-8
import os
import shutil
from collections import defaultdict
from itertools import combinations

import numpy as np
import pandas as pd
from six.moves import range

try:
    import tqdm as tqdm
except:
    pass


def report_score(scores, metric=None):
    """Print a short summary of validation scores to stdout.

    Parameters
    ----------
    scores : sequence
        One score per fold. A single-element sequence is reported as a
        plain accuracy; otherwise the per-fold values are printed together
        with their mean, standard deviation and variance.
    metric : callable, optional
        If given, its ``__name__`` is printed first.
    """
    if metric is not None:
        print('Metric: %s' % metric.__name__)

    # The right-hand operands are wrapped in 1-tuples on purpose: with a bare
    # ``%`` a tuple of scores would be unpacked as the format arguments and
    # raise ``TypeError: not all arguments converted``.
    if len(scores) == 1:
        print('Accuracy: %s' % (scores[0],))
    else:
        print('Folds accuracy: %s' % (scores,))
        print('Mean accuracy: %s' % (np.mean(scores),))
        print('Standard Deviation: %s' % (np.std(scores),))
        print('Variance: %s' % (np.var(scores),))
def tsplit(df, shape):
    """Cut ``df`` at position ``shape`` and return both pieces.

    Pandas objects are sliced positionally through ``.iloc``; anything else
    is sliced directly.

    Returns
    -------
    tuple
        ``(first_part, second_part)`` where the first part holds positions
        ``0 .. shape - 1`` and the second holds the remainder.
    """
    # Pick the object that accepts positional slices, then slice it once
    # for each half.
    sliceable = df.iloc if isinstance(df, (pd.DataFrame, pd.Series)) else df
    head = sliceable[0:shape]
    tail = sliceable[shape:]
    return head, tail
def concat(x, y, axis=0):
    """Join two pandas or numpy objects into a single one.

    When both inputs are pandas objects they are combined with
    ``pd.concat`` along ``axis``. Otherwise numpy is used:
    ``np.concatenate`` for ``axis == 0`` and ``np.column_stack`` for any
    other axis value.
    """
    pandas_types = (pd.DataFrame, pd.Series)
    both_pandas = isinstance(x, pandas_types) and isinstance(y, pandas_types)

    if both_pandas:
        return pd.concat([x, y], axis=axis)
    if axis == 0:
        return np.concatenate([x, y])
    return np.column_stack([x, y])
def reshape_1d(df):
    """Turn a one-dimensional input into a single-column 2D matrix.

    Inputs whose ``shape`` has any other number of dimensions are returned
    unchanged (the very same object).
    """
    dims = df.shape
    if len(dims) != 1:
        return df
    # (n,) -> (n, 1)
    return df.reshape(dims[0], 1)
def idx(df, index):
    """Universal positional row indexing for numpy and pandas objects.

    Parameters
    ----------
    df : pandas.DataFrame, pandas.Series or array-like
        Object to select from.
    index : int, slice, sequence of int or boolean mask
        Positions to select along the first axis.

    Returns
    -------
    The selected rows: ``df.iloc[index]`` for pandas objects, otherwise the
    result of indexing ``df`` along its first axis.
    """
    if isinstance(df, (pd.DataFrame, pd.Series)):
        return df.iloc[index]

    # One-dimensional arrays (e.g. a target vector) have no second axis, so
    # ``df[index, :]`` would raise ``IndexError: too many indices``.
    if getattr(df, 'ndim', None) == 1:
        # A tuple is treated as a sequence of positions, consistently with
        # how ``df[index, :]`` treats it in the multi-dimensional case.
        if isinstance(index, tuple):
            index = list(index)
        return df[index]

    return df[index, :]
def generate_columns(df, name):
    """Build column labels for ``df`` based on ``name``.

    A one-dimensional or single-column input yields ``[name]``. Otherwise
    one label per column is produced in the form ``<name>_<position>``.
    """
    dims = df.shape
    width = dims[1] if len(dims) != 1 else 1

    if width == 1:
        return [name]

    labels = []
    for position in range(width):
        labels.append('%s_%s' % (name, position))
    return labels
def group_models(models, params):
    """Collect validation output of several models, grouped by position.

    Each model's ``validate(**params)`` is expected to return a pair
    ``(y_true_list, y_pred_list)``. The two lists are walked in lockstep and
    their items are grouped by position.

    Returns
    -------
    tuple
        ``(y_preds_grouped, y_true_grouped)``: a ``defaultdict(list)``
        mapping position -> list of predictions (one per model, in model
        order), and a dict mapping position -> the true values taken from
        the first model that produced that position.
    """
    truth_by_fold = {}
    preds_by_fold = defaultdict(list)

    for estimator in models:
        truths, predictions = estimator.validate(**params)
        for fold, pair in enumerate(zip(truths, predictions)):
            truth, prediction = pair
            # Only the first model to reach a fold contributes its truth.
            truth_by_fold.setdefault(fold, truth)
            preds_by_fold[fold].append(prediction)

    return preds_by_fold, truth_by_fold
def feature_combiner(df):
    """Add a difference column for every pair of existing columns.

    For each 2-combination ``(a, b)`` of the columns present on entry, a
    new column named ``"a-b"`` holding ``df[a] - df[b]`` is written into
    ``df``. The frame is modified in place and also returned.
    """
    # Freeze the pairs first so that columns added below are not combined.
    column_pairs = list(combinations(df.columns, 2))
    for left, right in column_pairs:
        df['%s-%s' % (left, right)] = df[left] - df[right]
    return df
def flush_cache(cache_dir='.cache/heamy/'):
    """Delete the cache directory tree if it exists.

    Parameters
    ----------
    cache_dir : str
        Path of the directory to remove. Nothing happens when the path
        does not exist.
    """
    if not os.path.exists(cache_dir):
        return
    shutil.rmtree(cache_dir)
def xgb_progressbar(rounds=1000):
    """Progressbar for xgboost using tqdm library.

    Parameters
    ----------
    rounds : int
        Total number of steps the bar is created with.

    Returns
    -------
    callable
        A callback taking one positional argument (ignored) that advances
        the bar by one step on every call.

    Raises
    ------
    ImportError
        If the ``tqdm`` package is not installed.

    Examples
    --------
    >>> model = xgb.train(params, X_train, 1000, callbacks=[xgb_progressbar(1000), ])
    """
    # Import the ``tqdm`` class here: the module-level ``import tqdm as tqdm``
    # binds the *module*, and calling a module raises
    # ``TypeError: 'module' object is not callable``.
    from tqdm import tqdm as progress_bar

    pbar = progress_bar(total=rounds)

    def callback(_):
        pbar.update(1)

    return callback