# Source code for heamy.utils.main

# coding:utf-8
import os
import shutil
from collections import defaultdict
from itertools import combinations

import numpy as np
import pandas as pd
from six.moves import range

try:
    # tqdm is an optional dependency. Import the tqdm *class* rather than the
    # package so that a call such as ``tqdm(total=...)`` is valid.
    from tqdm import tqdm
except ImportError:
    # Only a missing package is tolerated here; any other failure should
    # surface instead of being silently swallowed.
    pass


def report_score(scores, metric=None):
    """Print a human-readable summary of validation scores.

    Parameters
    ----------
    scores : sequence of numbers
        One score per fold. A single-element sequence is reported as one
        value; anything else is reported together with its mean, standard
        deviation and variance.
    metric : callable, optional
        When given, its ``__name__`` is printed before the scores.
    """
    if metric is not None:
        print('Metric: %s' % metric.__name__)
    if len(scores) == 1:
        print('Accuracy: %s' % (scores[0],))
    else:
        # Wrap the value in a 1-tuple: a bare tuple on the right-hand side of
        # ``%`` is unpacked as format arguments and would raise TypeError.
        print('Folds accuracy: %s' % (scores,))
        print('Mean accuracy: %s' % np.mean(scores))
        print('Standard Deviation: %s' % np.std(scores))
        print('Variance: %s' % np.var(scores))


def tsplit(df, shape):
    """Split an array-like object into two consecutive parts.

    Parameters
    ----------
    df : pandas.DataFrame, pandas.Series or any sliceable object
        Data to split along its first axis.
    shape : int
        Position of the cut: the first part holds elements ``[0, shape)``,
        the second part holds the rest.

    Returns
    -------
    tuple
        ``(first_part, second_part)``.
    """
    head, tail = slice(0, shape), slice(shape, None)
    if isinstance(df, (pd.DataFrame, pd.Series)):
        # pandas objects are cut by position, regardless of index labels.
        return df.iloc[head], df.iloc[tail]
    return df[head], df[tail]


def concat(x, y, axis=0):
    """Concatenate two pandas or numpy objects into one.

    Parameters
    ----------
    x, y : pandas.DataFrame, pandas.Series or array-like
        Objects to join. ``pandas.concat`` is used only when *both* are
        pandas objects; every other combination goes through numpy.
    axis : int, default 0
        ``0`` appends ``y`` after ``x``; any other value places them side by
        side as columns.

    Returns
    -------
    pandas object or numpy.ndarray
    """
    pandas_types = (pd.DataFrame, pd.Series)
    if isinstance(x, pandas_types) and isinstance(y, pandas_types):
        return pd.concat([x, y], axis=axis)
    if axis == 0:
        return np.concatenate([x, y])
    return np.column_stack([x, y])


def reshape_1d(df):
    """Turn a 1-D array into a 2-D single-column array.

    Parameters
    ----------
    df : object exposing ``shape`` and ``reshape``
        Input data.

    Returns
    -------
    The input reshaped to ``(n, 1)`` when it has exactly one dimension;
    otherwise the input object itself, unchanged.
    """
    shape = df.shape
    if len(shape) != 1:
        return df
    return df.reshape(shape[0], 1)


def idx(df, index):
    """Universal positional row indexing for numpy and pandas objects.

    Parameters
    ----------
    df : pandas.DataFrame, pandas.Series or numpy.ndarray
        Data to select from.
    index : int, slice or sequence of ints
        Positions along the first axis.

    Returns
    -------
    The selected rows or elements, in the same container family as ``df``.
    """
    if isinstance(df, (pd.DataFrame, pd.Series)):
        return df.iloc[index]
    if getattr(df, 'ndim', None) == 1:
        # A 1-D array has no second axis, so ``df[index, :]`` would raise
        # IndexError; index the single axis directly.
        return df[index]
    return df[index, :]


def generate_columns(df, name):
    """Build column names for ``df`` derived from a base ``name``.

    Parameters
    ----------
    df : object exposing ``shape``
        Data whose column count determines the number of names.
    name : str
        Base name.

    Returns
    -------
    list
        ``[name]`` when ``df`` is 1-D or has exactly one column, otherwise
        ``['<name>_0', '<name>_1', ...]`` with one entry per column.
    """
    dims = df.shape
    col_count = 1 if len(dims) == 1 else dims[1]
    if col_count == 1:
        return [name]
    return ['%s_%s' % (name, i) for i in range(col_count)]


def group_models(models, params):
    """Run ``validate`` on every model and group the results by fold.

    Parameters
    ----------
    models : iterable
        Objects exposing ``validate(**params)`` that returns a pair
        ``(y_true_list, y_pred_list)`` with one entry per fold.
    params : dict
        Keyword arguments forwarded to each ``validate`` call.

    Returns
    -------
    tuple
        ``(y_preds_grouped, y_true_grouped)``. The first maps a fold number
        to the list of predictions for that fold, one per model in input
        order. The second maps a fold number to the true values taken from
        the first model that produced that fold.
    """
    y_preds_grouped = defaultdict(list)
    y_true_grouped = {}
    for model in models:
        y_true_list, y_pred_list = model.validate(**params)
        for fold, (y_true, y_pred) in enumerate(zip(y_true_list, y_pred_list)):
            # Keep only the first model's ground truth for each fold.
            y_true_grouped.setdefault(fold, y_true)
            y_preds_grouped[fold].append(y_pred)
    return y_preds_grouped, y_true_grouped


def feature_combiner(df):
    """Add pairwise difference columns to ``df``.

    For every unordered pair of existing columns ``(i, j)``, taken in column
    order, a new column named ``'<i>-<j>'`` holding ``df[i] - df[j]`` is
    appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now including the new columns.
    """
    # Snapshot the pairs first so that newly added columns are not combined.
    pairs = list(combinations(df.columns, 2))
    for left, right in pairs:
        df['%s-%s' % (left, right)] = df[left] - df[right]
    return df


def flush_cache(cache_dir='.cache/heamy/'):
    """Delete the cache directory and everything inside it.

    Parameters
    ----------
    cache_dir : str, default '.cache/heamy/'
        Path of the directory to remove. Nothing happens when the path does
        not exist.
    """
    if not os.path.exists(cache_dir):
        return
    shutil.rmtree(cache_dir)

def xgb_progressbar(rounds=1000):
    """Progressbar for xgboost using tqdm library.

    Parameters
    ----------
    rounds : int, default 1000
        Total number of steps shown by the progress bar.

    Returns
    -------
    callable
        A one-argument callback; every call advances the bar by one step.
        The argument itself is ignored.

    Raises
    ------
    ImportError
        If the ``tqdm`` package is not installed.

    Examples
    --------

    >>> model = xgb.train(params, X_train, 1000, callbacks=[xgb_progressbar(1000), ])
    """
    # Import the tqdm class locally: calling the ``tqdm`` package object
    # itself raises TypeError, and a local import does not depend on how the
    # module-level optional import was bound.
    from tqdm import tqdm as progress_bar

    pbar = progress_bar(total=rounds)

    def callback(_):
        pbar.update(1)

    return callback