Spaces:
Sleeping
Sleeping
from src.models.train_model import train_model | |
from src.utils.helper_functions import post_process | |
import numpy as np | |
class MonthlyKFold:
    """Expanding-window cross-validator that holds out one calendar month per fold.

    Rows are ordered by ``12 * _year + _month``. Each of the last ``n_splits``
    distinct months becomes a test fold in chronological order, and every row
    from a strictly earlier month forms the matching training fold.

    ``X`` must support ``X["_year"]`` and ``X["_month"]`` (e.g. a pandas
    DataFrame with those two numeric columns).
    """

    def __init__(self, n_splits=3):
        """Store the maximum number of monthly folds to generate.

        Raises:
            ValueError: if ``n_splits`` is smaller than 1.
        """
        # A value of 0 used to slice ``timesteps[-0:]``, i.e. *every* month,
        # and negative values dropped months from the wrong end.
        if n_splits < 1:
            raise ValueError(f"n_splits must be at least 1, got {n_splits!r}")
        self.n_splits = n_splits

    @staticmethod
    def _month_ids(X):
        """Return one number per row that orders the rows by calendar month."""
        return (12 * X["_year"] + X["_month"]).to_numpy()

    def _test_timesteps(self, month_ids):
        """Return the month ids used as test folds, oldest first."""
        timesteps = np.unique(month_ids).tolist()
        # The earliest month has no earlier rows to train on, so it can never
        # be a usable test fold.
        candidates = timesteps[1:]
        n_folds = min(self.n_splits, len(candidates))
        return candidates[len(candidates) - n_folds:]

    def split(self, X, y=None, groups=None):
        """Yield ``(train_positions, test_positions)`` for each held-out month.

        Both elements are integer NumPy arrays of row *positions*, suitable
        for ``.iloc``. ``y`` and ``groups`` are accepted for API compatibility
        and ignored. Fewer than ``n_splits`` folds are produced when ``X`` does
        not contain enough distinct months.
        """
        # Work on the month ids directly instead of ``X.reset_index()``: that
        # copied the whole frame and failed when the index name matched a column.
        month_ids = self._month_ids(X)
        for t in self._test_timesteps(month_ids):
            idx_train = np.flatnonzero(month_ids < t)
            idx_test = np.flatnonzero(month_ids == t)
            yield idx_train, idx_test

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the number of folds ``split`` yields for ``X``.

        Without ``X`` the configured ``n_splits`` is returned; with ``X`` the
        count is capped by the months actually available, so it always agrees
        with ``split``.
        """
        if X is None:
            return self.n_splits
        return len(self._test_timesteps(self._month_ids(X)))
def evaluate(
    X, y,
    model_params,
    cat_features,
    scorer,
    FOLD=5,
    model_type='CATBOOST'
):
    """Cross-validate a model over the most recent months and report the scores.

    One model is trained per fold produced by ``MonthlyKFold(FOLD)``: the
    validation fold is a single month and the training fold is every row from
    earlier months. Each model's validation predictions are passed through
    ``post_process`` before being scored.

    Args:
        X: Feature table; sliced with ``.iloc`` and must expose the ``_year``
            and ``_month`` columns that ``MonthlyKFold`` reads.
        y: Targets aligned row-for-row with ``X``; sliced with ``.iloc``.
        model_params: Forwarded unchanged to ``train_model``.
        cat_features: Forwarded unchanged to ``train_model``.
        scorer: Callable invoked as ``scorer(y_true, y_pred)``; its result is
            printed with a float format spec.
        FOLD: Number of monthly folds requested from ``MonthlyKFold``.
        model_type: Forwarded unchanged to ``train_model``.

    Returns:
        A ``(models, scores)`` tuple of two lists with one entry per fold, in
        fold order.
    """
    print('Evaluating...')
    tscv = MonthlyKFold(FOLD)
    scores = []
    models = []

    for i, (train_index, valid_index) in enumerate(tscv.split(X)):
        print(f'FOLD:{i+1}')
        X_train, y_train = X.iloc[train_index, :], y.iloc[train_index]
        X_valid, y_valid = X.iloc[valid_index, :], y.iloc[valid_index]

        # The validation fold is also handed to train_model; how it is used
        # there (e.g. early stopping) is not visible from this file.
        model = train_model(
            train=(X_train, y_train),
            model_params=model_params,
            model_type=model_type,
            cat_features=cat_features,
            valid=(X_valid, y_valid))

        # Score the post-processed predictions rather than the raw output.
        score = scorer(y_valid, post_process(model.predict(X_valid)))
        print(f'Score:{score:.5f}')

        models.append(model)
        scores.append(score)

    print(f"Scores:{scores}")
    print(f'Mean Score:{np.mean(scores):.5f} +- {np.std(scores):.3f}')
    return models, scores