# InfineonForecastSystem/src/models/evaluate_model.py
# alpertml's picture
# Upload 16 files
# fa10c3d verified
from src.models.train_model import train_model
from src.utils.helper_functions import post_process
import numpy as np
class MonthlyKFold:
    """Expanding-window splitter that holds out one calendar month per fold.

    Rows are grouped by the month key ``12 * _year + _month`` computed from
    the ``_year`` and ``_month`` columns of ``X``. The last ``n_splits``
    distinct months each become a test fold, in chronological order; the
    matching training fold contains every row from strictly earlier months.

    The method names and signatures (``split`` / ``get_n_splits``) follow the
    scikit-learn cross-validator convention.
    """

    def __init__(self, n_splits=3):
        """Store the number of trailing months to hold out.

        Args:
            n_splits: Number of most recent distinct months used as test
                folds. Must be at least 1.

        Raises:
            ValueError: If ``n_splits`` is smaller than 1. A value of 0 would
                make the ``[-n_splits:]`` slice in ``split`` select *every*
                month, and negative values would silently drop the earliest
                months instead of selecting the latest ones.
        """
        if n_splits < 1:
            raise ValueError(
                f"n_splits must be a positive integer, got {n_splits!r}"
            )
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        """Yield ``(train, test)`` positional indices, one pair per month.

        Args:
            X: DataFrame with ``_year`` and ``_month`` columns.
            y: Unused; accepted for cross-validator API compatibility.
            groups: Unused; accepted for cross-validator API compatibility.

        Yields:
            Tuples ``(idx_train, idx_test)`` of pandas Index objects holding
            zero-based row positions (suitable for ``.iloc``), regardless of
            the labels in ``X.index``.

        Note:
            If ``X`` contains ``n_splits`` or fewer distinct months, the fold
            for the earliest month has an empty training set because no rows
            precede it.
        """
        dates = 12 * X["_year"] + X["_month"]
        timesteps = sorted(dates.unique().tolist())
        month_keys = dates.values
        # Take row positions from the (cheap) date Series rather than from
        # X.reset_index(): that avoids copying the whole frame and cannot fail
        # when X already has a column named like its index (e.g. "index").
        positions = dates.reset_index(drop=True).index
        for t in timesteps[-self.n_splits:]:
            idx_train = positions[month_keys < t]
            idx_test = positions[month_keys == t]
            yield idx_train, idx_test

    def get_n_splits(self, X, y=None, groups=None):
        """Return the configured number of splits; all arguments are ignored."""
        return self.n_splits
def evaluate(
    X, y,
    model_params,
    cat_features,
    scorer,
    FOLD=5,
    model_type='CATBOOST'
):
    """Cross-validate a model over the most recent months and report scores.

    Splits ``X`` with ``MonthlyKFold(FOLD)``, trains one model per fold via
    ``train_model`` and scores the post-processed predictions on the held-out
    month. Per-fold scores and their mean / standard deviation are printed.

    Args:
        X: Feature DataFrame; must contain the ``_year`` and ``_month``
            columns that ``MonthlyKFold.split`` reads. Rows are selected
            positionally with ``.iloc``.
        y: Target, positionally aligned with ``X`` (indexed with ``.iloc``).
        model_params: Passed through unchanged to ``train_model``.
        cat_features: Passed through unchanged to ``train_model``.
        scorer: Callable invoked as ``scorer(y_true, y_pred)``; its result is
            formatted with ``:.5f``, so it must return a number.
        FOLD: Number of trailing months used as validation folds.
        model_type: Passed through unchanged to ``train_model``.

    Returns:
        A tuple ``(models, scores)``: the fitted model and the score of each
        fold, in chronological fold order.
    """
    print('Evaluating...')
    tscv = MonthlyKFold(FOLD)
    scores = []
    models = []

    for fold_no, (train_index, valid_index) in enumerate(tscv.split(X), start=1):
        print(f'FOLD:{fold_no}')

        X_train, y_train = X.iloc[train_index, :], y.iloc[train_index]
        X_valid, y_valid = X.iloc[valid_index, :], y.iloc[valid_index]

        # NOTE(review): the fold that is scored below is also handed to
        # train_model as `valid`. If train_model uses it for early stopping or
        # model selection, the reported score is optimistic - confirm.
        model = train_model(
            train=(X_train, y_train),
            model_params=model_params,
            model_type=model_type,
            cat_features=cat_features,
            valid=(X_valid, y_valid))

        score = scorer(y_valid, post_process(model.predict(X_valid)))
        print(f'Score:{score:.5f}')

        models.append(model)
        scores.append(score)

    print(f"Scores:{scores}")
    print(f'Mean Score:{np.mean(scores):.5f} +- {np.std(scores):.3f}')
    return models, scores