File size: 2,024 Bytes
fa10c3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from src.models.train_model import train_model
from src.utils.helper_functions import post_process
import numpy as np

class MonthlyKFold:
    """Expanding-window cross-validation splitter that holds out one month per fold.

    Each row of ``X`` is mapped to an integer month code
    ``12 * X["_year"] + X["_month"]``.  The most recent ``n_splits`` months
    each become a test fold, in chronological order, and the matching
    training fold is every row belonging to a strictly earlier month.

    The earliest month present in the data is never used as a test fold,
    because there would be no earlier rows to train on.  Requesting more
    splits than there are eligible months therefore produces fewer folds
    rather than a fold with an empty training set.

    ``y`` and ``groups`` are accepted by ``split`` / ``get_n_splits`` but
    ignored (presumably kept for scikit-learn splitter compatibility).
    """

    def __init__(self, n_splits=3):
        # Maximum number of trailing months to hold out, one per fold.
        self.n_splits = n_splits

    @staticmethod
    def _month_codes(X):
        """Return a Series with one sortable integer month code per row."""
        return 12 * X["_year"] + X["_month"]

    def _test_months(self, codes):
        """Return the month codes used as test folds, oldest first.

        ``codes`` is the array of per-row month codes.
        """
        months = np.unique(codes)  # sorted ascending, duplicates removed
        # The first month has no earlier data to train on, so skip it.
        eligible = months[1:]
        # Clamp explicitly: a bare ``eligible[-n:]`` would return *every*
        # month for n == 0 and nonsense for negative n.
        wanted = max(self.n_splits, 0)
        return eligible[max(len(eligible) - wanted, 0):]

    def split(self, X, y=None, groups=None):
        """Yield ``(train_positions, test_positions)`` for each held-out month.

        Args:
            X: DataFrame with ``_year`` and ``_month`` columns.
            y: Ignored.
            groups: Ignored.

        Yields:
            Pairs of integer pandas ``Index`` objects holding 0-based row
            positions (not index labels), suitable for ``.iloc``.
        """
        codes = self._month_codes(X)
        # Positional index built from the (small) code Series only, so the
        # whole frame is not copied and X's own index labels are irrelevant.
        positions = codes.reset_index(drop=True).index
        values = codes.values

        for month in self._test_months(values):
            yield positions[values < month], positions[values == month]

    def get_n_splits(self, X, y=None, groups=None):
        """Return the number of folds ``split`` yields for ``X``.

        When ``X`` is ``None`` the configured ``n_splits`` is returned
        unchanged, since the number of available months cannot be checked.
        """
        if X is None:
            return self.n_splits
        return len(self._test_months(self._month_codes(X).values))
    

def evaluate(
        X, y,
        model_params,
        cat_features,
        scorer,
        FOLD=5,
        model_type='CATBOOST'
):
    """Cross-validate a model over monthly hold-out folds and report scores.

    Folds come from ``MonthlyKFold(FOLD).split(X)``.  For every fold a model
    is obtained from ``train_model`` using the training rows (the validation
    rows are passed along as ``valid``), its predictions on the validation
    rows are passed through ``post_process``, and the result is scored with
    ``scorer``.  Progress and the final summary are printed to stdout.

    Args:
        X: Feature DataFrame; must contain the ``_year`` and ``_month``
            columns that ``MonthlyKFold`` reads.
        y: Targets aligned positionally with ``X`` (indexed via ``.iloc``).
        model_params: Forwarded unchanged to ``train_model``.
        cat_features: Forwarded unchanged to ``train_model``.
        scorer: Callable invoked as ``scorer(y_valid, predictions)``; its
            return value is formatted as a float.
        FOLD: Number of splits handed to ``MonthlyKFold``.
        model_type: Forwarded unchanged to ``train_model``.

    Returns:
        Tuple ``(models, scores)``: the per-fold objects returned by
        ``train_model`` and the per-fold scores, in fold order.
    """
    print('Evaluating...')

    splitter = MonthlyKFold(FOLD)

    scores = []
    models = []

    for fold, (train_index, valid_index) in enumerate(splitter.split(X), start=1):
        print(f'FOLD:{fold}')

        X_train, y_train = X.iloc[train_index, :], y.iloc[train_index]
        X_valid, y_valid = X.iloc[valid_index, :], y.iloc[valid_index]

        model = train_model(
            train=(X_train, y_train),
            model_params=model_params,
            model_type=model_type,
            cat_features=cat_features,
            valid=(X_valid, y_valid),
        )

        # Score the post-processed predictions, not the raw model output.
        score = scorer(y_valid, post_process(model.predict(X_valid)))
        print(f'Score:{score:.5f}')

        models.append(model)
        scores.append(score)

    print(f"Scores:{scores}")
    print(f'Mean Score:{np.mean(scores):.5f} +- {np.std(scores):.3f}')

    return models, scores