Spaces:
Sleeping
Sleeping
Upload 16 files
Browse files
- app.py +4 -26
- config.py +24 -1
- pipeline.py +53 -0
- src/data/__pycache__/load_data.cpython-311.pyc +0 -0
- src/data/__pycache__/preprocess.cpython-311.pyc +0 -0
- src/data/load_data.py +92 -0
- src/data/preprocess.py +23 -0
- src/features/__pycache__/build_features.cpython-311.pyc +0 -0
- src/features/build_features.py +20 -0
- src/models/__pycache__/evaluate_model.cpython-311.pyc +0 -0
- src/models/__pycache__/train_model.cpython-311.pyc +0 -0
- src/models/evaluate_model.py +68 -0
- src/models/train_model.py +27 -0
- src/utils/__pycache__/helper_functions.cpython-311.pyc +0 -0
- src/utils/helper_functions.py +68 -0
- src/visualization/visualize.py +0 -0
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
# external libraries
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
-
import numpy as np
|
5 |
import os
|
6 |
import datetime
|
7 |
|
|
|
8 |
from config import Config
|
9 |
|
10 |
config = vars(Config)
|
@@ -62,7 +62,7 @@ def main():
|
|
62 |
'planning_method_latest'])
|
63 |
|
64 |
st.write(f'Average demand by category "{category}"')
|
65 |
-
st.bar_chart(st.session_state["predictions_df"].
|
66 |
|
67 |
input_save = st.checkbox(config['SAVE_CHECKBOX_TEXT'])
|
68 |
confirm_params = {
|
@@ -92,38 +92,16 @@ def save(params):
|
|
92 |
|
93 |
if params['input_save']:
|
94 |
today = datetime.datetime.today().strftime("%d-%m-%Y")
|
95 |
-
st.session_state["predictions_df"].to_excel(f'{dir}/predictions_{today}.xlsx', index=False)
|
96 |
|
97 |
|
98 |
# forecasting
|
99 |
def predict(input_date):
|
100 |
|
101 |
-
data = {
|
102 |
-
'product_id': [f'P{1}' for i in range(5)],
|
103 |
-
'date':['2022-01-01','2022-02-01','2022-03-01','2022-04-01','2022-05-01'],
|
104 |
-
'demand': np.random.randint(1, 100, size=5),
|
105 |
-
'product_application': ['A','A','A','B','B']
|
106 |
-
}
|
107 |
-
|
108 |
-
data2 = {
|
109 |
-
'product_id': [f'P{2}' for i in range(5)],
|
110 |
-
'date':['2022-01-01','2022-02-01','2022-03-01','2022-04-01','2022-05-01'],
|
111 |
-
'demand': np.random.randint(1, 100, size=5),
|
112 |
-
'product_application': ['A','A','A','B','B']
|
113 |
-
}
|
114 |
-
|
115 |
-
df1 = pd.DataFrame(data)
|
116 |
-
df2 = pd.DataFrame(data2)
|
117 |
-
|
118 |
-
# Concatenate the two DataFrames vertically
|
119 |
-
combined_df = pd.concat([df1, df2], ignore_index=True)
|
120 |
-
|
121 |
-
|
122 |
forecast_start_date = input_date[0].strftime("%Y-%m-%d")
|
123 |
forecast_end_date = input_date[1].strftime("%Y-%m-%d")
|
124 |
-
print(forecast_start_date, forecast_end_date)
|
125 |
|
126 |
-
st.session_state["predictions_df"] =
|
127 |
|
128 |
if __name__ == "__main__":
|
129 |
main()
|
|
|
1 |
# external libraries
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
|
|
4 |
import os
|
5 |
import datetime
|
6 |
|
7 |
+
import pipeline
|
8 |
from config import Config
|
9 |
|
10 |
config = vars(Config)
|
|
|
62 |
'planning_method_latest'])
|
63 |
|
64 |
st.write(f'Average demand by category "{category}"')
|
65 |
+
st.bar_chart(st.session_state["predictions_df"].groupby(category)['demand'].mean())
|
66 |
|
67 |
input_save = st.checkbox(config['SAVE_CHECKBOX_TEXT'])
|
68 |
confirm_params = {
|
|
|
92 |
|
93 |
if params['input_save']:
|
94 |
today = datetime.datetime.today().strftime("%d-%m-%Y")
|
95 |
+
st.session_state["predictions_df"][['product_id','date','demand']].to_excel(f'{dir}/predictions_{today}.xlsx', index=False)
|
96 |
|
97 |
|
98 |
# forecasting
|
99 |
def predict(input_date):
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
forecast_start_date = input_date[0].strftime("%Y-%m-%d")
|
102 |
forecast_end_date = input_date[1].strftime("%Y-%m-%d")
|
|
|
103 |
|
104 |
+
st.session_state["predictions_df"] = pipeline.run(forecast_start_date, forecast_end_date)
|
105 |
|
106 |
if __name__ == "__main__":
|
107 |
main()
|
config.py
CHANGED
@@ -1,9 +1,12 @@
|
|
|
|
|
|
1 |
class Config():
|
2 |
|
3 |
def __init__(self):
|
4 |
pass
|
5 |
|
6 |
target = 'demand'
|
|
|
7 |
|
8 |
not_include_features = [
|
9 |
target,
|
@@ -18,7 +21,27 @@ class Config():
|
|
18 |
'planning_method_latest'
|
19 |
]
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
MAIN_TITLE = 'Infineon Product Demand Forecasting System'
|
23 |
SUB_TITLE = 'Data Analytics in Applications'
|
24 |
ICON_PATH = 'images/infineon-icon-1.png'
|
|
|
1 |
+
from sklearn.metrics import r2_score
|
2 |
+
|
3 |
class Config():
|
4 |
|
5 |
def __init__(self):
|
6 |
pass
|
7 |
|
8 |
target = 'demand'
|
9 |
+
split_local_test = False
|
10 |
|
11 |
not_include_features = [
|
12 |
target,
|
|
|
21 |
'planning_method_latest'
|
22 |
]
|
23 |
|
24 |
+
scorer = r2_score
|
25 |
+
model_type = 'CATBOOST'
|
26 |
+
fold = 5
|
27 |
+
fold_models_directory = 'models/date_models_test'
|
28 |
+
fold_input_directory = 'maps/date_models_test'
|
29 |
+
|
30 |
+
catboost_params = {
|
31 |
+
'learning_rate': 0.03,
|
32 |
+
'objective':'RMSE',
|
33 |
+
'depth': 5,
|
34 |
+
'early_stopping_rounds':200,
|
35 |
+
'iterations': 2000,
|
36 |
+
'use_best_model': True,
|
37 |
+
# 'eval_metric': CatBoostEvalMetricSMAPE(),
|
38 |
+
'eval_metric': 'R2',
|
39 |
+
'random_state': 42,
|
40 |
+
'allow_writing_files': False,
|
41 |
+
'thread_count':-1
|
42 |
+
}
|
43 |
+
|
44 |
+
# deployment
|
45 |
MAIN_TITLE = 'Infineon Product Demand Forecasting System'
|
46 |
SUB_TITLE = 'Data Analytics in Applications'
|
47 |
ICON_PATH = 'images/infineon-icon-1.png'
|
pipeline.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.data.load_data import get_data, generate_test_data
|
2 |
+
from src.features.build_features import prepare_data
|
3 |
+
from src.data.preprocess import get_Xy
|
4 |
+
from src.utils.helper_functions import load_models, get_predictions, load_parquet
|
5 |
+
from config import Config
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
|
9 |
+
import pandas as pd
|
10 |
+
|
11 |
+
config = vars(Config)
|
12 |
+
|
13 |
+
def run(forecast_start_date, forecast_end_date):
    """Forecast demand for every stored product over a date range.

    Builds one row per (product, month start) between the two dates, attaches
    the stored per-product fixed columns, derives datetime features and
    averages the predictions of every pickled model found in
    ``config['fold_models_directory']``.

    Args:
        forecast_start_date: Start of the range, forwarded unchanged to
            ``generate_test_data``.
        forecast_end_date: End of the range, forwarded unchanged to
            ``generate_test_data``.

    Returns:
        The generated frame (without the derived datetime features) with the
        averaged predictions stored under ``config['target']``.
    """
    print('Script Executing...')

    input_dir = config["fold_input_directory"]

    # NOTE(review): nothing visible in this file writes
    # unique_products.parquet -- confirm the training step produces it.
    product_ids = load_parquet(f'{input_dir}/unique_products.parquet').values

    generated_test = generate_test_data(
        forecast_start_date,
        forecast_end_date,
        product_ids=product_ids,
    )
    generated_test['date'] = pd.to_datetime(generated_test['date'])

    # merge the fixed columns; how='right' keeps every generated row
    generated_test = pd.merge(
        load_parquet(f'{input_dir}/fixed_columns.parquet'),
        generated_test,
        on=['product_id'],
        how='right',
    )

    # prepare_data adds columns to the frame it receives, so hand it a copy
    # to keep the returned frame free of the derived features.
    dataframe = prepare_data(
        dataframe=generated_test.copy(),
        add_datetime_features=True,
    )

    # An all-NaN target makes get_Xy route every row into X_test.
    dataframe[config['target']] = np.nan

    # NOTE(review): 'cat_features' is not visible in the Config excerpt shown
    # here -- presumably it is defined in the elided part; confirm.
    X, X_test, y = get_Xy(
        dataframe=dataframe,
        not_include=config['not_include_features'],
        cat_features=config['cat_features'],
        cat_encoding='category',
    )

    models = load_models(config['fold_models_directory'])

    y_test_preds = get_predictions(models, X_test)

    generated_test[config['target']] = y_test_preds

    print('Script Done!')

    return generated_test
|
51 |
+
|
52 |
+
# if __name__ == '__main__':
|
53 |
+
# run()
|
src/data/__pycache__/load_data.cpython-311.pyc
ADDED
Binary file (4.05 kB). View file
|
|
src/data/__pycache__/preprocess.cpython-311.pyc
ADDED
Binary file (1.38 kB). View file
|
|
src/data/load_data.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from src.utils.helper_functions import save_parquet
|
4 |
+
import os
|
5 |
+
from config import Config
|
6 |
+
|
7 |
+
config = vars(Config)
|
8 |
+
|
9 |
+
def get_data(
    data_dir = '../data/raw/input/',
    file_name = 'demand_data_IFX.csv',
    date_columns = ['reporting_month_start'],
    split_local_test = False,
    target = 'demand',
    fixed_columns = [
        'product_id',
        'product_application',
        'product_marketing_name',
        'product_main_family',
        'planning_method_latest',
    ],
    prediction_interval = ('2023-11-01', '2024-07-01')
):
    """Load the raw demand CSV and build the train / generated-test frames.

    Side effect: writes the de-duplicated fixed columns to
    ``<config['fold_input_directory']>/fixed_columns.parquet``.

    Args:
        data_dir: Directory containing the CSV.
        file_name: CSV file name.
        date_columns: Columns parsed as dates by ``pd.read_csv``.
        split_local_test: When true, hold out a test period via
            ``split_train_test`` and attach its ground truth; otherwise the
            test frame spans ``prediction_interval`` with a NaN target.
        target: Name of the target column.
        fixed_columns: Per-product columns copied onto the generated rows.
        prediction_interval: ``(start, end)`` used when no local test split
            is requested.

    Returns:
        ``(generated_train, generated_test, y_test)`` where ``generated_test``
        no longer contains the target column.
    """
    print('Loading data...')

    # Work on private copies so the shared default lists can never be mutated.
    date_columns = list(date_columns)
    fixed_columns = list(fixed_columns)

    dataframe = pd.read_csv(os.path.join(data_dir, file_name), parse_dates=date_columns)
    # Round-trip through .dt.date to drop any time-of-day component.
    dataframe['date'] = pd.to_datetime(dataframe['reporting_month_start'].dt.date)
    dataframe.sort_values(by='date', inplace=True)

    if split_local_test:
        train, test = split_train_test(
            dataframe = dataframe
        )
        test_min_date, test_max_date = test.date.min(), test.date.max()
    else:
        train, test = dataframe, None
        test_min_date, test_max_date = prediction_interval

    generated_test = generate_test_data(
        start_date=test_min_date,
        end_date=test_max_date,
        product_ids=train.product_id.unique()
    )

    generated_test['date'] = pd.to_datetime(generated_test['date'])

    # Computed once; used both for the merge and for the parquet snapshot.
    fixed_frame = train[fixed_columns].drop_duplicates(subset=fixed_columns)

    # merge the fixed columns
    generated_test = pd.merge(fixed_frame, generated_test, on=['product_id'], how='right')

    save_parquet(
        dataframe=fixed_frame,
        path=f'{config["fold_input_directory"]}/fixed_columns.parquet'
    )

    # merge the ground-truth
    if split_local_test:
        generated_test = pd.merge(
            test[[target, 'date', 'product_id']],
            generated_test,
            on=['product_id', 'date'],
            how='right',
        )
        # Generated rows without a matching observation get a target of 0.
        generated_test[target] = generated_test[target].fillna(0)
    else:
        generated_test[target] = np.nan

    # generate fixed train
    generated_train = train[[target, 'date'] + fixed_columns]

    y_test = generated_test[target]
    generated_test.drop(target, axis=1, inplace=True)

    return generated_train, generated_test, y_test
|
73 |
+
|
74 |
+
def split_train_test(dataframe, test_start='2022-11-01', test_end='2023-07-01'):
    """Split a frame into train and test parts by its ``date`` column.

    Args:
        dataframe: Frame with a datetime ``date`` column.
        test_start: First date (inclusive) of the test window; rows strictly
            before it form the training set.
        test_end: Last date (inclusive) of the test window. Rows after it are
            in neither part.

    Returns:
        ``(train, test)`` as filtered views of ``dataframe``.
    """
    start = pd.to_datetime(test_start)
    end = pd.to_datetime(test_end)
    dates = dataframe['date']

    train = dataframe[dates < start]
    test = dataframe[(dates >= start) & (dates <= end)]

    return train, test
|
81 |
+
|
82 |
+
def generate_test_data(start_date, end_date, product_ids):
    """Build one row per (product id, month start) combination.

    Args:
        start_date: Range start passed to ``pd.date_range``.
        end_date: Range end passed to ``pd.date_range``.
        product_ids: Product ids, either a flat sequence or an ``(n, 1)``
            array; anything ``pd.DataFrame`` accepts for a single column.

    Returns:
        Frame with columns ``product_id`` and ``date``, ordered product by
        product. Only month-start dates inside the range are produced
        (``freq='MS'``).
    """
    # Generate a range of monthly start dates
    months = pd.DataFrame(
        {'date': pd.date_range(start=start_date, end=end_date, freq='MS')}
    )

    products = pd.DataFrame(product_ids, columns=['product_id'])

    # Cartesian product: every product paired with every month.
    return products.merge(months, how='cross')
|
src/data/preprocess.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def get_Xy(
    dataframe,
    not_include,
    cat_features,
    target='demand',
    cat_encoding='category'
):
    """Split a frame into labelled features, unlabelled features and labels.

    Rows whose ``target`` is null are treated as the rows to predict.

    Args:
        dataframe: Input frame; it is copied, never modified.
        not_include: Column names to leave out of the feature set.
        cat_features: Columns cast to pandas ``category`` dtype when
            ``cat_encoding == 'category'``.
        target: Name of the label column.
        cat_encoding: Encoding strategy; only ``'category'`` has an effect.

    Returns:
        ``(X, X_test, y)``: features and labels of the rows with a target,
        and features of the rows without one.
    """
    print('Preprocessing...')

    tmp_df = dataframe.copy()

    excluded = set(not_include)
    features = [col for col in tmp_df.columns if col not in excluded]

    if cat_encoding == 'category':
        tmp_df[cat_features] = tmp_df[cat_features].astype('category')

    # Evaluate the null mask once and reuse it for all three selections.
    missing_target = tmp_df[target].isnull()

    X = tmp_df.loc[~missing_target, features]
    y = tmp_df.loc[~missing_target, target]
    X_test = tmp_df.loc[missing_target, features]

    return X, X_test, y
|
src/features/__pycache__/build_features.cpython-311.pyc
ADDED
Binary file (1.12 kB). View file
|
|
src/features/build_features.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def prepare_data(
    dataframe,
    add_datetime_features=True
):
    """Run the feature-building steps on ``dataframe``.

    Args:
        dataframe: Frame to enrich. It is modified in place when datetime
            features are added.
        add_datetime_features: Whether to add the calendar columns produced
            by ``datetime_features``.

    Returns:
        The same frame object, with any added columns.
    """
    print('Building features...')

    if add_datetime_features:
        dataframe = datetime_features(dataframe)

    return dataframe


def datetime_features(dataframe, date='date', suffix=''):
    """Add month, year, quarter and ISO week columns derived from ``date``.

    The new columns are named ``f'{suffix}_month'`` and so on, i.e. ``suffix``
    is placed in front of the name; with the default empty string the columns
    are ``_month``, ``_year``, ``_quarter`` and ``_weekofyear``.

    Args:
        dataframe: Frame with a datetime column; modified in place.
        date: Name of the datetime column.
        suffix: Text placed before each generated column name.

    Returns:
        The same frame object that was passed in.
    """
    stamps = dataframe[date].dt

    dataframe[f'{suffix}_month'] = stamps.month
    dataframe[f'{suffix}_year'] = stamps.year
    dataframe[f'{suffix}_quarter'] = stamps.quarter
    dataframe[f'{suffix}_weekofyear'] = stamps.isocalendar().week

    return dataframe
|
src/models/__pycache__/evaluate_model.cpython-311.pyc
ADDED
Binary file (3.57 kB). View file
|
|
src/models/__pycache__/train_model.cpython-311.pyc
ADDED
Binary file (836 Bytes). View file
|
|
src/models/evaluate_model.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.models.train_model import train_model
|
2 |
+
from src.utils.helper_functions import post_process
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
class MonthlyKFold:
    """Expanding-window splitter that validates on the latest months.

    Each fold validates on one calendar month and trains on every row from
    strictly earlier months. Months are read from the ``_year`` and ``_month``
    columns of ``X``.
    """

    def __init__(self, n_splits=3):
        """Store the number of trailing months to validate on.

        Raises:
            ValueError: If ``n_splits`` is smaller than 1.
        """
        # A value of 0 would make the ``[-n_splits:]`` slice select every month.
        if n_splits < 1:
            raise ValueError(f'n_splits must be >= 1, got {n_splits}')
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        """Yield ``(train_positions, valid_positions)`` for each fold.

        Positions are 0-based row positions (suitable for ``.iloc``),
        independent of the index of ``X``. A month with no earlier data is
        skipped, so fewer than ``n_splits`` folds are produced when ``X``
        does not contain more months than requested splits.
        """
        # Months since year 0 gives a single sortable integer per row.
        month_number = (12 * X["_year"] + X["_month"]).values
        timesteps = sorted(set(month_number.tolist()))
        positions = X.reset_index(drop=True).index

        for t in timesteps[-self.n_splits:]:
            is_train = month_number < t
            if not is_train.any():
                # Earliest month: nothing to train on.
                continue

            yield positions[is_train], positions[month_number == t]

    def get_n_splits(self, X, y=None, groups=None):
        """Return the configured number of splits."""
        return self.n_splits
|
22 |
+
|
23 |
+
|
24 |
+
def evaluate(
    X, y,
    model_params,
    cat_features,
    scorer,
    FOLD=5,
    model_type='CATBOOST'
):
    """Train and score one model per ``MonthlyKFold`` fold.

    Args:
        X: Feature frame; must contain the ``_year`` and ``_month`` columns
            that ``MonthlyKFold`` reads.
        y: Target values aligned positionally with ``X``.
        model_params: Parameters forwarded to ``train_model``.
        cat_features: Categorical feature names forwarded to ``train_model``.
        scorer: Callable invoked as ``scorer(y_true, y_pred)``.
        FOLD: Number of trailing months used as validation folds.
        model_type: Model identifier forwarded to ``train_model``.

    Returns:
        ``(models, scores)``: the fitted models and their validation scores,
        in fold order.
    """
    print('Evaluating...')

    tscv = MonthlyKFold(FOLD)

    scores = []
    models = []

    for i, (train_index, valid_index) in enumerate(tscv.split(X)):

        print(f'FOLD:{i+1}')

        X_train, y_train = X.iloc[train_index, :], y.iloc[train_index]
        X_valid, y_valid = X.iloc[valid_index, :], y.iloc[valid_index]

        model = train_model(
            train=(X_train, y_train),
            model_params=model_params,
            model_type=model_type,
            cat_features=cat_features,
            valid=(X_valid, y_valid))

        # Score the post-processed predictions, as done at inference time.
        score = scorer(y_valid, post_process(model.predict(X_valid)))
        print(f'Score:{score:.5f}')

        models.append(model)
        scores.append(score)

    print(f"Scores:{scores}")
    print(f'Mean Score:{np.mean(scores):.5f} +- {np.std(scores):.3f}')

    return models, scores
|
68 |
+
|
src/models/train_model.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from catboost import CatBoostRegressor
|
2 |
+
|
3 |
+
def train_model(
    train,
    model_params,
    model_type,
    cat_features,
    valid=None,
):
    """Fit a model of the requested type.

    Args:
        train: ``(X_train, y_train)`` pair.
        model_params: Keyword arguments for the model constructor.
        model_type: Model identifier; only ``'CATBOOST'`` is supported.
        cat_features: Categorical feature names passed to the constructor.
        valid: Optional ``(X_valid, y_valid)`` pair used as evaluation set.

    Returns:
        The fitted model.

    Raises:
        ValueError: If ``model_type`` is not supported.
    """
    # Fail with a clear message instead of an unbound-variable error later.
    if model_type != 'CATBOOST':
        raise ValueError(f'Unsupported model_type: {model_type!r}')

    X_train, y_train = train

    model = CatBoostRegressor(**model_params,
                              cat_features=cat_features)

    # Without a validation pair no evaluation set is passed to fit().
    eval_set = None
    if valid is not None:
        X_valid, y_valid = valid
        eval_set = [(X_valid, y_valid)]

    model.fit(X_train, y_train,
              eval_set=eval_set,
              verbose=200
              )

    return model
|
src/utils/__pycache__/helper_functions.cpython-311.pyc
ADDED
Binary file (3.69 kB). View file
|
|
src/utils/helper_functions.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
from datetime import datetime
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
def save_models(models, model_type, directory):
    """Pickle each model to ``<directory>/<model_type>_FOLD_<k>.pkl``.

    Args:
        models: Models to store; fold numbers start at 1 and follow the
            order of this sequence.
        model_type: Prefix used in the file names.
        directory: Target directory; created if it does not exist.
    """
    print('Saving models...')

    os.makedirs(directory, exist_ok=True)

    for fold, model in enumerate(models, start=1):
        target = os.path.join(directory, f'{model_type}_FOLD_{fold}.pkl')
        with open(target, 'wb') as handle:
            pickle.dump(model, handle)
|
15 |
+
|
16 |
+
|
17 |
+
def load_models(directory):
    """Load every ``.pkl`` file found directly inside ``directory``.

    Files are loaded in sorted file-name order so the result is the same on
    every run and platform.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        List of unpickled objects, one per ``.pkl`` file.
    """
    print('Loading models...')

    # List all files in the directory
    pkl_files = sorted(
        name for name in os.listdir(directory) if name.endswith('.pkl')
    )

    models = []
    for name in pkl_files:
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # point this at directories whose contents are trusted.
        with open(os.path.join(directory, name), 'rb') as handle:
            models.append(pickle.load(handle))

    return models
|
33 |
+
|
34 |
+
def get_predictions(models, X_test):
    """Average the post-processed predictions of several models.

    Args:
        models: Non-empty sequence of objects exposing ``predict(X)``.
        X_test: Features handed to each model's ``predict``.

    Returns:
        Element-wise mean of the per-model predictions.

    Raises:
        ValueError: If ``models`` is empty; averaging nothing would
            otherwise yield NaN without any error.
    """
    print('Forecasting test data...')

    if len(models) == 0:
        raise ValueError('get_predictions needs at least one model')

    preds = [post_process(model.predict(X_test)) for model in models]

    return np.mean(preds, axis=0)


def post_process(predictions):
    """Clamp predictions at zero from below.

    Args:
        predictions: Array-like of predicted values.

    Returns:
        The predictions with every negative value replaced by 0.
    """
    # np.clip also accepts plain lists, unlike the ndarray-only .clip method.
    return np.clip(predictions, 0, None)
|
49 |
+
|
50 |
+
def save_results(dataframe, file_name):
    """Write ``dataframe`` to a dated Excel file under ``demand_predictions/``.

    The file is named ``<file_name>_<YYYY-MM-DD>.xlsx`` using today's date.

    Args:
        dataframe: Frame to export (the index is not written).
        file_name: File name prefix.
    """
    print('Saving results...')

    today_date = datetime.now().strftime("%Y-%m-%d")

    # Make sure the output folder exists before pandas tries to write to it.
    os.makedirs('demand_predictions', exist_ok=True)

    dataframe.to_excel(f'demand_predictions/{file_name}_{today_date}.xlsx', index=False)
|
57 |
+
|
58 |
+
def save_parquet(dataframe, path):
    """Write ``dataframe`` to ``path`` as parquet, without the index.

    The parent directory of ``path`` is created when it does not exist.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    dataframe.to_parquet(path, index=False)
|
60 |
+
|
61 |
+
def load_parquet(path):
    """Read the parquet file at ``path`` and return it as a DataFrame."""
    return pd.read_parquet(path)
|
63 |
+
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
|
src/visualization/visualize.py
ADDED
File without changes
|