InfineonForecastSystem / pipeline.py
alpertml's picture
Upload 26 files
43e3ffb verified
from src.data.load_data import get_data, generate_test_data
from src.features.build_features import prepare_data
from src.data.preprocess import get_Xy
from src.utils.helper_functions import load_models, get_predictions, load_parquet
from config import Config
import numpy as np
import pandas as pd
# Mapping view of Config's own attribute namespace (its __dict__), so the
# settings can be looked up by string key, e.g. config['target'].
# NOTE(review): vars() only exposes attributes defined directly on Config,
# not ones inherited from a base class — confirm Config has no such settings.
config = vars(Config)
def run(forecast_start_date, forecast_end_date):
    """Build forecast rows for a date range and fill in model predictions.

    The stored product ids and the requested date range are handed to
    ``generate_test_data``; the stored fixed columns are merged onto the
    result, features are prepared, and the predictions of the loaded
    models are written into the ``config['target']`` column.

    Args:
        forecast_start_date: Start of the forecast window; forwarded
            unchanged to ``generate_test_data``.
        forecast_end_date: End of the forecast window; forwarded unchanged
            to ``generate_test_data``.

    Returns:
        The merged test DataFrame with the predictions stored under
        ``config['target']``.
    """
    print('Script Executing...')

    product_ids = load_parquet(f'{config["fold_input_directory"]}/unique_products.parquet').values
    forecast_frame = generate_test_data(
        forecast_start_date,
        forecast_end_date,
        product_ids=product_ids,
    )
    forecast_frame['date'] = pd.to_datetime(forecast_frame['date'])

    # Attach the fixed columns; how='right' keeps every generated row even
    # when a product_id has no entry in the fixed-columns table.
    fixed_columns = load_parquet(f'{config["fold_input_directory"]}/fixed_columns.parquet')
    forecast_frame = pd.merge(
        fixed_columns,
        forecast_frame,
        on=['product_id'],
        how='right',
    )

    # pd.concat over a single frame hands prepare_data a new DataFrame
    # object instead of forecast_frame itself.
    features = prepare_data(
        dataframe=pd.concat([forecast_frame], axis=0),
        data=None,
        split_local_test=config['split_local_test'],
        add_datetime_features=True,
        add_lag_features=True,
    )

    # NOTE(review): the target column is set to NaN before get_Xy —
    # presumably get_Xy needs the column to exist; confirm against get_Xy.
    features[config['target']] = np.nan

    # Only the test feature matrix is needed here; the other two outputs
    # of get_Xy are discarded.
    _, X_test, _ = get_Xy(
        dataframe=features,
        not_include=config['not_include_features'],
        cat_features=config['cat_features'],
        cat_encoding='category',
    )

    fold_models = load_models(config['fold_models_directory'])
    predictions = get_predictions(fold_models, X_test)

    # NOTE(review): assumes one prediction per row of forecast_frame, in the
    # same order — verify that prepare_data/get_Xy neither drop nor reorder rows.
    forecast_frame[config['target']] = predictions

    print('Script Done!')
    return forecast_frame
# if __name__ == '__main__':
# run()