Spaces:
Sleeping
Sleeping
import pandas as pd | |
from src.utils.helper_functions import save_parquet, load_parquet | |
from config import Config | |
config = vars(Config) | |
def prepare_data(
    dataframe,
    data,
    split_local_test,
    add_datetime_features=True,
    add_lag_features=True
):
    """Run the enabled feature-building steps over ``dataframe``.

    Args:
        dataframe: Frame to enrich; handed to ``datetime_features`` and then
            to ``lag_features`` when the matching flag is truthy.
        data: Frame forwarded untouched to ``lag_features``.
        split_local_test: Flag forwarded untouched to ``lag_features``.
        add_datetime_features: When truthy, apply ``datetime_features``.
        add_lag_features: When truthy, apply ``lag_features``.

    Returns:
        The output of the last step that ran, or ``dataframe`` itself when
        both flags are falsy.
    """
    print('Building features...')

    enriched = dataframe
    # Date parts first, lag columns second: the lag step receives the frame
    # that already went through the date-part step.
    if add_datetime_features:
        enriched = datetime_features(enriched)
    if add_lag_features:
        enriched = lag_features(enriched, data, split_local_test)
    return enriched
def lag_features(dataframe, data, split_local_test):
    """Left-join shifted ``*_backlog`` columns onto ``dataframe``.

    When ``split_local_test`` is truthy the shifted columns are computed from
    ``data``, written to ``shift_features.parquet`` under
    ``config["fold_input_directory"]`` and used for the join. Otherwise the
    previously written parquet file is loaded and used instead.

    Args:
        dataframe: Frame to enrich; joined on ``product_id`` and ``date``.
        data: Frame holding ``product_id``, ``date`` and the ``*_backlog``
            columns. It is modified in place: one ``<col>_shift_<n>`` column
            is added per backlog column for n = 9, 10, 11, 12.
        split_local_test: Selects between computing-and-saving the shifted
            columns (truthy) and loading them from disk (falsy).

    Returns:
        The result of the left merge of ``dataframe`` with the shift columns.
    """
    join_keys = ['product_id', 'date']

    if split_local_test:
        sources = [name for name in data.columns if name.endswith('_backlog')]
        # Same visiting order as a column-outer / lag-inner nested loop.
        lag_plan = [(source, lag) for source in sources for lag in range(9, 13)]

        shifted_names = []
        for source, lag in lag_plan:
            target = f'{source}_shift_{lag}'
            # Shift within each product so values never leak across products.
            # Row order inside a product decides what "lag" means here.
            data.loc[:, target] = data.groupby('product_id')[source].shift(lag)
            shifted_names.append(target)

        # Persist the lookup table so a later call with a falsy
        # split_local_test can reuse it.
        save_parquet(
            dataframe=data[shifted_names + join_keys],
            path=f'{config["fold_input_directory"]}/shift_features.parquet'
        )
        map_data = data[shifted_names + join_keys]
    else:
        map_data = load_parquet(f'{config["fold_input_directory"]}/shift_features.parquet')

    return pd.merge(dataframe, map_data, how='left', on=join_keys)
def datetime_features(dataframe, date='date', suffix=''):
    """Add month, year, quarter and ISO week columns derived from a date column.

    The new columns are named ``f'{suffix}_month'``, ``f'{suffix}_year'``,
    ``f'{suffix}_quarter'`` and ``f'{suffix}_weekofyear'``; with the default
    empty ``suffix`` the names therefore start with an underscore
    (``'_month'`` and so on).

    Args:
        dataframe: Frame to extend; it is modified in place.
        date: Name of the datetime column to read the parts from.
        suffix: Text placed in front of ``_<part>`` in each new column name.

    Returns:
        The same ``dataframe`` object, with the four columns added.
    """
    part_readers = (
        ('month', lambda stamps: stamps.dt.month),
        ('year', lambda stamps: stamps.dt.year),
        ('quarter', lambda stamps: stamps.dt.quarter),
        ('weekofyear', lambda stamps: stamps.dt.isocalendar().week),
    )
    for part, read_part in part_readers:
        # The date column is looked up again for every part on purpose.
        dataframe[f'{suffix}_{part}'] = read_part(dataframe[date])
    return dataframe