alpertml's picture
Upload 26 files
43e3ffb verified
import pandas as pd
from src.utils.helper_functions import save_parquet, load_parquet
from config import Config
# Namespace mapping of `Config` (its `__dict__`), so settings can be looked up
# by key, e.g. config["fold_input_directory"].
config = vars(Config)
def prepare_data(
    dataframe,
    data,
    split_local_test,
    add_datetime_features=True,
    add_lag_features=True
):
    """Run the enabled feature-building steps on ``dataframe``.

    Args:
        dataframe: Frame the features are added to.
        data: Frame forwarded to ``lag_features`` as the source of the
            lagged columns.
        split_local_test: Forwarded unchanged to ``lag_features``.
        add_datetime_features: When truthy, apply ``datetime_features``.
        add_lag_features: When truthy, apply ``lag_features`` (after the
            datetime step, if that one is enabled too).

    Returns:
        The frame produced by the last enabled step, or ``dataframe``
        itself when both steps are disabled.
    """
    print('Building features...')

    result = dataframe
    if add_datetime_features:
        result = datetime_features(result)
    if add_lag_features:
        result = lag_features(result, data, split_local_test)
    return result
def lag_features(dataframe, data, split_local_test):
    """Left-join lagged ``*_backlog`` columns onto ``dataframe``.

    When ``split_local_test`` is truthy, every column of ``data`` whose name
    ends with ``_backlog`` is shifted by 9, 10, 11 and 12 rows within each
    ``product_id`` group. The resulting ``{col}_shift_{n}`` columns, together
    with ``product_id`` and ``date``, are written to ``shift_features.parquet``
    under ``config["fold_input_directory"]`` and used as the lookup table.
    Otherwise the lookup table is loaded from that same parquet path.

    NOTE: in the ``split_local_test`` branch the shift columns are added to
    ``data`` itself, so the caller's frame is modified.
    NOTE(review): the shift is positional within each group, so the lags
    follow the row order of ``data`` — presumably it is already sorted by
    date per product; verify against the caller.

    Args:
        dataframe: Frame to enrich; must contain ``product_id`` and ``date``.
        data: Source frame holding ``product_id``, ``date`` and the
            ``*_backlog`` columns (only read when ``split_local_test`` is
            truthy).
        split_local_test: Selects between computing + saving the lag table
            and loading the previously saved one.

    Returns:
        The result of left-merging the lag table onto ``dataframe`` by
        ``product_id`` and ``date``.
    """
    join_keys = ['product_id', 'date']

    if not split_local_test:
        # Load the lag table from the path the other branch writes to.
        map_data = load_parquet(f'{config["fold_input_directory"]}/shift_features.parquet')
        return pd.merge(dataframe, map_data, how='left', on=join_keys)

    # Every (backlog column, lag) pair, in column-major order.
    targets = [
        (name, lag)
        for name in data.columns
        if name.endswith('_backlog')
        for lag in range(9, 13)
    ]

    lag_backlog_cols = []
    for name, lag in targets:
        lagged_name = f'{name}_shift_{lag}'
        data.loc[:, lagged_name] = data.groupby('product_id')[name].shift(lag)
        lag_backlog_cols.append(lagged_name)

    selected = lag_backlog_cols + join_keys
    save_parquet(
        dataframe=data[selected],
        path=f'{config["fold_input_directory"]}/shift_features.parquet'
    )
    # Separate selection, so the merge input is not the object handed to
    # save_parquet.
    map_data = data[selected]
    return pd.merge(dataframe, map_data, how='left', on=join_keys)
def datetime_features(dataframe, date='date', suffix=''):
    """Add month, year, quarter and ISO week-of-year columns to ``dataframe``.

    The columns are written onto ``dataframe`` itself and are named
    ``{suffix}_month``, ``{suffix}_year``, ``{suffix}_quarter`` and
    ``{suffix}_weekofyear``. Despite its name, ``suffix`` is placed in front
    of the component name, so the default ``''`` yields ``_month`` etc.

    Args:
        dataframe: Frame holding the datetime column; modified in place.
        date: Name of the column read through the ``.dt`` accessor.
        suffix: Text placed before ``_<component>`` in each new column name.

    Returns:
        The same ``dataframe`` object, with the four columns added.
    """
    extractors = (
        ('month', lambda stamps: stamps.dt.month),
        ('year', lambda stamps: stamps.dt.year),
        ('quarter', lambda stamps: stamps.dt.quarter),
        ('weekofyear', lambda stamps: stamps.dt.isocalendar().week),
    )
    for component, extract in extractors:
        dataframe[f'{suffix}_{component}'] = extract(dataframe[date])
    return dataframe