import pandas as pd

from src.utils.helper_functions import save_parquet, load_parquet
from config import Config

# Attributes of ``Config`` exposed as a mapping so settings are read by key.
config = vars(Config)

# Columns identifying a row when lag features are joined back onto a frame.
_MERGE_KEYS = ['product_id', 'date']

# Default lags (counted in rows within each product) for every backlog column.
_DEFAULT_LAG_SHIFTS = range(9, 13)


def prepare_data(
    dataframe,
    data,
    split_local_test,
    add_datetime_features=True,
    add_lag_features=True
):
    """Add the enabled feature groups to ``dataframe`` and return it.

    Args:
        dataframe: Frame that receives the features. The datetime step
            writes its columns into this object in place.
        data: Frame the lag features are computed from; forwarded to
            ``lag_features``.
        split_local_test: Forwarded to ``lag_features``; selects whether the
            lag features are computed and cached or loaded from the cache.
        add_datetime_features: When true, run ``datetime_features``.
        add_lag_features: When true, run ``lag_features``.

    Returns:
        The frame with the requested feature columns added.
    """
    print('Building features...')

    if add_datetime_features:
        dataframe = datetime_features(dataframe)

    if add_lag_features:
        dataframe = lag_features(dataframe, data, split_local_test)

    return dataframe


def _shift_features_path():
    """Return the parquet path under which the lag features are cached."""
    return f'{config["fold_input_directory"]}/shift_features.parquet'


def lag_features(dataframe, data, split_local_test, shifts=_DEFAULT_LAG_SHIFTS):
    """Left-join per-product lagged backlog columns onto ``dataframe``.

    When ``split_local_test`` is true, every column of ``data`` whose name
    ends in ``_backlog`` is shifted within its ``product_id`` group by each
    value in ``shifts``. The result (lag columns plus ``product_id`` and
    ``date``) is written to the shift-features parquet file and used for the
    join. Otherwise the previously written file is loaded and used instead.

    ``data`` is not modified; the lag columns are built in a separate frame.

    NOTE(review): the shift is positional within each product group, so the
    lags are only meaningful if ``data`` is already ordered (presumably by
    ``date``) within each product -- confirm against the caller.

    Args:
        dataframe: Frame to enrich; must contain ``product_id`` and ``date``.
        data: Source frame with ``product_id``, ``date`` and the
            ``*_backlog`` columns. Only read when ``split_local_test`` is
            true.
        split_local_test: Compute and cache the features when true, load the
            cached features when false.
        shifts: Iterable of shift sizes; defaults to 9, 10, 11 and 12.

    Returns:
        ``dataframe`` left-merged with the lag columns on ``product_id`` and
        ``date``. Lag columns are named ``<column>_shift_<n>``.
    """
    if split_local_test:
        # Materialise once so a one-shot iterable survives the nested loop.
        shifts = tuple(shifts)
        backlog_cols = [col for col in data.columns if col.endswith('_backlog')]

        # The grouping does not depend on the column or shift: build it once.
        by_product = data.groupby('product_id')
        shifted = {}
        for col in backlog_cols:
            for shift in shifts:
                shifted[f'{col}_shift_{shift}'] = by_product[col].shift(shift)

        # ``assign`` returns a new frame, leaving the caller's ``data``
        # untouched; the final selection keeps lag columns ahead of the keys.
        map_data = data[_MERGE_KEYS].assign(**shifted)[list(shifted) + _MERGE_KEYS]
        save_parquet(dataframe=map_data, path=_shift_features_path())
    else:
        map_data = load_parquet(_shift_features_path())

    return pd.merge(dataframe, map_data, how='left', on=_MERGE_KEYS)


def datetime_features(dataframe, date='date', suffix=''):
    """Add month, year, quarter and ISO week columns derived from a date column.

    The columns are written into ``dataframe`` in place and the same object
    is returned.

    Args:
        dataframe: Frame holding the date column.
        date: Name of the column to read; it is accessed through ``.dt``, so
            it must be datetime-like.
        suffix: Text placed *before* ``_month``, ``_year``, ``_quarter`` and
            ``_weekofyear`` (despite the name it acts as a prefix). With the
            default empty string the columns are ``_month``, ``_year``,
            ``_quarter`` and ``_weekofyear``.

    Returns:
        The input ``dataframe`` with the four columns added.
    """
    when = dataframe[date].dt
    dataframe[f'{suffix}_month'] = when.month
    dataframe[f'{suffix}_year'] = when.year
    dataframe[f'{suffix}_quarter'] = when.quarter
    dataframe[f'{suffix}_weekofyear'] = when.isocalendar().week
    return dataframe