Customer-Segmentation / data_preparation.py
simran0608's picture
Upload 9 files
98b0379 verified
raw
history blame
622 Bytes
import pandas as pd
import numpy as np
def preprocess_data(data):
    """Drop selected columns from *data* and filter out outlier rows.

    The columns 'CustID', 'Channel' and 'Region' are removed when they are
    present; any that are missing are simply skipped. The remaining frame is
    then passed through ``remove_outliers`` with its default threshold.

    Args:
        data: A pandas DataFrame.

    Returns:
        The DataFrame returned by ``remove_outliers``.
    """
    unwanted = ('CustID', 'Channel', 'Region')
    # Only ask pandas to drop labels that actually exist in the frame.
    present = [label for label in unwanted if label in data.columns]
    trimmed = data.drop(columns=present)
    return remove_outliers(trimmed)
def remove_outliers(df, threshold=3):
    """Return the rows of *df* whose numeric values all lie within *threshold*
    standard deviations of their column mean.

    Z-scores are computed per column over the float/int columns only, using
    the sample standard deviation (pandas' default, ``ddof=1``). Non-numeric
    columns are not inspected but are kept in the result.

    Columns whose standard deviation is zero or undefined (constant column,
    a single row, all values missing) cannot contain an outlier and are
    skipped. Without this, their z-scores would be NaN, every comparison
    against *threshold* would be False, and every row would be discarded.

    A row with a missing value in one of the checked columns is still
    dropped, because its z-score is NaN and fails the ``< threshold`` test.

    Args:
        df: A pandas DataFrame.
        threshold: Exclusive upper bound on the absolute z-score a value may
            have for its row to be kept.

    Returns:
        A DataFrame with the same columns as *df*, restricted to kept rows.
    """
    numeric = df.select_dtypes(include=[float, int])
    spread = numeric.std()

    # Keep only columns with a strictly positive, defined standard deviation;
    # NaN and 0 both fail ``> 0``. A positional boolean mask is used so the
    # selection does not depend on column labels.
    usable = spread.gt(0).fillna(False).to_numpy(dtype=bool)
    checked = numeric.loc[:, usable]

    z_scores = ((checked - checked.mean()) / spread[usable]).abs()

    # With no columns left to check, ``all(axis=1)`` is True for every row,
    # so the frame is returned unfiltered.
    keep = (z_scores < threshold).all(axis=1)
    return df[keep]