File size: 622 Bytes
98b0379 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
import pandas as pd
import numpy as np
def preprocess_data(data):
    """Drop selected columns from *data* and filter out outlier rows.

    The columns 'CustID', 'Channel' and 'Region' are removed when they
    exist; any that are absent are skipped. The remaining frame is then
    passed through ``remove_outliers`` with its default threshold.

    Args:
        data: DataFrame to preprocess. It is not modified in place.

    Returns:
        The DataFrame returned by ``remove_outliers``.
    """
    unwanted = ('CustID', 'Channel', 'Region')
    # Only ask pandas to drop what is really there, so missing columns
    # never raise a KeyError.
    present = [name for name in unwanted if name in data.columns]
    if present:
        data = data.drop(columns=present)
    return remove_outliers(data)
def remove_outliers(df, threshold=3):
    """Return the rows of *df* whose numeric values are all within range.

    A z-score is computed per float/int column as
    ``abs(value - column_mean) / column_std`` (pandas' default sample
    standard deviation). A row is kept only when every checked z-score
    is strictly below *threshold*.

    Columns whose standard deviation is zero or undefined (constant
    columns, or a frame with a single row) cannot contain outliers and
    are left out of the check. Without this, their NaN z-scores would
    fail the ``< threshold`` comparison and every row would be dropped.

    Rows with a missing value in a checked column are still removed,
    because a NaN z-score never compares below the threshold.

    Args:
        df: DataFrame to filter. Non-numeric columns are ignored for the
            check but preserved in the result.
        threshold: Exclusive upper bound on the absolute z-score.

    Returns:
        A DataFrame containing only the rows that passed the check.
    """
    numeric = df.select_dtypes(include=[float, int])

    # Positional boolean mask; NaN > 0 is False, so undefined deviations
    # are excluded together with zero ones.
    has_spread = (numeric.std() > 0).to_numpy()
    numeric = numeric.loc[:, has_spread]

    z_scores = ((numeric - numeric.mean()) / numeric.std()).abs()
    keep = (z_scores < threshold).all(axis=1)
    return df[keep]
|