File size: 622 Bytes
98b0379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import pandas as pd
import numpy as np

def preprocess_data(data):
    """Drop identifier/categorical columns, then filter outlier rows.

    The columns ``CustID``, ``Channel`` and ``Region`` are removed when
    they are present; columns that are absent are skipped silently.  The
    remaining frame is passed through ``remove_outliers`` (with its
    default threshold) and that result is returned.

    Args:
        data: Input ``pandas.DataFrame``.  It is not modified in place.

    Returns:
        The frame returned by ``remove_outliers``.
    """
    unwanted = ('CustID', 'Channel', 'Region')
    present = [name for name in unwanted if name in data.columns]
    if present:
        data = data.drop(columns=present)

    return remove_outliers(data)

def remove_outliers(df, threshold=3):
    """Return the rows of *df* whose numeric values all lie within
    *threshold* sample standard deviations of their column mean.

    Z-scores are computed per numeric column using the column mean and
    the sample standard deviation (``DataFrame.std`` default).  A row is
    kept only if ``|z| < threshold`` in every column that was scored.

    Columns whose standard deviation is zero or undefined (a constant
    column, or a frame with fewer than two rows) cannot be scored, so
    they are skipped instead of producing NaN z-scores that would
    discard every row.

    Rows with a missing value in any numeric column are dropped.

    Args:
        df: Input ``pandas.DataFrame``.  Non-numeric columns are ignored
            for scoring but retained in the output.
        threshold: Exclusive upper bound on the absolute z-score.

    Returns:
        A ``DataFrame`` with the same columns as *df*, containing only
        the rows that passed the filter.
    """
    # np.number covers every numeric width (float32, int16, unsigned, ...),
    # not just the float64/int64 columns matched by the builtin types.
    numeric = df.select_dtypes(include=[np.number])

    std = numeric.std()
    # ``std > 0`` is False for both 0 and NaN, so only columns with real
    # spread are scored.  A positional mask keeps this robust even when
    # column labels are not unique.
    has_spread = (std > 0).to_numpy()
    scored = numeric.loc[:, has_spread]

    z_scores = ((scored - scored.mean()) / std[has_spread]).abs()
    within_threshold = (z_scores < threshold).all(axis=1)

    # Missing values in scored columns already fail the comparison above;
    # this keeps the same rule for columns that were skipped.
    complete = numeric.notna().all(axis=1)

    return df[within_threshold & complete]