import pandas as pd
import numpy as np

def preprocess_data(data):
    """Drop selected columns from *data* and filter out outlier rows.

    The columns ``CustID``, ``Channel`` and ``Region`` are removed when
    they exist; any that are absent are simply skipped. The remaining
    frame is then passed through ``remove_outliers`` with its default
    threshold.

    Args:
        data: Input ``pandas.DataFrame``. It is not modified in place.

    Returns:
        The DataFrame returned by ``remove_outliers``.
    """
    unwanted = ('CustID', 'Channel', 'Region')
    # Only ask pandas to drop labels that are really there, so a missing
    # column never raises.
    to_drop = [label for label in unwanted if label in data.columns]
    if to_drop:
        data = data.drop(columns=to_drop)

    return remove_outliers(data)

def remove_outliers(df, threshold=3):
    """Return the rows of *df* whose numeric values are all within range.

    A z-score ``|x - mean| / std`` is computed per column for the float
    and int columns of *df*. A row is kept only when every checked
    z-score is strictly below *threshold*.

    Columns whose standard deviation is zero or undefined (a constant
    column, or a frame with fewer than two usable rows) cannot contain
    outliers and are left out of the check. Without this, their z-scores
    would be NaN and every row would be discarded.

    Rows that have a missing value in a checked column are dropped,
    because a NaN z-score never compares as below the threshold.

    Args:
        df: Input ``pandas.DataFrame``. Non-numeric columns are ignored
            for the check but retained in the result.
        threshold: Exclusive upper bound on the absolute z-score.

    Returns:
        A DataFrame with the same columns as *df*, containing only the
        rows that passed the check.
    """
    df_numeric = df.select_dtypes(include=[float, int])
    std = df_numeric.std()

    # ``std > 0`` is False for both 0 and NaN, so constant columns and
    # columns with an undefined spread are skipped in one step.
    checked_cols = std[std > 0].index
    checked = df_numeric[checked_cols]

    z_scores = ((checked - checked.mean()) / std[checked_cols]).abs()
    keep = (z_scores < threshold).all(axis=1)
    return df[keep]