Spaces:

bacancydataprophets
/

Customer-Segmentation

Sleeping

Customer-Segmentation / data_preparation.py

Upload 9 files

98b0379 verified about 1 year ago

622 Bytes

	import pandas as pd
	import numpy as np

	def preprocess_data(data):
	    """Drop the non-feature columns and filter outlier rows.

	    The columns 'CustID', 'Channel' and 'Region' are removed when they are
	    present; absent ones are skipped silently. The remaining frame is then
	    passed through ``remove_outliers`` with its default threshold.

	    Args:
	        data: DataFrame to clean. It is not modified in place.

	    Returns:
	        The DataFrame returned by ``remove_outliers``.
	    """
	    for column in ('CustID', 'Channel', 'Region'):
	        # Guard each drop so that inputs lacking a column do not raise.
	        if column in data.columns:
	            data = data.drop(columns=[column])

	    return remove_outliers(data)

	def remove_outliers(df, threshold=3):
	    """Return the rows of ``df`` whose z-scores are all below ``threshold``.

	    Z-scores are computed per column, over the columns picked by
	    ``select_dtypes(include=[float, int])``, as ``|x - mean| / std`` using
	    pandas' default standard deviation.

	    Columns whose standard deviation is zero or undefined (a constant
	    column, or a frame with a single row) cannot yield a meaningful z-score
	    and are left out of the test. Without this, their NaN z-scores would
	    compare as "not below threshold" and every row would be discarded.

	    Rows holding a missing value in a tested column are still dropped,
	    because a NaN z-score never compares below ``threshold``.

	    Args:
	        df: DataFrame to filter. It is not modified.
	        threshold: Exclusive upper bound on the absolute z-score.

	    Returns:
	        A DataFrame with the same columns as ``df``, restricted to the rows
	        that pass the test in every tested column.
	    """
	    numeric = df.select_dtypes(include=[float, int])
	    spread = numeric.std()

	    # NaN > 0 is False, so this rejects both zero and undefined deviations.
	    usable = spread > 0
	    numeric = numeric.loc[:, usable.to_numpy()]

	    z_scores = ((numeric - numeric.mean()) / spread[usable]).abs()

	    # With no tested columns, all(axis=1) is vacuously True: keep every row.
	    keep = (z_scores < threshold).all(axis=1)
	    return df[keep]