import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
# Load the dataset
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
# Display the dataset
st.write("## Telco Customer Churn Dataset")
st.write(df)
# Preprocess the data
df = df.drop(columns=['customerID'])
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna()
# Encode categorical features
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
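# Illustrative sketch (not part of the original pipeline): the fitted LabelEncoder
# objects kept in `label_encoders` can map encoded values back to their original
# string labels, e.g. for reporting predictions. The helper name is hypothetical.
def decode_churn(encoded_values):
    """Map encoded churn codes back to the dataset's original labels ('No'/'Yes')."""
    return label_encoders['Churn'].inverse_transform(encoded_values)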
# Split the data into features and target
X = df.drop(columns=['Churn'])
y = df['Churn']
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
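# Illustrative sketch (assumption, not part of the original app): any new customer
# rows scored later must pass through the *same* fitted scaler (scaler.transform,
# never fit_transform again) with the same column order as X. The helper name and
# its expectation of already label-encoded input are hypothetical.
def scale_new_rows(new_df):
    """Scale already label-encoded rows using the scaler fitted on the training data."""
    return scaler.transform(new_df[X.columns])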
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Train the logistic regression model with increased iterations and a different solver
model = LogisticRegression(max_iter=5000, solver='saga') # 'saga' works well for large datasets
model.fit(X_train, y_train)
# Save the trained model to a pickle file
with open('customer_churn_logres_model.pkl', 'wb') as f:
    pickle.dump(model, f)
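# Illustrative sketch (not part of the original app): reloading the pickled model
# for later inference. pickle.load returns the fitted LogisticRegression exactly as
# it was saved; the helper name is hypothetical.
def load_churn_model(path='customer_churn_logres_model.pkl'):
    """Load the pickled logistic regression model from disk."""
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)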
# Plot feature importance (signed logistic regression coefficients on the standardized features)
importance = model.coef_[0]
feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)
st.write("## Feature Importance")
fig, ax = plt.subplots()
feature_importance.plot(kind='bar', ax=ax)
st.pyplot(fig)
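# Illustrative sketch (assumption, not a required step): because the features are
# standardized, the coefficients are comparable across features, and exponentiating
# them gives the odds ratio of churn per one-standard-deviation increase in each
# feature. The helper name is hypothetical.
def churn_odds_ratios():
    """Return odds ratios derived from the model's coefficients, largest first."""
    return pd.Series(np.exp(model.coef_[0]), index=X.columns).sort_values(ascending=False)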
# Model evaluation
y_pred = model.predict(X_test)
# Confusion matrix
st.write("## Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['No Churn', 'Churn'], yticklabels=['No Churn', 'Churn'], ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
st.pyplot(fig)
# Classification report
st.write("## Classification Report")
st.text(classification_report(y_test, y_pred))
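# Illustrative alternative (assumption, not in the original app):
# classification_report(..., output_dict=True) returns a nested dict that can be
# shown as a sortable table with st.dataframe instead of plain text.
def render_report_as_table():
    """Display the classification report as a Streamlit dataframe."""
    report = classification_report(y_test, y_pred, output_dict=True)
    st.dataframe(pd.DataFrame(report).transpose())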
# Upload the trained model to Hugging Face when the button is clicked
if st.button('Upload Model to Hugging Face'):
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        from huggingface_hub import HfApi
        api = HfApi()
        api.upload_file(
            path_or_fileobj='customer_churn_logres_model.pkl',
            path_in_repo='customer_churn_logres_model.pkl',
            repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
            token=hf_token,
        )
        st.success("Model uploaded successfully!")
    else:
        st.error("HF_TOKEN environment variable not set.")
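# Illustrative sketch (assumption, not part of the original app): once the file is
# on the Hub, it can be fetched elsewhere with hf_hub_download and then unpickled.
# The repo id mirrors the upload above; a token is only needed for private repos.
def download_churn_model():
    """Download the pickled model from the Hugging Face Hub and return its local path."""
    from huggingface_hub import hf_hub_download
    return hf_hub_download(
        repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
        filename='customer_churn_logres_model.pkl',
    )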