import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
# Load the dataset
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
# Display the dataset
st.write("## Telco Customer Churn Dataset")
st.write(df)
# Preprocess the data
df = df.drop(columns=['customerID'])
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna()
# Encode categorical features
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
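# Illustrative sketch (not part of the original pipeline): the fitted LabelEncoder
# objects kept in `label_encoders` can map encoded values back to their original
# string labels, e.g. for reporting predictions. The helper name is hypothetical.
def decode_churn(encoded_values):
    """Map encoded churn codes back to the dataset's original labels ('No'/'Yes')."""
    return label_encoders['Churn'].inverse_transform(encoded_values)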
# Split the data into features and target
X = df.drop(columns=['Churn'])
y = df['Churn']
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
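# Illustrative sketch (assumption, not part of the original app): any new customer
# rows scored later must pass through the *same* fitted scaler (scaler.transform,
# never fit_transform again) with the same column order as X. The helper name and
# its expectation of already label-encoded input are hypothetical.
def scale_new_rows(new_df):
    """Scale already label-encoded rows using the scaler fitted on the training data."""
    return scaler.transform(new_df[X.columns])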
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Train the logistic regression model with increased iterations and a different solver
model = LogisticRegression(max_iter=5000, solver='saga') # 'saga' works well for large datasets
model.fit(X_train, y_train)
# Save the trained model to a pickle file
with open('customer_churn_logres_model.pkl', 'wb') as f:
    pickle.dump(model, f)
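# Illustrative sketch (not part of the original app): reloading the pickled model
# for later inference. pickle.load returns the fitted LogisticRegression exactly as
# it was saved; the helper name is hypothetical.
def load_churn_model(path='customer_churn_logres_model.pkl'):
    """Load the pickled logistic regression model from disk."""
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)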
# Plot feature importance (signed logistic regression coefficients on the standardized features)
importance = model.coef_[0]
feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)
st.write("## Feature Importance")
fig, ax = plt.subplots()
feature_importance.plot(kind='bar', ax=ax)
st.pyplot(fig)
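# Illustrative sketch (assumption, not a required step): because the features are
# standardized, the coefficients are comparable across features, and exponentiating
# them gives the odds ratio of churn per one-standard-deviation increase in each
# feature. The helper name is hypothetical.
def churn_odds_ratios():
    """Return odds ratios derived from the model's coefficients, largest first."""
    return pd.Series(np.exp(model.coef_[0]), index=X.columns).sort_values(ascending=False)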
# Model evaluation
y_pred = model.predict(X_test)
# Confusion matrix
st.write("## Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['No Churn', 'Churn'], yticklabels=['No Churn', 'Churn'], ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
st.pyplot(fig)
# Classification report
st.write("## Classification Report")
st.text(classification_report(y_test, y_pred))
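# Illustrative alternative (assumption, not in the original app):
# classification_report(..., output_dict=True) returns a nested dict that can be
# shown as a sortable table with st.dataframe instead of plain text.
def render_report_as_table():
    """Display the classification report as a Streamlit dataframe."""
    report = classification_report(y_test, y_pred, output_dict=True)
    st.dataframe(pd.DataFrame(report).transpose())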
# Upload the trained model to Hugging Face when the button is clicked
if st.button('Upload Model to Hugging Face'):
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        from huggingface_hub import HfApi
        api = HfApi()
        api.upload_file(
            path_or_fileobj='customer_churn_logres_model.pkl',
            path_in_repo='customer_churn_logres_model.pkl',
            repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
            token=hf_token,
        )
        st.success("Model uploaded successfully!")
    else:
        st.error("HF_TOKEN environment variable not set.")
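# Illustrative sketch (assumption, not part of the original app): once the file is
# on the Hub, it can be fetched elsewhere with hf_hub_download and then unpickled.
# The repo id mirrors the upload above; a token is only needed for private repos.
def download_churn_model():
    """Download the pickled model from the Hugging Face Hub and return its local path."""
    from huggingface_hub import hf_hub_download
    return hf_hub_download(
        repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
        filename='customer_churn_logres_model.pkl',
    )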