import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
# Load the dataset
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
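# Note: Streamlit reruns this entire script on every interaction, so the data is
# reloaded and the model retrained each time; wrapping these steps in
# @st.cache_data / @st.cache_resource is a common optimization (not applied here).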
# Display the dataset
st.write("## Telco Customer Churn Dataset")
st.write(df)
# Preprocess the data
df = df.drop(columns=['customerID'])
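# 'TotalCharges' is read as a string because the raw CSV contains blank values;
# errors='coerce' turns those blanks into NaN so dropna() can remove the rows.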
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna()
# Encode categorical features
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
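# Each LabelEncoder maps category strings to integers (e.g. 'No'/'Yes' -> 0/1);
# the fitted encoders are kept so the categories could be decoded or reapplied later.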
# Split the data into features and target
X = df.drop(columns=['Churn'])
y = df['Churn']
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
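# Note: the churn classes are imbalanced, so passing stratify=y to
# train_test_split is a common refinement to preserve the class ratio in both
# splits (an optional tweak, not part of the original split).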
# Train the logistic regression model with increased iterations and a different solver
model = LogisticRegression(max_iter=5000, solver='saga') # 'saga' works well for large datasets
model.fit(X_train, y_train)
# Save the trained model to a pickle file
with open('customer_churn_logres_model.pkl', 'wb') as f:
    pickle.dump(model, f)
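# Optional sketch (illustrative filename, not part of the original pipeline):
# persisting the fitted scaler and encoders lets inference code reproduce the
# exact preprocessing applied here.
# with open('customer_churn_preprocessing.pkl', 'wb') as f:
#     pickle.dump({'scaler': scaler, 'label_encoders': label_encoders}, f)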
# Plot feature importance
importance = model.coef_[0]
feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)
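# Note: because the features were standardized, the coefficient magnitudes are
# roughly comparable and serve as a simple proxy for feature importance; the
# sign indicates the direction of association with churn.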
st.write("## Feature Importance")
fig, ax = plt.subplots()
feature_importance.plot(kind='bar', ax=ax)
st.pyplot(fig)
# Model evaluation
y_pred = model.predict(X_test)
# Confusion matrix
st.write("## Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['No Churn', 'Churn'], yticklabels=['No Churn', 'Churn'], ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
st.pyplot(fig)
# Classification report
st.write("## Classification Report")
st.text(classification_report(y_test, y_pred))
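# Optional sketch: a single headline accuracy figure alongside the full report.
# st.write(f"Test accuracy: {model.score(X_test, y_test):.3f}")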
# Upload the trained model to Hugging Face when the button is clicked
if st.button('Upload Model to Hugging Face'):
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        from huggingface_hub import HfApi
        api = HfApi()
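        # Assumption: the target repo already exists; if not, it could be
        # created first, e.g. api.create_repo(repo_id='wvsu-dti-aidev-team/customer_churn_logres_model', token=hf_token, exist_ok=True).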
        api.upload_file(
            path_or_fileobj='customer_churn_logres_model.pkl',
            path_in_repo='customer_churn_logres_model.pkl',
            repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
            token=hf_token,
        )
        st.success("Model uploaded successfully!")
    else:
        st.error("HF_TOKEN environment variable not set.")