|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
import pickle |
|
import os |
|
|
|
from sklearn.model_selection import train_test_split |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.preprocessing import LabelEncoder, StandardScaler |
|
from sklearn.metrics import confusion_matrix, classification_report |
|
|
|
|
|
# Read the Telco churn CSV from the current working directory.
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Display the table exactly as loaded, before any cleaning is applied.
st.write("## Telco Customer Churn Dataset")
st.write(df)

# Cleaning, expressed as a single pipeline:
#   1. drop the `customerID` column,
#   2. parse `TotalCharges` as numeric; entries that cannot be parsed
#      become NaN (errors='coerce'),
#   3. drop every row that contains a NaN in any column.
df = (
    df.drop('customerID', axis=1)
    .assign(
        TotalCharges=lambda frame: pd.to_numeric(
            frame['TotalCharges'], errors='coerce'
        )
    )
    .dropna()
)
|
|
|
|
|
# Fit one LabelEncoder per object-dtype column and keep the fitted encoders,
# keyed by column name, so the integer codes can be mapped back later.
# NOTE(review): presumably this also encodes the `Churn` target, since it is
# used as a numeric label further down; confirm against the CSV schema.
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {
    name: LabelEncoder().fit(df[name]) for name in categorical_columns
}

# Replace each of those columns in place with its integer codes.
for name, encoder in label_encoders.items():
    df[name] = encoder.transform(df[name])
|
|
|
|
|
# Separate the predictors from the target column.
X = df.drop(columns=['Churn'])
y = df['Churn']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows influence the training features (data
# leakage) and make the evaluation below optimistic. The split uses the same
# test_size and random_state as before, so the same rows land in each
# partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code still referring to `X_scaled` keeps working; it is
# the full feature matrix scaled with the training-set statistics.
X_scaled = scaler.transform(X)
|
|
|
|
|
# Train the logistic-regression classifier on the training split.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=5000, solver='saga').fit(X_train, y_train)

# Serialize the fitted estimator next to the script.
# NOTE(review): only `model` is pickled here; the fitted `scaler` and
# `label_encoders` are not stored in this file, so a consumer of the pickle
# has to reproduce the preprocessing separately.
with open('customer_churn_logres_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
|
|
|
|
|
# Use the first row of the fitted coefficient matrix as a per-feature
# importance score, labelled with the feature names and sorted from the
# largest to the smallest signed coefficient.
importance = model.coef_[0]
feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)

st.write("## Feature Importance")
fig, ax = plt.subplots()
feature_importance.plot(kind='bar', ax=ax)
st.pyplot(fig)
# Streamlit re-executes this script on every interaction within one process;
# figures created through pyplot stay registered until closed, so release
# this one once it has been rendered.
plt.close(fig)
|
|
|
|
|
# Predict labels for the held-out split; `y_pred` is reused by the
# classification report further down.
y_pred = model.predict(X_test)

st.write("## Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
# Draw on `ax` explicitly instead of relying on pyplot's implicit "current
# axes", so the heatmap and the labels set below always target the same
# axes object.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No Churn', 'Churn'],
    yticklabels=['No Churn', 'Churn'],
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
st.pyplot(fig)
# Release the figure after rendering; otherwise one figure per script rerun
# accumulates in pyplot's global registry.
plt.close(fig)
|
|
|
|
|
# Per-class metrics for the held-out split, shown as plain text.
st.write("## Classification Report")
report_text = classification_report(y_test, y_pred)
st.text(report_text)
|
|
|
|
|
# Optional publishing step: runs only on the rerun triggered by the button.
if st.button('Upload Model to Hugging Face'):
    # The access token is read from the environment, never from the script.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        st.error("HF_TOKEN environment variable not set.")
    else:
        # Imported lazily so the dependency is only needed when uploading.
        from huggingface_hub import HfApi

        # The pickle written earlier is uploaded under the same file name.
        model_filename = 'customer_churn_logres_model.pkl'
        HfApi().upload_file(
            path_or_fileobj=model_filename,
            path_in_repo=model_filename,
            repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
            token=hf_token,
        )
        st.success("Model uploaded successfully!")
|
|