sync with remote
Browse files
- WA_Fn-UseC_-Telco-Customer-Churn.csv +0 -0
- app.py +93 -0
- customer_churn_logres_model.pkl +3 -0
- requirements.txt +7 -0
WA_Fn-UseC_-Telco-Customer-Churn.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import pickle
|
7 |
+
import os
|
8 |
+
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
+
from sklearn.linear_model import LogisticRegression
|
11 |
+
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
12 |
+
from sklearn.metrics import confusion_matrix, classification_report
|
13 |
+
|
14 |
+
# Read the Telco churn CSV (resolved relative to the working directory)
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Show the table exactly as loaded, before any cleaning is applied
st.write("## Telco Customer Churn Dataset")
st.write(df)

# Cleaning pipeline, applied in order:
#   1. drop customerID so it is never used as a model feature
#   2. convert TotalCharges to numbers; unparseable entries become NaN
#   3. discard every row that contains a NaN (including those from step 2)
df = (
    df.drop(columns=['customerID'])
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    .dropna()
)
|
25 |
+
|
26 |
+
# Integer-encode every column that is still stored as text (dtype ``object``).
# The column list is computed once up front; each column gets its own
# LabelEncoder, and the fitted encoders stay available in ``label_encoders``
# keyed by column name.
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])
|
32 |
+
|
33 |
+
# Separate the predictors from the target column
X = df.drop(columns=['Churn'])
y = df['Churn']

# Split into training and testing sets BEFORE scaling. Fitting the scaler on
# the full dataset would let the held-out rows influence the mean/variance the
# model is trained with (data leakage) and make the test metrics optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the logistic regression model with increased iterations and the 'saga'
# solver. 'saga' shuffles the data, so the seed is fixed to keep the fitted
# coefficients (and the saved pickle) reproducible across reruns.
model = LogisticRegression(max_iter=5000, solver='saga', random_state=42)
model.fit(X_train, y_train)

# Save the trained model to a pickle file.
# NOTE(review): only the estimator is persisted; `scaler` and `label_encoders`
# are not written anywhere, so a consumer of this file has to reproduce the
# preprocessing itself.
with open('customer_churn_logres_model.pkl', 'wb') as f:
    pickle.dump(model, f)
|
51 |
+
|
52 |
+
# Plot feature importance.
# `coef_[0]` is the single coefficient row of the fitted logistic regression.
# The values are signed and are sorted from most positive to most negative.
importance = model.coef_[0]
feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)

st.write("## Feature Importance")
fig, ax = plt.subplots()
feature_importance.plot(kind='bar', ax=ax)
st.pyplot(fig)
# Streamlit re-executes this script on every interaction; without an explicit
# close each rerun would leave another figure registered with pyplot.
plt.close(fig)
|
60 |
+
|
61 |
+
# Model evaluation: predict on the held-out test partition
y_pred = model.predict(X_test)

# Confusion matrix
st.write("## Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
# Draw on `ax` explicitly instead of relying on pyplot's implicit "current
# axes", so the heatmap is guaranteed to land on the figure that is labelled
# and rendered below.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No Churn', 'Churn'],
    yticklabels=['No Churn', 'Churn'],
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
st.pyplot(fig)
# Release the figure; the script is re-run on every interaction and open
# figures would otherwise accumulate in pyplot's registry.
plt.close(fig)

# Classification report (rendered as preformatted text to keep the columns aligned)
st.write("## Classification Report")
st.text(classification_report(y_test, y_pred))
|
77 |
+
|
78 |
+
# Upload the trained model to Hugging Face when the button is clicked.
# The access token is read from the HF_TOKEN environment variable; nothing is
# uploaded when it is missing or empty.
if st.button('Upload Model to Hugging Face'):
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        # Imported lazily so the dependency is only loaded when an upload is requested.
        from huggingface_hub import HfApi

        api = HfApi()
        try:
            api.upload_file(
                path_or_fileobj='customer_churn_logres_model.pkl',
                path_in_repo='customer_churn_logres_model.pkl',
                repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
                token=hf_token,
            )
        except Exception as exc:
            # UI boundary: the upload can fail for many reasons (network, bad
            # token, missing repo). Report the failure in the page instead of
            # letting the script abort with a raw traceback.
            st.error(f"Model upload failed: {exc}")
        else:
            st.success("Model uploaded successfully!")
    else:
        st.error("HF_TOKEN environment variable not set.")
|
customer_churn_logres_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ce5b985e0333a3568b66958257baba9c1c1d8e88563f9ece335640a80720ecb
|
3 |
+
size 867
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
scikit-learn
|
5 |
+
matplotlib
|
6 |
+
huggingface_hub
|
7 |
+
seaborn
|