louiecerv committed
Commit c1900ae · 1 Parent(s): 1afef48

sync with remote

WA_Fn-UseC_-Telco-Customer-Churn.csv ADDED
The diff for this file is too large to render. See raw diff
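The dataset itself is not rendered inline; a quick way to inspect it locally is a minimal sketch like the following (assuming pandas and the file in the working directory; the shape comment reflects the published Telco churn dataset):

    import pandas as pd

    df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
    print(df.shape)   # roughly 7,043 rows x 21 columns
    print(df.dtypes)  # note: TotalCharges loads as object (strings)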
 
app.py ADDED
@@ -0,0 +1,93 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import pickle
+ import os
+
+ from sklearn.model_selection import train_test_split
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
+ from sklearn.metrics import confusion_matrix, classification_report
+
+ # Load the dataset
+ df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
+
+ # Display the dataset
+ st.write("## Telco Customer Churn Dataset")
+ st.write(df)
+
+ # Preprocess the data: drop the identifier, coerce TotalCharges to numeric, drop NaN rows
+ df = df.drop(columns=['customerID'])
+ df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
+ df = df.dropna()
+
+ # Encode categorical features
+ label_encoders = {}
+ for column in df.select_dtypes(include=['object']).columns:
+     le = LabelEncoder()
+     df[column] = le.fit_transform(df[column])
+     label_encoders[column] = le
+
+ # Split the data into features and target
+ X = df.drop(columns=['Churn'])
+ y = df['Churn']
+
+ # Scale the features
+ scaler = StandardScaler()
+ X_scaled = scaler.fit_transform(X)
+
+ # Split the data into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+
+ # Train the logistic regression model with increased iterations and a different solver
+ model = LogisticRegression(max_iter=5000, solver='saga')  # 'saga' works well for large datasets
+ model.fit(X_train, y_train)
+
+ # Save the trained model to a pickle file
+ with open('customer_churn_logres_model.pkl', 'wb') as f:
+     pickle.dump(model, f)
+
+ # Plot feature importance (logistic-regression coefficients on the scaled features)
+ importance = model.coef_[0]
+ feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)
+
+ st.write("## Feature Importance")
+ fig, ax = plt.subplots()
+ feature_importance.plot(kind='bar', ax=ax)
+ st.pyplot(fig)
+
+ # Model evaluation
+ y_pred = model.predict(X_test)
+
+ # Confusion matrix
+ st.write("## Confusion Matrix")
+ cm = confusion_matrix(y_test, y_pred)
+ fig, ax = plt.subplots()
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['No Churn', 'Churn'], yticklabels=['No Churn', 'Churn'], ax=ax)
+ ax.set_xlabel('Predicted Label')
+ ax.set_ylabel('True Label')
+ ax.set_title('Confusion Matrix')
+ st.pyplot(fig)
+
+ # Classification report
+ st.write("## Classification Report")
+ st.text(classification_report(y_test, y_pred))
+
+ # Upload the trained model to Hugging Face when the button is clicked
+ if st.button('Upload Model to Hugging Face'):
+     hf_token = os.getenv("HF_TOKEN")
+     if hf_token:
+         from huggingface_hub import HfApi
+
+         api = HfApi()
+         api.upload_file(
+             path_or_fileobj='customer_churn_logres_model.pkl',
+             path_in_repo='customer_churn_logres_model.pkl',
+             repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
+             token=hf_token,
+         )
+         st.success("Model uploaded successfully!")
+     else:
+         st.error("HF_TOKEN environment variable not set.")
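The commit pickles only the fitted estimator; the StandardScaler and LabelEncoders above are not saved, so any consumer has to reproduce the same preprocessing before calling the model. A minimal inference sketch under that assumption (row is a hypothetical, already-encoded and already-scaled vector with the 19 features left after dropping customerID and Churn):

    import pickle
    import numpy as np

    # Load the model that app.py saved
    with open('customer_churn_logres_model.pkl', 'rb') as f:
        model = pickle.load(f)

    # Hypothetical preprocessed input; real inputs must pass through the
    # same LabelEncoders and StandardScaler fitted in app.py
    row = np.zeros((1, 19))
    print(f"Estimated churn probability: {model.predict_proba(row)[0, 1]:.3f}")

Pickling the scaler and encoders alongside the model (for example, as one dict) would make the uploaded artifact self-contained.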
customer_churn_logres_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ce5b985e0333a3568b66958257baba9c1c1d8e88563f9ece335640a80720ecb
+ size 867
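Because the .pkl is tracked with Git LFS, the blob above is only a pointer (oid and size), not the model bytes. Once the app's upload button has pushed the file, it can be retrieved from the Hub; a sketch using huggingface_hub (assuming the repo is public or a token is configured locally):

    import pickle
    from huggingface_hub import hf_hub_download

    # Download the uploaded pickle from the model repo named in app.py
    model_path = hf_hub_download(
        repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
        filename='customer_churn_logres_model.pkl',
    )
    with open(model_path, 'rb') as f:
        model = pickle.load(f)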
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ pandas
+ numpy
+ scikit-learn
+ matplotlib
+ huggingface_hub
+ seaborn
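The dependencies are left unpinned, so pip resolves the latest compatible versions. To reproduce the app locally, install them with "pip install -r requirements.txt" and launch with "streamlit run app.py".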