louiecerv committed
Commit c1900ae · 1 Parent(s): 1afef48

sync with remote

WA_Fn-UseC_-Telco-Customer-Churn.csv ADDED
The diff for this file is too large to render. See raw diff
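The dataset itself is not rendered inline; a quick way to inspect it locally is a minimal sketch like the following (assuming pandas and the file in the working directory; the shape comment reflects the published Telco churn dataset):

    import pandas as pd

    df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
    print(df.shape)   # roughly 7,043 rows x 21 columns
    print(df.dtypes)  # note: TotalCharges loads as object (strings)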
 
app.py ADDED
@@ -0,0 +1,93 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import pickle
+ import os
+
+ from sklearn.model_selection import train_test_split
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
+ from sklearn.metrics import confusion_matrix, classification_report
+
+ # Load the dataset
+ df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
+
+ # Display the dataset
+ st.write("## Telco Customer Churn Dataset")
+ st.write(df)
+
+ # Preprocess the data: drop the identifier, coerce TotalCharges to numeric, drop NaN rows
+ df = df.drop(columns=['customerID'])
+ df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
+ df = df.dropna()
+
+ # Encode categorical features
+ label_encoders = {}
+ for column in df.select_dtypes(include=['object']).columns:
+     le = LabelEncoder()
+     df[column] = le.fit_transform(df[column])
+     label_encoders[column] = le
+
+ # Split the data into features and target
+ X = df.drop(columns=['Churn'])
+ y = df['Churn']
+
+ # Scale the features
+ scaler = StandardScaler()
+ X_scaled = scaler.fit_transform(X)
+
+ # Split the data into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+
+ # Train the logistic regression model with increased iterations and a different solver
+ model = LogisticRegression(max_iter=5000, solver='saga')  # 'saga' works well for large datasets
+ model.fit(X_train, y_train)
+
+ # Save the trained model to a pickle file
+ with open('customer_churn_logres_model.pkl', 'wb') as f:
+     pickle.dump(model, f)
+
+ # Plot feature importance (logistic-regression coefficients on the scaled features)
+ importance = model.coef_[0]
+ feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)
+
+ st.write("## Feature Importance")
+ fig, ax = plt.subplots()
+ feature_importance.plot(kind='bar', ax=ax)
+ st.pyplot(fig)
+
+ # Model evaluation
+ y_pred = model.predict(X_test)
+
+ # Confusion matrix
+ st.write("## Confusion Matrix")
+ cm = confusion_matrix(y_test, y_pred)
+ fig, ax = plt.subplots()
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['No Churn', 'Churn'], yticklabels=['No Churn', 'Churn'], ax=ax)
+ ax.set_xlabel('Predicted Label')
+ ax.set_ylabel('True Label')
+ ax.set_title('Confusion Matrix')
+ st.pyplot(fig)
+
+ # Classification report
+ st.write("## Classification Report")
+ st.text(classification_report(y_test, y_pred))
+
+ # Upload the trained model to Hugging Face when the button is clicked
+ if st.button('Upload Model to Hugging Face'):
+     hf_token = os.getenv("HF_TOKEN")
+     if hf_token:
+         from huggingface_hub import HfApi
+
+         api = HfApi()
+         api.upload_file(
+             path_or_fileobj='customer_churn_logres_model.pkl',
+             path_in_repo='customer_churn_logres_model.pkl',
+             repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
+             token=hf_token,
+         )
+         st.success("Model uploaded successfully!")
+     else:
+         st.error("HF_TOKEN environment variable not set.")
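The commit pickles only the fitted estimator; the StandardScaler and LabelEncoders above are not saved, so any consumer has to reproduce the same preprocessing before calling the model. A minimal inference sketch under that assumption (row is a hypothetical, already-encoded and already-scaled vector with the 19 features left after dropping customerID and Churn):

    import pickle
    import numpy as np

    # Load the model that app.py saved
    with open('customer_churn_logres_model.pkl', 'rb') as f:
        model = pickle.load(f)

    # Hypothetical preprocessed input; real inputs must pass through the
    # same LabelEncoders and StandardScaler fitted in app.py
    row = np.zeros((1, 19))
    print(f"Estimated churn probability: {model.predict_proba(row)[0, 1]:.3f}")

Pickling the scaler and encoders alongside the model (for example, as one dict) would make the uploaded artifact self-contained.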
customer_churn_logres_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ce5b985e0333a3568b66958257baba9c1c1d8e88563f9ece335640a80720ecb
+ size 867
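Because the .pkl is tracked with Git LFS, the blob above is only a pointer (oid and size), not the model bytes. Once the app's upload button has pushed the file, it can be retrieved from the Hub; a sketch using huggingface_hub (assuming the repo is public or a token is configured locally):

    import pickle
    from huggingface_hub import hf_hub_download

    # Download the uploaded pickle from the model repo named in app.py
    model_path = hf_hub_download(
        repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
        filename='customer_churn_logres_model.pkl',
    )
    with open(model_path, 'rb') as f:
        model = pickle.load(f)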
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ pandas
+ numpy
+ scikit-learn
+ matplotlib
+ huggingface_hub
+ seaborn
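The dependencies are left unpinned, so pip resolves the latest compatible versions. To reproduce the app locally, install them with "pip install -r requirements.txt" and launch with "streamlit run app.py".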