# Streamlit app: train and evaluate a logistic-regression churn model on the
# Telco Customer Churn dataset, show diagnostics, and optionally upload the
# pickled model to the Hugging Face Hub.
#
# The script runs top to bottom on every Streamlit interaction (including the
# upload button click), so the model is retrained and re-pickled on each rerun.

import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Display the dataset
st.write("## Telco Customer Churn Dataset")
st.write(df)

# Preprocess the data: drop the identifier column, coerce TotalCharges to a
# number (unparseable values become NaN), then drop rows with missing values.
df = df.drop(columns=['customerID'])
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna()

# Encode categorical features; each fitted encoder is kept per column so the
# integer codes can be mapped back to the original labels.
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Split the data into features and target
X = df.drop(columns=['Churn'])
y = df['Churn']

# Split the data into training and testing sets BEFORE scaling, so the test
# rows do not influence the scaler's mean/variance (avoids data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features: fit on the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Train the logistic regression model with increased iterations and a different solver
model = LogisticRegression(max_iter=5000, solver='saga')  # 'saga' works well for large datasets
model.fit(X_train, y_train)

# Save the trained model to a pickle file.
# NOTE: only the model is pickled; the fitted scaler and label encoders are
# not saved, so consumers must reproduce the same preprocessing themselves.
with open('customer_churn_logres_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Plot feature importance (logistic-regression coefficients on scaled features)
importance = model.coef_[0]
feature_importance = pd.Series(importance, index=X.columns).sort_values(ascending=False)
st.write("## Feature Importance")
fig, ax = plt.subplots()
feature_importance.plot(kind='bar', ax=ax)
st.pyplot(fig)
# Close the figure so reruns do not accumulate open figures in pyplot.
plt.close(fig)

# Model evaluation
y_pred = model.predict(X_test)

# Confusion matrix
st.write("## Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No Churn', 'Churn'],
    yticklabels=['No Churn', 'Churn'],
    ax=ax,  # draw on the axes that is labelled below, not the implicit current one
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
st.pyplot(fig)
plt.close(fig)

# Classification report
st.write("## Classification Report")
st.text(classification_report(y_test, y_pred))

# Upload the trained model to Hugging Face when the button is clicked
if st.button('Upload Model to Hugging Face'):
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        # Imported lazily so the app still runs when huggingface_hub is not
        # installed and the button is never clicked.
        from huggingface_hub import HfApi

        api = HfApi()
        api.upload_file(
            path_or_fileobj='customer_churn_logres_model.pkl',
            path_in_repo='customer_churn_logres_model.pkl',
            repo_id='wvsu-dti-aidev-team/customer_churn_logres_model',
            token=hf_token,
        )
        st.success("Model uploaded successfully!")
    else:
        st.error("HF_TOKEN environment variable not set.")