import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import plotly.express as px
import plotly.graph_objects as go

# Page configuration
st.set_page_config(layout="wide", page_title="ML Dashboard")

# Name of the label column expected in every dataset.
_TARGET_COLUMN = "Target"

# Display name -> estimator class. Classes (not instances) are stored so that
# only the requested estimator is constructed.
_MODEL_FACTORIES = {
    "Logistic Regression": LogisticRegression,
    "Decision Tree": DecisionTreeClassifier,
    "Random Forest": RandomForestClassifier,
    "Gradient Boost": GradientBoostingClassifier,
}
_MODEL_NAMES = list(_MODEL_FACTORIES)


@st.cache_data
def load_data(file):
    """Read a CSV file (path or file-like object) into a DataFrame.

    The result is memoized by Streamlit through ``st.cache_data``.
    """
    data = pd.read_csv(file)
    return data


def train_model(X_train, y_train, model_name, **params):
    """Build the estimator registered under *model_name* and fit it.

    Args:
        X_train: Feature matrix passed to ``fit``.
        y_train: Target values passed to ``fit``.
        model_name: One of "Logistic Regression", "Decision Tree",
            "Random Forest" or "Gradient Boost".
        **params: Optional keyword arguments forwarded to the estimator
            constructor (for example ``n_estimators``). Omitting them keeps
            the estimator defaults.

    Returns:
        The fitted estimator.

    Raises:
        KeyError: If *model_name* is not a registered model.
    """
    model = _MODEL_FACTORIES[model_name](**params)
    model.fit(X_train, y_train)
    return model


def _load_default(path):
    """Return the DataFrame stored at *path*, or None when it cannot be read."""
    try:
        return load_data(path)
    except (OSError, ValueError):
        # Missing/unreadable file (OSError) or empty/malformed CSV (pandas
        # parsing errors derive from ValueError). The dashboard then starts
        # without default data instead of crashing.
        return None


def _split_features_target(data):
    """Split *data* into ``(X, y)``; show an error and return None if the
    target column is absent."""
    if _TARGET_COLUMN not in data.columns:
        st.error(f"La colonne '{_TARGET_COLUMN}' est introuvable dans les données.")
        return None
    # NOTE(review): no encoding is applied to object-dtype columns here, while
    # the prediction form explicitly supports them — confirm the datasets are
    # fully numeric or add an encoding step.
    return data.drop(_TARGET_COLUMN, axis=1), data[_TARGET_COLUMN]


def _render_overview():
    """Overview page: data upload, model choice, metrics and charts."""
    st.title("Tableau de bord ML")

    upload_col, model_col = st.columns([2, 1])
    with upload_col:
        # Data upload
        uploaded_file = st.file_uploader("Charger de nouvelles données", type=['csv'])
        if uploaded_file is not None:
            st.session_state.data = load_data(uploaded_file)
    with model_col:
        # Model selection
        model_name = st.selectbox("Sélectionner un modèle", _MODEL_NAMES)

    data = st.session_state.data
    if data is None:
        return
    split = _split_features_target(data)
    if split is None:
        return
    X, y = split

    # The model is fitted and evaluated on the same data.
    model = train_model(X, y, model_name)
    y_pred = model.predict(X)
    # ROC AUC is computed from the positive-class score, not from hard labels.
    y_score = model.predict_proba(X)[:, 1]

    # Main metrics
    metrics = [
        ("Accuracy", accuracy_score(y, y_pred)),
        ("Precision", precision_score(y, y_pred)),
        ("Recall", recall_score(y, y_pred)),
        ("F1-Score", f1_score(y, y_pred)),
        ("ROC AUC", roc_auc_score(y, y_score)),
    ]
    for column, (label, value) in zip(st.columns(len(metrics)), metrics):
        with column:
            st.metric(label, f"{value:.2%}")

    # Charts
    importance_col, confusion_col = st.columns(2)
    with importance_col:
        st.subheader("Importance des features")
        # Only tree-based estimators expose feature_importances_.
        if hasattr(model, 'feature_importances_'):
            importances = pd.DataFrame({
                'feature': X.columns,
                'importance': model.feature_importances_,
            }).sort_values('importance', ascending=True)
            fig = px.bar(importances, x='importance', y='feature', orientation='h')
            st.plotly_chart(fig, use_container_width=True)
    with confusion_col:
        st.subheader("Matrice de confusion")
        cm = confusion_matrix(y, y_pred)
        # text_auto annotates each cell with its count.
        fig = px.imshow(cm, labels=dict(x="Prédit", y="Réel"), text_auto=True)
        st.plotly_chart(fig, use_container_width=True)


def _render_prediction():
    """Prediction page: one input widget per feature, then class probabilities."""
    st.title("Prédiction")

    data = st.session_state.data
    if data is None:
        return
    split = _split_features_target(data)
    if split is None:
        return
    X, y = split

    # Prediction form
    st.subheader("Entrer les valeurs pour la prédiction")
    input_values = {}
    cols = st.columns(3)
    for idx, feature in enumerate(X.columns):
        with cols[idx % 3]:
            if X[feature].dtype == 'object':
                input_values[feature] = st.selectbox(
                    f"{feature}",
                    options=X[feature].unique()
                )
            else:
                input_values[feature] = st.number_input(
                    f"{feature}",
                    value=float(X[feature].mean())
                )

    if st.button("Prédire"):
        model = train_model(X, y, "Random Forest")
        # Keep the column order identical to the training frame.
        sample = pd.DataFrame([input_values], columns=X.columns)
        pred = model.predict_proba(sample)

        st.subheader("Résultat de la prédiction")
        proba_df = pd.DataFrame({
            # Use the classes the model actually learned instead of assuming 0/1.
            'Classe': [str(label) for label in model.classes_],
            'Probabilité': pred[0],
        })
        fig = px.bar(proba_df, x='Classe', y='Probabilité')
        st.plotly_chart(fig)


def _render_interpretation():
    """Interpretation page: scatter plot of two chosen features, colored by target."""
    st.title("Interprétation du modèle")

    data = st.session_state.data
    if data is None:
        return

    st.subheader("Analyse des features")
    split = _split_features_target(data)
    if split is None:
        return
    X, _ = split

    feature_1 = st.selectbox("Sélectionner la première feature", X.columns)
    feature_2 = st.selectbox("Sélectionner la deuxième feature", X.columns)

    fig = px.scatter(data, x=feature_1, y=feature_2,
                     color='Target',
                     title=f"Relation entre {feature_1} et {feature_2}")
    st.plotly_chart(fig)


def _render_training():
    """Training page: choose a model and its parameters, fit it, show metrics."""
    st.title("Entraînement du modèle")

    data = st.session_state.data
    if data is None:
        return

    # Training options
    model_name = st.selectbox("Sélectionner le modèle à entraîner", _MODEL_NAMES)

    # Model parameters
    st.subheader("Paramètres du modèle")
    params = {}
    if model_name == "Random Forest":
        params["n_estimators"] = st.slider("Nombre d'arbres", 10, 200, 100)
        params["max_depth"] = st.slider("Profondeur maximale", 1, 20, 10)

    if st.button("Entraîner le modèle"):
        split = _split_features_target(data)
        if split is None:
            return
        X, y = split

        with st.spinner("Entraînement en cours..."):
            # Forward the slider values so they actually configure the model.
            model = train_model(X, y, model_name, **params)
            st.success("Modèle entraîné avec succès!")

        # Show the metrics (computed on the training data).
        y_pred = model.predict(X)
        metrics = [
            ("Accuracy", accuracy_score(y, y_pred)),
            ("Precision", precision_score(y, y_pred)),
            ("Recall", recall_score(y, y_pred)),
        ]
        for column, (label, value) in zip(st.columns(len(metrics)), metrics):
            with column:
                st.metric(label, f"{value:.2%}")


# Sidebar entry -> function rendering that page (insertion order is menu order).
_PAGES = {
    "📊 Vue d'ensemble": _render_overview,
    "🎯 Prédiction": _render_prediction,
    "🔍 Interprétation": _render_interpretation,
    "⚙️ Entraînement": _render_training,
}


def app():
    """Entry point: build the sidebar, load default data once, render the page."""
    # Sidebar navigation
    st.sidebar.title("Navigation")
    # A non-empty label is supplied and visually hidden instead of passing "".
    page = st.sidebar.radio("Navigation", list(_PAGES), label_visibility="collapsed")

    # Load the default datasets once per session. Each file is loaded
    # independently so a missing test file does not discard the training data.
    if 'data' not in st.session_state:
        st.session_state.data = _load_default('exported_named_train_good.csv')
        st.session_state.test_data = _load_default('exported_named_test_good.csv')

    _PAGES[page]()


if __name__ == '__main__':
    app()