"""Streamlit app that trains several scikit-learn classifiers and shows
their performance, feature importances and feature correlations."""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
import streamlit as st
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree, export_text

# Page configuration (must run before any other Streamlit command).
st.set_page_config(
    page_title="ML Model Interpreter",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS
st.markdown(""" """, unsafe_allow_html=True)

# Metrics shown in the comparison chart, in display order.
METRIC_NAMES = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']

# Model names whose fitted estimator exposes ``feature_importances_``.
TREE_BASED_MODELS = ("Decision Tree", "Random Forest", "Gradient Boost")

TITLE_COLOR = '#0D47A1'


def load_data():
    """Read the train and test CSV files and split features from the target.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, feature_names)`` where
        ``feature_names`` is the column index of ``X_train``.
    """
    train_frame = pd.read_csv('exported_named_train_good.csv')
    test_frame = pd.read_csv('exported_named_test_good.csv')

    X_train = train_frame.drop("Target", axis=1)
    y_train = train_frame['Target']
    X_test = test_frame.drop('Target', axis=1)
    y_test = test_frame['Target']

    return X_train, y_train, X_test, y_test, X_train.columns


def _compute_metrics(model, X, y):
    """Return the metric dictionary of a fitted ``model`` on ``(X, y)``."""
    y_pred = model.predict(X)
    # ROC AUC needs a continuous score; hard labels collapse the ROC curve
    # to a single operating point. Column 1 is the probability of the
    # greater class label, which roc_auc_score treats as the positive class.
    y_score = model.predict_proba(X)[:, 1]
    return {
        'accuracy': accuracy_score(y, y_pred),
        # NOTE(review): f1 is weighted while precision/recall use the binary
        # default, so f1 is not the harmonic mean of the two values below.
        # Kept as-is to preserve the reported numbers; confirm the intent.
        'f1': f1_score(y, y_pred, average='weighted'),
        'precision': precision_score(y, y_pred),
        'recall': recall_score(y, y_pred),
        'roc_auc': roc_auc_score(y, y_score),
    }


def train_models(X_train, y_train, X_test, y_test):
    """Fit every candidate classifier and evaluate it on both splits.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels.

    Returns:
        A dict keyed by model name. Each value holds the fitted estimator
        under ``'model'`` and metric dicts under ``'train_metrics'`` and
        ``'test_metrics'``.
    """
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boost": GradientBoostingClassifier(random_state=42),
    }

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        results[name] = {
            'model': model,
            'train_metrics': _compute_metrics(model, X_train, y_train),
            'test_metrics': _compute_metrics(model, X_test, y_test),
        }
    return results


def custom_metric_card(title, value, prefix=""):
    """Return the markup for one metric card.

    Args:
        title: label displayed for the metric.
        value: numeric value, rendered with four decimals.
        prefix: optional text placed directly before the value.
    """
    return f"""

{title}

{prefix}{value:.4f}

"""


def set_plot_style(fig):
    """Apply the shared look to every axes of ``fig``.

    Returns:
        A tuple ``(fig, colors)`` where ``colors`` is the bar colour palette.
    """
    colors = ['#1E88E5', '#90CAF9', '#0D47A1', '#42A5F5']
    for ax in fig.axes:
        ax.set_facecolor('#F8F9FA')
        ax.grid(True, linestyle='--', alpha=0.3, color='#666666')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.tick_params(axis='both', colors='#666666')
        ax.set_axisbelow(True)
    return fig, colors


def _metrics_frame(results, split_key):
    """Build a metrics-by-model DataFrame for ``split_key``."""
    per_model = {
        name: [entry[split_key][metric] for metric in METRIC_NAMES]
        for name, entry in results.items()
    }
    return pd.DataFrame(per_model, index=METRIC_NAMES)


def plot_model_performance(results):
    """Draw side-by-side bar charts of training and test metrics.

    Args:
        results: the dict returned by ``train_models``.

    Returns:
        The matplotlib figure holding both charts.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    fig, colors = set_plot_style(fig)

    panels = (
        ('train_metrics', 'Performance d\'Entraînement'),
        ('test_metrics', 'Performance de Test'),
    )
    for ax, (split_key, title) in zip(axes, panels):
        _metrics_frame(results, split_key).plot(kind='bar', ax=ax, color=colors)
        ax.set_title(title, color=TITLE_COLOR, pad=20)
        ax.set_ylim(0, 1)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    return fig


def plot_feature_importance(model, feature_names, model_type):
    """Draw a horizontal bar chart of feature importances.

    Tree-based models use ``feature_importances_``; logistic regression uses
    the absolute value of its first coefficient row.

    Args:
        model: fitted estimator.
        feature_names: names matching the model's input columns.
        model_type: model name as used in ``train_models``.

    Returns:
        The matplotlib figure.

    Raises:
        ValueError: if ``model_type`` is not a supported model name.
    """
    if model_type in TREE_BASED_MODELS:
        importance = model.feature_importances_
    elif model_type == "Logistic Regression":
        importance = np.abs(model.coef_[0])
    else:
        # Previously fell through and failed later with UnboundLocalError.
        raise ValueError(f"Unsupported model type: {model_type!r}")

    fig, ax = plt.subplots(figsize=(10, 6))
    fig, _ = set_plot_style(fig)

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importance
    }).sort_values('importance', ascending=True)

    ax.barh(importance_df['feature'], importance_df['importance'],
            color='#1E88E5', alpha=0.8)
    ax.set_title("Importance des Caractéristiques", color=TITLE_COLOR, pad=20)
    return fig


def plot_correlation_matrix(data):
    """Draw an annotated heatmap of the pairwise correlations of ``data``."""
    fig, ax = plt.subplots(figsize=(10, 8))
    fig, _ = set_plot_style(fig)
    sns.heatmap(data.corr(), annot=True, cmap='coolwarm', center=0,
                ax=ax, fmt='.2f', square=True)
    ax.set_title("Matrice de Corrélation", color=TITLE_COLOR, pad=20)
    return fig


def _render_heading(text):
    """Render ``text`` as a padded markdown block.

    The literal is the heading text surrounded by blank lines, written with
    explicit newline escapes so the string is a valid single-line literal.
    NOTE(review): no wrapper HTML markup is present in this source; add it
    here if the headings are meant to be styled by the custom CSS.
    """
    st.markdown(f"\n\n{text}\n\n", unsafe_allow_html=True)


def _show_figure(fig):
    """Display ``fig`` and close it so reruns do not accumulate figures."""
    st.pyplot(fig)
    plt.close(fig)


def _render_metric_column(heading, metrics):
    """Render one column of metric cards under ``heading``."""
    _render_heading(heading)
    for metric, value in metrics.items():
        st.markdown(custom_metric_card(metric.capitalize(), value),
                    unsafe_allow_html=True)


def app():
    """Run the Streamlit page: sidebar navigation plus the selected section."""
    _render_heading("Interpréteur de Modèles ML")

    # Load data
    X_train, y_train, X_test, y_test, feature_names = load_data()

    # Train models once per session; later reruns reuse the stored results.
    if 'model_results' not in st.session_state:
        with st.spinner("🔄 Entraînement des modèles en cours..."):
            st.session_state.model_results = train_models(
                X_train, y_train, X_test, y_test)
    model_results = st.session_state.model_results

    # Sidebar
    with st.sidebar:
        _render_heading("Navigation")
        selected_model = st.selectbox(
            "📊 Sélectionnez un modèle",
            list(model_results.keys())
        )
        st.markdown("\n", unsafe_allow_html=True)
        page = st.radio(
            "📑 Sélectionnez une section",
            ["Performance des modèles",
             "Interprétation du modèle",
             "Analyse des caractéristiques",
             "Simulateur de prédictions"]
        )

    selected_results = model_results[selected_model]
    current_model = selected_results['model']

    # Main content
    if page == "Performance des modèles":
        _render_heading("Performance des modèles")
        _show_figure(plot_model_performance(model_results))

        _render_heading("Métriques détaillées")
        col1, col2 = st.columns(2)
        with col1:
            _render_metric_column("Entraînement",
                                  selected_results['train_metrics'])
        with col2:
            _render_metric_column("Test", selected_results['test_metrics'])

    elif page == "Analyse des caractéristiques":
        _render_heading("Analyse des caractéristiques")
        _show_figure(plot_feature_importance(
            current_model, feature_names, selected_model))

        _render_heading("Corrélations")
        _show_figure(plot_correlation_matrix(X_train))

    else:
        # The remaining sidebar entries have no implementation here; tell
        # the user instead of rendering an empty page.
        st.info("🚧 Cette section n'est pas encore disponible.")


if __name__ == "__main__":
    app()