import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_text
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, roc_curve)
from dtreeviz.trees import dtreeviz  # function-based API of dtreeviz 1.x (2.x replaced it with dtreeviz.model())

data = pd.read_csv('exported_named_train_good.csv')
data_test = pd.read_csv('exported_named_test_good.csv')

X_train = data.drop("Target", axis=1).values
y_train = data['Target'].values
X_test = data_test.drop('Target', axis=1).values
y_test = data_test['Target'].values

models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boost": GradientBoostingClassifier()
}

for name, model in models.items():
    model.fit(X_train, y_train)

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Training set performance (precision/recall/ROC AUC assume a binary Target;
    # ROC AUC is computed on hard labels here, probability scores would be the usual choice)
    model_train_accuracy = accuracy_score(y_train, y_train_pred)
    model_train_f1 = f1_score(y_train, y_train_pred, average='weighted')
    model_train_precision = precision_score(y_train, y_train_pred)
    model_train_recall = recall_score(y_train, y_train_pred)
    model_train_rocauc_score = roc_auc_score(y_train, y_train_pred)

    # Test set performance
    model_test_accuracy = accuracy_score(y_test, y_test_pred)
    model_test_f1 = f1_score(y_test, y_test_pred, average='weighted')
    model_test_precision = precision_score(y_test, y_test_pred)
    model_test_recall = recall_score(y_test, y_test_pred)
    model_test_rocauc_score = roc_auc_score(y_test, y_test_pred)

    print(name)
    print('Model performance for Training set')
    print('- Accuracy: {:.4f}'.format(model_train_accuracy))
    print('- F1 score: {:.4f}'.format(model_train_f1))
    print('- Precision: {:.4f}'.format(model_train_precision))
    print('- Recall: {:.4f}'.format(model_train_recall))
    print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))
    print('----------------------------------')
    print('Model performance for Test set')
    print('- Accuracy: {:.4f}'.format(model_test_accuracy))
    print('- F1 score: {:.4f}'.format(model_test_f1))
    print('- Precision: {:.4f}'.format(model_test_precision))
    print('- Recall: {:.4f}'.format(model_test_recall))
    print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))
    print('=' * 35)
    print('\n')


def load_model_and_data():
    # Reuse the decision tree fitted above and reload the training data with its column names
    model = models['Decision Tree']
    data = pd.read_csv('exported_named_train_good.csv')
    X = data.drop("Target", axis=1)
    y = data['Target']
    return model, X, y, X.columns


def app():
    st.title("Decision Tree Interpreter")

    # Load the model and the data
    model, X, y, feature_names = load_model_and_data()

    if model is None:
        st.warning("Please load a model to get started.")
        return

    # Sidebar with the app sections
    st.sidebar.title("Navigation")
    page = st.sidebar.radio(
        "Select a section",
        ["Model overview", "Rule explorer",
"Analyse de cohortes", "Simulateur de prédictions"] ) # Vue globale du modèle if page == "Vue globale du modèle": st.header("Vue globale du modèle") col1, col2 = st.columns(2) with col1: st.subheader("Importance des caractéristiques") importance_plot = plt.figure(figsize=(10, 6)) feature_importance = pd.DataFrame({ 'feature': feature_names, 'importance': model.feature_importances_ }).sort_values('importance', ascending=True) plt.barh(feature_importance['feature'], feature_importance['importance']) st.pyplot(importance_plot) with col2: st.subheader("Statistiques du modèle") st.write(f"Profondeur de l'arbre: {model.get_depth()}") st.write(f"Nombre de feuilles: {model.get_n_leaves()}") # Explorateur de règles elif page == "Explorateur de règles": st.header("Explorateur de règles de décision") viz_type = st.radio( "Type de visualisation", ["Texte", "Graphique interactif"] ) max_depth = st.slider("Profondeur maximale à afficher", 1, model.get_depth(), 3) if viz_type == "Texte": tree_text = export_text(model, feature_names=list(feature_names), max_depth=max_depth) st.text(tree_text) else: # Création de la visualisation dtreeviz viz = dtreeviz( model, X, y, target_name="target", feature_names=list(feature_names), class_names=list(map(str, model.classes_)), max_depth=max_depth ) # Sauvegarde temporaire et affichage st.set_option('deprecation.showPyplotGlobalUse', False) fig = viz.view() st.pyplot(fig) # Analyse de cohortes elif page == "Analyse de cohortes": st.header("Analyse de cohortes") selected_features = st.multiselect( "Sélectionnez les caractéristiques pour définir les cohortes", feature_names, max_selections=2 ) if len(selected_features) > 0: def create_cohorts(X, features): cohort_def = X[features].copy() for feat in features: if X[feat].dtype == 'object' or len(X[feat].unique()) < 10: cohort_def[feat] = X[feat] else: cohort_def[feat] = pd.qcut(X[feat], q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']) return cohort_def.apply(lambda x: ' & '.join(x.astype(str)), axis=1) cohorts = create_cohorts(X, selected_features) cohort_analysis = pd.DataFrame({ 'Cohorte': cohorts, 'Prédiction': model.predict(X) }) cohort_stats = cohort_analysis.groupby('Cohorte')['Prédiction'].agg(['count', 'mean']) cohort_stats.columns = ['Nombre d\'observations', 'Taux de prédiction positive'] st.write("Statistiques par cohorte:") st.dataframe(cohort_stats) cohort_viz = plt.figure(figsize=(10, 6)) sns.barplot(data=cohort_analysis, x='Cohorte', y='Prédiction') plt.xticks(rotation=45) st.pyplot(cohort_viz) # Simulateur de prédictions else: st.header("Simulateur de prédictions") input_values = {} for feature in feature_names: if X[feature].dtype == 'object': input_values[feature] = st.selectbox( f"Sélectionnez {feature}", options=X[feature].unique() ) else: input_values[feature] = st.slider( f"Valeur pour {feature}", float(X[feature].min()), float(X[feature].max()), float(X[feature].mean()) ) if st.button("Prédire"): input_df = pd.DataFrame([input_values]) prediction = model.predict_proba(input_df) st.write("Probabilités prédites:") st.write({f"Classe {i}": f"{prob:.2%}" for i, prob in enumerate(prediction[0])}) st.subheader("Chemin de décision") node_indicator = model.decision_path(input_df) leaf_id = model.apply(input_df) node_index = node_indicator.indices[node_indicator.indptr[0]:node_indicator.indptr[1]] rules = [] for node_id in node_index: if node_id != leaf_id[0]: threshold = model.tree_.threshold[node_id] feature = feature_names[model.tree_.feature[node_id]] if input_df.iloc[0][feature] <= threshold: 
rules.append(f"{feature} ≤ {threshold:.2f}") else: rules.append(f"{feature} > {threshold:.2f}") for rule in rules: st.write(rule) if __name__ == "__main__": app()