analist committed on
Commit
1d486bb
·
verified ·
1 Parent(s): 8384234

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -244
app.py CHANGED
@@ -2,265 +2,199 @@ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
- from sklearn.tree import plot_tree, export_text
6
  import seaborn as sns
7
  from sklearn.preprocessing import LabelEncoder
8
  from sklearn.ensemble import RandomForestClassifier
9
  from sklearn.tree import DecisionTreeClassifier
10
  from sklearn.ensemble import GradientBoostingClassifier
11
  from sklearn.linear_model import LogisticRegression
12
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
13
- import shap
 
14
 
15
- def load_data():
16
- data = pd.read_csv('exported_named_train_good.csv')
17
- data_test = pd.read_csv('exported_named_test_good.csv')
18
- X_train = data.drop("Target", axis=1)
19
- y_train = data['Target']
20
- X_test = data_test.drop('Target', axis=1)
21
- y_test = data_test['Target']
22
- return X_train, y_train, X_test, y_test, X_train.columns
23
 
24
- def train_models(X_train, y_train, X_test, y_test):
25
- models = {
26
- "Logistic Regression": LogisticRegression(random_state=42),
27
- "Decision Tree": DecisionTreeClassifier(random_state=42),
28
- "Random Forest": RandomForestClassifier(random_state=42),
29
- "Gradient Boost": GradientBoostingClassifier(random_state=42)
30
- }
31
-
32
- results = {}
33
- for name, model in models.items():
34
- model.fit(X_train, y_train)
35
-
36
- # Predictions
37
- y_train_pred = model.predict(X_train)
38
- y_test_pred = model.predict(X_test)
39
-
40
- # Metrics
41
- results[name] = {
42
- 'model': model,
43
- 'train_metrics': {
44
- 'accuracy': accuracy_score(y_train, y_train_pred),
45
- 'f1': f1_score(y_train, y_train_pred, average='weighted'),
46
- 'precision': precision_score(y_train, y_train_pred),
47
- 'recall': recall_score(y_train, y_train_pred),
48
- 'roc_auc': roc_auc_score(y_train, y_train_pred)
49
- },
50
- 'test_metrics': {
51
- 'accuracy': accuracy_score(y_test, y_test_pred),
52
- 'f1': f1_score(y_test, y_test_pred, average='weighted'),
53
- 'precision': precision_score(y_test, y_test_pred),
54
- 'recall': recall_score(y_test, y_test_pred),
55
- 'roc_auc': roc_auc_score(y_test, y_test_pred)
56
- }
57
- }
58
-
59
- return results
60
 
61
- def plot_model_performance(results):
62
- metrics = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
63
- fig, axes = plt.subplots(1, 2, figsize=(15, 6))
64
-
65
- # Training metrics
66
- train_data = {model: [results[model]['train_metrics'][metric] for metric in metrics]
67
- for model in results.keys()}
68
- train_df = pd.DataFrame(train_data, index=metrics)
69
- train_df.plot(kind='bar', ax=axes[0], title='Training Performance')
70
- axes[0].set_ylim(0, 1)
71
-
72
- # Test metrics
73
- test_data = {model: [results[model]['test_metrics'][metric] for metric in metrics]
74
- for model in results.keys()}
75
- test_df = pd.DataFrame(test_data, index=metrics)
76
- test_df.plot(kind='bar', ax=axes[1], title='Test Performance')
77
- axes[1].set_ylim(0, 1)
78
-
79
- plt.tight_layout()
80
- return fig
81
 
82
- def plot_feature_importance(model, feature_names, model_type):
83
- plt.figure(figsize=(10, 6))
84
-
85
- if model_type in ["Decision Tree", "Random Forest", "Gradient Boost"]:
86
- importance = model.feature_importances_
87
- elif model_type == "Logistic Regression":
88
- importance = np.abs(model.coef_[0])
89
-
90
- importance_df = pd.DataFrame({
91
- 'feature': feature_names,
92
- 'importance': importance
93
- }).sort_values('importance', ascending=True)
94
-
95
- plt.barh(importance_df['feature'], importance_df['importance'])
96
- plt.title(f"Feature Importance - {model_type}")
97
- return plt.gcf()
98
 
99
- import streamlit as st
100
- import pandas as pd
101
- import numpy as np
102
- import matplotlib.pyplot as plt
103
- from sklearn.tree import DecisionTreeClassifier
104
- from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
105
- from sklearn.linear_model import LogisticRegression
106
- from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
107
- import seaborn as sns
108
 
109
- # Configuration de la page
110
- st.set_page_config(layout="wide", page_title="ML Dashboard")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- # Style personnalisé
113
- st.markdown("""
114
- <style>
115
- /* Cartes stylisées */
116
- div.css-1r6slb0.e1tzin5v2 {
117
- background-color: #FFFFFF;
118
- border: 1px solid #EEEEEE;
119
- padding: 1.5rem;
120
- border-radius: 10px;
121
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
122
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- /* Headers */
125
- .main-header {
126
- font-size: 2rem;
127
- font-weight: 700;
128
- color: #1E88E5;
129
- text-align: center;
130
- margin-bottom: 2rem;
131
- }
132
-
133
- /* Metric containers */
134
- div.css-12w0qpk.e1tzin5v2 {
135
- background-color: #F8F9FA;
136
- padding: 1rem;
137
- border-radius: 8px;
138
- text-align: center;
139
- }
140
-
141
- /* Metric values */
142
- div.css-1xarl3l.e16fv1kl1 {
143
- font-size: 1.8rem;
144
- font-weight: 700;
145
- color: #1E88E5;
146
- }
147
- </style>
148
- """, unsafe_allow_html=True)
149
 
150
- def plot_performance_comparison(results, metric='test_metrics'):
151
- """Crée un graphique de comparaison des performances avec des couleurs distinctes"""
152
- metrics = ['accuracy', 'f1', 'recall', 'roc_auc']
153
- model_names = list(results.keys())
154
-
155
- # Définir des couleurs distinctes pour chaque modèle
156
- colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
157
-
158
- data = {model: [results[model][metric][m] for m in metrics]
159
- for model in model_names}
160
-
161
- fig, ax = plt.subplots(figsize=(10, 6))
162
- x = np.arange(len(metrics))
163
- width = 0.2
164
-
165
- for i, (model, values) in enumerate(data.items()):
166
- ax.bar(x + i*width, values, width, label=model, color=colors[i])
167
-
168
- ax.set_ylabel('Score')
169
- ax.set_title(f'Comparaison des performances ({metric.split("_")[0].title()})')
170
- ax.set_xticks(x + width * (len(model_names)-1)/2)
171
- ax.set_xticklabels(metrics)
172
- ax.legend()
173
- ax.grid(True, alpha=0.3)
174
- plt.ylim(0, 1)
175
-
176
- return fig
 
 
 
 
 
177
 
178
- def create_metric_card(title, value):
179
- """Crée une carte de métrique stylisée"""
180
- st.markdown(f"""
181
- <div style="
182
- background-color: white;
183
- padding: 1rem;
184
- border-radius: 8px;
185
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
186
- text-align: center;
187
- margin-bottom: 1rem;
188
- ">
189
- <h3 style="color: #666; font-size: 1rem; margin-bottom: 0.5rem;">{title}</h3>
190
- <p style="color: #1E88E5; font-size: 1.8rem; font-weight: bold; margin: 0;">{value:.3f}</p>
191
- </div>
192
- """, unsafe_allow_html=True)
193
 
194
- def app():
195
- # Header
196
- st.markdown('<h1 class="main-header">Tableau de Bord ML</h1>', unsafe_allow_html=True)
197
-
198
- # Charger et préparer les données
199
- X_train, y_train, X_test, y_test, feature_names = load_data()
200
-
201
- # Sidebar pour la sélection du modèle
202
- with st.sidebar:
203
- st.markdown('<h2 style="color: #1E88E5;">Configuration</h2>', unsafe_allow_html=True)
204
- selected_model = st.selectbox(
205
- "Sélectionner un modèle",
206
- ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
207
- )
208
-
209
- # Entraînement des modèles si pas déjà fait
210
- if 'model_results' not in st.session_state:
211
- with st.spinner("⏳ Entraînement des modèles..."):
212
- st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)
213
-
214
- # Layout principal
215
- col1, col2 = st.columns([2, 1])
216
-
217
- with col1:
218
- # Graphiques de performance
219
- st.markdown("### 📊 Comparaison des Performances")
220
-
221
- tab1, tab2 = st.tabs(["🎯 Test", "📈 Entraînement"])
222
-
223
- with tab1:
224
- fig_test = plot_performance_comparison(st.session_state.model_results, 'test_metrics')
225
- st.pyplot(fig_test)
226
-
227
- with tab2:
228
- fig_train = plot_performance_comparison(st.session_state.model_results, 'train_metrics')
229
- st.pyplot(fig_train)
230
-
231
- with col2:
232
- # Métriques détaillées du modèle sélectionné
233
- st.markdown(f"### 📌 Métriques - {selected_model}")
234
-
235
- metrics = st.session_state.model_results[selected_model]['test_metrics']
236
- for metric, value in metrics.items():
237
- if metric != 'precision': # On exclut la précision
238
- create_metric_card(metric.upper(), value)
239
-
240
- # Section inférieure
241
- st.markdown("### 🔍 Analyse Détaillée")
242
- col3, col4 = st.columns(2)
243
-
244
- with col3:
245
- # Feature Importance
246
- current_model = st.session_state.model_results[selected_model]['model']
247
- if hasattr(current_model, 'feature_importances_') or hasattr(current_model, 'coef_'):
248
- fig_importance = plt.figure(figsize=(10, 6))
249
- if hasattr(current_model, 'feature_importances_'):
250
- importances = current_model.feature_importances_
251
- else:
252
- importances = np.abs(current_model.coef_[0])
253
-
254
- plt.barh(feature_names, importances)
255
- plt.title("Importance des Caractéristiques")
256
- st.pyplot(fig_importance)
257
-
258
- with col4:
259
- # Matrice de corrélation
260
- fig_corr = plt.figure(figsize=(10, 8))
261
- sns.heatmap(X_train.corr(), annot=True, cmap='coolwarm', center=0)
262
- plt.title("Matrice de Corrélation")
263
- st.pyplot(fig_corr)
264
 
265
- if __name__ == "__main__":
266
- app()
 
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
 
5
  import seaborn as sns
6
  from sklearn.preprocessing import LabelEncoder
7
  from sklearn.ensemble import RandomForestClassifier
8
  from sklearn.tree import DecisionTreeClassifier
9
  from sklearn.ensemble import GradientBoostingClassifier
10
  from sklearn.linear_model import LogisticRegression
11
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
12
+ import plotly.express as px
13
+ import plotly.graph_objects as go
14
 
15
+ # Configuration de la page
16
+ st.set_page_config(layout="wide", page_title="ML Dashboard")
 
 
 
 
 
 
17
 
18
+ # Fonction pour charger les données
19
+ @st.cache_data
20
+ def load_data(file):
21
+ data = pd.read_csv(file)
22
+ return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Fonction pour entraîner les modèles
25
+ def train_model(X_train, y_train, model_name):
26
+ models = {
27
+ "Logistic Regression": LogisticRegression(),
28
+ "Decision Tree": DecisionTreeClassifier(),
29
+ "Random Forest": RandomForestClassifier(),
30
+ "Gradient Boost": GradientBoostingClassifier()
31
+ }
32
+ model = models[model_name]
33
+ model.fit(X_train, y_train)
34
+ return model
 
 
 
 
 
 
 
 
 
35
 
36
+ # Sidebar pour la navigation
37
+ st.sidebar.title("Navigation")
38
+ page = st.sidebar.radio("", ["📊 Vue d'ensemble", "🎯 Prédiction", "🔍 Interprétation", "⚙️ Entraînement"])
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ # Charger les données par défaut
41
+ if 'data' not in st.session_state:
42
+ try:
43
+ st.session_state.data = load_data('exported_named_train_good.csv')
44
+ st.session_state.test_data = load_data('exported_named_test_good.csv')
45
+ except:
46
+ st.session_state.data = None
47
+ st.session_state.test_data = None
 
48
 
49
+ # Vue d'ensemble
50
+ if page == "📊 Vue d'ensemble":
51
+ st.title("Tableau de bord ML")
52
+
53
+ # Layout en colonnes
54
+ col1, col2 = st.columns([2, 1])
55
+
56
+ with col1:
57
+ # Upload de données
58
+ uploaded_file = st.file_uploader("Charger de nouvelles données", type=['csv'])
59
+ if uploaded_file is not None:
60
+ st.session_state.data = load_data(uploaded_file)
61
+
62
+ with col2:
63
+ # Sélection du modèle
64
+ model_name = st.selectbox(
65
+ "Sélectionner un modèle",
66
+ ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
67
+ )
68
+
69
+ if st.session_state.data is not None:
70
+ # Métriques principales
71
+ col1, col2, col3, col4, col5 = st.columns(5)
72
+
73
+ # Supposons que nous avons déjà un modèle entraîné
74
+ X = st.session_state.data.drop("Target", axis=1)
75
+ y = st.session_state.data["Target"]
76
+ model = train_model(X, y, model_name)
77
+ y_pred = model.predict(X)
78
+
79
+ with col1:
80
+ st.metric("Accuracy", f"{accuracy_score(y, y_pred):.2%}")
81
+ with col2:
82
+ st.metric("Precision", f"{precision_score(y, y_pred):.2%}")
83
+ with col3:
84
+ st.metric("Recall", f"{recall_score(y, y_pred):.2%}")
85
+ with col4:
86
+ st.metric("F1-Score", f"{f1_score(y, y_pred):.2%}")
87
+ with col5:
88
+ st.metric("ROC AUC", f"{roc_auc_score(y, y_pred):.2%}")
89
+
90
+ # Graphiques
91
+ col1, col2 = st.columns(2)
92
+
93
+ with col1:
94
+ st.subheader("Importance des features")
95
+ if hasattr(model, 'feature_importances_'):
96
+ importances = pd.DataFrame({
97
+ 'feature': X.columns,
98
+ 'importance': model.feature_importances_
99
+ }).sort_values('importance', ascending=True)
100
+ fig = px.bar(importances, x='importance', y='feature', orientation='h')
101
+ st.plotly_chart(fig, use_container_width=True)
102
+
103
+ with col2:
104
+ st.subheader("Matrice de confusion")
105
+ cm = confusion_matrix(y, y_pred)
106
+ fig = px.imshow(cm,
107
+ labels=dict(x="Prédit", y="Réel"),
108
+ text=cm)
109
+ st.plotly_chart(fig, use_container_width=True)
110
 
111
+ elif page == "🎯 Prédiction":
112
+ st.title("Prédiction")
113
+
114
+ if st.session_state.data is not None:
115
+ X = st.session_state.data.drop("Target", axis=1)
116
+
117
+ # Interface de prédiction
118
+ st.subheader("Entrer les valeurs pour la prédiction")
119
+
120
+ input_values = {}
121
+ cols = st.columns(3)
122
+ for idx, feature in enumerate(X.columns):
123
+ with cols[idx % 3]:
124
+ if X[feature].dtype == 'object':
125
+ input_values[feature] = st.selectbox(
126
+ f"{feature}",
127
+ options=X[feature].unique()
128
+ )
129
+ else:
130
+ input_values[feature] = st.number_input(
131
+ f"{feature}",
132
+ value=float(X[feature].mean())
133
+ )
134
+
135
+ if st.button("Prédire"):
136
+ model = train_model(X, st.session_state.data["Target"], "Random Forest")
137
+ pred = model.predict_proba(pd.DataFrame([input_values]))
138
+
139
+ st.subheader("Résultat de la prédiction")
140
+ proba_df = pd.DataFrame({
141
+ 'Classe': ['0', '1'],
142
+ 'Probabilité': pred[0]
143
+ })
144
+ fig = px.bar(proba_df, x='Classe', y='Probabilité')
145
+ st.plotly_chart(fig)
146
 
147
+ elif page == "🔍 Interprétation":
148
+ st.title("Interprétation du modèle")
149
+
150
+ if st.session_state.data is not None:
151
+ # SHAP values ou autres méthodes d'interprétation
152
+ st.subheader("Analyse des features")
153
+ X = st.session_state.data.drop("Target", axis=1)
154
+ y = st.session_state.data["Target"]
155
+
156
+ feature_1 = st.selectbox("Sélectionner la première feature", X.columns)
157
+ feature_2 = st.selectbox("Sélectionner la deuxième feature", X.columns)
158
+
159
+ fig = px.scatter(st.session_state.data,
160
+ x=feature_1,
161
+ y=feature_2,
162
+ color='Target',
163
+ title=f"Relation entre {feature_1} et {feature_2}")
164
+ st.plotly_chart(fig)
 
 
 
 
 
 
 
165
 
166
+ elif page == "⚙️ Entraînement":
167
+ st.title("Entraînement du modèle")
168
+
169
+ if st.session_state.data is not None:
170
+ # Options d'entraînement
171
+ model_name = st.selectbox(
172
+ "Sélectionner le modèle à entraîner",
173
+ ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
174
+ )
175
+
176
+ # Paramètres du modèle
177
+ st.subheader("Paramètres du modèle")
178
+ if model_name == "Random Forest":
179
+ n_estimators = st.slider("Nombre d'arbres", 10, 200, 100)
180
+ max_depth = st.slider("Profondeur maximale", 1, 20, 10)
181
+
182
+ if st.button("Entraîner le modèle"):
183
+ with st.spinner("Entraînement en cours..."):
184
+ X = st.session_state.data.drop("Target", axis=1)
185
+ y = st.session_state.data["Target"]
186
+ model = train_model(X, y, model_name)
187
+ st.success("Modèle entraîné avec succès!")
188
+
189
+ # Afficher les métriques
190
+ y_pred = model.predict(X)
191
+ col1, col2, col3 = st.columns(3)
192
+ with col1:
193
+ st.metric("Accuracy", f"{accuracy_score(y, y_pred):.2%}")
194
+ with col2:
195
+ st.metric("Precision", f"{precision_score(y, y_pred):.2%}")
196
+ with col3:
197
+ st.metric("Recall", f"{recall_score(y, y_pred):.2%}")
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200