Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,265 +2,199 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
5 |
-
from sklearn.tree import plot_tree, export_text
|
6 |
import seaborn as sns
|
7 |
from sklearn.preprocessing import LabelEncoder
|
8 |
from sklearn.ensemble import RandomForestClassifier
|
9 |
from sklearn.tree import DecisionTreeClassifier
|
10 |
from sklearn.ensemble import GradientBoostingClassifier
|
11 |
from sklearn.linear_model import LogisticRegression
|
12 |
-
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
|
13 |
-
import
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
data_test = pd.read_csv('exported_named_test_good.csv')
|
18 |
-
X_train = data.drop("Target", axis=1)
|
19 |
-
y_train = data['Target']
|
20 |
-
X_test = data_test.drop('Target', axis=1)
|
21 |
-
y_test = data_test['Target']
|
22 |
-
return X_train, y_train, X_test, y_test, X_train.columns
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
"Gradient Boost": GradientBoostingClassifier(random_state=42)
|
30 |
-
}
|
31 |
-
|
32 |
-
results = {}
|
33 |
-
for name, model in models.items():
|
34 |
-
model.fit(X_train, y_train)
|
35 |
-
|
36 |
-
# Predictions
|
37 |
-
y_train_pred = model.predict(X_train)
|
38 |
-
y_test_pred = model.predict(X_test)
|
39 |
-
|
40 |
-
# Metrics
|
41 |
-
results[name] = {
|
42 |
-
'model': model,
|
43 |
-
'train_metrics': {
|
44 |
-
'accuracy': accuracy_score(y_train, y_train_pred),
|
45 |
-
'f1': f1_score(y_train, y_train_pred, average='weighted'),
|
46 |
-
'precision': precision_score(y_train, y_train_pred),
|
47 |
-
'recall': recall_score(y_train, y_train_pred),
|
48 |
-
'roc_auc': roc_auc_score(y_train, y_train_pred)
|
49 |
-
},
|
50 |
-
'test_metrics': {
|
51 |
-
'accuracy': accuracy_score(y_test, y_test_pred),
|
52 |
-
'f1': f1_score(y_test, y_test_pred, average='weighted'),
|
53 |
-
'precision': precision_score(y_test, y_test_pred),
|
54 |
-
'recall': recall_score(y_test, y_test_pred),
|
55 |
-
'roc_auc': roc_auc_score(y_test, y_test_pred)
|
56 |
-
}
|
57 |
-
}
|
58 |
-
|
59 |
-
return results
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
# Test metrics
|
73 |
-
test_data = {model: [results[model]['test_metrics'][metric] for metric in metrics]
|
74 |
-
for model in results.keys()}
|
75 |
-
test_df = pd.DataFrame(test_data, index=metrics)
|
76 |
-
test_df.plot(kind='bar', ax=axes[1], title='Test Performance')
|
77 |
-
axes[1].set_ylim(0, 1)
|
78 |
-
|
79 |
-
plt.tight_layout()
|
80 |
-
return fig
|
81 |
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
if model_type in ["Decision Tree", "Random Forest", "Gradient Boost"]:
|
86 |
-
importance = model.feature_importances_
|
87 |
-
elif model_type == "Logistic Regression":
|
88 |
-
importance = np.abs(model.coef_[0])
|
89 |
-
|
90 |
-
importance_df = pd.DataFrame({
|
91 |
-
'feature': feature_names,
|
92 |
-
'importance': importance
|
93 |
-
}).sort_values('importance', ascending=True)
|
94 |
-
|
95 |
-
plt.barh(importance_df['feature'], importance_df['importance'])
|
96 |
-
plt.title(f"Feature Importance - {model_type}")
|
97 |
-
return plt.gcf()
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
import seaborn as sns
|
108 |
|
109 |
-
#
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
-
|
113 |
-
st.
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
div.css-1xarl3l.e16fv1kl1 {
|
143 |
-
font-size: 1.8rem;
|
144 |
-
font-weight: 700;
|
145 |
-
color: #1E88E5;
|
146 |
-
}
|
147 |
-
</style>
|
148 |
-
""", unsafe_allow_html=True)
|
149 |
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
-
def create_metric_card(title, value):
    """Render a styled metric card.

    Emits a single HTML snippet through ``st.markdown`` that shows ``title``
    as a heading and ``value`` formatted with three decimal places.

    Args:
        title: Text placed inside the card's ``<h3>`` heading. It is
            interpolated into the HTML as-is (no escaping).
        value: Number shown in the card; it must support the ``.3f`` format
            specifier.
    """
    # NOTE(review): the markup is written flush-left, exactly as it appears
    # in the reviewed source, whose original indentation was not preserved.
    card_html = f"""
<div style="
background-color: white;
padding: 1rem;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
text-align: center;
margin-bottom: 1rem;
">
<h3 style="color: #666; font-size: 1rem; margin-bottom: 0.5rem;">{title}</h3>
<p style="color: #1E88E5; font-size: 1.8rem; font-weight: bold; margin: 0;">{value:.3f}</p>
</div>
"""
    st.markdown(card_html, unsafe_allow_html=True)
|
193 |
|
194 |
-
def app():
    """Render the ML dashboard.

    Loads the data with ``load_data()``, trains the models once per session
    (the result of ``train_models`` is stored in
    ``st.session_state.model_results``), then draws:

    * the performance comparison charts for the test and training metrics,
    * one metric card per test metric of the model chosen in the sidebar
      (``precision`` is skipped),
    * a feature-importance bar chart and the correlation heatmap of
      ``X_train``.

    Every Matplotlib figure is closed right after it has been handed to
    ``st.pyplot`` so figures do not pile up in pyplot's registry each time
    the function runs.
    """
    # Header
    st.markdown('<h1 class="main-header">Tableau de Bord ML</h1>', unsafe_allow_html=True)

    # Load and prepare the data
    X_train, y_train, X_test, y_test, feature_names = load_data()

    # Sidebar: model selection
    with st.sidebar:
        st.markdown('<h2 style="color: #1E88E5;">Configuration</h2>', unsafe_allow_html=True)
        selected_model = st.selectbox(
            "Sélectionner un modèle",
            ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
        )

    # Train the models only if this session has not done so yet
    if 'model_results' not in st.session_state:
        with st.spinner("⏳ Entraînement des modèles..."):
            st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)

    results = st.session_state.model_results

    # Main layout
    col1, col2 = st.columns([2, 1])

    with col1:
        # Performance charts
        st.markdown("### 📊 Comparaison des Performances")

        tab1, tab2 = st.tabs(["🎯 Test", "📈 Entraînement"])

        with tab1:
            fig_test = plot_performance_comparison(results, 'test_metrics')
            st.pyplot(fig_test)
            plt.close(fig_test)

        with tab2:
            fig_train = plot_performance_comparison(results, 'train_metrics')
            st.pyplot(fig_train)
            plt.close(fig_train)

    with col2:
        # Detailed metrics of the selected model
        st.markdown(f"### 📌 Métriques - {selected_model}")

        metrics = results[selected_model]['test_metrics']
        for metric, value in metrics.items():
            if metric != 'precision':  # precision is deliberately left out
                create_metric_card(metric.upper(), value)

    # Lower section
    st.markdown("### 🔍 Analyse Détaillée")
    col3, col4 = st.columns(2)

    with col3:
        # Feature importance: tree-based models expose feature_importances_,
        # linear models expose coef_ (absolute value of the first row is used).
        current_model = results[selected_model]['model']
        if hasattr(current_model, 'feature_importances_'):
            importances = current_model.feature_importances_
        elif hasattr(current_model, 'coef_'):
            importances = np.abs(current_model.coef_[0])
        else:
            importances = None

        if importances is not None:
            fig_importance, ax_importance = plt.subplots(figsize=(10, 6))
            ax_importance.barh(feature_names, importances)
            ax_importance.set_title("Importance des Caractéristiques")
            st.pyplot(fig_importance)
            plt.close(fig_importance)

    with col4:
        # Correlation matrix
        fig_corr, ax_corr = plt.subplots(figsize=(10, 8))
        sns.heatmap(X_train.corr(), annot=True, cmap='coolwarm', center=0, ax=ax_corr)
        ax_corr.set_title("Matrice de Corrélation")
        st.pyplot(fig_corr)
        plt.close(fig_corr)
|
264 |
|
265 |
-
# Build the dashboard only when this module is the entry script.
if __name__ == "__main__":
    app()
|
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
|
|
5 |
import seaborn as sns
|
6 |
from sklearn.preprocessing import LabelEncoder
|
7 |
from sklearn.ensemble import RandomForestClassifier
|
8 |
from sklearn.tree import DecisionTreeClassifier
|
9 |
from sklearn.ensemble import GradientBoostingClassifier
|
10 |
from sklearn.linear_model import LogisticRegression
|
11 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
|
12 |
+
import plotly.express as px
|
13 |
+
import plotly.graph_objects as go
|
14 |
|
15 |
+
# Page configuration
st.set_page_config(page_title="ML Dashboard", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
# Data-loading helper
@st.cache_data
def load_data(file):
    """Read a CSV source into a DataFrame.

    The function is wrapped in ``st.cache_data``.

    Args:
        file: Anything ``pd.read_csv`` accepts. This file passes CSV path
            strings and the object returned by ``st.file_uploader``.

    Returns:
        The DataFrame produced by ``pd.read_csv(file)``.
    """
    return pd.read_csv(file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
# Model-training helper
def train_model(X_train, y_train, model_name):
    """Fit and return the classifier registered under ``model_name``.

    Args:
        X_train: Feature matrix passed to the estimator's ``fit``.
        y_train: Target values passed to the estimator's ``fit``.
        model_name: One of ``"Logistic Regression"``, ``"Decision Tree"``,
            ``"Random Forest"`` or ``"Gradient Boost"``.

    Returns:
        The fitted scikit-learn estimator.

    Raises:
        KeyError: If ``model_name`` is not one of the names listed above.
    """
    # Map names to classes (not instances) so that only the requested
    # estimator is constructed.
    estimator_classes = {
        "Logistic Regression": LogisticRegression,
        "Decision Tree": DecisionTreeClassifier,
        "Random Forest": RandomForestClassifier,
        "Gradient Boost": GradientBoostingClassifier,
    }
    # Fixed seed: this function is called from page code that runs again on
    # every interaction, and an unseeded estimator would report different
    # metrics for the same data from one run to the next.
    model = estimator_classes[model_name](random_state=42)
    model.fit(X_train, y_train)
    return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
# Sidebar navigation
st.sidebar.title("Navigation")
# Streamlit asks for a non-empty widget label (for accessibility); the label
# is hidden instead, since the sidebar title above already names the group.
page = st.sidebar.radio(
    "Navigation",
    ["📊 Vue d'ensemble", "🎯 Prédiction", "🔍 Interprétation", "⚙️ Entraînement"],
    label_visibility="collapsed",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
+
# Load the default datasets the first time the session runs
if 'data' not in st.session_state:
    try:
        st.session_state.data = load_data('exported_named_train_good.csv')
        st.session_state.test_data = load_data('exported_named_test_good.csv')
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; pandas' parsing errors
        # (empty or malformed CSV) and decoding errors are ValueError
        # subclasses. In those cases start with no data: the pages check for
        # ``None`` before using it. Anything else is a programming error and
        # is allowed to surface.
        st.session_state.data = None
        st.session_state.test_data = None
|
|
|
48 |
|
49 |
+
# Page routing: render whichever page is selected in the sidebar.

# Overview page
if page == "📊 Vue d'ensemble":
    st.title("Tableau de bord ML")

    # Column layout
    col1, col2 = st.columns([2, 1])

    with col1:
        # Data upload: an uploaded CSV replaces the session's dataset
        uploaded_file = st.file_uploader("Charger de nouvelles données", type=['csv'])
        if uploaded_file is not None:
            st.session_state.data = load_data(uploaded_file)

    with col2:
        # Model selection
        model_name = st.selectbox(
            "Sélectionner un modèle",
            ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
        )

    if st.session_state.data is not None:
        # Headline metrics
        col1, col2, col3, col4, col5 = st.columns(5)

        # Fit the selected model and score it on the same data it was
        # trained on (no held-out set is used on this page).
        X = st.session_state.data.drop("Target", axis=1)
        y = st.session_state.data["Target"]
        model = train_model(X, y, model_name)
        y_pred = model.predict(X)
        # ROC AUC measures how well scores rank the samples, so feed it the
        # probability of the second class rather than thresholded labels.
        # NOTE(review): like the precision/recall/F1 calls below, this
        # assumes a binary "Target" - confirm against the dataset.
        y_score = model.predict_proba(X)[:, 1]

        with col1:
            st.metric("Accuracy", f"{accuracy_score(y, y_pred):.2%}")
        with col2:
            st.metric("Precision", f"{precision_score(y, y_pred):.2%}")
        with col3:
            st.metric("Recall", f"{recall_score(y, y_pred):.2%}")
        with col4:
            st.metric("F1-Score", f"{f1_score(y, y_pred):.2%}")
        with col5:
            st.metric("ROC AUC", f"{roc_auc_score(y, y_score):.2%}")

        # Charts
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Importance des features")
            # Tree-based models expose feature_importances_; for linear
            # models fall back to the absolute coefficients of the first row.
            if hasattr(model, 'feature_importances_'):
                importance_values = model.feature_importances_
            elif hasattr(model, 'coef_'):
                importance_values = np.abs(model.coef_[0])
            else:
                importance_values = None

            if importance_values is not None:
                importances = pd.DataFrame({
                    'feature': X.columns,
                    'importance': importance_values
                }).sort_values('importance', ascending=True)
                fig = px.bar(importances, x='importance', y='feature', orientation='h')
                st.plotly_chart(fig, use_container_width=True)

        with col2:
            st.subheader("Matrice de confusion")
            cm = confusion_matrix(y, y_pred)
            # px.imshow has no ``text`` keyword; ``text_auto=True`` writes
            # each cell's value on the heatmap.
            fig = px.imshow(cm,
                            labels=dict(x="Prédit", y="Réel"),
                            text_auto=True)
            st.plotly_chart(fig, use_container_width=True)

# Prediction page
elif page == "🎯 Prédiction":
    st.title("Prédiction")

    if st.session_state.data is not None:
        X = st.session_state.data.drop("Target", axis=1)

        # Prediction form: one widget per feature, laid out on three columns
        st.subheader("Entrer les valeurs pour la prédiction")

        input_values = {}
        cols = st.columns(3)
        for idx, feature in enumerate(X.columns):
            with cols[idx % 3]:
                if X[feature].dtype == 'object':
                    # Object columns: choose among the observed values
                    input_values[feature] = st.selectbox(
                        f"{feature}",
                        options=X[feature].unique()
                    )
                else:
                    # Other columns: numeric input pre-filled with the mean
                    input_values[feature] = st.number_input(
                        f"{feature}",
                        value=float(X[feature].mean())
                    )

        if st.button("Prédire"):
            model = train_model(X, st.session_state.data["Target"], "Random Forest")
            # Keep the training column order explicit for the single-row frame
            pred = model.predict_proba(pd.DataFrame([input_values], columns=X.columns))

            st.subheader("Résultat de la prédiction")
            # Label the bars with the classes the model actually learned;
            # predict_proba columns follow the order of ``model.classes_``.
            proba_df = pd.DataFrame({
                'Classe': [str(label) for label in model.classes_],
                'Probabilité': pred[0]
            })
            fig = px.bar(proba_df, x='Classe', y='Probabilité')
            st.plotly_chart(fig)

# Interpretation page
elif page == "🔍 Interprétation":
    st.title("Interprétation du modèle")

    if st.session_state.data is not None:
        # Pairwise feature scatter plot coloured by the target
        st.subheader("Analyse des features")
        X = st.session_state.data.drop("Target", axis=1)

        feature_1 = st.selectbox("Sélectionner la première feature", X.columns)
        # Default the second selector to the second column so the initial
        # plot does not show a feature against itself.
        feature_2 = st.selectbox(
            "Sélectionner la deuxième feature",
            X.columns,
            index=1 if len(X.columns) > 1 else 0,
        )

        fig = px.scatter(st.session_state.data,
                         x=feature_1,
                         y=feature_2,
                         color='Target',
                         title=f"Relation entre {feature_1} et {feature_2}")
        st.plotly_chart(fig)

# Training page
elif page == "⚙️ Entraînement":
    st.title("Entraînement du modèle")

    if st.session_state.data is not None:
        # Training options
        model_name = st.selectbox(
            "Sélectionner le modèle à entraîner",
            ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
        )

        # Model parameters (sliders are only offered for Random Forest)
        st.subheader("Paramètres du modèle")
        rf_params = None
        if model_name == "Random Forest":
            rf_params = {
                "n_estimators": st.slider("Nombre d'arbres", 10, 200, 100),
                "max_depth": st.slider("Profondeur maximale", 1, 20, 10),
            }

        if st.button("Entraîner le modèle"):
            with st.spinner("Entraînement en cours..."):
                X = st.session_state.data.drop("Target", axis=1)
                y = st.session_state.data["Target"]
                if rf_params is not None:
                    # Apply the slider values; train_model takes no
                    # hyperparameters, so the forest is built here.
                    model = RandomForestClassifier(**rf_params)
                    model.fit(X, y)
                else:
                    model = train_model(X, y, model_name)
            st.success("Modèle entraîné avec succès!")

            # Show the metrics, computed on the training data itself
            y_pred = model.predict(X)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Accuracy", f"{accuracy_score(y, y_pred):.2%}")
            with col2:
                st.metric("Precision", f"{precision_score(y, y_pred):.2%}")
            with col3:
                st.metric("Recall", f"{recall_score(y, y_pred):.2%}")
|
198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
|
|
|