"""Streamlit dashboard for the Nexar Driving Leaderboard.

Loads per-model result CSVs, evaluates them against ground-truth labels via
comparison.ModelEvaluator / ModelComparison, and presents leaderboard,
per-class, detailed-metric and head-to-head comparison views.
"""
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from comparison import ModelEvaluator, ModelComparison
import matplotlib.pyplot as plt
import seaborn as sns
import io
import os
import base64

st.set_page_config(
    page_title="Nexar Driving Leaderboard",
    page_icon="nexar_logo.png",
    layout="wide"
)

st.markdown("""
    <style>
    .main { padding: 2rem; }
    .stTabs [data-baseweb="tab-list"] { gap: 8px; }
    .stTabs [data-baseweb="tab"] {
        padding: 8px 16px;
        border-radius: 4px;
    }
    .metric-card {
        background-color: #f8f9fa;
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    </style>
""", unsafe_allow_html=True)

col1, col2 = st.columns([0.15, 0.85])
with col1:
    st.image("nexar_logo.png", width=600)
with col2:
    st.title("Driving Leaderboard")

@st.cache_data
def load_data(directory='results', labels_filename='Labels.csv'):
    """Load the ground-truth labels and every model result CSV, wrapped in a ModelComparison."""
    labels_path = os.path.join(directory, labels_filename)
    df_labels = pd.read_csv(labels_path)

    evaluators = []
    for filename in os.listdir(directory):
        if filename.endswith('.csv') and filename != labels_filename:
            model_name = os.path.splitext(filename)[0]
            df_model = pd.read_csv(os.path.join(directory, filename))
            evaluator = ModelEvaluator(df_labels, df_model, model_name)
            evaluators.append(evaluator)

    model_comparison = ModelComparison(evaluators)
    return model_comparison

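# Assumed layout of the results directory (inferred from load_data, not verified):
#   results/
#       Labels.csv        ground-truth labels
#       <model_name>.csv  one predictions file per model; the file stem becomes
#                         the model name shown on the leaderboard
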
if 'model_comparison' not in st.session_state:
    st.session_state.model_comparison = load_data()
    st.session_state.leaderboard_df = st.session_state.model_comparison.transform_to_leaderboard()
    st.session_state.combined_df = st.session_state.model_comparison.combined_df

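# Columns assumed by the rest of this script (inferred from how the dataframes
# are used below, not from comparison.py itself):
#   leaderboard_df: 'Rank', 'Model', plus one numeric column per metric
#   combined_df:    'Model', 'Category', 'Class', 'F1 Score', 'Precision', 'Recall',
#                   with an 'Overall' row per category and model
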
tab1, tab2, tab3, tab4 = st.tabs([
    "🏆 Leaderboard",
    "📊 Class Performance",
    "📈 Detailed Metrics",
    "⚖️ Model Comparison"
])

def style_dataframe(df, highlight_first_column=True, show_progress_bars=True):
    """Apply shared number formatting, colour cues and table CSS to a metrics dataframe."""
    numeric_cols = df.select_dtypes(include=['float64']).columns

    def color_background(val):
        """Return a red-to-green background colour for a normalised value in [0, 1]."""
        return f'background-color: rgba({int(255 * (1 - val))}, {int(255 * val)}, 0, 0.2)'

    def apply_colors_to_series(s):
        """Apply the colour gradient to a series of values."""
        if len(s) == 0:
            return []
        normalized = (s - s.min()) / (s.max() - s.min()) if s.max() != s.min() else [0.5] * len(s)
        return [color_background(val) for val in normalized]

    styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})

    # Highlight the first numeric column.
    if highlight_first_column and len(numeric_cols) > 0:
        first_numeric_col = numeric_cols[0]
        styled = styled.apply(lambda x: [
            'background-color: rgba(74, 144, 226, 0.2)' if col == first_numeric_col else ''
            for col in df.columns
        ], axis=1)

    if show_progress_bars:
        for col in numeric_cols:
            styled = styled.apply(apply_colors_to_series, subset=[col])

    styled = styled.set_properties(**{
        'padding': '10px',
        'border': '1px solid #dee2e6',
        'text-align': 'center'
    })

    styled = styled.set_table_styles([
        {'selector': 'th', 'props': [
            ('background-color', '#4a90e2'),
            ('color', 'white'),
            ('font-weight', 'bold'),
            ('padding', '10px'),
            ('text-align', 'center')
        ]},
        {'selector': 'tr:hover', 'props': [
            ('background-color', '#edf2f7')
        ]}
    ])

    return styled

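# Worked example of style_dataframe's gradient: after min-max normalisation,
# val = 0.0 maps to rgba(255, 0, 0, 0.2) (red), val = 0.5 to rgba(127, 127, 0, 0.2),
# and val = 1.0 to rgba(0, 255, 0, 0.2) (green).
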
def style_comparison_dataframe(df):
    """Style a dataframe specifically for model comparison tables."""
    numeric_cols = df.select_dtypes(include=['float64']).columns

    styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})

    def color_difference(x):
        """Colour the Difference column red (negative) to green (positive)."""
        if pd.isna(x):
            return ''

        # Saturate the colour at a difference of +/-10 points.
        normalized = max(min(x / 10, 1), -1)
        if normalized > 0:
            return f'background-color: rgba(0, 128, 0, {abs(normalized) * 0.3})'
        else:
            return f'background-color: rgba(255, 0, 0, {abs(normalized) * 0.3})'

    if 'Difference' in df.columns:
        styled = styled.applymap(color_difference, subset=['Difference'])

    styled = styled.set_properties(**{
        'padding': '10px',
        'border': '1px solid #dee2e6',
        'text-align': 'center'
    })

    styled = styled.set_table_styles([
        {'selector': 'th', 'props': [
            ('background-color', '#4a90e2'),
            ('color', 'white'),
            ('font-weight', 'bold'),
            ('padding', '10px'),
            ('text-align', 'center')
        ]},
        {'selector': 'tr:hover', 'props': [
            ('background-color', '#edf2f7')
        ]}
    ])

    return styled

with tab1:
    st.subheader("Model Performance Leaderboard")

    st.markdown("""
    **Welcome to the Nexar Driving Leaderboard!**

    This dashboard compares how well different AI models detect driving incidents,
    evaluated on key metrics such as F1 Score, Precision, and Recall.
    The table below ranks the models; use the dropdown to sort by a specific metric.
    """)

    sort_col = st.selectbox(
        "Sort by metric:",
        options=[col for col in st.session_state.leaderboard_df.columns if col not in ['Rank', 'Model']],
        key='leaderboard_sort'
    )

    sorted_df = st.session_state.leaderboard_df.sort_values(by=sort_col, ascending=False)

    st.dataframe(
        style_dataframe(sorted_df),
        use_container_width=True,
    )

    metrics = ['F1 Score', 'Precision', 'Recall']
    selected_metric = st.selectbox("Select Metric for Category Analysis:", metrics)

    # Use the per-category 'Overall' rows for the category-level bar chart.
    category_data = st.session_state.combined_df[
        st.session_state.combined_df['Class'].str.contains('Overall')
    ]

    fig = px.bar(
        category_data,
        x='Category',
        y=selected_metric,
        color='Model',
        barmode='group',
        title=f'Category-level {selected_metric} by Model',
    )

    fig.update_layout(
        xaxis_title="Category",
        yaxis_title=selected_metric,
        legend_title="Model"
    )

    st.plotly_chart(fig, use_container_width=True)

with tab2:
    st.subheader("Class-Level Performance Analysis")

    st.markdown("""
    This section provides a detailed breakdown of model performance across specific event classes.
    You can select a category, metric, and models to compare their effectiveness in recognizing
    different types of driving incidents.
    """)

    categories = st.session_state.combined_df['Category'].unique()

    col1, col2, col3 = st.columns(3)
    with col1:
        selected_category = st.selectbox(
            "Select Category:",
            categories,
            key='class_category'
        )
    with col2:
        selected_metric = st.selectbox(
            "Select Metric:",
            metrics,
            key='class_metric'
        )
    with col3:
        selected_models = st.multiselect(
            "Select Models:",
            st.session_state.combined_df['Model'].unique(),
            default=st.session_state.combined_df['Model'].unique()
        )

    # Assign each model a fixed colour so it stays consistent across every chart in this tab.
    plotly_colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
    model_colors = {model: plotly_colors[i % len(plotly_colors)] for i, model in enumerate(sorted(st.session_state.combined_df['Model'].unique()))}

    class_data = st.session_state.combined_df[
        (st.session_state.combined_df['Category'] == selected_category) &
        (~st.session_state.combined_df['Class'].str.contains('Overall')) &
        (st.session_state.combined_df['Model'].isin(selected_models))
    ]

    fig = px.bar(
        class_data,
        x='Class',
        y=selected_metric,
        color='Model',
        barmode='group',
        title=f'{selected_metric} by Class for {selected_category}',
        color_discrete_map=model_colors,
        range_y=[0, 1] if selected_metric in ['F1 Score', 'Precision', 'Recall'] else None
    )
    st.plotly_chart(fig, use_container_width=True)

    models_per_row = 4
    num_rows = (len(selected_models) + models_per_row - 1) // models_per_row

    st.markdown("### Select Models to Display:")

    for row in range(num_rows):
        cols = st.columns(models_per_row)
        for col_idx in range(models_per_row):
            model_idx = row * models_per_row + col_idx
            if model_idx < len(selected_models):
                model = selected_models[model_idx]
                container = cols[col_idx].container()

                color = model_colors[model]

                toggle_key = f"toggle_{model}"
                if toggle_key not in st.session_state:
                    st.session_state[toggle_key] = True

                # Colour swatch matching the model's chart colour (overlaps the checkbox row below).
                container.markdown(
                    f"""
                    <div style='display: flex; align-items: center; margin-bottom: -40px; pointer-events: none;'>
                        <span style='display: inline-block; width: 12px; height: 12px; background-color: {color}; border-radius: 50%; margin-right: 8px;'></span>
                    </div>
                    """,
                    unsafe_allow_html=True
                )

                # The key alone drives the checkbox state; passing value= as well would
                # trigger Streamlit's duplicate-default warning once the key exists.
                container.checkbox(
                    f" {model}",
                    key=toggle_key
                )

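    # Only models whose checkbox above is ticked (session-state key "toggle_<model>")
    # are drawn in the per-class precision/recall scatter plots below.
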
    unique_classes = class_data['Class'].unique()
    num_classes = len(unique_classes)

    num_rows = (num_classes + 2) // 3

    for row in range(num_rows):
        cols = st.columns(3)
        for col_idx in range(3):
            class_idx = row * 3 + col_idx
            if class_idx < num_classes:
                current_class = unique_classes[class_idx]

                visible_models = [model for model in selected_models
                                  if st.session_state[f"toggle_{model}"]]

                class_specific_data = class_data[
                    (class_data['Class'] == current_class) &
                    (class_data['Model'].isin(visible_models))
                ]

                fig = px.scatter(
                    class_specific_data,
                    x='Precision',
                    y='Recall',
                    color='Model',
                    title=f'Precision vs Recall: {current_class}',
                    height=300,
                    color_discrete_map=model_colors
                )

                fig.update_layout(
                    xaxis_range=[0, 1],
                    yaxis_range=[0, 1],
                    margin=dict(l=40, r=40, t=40, b=40),
                    showlegend=False
                )

                # Dashed diagonal marks the precision == recall balance line.
                fig.add_trace(
                    go.Scatter(
                        x=[0, 1],
                        y=[0, 1],
                        mode='lines',
                        line=dict(dash='dash', color='gray'),
                        showlegend=False
                    )
                )

                cols[col_idx].plotly_chart(fig, use_container_width=True)

with tab3:
    st.subheader("Detailed Metrics Analysis")

    selected_model = st.selectbox(
        "Select Model for Detailed Analysis:",
        st.session_state.combined_df['Model'].unique()
    )

    model_data = st.session_state.combined_df[
        st.session_state.combined_df['Model'] == selected_model
    ]

    st.markdown("### Performance Metrics by Category")

    categories = model_data['Category'].unique()
    metrics = ['F1 Score', 'Precision', 'Recall']

    # One table per category: an 'Overall' row on top, then one row per class.
    for category in categories:
        st.markdown(f"#### {category}")

        category_data = model_data[model_data['Category'] == category].copy()

        classes = category_data[~category_data['Class'].str.contains('Overall')]['Class'].unique()
        overall_data = category_data[category_data['Class'].str.contains('Overall')]

        category_metrics = pd.DataFrame(index=classes)

        for metric in metrics:
            class_metrics = {}
            for class_name in classes:
                class_data = category_data[category_data['Class'] == class_name]
                if not class_data.empty:
                    class_metrics[class_name] = class_data[metric].iloc[0]

            category_metrics[metric] = pd.Series(class_metrics)

        if not overall_data.empty:
            overall_row = pd.DataFrame({
                metric: [overall_data[metric].iloc[0]] for metric in metrics
            }, index=['Overall'])
            category_metrics = pd.concat([overall_row, category_metrics])

        styled_metrics = style_dataframe(category_metrics.round(4))
        st.dataframe(styled_metrics, use_container_width=True)

        st.markdown("---")

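    # The export below flattens the per-category tables into a single CSV with one
    # row per class (pivoted on 'Class') and one column per metric.
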
    st.markdown("### Export Data")

    export_data = pd.DataFrame()
    for category in categories:
        category_data = model_data[model_data['Category'] == category].copy()
        category_metrics = pd.pivot_table(
            category_data,
            index='Class',
            values=metrics,
            aggfunc='first'
        ).round(4)
        export_data = pd.concat([export_data, category_metrics])

    csv = export_data.to_csv().encode()
    st.download_button(
        "Download Detailed Metrics",
        csv,
        f"detailed_metrics_{selected_model}.csv",
        "text/csv",
        key='download-csv'
    )

with tab4:
    st.header("Model Comparison Analysis")

    st.markdown("""
    Compare two models side by side across different categories.
    The bar chart visualizes the differences in performance across selected categories,
    while the scatter plot provides an overview of Precision vs. Recall per class.
    """)

    col1, col2 = st.columns(2)

    with col1:
        model1 = st.selectbox(
            "Select First Model:",
            st.session_state.combined_df['Model'].unique(),
            key='model1'
        )

    with col2:
        # The second model cannot be the same as the first.
        available_models = [m for m in st.session_state.combined_df['Model'].unique() if m != model1]
        model2 = st.selectbox(
            "Select Second Model:",
            available_models,
            key='model2'
        )

    selected_category = st.selectbox(
        "Select Category for Comparison:",
        st.session_state.combined_df['Category'].unique(),
        key='compare_category'
    )

    model1_data = st.session_state.combined_df[
        (st.session_state.combined_df['Model'] == model1) &
        (st.session_state.combined_df['Category'] == selected_category)
    ]

    model2_data = st.session_state.combined_df[
        (st.session_state.combined_df['Model'] == model2) &
        (st.session_state.combined_df['Category'] == selected_category)
    ]

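    # Note: the per-class comparison below takes the class list from the first model;
    # classes missing from the second model's results are skipped rather than compared.
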
    metrics = ['F1 Score', 'Precision', 'Recall']

    st.subheader("Detailed Metrics Comparison")

    for metric in metrics:
        st.markdown(f"#### {metric} Comparison")

        # Difference is model1 minus model2, so positive values favour the first model.
        metric_data = []
        for class_name in model1_data['Class'].unique():
            m1_value = model1_data[model1_data['Class'] == class_name][metric].iloc[0]

            m2_rows = model2_data[model2_data['Class'] == class_name]
            if m2_rows.empty:
                # Class not reported by the second model; skip it rather than fail.
                continue
            m2_value = m2_rows[metric].iloc[0]
            diff = m1_value - m2_value

            metric_data.append({
                'Class': class_name,
                model1: m1_value,
                model2: m2_value,
                'Difference': diff
            })

        metric_df = pd.DataFrame(metric_data)

        # Alternative styler with a red/green gradient on the Difference column
        # (defined here but not currently applied to the table below).
        def style_metric_table(df):
            return (
                df.style
                .format({
                    model1: '{:.2f}%',
                    model2: '{:.2f}%',
                    'Difference': '{:+.2f}%'
                })
                .background_gradient(
                    cmap='RdYlGn',
                    subset=['Difference'],
                    vmin=-10,
                    vmax=10
                )
                .set_properties(**{
                    'text-align': 'center',
                    'padding': '10px',
                    'border': '1px solid #dee2e6'
                })
                .set_table_styles([
                    {'selector': 'th', 'props': [
                        ('background-color', '#4a90e2'),
                        ('color', 'white'),
                        ('font-weight', 'bold'),
                        ('text-align', 'center'),
                        ('padding', '10px')
                    ]}
                ])
            )

        def color_negative_positive(val):
            """Colour positive values green, negative red, zero black; skip non-numeric cells."""
            try:
                color = 'green' if float(val) > 0 else 'red' if float(val) < 0 else 'black'
                return f'color: {color}'
            except (TypeError, ValueError):
                return ''

        styled_df = (
            metric_df.style
            .applymap(color_negative_positive)
            .format(precision=2)
        )

        st.dataframe(styled_df, use_container_width=True)

    st.markdown("---")

    st.subheader("Visual Performance Analysis")

    selected_metric = st.selectbox(
        "Select Metric for Comparison:",
        metrics,
        key='compare_metric'
    )

    comparison_data = pd.DataFrame()

    for idx, (model_name, model_data) in enumerate([(model1, model1_data), (model2, model2_data)]):
        model_metrics = model_data[~model_data['Class'].str.contains('Overall', na=False)][['Class', selected_metric]]
        model_metrics = model_metrics.rename(columns={selected_metric: model_name})

        if idx == 0:
            comparison_data = model_metrics
        else:
            comparison_data = comparison_data.merge(model_metrics, on='Class', how='outer')

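    # how='outer' keeps classes reported by only one of the two models; the other
    # model's column is left as NaN for that row.
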
    fig_bar = go.Figure()

    fig_bar.add_trace(go.Bar(
        name=model1,
        x=comparison_data['Class'],
        y=comparison_data[model1],
        marker_color='rgb(55, 83, 109)'
    ))

    fig_bar.add_trace(go.Bar(
        name=model2,
        x=comparison_data['Class'],
        y=comparison_data[model2],
        marker_color='rgb(26, 118, 255)'
    ))

    fig_bar.update_layout(
        title=f"{selected_metric} Comparison by Class",
        xaxis_title="Class",
        yaxis_title=selected_metric,
        barmode='group',
        xaxis_tickangle=-45,
        height=500,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99
        ),
        yaxis=dict(range=[0, 1])
    )

    st.plotly_chart(fig_bar, use_container_width=True)

    st.markdown("#### Precision-Recall Analysis")

    model1_scatter = model1_data[~model1_data['Class'].str.contains('Overall', na=False)]
    model2_scatter = model2_data[~model2_data['Class'].str.contains('Overall', na=False)]

    fig_scatter = go.Figure()

    # Precision/Recall appear to be stored as fractions (0-1), so scale to percentages for display.
    fig_scatter.add_trace(go.Scatter(
        x=model1_scatter['Precision'] * 100,
        y=model1_scatter['Recall'] * 100,
        mode='markers+text',
        name=model1,
        text=model1_scatter['Class'],
        textposition="top center",
        marker=dict(size=10)
    ))

    fig_scatter.add_trace(go.Scatter(
        x=model2_scatter['Precision'] * 100,
        y=model2_scatter['Recall'] * 100,
        mode='markers+text',
        name=model2,
        text=model2_scatter['Class'],
        textposition="top center",
        marker=dict(size=10)
    ))

    # Dashed diagonal marks the precision == recall balance line.
    fig_scatter.add_trace(go.Scatter(
        x=[0, 100],
        y=[0, 100],
        mode='lines',
        line=dict(dash='dash', color='gray'),
        showlegend=False
    ))

    fig_scatter.update_layout(
        title="Precision vs Recall Analysis by Class",
        xaxis_title="Precision (%)",
        yaxis_title="Recall (%)",
        xaxis=dict(range=[0, 100]),
        yaxis=dict(range=[0, 100]),
        height=600,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99
        )
    )

    st.plotly_chart(fig_scatter, use_container_width=True)


st.markdown("---")
st.markdown("Dashboard created for model evaluation and comparison")
st.markdown("© 2024 Nexar")