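"""Streamlit dashboard for the Nexar Driving Leaderboard.

Loads per-model prediction CSVs plus a shared Labels.csv from the results/
directory, scores them via the local `comparison` module, and renders four
tabs: leaderboard, class performance, detailed metrics, and model comparison.
"""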
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from comparison import ModelEvaluator, ModelComparison
import os
# Page config
st.set_page_config(
page_title="Nexar Driving Leaderboard",
page_icon="nexar_logo.png",
layout="wide"
)
# Custom styling
st.markdown("""
<style>
.main { padding: 2rem; }
.stTabs [data-baseweb="tab-list"] { gap: 8px; }
.stTabs [data-baseweb="tab"] {
padding: 8px 16px;
border-radius: 4px;
}
.metric-card {
background-color: #f8f9fa;
padding: 20px;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
</style>
""", unsafe_allow_html=True)
# Header
col1, col2 = st.columns([0.15, 0.85])
with col1:
st.image("nexar_logo.png", width=600)
with col2:
st.title("Driving Leaderboard")
# Data loading function
@st.cache_data
def load_data(directory='results', labels_filename='Labels.csv'):
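    """Load ground-truth labels and per-model prediction CSVs into a ModelComparison.

    Every CSV in `directory` other than `labels_filename` is treated as one model's
    predictions; the file name (without .csv) becomes the model name. Cached by
    Streamlit so the files are read only once per session.
    """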
labels_path = os.path.join(directory, labels_filename)
df_labels = pd.read_csv(labels_path)
evaluators = []
for filename in os.listdir(directory):
if filename.endswith('.csv') and filename != labels_filename:
model_name = os.path.splitext(filename)[0]
df_model = pd.read_csv(os.path.join(directory, filename))
evaluator = ModelEvaluator(df_labels, df_model, model_name)
evaluators.append(evaluator)
model_comparison = ModelComparison(evaluators)
return model_comparison
# Initialize session state
if 'model_comparison' not in st.session_state:
st.session_state.model_comparison = load_data()
st.session_state.leaderboard_df = st.session_state.model_comparison.transform_to_leaderboard()
st.session_state.combined_df = st.session_state.model_comparison.combined_df
# Create tabs
tab1, tab2, tab3, tab4 = st.tabs([
"πŸ“ˆ Leaderboard",
"πŸ“Š Class Performance",
"πŸ” Detailed Metrics",
"βš–οΈ Model Comparison"
])
def style_dataframe(df, highlight_first_column=True, show_progress_bars=True):
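    """Style a metrics DataFrame: percentage formatting on numeric columns, an
    optional highlight on the first numeric column, and a per-column
    red-to-green gradient when `show_progress_bars` is enabled."""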
numeric_cols = df.select_dtypes(include=['float64']).columns
def color_background(val):
"""Return background color style based on value"""
return f'background-color: rgba({int(255 * (1 - val))}, {int(255 * val)}, 0, 0.2)'
def apply_colors_to_series(s):
"""Apply color gradient to a series of values"""
if len(s) == 0:
return []
normalized = (s - s.min()) / (s.max() - s.min()) if s.max() != s.min() else [0.5] * len(s)
return [color_background(val) for val in normalized]
styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})
# First apply highlighting to first column if needed
if highlight_first_column and len(numeric_cols) > 0:
first_numeric_col = numeric_cols[0]
styled = styled.apply(lambda x: [
'background-color: rgba(74, 144, 226, 0.2)' if col == first_numeric_col else ''
for col in df.columns
], axis=1)
# Then apply color gradients if needed
if show_progress_bars:
for col in numeric_cols:
styled = styled.apply(lambda s: apply_colors_to_series(s), subset=[col])
styled = styled.set_properties(**{
'padding': '10px',
'border': '1px solid #dee2e6',
'text-align': 'center'
})
styled = styled.set_table_styles([
{'selector': 'th', 'props': [
('background-color', '#4a90e2'),
('color', 'white'),
('font-weight', 'bold'),
('padding', '10px'),
('text-align', 'center')
]},
{'selector': 'tr:hover', 'props': [
('background-color', '#edf2f7')
]}
])
return styled
def style_comparison_dataframe(df):
"""Style dataframe specifically for model comparison tables"""
# Format all numeric columns as percentages
numeric_cols = df.select_dtypes(include=['float64']).columns
styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})
def color_difference(x):
"""Color the difference column from red to green"""
if pd.isna(x):
return ''
# Normalize the value to a -1 to 1 scale for coloring
normalized = max(min(x / 10, 1), -1) # Scale of Β±10%
if normalized > 0:
return f'background-color: rgba(0, 128, 0, {abs(normalized) * 0.3})'
else:
return f'background-color: rgba(255, 0, 0, {abs(normalized) * 0.3})'
# Apply color gradient only to the 'Difference' column
if 'Difference' in df.columns:
styled = styled.applymap(color_difference, subset=['Difference'])
styled = styled.set_properties(**{
'padding': '10px',
'border': '1px solid #dee2e6',
'text-align': 'center'
})
styled = styled.set_table_styles([
{'selector': 'th', 'props': [
('background-color', '#4a90e2'),
('color', 'white'),
('font-weight', 'bold'),
('padding', '10px'),
('text-align', 'center')
]},
{'selector': 'tr:hover', 'props': [
('background-color', '#edf2f7')
]}
])
return styled
# Tab 1: Leaderboard
with tab1:
st.subheader("Model Performance Leaderboard")
st.markdown("""
**Welcome to the Nexar Driving Leaderboard!**
This dashboard compares the performance of various AI models in detecting driving incidents.
The models are evaluated based on key metrics such as F1 Score, Precision, and Recall.
You can sort the table by different metrics using the dropdown menu.
""")
st.markdown("""
The table below ranks models based on their ability to detect driving events.
Use the dropdown below to sort by a specific metric.
""")
sort_col = st.selectbox(
"Sort by metric:",
options=[col for col in st.session_state.leaderboard_df.columns if col not in ['Rank', 'Model']],
key='leaderboard_sort'
)
sorted_df = st.session_state.leaderboard_df.sort_values(by=sort_col, ascending=False)
st.dataframe(
style_dataframe(sorted_df),
use_container_width=True,
)
metrics = ['F1 Score', 'Precision', 'Recall']
selected_metric = st.selectbox("Select Metric for Category Analysis:", metrics)
category_data = st.session_state.combined_df[
st.session_state.combined_df['Class'].str.contains('Overall')
]
fig = px.bar(
category_data,
x='Category',
y=selected_metric,
color='Model',
barmode='group',
title=f'Category-level {selected_metric} by Model',
)
fig.update_layout(
xaxis_title="Category",
yaxis_title=selected_metric,
legend_title="Model"
)
st.plotly_chart(fig, use_container_width=True)
# Tab 2: Class Performance
with tab2:
st.subheader("Class-Level Performance Analysis")
st.markdown("""
This section provides a detailed breakdown of model performance across specific event classes.
You can select a category, metric, and models to compare their effectiveness in recognizing
different types of driving incidents.
""")
categories = st.session_state.combined_df['Category'].unique()
col1, col2, col3 = st.columns(3)
with col1:
selected_category = st.selectbox(
"Select Category:",
categories,
key='class_category'
)
with col2:
selected_metric = st.selectbox(
"Select Metric:",
metrics,
key='class_metric'
)
with col3:
selected_models = st.multiselect(
"Select Models:",
st.session_state.combined_df['Model'].unique(),
default=st.session_state.combined_df['Model'].unique()
)
# Create a consistent color mapping for all models
plotly_colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
model_colors = {model: plotly_colors[i % len(plotly_colors)] for i, model in enumerate(sorted(st.session_state.combined_df['Model'].unique()))}
class_data = st.session_state.combined_df[
(st.session_state.combined_df['Category'] == selected_category) &
(~st.session_state.combined_df['Class'].str.contains('Overall')) &
(st.session_state.combined_df['Model'].isin(selected_models))
]
# Bar chart with consistent colors
fig = px.bar(
class_data,
x='Class',
y=selected_metric,
color='Model',
barmode='group',
title=f'{selected_metric} by Class for {selected_category}',
color_discrete_map=model_colors,
        range_y=[0, 1]  # all selectable metrics are fractions in [0, 1]
)
st.plotly_chart(fig, use_container_width=True)
# Calculate how many columns we need (aim for about 4-5 models per row)
models_per_row = 4
num_rows = (len(selected_models) + models_per_row - 1) // models_per_row
st.markdown("### Select Models to Display:")
# Create toggles for models using st.columns
for row in range(num_rows):
cols = st.columns(models_per_row)
for col_idx in range(models_per_row):
model_idx = row * models_per_row + col_idx
if model_idx < len(selected_models):
model = selected_models[model_idx]
container = cols[col_idx].container()
# Get the consistent color for this model
color = model_colors[model]
# Initialize toggle state if needed
toggle_key = f"toggle_{model}"
if toggle_key not in st.session_state:
st.session_state[toggle_key] = True
# Create colored legend item with HTML
container.markdown(
f"""
<div style='display: flex; align-items: center; margin-bottom: -40px; pointer-events: none;'>
<span style='display: inline-block; width: 12px; height: 12px; background-color: {color}; border-radius: 50%; margin-right: 8px;'></span>
</div>
""",
unsafe_allow_html=True
)
                    # Create the checkbox; the pre-seeded session-state key supplies
                    # the value, so passing value= as well would trigger a Streamlit
                    # warning about setting both a default and the Session State API
                    container.checkbox(
                        f" {model}",  # leading space leaves room for the color swatch
                        key=toggle_key
                    )
# Individual Precision-Recall plots for each class
unique_classes = class_data['Class'].unique()
num_classes = len(unique_classes)
# Calculate number of rows needed (3 plots per row)
num_rows = (num_classes + 2) // 3 # Using ceiling division
# Create plots row by row
for row in range(num_rows):
cols = st.columns(3)
for col_idx in range(3):
class_idx = row * 3 + col_idx
if class_idx < num_classes:
current_class = unique_classes[class_idx]
# Filter data based on visible models
visible_models = [model for model in selected_models
if st.session_state[f"toggle_{model}"]]
class_specific_data = class_data[
(class_data['Class'] == current_class) &
(class_data['Model'].isin(visible_models))
]
fig = px.scatter(
class_specific_data,
x='Precision',
y='Recall',
color='Model',
title=f'Precision vs Recall: {current_class}',
height=300,
color_discrete_map=model_colors # Use consistent colors
)
# Update layout for better visibility
fig.update_layout(
xaxis_range=[0, 1],
yaxis_range=[0, 1],
margin=dict(l=40, r=40, t=40, b=40),
showlegend=False # Hide individual legends
)
# Add diagonal reference line
fig.add_trace(
go.Scatter(
x=[0, 1],
y=[0, 1],
mode='lines',
line=dict(dash='dash', color='gray'),
showlegend=False
)
)
cols[col_idx].plotly_chart(fig, use_container_width=True)
# Tab 3: Detailed Metrics
with tab3:
st.subheader("Detailed Metrics Analysis")
selected_model = st.selectbox(
"Select Model for Detailed Analysis:",
st.session_state.combined_df['Model'].unique()
)
model_data = st.session_state.combined_df[
st.session_state.combined_df['Model'] == selected_model
]
# Create metrics tables
st.markdown("### Performance Metrics by Category")
# Get unique categories and relevant classes for each category
categories = model_data['Category'].unique()
metrics = ['F1 Score', 'Precision', 'Recall']
# Process data for each category
for category in categories:
st.markdown(f"#### {category}")
# Filter data for this category
category_data = model_data[model_data['Category'] == category].copy()
        # Get classes for this category (excluding the 'Overall' summary row)
        classes = category_data[~category_data['Class'].str.contains('Overall')]['Class'].unique()
        # Pull out the overall metrics row for this category
        overall_data = category_data[category_data['Class'].str.contains('Overall')]
        # Build a clean table with classes as the index
        category_metrics = pd.DataFrame(index=classes)
# Add metrics columns
for metric in metrics:
# Add class-specific metrics
class_metrics = {}
for class_name in classes:
class_data = category_data[category_data['Class'] == class_name]
if not class_data.empty:
class_metrics[class_name] = class_data[metric].iloc[0]
category_metrics[metric] = pd.Series(class_metrics)
# Add overall metrics as a separate row
if not overall_data.empty:
overall_row = pd.DataFrame({
metric: [overall_data[metric].iloc[0]] for metric in metrics
}, index=['Overall'])
category_metrics = pd.concat([overall_row, category_metrics])
# Display the table
styled_metrics = style_dataframe(category_metrics.round(4))
st.dataframe(styled_metrics, use_container_width=True)
# Add spacing between categories
st.markdown("---")
# Export functionality
st.markdown("### Export Data")
# Prepare export data
export_data = pd.DataFrame()
for category in categories:
category_data = model_data[model_data['Category'] == category].copy()
category_metrics = pd.pivot_table(
category_data,
index='Class',
values=metrics,
aggfunc='first'
).round(4)
export_data = pd.concat([export_data, category_metrics])
# Create download button
csv = export_data.to_csv().encode()
st.download_button(
"Download Detailed Metrics",
csv,
f"detailed_metrics_{selected_model}.csv",
"text/csv",
key='download-csv'
)
# Tab 4: Model Comparison
with tab4:
st.header("Model Comparison Analysis")
st.markdown("""
Compare two models side by side across different categories.
The bar chart visualizes the differences in performance across selected categories,
while the scatter plot provides an overview of Precision vs. Recall per class.
""")
# Create two columns for model selection
col1, col2 = st.columns(2)
# Model selection dropdown menus
with col1:
model1 = st.selectbox(
"Select First Model:",
st.session_state.combined_df['Model'].unique(),
key='model1'
)
with col2:
# Filter out the first selected model from options
available_models = [m for m in st.session_state.combined_df['Model'].unique() if m != model1]
model2 = st.selectbox(
"Select Second Model:",
available_models,
key='model2'
)
# Category selection
selected_category = st.selectbox(
"Select Category for Comparison:",
st.session_state.combined_df['Category'].unique(),
key='compare_category'
)
# Filter data for both models
model1_data = st.session_state.combined_df[
(st.session_state.combined_df['Model'] == model1) &
(st.session_state.combined_df['Category'] == selected_category)
]
model2_data = st.session_state.combined_df[
(st.session_state.combined_df['Model'] == model2) &
(st.session_state.combined_df['Category'] == selected_category)
]
# Define metrics list
metrics = ['F1 Score', 'Precision', 'Recall']
# Create comparison tables section
st.subheader("Detailed Metrics Comparison")
# Create a table for each metric
for metric in metrics:
st.markdown(f"#### {metric} Comparison")
# Prepare data for the metric table
metric_data = []
        for class_name in model1_data['Class'].unique():
            # Get values for both models, skipping classes absent from either one
            # (indexing with .iloc[0] on an empty frame would raise IndexError)
            m1_rows = model1_data[model1_data['Class'] == class_name]
            m2_rows = model2_data[model2_data['Class'] == class_name]
            if m1_rows.empty or m2_rows.empty:
                continue
            m1_value = m1_rows[metric].iloc[0]
            m2_value = m2_rows[metric].iloc[0]
            diff = m1_value - m2_value
            # Add to comparison data
            metric_data.append({
                'Class': class_name,
                model1: m1_value,
                model2: m2_value,
                'Difference': diff
            })
# Create DataFrame for the metric
metric_df = pd.DataFrame(metric_data)
        # Style and display the table with the shared comparison styler defined
        # above, which formats values as percentages and shades the Difference
        # column red-to-green (previously an unused local styler was defined here
        # and an ad-hoc color function tinted every positive cell green)
        st.dataframe(style_comparison_dataframe(metric_df), use_container_width=True)
# Add visual separator
st.markdown("---")
# Visualizations section
st.subheader("Visual Performance Analysis")
# Metric selector for bar chart
selected_metric = st.selectbox(
"Select Metric for Comparison:",
metrics,
key='compare_metric'
)
# Prepare data for bar chart
comparison_data = pd.DataFrame()
# Get data for both models
for idx, (model_name, model_data) in enumerate([(model1, model1_data), (model2, model2_data)]):
# Filter out Overall classes and select relevant columns
model_metrics = model_data[~model_data['Class'].str.contains('Overall', na=False)][['Class', selected_metric]]
model_metrics = model_metrics.rename(columns={selected_metric: model_name})
# Merge with existing data or create new DataFrame
if idx == 0:
comparison_data = model_metrics
else:
comparison_data = comparison_data.merge(model_metrics, on='Class', how='outer')
# Create bar chart
fig_bar = go.Figure()
# Add bars for first model
fig_bar.add_trace(go.Bar(
name=model1,
x=comparison_data['Class'],
y=comparison_data[model1],
marker_color='rgb(55, 83, 109)'
))
# Add bars for second model
fig_bar.add_trace(go.Bar(
name=model2,
x=comparison_data['Class'],
y=comparison_data[model2],
marker_color='rgb(26, 118, 255)'
))
# Update bar chart layout
fig_bar.update_layout(
title=f"{selected_metric} Comparison by Class",
xaxis_title="Class",
yaxis_title=f"{selected_metric} (%)",
barmode='group',
xaxis_tickangle=-45,
height=500,
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
),
yaxis=dict(range=[0, 1])
)
# Display bar chart
st.plotly_chart(fig_bar, use_container_width=True)
# Create Precision-Recall scatter plot
st.markdown("#### Precision-Recall Analysis")
# Filter data for scatter plot
model1_scatter = model1_data[~model1_data['Class'].str.contains('Overall', na=False)]
model2_scatter = model2_data[~model2_data['Class'].str.contains('Overall', na=False)]
# Create scatter plot
fig_scatter = go.Figure()
# Add scatter points for first model
fig_scatter.add_trace(go.Scatter(
x=model1_scatter['Precision']*100,
y=model1_scatter['Recall']*100,
mode='markers+text',
name=model1,
text=model1_scatter['Class'],
textposition="top center",
marker=dict(size=10)
))
# Add scatter points for second model
fig_scatter.add_trace(go.Scatter(
x=model2_scatter['Precision']*100,
y=model2_scatter['Recall']*100,
mode='markers+text',
name=model2,
text=model2_scatter['Class'],
textposition="top center",
marker=dict(size=10)
))
# Add reference line
fig_scatter.add_trace(go.Scatter(
x=[0, 100],
y=[0, 100],
mode='lines',
line=dict(dash='dash', color='gray'),
showlegend=False
))
# Update scatter plot layout
fig_scatter.update_layout(
title="Precision vs Recall Analysis by Class",
xaxis_title="Precision (%)",
yaxis_title="Recall (%)",
xaxis=dict(range=[0, 100]),
yaxis=dict(range=[0, 100]),
height=600,
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
)
)
# Display scatter plot
st.plotly_chart(fig_scatter, use_container_width=True)
# Footer
st.markdown("---")
st.markdown("Dashboard created for model evaluation and comparison")
st.markdown("Β© 2024 Nexar")