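"""Streamlit dashboard for the Nexar Driving Leaderboard.

Loads per-model prediction CSVs plus a shared Labels.csv from the results/
directory, scores them via the local `comparison` module, and renders four
tabs: leaderboard, class performance, detailed metrics, and model comparison.
"""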
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from comparison import ModelEvaluator, ModelComparison
import os
# Page config
st.set_page_config(
page_title="Nexar Driving Leaderboard",
page_icon="nexar_logo.png",
layout="wide"
)
# Custom styling
st.markdown("""
<style>
.main { padding: 2rem; }
.stTabs [data-baseweb="tab-list"] { gap: 8px; }
.stTabs [data-baseweb="tab"] {
padding: 8px 16px;
border-radius: 4px;
}
.metric-card {
background-color: #f8f9fa;
padding: 20px;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
</style>
""", unsafe_allow_html=True)
# Header
col1, col2 = st.columns([0.15, 0.85])
with col1:
st.image("nexar_logo.png", width=600)
with col2:
st.title("Driving Leaderboard")
# Data loading function
@st.cache_data
def load_data(directory='results', labels_filename='Labels.csv'):
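    """Load ground-truth labels and per-model prediction CSVs into a ModelComparison.

    Every CSV in `directory` other than `labels_filename` is treated as one model's
    predictions; the file name (without .csv) becomes the model name. Cached by
    Streamlit so the files are read only once per session.
    """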
labels_path = os.path.join(directory, labels_filename)
df_labels = pd.read_csv(labels_path)
evaluators = []
for filename in os.listdir(directory):
if filename.endswith('.csv') and filename != labels_filename:
model_name = os.path.splitext(filename)[0]
df_model = pd.read_csv(os.path.join(directory, filename))
evaluator = ModelEvaluator(df_labels, df_model, model_name)
evaluators.append(evaluator)
model_comparison = ModelComparison(evaluators)
return model_comparison
# Initialize session state
if 'model_comparison' not in st.session_state:
st.session_state.model_comparison = load_data()
st.session_state.leaderboard_df = st.session_state.model_comparison.transform_to_leaderboard()
st.session_state.combined_df = st.session_state.model_comparison.combined_df
# Create tabs
tab1, tab2, tab3, tab4 = st.tabs([
"πŸ“ˆ Leaderboard",
"πŸ“Š Class Performance",
"πŸ” Detailed Metrics",
"βš–οΈ Model Comparison"
])
def style_dataframe(df, highlight_first_column=True, show_progress_bars=True):
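    """Style a metrics DataFrame: percentage formatting on numeric columns, an
    optional highlight on the first numeric column, and a per-column
    red-to-green gradient when `show_progress_bars` is enabled."""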
numeric_cols = df.select_dtypes(include=['float64']).columns
def color_background(val):
"""Return background color style based on value"""
return f'background-color: rgba({int(255 * (1 - val))}, {int(255 * val)}, 0, 0.2)'
def apply_colors_to_series(s):
"""Apply color gradient to a series of values"""
if len(s) == 0:
return []
normalized = (s - s.min()) / (s.max() - s.min()) if s.max() != s.min() else [0.5] * len(s)
return [color_background(val) for val in normalized]
styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})
# First apply highlighting to first column if needed
if highlight_first_column and len(numeric_cols) > 0:
first_numeric_col = numeric_cols[0]
styled = styled.apply(lambda x: [
'background-color: rgba(74, 144, 226, 0.2)' if col == first_numeric_col else ''
for col in df.columns
], axis=1)
# Then apply color gradients if needed
if show_progress_bars:
for col in numeric_cols:
styled = styled.apply(lambda s: apply_colors_to_series(s), subset=[col])
styled = styled.set_properties(**{
'padding': '10px',
'border': '1px solid #dee2e6',
'text-align': 'center'
})
styled = styled.set_table_styles([
{'selector': 'th', 'props': [
('background-color', '#4a90e2'),
('color', 'white'),
('font-weight', 'bold'),
('padding', '10px'),
('text-align', 'center')
]},
{'selector': 'tr:hover', 'props': [
('background-color', '#edf2f7')
]}
])
return styled
def style_comparison_dataframe(df):
"""Style dataframe specifically for model comparison tables"""
# Format all numeric columns as percentages
numeric_cols = df.select_dtypes(include=['float64']).columns
styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})
def color_difference(x):
"""Color the difference column from red to green"""
if pd.isna(x):
return ''
# Normalize the value to a -1 to 1 scale for coloring
normalized = max(min(x / 10, 1), -1) # Scale of Β±10%
if normalized > 0:
return f'background-color: rgba(0, 128, 0, {abs(normalized) * 0.3})'
else:
return f'background-color: rgba(255, 0, 0, {abs(normalized) * 0.3})'
# Apply color gradient only to the 'Difference' column
if 'Difference' in df.columns:
styled = styled.applymap(color_difference, subset=['Difference'])
styled = styled.set_properties(**{
'padding': '10px',
'border': '1px solid #dee2e6',
'text-align': 'center'
})
styled = styled.set_table_styles([
{'selector': 'th', 'props': [
('background-color', '#4a90e2'),
('color', 'white'),
('font-weight', 'bold'),
('padding', '10px'),
('text-align', 'center')
]},
{'selector': 'tr:hover', 'props': [
('background-color', '#edf2f7')
]}
])
return styled
# Tab 1: Leaderboard
with tab1:
st.subheader("Model Performance Leaderboard")
st.markdown("""
**Welcome to the Nexar Driving Leaderboard!**
This dashboard compares the performance of various AI models in detecting driving incidents.
The models are evaluated based on key metrics such as F1 Score, Precision, and Recall.
You can sort the table by different metrics using the dropdown menu.
""")
st.markdown("""
The table below ranks models based on their ability to detect driving events.
Use the dropdown below to sort by a specific metric.
""")
sort_col = st.selectbox(
"Sort by metric:",
options=[col for col in st.session_state.leaderboard_df.columns if col not in ['Rank', 'Model']],
key='leaderboard_sort'
)
sorted_df = st.session_state.leaderboard_df.sort_values(by=sort_col, ascending=False)
st.dataframe(
style_dataframe(sorted_df),
use_container_width=True,
)
metrics = ['F1 Score', 'Precision', 'Recall']
selected_metric = st.selectbox("Select Metric for Category Analysis:", metrics)
category_data = st.session_state.combined_df[
st.session_state.combined_df['Class'].str.contains('Overall')
]
fig = px.bar(
category_data,
x='Category',
y=selected_metric,
color='Model',
barmode='group',
title=f'Category-level {selected_metric} by Model',
)
fig.update_layout(
xaxis_title="Category",
yaxis_title=selected_metric,
legend_title="Model"
)
st.plotly_chart(fig, use_container_width=True)
# Tab 2: Class Performance
with tab2:
st.subheader("Class-Level Performance Analysis")
st.markdown("""
This section provides a detailed breakdown of model performance across specific event classes.
You can select a category, metric, and models to compare their effectiveness in recognizing
different types of driving incidents.
""")
categories = st.session_state.combined_df['Category'].unique()
col1, col2, col3 = st.columns(3)
with col1:
selected_category = st.selectbox(
"Select Category:",
categories,
key='class_category'
)
with col2:
selected_metric = st.selectbox(
"Select Metric:",
metrics,
key='class_metric'
)
with col3:
selected_models = st.multiselect(
"Select Models:",
st.session_state.combined_df['Model'].unique(),
default=st.session_state.combined_df['Model'].unique()
)
# Create a consistent color mapping for all models
plotly_colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
model_colors = {model: plotly_colors[i % len(plotly_colors)] for i, model in enumerate(sorted(st.session_state.combined_df['Model'].unique()))}
class_data = st.session_state.combined_df[
(st.session_state.combined_df['Category'] == selected_category) &
(~st.session_state.combined_df['Class'].str.contains('Overall')) &
(st.session_state.combined_df['Model'].isin(selected_models))
]
# Bar chart with consistent colors
fig = px.bar(
class_data,
x='Class',
y=selected_metric,
color='Model',
barmode='group',
title=f'{selected_metric} by Class for {selected_category}',
color_discrete_map=model_colors,
        range_y=[0, 1]  # all selectable metrics are fractions in [0, 1]
)
st.plotly_chart(fig, use_container_width=True)
# Calculate how many columns we need (aim for about 4-5 models per row)
models_per_row = 4
num_rows = (len(selected_models) + models_per_row - 1) // models_per_row
st.markdown("### Select Models to Display:")
# Create toggles for models using st.columns
for row in range(num_rows):
cols = st.columns(models_per_row)
for col_idx in range(models_per_row):
model_idx = row * models_per_row + col_idx
if model_idx < len(selected_models):
model = selected_models[model_idx]
container = cols[col_idx].container()
# Get the consistent color for this model
color = model_colors[model]
# Initialize toggle state if needed
toggle_key = f"toggle_{model}"
if toggle_key not in st.session_state:
st.session_state[toggle_key] = True
# Create colored legend item with HTML
container.markdown(
f"""
<div style='display: flex; align-items: center; margin-bottom: -40px; pointer-events: none;'>
<span style='display: inline-block; width: 12px; height: 12px; background-color: {color}; border-radius: 50%; margin-right: 8px;'></span>
</div>
""",
unsafe_allow_html=True
)
                    # Create the checkbox; the pre-seeded session-state key supplies
                    # the value, so passing value= as well would trigger a Streamlit
                    # warning about setting both a default and the Session State API
                    container.checkbox(
                        f" {model}",  # leading space leaves room for the color swatch
                        key=toggle_key
                    )
# Individual Precision-Recall plots for each class
unique_classes = class_data['Class'].unique()
num_classes = len(unique_classes)
# Calculate number of rows needed (3 plots per row)
num_rows = (num_classes + 2) // 3 # Using ceiling division
# Create plots row by row
for row in range(num_rows):
cols = st.columns(3)
for col_idx in range(3):
class_idx = row * 3 + col_idx
if class_idx < num_classes:
current_class = unique_classes[class_idx]
# Filter data based on visible models
visible_models = [model for model in selected_models
if st.session_state[f"toggle_{model}"]]
class_specific_data = class_data[
(class_data['Class'] == current_class) &
(class_data['Model'].isin(visible_models))
]
fig = px.scatter(
class_specific_data,
x='Precision',
y='Recall',
color='Model',
title=f'Precision vs Recall: {current_class}',
height=300,
color_discrete_map=model_colors # Use consistent colors
)
# Update layout for better visibility
fig.update_layout(
xaxis_range=[0, 1],
yaxis_range=[0, 1],
margin=dict(l=40, r=40, t=40, b=40),
showlegend=False # Hide individual legends
)
# Add diagonal reference line
fig.add_trace(
go.Scatter(
x=[0, 1],
y=[0, 1],
mode='lines',
line=dict(dash='dash', color='gray'),
showlegend=False
)
)
cols[col_idx].plotly_chart(fig, use_container_width=True)
# Tab 3: Detailed Metrics
with tab3:
st.subheader("Detailed Metrics Analysis")
selected_model = st.selectbox(
"Select Model for Detailed Analysis:",
st.session_state.combined_df['Model'].unique()
)
model_data = st.session_state.combined_df[
st.session_state.combined_df['Model'] == selected_model
]
# Create metrics tables
st.markdown("### Performance Metrics by Category")
# Get unique categories and relevant classes for each category
categories = model_data['Category'].unique()
metrics = ['F1 Score', 'Precision', 'Recall']
# Process data for each category
for category in categories:
st.markdown(f"#### {category}")
# Filter data for this category
category_data = model_data[model_data['Category'] == category].copy()
        # Get classes for this category (excluding the 'Overall' summary row)
        classes = category_data[~category_data['Class'].str.contains('Overall')]['Class'].unique()
        # Pull out the overall metrics row for this category
        overall_data = category_data[category_data['Class'].str.contains('Overall')]
        # Build a clean table with classes as the index
        category_metrics = pd.DataFrame(index=classes)
# Add metrics columns
for metric in metrics:
# Add class-specific metrics
class_metrics = {}
for class_name in classes:
class_data = category_data[category_data['Class'] == class_name]
if not class_data.empty:
class_metrics[class_name] = class_data[metric].iloc[0]
category_metrics[metric] = pd.Series(class_metrics)
# Add overall metrics as a separate row
if not overall_data.empty:
overall_row = pd.DataFrame({
metric: [overall_data[metric].iloc[0]] for metric in metrics
}, index=['Overall'])
category_metrics = pd.concat([overall_row, category_metrics])
# Display the table
styled_metrics = style_dataframe(category_metrics.round(4))
st.dataframe(styled_metrics, use_container_width=True)
# Add spacing between categories
st.markdown("---")
# Export functionality
st.markdown("### Export Data")
# Prepare export data
export_data = pd.DataFrame()
for category in categories:
category_data = model_data[model_data['Category'] == category].copy()
category_metrics = pd.pivot_table(
category_data,
index='Class',
values=metrics,
aggfunc='first'
).round(4)
export_data = pd.concat([export_data, category_metrics])
# Create download button
csv = export_data.to_csv().encode()
st.download_button(
"Download Detailed Metrics",
csv,
f"detailed_metrics_{selected_model}.csv",
"text/csv",
key='download-csv'
)
# Tab 4: Model Comparison
with tab4:
st.header("Model Comparison Analysis")
st.markdown("""
Compare two models side by side across different categories.
The bar chart visualizes the differences in performance across selected categories,
while the scatter plot provides an overview of Precision vs. Recall per class.
""")
# Create two columns for model selection
col1, col2 = st.columns(2)
# Model selection dropdown menus
with col1:
model1 = st.selectbox(
"Select First Model:",
st.session_state.combined_df['Model'].unique(),
key='model1'
)
with col2:
# Filter out the first selected model from options
available_models = [m for m in st.session_state.combined_df['Model'].unique() if m != model1]
model2 = st.selectbox(
"Select Second Model:",
available_models,
key='model2'
)
# Category selection
selected_category = st.selectbox(
"Select Category for Comparison:",
st.session_state.combined_df['Category'].unique(),
key='compare_category'
)
# Filter data for both models
model1_data = st.session_state.combined_df[
(st.session_state.combined_df['Model'] == model1) &
(st.session_state.combined_df['Category'] == selected_category)
]
model2_data = st.session_state.combined_df[
(st.session_state.combined_df['Model'] == model2) &
(st.session_state.combined_df['Category'] == selected_category)
]
# Define metrics list
metrics = ['F1 Score', 'Precision', 'Recall']
# Create comparison tables section
st.subheader("Detailed Metrics Comparison")
# Create a table for each metric
for metric in metrics:
st.markdown(f"#### {metric} Comparison")
# Prepare data for the metric table
metric_data = []
        for class_name in model1_data['Class'].unique():
            # Get values for both models, skipping classes absent from either one
            # (indexing with .iloc[0] on an empty frame would raise IndexError)
            m1_rows = model1_data[model1_data['Class'] == class_name]
            m2_rows = model2_data[model2_data['Class'] == class_name]
            if m1_rows.empty or m2_rows.empty:
                continue
            m1_value = m1_rows[metric].iloc[0]
            m2_value = m2_rows[metric].iloc[0]
            diff = m1_value - m2_value
            # Add to comparison data
            metric_data.append({
                'Class': class_name,
                model1: m1_value,
                model2: m2_value,
                'Difference': diff
            })
# Create DataFrame for the metric
metric_df = pd.DataFrame(metric_data)
        # Style and display the table with the shared comparison styler defined
        # above, which formats values as percentages and shades the Difference
        # column red-to-green (previously an unused local styler was defined here
        # and an ad-hoc color function tinted every positive cell green)
        st.dataframe(style_comparison_dataframe(metric_df), use_container_width=True)
# Add visual separator
st.markdown("---")
# Visualizations section
st.subheader("Visual Performance Analysis")
# Metric selector for bar chart
selected_metric = st.selectbox(
"Select Metric for Comparison:",
metrics,
key='compare_metric'
)
# Prepare data for bar chart
comparison_data = pd.DataFrame()
# Get data for both models
for idx, (model_name, model_data) in enumerate([(model1, model1_data), (model2, model2_data)]):
# Filter out Overall classes and select relevant columns
model_metrics = model_data[~model_data['Class'].str.contains('Overall', na=False)][['Class', selected_metric]]
model_metrics = model_metrics.rename(columns={selected_metric: model_name})
# Merge with existing data or create new DataFrame
if idx == 0:
comparison_data = model_metrics
else:
comparison_data = comparison_data.merge(model_metrics, on='Class', how='outer')
# Create bar chart
fig_bar = go.Figure()
# Add bars for first model
fig_bar.add_trace(go.Bar(
name=model1,
x=comparison_data['Class'],
y=comparison_data[model1],
marker_color='rgb(55, 83, 109)'
))
# Add bars for second model
fig_bar.add_trace(go.Bar(
name=model2,
x=comparison_data['Class'],
y=comparison_data[model2],
marker_color='rgb(26, 118, 255)'
))
# Update bar chart layout
fig_bar.update_layout(
title=f"{selected_metric} Comparison by Class",
xaxis_title="Class",
yaxis_title=f"{selected_metric} (%)",
barmode='group',
xaxis_tickangle=-45,
height=500,
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
),
yaxis=dict(range=[0, 1])
)
# Display bar chart
st.plotly_chart(fig_bar, use_container_width=True)
# Create Precision-Recall scatter plot
st.markdown("#### Precision-Recall Analysis")
# Filter data for scatter plot
model1_scatter = model1_data[~model1_data['Class'].str.contains('Overall', na=False)]
model2_scatter = model2_data[~model2_data['Class'].str.contains('Overall', na=False)]
# Create scatter plot
fig_scatter = go.Figure()
# Add scatter points for first model
fig_scatter.add_trace(go.Scatter(
x=model1_scatter['Precision']*100,
y=model1_scatter['Recall']*100,
mode='markers+text',
name=model1,
text=model1_scatter['Class'],
textposition="top center",
marker=dict(size=10)
))
# Add scatter points for second model
fig_scatter.add_trace(go.Scatter(
x=model2_scatter['Precision']*100,
y=model2_scatter['Recall']*100,
mode='markers+text',
name=model2,
text=model2_scatter['Class'],
textposition="top center",
marker=dict(size=10)
))
# Add reference line
fig_scatter.add_trace(go.Scatter(
x=[0, 100],
y=[0, 100],
mode='lines',
line=dict(dash='dash', color='gray'),
showlegend=False
))
# Update scatter plot layout
fig_scatter.update_layout(
title="Precision vs Recall Analysis by Class",
xaxis_title="Precision (%)",
yaxis_title="Recall (%)",
xaxis=dict(range=[0, 100]),
yaxis=dict(range=[0, 100]),
height=600,
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
)
)
# Display scatter plot
st.plotly_chart(fig_scatter, use_container_width=True)
# Footer
st.markdown("---")
st.markdown("Dashboard created for model evaluation and comparison")
st.markdown("Β© 2024 Nexar")