import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go from comparison import ModelEvaluator, ModelComparison import matplotlib.pyplot as plt import seaborn as sns import io import os import base64 # Page config st.set_page_config( page_title="Nexar Driving Leaderboard", page_icon="nexar_logo.png", layout="wide" ) # Custom styling st.markdown(""" """, unsafe_allow_html=True) # Header col1, col2 = st.columns([0.15, 0.85]) with col1: st.image("nexar_logo.png", width=600) with col2: st.title("Driving Leaderboard") # Data loading function @st.cache_data def load_data(directory='results', labels_filename='Labels.csv'): labels_path = os.path.join(directory, labels_filename) df_labels = pd.read_csv(labels_path) evaluators = [] for filename in os.listdir(directory): if filename.endswith('.csv') and filename != labels_filename: model_name = os.path.splitext(filename)[0] df_model = pd.read_csv(os.path.join(directory, filename)) evaluator = ModelEvaluator(df_labels, df_model, model_name) evaluators.append(evaluator) model_comparison = ModelComparison(evaluators) return model_comparison # Initialize session state if 'model_comparison' not in st.session_state: st.session_state.model_comparison = load_data() st.session_state.leaderboard_df = st.session_state.model_comparison.transform_to_leaderboard() st.session_state.combined_df = st.session_state.model_comparison.combined_df # Create tabs tab1, tab2, tab3, tab4 = st.tabs([ "📈 Leaderboard", "📊 Class Performance", "🔍 Detailed Metrics", "⚖️ Model Comparison" ]) def style_dataframe(df, highlight_first_column=True, show_progress_bars=True): numeric_cols = df.select_dtypes(include=['float64']).columns def color_background(val): """Return background color style based on value""" return f'background-color: rgba({int(255 * (1 - val))}, {int(255 * val)}, 0, 0.2)' def apply_colors_to_series(s): """Apply color gradient to a series of values""" if len(s) == 0: return [] normalized = (s - s.min()) / (s.max() - s.min()) if s.max() != s.min() else [0.5] * len(s) return [color_background(val) for val in normalized] styled = df.style.format({col: '{:.2f}%' for col in numeric_cols}) # First apply highlighting to first column if needed if highlight_first_column and len(numeric_cols) > 0: first_numeric_col = numeric_cols[0] styled = styled.apply(lambda x: [ 'background-color: rgba(74, 144, 226, 0.2)' if col == first_numeric_col else '' for col in df.columns ], axis=1) # Then apply color gradients if needed if show_progress_bars: for col in numeric_cols: styled = styled.apply(lambda s: apply_colors_to_series(s), subset=[col]) styled = styled.set_properties(**{ 'padding': '10px', 'border': '1px solid #dee2e6', 'text-align': 'center' }) styled = styled.set_table_styles([ {'selector': 'th', 'props': [ ('background-color', '#4a90e2'), ('color', 'white'), ('font-weight', 'bold'), ('padding', '10px'), ('text-align', 'center') ]}, {'selector': 'tr:hover', 'props': [ ('background-color', '#edf2f7') ]} ]) return styled def style_comparison_dataframe(df): """Style dataframe specifically for model comparison tables""" # Format all numeric columns as percentages numeric_cols = df.select_dtypes(include=['float64']).columns styled = df.style.format({col: '{:.2f}%' for col in numeric_cols}) def color_difference(x): """Color the difference column from red to green""" if pd.isna(x): return '' # Normalize the value to a -1 to 1 scale for coloring normalized = max(min(x / 10, 1), -1) # Scale of ±10% if normalized > 0: return f'background-color: rgba(0, 128, 0, {abs(normalized) * 0.3})' else: return f'background-color: rgba(255, 0, 0, {abs(normalized) * 0.3})' # Apply color gradient only to the 'Difference' column if 'Difference' in df.columns: styled = styled.applymap(color_difference, subset=['Difference']) styled = styled.set_properties(**{ 'padding': '10px', 'border': '1px solid #dee2e6', 'text-align': 'center' }) styled = styled.set_table_styles([ {'selector': 'th', 'props': [ ('background-color', '#4a90e2'), ('color', 'white'), ('font-weight', 'bold'), ('padding', '10px'), ('text-align', 'center') ]}, {'selector': 'tr:hover', 'props': [ ('background-color', '#edf2f7') ]} ]) return styled # Tab 1: Leaderboard with tab1: st.subheader("Model Performance Leaderboard") st.markdown(""" **Welcome to the Nexar Driving Leaderboard!** This dashboard compares the performance of various AI models in detecting driving incidents. The models are evaluated based on key metrics such as F1 Score, Precision, and Recall. You can sort the table by different metrics using the dropdown menu. """) st.markdown(""" The table below ranks models based on their ability to detect driving events. Use the dropdown below to sort by a specific metric. """) sort_col = st.selectbox( "Sort by metric:", options=[col for col in st.session_state.leaderboard_df.columns if col not in ['Rank', 'Model']], key='leaderboard_sort' ) sorted_df = st.session_state.leaderboard_df.sort_values(by=sort_col, ascending=False) st.dataframe( style_dataframe(sorted_df), use_container_width=True, ) metrics = ['F1 Score', 'Precision', 'Recall'] selected_metric = st.selectbox("Select Metric for Category Analysis:", metrics) category_data = st.session_state.combined_df[ st.session_state.combined_df['Class'].str.contains('Overall') ] fig = px.bar( category_data, x='Category', y=selected_metric, color='Model', barmode='group', title=f'Category-level {selected_metric} by Model', ) fig.update_layout( xaxis_title="Category", yaxis_title=selected_metric, legend_title="Model" ) st.plotly_chart(fig, use_container_width=True) # Tab 2: Class Performance with tab2: st.subheader("Class-Level Performance Analysis") st.markdown(""" This section provides a detailed breakdown of model performance across specific event classes. You can select a category, metric, and models to compare their effectiveness in recognizing different types of driving incidents. """) categories = st.session_state.combined_df['Category'].unique() col1, col2, col3 = st.columns(3) with col1: selected_category = st.selectbox( "Select Category:", categories, key='class_category' ) with col2: selected_metric = st.selectbox( "Select Metric:", metrics, key='class_metric' ) with col3: selected_models = st.multiselect( "Select Models:", st.session_state.combined_df['Model'].unique(), default=st.session_state.combined_df['Model'].unique() ) # Create a consistent color mapping for all models plotly_colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52'] model_colors = {model: plotly_colors[i % len(plotly_colors)] for i, model in enumerate(sorted(st.session_state.combined_df['Model'].unique()))} class_data = st.session_state.combined_df[ (st.session_state.combined_df['Category'] == selected_category) & (~st.session_state.combined_df['Class'].str.contains('Overall')) & (st.session_state.combined_df['Model'].isin(selected_models)) ] # Bar chart with consistent colors fig = px.bar( class_data, x='Class', y=selected_metric, color='Model', barmode='group', title=f'{selected_metric} by Class for {selected_category}', color_discrete_map=model_colors, range_y=[0, 1] if selected_metric in ['F1 Score', 'Precision', 'Recall'] else None ) st.plotly_chart(fig, use_container_width=True) # Calculate how many columns we need (aim for about 4-5 models per row) models_per_row = 4 num_rows = (len(selected_models) + models_per_row - 1) // models_per_row st.markdown("### Select Models to Display:") # Create toggles for models using st.columns for row in range(num_rows): cols = st.columns(models_per_row) for col_idx in range(models_per_row): model_idx = row * models_per_row + col_idx if model_idx < len(selected_models): model = selected_models[model_idx] container = cols[col_idx].container() # Get the consistent color for this model color = model_colors[model] # Initialize toggle state if needed toggle_key = f"toggle_{model}" if toggle_key not in st.session_state: st.session_state[toggle_key] = True # Create colored legend item with HTML container.markdown( f"""
""", unsafe_allow_html=True ) # Create the checkbox without reassigning to session state container.checkbox( f" {model}", # Add some spacing to account for the circle value=st.session_state[toggle_key], key=toggle_key # Use toggle_key directly as the key ) # Individual Precision-Recall plots for each class unique_classes = class_data['Class'].unique() num_classes = len(unique_classes) # Calculate number of rows needed (3 plots per row) num_rows = (num_classes + 2) // 3 # Using ceiling division # Create plots row by row for row in range(num_rows): cols = st.columns(3) for col_idx in range(3): class_idx = row * 3 + col_idx if class_idx < num_classes: current_class = unique_classes[class_idx] # Filter data based on visible models visible_models = [model for model in selected_models if st.session_state[f"toggle_{model}"]] class_specific_data = class_data[ (class_data['Class'] == current_class) & (class_data['Model'].isin(visible_models)) ] fig = px.scatter( class_specific_data, x='Precision', y='Recall', color='Model', title=f'Precision vs Recall: {current_class}', height=300, color_discrete_map=model_colors # Use consistent colors ) # Update layout for better visibility fig.update_layout( xaxis_range=[0, 1], yaxis_range=[0, 1], margin=dict(l=40, r=40, t=40, b=40), showlegend=False # Hide individual legends ) # Add diagonal reference line fig.add_trace( go.Scatter( x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash', color='gray'), showlegend=False ) ) cols[col_idx].plotly_chart(fig, use_container_width=True) # Tab 3: Detailed Metrics with tab3: st.subheader("Detailed Metrics Analysis") selected_model = st.selectbox( "Select Model for Detailed Analysis:", st.session_state.combined_df['Model'].unique() ) model_data = st.session_state.combined_df[ st.session_state.combined_df['Model'] == selected_model ] # Create metrics tables st.markdown("### Performance Metrics by Category") # Get unique categories and relevant classes for each category categories = model_data['Category'].unique() metrics = ['F1 Score', 'Precision', 'Recall'] # Process data for each category for category in categories: st.markdown(f"#### {category}") # Filter data for this category category_data = model_data[model_data['Category'] == category].copy() # Create a clean table for this category category_metrics = pd.DataFrame() # Get classes for this category (excluding 'Overall' prefix) classes = category_data[~category_data['Class'].str.contains('Overall')]['Class'].unique() # Add the overall metric for this category overall_data = category_data[category_data['Class'].str.contains('Overall')] # Initialize the DataFrame with classes as index category_metrics = pd.DataFrame(index=classes) # Add metrics columns for metric in metrics: # Add class-specific metrics class_metrics = {} for class_name in classes: class_data = category_data[category_data['Class'] == class_name] if not class_data.empty: class_metrics[class_name] = class_data[metric].iloc[0] category_metrics[metric] = pd.Series(class_metrics) # Add overall metrics as a separate row if not overall_data.empty: overall_row = pd.DataFrame({ metric: [overall_data[metric].iloc[0]] for metric in metrics }, index=['Overall']) category_metrics = pd.concat([overall_row, category_metrics]) # Display the table styled_metrics = style_dataframe(category_metrics.round(4)) st.dataframe(styled_metrics, use_container_width=True) # Add spacing between categories st.markdown("---") # Export functionality st.markdown("### Export Data") # Prepare export data export_data = pd.DataFrame() for category in categories: category_data = model_data[model_data['Category'] == category].copy() category_metrics = pd.pivot_table( category_data, index='Class', values=metrics, aggfunc='first' ).round(4) export_data = pd.concat([export_data, category_metrics]) # Create download button csv = export_data.to_csv().encode() st.download_button( "Download Detailed Metrics", csv, f"detailed_metrics_{selected_model}.csv", "text/csv", key='download-csv' ) # Tab 4: Model Comparison with tab4: st.header("Model Comparison Analysis") st.markdown(""" Compare two models side by side across different categories. The bar chart visualizes the differences in performance across selected categories, while the scatter plot provides an overview of Precision vs. Recall per class. """) # Create two columns for model selection col1, col2 = st.columns(2) # Model selection dropdown menus with col1: model1 = st.selectbox( "Select First Model:", st.session_state.combined_df['Model'].unique(), key='model1' ) with col2: # Filter out the first selected model from options available_models = [m for m in st.session_state.combined_df['Model'].unique() if m != model1] model2 = st.selectbox( "Select Second Model:", available_models, key='model2' ) # Category selection selected_category = st.selectbox( "Select Category for Comparison:", st.session_state.combined_df['Category'].unique(), key='compare_category' ) # Filter data for both models model1_data = st.session_state.combined_df[ (st.session_state.combined_df['Model'] == model1) & (st.session_state.combined_df['Category'] == selected_category) ] model2_data = st.session_state.combined_df[ (st.session_state.combined_df['Model'] == model2) & (st.session_state.combined_df['Category'] == selected_category) ] # Define metrics list metrics = ['F1 Score', 'Precision', 'Recall'] # Create comparison tables section st.subheader("Detailed Metrics Comparison") # Create a table for each metric for metric in metrics: st.markdown(f"#### {metric} Comparison") # Prepare data for the metric table metric_data = [] for class_name in model1_data['Class'].unique(): # Get values for both models m1_value = model1_data[model1_data['Class'] == class_name][metric].iloc[0] m2_value = model2_data[model2_data['Class'] == class_name][metric].iloc[0] diff = m1_value - m2_value # Add to comparison data metric_data.append({ 'Class': class_name, model1: m1_value, model2: m2_value, 'Difference': diff }) # Create DataFrame for the metric metric_df = pd.DataFrame(metric_data) # Style the table def style_metric_table(df): return df.style\ .format({ model1: '{:.2f}%', model2: '{:.2f}%', 'Difference': '{:+.2f}%' })\ .background_gradient( cmap='RdYlGn', subset=['Difference'], vmin=-10, vmax=10 )\ .set_properties(**{ 'text-align': 'center', 'padding': '10px', 'border': '1px solid #dee2e6' })\ .set_table_styles([ {'selector': 'th', 'props': [ ('background-color', '#4a90e2'), ('color', 'white'), ('font-weight', 'bold'), ('text-align', 'center'), ('padding', '10px') ]} ]) # Display the styled table def color_negative_positive(val): try: color = 'green' if float(val) > 0 else 'red' if float(val) < 0 else 'black' return f'color: {color}' except: return '' styled_df = metric_df.style\ .applymap(color_negative_positive)\ .format(precision=2) st.dataframe(styled_df, use_container_width=True) # Add visual separator st.markdown("---") # Visualizations section st.subheader("Visual Performance Analysis") # Metric selector for bar chart selected_metric = st.selectbox( "Select Metric for Comparison:", metrics, key='compare_metric' ) # Prepare data for bar chart comparison_data = pd.DataFrame() # Get data for both models for idx, (model_name, model_data) in enumerate([(model1, model1_data), (model2, model2_data)]): # Filter out Overall classes and select relevant columns model_metrics = model_data[~model_data['Class'].str.contains('Overall', na=False)][['Class', selected_metric]] model_metrics = model_metrics.rename(columns={selected_metric: model_name}) # Merge with existing data or create new DataFrame if idx == 0: comparison_data = model_metrics else: comparison_data = comparison_data.merge(model_metrics, on='Class', how='outer') # Create bar chart fig_bar = go.Figure() # Add bars for first model fig_bar.add_trace(go.Bar( name=model1, x=comparison_data['Class'], y=comparison_data[model1], marker_color='rgb(55, 83, 109)' )) # Add bars for second model fig_bar.add_trace(go.Bar( name=model2, x=comparison_data['Class'], y=comparison_data[model2], marker_color='rgb(26, 118, 255)' )) # Update bar chart layout fig_bar.update_layout( title=f"{selected_metric} Comparison by Class", xaxis_title="Class", yaxis_title=f"{selected_metric} (%)", barmode='group', xaxis_tickangle=-45, height=500, showlegend=True, legend=dict( yanchor="top", y=0.99, xanchor="right", x=0.99 ), yaxis=dict(range=[0, 1]) ) # Display bar chart st.plotly_chart(fig_bar, use_container_width=True) # Create Precision-Recall scatter plot st.markdown("#### Precision-Recall Analysis") # Filter data for scatter plot model1_scatter = model1_data[~model1_data['Class'].str.contains('Overall', na=False)] model2_scatter = model2_data[~model2_data['Class'].str.contains('Overall', na=False)] # Create scatter plot fig_scatter = go.Figure() # Add scatter points for first model fig_scatter.add_trace(go.Scatter( x=model1_scatter['Precision']*100, y=model1_scatter['Recall']*100, mode='markers+text', name=model1, text=model1_scatter['Class'], textposition="top center", marker=dict(size=10) )) # Add scatter points for second model fig_scatter.add_trace(go.Scatter( x=model2_scatter['Precision']*100, y=model2_scatter['Recall']*100, mode='markers+text', name=model2, text=model2_scatter['Class'], textposition="top center", marker=dict(size=10) )) # Add reference line fig_scatter.add_trace(go.Scatter( x=[0, 100], y=[0, 100], mode='lines', line=dict(dash='dash', color='gray'), showlegend=False )) # Update scatter plot layout fig_scatter.update_layout( title="Precision vs Recall Analysis by Class", xaxis_title="Precision (%)", yaxis_title="Recall (%)", xaxis=dict(range=[0, 100]), yaxis=dict(range=[0, 100]), height=600, showlegend=True, legend=dict( yanchor="top", y=0.99, xanchor="right", x=0.99 ) ) # Display scatter plot st.plotly_chart(fig_scatter, use_container_width=True) # Footer st.markdown("---") st.markdown("Dashboard created for model evaluation and comparison") st.markdown("© 2024 Nexar")