Spaces:

nexar-ai
/

nexar-driving-leaderboard

Running

File size: 24,323 Bytes

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from comparison import ModelEvaluator, ModelComparison
import matplotlib.pyplot as plt
import seaborn as sns
import io
import os
import base64

# Page config: browser-tab title, icon and wide layout
PAGE_SETTINGS = {
    "page_title": "Nexar Driving Leaderboard",
    "page_icon": "nexar_logo.png",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

# Custom styling: page padding, tab spacing and the "metric-card" look
CUSTOM_CSS = """
    <style>
    .main { padding: 2rem; }
    .stTabs [data-baseweb="tab-list"] { gap: 8px; }
    .stTabs [data-baseweb="tab"] {
        padding: 8px 16px;
        border-radius: 4px;
    }
    .metric-card {
        background-color: #f8f9fa;
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    </style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Header: logo in the narrow left column, title in the wide right column
col1, col2 = st.columns([0.15, 0.85])
col1.image("nexar_logo.png", width=600)
col2.title("Driving Leaderboard")
# Data loading function
@st.cache_data
def load_data(directory='results', labels_filename='Labels.csv'):
    """Load the labels file and every model CSV into a ModelComparison.

    Args:
        directory: Folder containing the labels CSV and one CSV per model.
        labels_filename: Name of the labels CSV inside ``directory``; it is
            excluded from the list of model files.

    Returns:
        A ``ModelComparison`` built from one ``ModelEvaluator`` per model CSV.
        Each model is named after its file name without the extension.

    Raises:
        Whatever ``os.listdir`` / ``pandas.read_csv`` raise when the directory
        or a CSV file cannot be read.
    """
    labels_path = os.path.join(directory, labels_filename)
    df_labels = pd.read_csv(labels_path)

    # os.listdir returns entries in arbitrary order; sorting keeps the model
    # order (and everything derived from it in the UI) stable across runs.
    model_files = sorted(
        entry for entry in os.listdir(directory)
        if entry.endswith('.csv') and entry != labels_filename
    )

    evaluators = []
    for filename in model_files:
        model_name = os.path.splitext(filename)[0]
        df_model = pd.read_csv(os.path.join(directory, filename))
        evaluators.append(ModelEvaluator(df_labels, df_model, model_name))

    return ModelComparison(evaluators)

# Initialize session state
if 'model_comparison' not in st.session_state:
    # Build every derived object first and store them together. If any step
    # raises, no key has been written yet, so the next rerun retries this
    # branch instead of skipping it and failing later on a missing
    # leaderboard_df / combined_df.
    _model_comparison = load_data()
    _leaderboard_df = _model_comparison.transform_to_leaderboard()
    _combined_df = _model_comparison.combined_df

    st.session_state.model_comparison = _model_comparison
    st.session_state.leaderboard_df = _leaderboard_df
    st.session_state.combined_df = _combined_df

# Create tabs
tab1, tab2, tab3, tab4 = st.tabs([
    "📈 Leaderboard", 
    "📊 Class Performance",
    "🔍 Detailed Metrics",
    "⚖️ Model Comparison"
])

def style_dataframe(df, highlight_first_column=True, show_progress_bars=True):
    numeric_cols = df.select_dtypes(include=['float64']).columns
    
    def color_background(val):
        """Return background color style based on value"""
        return f'background-color: rgba({int(255 * (1 - val))}, {int(255 * val)}, 0, 0.2)'
    
    def apply_colors_to_series(s):
        """Apply color gradient to a series of values"""
        if len(s) == 0:
            return []
        normalized = (s - s.min()) / (s.max() - s.min()) if s.max() != s.min() else [0.5] * len(s)
        return [color_background(val) for val in normalized]
    
    styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})
    
    # First apply highlighting to first column if needed
    if highlight_first_column and len(numeric_cols) > 0:
        first_numeric_col = numeric_cols[0]
        styled = styled.apply(lambda x: [
            'background-color: rgba(74, 144, 226, 0.2)' if col == first_numeric_col else '' 
            for col in df.columns
        ], axis=1)
    
    # Then apply color gradients if needed
    if show_progress_bars:
        for col in numeric_cols:
            styled = styled.apply(lambda s: apply_colors_to_series(s), subset=[col])
    
    styled = styled.set_properties(**{
        'padding': '10px',
        'border': '1px solid #dee2e6',
        'text-align': 'center'
    })
    
    styled = styled.set_table_styles([
        {'selector': 'th', 'props': [
            ('background-color', '#4a90e2'),
            ('color', 'white'),
            ('font-weight', 'bold'),
            ('padding', '10px'),
            ('text-align', 'center')
        ]},
        {'selector': 'tr:hover', 'props': [
            ('background-color', '#edf2f7')
        ]}
    ])
    
    return styled

def style_comparison_dataframe(df):
    """Style dataframe specifically for model comparison tables"""
    # Format all numeric columns as percentages
    numeric_cols = df.select_dtypes(include=['float64']).columns
    
    styled = df.style.format({col: '{:.2f}%' for col in numeric_cols})
    
    def color_difference(x):
        """Color the difference column from red to green"""
        if pd.isna(x):
            return ''
        # Normalize the value to a -1 to 1 scale for coloring
        normalized = max(min(x / 10, 1), -1)  # Scale of ±10%
        if normalized > 0:
            return f'background-color: rgba(0, 128, 0, {abs(normalized) * 0.3})'
        else:
            return f'background-color: rgba(255, 0, 0, {abs(normalized) * 0.3})'
    
    # Apply color gradient only to the 'Difference' column
    if 'Difference' in df.columns:
        styled = styled.applymap(color_difference, subset=['Difference'])
    
    styled = styled.set_properties(**{
        'padding': '10px',
        'border': '1px solid #dee2e6',
        'text-align': 'center'
    })
    
    styled = styled.set_table_styles([
        {'selector': 'th', 'props': [
            ('background-color', '#4a90e2'),
            ('color', 'white'),
            ('font-weight', 'bold'),
            ('padding', '10px'),
            ('text-align', 'center')
        ]},
        {'selector': 'tr:hover', 'props': [
            ('background-color', '#edf2f7')
        ]}
    ])
    
    return styled
    
# Tab 1: Leaderboard
with tab1:

    st.subheader("Model Performance Leaderboard")
    
    st.markdown("""
        **Welcome to the Nexar Driving Leaderboard!**  
        
        This dashboard compares the performance of various AI models in detecting driving incidents.  
        The models are evaluated based on key metrics such as F1 Score, Precision, and Recall.
        You can sort the table by different metrics using the dropdown menu.
    """)

    st.markdown("""
    The table below ranks models based on their ability to detect driving events.  
    Use the dropdown below to sort by a specific metric.
    """)

    leaderboard_df = st.session_state.leaderboard_df

    # Every column except the identifying ones can be used as the sort key.
    sort_col = st.selectbox(
        "Sort by metric:",
        options=[col for col in leaderboard_df.columns if col not in ['Rank', 'Model']],
        key='leaderboard_sort'
    )

    # Highest value first
    sorted_df = leaderboard_df.sort_values(by=sort_col, ascending=False)

    st.dataframe(
        style_dataframe(sorted_df),
        use_container_width=True,
    )

    metrics = ['F1 Score', 'Precision', 'Recall']
    selected_metric = st.selectbox("Select Metric for Category Analysis:", metrics)

    combined_df = st.session_state.combined_df

    # Keep only the per-category "Overall" rows. na=False treats a missing
    # Class as "not Overall"; without it a NaN in the mask makes the boolean
    # indexing below raise.
    category_data = combined_df[
        combined_df['Class'].str.contains('Overall', na=False)
    ]

    fig = px.bar(
        category_data,
        x='Category',
        y=selected_metric,
        color='Model',
        barmode='group',
        title=f'Category-level {selected_metric} by Model',
    )

    fig.update_layout(
        xaxis_title="Category",
        yaxis_title=selected_metric,
        legend_title="Model"
    )

    st.plotly_chart(fig, use_container_width=True)

# Tab 2: Class Performance
with tab2:
    st.subheader("Class-Level Performance Analysis")
    
    st.markdown("""
        This section provides a detailed breakdown of model performance across specific event classes.  
        You can select a category, metric, and models to compare their effectiveness in recognizing  
        different types of driving incidents.
    """)

    combined_df = st.session_state.combined_df

    # Defined here so this tab does not rely on a list created by another tab.
    metrics = ['F1 Score', 'Precision', 'Recall']
    categories = combined_df['Category'].unique()

    col1, col2, col3 = st.columns(3)
    with col1:
        selected_category = st.selectbox(
            "Select Category:",
            categories,
            key='class_category'
        )
    with col2:
        selected_metric = st.selectbox(
            "Select Metric:",
            metrics,
            key='class_metric'
        )
    with col3:
        selected_models = st.multiselect(
            "Select Models:",
            combined_df['Model'].unique(),
            default=combined_df['Model'].unique()
        )

    # Create a consistent color mapping for all models (sorted by name so a
    # model keeps its colour regardless of which models are selected)
    plotly_colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
    model_colors = {
        model: plotly_colors[i % len(plotly_colors)]
        for i, model in enumerate(sorted(combined_df['Model'].unique()))
    }

    # Per-class rows of the chosen category for the chosen models.
    # na=False treats a missing Class as "not Overall" so the mask stays boolean.
    class_data = combined_df[
        (combined_df['Category'] == selected_category) &
        (~combined_df['Class'].str.contains('Overall', na=False)) &
        (combined_df['Model'].isin(selected_models))
    ]

    # Bar chart with consistent colors
    fig = px.bar(
        class_data,
        x='Class',
        y=selected_metric,
        color='Model',
        barmode='group',
        title=f'{selected_metric} by Class for {selected_category}',
        color_discrete_map=model_colors,
        range_y=[0, 1] if selected_metric in ['F1 Score', 'Precision', 'Recall'] else None
    )
    st.plotly_chart(fig, use_container_width=True)

    # Lay the model toggles out in rows of four
    models_per_row = 4
    num_rows = (len(selected_models) + models_per_row - 1) // models_per_row

    st.markdown("### Select Models to Display:")

    # Create toggles for models using st.columns
    for row in range(num_rows):
        cols = st.columns(models_per_row)
        for col_idx in range(models_per_row):
            model_idx = row * models_per_row + col_idx
            if model_idx < len(selected_models):
                model = selected_models[model_idx]
                container = cols[col_idx].container()

                # Get the consistent color for this model
                color = model_colors[model]

                # Initialize toggle state if needed (models start visible)
                toggle_key = f"toggle_{model}"
                if toggle_key not in st.session_state:
                    st.session_state[toggle_key] = True

                # Create colored legend item with HTML
                container.markdown(
                    f"""
                    <div style='display: flex; align-items: center; margin-bottom: -40px; pointer-events: none;'>
                        <span style='display: inline-block; width: 12px; height: 12px; background-color: {color}; border-radius: 50%; margin-right: 8px;'></span>
                    </div>
                    """, 
                    unsafe_allow_html=True
                )

                # The checkbox reads and writes its state through `key` alone.
                # Passing `value=` as well, for a key that was already set via
                # st.session_state, makes Streamlit warn about a conflicting
                # default, so it is omitted.
                container.checkbox(
                    f"    {model}",  # Add some spacing to account for the circle
                    key=toggle_key
                )

    # The set of visible models is the same for every plot, so compute it once.
    visible_models = [
        model for model in selected_models
        if st.session_state[f"toggle_{model}"]
    ]

    # Individual Precision-Recall plots for each class
    unique_classes = class_data['Class'].unique()
    num_classes = len(unique_classes)

    # Calculate number of rows needed (3 plots per row)
    num_rows = (num_classes + 2) // 3  # Using ceiling division

    # Create plots row by row
    for row in range(num_rows):
        cols = st.columns(3)
        for col_idx in range(3):
            class_idx = row * 3 + col_idx
            if class_idx < num_classes:
                current_class = unique_classes[class_idx]

                # Rows of this class, restricted to the toggled-on models
                class_specific_data = class_data[
                    (class_data['Class'] == current_class) &
                    (class_data['Model'].isin(visible_models))
                ]

                fig = px.scatter(
                    class_specific_data,
                    x='Precision',
                    y='Recall',
                    color='Model',
                    title=f'Precision vs Recall: {current_class}',
                    height=300,
                    color_discrete_map=model_colors  # Use consistent colors
                )

                # Update layout for better visibility
                fig.update_layout(
                    xaxis_range=[0, 1],
                    yaxis_range=[0, 1],
                    margin=dict(l=40, r=40, t=40, b=40),
                    showlegend=False  # Hide individual legends
                )

                # Add diagonal reference line
                fig.add_trace(
                    go.Scatter(
                        x=[0, 1],
                        y=[0, 1],
                        mode='lines',
                        line=dict(dash='dash', color='gray'),
                        showlegend=False
                    )
                )

                cols[col_idx].plotly_chart(fig, use_container_width=True)

# Tab 3: Detailed Metrics
with tab3:
    st.subheader("Detailed Metrics Analysis")

    selected_model = st.selectbox(
        "Select Model for Detailed Analysis:",
        st.session_state.combined_df['Model'].unique()
    )

    # All rows belonging to the selected model
    model_data = st.session_state.combined_df[
        st.session_state.combined_df['Model'] == selected_model
    ]

    # Create metrics tables
    st.markdown("### Performance Metrics by Category")

    categories = model_data['Category'].unique()
    metrics = ['F1 Score', 'Precision', 'Recall']

    # One table per category: an optional "Overall" row followed by one row per class
    for category in categories:
        st.markdown(f"#### {category}")

        category_data = model_data[model_data['Category'] == category]

        # na=False treats a missing Class as "not Overall" so the mask stays
        # boolean and can be negated / used for indexing.
        is_overall = category_data['Class'].str.contains('Overall', na=False)
        overall_data = category_data[is_overall]

        # First recorded row of each class, indexed by class name.
        category_metrics = (
            category_data[~is_overall]
            .drop_duplicates(subset='Class')
            .set_index('Class')[metrics]
            .rename_axis(None)
        )

        # Add overall metrics as a separate first row
        if not overall_data.empty:
            overall_row = pd.DataFrame({
                metric: [overall_data[metric].iloc[0]] for metric in metrics
            }, index=['Overall'])
            # Skip the empty frame when there are no class rows: concatenating
            # empty frames is deprecated in recent pandas.
            if category_metrics.empty:
                category_metrics = overall_row
            else:
                category_metrics = pd.concat([overall_row, category_metrics])

        # Display the table
        styled_metrics = style_dataframe(category_metrics.round(4))
        st.dataframe(styled_metrics, use_container_width=True)

        # Add spacing between categories
        st.markdown("---")

    # Export functionality
    st.markdown("### Export Data")

    # Prepare export data: one pivot per category (first value per class),
    # collected in a list and concatenated once.
    export_frames = [
        pd.pivot_table(
            model_data[model_data['Category'] == category],
            index='Class',
            values=metrics,
            aggfunc='first'
        ).round(4)
        for category in categories
    ]
    export_data = pd.concat(export_frames) if export_frames else pd.DataFrame()

    # Create download button
    csv = export_data.to_csv().encode()
    st.download_button(
        "Download Detailed Metrics",
        csv,
        f"detailed_metrics_{selected_model}.csv",
        "text/csv",
        key='download-csv'
    )

# Tab 4: Model Comparison
def _first_metric_by_class(model_df, metric):
    """Return `metric` indexed by class, keeping the first row of each class."""
    return model_df.drop_duplicates(subset='Class').set_index('Class')[metric]


def _build_metric_comparison(model1_data, model2_data, metric, model1, model2):
    """Build a per-class table of both models' `metric` and their difference.

    Rows follow the classes of the first model. A class the second model does
    not have yields NaN for that model and for the difference instead of
    raising.
    """
    classes = model1_data['Class'].unique()
    m1_values = _first_metric_by_class(model1_data, metric).reindex(classes)
    m2_values = _first_metric_by_class(model2_data, metric).reindex(classes)
    return pd.DataFrame({
        'Class': classes,
        model1: m1_values.to_numpy(),
        model2: m2_values.to_numpy(),
        'Difference': (m1_values - m2_values).to_numpy(),
    })


def _sign_color(val):
    """Return a CSS text colour: green if positive, red if negative, else black."""
    try:
        number = float(val)
    except (TypeError, ValueError):
        # Not a number: leave the cell unstyled.
        return ''
    if number > 0:
        return 'color: green'
    if number < 0:
        return 'color: red'
    return 'color: black'


def _style_cells(styler, func, subset):
    """Apply a per-cell CSS function to `subset` of a Styler.

    Styler.applymap was renamed to Styler.map in pandas 2.1 and the old name
    is deprecated, so the new name is preferred when it exists.
    """
    cellwise = styler.map if hasattr(styler, 'map') else styler.applymap
    return cellwise(func, subset=subset)


def _render_model_comparison(model1, model2, model1_data, model2_data):
    """Render the comparison tables, bar chart and scatter plot for two models.

    Args:
        model1: Name of the first model.
        model2: Name of the second model.
        model1_data: Rows of the first model for the selected category.
        model2_data: Rows of the second model for the selected category.
    """
    # Define metrics list
    metrics = ['F1 Score', 'Precision', 'Recall']

    # Create comparison tables section
    st.subheader("Detailed Metrics Comparison")

    # Create a table for each metric
    for metric in metrics:
        st.markdown(f"#### {metric} Comparison")

        metric_df = _build_metric_comparison(
            model1_data, model2_data, metric, model1, model2
        )

        # Colour only the signed 'Difference' column; the raw metric values
        # are not signed quantities.
        styled_df = _style_cells(
            metric_df.style, _sign_color, ['Difference']
        ).format(precision=2)

        st.dataframe(styled_df, use_container_width=True)

        # Add visual separator
        st.markdown("---")

    # Visualizations section
    st.subheader("Visual Performance Analysis")

    # Metric selector for bar chart
    selected_metric = st.selectbox(
        "Select Metric for Comparison:",
        metrics,
        key='compare_metric'
    )

    # Prepare data for bar chart: one column per model, 'Overall' rows
    # removed, joined on Class so a class missing from one model still
    # appears (with a gap for that model).
    per_model = [
        model_df.loc[
            ~model_df['Class'].str.contains('Overall', na=False),
            ['Class', selected_metric]
        ].rename(columns={selected_metric: model_name})
        for model_name, model_df in ((model1, model1_data), (model2, model2_data))
    ]
    comparison_data = per_model[0].merge(per_model[1], on='Class', how='outer')

    # Create bar chart. Values are scaled by 100 so they match the "(%)" axis
    # title and the 0-100 scale used by the scatter plot below.
    fig_bar = go.Figure()

    # Add bars for first model
    fig_bar.add_trace(go.Bar(
        name=model1,
        x=comparison_data['Class'],
        y=comparison_data[model1] * 100,
        marker_color='rgb(55, 83, 109)'
    ))

    # Add bars for second model
    fig_bar.add_trace(go.Bar(
        name=model2,
        x=comparison_data['Class'],
        y=comparison_data[model2] * 100,
        marker_color='rgb(26, 118, 255)'
    ))

    # Update bar chart layout
    fig_bar.update_layout(
        title=f"{selected_metric} Comparison by Class",
        xaxis_title="Class",
        yaxis_title=f"{selected_metric} (%)",
        barmode='group',
        xaxis_tickangle=-45,
        height=500,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99
        ),
        yaxis=dict(range=[0, 100])
    )

    # Display bar chart
    st.plotly_chart(fig_bar, use_container_width=True)

    # Create Precision-Recall scatter plot
    st.markdown("#### Precision-Recall Analysis")

    # Filter data for scatter plot
    model1_scatter = model1_data[~model1_data['Class'].str.contains('Overall', na=False)]
    model2_scatter = model2_data[~model2_data['Class'].str.contains('Overall', na=False)]

    # Create scatter plot
    fig_scatter = go.Figure()

    # Add one labelled marker series per model
    for model_name, scatter_df in ((model1, model1_scatter), (model2, model2_scatter)):
        fig_scatter.add_trace(go.Scatter(
            x=scatter_df['Precision']*100,
            y=scatter_df['Recall']*100,
            mode='markers+text',
            name=model_name,
            text=scatter_df['Class'],
            textposition="top center",
            marker=dict(size=10)
        ))

    # Add reference line
    fig_scatter.add_trace(go.Scatter(
        x=[0, 100],
        y=[0, 100],
        mode='lines',
        line=dict(dash='dash', color='gray'),
        showlegend=False
    ))

    # Update scatter plot layout
    fig_scatter.update_layout(
        title="Precision vs Recall Analysis by Class",
        xaxis_title="Precision (%)",
        yaxis_title="Recall (%)",
        xaxis=dict(range=[0, 100]),
        yaxis=dict(range=[0, 100]),
        height=600,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99
        )
    )

    # Display scatter plot
    st.plotly_chart(fig_scatter, use_container_width=True)


with tab4:
    st.header("Model Comparison Analysis")
    
    st.markdown("""
        Compare two models side by side across different categories.  
        The bar chart visualizes the differences in performance across selected categories,  
        while the scatter plot provides an overview of Precision vs. Recall per class.
    """)

    # Create two columns for model selection
    col1, col2 = st.columns(2)

    # Model selection dropdown menus
    with col1:
        model1 = st.selectbox(
            "Select First Model:",
            st.session_state.combined_df['Model'].unique(),
            key='model1'
        )

    with col2:
        # Filter out the first selected model from options
        available_models = [m for m in st.session_state.combined_df['Model'].unique() if m != model1]
        model2 = st.selectbox(
            "Select Second Model:",
            available_models,
            key='model2'
        )

    # Category selection
    selected_category = st.selectbox(
        "Select Category for Comparison:",
        st.session_state.combined_df['Category'].unique(),
        key='compare_category'
    )

    if model2 is None:
        # With fewer than two models the second selectbox has no options and
        # returns None; there is nothing to compare against.
        st.info("At least two models are required for a side-by-side comparison.")
    else:
        # Filter data for both models
        model1_data = st.session_state.combined_df[
            (st.session_state.combined_df['Model'] == model1) &
            (st.session_state.combined_df['Category'] == selected_category)
        ]

        model2_data = st.session_state.combined_df[
            (st.session_state.combined_df['Model'] == model2) &
            (st.session_state.combined_df['Category'] == selected_category)
        ]

        _render_model_comparison(model1, model2, model1_data, model2_data)


# Footer: separator rule, attribution line and copyright notice
for footer_line in (
    "---",
    "Dashboard created for model evaluation and comparison",
    "© 2024 Nexar",
):
    st.markdown(footer_line)