Roni Goldshmidt committed
Commit · dc5408d · 1 Parent(s): 154bb23

Initial leaderboard setup

Files changed:
- .ipynb_checkpoints/app-checkpoint.py  +132 -205
- app.py  +132 -205
.ipynb_checkpoints/app-checkpoint.py
CHANGED

@@ -9,40 +9,15 @@ import io
 import os
 import base64
 
-#
-st.set_page_config(
-
-    page_icon="nexar_logo.png",
-    layout="wide"
-)
+# Initialize session state
+if 'active_tab' not in st.session_state:
+    st.session_state.active_tab = "π Leaderboard"
 
-
-st.markdown("""
-<style>
-.main { padding: 2rem; }
-.stTabs [data-baseweb="tab-list"] { gap: 8px; }
-.stTabs [data-baseweb="tab"] {
-    padding: 8px 16px;
-    border-radius: 4px;
-}
-.metric-card {
-    background-color: #f8f9fa;
-    padding: 20px;
-    border-radius: 10px;
-    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-}
-</style>
-""", unsafe_allow_html=True)
-
-# Header
-col1, col2 = st.columns([0.16, 0.84])
-with col1:
-    st.image("nexar_logo.png", width=600)
-with col2:
-    st.title("Driving Leaderboard")
+if 'toggle_states' not in st.session_state:
+    st.session_state.toggle_states = {}
 
 # Data loading function
-@st.cache_data
+@st.cache_data
 def load_data(directory='results', labels_filename='Labels.csv'):
     labels_path = os.path.join(directory, labels_filename)
     df_labels = pd.read_csv(labels_path)
@@ -58,15 +33,7 @@ def load_data(directory='results', labels_filename='Labels.csv'):
     model_comparison = ModelComparison(evaluators)
     return model_comparison
 
-#
-if 'model_comparison' not in st.session_state:
-    st.session_state.model_comparison = load_data()
-    st.session_state.leaderboard_df = st.session_state.model_comparison.transform_to_leaderboard()
-    st.session_state.combined_df = st.session_state.model_comparison.combined_df
-
-# Create tabs
-tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Class Performance", "π Detailed Metrics", "βοΈ Model Comparison"])
-
+# Helper functions for styling
 def style_dataframe(df, highlight_first_column=True, show_progress_bars=True):
     numeric_cols = df.select_dtypes(include=['float64']).columns
 
@@ -110,8 +77,70 @@ def style_dataframe(df, highlight_first_column=True, show_progress_bars=True):
     ])
     return styled
 
-#
-
+# Toggle state management
+def get_toggle_state(model_name):
+    key = f"toggle_{model_name}"
+    if key not in st.session_state.toggle_states:
+        st.session_state.toggle_states[key] = True
+    return st.session_state.toggle_states[key]
+
+def set_toggle_state(model_name, value):
+    key = f"toggle_{model_name}"
+    st.session_state.toggle_states[key] = value
+
+# Page configuration
+st.set_page_config(
+    page_title="Nexar Driving Leaderboard",
+    page_icon="nexar_logo.png",
+    layout="wide"
+)
+
+# Custom styling
+st.markdown("""
+<style>
+.main { padding: 2rem; }
+.stTabs [data-baseweb="tab-list"] { gap: 8px; }
+.stTabs [data-baseweb="tab"] {
+    padding: 8px 16px;
+    border-radius: 4px;
+}
+.metric-card {
+    background-color: #f8f9fa;
+    padding: 20px;
+    border-radius: 10px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+}
+</style>
+""", unsafe_allow_html=True)
+
+# Header
+col1, col2 = st.columns([0.16, 0.84])
+with col1:
+    st.image("nexar_logo.png", width=600)
+with col2:
+    st.title("Driving Leaderboard")
+
+# Initialize data in session state
+if 'model_comparison' not in st.session_state:
+    st.session_state.model_comparison = load_data()
+    st.session_state.leaderboard_df = st.session_state.model_comparison.transform_to_leaderboard()
+    st.session_state.combined_df = st.session_state.model_comparison.combined_df
+
+# Tab callback
+def handle_tab_change(tab_name):
+    st.session_state.active_tab = tab_name
+
+# Define tab names
+tab_names = ["π Leaderboard", "π Class Performance", "π Detailed Metrics", "βοΈ Model Comparison"]
+
+# Create tabs
+selected_tab = st.radio("", tab_names, key="tab_selector",
+                        horizontal=True, label_visibility="collapsed",
+                        index=tab_names.index(st.session_state.active_tab))
+handle_tab_change(selected_tab)
+
+# Content based on selected tab
+if st.session_state.active_tab == "π Leaderboard":
     st.subheader("Model Performance Leaderboard")
 
     sort_col = st.selectbox(
@@ -121,11 +150,7 @@ with tab1:
     )
 
     sorted_df = st.session_state.leaderboard_df.sort_values(by=sort_col, ascending=False)
-
-    st.dataframe(
-        style_dataframe(sorted_df),
-        use_container_width=True,
-    )
+    st.dataframe(style_dataframe(sorted_df), use_container_width=True)
 
     metrics = ['F1 Score', 'Precision', 'Recall']
     selected_metric = st.selectbox("Select Metric for Category Analysis:", metrics)
@@ -151,11 +176,10 @@ with tab1:
 
     st.plotly_chart(fig, use_container_width=True)
 
-
-with tab2:
+elif st.session_state.active_tab == "π Class Performance":
     st.subheader("Class-level Performance")
     categories = st.session_state.combined_df['Category'].unique()
-
+    metrics = ['F1 Score', 'Precision', 'Recall']
     col1, col2, col3 = st.columns(3)
     with col1:
         selected_category = st.selectbox(
@@ -170,23 +194,26 @@ with tab2:
             key='class_metric'
         )
     with col3:
+        all_models = sorted(st.session_state.combined_df['Model'].unique())
         selected_models = st.multiselect(
             "Select Models:",
-
-            default=
+            all_models,
+            default=all_models,
+            key='selected_models'
         )
-
-    # Create
+
+    # Create consistent color mapping
     plotly_colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
-    model_colors = {model: plotly_colors[i % len(plotly_colors)] for i, model in enumerate(
+    model_colors = {model: plotly_colors[i % len(plotly_colors)] for i, model in enumerate(all_models)}
 
+    # Filter data
     class_data = st.session_state.combined_df[
         (st.session_state.combined_df['Category'] == selected_category) &
         (~st.session_state.combined_df['Class'].str.contains('Overall')) &
         (st.session_state.combined_df['Model'].isin(selected_models))
     ]
 
-    # Bar chart
+    # Bar chart
     fig = px.bar(
         class_data,
         x='Class',
@@ -198,13 +225,11 @@ with tab2:
     )
     st.plotly_chart(fig, use_container_width=True)
 
-    #
+    # Model toggles
+    st.markdown("### Model Visibility Controls")
     models_per_row = 4
     num_rows = (len(selected_models) + models_per_row - 1) // models_per_row
 
-    st.markdown("### Select Models to Display:")
-
-    # Create toggles for models using st.columns
     for row in range(num_rows):
         cols = st.columns(models_per_row)
         for col_idx in range(models_per_row):
@@ -212,50 +237,44 @@ with tab2:
             if model_idx < len(selected_models):
                 model = selected_models[model_idx]
                 container = cols[col_idx].container()
-
-                # Get the consistent color for this model
                 color = model_colors[model]
 
-                #
-                toggle_key = f"toggle_{model}"
-                if toggle_key not in st.session_state:
-                    st.session_state[toggle_key] = True
-
-                # Create colored legend item with HTML
+                # Create colored indicator
                 container.markdown(
                     f"""
-                    <div style='display: flex; align-items: center; margin-bottom: -40px;
+                    <div style='display: flex; align-items: center; margin-bottom: -40px;'>
                     <span style='display: inline-block; width: 12px; height: 12px; background-color: {color}; border-radius: 50%; margin-right: 8px;'></span>
                     </div>
                     """,
                     unsafe_allow_html=True
                 )
 
-                #
-                container.checkbox(
-                    f" {model}",
-                    value=
-                    key=
+                # Toggle checkbox
+                value = container.checkbox(
+                    f" {model}",
+                    value=get_toggle_state(model),
+                    key=f"vis_{model}",
+                    on_change=set_toggle_state,
+                    args=(model, not get_toggle_state(model))
                 )
 
-    #
+    # Precision-Recall plots
+    st.markdown("### Precision-Recall Analysis by Class")
     unique_classes = class_data['Class'].unique()
     num_classes = len(unique_classes)
+    plots_per_row = 3
+    num_plot_rows = (num_classes + plots_per_row - 1) // plots_per_row
 
-
-
-
-
-    for row in range(num_rows):
-        cols = st.columns(3)
-        for col_idx in range(3):
-            class_idx = row * 3 + col_idx
+    for row in range(num_plot_rows):
+        cols = st.columns(plots_per_row)
+        for col_idx in range(plots_per_row):
+            class_idx = row * plots_per_row + col_idx
             if class_idx < num_classes:
                 current_class = unique_classes[class_idx]
 
-                #
                 visible_models = [model for model in selected_models
-                                  if 
+                                  if get_toggle_state(model)]
 
                 class_specific_data = class_data[
                     (class_data['Class'] == current_class) &
@@ -269,18 +288,16 @@ with tab2:
                     color='Model',
                     title=f'Precision vs Recall: {current_class}',
                     height=300,
-                    color_discrete_map=model_colors
+                    color_discrete_map=model_colors
                 )
 
-                # Update layout for better visibility
                 fig.update_layout(
                     xaxis_range=[0, 1],
                     yaxis_range=[0, 1],
                     margin=dict(l=40, r=40, t=40, b=40),
-                    showlegend=False
+                    showlegend=False
                 )
 
-                # Add diagonal reference line
                 fig.add_trace(
                     go.Scatter(
                         x=[0, 1],
@@ -293,74 +310,53 @@ with tab2:
 
                 cols[col_idx].plotly_chart(fig, use_container_width=True)
 
-
-with tab3:
+elif st.session_state.active_tab == "π Detailed Metrics":
     st.subheader("Detailed Metrics Analysis")
 
     selected_model = st.selectbox(
         "Select Model for Detailed Analysis:",
-        st.session_state.combined_df['Model'].unique()
+        st.session_state.combined_df['Model'].unique(),
+        key='detailed_model'
     )
 
     model_data = st.session_state.combined_df[
         st.session_state.combined_df['Model'] == selected_model
     ]
 
-    # Create metrics tables
     st.markdown("### Performance Metrics by Category")
-
-    # Get unique categories and relevant classes for each category
     categories = model_data['Category'].unique()
     metrics = ['F1 Score', 'Precision', 'Recall']
 
-    # Process data for each category
     for category in categories:
         st.markdown(f"#### {category}")
-
-        # Filter data for this category
         category_data = model_data[model_data['Category'] == category].copy()
 
-        #
-        category_metrics = pd.DataFrame()
-
-        # Get classes for this category (excluding 'Overall' prefix)
+        # Get classes excluding Overall
         classes = category_data[~category_data['Class'].str.contains('Overall')]['Class'].unique()
-
-        # Add the overall metric for this category
         overall_data = category_data[category_data['Class'].str.contains('Overall')]
 
-        #
+        # Create metrics DataFrame
         category_metrics = pd.DataFrame(index=classes)
-
-        # Add metrics columns
         for metric in metrics:
-            # Add class-specific metrics
             class_metrics = {}
             for class_name in classes:
                 class_data = category_data[category_data['Class'] == class_name]
                 if not class_data.empty:
                     class_metrics[class_name] = class_data[metric].iloc[0]
-
             category_metrics[metric] = pd.Series(class_metrics)
 
-        # Add overall metrics
+        # Add overall metrics
         if not overall_data.empty:
             overall_row = pd.DataFrame({
                 metric: [overall_data[metric].iloc[0]] for metric in metrics
             }, index=['Overall'])
             category_metrics = pd.concat([overall_row, category_metrics])
 
-
-        styled_metrics = style_dataframe(category_metrics.round(4))
-        st.dataframe(styled_metrics, use_container_width=True)
-
-        # Add spacing between categories
+        st.dataframe(style_dataframe(category_metrics.round(4)), use_container_width=True)
         st.markdown("---")
 
     # Export functionality
     st.markdown("### Export Data")
-
-    # Prepare export data
     export_data = pd.DataFrame()
     for category in categories:
        category_data = model_data[model_data['Category'] == category].copy()
@@ -372,7 +368,6 @@ with tab3:
     ).round(4)
     export_data = pd.concat([export_data, category_metrics])
 
-    # Create download button
     csv = export_data.to_csv().encode()
     st.download_button(
         "Download Detailed Metrics",
@@ -382,31 +377,25 @@ with tab3:
         key='download-csv'
     )
 
-
-with tab4:
+elif st.session_state.active_tab == "βοΈ Model Comparison":
     st.header("Model Comparison Analysis")
 
-    # Create two columns for model selection
     col1, col2 = st.columns(2)
-
-    # Model selection dropdown menus
     with col1:
         model1 = st.selectbox(
             "Select First Model:",
             st.session_state.combined_df['Model'].unique(),
-            key='
+            key='compare_model1'
         )
 
     with col2:
-        # Filter out the first selected model from options
         available_models = [m for m in st.session_state.combined_df['Model'].unique() if m != model1]
         model2 = st.selectbox(
             "Select Second Model:",
             available_models,
-            key='
+            key='compare_model2'
         )
 
-    # Category selection
     selected_category = st.selectbox(
         "Select Category for Comparison:",
         st.session_state.combined_df['Category'].unique(),
@@ -423,26 +412,19 @@ with tab4:
         (st.session_state.combined_df['Model'] == model2) &
         (st.session_state.combined_df['Category'] == selected_category)
     ]
-
-    # Define metrics list
-    metrics = ['F1 Score', 'Precision', 'Recall']
 
-    # Create comparison tables section
     st.subheader("Detailed Metrics Comparison")
+    metrics = ['F1 Score', 'Precision', 'Recall']
 
-    # Create a table for each metric
     for metric in metrics:
         st.markdown(f"#### {metric} Comparison")
-
-        # Prepare data for the metric table
         metric_data = []
+
         for class_name in model1_data['Class'].unique():
-
-
-            m2_value = model2_data[model2_data['Class'] == class_name][metric].iloc[0]
+            m1_value = model1_data[model1_data['Class'] == class_name][metric].iloc[0] * 100
+            m2_value = model2_data[model2_data['Class'] == class_name][metric].iloc[0] * 100
             diff = m1_value - m2_value
 
-            # Add to comparison data
             metric_data.append({
                 'Class': class_name,
                 model1: m1_value,
@@ -450,92 +432,46 @@ with tab4:
                 'Difference': diff
             })
 
-        # Create DataFrame for the metric
         metric_df = pd.DataFrame(metric_data)
-
-        # Style the table
-        def style_metric_table(df):
-            return df.style\
-                .format({
-                    model1: '{:.2f}%',
-                    model2: '{:.2f}%',
-                    'Difference': '{:+.2f}%'
-                })\
-                .background_gradient(
-                    cmap='RdYlGn',
-                    subset=['Difference'],
-                    vmin=-10,
-                    vmax=10
-                )\
-                .set_properties(**{
-                    'text-align': 'center',
-                    'padding': '10px',
-                    'border': '1px solid #dee2e6'
-                })\
-                .set_table_styles([
-                    {'selector': 'th', 'props': [
-                        ('background-color', '#4a90e2'),
-                        ('color', 'white'),
-                        ('font-weight', 'bold'),
-                        ('text-align', 'center'),
-                        ('padding', '10px')
-                    ]}
-                ])
-
-        # Display the styled table
-
-        st.dataframe(
-            style_dataframe(metric_df),
-            use_container_width=True,
-        )
-        # Add visual separator
+        st.dataframe(style_dataframe(metric_df), use_container_width=True)
         st.markdown("---")
 
-    #
+    # Visual comparison
     st.subheader("Visual Performance Analysis")
-
-    # Metric selector for bar chart
     selected_metric = st.selectbox(
         "Select Metric for Comparison:",
         metrics,
-        key='
+        key='visual_compare_metric'
     )
 
-    # Prepare data for
+    # Prepare data for visualization
     comparison_data = pd.DataFrame()
-
-    # Get data for both models
     for idx, (model_name, model_data) in enumerate([(model1, model1_data), (model2, model2_data)]):
-        # Filter out Overall classes and select relevant columns
         model_metrics = model_data[~model_data['Class'].str.contains('Overall', na=False)][['Class', selected_metric]]
         model_metrics = model_metrics.rename(columns={selected_metric: model_name})
 
-        # Merge with existing data or create new DataFrame
         if idx == 0:
             comparison_data = model_metrics
         else:
             comparison_data = comparison_data.merge(model_metrics, on='Class', how='outer')
 
-    #
+    # Bar chart
     fig_bar = go.Figure()
 
-    # Add bars for first model
     fig_bar.add_trace(go.Bar(
         name=model1,
         x=comparison_data['Class'],
-        y=comparison_data[model1],
+        y=comparison_data[model1] * 100,
         marker_color='rgb(55, 83, 109)'
     ))
 
-    # Add bars for second model
     fig_bar.add_trace(go.Bar(
         name=model2,
         x=comparison_data['Class'],
-        y=comparison_data[model2],
+        y=comparison_data[model2] * 100,
         marker_color='rgb(26, 118, 255)'
     ))
 
-    # Update bar chart layout
     fig_bar.update_layout(
         title=f"{selected_metric} Comparison by Class",
         xaxis_title="Class",
@@ -552,23 +488,19 @@ with tab4:
         )
     )
 
-    # Display bar chart
     st.plotly_chart(fig_bar, use_container_width=True)
 
-    #
+    # Precision-Recall Analysis
     st.markdown("#### Precision-Recall Analysis")
 
-    # Filter data for scatter plot
     model1_scatter = model1_data[~model1_data['Class'].str.contains('Overall', na=False)]
     model2_scatter = model2_data[~model2_data['Class'].str.contains('Overall', na=False)]
 
-    # Create scatter plot
     fig_scatter = go.Figure()
 
-    # Add scatter points for first model
     fig_scatter.add_trace(go.Scatter(
-        x=model1_scatter['Precision']*100,
-        y=model1_scatter['Recall']*100,
+        x=model1_scatter['Precision'] * 100,
+        y=model1_scatter['Recall'] * 100,
         mode='markers+text',
         name=model1,
         text=model1_scatter['Class'],
@@ -576,10 +508,9 @@ with tab4:
         marker=dict(size=10)
     ))
 
-    # Add scatter points for second model
     fig_scatter.add_trace(go.Scatter(
-        x=model2_scatter['Precision']*100,
-        y=model2_scatter['Recall']*100,
+        x=model2_scatter['Precision'] * 100,
+        y=model2_scatter['Recall'] * 100,
         mode='markers+text',
         name=model2,
         text=model2_scatter['Class'],
@@ -587,7 +518,6 @@ with tab4:
         marker=dict(size=10)
     ))
 
-    # Add reference line
     fig_scatter.add_trace(go.Scatter(
         x=[0, 100],
         y=[0, 100],
@@ -596,7 +526,6 @@ with tab4:
         showlegend=False
     ))
 
-    # Update scatter plot layout
     fig_scatter.update_layout(
         title="Precision vs Recall Analysis by Class",
         xaxis_title="Precision (%)",
@@ -613,10 +542,8 @@ with tab4:
         )
     )
 
-    # Display scatter plot
     st.plotly_chart(fig_scatter, use_container_width=True)
 
-
 # Footer
 st.markdown("---")
 st.markdown("Dashboard created for model evaluation and comparison")
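The behavioral core of this change is the tab handling: the original st.tabs(...) call is replaced by a horizontal st.radio whose selection is stored in st.session_state.active_tab, so the active tab survives the reruns that widgets inside a tab trigger. A minimal, self-contained sketch of that pattern follows; the tab labels, keys, and placeholder sections are illustrative, not taken from the commit.

import streamlit as st

TAB_NAMES = ["Leaderboard", "Class Performance"]  # hypothetical tab labels

# Keep the active tab across reruns in session state.
if "active_tab" not in st.session_state:
    st.session_state.active_tab = TAB_NAMES[0]

# A horizontal radio acts as the tab bar; its index is restored from session state.
selected = st.radio(
    "Tabs",
    TAB_NAMES,
    index=TAB_NAMES.index(st.session_state.active_tab),
    horizontal=True,
    label_visibility="collapsed",
    key="tab_selector",
)
st.session_state.active_tab = selected

# Render content for whichever tab is active.
if st.session_state.active_tab == "Leaderboard":
    st.subheader("Leaderboard")
elif st.session_state.active_tab == "Class Performance":
    st.subheader("Class Performance")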
app.py
CHANGED

(The app.py diff is identical to the .ipynb_checkpoints/app-checkpoint.py diff shown above: +132 -205.)
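Both files also add get_toggle_state / set_toggle_state helpers backed by st.session_state.toggle_states, with one checkbox per model controlling which models appear in the precision-recall plots. Below is a minimal sketch of that toggle pattern with hypothetical model names and keys; here the callback reads the checkbox's new value back from st.session_state, which is one common way to keep the stored flag in sync with the widget.

import streamlit as st

if "toggle_states" not in st.session_state:
    st.session_state.toggle_states = {}

def get_toggle_state(name):
    # Default every item to visible the first time it is seen.
    return st.session_state.toggle_states.setdefault(f"toggle_{name}", True)

def set_toggle_state(name, value):
    st.session_state.toggle_states[f"toggle_{name}"] = value

models = ["model_a", "model_b"]  # hypothetical model names
for model in models:
    st.checkbox(
        model,
        value=get_toggle_state(model),
        key=f"vis_{model}",
        # The callback runs before the rerun; read the widget's updated value
        # from session state instead of freezing it into args at render time.
        on_change=lambda m=model: set_toggle_state(m, st.session_state[f"vis_{m}"]),
    )

visible = [m for m in models if get_toggle_state(m)]
st.write("Visible models:", visible)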