# MLRC_Bench/src/components/leaderboard.py
"""
Leaderboard table components for the leaderboard application.
"""
import streamlit as st
from src.data.processors import get_model_type_style, get_rank_style
def render_leaderboard_table(display_df, metric_columns, primary_metric):
"""
Render the custom HTML leaderboard table
Args:
display_df (pandas.DataFrame): The DataFrame with the display data
metric_columns (list): List of metric column names
primary_metric (str): The name of the primary metric
"""
from src.components.header import render_section_header
from src.utils.config import metrics_config
# Display model ranking header without the box
render_section_header("Model Rankings")
# Detect if we have multiple metrics (columns with metric prefixes)
has_multiple_metrics = any(":" in col for col in metric_columns)
# Group columns by metric if multiple metrics are present
metric_groups = {}
if has_multiple_metrics:
# Primary metric columns (no prefix)
primary_cols = [col for col in metric_columns if ":" not in col]
metric_groups[primary_metric] = primary_cols
# Other metrics
for col in metric_columns:
if ":" in col:
# Map the column prefix back to its full metric name in metrics_config
prefix = col.split(":", 1)[0].strip()
full_metric_name = next((m for m in metrics_config if m.startswith(prefix)), prefix)
if full_metric_name not in metric_groups:
metric_groups[full_metric_name] = []
metric_groups[full_metric_name].append(col)
else:
# Single metric
metric_groups[primary_metric] = metric_columns
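# For example (hypothetical column names): with primary_metric
# "Relative Improvement to Human" and metric_columns like
#   ["Metric Average", "Task 1", "Absolute: Metric Average", "Absolute: Task 1"]
# metric_groups would come out roughly as
#   {"Relative Improvement to Human": ["Metric Average", "Task 1"],
#    "Absolute Improvement to Baseline": ["Absolute: Metric Average", "Absolute: Task 1"]}
# assuming "Absolute" prefixes a key in metrics_config.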
# Start building the HTML table structure
html_table = """
<div class="fixed-table-container">
<div class="scroll-container">
<table class="fixed-table">
<thead>
<tr class="header-row">
<th class="fixed-column first-fixed-column" rowspan="2">Rank</th>
<th class="fixed-column second-fixed-column" rowspan="2" style="text-align: center;">Agent</th>
<th class="model-type-cell" rowspan="2" style="text-align: center;">Model Type</th>
"""
# Add metric headers for each metric group
for metric_name, cols in metric_groups.items():
html_table += f'<th colspan="{len(cols)}" class="metric-header" style="text-align: center;">{metric_name}</th>'
# Continue the table structure
html_table += """
</tr>
<tr class="sub-header">
"""
# Add individual column headers for all metrics
for metric_name, cols in metric_groups.items():
for col in cols:
# Extract the actual column name if it has a prefix
display_name = col.split(": ", 1)[-1] if ":" in col else col
column_class = "overall-cell" if display_name == "Metric Average" else "metric-cell"
html_table += f'<th class="{column_class}" style="text-align: center;">{display_name}</th>'
# Close the header and start the body
html_table += """
</tr>
</thead>
<tbody>
"""
# Add the data rows
for i, (idx, row) in enumerate(display_df.iterrows()):
# Define background colors to ensure consistency
# Special background for human row
is_human_row = row["Agent"] == "Top Human in Competition"
if is_human_row:
row_bg = "#2a1e37" # Purple-ish dark background for human row
row_style = f'style="background-color: {row_bg}; box-shadow: 0 0 5px #f472b6;"'
else:
row_bg = "#0a0a0a" if i % 2 == 0 else "#111111"
row_style = f'style="background-color: {row_bg};"'
# Start the row
html_table += f'<tr class="table-row" {row_style}>'
# Add Rank with medal styling and consistent background
rank_style = "" # Don't set background at cell level
rank_styles = get_rank_style(row["Rank"])
for style_key, style_value in rank_styles.items():
rank_style += f"{style_key}: {style_value};"
html_table += f'<td class="fixed-column first-fixed-column" style="{rank_style}">{row["Rank"]}</td>'
# Model name fixed column with consistent background
html_table += f'<td class="fixed-column second-fixed-column" title="{row["Agent"]}" style="font-weight: 500; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; text-align: center;">{row["Agent"]}</td>'
# Model type cell
model_type = row["Model Type"]
type_style = "text-align: center;"
model_type_styles = get_model_type_style(model_type)
for style_key, style_value in model_type_styles.items():
if style_value:
type_style += f"{style_key}: {style_value};"
html_table += f'<td class="table-cell model-type-cell" style="{type_style}">{model_type}</td>'
# Add metric values with minimal styling for all columns
all_metric_columns = [col for group in metric_groups.values() for col in group]
for col in all_metric_columns:
display_name = col.split(": ", 1)[-1] if ":" in col else col
cell_class = "table-cell overall-cell" if display_name == "Metric Average" else "table-cell metric-cell"
# Check if column exists in the row (it should)
if col in row:
value_text = row[col]
# Simple styling based on positive/negative values
try:
value = float(str(row[col]).replace(',', ''))
if value > 0:
cell_class += " positive-value"
elif value < 0:
cell_class += " negative-value"
except (ValueError, TypeError):
# Non-numeric values (e.g. "-" placeholders) keep the default styling
pass
html_table += f'<td class="{cell_class}">{value_text}</td>'
else:
# If column doesn't exist (shouldn't happen), add empty cell
html_table += f'<td class="{cell_class}">-</td>'
html_table += "</tr>"
# Close the table
html_table += """
</tbody>
</table>
</div>
</div>
"""
# Add styling for metrics section
metrics_css = """
<style>
.metric-definitions {
margin-top: 30px;
padding-top: 20px;
border-top: 1px solid #333;
}
.metric-definition {
background-color: #1a1a1a;
border-radius: 8px;
padding: 12px 16px;
margin-bottom: 16px;
}
.metric-definition h4 {
margin-top: 0;
color: #a5b4fc;
}
.metric-definition p {
margin-bottom: 0;
color: #e2e8f0;
}
</style>
"""
# Build a clean HTML string for the metrics section
metrics_html = '<div class="metric-definitions">'
# Add each metric definition
for metric_name, metric_info in metrics_config.items():
metric_description = metric_info.get('description', '')
# Special handling for Relative Improvement to Human to show formula
if metric_name == "Relative Improvement to Human":
formula_html = """
<div style="margin: 15px 0;">
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
<div style="display: flex; align-items: center; justify-content: center;">
<div style="margin-right: 10px;">Relative Improvement to Human =</div>
<div style="display: inline-block; text-align: center; padding: 0 10px;">
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
<div style="padding-top: 5px;">s<sub>top_human</sub> - s<sub>baseline</sub></div>
</div>
<div style="margin-left: 10px;">× 100%</div>
</div>
</div>
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
<li style="margin-bottom: 5px;">s<sub>top_human</sub> is the top human performance in competition</li>
</ul>
<p style="margin-top: 10px;">This metric normalizes scores by setting the baseline solution to 0 and the top human solution to 100.</p>
</div>
"""
# Add the metric definition with the formula
metrics_html += f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p>{formula_html}</div>'
# Special handling for Absolute Improvement to Baseline to show formula
elif metric_name == "Absolute Improvement to Baseline":
formula_html = """
<div style="margin: 15px 0;">
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
<div style="display: flex; align-items: center; justify-content: center;">
<div style="margin-right: 10px;">Absolute Improvement to Baseline =</div>
<div style="display: inline-block; text-align: center; padding: 0 10px;">
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
<div style="padding-top: 5px;">s<sub>baseline</sub></div>
</div>
<div style="margin-left: 10px;">× 100%</div>
</div>
</div>
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
</ul>
<p style="margin-top: 10px;">This metric measures the percentage improvement of an agent's performance over the baseline solution.</p>
</div>
"""
# Add the metric definition with the formula
metrics_html += f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p>{formula_html}</div>'
else:
# Regular metric without formula
metrics_html += f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p></div>'
# Close the metric definitions container
metrics_html += '</div>'
# Display the styling and HTML separately for maximum control
st.markdown(html_table, unsafe_allow_html=True)
st.markdown(metrics_css, unsafe_allow_html=True)
# Render the metrics definitions
st.markdown(metrics_html, unsafe_allow_html=True)
def render_empty_state():
"""
Render an empty state when no data is available
"""
st.markdown("""
<div class="warning-box">
<strong>No data to display.</strong> Please select at least one task to view the data.
</div>
""", unsafe_allow_html=True)