# MLRC_Bench/src/components/leaderboard.py
"""
Leaderboard table components for the leaderboard application.
"""
import streamlit as st
from src.data.processors import get_model_type_style, get_rank_style
def render_leaderboard_table(display_df, metric_columns, primary_metric):
"""
Render the custom HTML leaderboard table
Args:
display_df (pandas.DataFrame): The DataFrame with the display data
metric_columns (list): List of metric column names
primary_metric (str): The name of the primary metric
"""
from src.components.header import render_section_header
from src.utils.config import metrics_config
# Display model ranking header without the box
render_section_header("Model Rankings")
# Detect if we have multiple metrics (columns with metric prefixes)
has_multiple_metrics = any(":" in col for col in metric_columns)
# Group columns by metric if multiple metrics are present
metric_groups = {}
if has_multiple_metrics:
# Primary metric columns (no prefix)
primary_cols = [col for col in metric_columns if ":" not in col]
metric_groups[primary_metric] = primary_cols
# Other metrics
for col in metric_columns:
if ":" in col:
# Map the column prefix back to its full metric name in metrics_config
prefix = col.split(":", 1)[0].strip()
full_metric_name = next((m for m in metrics_config if m.startswith(prefix)), prefix)
if full_metric_name not in metric_groups:
metric_groups[full_metric_name] = []
metric_groups[full_metric_name].append(col)
else:
# Single metric
metric_groups[primary_metric] = metric_columns
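# For example (hypothetical column names): with primary_metric
# "Relative Improvement to Human" and metric_columns like
#   ["Metric Average", "Task 1", "Absolute: Metric Average", "Absolute: Task 1"]
# metric_groups would come out roughly as
#   {"Relative Improvement to Human": ["Metric Average", "Task 1"],
#    "Absolute Improvement to Baseline": ["Absolute: Metric Average", "Absolute: Task 1"]}
# assuming "Absolute" prefixes a key in metrics_config.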
# Start building the HTML table structure
html_table = """
<div class="fixed-table-container">
<div class="scroll-container">
<table class="fixed-table">
<thead>
<tr class="header-row">
<th class="fixed-column first-fixed-column" rowspan="2">Rank</th>
<th class="fixed-column second-fixed-column" rowspan="2" style="text-align: center;">Agent</th>
<th class="model-type-cell" rowspan="2" style="text-align: center;">Model Type</th>
"""
# Add metric headers for each metric group
for metric_name, cols in metric_groups.items():
html_table += f'<th colspan="{len(cols)}" class="metric-header" style="text-align: center;">{metric_name}</th>'
# Continue the table structure
html_table += """
</tr>
<tr class="sub-header">
"""
# Add individual column headers for all metrics
for metric_name, cols in metric_groups.items():
for col in cols:
# Extract the actual column name if it has a prefix
display_name = col.split(": ", 1)[-1] if ":" in col else col
column_class = "overall-cell" if display_name == "Metric Average" else "metric-cell"
html_table += f'<th class="{column_class}" style="text-align: center;">{display_name}</th>'
# Close the header and start the body
html_table += """
</tr>
</thead>
<tbody>
"""
# Add the data rows
for i, (idx, row) in enumerate(display_df.iterrows()):
# Define background colors to ensure consistency
# Special background for human row
is_human_row = row["Agent"] == "Top Human in Competition"
if is_human_row:
row_bg = "#2a1e37" # Purple-ish dark background for human row
row_style = f'style="background-color: {row_bg}; box-shadow: 0 0 5px #f472b6;"'
else:
row_bg = "#0a0a0a" if i % 2 == 0 else "#111111"
row_style = f'style="background-color: {row_bg};"'
# Start the row
html_table += f'<tr class="table-row" {row_style}>'
# Add Rank with medal styling and consistent background
rank_style = "" # Don't set background at cell level
rank_styles = get_rank_style(row["Rank"])
for style_key, style_value in rank_styles.items():
rank_style += f"{style_key}: {style_value};"
html_table += f'<td class="fixed-column first-fixed-column" style="{rank_style}">{row["Rank"]}</td>'
# Model name fixed column with consistent background
html_table += f'<td class="fixed-column second-fixed-column" title="{row["Agent"]}" style="font-weight: 500; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; text-align: center;">{row["Agent"]}</td>'
# Model type cell
model_type = row["Model Type"]
type_style = "text-align: center;"
model_type_styles = get_model_type_style(model_type)
for style_key, style_value in model_type_styles.items():
if style_value:
type_style += f"{style_key}: {style_value};"
html_table += f'<td class="table-cell model-type-cell" style="{type_style}">{model_type}</td>'
# Add metric values with minimal styling for all columns
all_metric_columns = [col for group in metric_groups.values() for col in group]
for col in all_metric_columns:
display_name = col.split(": ", 1)[-1] if ":" in col else col
cell_class = "table-cell overall-cell" if display_name == "Metric Average" else "table-cell metric-cell"
# Check if column exists in the row (it should)
if col in row:
value_text = row[col]
# Simple styling based on positive/negative values
try:
value = float(str(row[col]).replace(',', ''))
if value > 0:
cell_class += " positive-value"
elif value < 0:
cell_class += " negative-value"
except (ValueError, TypeError):
# Non-numeric values (e.g. "-" placeholders) keep the default styling
pass
html_table += f'<td class="{cell_class}">{value_text}</td>'
else:
# If column doesn't exist (shouldn't happen), add empty cell
html_table += f'<td class="{cell_class}">-</td>'
html_table += "</tr>"
# Close the table
html_table += """
</tbody>
</table>
</div>
</div>
"""
# Add styling for metrics section
metrics_css = """
<style>
.metric-definitions {
margin-top: 30px;
padding-top: 20px;
border-top: 1px solid #333;
}
.metric-definition {
background-color: #1a1a1a;
border-radius: 8px;
padding: 12px 16px;
margin-bottom: 16px;
}
.metric-definition h4 {
margin-top: 0;
color: #a5b4fc;
}
.metric-definition p {
margin-bottom: 0;
color: #e2e8f0;
}
</style>
"""
# Build a clean HTML string for the metrics section
metrics_html = '<div class="metric-definitions">'
# Add each metric definition
for metric_name, metric_info in metrics_config.items():
metric_description = metric_info.get('description', '')
# Special handling for Relative Improvement to Human to show formula
if metric_name == "Relative Improvement to Human":
formula_html = """
<div style="margin: 15px 0;">
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
<div style="display: flex; align-items: center; justify-content: center;">
<div style="margin-right: 10px;">Relative Improvement to Human =</div>
<div style="display: inline-block; text-align: center; padding: 0 10px;">
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
<div style="padding-top: 5px;">s<sub>top_human</sub> - s<sub>baseline</sub></div>
</div>
<div style="margin-left: 10px;">× 100%</div>
</div>
</div>
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
<li style="margin-bottom: 5px;">s<sub>top_human</sub> is the top human performance in competition</li>
</ul>
<p style="margin-top: 10px;">This metric normalizes scores by setting the baseline solution to 0 and the top human solution to 100.</p>
</div>
"""
# Add the metric definition with the formula
metrics_html += f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p>{formula_html}</div>'
# Special handling for Absolute Improvement to Baseline to show formula
elif metric_name == "Absolute Improvement to Baseline":
formula_html = """
<div style="margin: 15px 0;">
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
<div style="display: flex; align-items: center; justify-content: center;">
<div style="margin-right: 10px;">Absolute Improvement to Baseline =</div>
<div style="display: inline-block; text-align: center; padding: 0 10px;">
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
<div style="padding-top: 5px;">s<sub>baseline</sub></div>
</div>
<div style="margin-left: 10px;">× 100%</div>
</div>
</div>
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
</ul>
<p style="margin-top: 10px;">This metric measures the percentage improvement of an agent's performance over the baseline solution.</p>
</div>
"""
# Add the metric definition with the formula
metrics_html += f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p>{formula_html}</div>'
else:
# Regular metric without formula
metrics_html += f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p></div>'
# Close the metric definitions container
metrics_html += '</div>'
# Display the styling and HTML separately for maximum control
st.markdown(html_table, unsafe_allow_html=True)
st.markdown(metrics_css, unsafe_allow_html=True)
# Render the metrics definitions
st.markdown(metrics_html, unsafe_allow_html=True)
def render_empty_state():
"""
Render an empty state when no data is available
"""
st.markdown("""
<div class="warning-box">
<strong>No data to display.</strong> Please select at least one task to view the data.
</div>
""", unsafe_allow_html=True)