""" | |
Data loading and processing utilities for the leaderboard application. | |
""" | |
import pandas as pd | |
import json | |
from src.utils.config import model_categories | |
from src.utils.task_mapping import get_display_name | |

def load_metric_data(file_path):
    """
    Load metric data from a JSON file.

    Args:
        file_path (str): Path to the JSON file containing metric data

    Returns:
        dict: Dictionary containing the loaded metric data
    """
    try:
        with open(file_path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return {}
    except json.JSONDecodeError:
        print(f"Error: File {file_path} is not a valid JSON file.")
        return {}
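
# Illustrative usage (a sketch -- the path and JSON layout below are assumptions,
# not a file shipped with this module). The expected shape is task -> model -> score:
#
#   metric_data = load_metric_data("data/metrics.json")
#   # {"web-navigation": {"agent-a": 71.2, "human": 88.0}, ...}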

def process_data(metric_data):
    """
    Process the metric data into a pandas DataFrame.

    Args:
        metric_data (dict): Dictionary containing the metric data

    Returns:
        pandas.DataFrame: DataFrame containing the processed data
    """
    # Extract model names and their metric values for each task
    tasks = list(metric_data.keys())
    models = []
    model_data = {}
    for task in tasks:
        for model in metric_data[task]:
            if model not in models:
                models.append(model)
                model_data[model] = {}
            # Store the metric value for this task
            model_data[model][task] = metric_data[task][model]

    # Create DataFrame from the model_data dictionary (one row per model)
    df = pd.DataFrame.from_dict(model_data, orient='index')

    # Replace NaN values (tasks a model was not evaluated on) with '-'
    df.fillna('-', inplace=True)

    # First convert raw task names to a standard format
    # (spaces instead of hyphens/underscores, title case)
    standardized_columns = [task.replace("-", " ").replace("_", " ").title() for task in df.columns]
    df.columns = standardized_columns

    # Then apply our display name mapping
    display_name_columns = {col: get_display_name(col) for col in df.columns}
    df = df.rename(columns=display_name_columns)

    # Add a model type column to the dataframe
    df['Model Type'] = df.index.map(lambda x: model_categories.get(x, "Unknown"))
    return df
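
# Sketch of the resulting frame for the sample JSON above, assuming
# get_display_name passes these names through unchanged and model_categories
# has no entry for them (so Model Type falls back to "Unknown"):
#
#            Web Navigation Model Type
#   agent-a            71.2    Unknown
#   human              88.0    Unknown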

def calculate_selected_overall(row, selected_tasks):
    """
    Calculate the overall average for the selected tasks.

    Args:
        row (pandas.Series): Row of data
        selected_tasks (list): List of task names to include in the average

    Returns:
        float or str: The calculated average, or '-' if there are no numeric values
    """
    numeric_values = []
    for task in selected_tasks:
        value = row[task]
        # Accept numbers and strings that parse as numbers; skip '-' placeholders
        # and NaN. A float() round-trip is more robust than character-level checks,
        # which mis-handle values such as "1-2" or "-.5".
        try:
            numeric = float(value)
        except (TypeError, ValueError):
            continue
        if pd.notna(numeric):
            numeric_values.append(numeric)

    # Calculate the average if there are any numeric values
    if numeric_values:
        return sum(numeric_values) / len(numeric_values)
    return '-'
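
# Illustrative call (the row and task names are hypothetical):
#
#   row = pd.Series({"Web Navigation": 71.2, "Code Editing": "-"})
#   calculate_selected_overall(row, ["Web Navigation", "Code Editing"])  # -> 71.2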

def filter_and_prepare_data(df, selected_tasks, selected_model_types):
    """
    Filter and prepare data based on selections.

    Args:
        df (pandas.DataFrame): The original DataFrame
        selected_tasks (list): List of selected task names
        selected_model_types (list): List of selected model types

    Returns:
        pandas.DataFrame: Filtered and prepared DataFrame
    """
    # Filter the dataframe based on selected model types; copy so the
    # assignments below do not raise SettingWithCopyWarning
    filtered_df = df[df['Model Type'].isin(selected_model_types)].copy()

    # Calculate the average for selected tasks only; coerce '-' placeholders
    # to NaN so they are excluded from the mean instead of breaking it
    selected_tasks_df = filtered_df[selected_tasks].apply(pd.to_numeric, errors='coerce')
    filtered_df['Selected Overall'] = selected_tasks_df.mean(axis=1)

    # Separate human entries from other models for ranking
    is_human = filtered_df['Model Type'] == 'Human'
    human_df = filtered_df[is_human].copy()
    non_human_df = filtered_df[~is_human].copy()

    # Sort non-human models by Selected Overall and add rank
    non_human_df = non_human_df.sort_values('Selected Overall', ascending=False)
    non_human_df.insert(0, 'Rank', range(1, len(non_human_df) + 1))

    # Add rank for humans (use '-' to indicate not ranked)
    human_df.insert(0, 'Rank', '-')

    # Combine dataframes, placing humans at the position their score warrants
    combined_df = pd.concat([non_human_df, human_df])
    combined_df = combined_df.sort_values('Selected Overall', ascending=False)

    # Add a Model Name column that exposes the index (the actual model name)
    combined_df['Model Name'] = combined_df.index
    return combined_df
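
# Illustrative call (model types are whatever model_categories assigns):
#
#   ranked = filter_and_prepare_data(df, ["Web Navigation"], ["Unknown", "Human"])
#   # -> rows sorted by 'Selected Overall'; non-human rows ranked 1..n, humans '-'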

def format_display_dataframe(filtered_df, selected_tasks):
    """
    Create and format the display DataFrame for the leaderboard table.

    Args:
        filtered_df (pandas.DataFrame): The filtered DataFrame
        selected_tasks (list): List of selected task names

    Returns:
        tuple: (pandas.DataFrame, list) - The display DataFrame and the metric columns
    """
    # Create a fixed display DataFrame with only the model info
    display_df = filtered_df[['Rank', 'Model Name', 'Model Type']].copy()

    # Format the rank column with medals for the top three
    medal_ranks = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"}
    display_df['Rank'] = display_df['Rank'].apply(lambda x: medal_ranks.get(x, str(x)))

    # Rename 'Model Name' to 'Agent'
    display_df = display_df.rename(columns={"Model Name": "Agent"})

    # Add metric columns (Selected Overall and the individual tasks),
    # formatting numeric entries to one decimal place and rendering NaN
    # (tasks a model was not evaluated on) as '-'
    metric_columns = ['Selected Overall'] + selected_tasks
    for col in metric_columns:
        if col in filtered_df.columns:
            display_df[col] = filtered_df[col].apply(
                lambda x: ('-' if pd.isna(x) else f"{x:.1f}") if isinstance(x, (int, float)) else x
            )

    # Rename "Selected Overall" to "Metric Average" in display_df
    if "Selected Overall" in display_df.columns:
        display_df = display_df.rename(columns={"Selected Overall": "Metric Average"})
        # Also update the metric_columns list to reflect the rename
        metric_columns = ['Metric Average'] + selected_tasks

    return display_df, metric_columns
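
if __name__ == "__main__":
    # Minimal smoke test of the full pipeline with synthetic scores (a sketch:
    # the model and task names below are made up, and Model Type falls back to
    # "Unknown" unless model_categories happens to map these names).
    sample = {
        "web-navigation": {"agent-a": 71.2, "agent-b": 64.5},
        "code_editing": {"agent-a": 55.3, "agent-b": 60.0},
    }
    frame = process_data(sample)
    task_cols = [c for c in frame.columns if c != 'Model Type']
    ranked = filter_and_prepare_data(frame, task_cols, frame['Model Type'].unique().tolist())
    table, cols = format_display_dataframe(ranked, task_cols)
    print(table.to_string(index=False))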