"""
Data loading and processing utilities for the leaderboard application.
"""
import json

import pandas as pd

from src.utils.config import model_categories
from src.utils.task_mapping import get_display_name


def load_metric_data(file_path):
    """
    Load metric data from a JSON file.

    Args:
        file_path (str): Path to the JSON file containing metric data

    Returns:
        dict: Dictionary containing the loaded metric data
    """
    try:
        with open(file_path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return {}
    except json.JSONDecodeError:
        print(f"Error: File {file_path} is not a valid JSON file.")
        return {}
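
# Illustrative usage of load_metric_data (hypothetical path and layout; the
# JSON is assumed to map task name -> model name -> score):
#
#   metric_data = load_metric_data("data/metrics.json")
#   # {"question-answering": {"model-a": 71.3, "model-b": 68.9}, ...}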


def process_data(metric_data):
    """
    Process the metric data into a pandas DataFrame.

    Args:
        metric_data (dict): Dictionary containing the metric data

    Returns:
        pandas.DataFrame: DataFrame containing the processed data
    """
    # Extract model names and their metric values for each task
    tasks = list(metric_data.keys())
    models = []
    model_data = {}
    for task in tasks:
        for model in metric_data[task]:
            if model not in models:
                models.append(model)
                model_data[model] = {}
            # Store the metric value for this task
            model_data[model][task] = metric_data[task][model]

    # Create a DataFrame with one row per model and one column per task
    df = pd.DataFrame.from_dict(model_data, orient='index')

    # Replace NaN values (tasks a model was not evaluated on) with '-'
    df.fillna('-', inplace=True)

    # First convert raw task names to a standard format
    # (spaces instead of hyphens/underscores, title case)
    standardized_columns = [task.replace("-", " ").replace("_", " ").title() for task in df.columns]
    df.columns = standardized_columns

    # Then apply the display name mapping
    display_name_columns = {col: get_display_name(col) for col in df.columns}
    df = df.rename(columns=display_name_columns)

    # Add a model type column to the DataFrame
    df['Model Type'] = df.index.map(lambda x: model_categories.get(x, "Unknown"))

    return df
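
# Sketch of the frame process_data produces, under the same assumed layout
# (hypothetical values; '-' marks a task the model was not evaluated on):
#
#              Question Answering  Summarization  Model Type
#   model-a                  71.3           64.2     Unknown
#   model-b                  68.9              -     Unknown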


def calculate_selected_overall(row, selected_tasks):
    """
    Calculate the overall average for the selected tasks.

    Args:
        row (pandas.Series): Row of data
        selected_tasks (list): List of task names to include in the average

    Returns:
        float or str: The calculated average, or '-' if there are no numeric values
    """
    numeric_values = []
    for task in selected_tasks:
        value = row[task]
        # Accept ints/floats and strings that parse as numbers (e.g. "73.5");
        # non-numeric placeholders such as '-' are skipped
        try:
            numeric_values.append(float(value))
        except (TypeError, ValueError):
            continue

    # Average the numeric values if there are any
    if numeric_values:
        return sum(numeric_values) / len(numeric_values)
    return '-'
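
# Example of calculate_selected_overall on a hypothetical row: the '-'
# placeholder is skipped, so only the numeric score contributes:
#
#   row = pd.Series({"Summarization": 64.2, "Question Answering": "-"})
#   calculate_selected_overall(row, ["Summarization", "Question Answering"])  # -> 64.2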


def filter_and_prepare_data(df, selected_tasks, selected_model_types):
    """
    Filter and prepare data based on the user's selections.

    Args:
        df (pandas.DataFrame): The original DataFrame
        selected_tasks (list): List of selected task names
        selected_model_types (list): List of selected model types

    Returns:
        pandas.DataFrame: Filtered and prepared DataFrame
    """
    # Filter by the selected model types; copy so the columns added below do
    # not trigger a SettingWithCopyWarning on a slice of the original frame
    filtered_df = df[df['Model Type'].isin(selected_model_types)].copy()

    # Calculate the average for the selected tasks only, coercing the '-'
    # placeholders to NaN so they are excluded from the mean
    selected_tasks_df = filtered_df[selected_tasks].apply(pd.to_numeric, errors='coerce')
    filtered_df['Selected Overall'] = selected_tasks_df.mean(axis=1)

    # Separate human entries from other models for ranking
    is_human = filtered_df['Model Type'] == 'Human'
    human_df = filtered_df[is_human].copy()
    non_human_df = filtered_df[~is_human].copy()

    # Sort non-human models by Selected Overall and add a rank
    non_human_df = non_human_df.sort_values('Selected Overall', ascending=False)
    non_human_df.insert(0, 'Rank', range(1, len(non_human_df) + 1))

    # Humans are not ranked; use '-' as the rank placeholder
    human_df.insert(0, 'Rank', '-')

    # Combine the frames and re-sort so humans land at the position their
    # score warrants
    combined_df = pd.concat([non_human_df, human_df])
    combined_df = combined_df.sort_values('Selected Overall', ascending=False)

    # Add a Model Name column that mirrors the index (the actual model name)
    combined_df['Model Name'] = combined_df.index

    return combined_df
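
# Illustration (hypothetical scores): if two ranked models score 71.2 and 66.8
# and a human baseline scores 69.0, the combined frame reads top to bottom as
# Rank 1 (71.2), Rank '-' (69.0, Human), Rank 2 (66.8), so the human slots in
# by score without consuming a rank.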


def format_display_dataframe(filtered_df, selected_tasks):
    """
    Create and format the display DataFrame for the leaderboard table.

    Args:
        filtered_df (pandas.DataFrame): The filtered DataFrame
        selected_tasks (list): List of selected task names

    Returns:
        tuple: (pandas.DataFrame, list) - The display DataFrame and the metric columns
    """
    # Create a fixed display DataFrame with only the model info
    display_df = filtered_df[['Rank', 'Model Name', 'Model Type']].copy()

    # Decorate the top three ranks with medals
    medal_ranks = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"}
    display_df['Rank'] = display_df['Rank'].apply(lambda x: medal_ranks.get(x, str(x)))

    # Rename 'Model Name' to 'Agent'
    display_df = display_df.rename(columns={"Model Name": "Agent"})

    # Add the metric columns (Selected Overall and the individual tasks)
    metric_columns = ['Selected Overall'] + selected_tasks
    for col in metric_columns:
        if col in filtered_df.columns:
            if filtered_df[col].dtype in ['float64', 'float32']:
                # Format float columns to one decimal place
                display_df[col] = filtered_df[col].apply(
                    lambda x: f"{x:.1f}" if isinstance(x, (int, float)) else x
                )
            else:
                display_df[col] = filtered_df[col]

    # Rename "Selected Overall" to "Metric Average" in display_df, and update
    # the metric_columns list to reflect the rename
    if "Selected Overall" in display_df.columns:
        display_df = display_df.rename(columns={"Selected Overall": "Metric Average"})
        metric_columns = ['Metric Average'] + selected_tasks

    return display_df, metric_columns
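

if __name__ == "__main__":
    # Minimal smoke test of the full pipeline, assuming a JSON layout of
    # task -> model -> score. The names and scores below are illustrative
    # only; real model names would also appear in model_categories.
    sample = {
        "question-answering": {"model-a": 71.3, "model-b": 68.9},
        "summarization": {"model-a": 64.2, "model-b": 70.1},
    }
    df = process_data(sample)
    tasks = [col for col in df.columns if col != 'Model Type']
    prepared = filter_and_prepare_data(df, tasks, df['Model Type'].unique().tolist())
    table, columns = format_display_dataframe(prepared, tasks)
    print(table.to_string(index=False))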