# MLRC_Bench/src/utils/data_loader.py
"""
Data loading and processing utilities for the leaderboard application.
"""
import json
import pandas as pd
from src.utils.config import model_categories
from src.utils.task_mapping import get_display_name
def load_metric_data(file_path):
"""
Load metric data from a JSON file
Args:
file_path (str): Path to the JSON file containing metric data
Returns:
dict: Dictionary containing the loaded metric data
"""
try:
with open(file_path, "r") as f:
return json.load(f)
except FileNotFoundError:
print(f"Error: File {file_path} not found.")
return {}
except json.JSONDecodeError:
print(f"Error: File {file_path} is not a valid JSON file.")
return {}
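# Expected shape of the metrics JSON, as consumed by process_data below
# (task/model names and values here are illustrative only):
#     {
#         "task-name-a": {"model_x": 12.3, "model_y": 4.5},
#         "task-name-b": {"model_x": 7.8}
#     }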
def process_data(metric_data):
"""
Process the metric data into a pandas DataFrame
Args:
metric_data (dict): Dictionary containing the metric data
Returns:
pandas.DataFrame: DataFrame containing the processed data
"""
# Create a DataFrame to store the model metric data
tasks = list(metric_data.keys())
models = []
model_data = {}
# Extract model names and their metric values for each task
for task in tasks:
for model in metric_data[task]:
if model not in models:
models.append(model)
model_data[model] = {}
# Store the metric value for this task
model_data[model][task] = metric_data[task][model]
# Create DataFrame from the model_data dictionary
df = pd.DataFrame.from_dict(model_data, orient='index')
# Replace NaN values with '-'
df.fillna('-', inplace=True)
# First convert raw task names to standard format (spaces instead of hyphens/underscores)
standardized_columns = [task.replace("-", " ").replace("_", " ").title() for task in df.columns]
df.columns = standardized_columns
# Then apply our display name mapping
display_name_columns = {col: get_display_name(col) for col in df.columns}
df = df.rename(columns=display_name_columns)
# Add a model type column to the dataframe
df['Model Type'] = df.index.map(lambda x: model_categories.get(x, "Unknown"))
return df
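# Usage sketch (illustrative path; the app supplies the real metrics file elsewhere):
#     metric_data = load_metric_data("data/metrics.json")
#     df = process_data(metric_data)
#     # -> one row per model, one column per task display name, plus a 'Model Type' column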
def calculate_selected_overall(row, selected_tasks):
"""
Calculate overall average for selected tasks
Args:
row (pandas.Series): Row of data
selected_tasks (list): List of task names to include in the average
Returns:
float or str: The calculated average or '-' if no numeric values
"""
numeric_values = []
for task in selected_tasks:
value = row[task]
# Check if the value is numeric (could be float or string representing float)
if isinstance(value, (int, float)) or (isinstance(value, str) and value.replace('.', '', 1).replace('-', '', 1).isdigit()):
numeric_values.append(float(value))
# Calculate average if there are numeric values
if numeric_values:
return sum(numeric_values) / len(numeric_values)
else:
return '-'
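# Row-wise usage sketch (task names are hypothetical); pandas passes each row to the
# helper along with the extra `args` tuple:
#     df['Selected Overall'] = df.apply(
#         calculate_selected_overall, axis=1, args=(['Task A', 'Task B'],)
#     )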
def filter_and_prepare_data(df, selected_tasks, selected_model_types):
"""
Filter and prepare data based on selections
Args:
df (pandas.DataFrame): The original DataFrame
selected_tasks (list): List of selected task names
selected_model_types (list): List of selected model types
Returns:
pandas.DataFrame: Filtered and prepared DataFrame
"""
    # Filter the dataframe based on selected model types; take a copy so the later
    # column assignments do not warn about writing to a view of `df`
    filtered_df = df[df['Model Type'].isin(selected_model_types)].copy()
    # Calculate the average for the selected tasks only; coerce the '-' placeholders
    # to NaN first so they are skipped instead of breaking the row-wise mean
    selected_tasks_df = filtered_df[selected_tasks].apply(pd.to_numeric, errors='coerce')
    filtered_df['Selected Overall'] = selected_tasks_df.mean(axis=1)
# Separate human entries from other models for ranking
is_human = filtered_df['Model Type'] == 'Human'
    human_df = filtered_df[is_human].copy()
    non_human_df = filtered_df[~is_human].copy()
# Sort non-human models by Selected Overall and add rank
non_human_df = non_human_df.sort_values('Selected Overall', ascending=False)
non_human_df.insert(0, 'Rank', range(1, len(non_human_df) + 1))
# Add rank for human (use '-' to indicate not ranked)
human_df.insert(0, 'Rank', '-')
# Combine dataframes - put humans at appropriate position based on score
combined_df = pd.concat([non_human_df, human_df])
combined_df = combined_df.sort_values('Selected Overall', ascending=False)
# Add a Model Name column that shows the index (actual model name)
combined_df['Model Name'] = combined_df.index
return combined_df
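# Behaviour sketch (model type names other than 'Human' are illustrative):
#     ranked = filter_and_prepare_data(df, selected_tasks, ['Open Source', 'Human'])
# Non-human rows receive Rank 1..N by 'Selected Overall'; 'Human' rows keep Rank '-'
# but are still interleaved into the final ordering by their score.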
def format_display_dataframe(filtered_df, selected_tasks):
"""
Create and format the display DataFrame for the leaderboard table
Args:
filtered_df (pandas.DataFrame): The filtered DataFrame
selected_tasks (list): List of selected task names
Returns:
tuple: (pandas.DataFrame, list) - The display DataFrame and the metric columns
"""
# Create a fixed display DataFrame with only the model info
display_df = filtered_df[['Rank', 'Model Name', 'Model Type']].copy()
# Format the rank column with medals
medal_ranks = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"}
display_df['Rank'] = display_df['Rank'].apply(lambda x: medal_ranks.get(x, str(x)))
# Rename 'Model Name' to 'Agent'
display_df = display_df.rename(columns={"Model Name": "Agent"})
# Add metrics columns (Selected Overall and individual tasks)
metric_columns = ['Selected Overall'] + selected_tasks
for col in metric_columns:
if col in filtered_df.columns:
# Format numeric columns to 1 decimal place
if filtered_df[col].dtype in ['float64', 'float32']:
display_df[col] = filtered_df[col].apply(lambda x: f"{x:.1f}" if isinstance(x, (int, float)) else x)
else:
display_df[col] = filtered_df[col]
# Rename "Selected Overall" to "Metric Average" in display_df
if "Selected Overall" in display_df.columns:
display_df = display_df.rename(columns={"Selected Overall": "Metric Average"})
# Also update the metric_columns list to reflect the rename
metric_columns = ['Metric Average'] + selected_tasks
return display_df, metric_columns
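# Minimal end-to-end sketch of the helpers above. The metrics path, and the choice
# of selecting every task and every model type, are illustrative only; the real
# leaderboard UI drives these calls with user selections.
if __name__ == "__main__":
    metrics = load_metric_data("data/metrics.json")  # placeholder path
    full_df = process_data(metrics)
    task_columns = [col for col in full_df.columns if col != 'Model Type']
    ranked_df = filter_and_prepare_data(
        full_df, task_columns, full_df['Model Type'].unique().tolist()
    )
    display_df, metric_columns = format_display_dataframe(ranked_df, task_columns)
    print(display_df.head())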