# MLRC_Bench/src/utils/data_loader.py
"""
Data loading and processing utilities for the leaderboard application.
"""
import json
import pandas as pd
from src.utils.config import model_categories
from src.utils.task_mapping import get_display_name
def load_metric_data(file_path):
"""
Load metric data from a JSON file
Args:
file_path (str): Path to the JSON file containing metric data
Returns:
dict: Dictionary containing the loaded metric data
"""
try:
with open(file_path, "r") as f:
return json.load(f)
except FileNotFoundError:
print(f"Error: File {file_path} not found.")
return {}
except json.JSONDecodeError:
print(f"Error: File {file_path} is not a valid JSON file.")
return {}
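# Expected shape of the metrics JSON, as consumed by process_data below
# (task/model names and values here are illustrative only):
#     {
#         "task-name-a": {"model_x": 12.3, "model_y": 4.5},
#         "task-name-b": {"model_x": 7.8}
#     }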
def process_data(metric_data):
"""
Process the metric data into a pandas DataFrame
Args:
metric_data (dict): Dictionary containing the metric data
Returns:
pandas.DataFrame: DataFrame containing the processed data
"""
# Create a DataFrame to store the model metric data
tasks = list(metric_data.keys())
models = []
model_data = {}
# Extract model names and their metric values for each task
for task in tasks:
for model in metric_data[task]:
if model not in models:
models.append(model)
model_data[model] = {}
# Store the metric value for this task
model_data[model][task] = metric_data[task][model]
# Create DataFrame from the model_data dictionary
df = pd.DataFrame.from_dict(model_data, orient='index')
# Replace NaN values with '-'
df.fillna('-', inplace=True)
# First convert raw task names to standard format (spaces instead of hyphens/underscores)
standardized_columns = [task.replace("-", " ").replace("_", " ").title() for task in df.columns]
df.columns = standardized_columns
# Then apply our display name mapping
display_name_columns = {col: get_display_name(col) for col in df.columns}
df = df.rename(columns=display_name_columns)
# Add a model type column to the dataframe
df['Model Type'] = df.index.map(lambda x: model_categories.get(x, "Unknown"))
return df
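# Usage sketch (illustrative path; the app supplies the real metrics file elsewhere):
#     metric_data = load_metric_data("data/metrics.json")
#     df = process_data(metric_data)
#     # -> one row per model, one column per task display name, plus a 'Model Type' column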
def calculate_selected_overall(row, selected_tasks):
"""
Calculate overall average for selected tasks
Args:
row (pandas.Series): Row of data
selected_tasks (list): List of task names to include in the average
Returns:
float or str: The calculated average or '-' if no numeric values
"""
numeric_values = []
for task in selected_tasks:
value = row[task]
# Check if the value is numeric (could be float or string representing float)
if isinstance(value, (int, float)) or (isinstance(value, str) and value.replace('.', '', 1).replace('-', '', 1).isdigit()):
numeric_values.append(float(value))
# Calculate average if there are numeric values
if numeric_values:
return sum(numeric_values) / len(numeric_values)
else:
return '-'
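# Row-wise usage sketch (task names are hypothetical); pandas passes each row to the
# helper along with the extra `args` tuple:
#     df['Selected Overall'] = df.apply(
#         calculate_selected_overall, axis=1, args=(['Task A', 'Task B'],)
#     )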
def filter_and_prepare_data(df, selected_tasks, selected_model_types):
"""
Filter and prepare data based on selections
Args:
df (pandas.DataFrame): The original DataFrame
selected_tasks (list): List of selected task names
selected_model_types (list): List of selected model types
Returns:
pandas.DataFrame: Filtered and prepared DataFrame
"""
    # Filter the dataframe based on selected model types; take a copy so the later
    # column assignments do not warn about writing to a view of `df`
    filtered_df = df[df['Model Type'].isin(selected_model_types)].copy()
    # Calculate the average for the selected tasks only; coerce the '-' placeholders
    # to NaN first so they are skipped instead of breaking the row-wise mean
    selected_tasks_df = filtered_df[selected_tasks].apply(pd.to_numeric, errors='coerce')
    filtered_df['Selected Overall'] = selected_tasks_df.mean(axis=1)
# Separate human entries from other models for ranking
is_human = filtered_df['Model Type'] == 'Human'
    human_df = filtered_df[is_human].copy()
    non_human_df = filtered_df[~is_human].copy()
# Sort non-human models by Selected Overall and add rank
non_human_df = non_human_df.sort_values('Selected Overall', ascending=False)
non_human_df.insert(0, 'Rank', range(1, len(non_human_df) + 1))
# Add rank for human (use '-' to indicate not ranked)
human_df.insert(0, 'Rank', '-')
# Combine dataframes - put humans at appropriate position based on score
combined_df = pd.concat([non_human_df, human_df])
combined_df = combined_df.sort_values('Selected Overall', ascending=False)
# Add a Model Name column that shows the index (actual model name)
combined_df['Model Name'] = combined_df.index
return combined_df
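# Behaviour sketch (model type names other than 'Human' are illustrative):
#     ranked = filter_and_prepare_data(df, selected_tasks, ['Open Source', 'Human'])
# Non-human rows receive Rank 1..N by 'Selected Overall'; 'Human' rows keep Rank '-'
# but are still interleaved into the final ordering by their score.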
def format_display_dataframe(filtered_df, selected_tasks):
"""
Create and format the display DataFrame for the leaderboard table
Args:
filtered_df (pandas.DataFrame): The filtered DataFrame
selected_tasks (list): List of selected task names
Returns:
tuple: (pandas.DataFrame, list) - The display DataFrame and the metric columns
"""
# Create a fixed display DataFrame with only the model info
display_df = filtered_df[['Rank', 'Model Name', 'Model Type']].copy()
# Format the rank column with medals
medal_ranks = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"}
display_df['Rank'] = display_df['Rank'].apply(lambda x: medal_ranks.get(x, str(x)))
# Rename 'Model Name' to 'Agent'
display_df = display_df.rename(columns={"Model Name": "Agent"})
# Add metrics columns (Selected Overall and individual tasks)
metric_columns = ['Selected Overall'] + selected_tasks
for col in metric_columns:
if col in filtered_df.columns:
# Format numeric columns to 1 decimal place
if filtered_df[col].dtype in ['float64', 'float32']:
display_df[col] = filtered_df[col].apply(lambda x: f"{x:.1f}" if isinstance(x, (int, float)) else x)
else:
display_df[col] = filtered_df[col]
# Rename "Selected Overall" to "Metric Average" in display_df
if "Selected Overall" in display_df.columns:
display_df = display_df.rename(columns={"Selected Overall": "Metric Average"})
# Also update the metric_columns list to reflect the rename
metric_columns = ['Metric Average'] + selected_tasks
return display_df, metric_columns
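# Minimal end-to-end sketch of the helpers above. The metrics path, and the choice
# of selecting every task and every model type, are illustrative only; the real
# leaderboard UI drives these calls with user selections.
if __name__ == "__main__":
    metrics = load_metric_data("data/metrics.json")  # placeholder path
    full_df = process_data(metrics)
    task_columns = [col for col in full_df.columns if col != 'Model Type']
    ranked_df = filter_and_prepare_data(
        full_df, task_columns, full_df['Model Type'].unique().tolist()
    )
    display_df, metric_columns = format_display_dataframe(ranked_df, task_columns)
    print(display_df.head())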