MLRC_Bench

Running

File size: 6,441 Bytes

"""
Data loading and processing utilities for the leaderboard application.
"""
import pandas as pd
import json
from src.utils.config import model_categories
from src.utils.task_mapping import get_display_name

def load_metric_data(file_path):
    """
    Load metric data from a JSON file

    The file is always read as UTF-8 (the encoding JSON is specified in)
    instead of the platform's default encoding, so the same file loads
    identically on every operating system.

    Args:
        file_path (str): Path to the JSON file containing metric data

    Returns:
        dict: Dictionary containing the loaded metric data, or an empty
            dict if the file is missing or cannot be parsed as JSON
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Bytes that are not valid UTF-8 cannot be valid JSON either, so
        # report them the same way as a syntax error in the document.
        print(f"Error: File {file_path} is not a valid JSON file.")
        return {}

def process_data(metric_data):
    """
    Turn the nested metric dictionary into a model-by-task DataFrame

    The input is indexed first by task and then by model; the result has one
    row per model (in first-seen order), one column per task, '-' wherever a
    model has no value for a task, and a trailing 'Model Type' column.

    Args:
        metric_data (dict): Mapping of task name -> {model name: metric value}

    Returns:
        pandas.DataFrame: DataFrame indexed by model name with display-named
            task columns plus a 'Model Type' column
    """
    # Pivot {task: {model: value}} into {model: {task: value}}
    scores_by_model = {}
    for task_name, task_scores in metric_data.items():
        for model_name in task_scores:
            scores_by_model.setdefault(model_name, {})[task_name] = task_scores[model_name]

    # One row per model, one column per task
    frame = pd.DataFrame.from_dict(scores_by_model, orient='index')

    # Missing (model, task) combinations are shown as '-'
    frame.fillna('-', inplace=True)

    # Normalise raw task names: separators become spaces, words are title-cased
    frame.columns = [
        raw_name.replace("-", " ").replace("_", " ").title()
        for raw_name in frame.columns
    ]

    # Map the normalised names onto their display names
    frame = frame.rename(
        columns={normalised: get_display_name(normalised) for normalised in frame.columns}
    )

    # Look up each model's category, falling back to "Unknown"
    def lookup_model_type(model_name):
        return model_categories.get(model_name, "Unknown")

    frame['Model Type'] = frame.index.map(lookup_model_type)

    return frame

def calculate_selected_overall(row, selected_tasks):
    """
    Calculate overall average for selected tasks

    Each selected value is converted with float(); values that cannot be
    converted (such as the '-' placeholder, None, or malformed strings like
    '1-2') and NaN values are left out of the average.

    Args:
        row (pandas.Series): Row of data, indexable by task name
        selected_tasks (list): List of task names to include in the average

    Returns:
        float or str: The calculated average or '-' if no numeric values
    """
    numeric_values = []

    for task in selected_tasks:
        value = row[task]
        # Let float() decide what is numeric: this covers Python and NumPy
        # numbers as well as numeric strings, and cannot be fooled by
        # look-alike strings the way a character-based check can.
        try:
            number = float(value)
        except (TypeError, ValueError):
            continue
        # NaN marks a missing score; including it would turn the whole
        # average into NaN.
        if pd.isna(number):
            continue
        numeric_values.append(number)

    if not numeric_values:
        return '-'
    return sum(numeric_values) / len(numeric_values)

def filter_and_prepare_data(df, selected_tasks, selected_model_types):
    """
    Filter and prepare data based on selections

    Keeps the rows whose 'Model Type' is selected, adds a 'Selected Overall'
    column holding the per-row mean of the selected task columns, ranks the
    non-human rows by that mean, and returns everything sorted by it.

    Task cells that are not numeric (for example the '-' placeholder) are
    ignored when averaging; a row with no numeric value in any selected task
    gets NaN as its 'Selected Overall' and sorts last. The task columns
    themselves are returned unchanged. The input DataFrame is not modified.

    Args:
        df (pandas.DataFrame): The original DataFrame
        selected_tasks (list): List of selected task names
        selected_model_types (list): List of selected model types

    Returns:
        pandas.DataFrame: Filtered and prepared DataFrame with added 'Rank'
            (first column; '-' for 'Human' rows), 'Selected Overall' and
            'Model Name' columns
    """
    # Take an explicit copy so adding columns below never writes into a
    # slice of the caller's DataFrame
    filtered_df = df[df['Model Type'].isin(selected_model_types)].copy()

    # Average the selected tasks on a numeric view of the data: placeholder
    # strings become NaN and are skipped by mean() instead of breaking it
    numeric_scores = filtered_df[list(selected_tasks)].apply(pd.to_numeric, errors='coerce')
    filtered_df['Selected Overall'] = numeric_scores.mean(axis=1)

    # Separate human entries from other models for ranking
    is_human = filtered_df['Model Type'] == 'Human'
    human_df = filtered_df[is_human].copy()
    non_human_df = filtered_df[~is_human]

    # Sort non-human models by Selected Overall and add rank; a stable sort
    # keeps tied rows in a deterministic order
    non_human_df = non_human_df.sort_values('Selected Overall', ascending=False, kind='mergesort')
    non_human_df.insert(0, 'Rank', range(1, len(non_human_df) + 1))

    # Add rank for human (use '-' to indicate not ranked)
    human_df.insert(0, 'Rank', '-')

    # Combine dataframes - put humans at appropriate position based on score.
    # The stable sort keeps tied non-human rows in rank order.
    combined_df = pd.concat([non_human_df, human_df])
    combined_df = combined_df.sort_values('Selected Overall', ascending=False, kind='mergesort')

    # Add a Model Name column that shows the index (actual model name)
    combined_df['Model Name'] = combined_df.index

    return combined_df

def format_display_dataframe(filtered_df, selected_tasks):
    """
    Create and format the display DataFrame for the leaderboard table

    The result contains 'Rank' (ranks 1-3 decorated with medals), 'Agent'
    (the 'Model Name' column renamed), 'Model Type', and one column per
    metric. Float values are rendered with one decimal place whether the
    column is purely numeric or mixes numbers with placeholder strings, and
    NaN is rendered as '-'. 'Selected Overall' is shown as 'Metric Average'.

    Args:
        filtered_df (pandas.DataFrame): The filtered DataFrame
        selected_tasks (list): List of selected task names

    Returns:
        tuple: (pandas.DataFrame, list) - The display DataFrame and the metric columns
    """
    # Create a fixed display DataFrame with only the model info
    display_df = filtered_df[['Rank', 'Model Name', 'Model Type']].copy()

    # Format the rank column with medals
    medal_ranks = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"}
    display_df['Rank'] = display_df['Rank'].apply(lambda x: medal_ranks.get(x, str(x)))

    # Rename 'Model Name' to 'Agent'
    display_df = display_df.rename(columns={"Model Name": "Agent"})

    # Add metrics columns (Selected Overall and individual tasks)
    task_columns = list(selected_tasks)
    metric_columns = ['Selected Overall'] + task_columns
    for col in metric_columns:
        if col not in filtered_df.columns:
            continue
        column = filtered_df[col]
        # Format value by value: a column holding '-' placeholders has
        # object dtype, and its floats still need the one-decimal rendering.
        if pd.api.types.is_float_dtype(column) or pd.api.types.is_object_dtype(column):
            display_df[col] = column.map(_format_metric_value)
        else:
            display_df[col] = column

    # Rename "Selected Overall" to "Metric Average" in display_df
    if "Selected Overall" in display_df.columns:
        display_df = display_df.rename(columns={"Selected Overall": "Metric Average"})
        # Also update the metric_columns list to reflect the rename
        metric_columns = ['Metric Average'] + task_columns

    return display_df, metric_columns


def _format_metric_value(value):
    """Render one metric cell: floats to one decimal, NaN as '-', anything else unchanged."""
    if isinstance(value, float):
        return '-' if pd.isna(value) else f"{value:.1f}"
    return value