""" Data loading and processing utilities for the leaderboard application. """ import pandas as pd import json from src.utils.config import model_categories from src.utils.task_mapping import get_display_name def load_metric_data(file_path): """ Load metric data from a JSON file Args: file_path (str): Path to the JSON file containing metric data Returns: dict: Dictionary containing the loaded metric data """ try: with open(file_path, "r") as f: return json.load(f) except FileNotFoundError: print(f"Error: File {file_path} not found.") return {} except json.JSONDecodeError: print(f"Error: File {file_path} is not a valid JSON file.") return {} def process_data(metric_data): """ Process the metric data into a pandas DataFrame Args: metric_data (dict): Dictionary containing the metric data Returns: pandas.DataFrame: DataFrame containing the processed data """ # Create a DataFrame to store the model metric data tasks = list(metric_data.keys()) models = [] model_data = {} # Extract model names and their metric values for each task for task in tasks: for model in metric_data[task]: if model not in models: models.append(model) model_data[model] = {} # Store the metric value for this task model_data[model][task] = metric_data[task][model] # Create DataFrame from the model_data dictionary df = pd.DataFrame.from_dict(model_data, orient='index') # Replace NaN values with '-' df.fillna('-', inplace=True) # First convert raw task names to standard format (spaces instead of hyphens/underscores) standardized_columns = [task.replace("-", " ").replace("_", " ").title() for task in df.columns] df.columns = standardized_columns # Then apply our display name mapping display_name_columns = {col: get_display_name(col) for col in df.columns} df = df.rename(columns=display_name_columns) # Add a model type column to the dataframe df['Model Type'] = df.index.map(lambda x: model_categories.get(x, "Unknown")) return df def calculate_selected_overall(row, selected_tasks): """ Calculate overall average for selected tasks Args: row (pandas.Series): Row of data selected_tasks (list): List of task names to include in the average Returns: float or str: The calculated average or '-' if no numeric values """ numeric_values = [] for task in selected_tasks: value = row[task] # Check if the value is numeric (could be float or string representing float) if isinstance(value, (int, float)) or (isinstance(value, str) and value.replace('.', '', 1).replace('-', '', 1).isdigit()): numeric_values.append(float(value)) # Calculate average if there are numeric values if numeric_values: return sum(numeric_values) / len(numeric_values) else: return '-' def filter_and_prepare_data(df, selected_tasks, selected_model_types): """ Filter and prepare data based on selections Args: df (pandas.DataFrame): The original DataFrame selected_tasks (list): List of selected task names selected_model_types (list): List of selected model types Returns: pandas.DataFrame: Filtered and prepared DataFrame """ # Filter the dataframe based on selected model types filtered_df = df[df['Model Type'].isin(selected_model_types)] # Calculate the average for selected tasks only selected_tasks_df = filtered_df[selected_tasks] filtered_df['Selected Overall'] = selected_tasks_df.mean(axis=1) # Separate human entries from other models for ranking is_human = filtered_df['Model Type'] == 'Human' human_df = filtered_df[is_human] non_human_df = filtered_df[~is_human] # Sort non-human models by Selected Overall and add rank non_human_df = non_human_df.sort_values('Selected Overall', ascending=False) non_human_df.insert(0, 'Rank', range(1, len(non_human_df) + 1)) # Add rank for human (use '-' to indicate not ranked) human_df.insert(0, 'Rank', '-') # Combine dataframes - put humans at appropriate position based on score combined_df = pd.concat([non_human_df, human_df]) combined_df = combined_df.sort_values('Selected Overall', ascending=False) # Add a Model Name column that shows the index (actual model name) combined_df['Model Name'] = combined_df.index return combined_df def format_display_dataframe(filtered_df, selected_tasks): """ Create and format the display DataFrame for the leaderboard table Args: filtered_df (pandas.DataFrame): The filtered DataFrame selected_tasks (list): List of selected task names Returns: tuple: (pandas.DataFrame, list) - The display DataFrame and the metric columns """ # Create a fixed display DataFrame with only the model info display_df = filtered_df[['Rank', 'Model Name', 'Model Type']].copy() # Format the rank column with medals medal_ranks = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"} display_df['Rank'] = display_df['Rank'].apply(lambda x: medal_ranks.get(x, str(x))) # Rename 'Model Name' to 'Agent' display_df = display_df.rename(columns={"Model Name": "Agent"}) # Add metrics columns (Selected Overall and individual tasks) metric_columns = ['Selected Overall'] + selected_tasks for col in metric_columns: if col in filtered_df.columns: # Format numeric columns to 1 decimal place if filtered_df[col].dtype in ['float64', 'float32']: display_df[col] = filtered_df[col].apply(lambda x: f"{x:.1f}" if isinstance(x, (int, float)) else x) else: display_df[col] = filtered_df[col] # Rename "Selected Overall" to "Metric Average" in display_df if "Selected Overall" in display_df.columns: display_df = display_df.rename(columns={"Selected Overall": "Metric Average"}) # Also update the metric_columns list to reflect the rename metric_columns = ['Metric Average'] + selected_tasks return display_df, metric_columns