import pandas as pd

from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn
from src.leaderboard.read_evals import get_raw_eval_results


def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results"""
    try:
        raw_data = get_raw_eval_results(results_path)
        all_data_json = [v.to_dict() for v in raw_data]

        if not all_data_json:
            # Create empty DataFrame with correct columns
            empty_df = pd.DataFrame(columns=cols)
            # Ensure correct column types
            empty_df[AutoEvalColumn.average.name] = pd.Series(dtype=float)
            for col in benchmark_cols:
                empty_df[col] = pd.Series(dtype=float)
            return empty_df

        df = pd.DataFrame.from_records(all_data_json)
        df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
        df = df[cols].round(decimals=2)

        # Filter out entries for which perplexity hasn't been evaluated
        df = df[has_no_nan_values(df, benchmark_cols)]
        return df
    except Exception as e:
        print(f"Error creating leaderboard: {e}")
        # Return empty DataFrame with correct structure
        empty_df = pd.DataFrame(columns=cols)
        empty_df[AutoEvalColumn.average.name] = pd.Series(dtype=float)
        for col in benchmark_cols:
            empty_df[col] = pd.Series(dtype=float)
        return empty_df
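

# Usage sketch (illustrative only): shows how get_leaderboard_df might be invoked.
# The results path and column lists below are hypothetical placeholders; the real
# values come from this repository's configuration, not from this example.
if __name__ == "__main__":
    example_benchmark_cols = ["Benchmark A", "Benchmark B"]  # hypothetical benchmark columns
    example_cols = [AutoEvalColumn.average.name] + example_benchmark_cols  # hypothetical display columns
    leaderboard = get_leaderboard_df("eval-results/", example_cols, example_benchmark_cols)  # hypothetical path
    print(leaderboard.head())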