Spaces:
Runtime error
Runtime error
File size: 2,667 Bytes
359f755 77c0f20 359f755 77c0f20 359f755 ce8066d 24c8512 ce8066d 24c8512 ce8066d 24c8512 ce8066d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import pandas as pd
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn
from src.leaderboard.read_evals import get_raw_eval_results
def _empty_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Build a zero-row frame with ``cols`` plus float-typed score columns."""
    empty_df = pd.DataFrame(columns=cols)
    # Ensure correct column types: assigning an empty float Series gives the
    # average and benchmark columns a float dtype even though there are no rows.
    empty_df[AutoEvalColumn.average.name] = pd.Series(dtype=float)
    for col in benchmark_cols:
        empty_df[col] = pd.Series(dtype=float)
    return empty_df


def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Create a dataframe from all the individual experiment results.

    Results are loaded with ``get_raw_eval_results(results_path)``, each one is
    converted with its ``to_dict()`` method, and the records are assembled into
    a DataFrame. Progress is printed (flushed) at every step.

    Args:
        results_path: Location passed unchanged to ``get_raw_eval_results``.
        cols: Column names to keep in the returned frame, in this order.
        benchmark_cols: Column names passed to ``has_no_nan_values`` for the
            final row filter; also created as float columns in the empty
            fallback frame.

    Returns:
        The records sorted by ``AutoEvalColumn.average.name`` in descending
        order (when that column exists), restricted to ``cols``, rounded to
        two decimals, and filtered by ``has_no_nan_values``. A zero-row frame
        is returned instead when no results are found or when any of ``cols``
        is missing from the loaded records.
    """
    print("\n=== Starting leaderboard creation ===", flush=True)
    print(f"Looking for results in: {results_path}", flush=True)
    print(f"Expected columns: {cols}", flush=True)
    print(f"Benchmark columns: {benchmark_cols}", flush=True)

    raw_data = get_raw_eval_results(results_path)
    print(f"\nFound {len(raw_data)} raw results", flush=True)

    all_data_json = [v.to_dict() for v in raw_data]
    print(f"\nConverted to {len(all_data_json)} JSON records", flush=True)

    # Guard clause: nothing to show, hand back a correctly shaped empty frame.
    if not all_data_json:
        print("\nNo data found, creating empty DataFrame", flush=True)
        return _empty_leaderboard_df(cols, benchmark_cols)
    print("Sample record keys:", list(all_data_json[0].keys()), flush=True)

    df = pd.DataFrame.from_records(all_data_json)
    print("\nCreated DataFrame with columns:", df.columns.tolist(), flush=True)
    print("DataFrame shape:", df.shape, flush=True)

    # Sorting is best-effort: a missing average column is reported and the
    # frame is left in its original order.
    try:
        df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
    except KeyError as e:
        print(f"\nError sorting DataFrame: {e}", flush=True)
        print("Available columns:", df.columns.tolist(), flush=True)
    else:
        print("\nSorted DataFrame by average", flush=True)

    # Column selection is not best-effort: if any requested column is absent
    # the function falls back to the empty frame.
    try:
        df = df[cols].round(decimals=2)
    except KeyError as e:
        print(f"\nError selecting columns: {e}", flush=True)
        print("Requested columns:", cols, flush=True)
        print("Available columns:", df.columns.tolist(), flush=True)
        # Create empty DataFrame with correct structure
        return _empty_leaderboard_df(cols, benchmark_cols)
    print("\nSelected and rounded columns", flush=True)

    # filter out if perplexity hasn't been evaluated
    # NOTE(review): presumably has_no_nan_values returns a boolean row mask
    # over benchmark_cols; confirm against src.display.formatting.
    df = df[has_no_nan_values(df, benchmark_cols)]
    print("\nFinal DataFrame shape after filtering:", df.shape, flush=True)
    print("Final columns:", df.columns.tolist(), flush=True)
    return df
|