import json
import os
import pandas as pd
import numpy as np
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn
from src.leaderboard.read_evals import get_raw_eval_results
from src.about import Tasks, SingleTableTasks, SingleColumnTasks
# def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
# """Creates a dataframe from all the individual experiment results"""
# raw_data = get_raw_eval_results(results_path, requests_path)
# all_data_json = [v.to_dict() for v in raw_data]
# df = pd.DataFrame.from_records(all_data_json)
# df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
# df = df[cols].round(decimals=2)
# # filter out if any of the benchmarks have not been produced
# df = df[has_no_nan_values(df, benchmark_cols)]
# return df
def strip_emoji(text: str) -> str:
"""Removes emojis from text"""
return text.encode("ascii", "ignore").decode("ascii").rstrip()
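
# Illustrative example (the metric name below is hypothetical): the display column
# names taken from src.about may carry emoji suffixes, while the result JSON files
# key metrics by their plain ASCII names, so e.g.
# strip_emoji("Cardinality 📊") would return "Cardinality".
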
def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the single column, single table and multi table dataframes from the individual experiment results"""
    # iterate through all files in the results path and load them as JSON
all_data_json = []
res_path = os.path.join(results_path, "demo-leaderboard", "syntherela-demo")
for entry in os.listdir(res_path):
if entry.endswith(".json"):
file_path = os.path.join(res_path, entry)
with open(file_path) as fp:
data = json.load(fp)
all_data_json.append(data)
multi_table_metrics = [task.value.col_name for task in Tasks]
single_table_metrics = [task.value.col_name for task in SingleTableTasks]
single_column_metrics = [task.value.col_name for task in SingleColumnTasks]
    # create empty dataframes for the multi table, single table and single column metrics
multitable_df = pd.DataFrame(columns=["Dataset", "Model"] + multi_table_metrics)
singletable_df = pd.DataFrame(columns=["Dataset", "Model"] + single_table_metrics)
singlecolumn_df = pd.DataFrame(columns=["Dataset", "Table", "Model"] + single_column_metrics)
# iterate through all json files and add the data to the dataframe
for data in all_data_json:
model = data["method_name"]
dataset = data["dataset_name"]
row = {"Dataset": dataset, "Model": model}
for metric in multi_table_metrics:
stripped_metric = strip_emoji(metric)
if stripped_metric in data["multi_table_metrics"]:
metric_values = []
for table in data["multi_table_metrics"][stripped_metric].keys():
if "accuracy" in data["multi_table_metrics"][stripped_metric][table]:
metric_values.append(data["multi_table_metrics"][stripped_metric][table]["accuracy"])
if "statistic" in data["multi_table_metrics"][stripped_metric][table]:
metric_values.append(data["multi_table_metrics"][stripped_metric][table]["statistic"])
                row[metric] = np.mean(metric_values).round(decimals=2) if metric_values else np.nan
else:
row[metric] = np.nan
multitable_df = pd.concat([multitable_df, pd.DataFrame([row])], ignore_index=True)
singletable_row = {"Dataset": dataset, "Model": model}
for metric in single_table_metrics:
stripped_metric = strip_emoji(metric)
if stripped_metric in data["single_table_metrics"]:
metric_values = []
for table in data["single_table_metrics"][stripped_metric].keys():
if "accuracy" in data["single_table_metrics"][stripped_metric][table]:
metric_values.append(data["single_table_metrics"][stripped_metric][table]["accuracy"])
if "value" in data["single_table_metrics"][stripped_metric][table]:
metric_values.append(data["single_table_metrics"][stripped_metric][table]["value"])
                singletable_row[metric] = np.mean(metric_values).round(decimals=2) if metric_values else np.nan
else:
singletable_row[metric] = np.nan
singletable_df = pd.concat([singletable_df, pd.DataFrame([singletable_row])], ignore_index=True)
        # single column metrics are aggregated per (dataset, model, table) row
for metric in single_column_metrics:
stripped_metric = strip_emoji(metric)
if stripped_metric in data["single_column_metrics"]:
for table in data["single_column_metrics"][stripped_metric].keys():
# check if row where dataset = dataset, model = model, table = table exists
if singlecolumn_df[
(singlecolumn_df["Dataset"] == dataset) &
(singlecolumn_df["Model"] == model) &
(singlecolumn_df["Table"] == table)
].empty:
singlecolumn_row = {"Dataset": dataset, "Model": model, "Table": table}
singlecolumn_df = pd.concat([singlecolumn_df, pd.DataFrame([singlecolumn_row])], ignore_index=True)
metric_values = []
for column in data["single_column_metrics"][stripped_metric][table].keys():
if "accuracy" in data["single_column_metrics"][stripped_metric][table][column]:
metric_values.append(data["single_column_metrics"][stripped_metric][table][column]["accuracy"])
if "value" in data["single_column_metrics"][stripped_metric][table][column]:
metric_values.append(data["single_column_metrics"][stripped_metric][table][column]["value"])
if "statistic" in data["single_column_metrics"][stripped_metric][table][column]:
metric_values.append(data["single_column_metrics"][stripped_metric][table][column]["statistic"])
                    # store the rounded mean for this (dataset, model, table) row
                    if metric_values:
                        singlecolumn_df.loc[
                            (singlecolumn_df["Dataset"] == dataset) &
                            (singlecolumn_df["Model"] == model) &
                            (singlecolumn_df["Table"] == table), metric
                        ] = np.mean(metric_values).round(decimals=2)
return singlecolumn_df, singletable_df, multitable_df
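
# Sketch of the result JSON layout assumed by get_leaderboard_df, inferred from the
# lookups above (only the keys actually read by the code are listed; anything else
# in the files is left unspecified):
#
# {
#     "method_name": "<model>",
#     "dataset_name": "<dataset>",
#     "multi_table_metrics":   {"<metric>": {"<table>": {"accuracy": ..., "statistic": ...}}},
#     "single_table_metrics":  {"<metric>": {"<table>": {"accuracy": ..., "value": ...}}},
#     "single_column_metrics": {"<metric>": {"<table>": {"<column>": {"accuracy" / "value" / "statistic": ...}}}}
# }
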
def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the different dataframes for the evaluation queue requests"""
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
all_evals = []
for entry in entries:
if ".json" in entry:
file_path = os.path.join(save_path, entry)
with open(file_path) as fp:
data = json.load(fp)
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
all_evals.append(data)
elif ".md" not in entry:
# this is a folder
            sub_entries = [
                e
                for e in os.listdir(os.path.join(save_path, entry))
                if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")
            ]
for sub_entry in sub_entries:
file_path = os.path.join(save_path, entry, sub_entry)
with open(file_path) as fp:
data = json.load(fp)
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
all_evals.append(data)
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
df_running = pd.DataFrame.from_records(running_list, columns=cols)
df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
return df_finished[cols], df_running[cols], df_pending[cols]
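
# Illustrative usage (the paths and column lists below are hypothetical; in the app
# they would typically come from its configuration, e.g. src.envs and src.display.utils):
#
#     single_column_df, single_table_df, multi_table_df = get_leaderboard_df(
#         "eval-results", COLS, BENCHMARK_COLS
#     )
#     finished_df, running_df, pending_df = get_evaluation_queue_df("eval-queue", EVAL_COLS)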