Spaces:
Running
Running
File size: 4,533 Bytes
f2a3e70 bd5b131 87ad165 bd5b131 87ad165 bd5b131 87ad165 bd5b131 87ad165 bd5b131 87ad165 e9ebca3 bd5b131 f2a3e70 bd5b131 f2a3e70 bd5b131 e9ebca3 f2a3e70 bd5b131 e9ebca3 de1d88f e9ebca3 de1d88f bd5b131 f2a3e70 bd5b131 e9ebca3 f2a3e70 bd5b131 f2a3e70 bd5b131 87ad165 bd5b131 87ad165 bd5b131 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import json
import re
import gradio as gr
import numpy
import pandas as pd
from src.display.css_html_js import custom_css
from src.about import (
INTRODUCTION_TEXT,
TITLE,
AUTHORS,
)
from src.display.formatting import make_clickable_model
demo = gr.Blocks(css=custom_css)
with demo:
gr.HTML(TITLE)
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
NUMBER_OF_QUESTIONS = 171.0
# load dataframe from csv
# leaderboard_df = pd.read_csv("benchmark_results.csv")
leaderboard_df = []
with open("benchmark_results.csv", "r") as f:
header = f.readline().strip().split(",")
header = [h.strip() for h in header]
for i, line in enumerate(f):
leaderboard_df.append(line.strip().split(",", 13))
metadata = json.load(open('metadata.json'))
for k, v in list(metadata.items()):
metadata[k.split(",")[0]] = v
# create dataframe from list and header
leaderboard_df = pd.DataFrame(leaderboard_df, columns=header)
# filter column with value eq-bench_v2_pl
print(header)
leaderboard_df = leaderboard_df[(leaderboard_df["Benchmark Version"] == "eq-bench_v2_pl") | (
leaderboard_df["Benchmark Version"] == 'eq-bench_pl')]
# fix: ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
# leave only defined columns
leaderboard_df = leaderboard_df[["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]]
# create new column with model name
def parse_parseable(x):
if x["Num Questions Parseable"] == 'FAILED':
m = re.match(r'(\d+)\.0 questions were parseable', x["Error"])
return m.group(1)
return x["Num Questions Parseable"]
leaderboard_df["Num Questions Parseable"] = leaderboard_df[["Num Questions Parseable", "Error"]].apply(
lambda x: parse_parseable(x), axis=1)
def fraction_to_percentage(numerator: float, denominator: float) -> float:
return (numerator / denominator) * 100
leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].apply(lambda x: fraction_to_percentage(float(x), NUMBER_OF_QUESTIONS))
def get_params(model_name):
if model_name in metadata:
return metadata[model_name]
return numpy.nan
leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(lambda x: get_params(x))
# move column order
leaderboard_df = leaderboard_df[["Model Path", "Params", "Benchmark Score", "Num Questions Parseable", 'Error']]
leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(lambda x: make_clickable_model(x))
# change value of column to nan
leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].replace('FAILED', numpy.nan)
# set datatype of column
leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].astype(float)
leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].astype(float)
# set nan if value of column is less than 0
leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0
# sort by 2 columns
leaderboard_df = leaderboard_df.sort_values(by=["Benchmark Score", "Num Questions Parseable"],
ascending=[False, False])
# rename columns
leaderboard_df = leaderboard_df.rename(columns={"Model Path": "Model"})
leaderboard_df = leaderboard_df.rename(columns={"Num Questions Parseable": "Percentage Questions Parseable"})
leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
leaderboard_df_styled = leaderboard_df_styled.background_gradient(cmap="RdYlGn_r", subset=['Params'])
rounding = {}
# for col in ["Benchmark Score", "Num Questions Parseable"]:
rounding["Benchmark Score"] = "{:.2f}"
rounding["Percentage Questions Parseable"] = "{:.2f}"
rounding["Params"] = "{:.0f}"
leaderboard_df_styled = leaderboard_df_styled.format(rounding)
leaderboard_table = gr.components.Dataframe(
value=leaderboard_df_styled,
# headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
datatype=['markdown', 'number', 'number', 'number', 'str'],
elem_id="leaderboard-table",
interactive=False,
visible=True,
)
gr.Markdown(AUTHORS, elem_classes="markdown-text")
demo.queue(default_concurrency_limit=40).launch()
|