import json
import re

import gradio as gr
import numpy
import pandas as pd

from src.display.css_html_js import custom_css
from src.about import (
    INTRODUCTION_TEXT,
    TITLE,
    AUTHORS,
)
from src.display.formatting import make_clickable_model
from plot_results import create_performance_plot

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    NUMBER_OF_QUESTIONS = 171.0

    # Load the benchmark results. The CSV is read manually instead of with
    # pd.read_csv because splitting each row at most 13 times keeps any commas
    # in the final (error-message) field from breaking the column layout.
    # leaderboard_df = pd.read_csv("benchmark_results.csv")
    leaderboard_df = []
    with open("benchmark_results.csv", "r") as f:
        header = f.readline().strip().split(",")
        header = [h.strip() for h in header]
        for line in f:
            leaderboard_df.append(line.strip().split(",", 13))

    # Also index the metadata by the part of each key before the first comma,
    # so lookups by bare model path succeed.
    with open("metadata.json") as f:
        metadata = json.load(f)
    for k, v in list(metadata.items()):
        metadata[k.split(",")[0]] = v

    # Create the dataframe from the parsed rows and header.
    leaderboard_df = pd.DataFrame(leaderboard_df, columns=header)

    # Keep only the Polish EQ-Bench runs (v1 and v2). The parentheses around
    # each comparison avoid pandas' ambiguous truth-value ValueError.
    leaderboard_df = leaderboard_df[
        (leaderboard_df["Benchmark Version"] == "eq-bench_v2_pl")
        | (leaderboard_df["Benchmark Version"] == "eq-bench_pl")
    ]

    # Keep only the columns used by the leaderboard.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]

    def parse_parseable(row):
        """For failed runs, recover the parseable-question count from the error message."""
        if row["Num Questions Parseable"] == "FAILED":
            m = re.match(r"(\d+)\.0 questions were parseable", row["Error"])
            return m.group(1)
        return row["Num Questions Parseable"]

    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        ["Num Questions Parseable", "Error"]
    ].apply(parse_parseable, axis=1)

    def fraction_to_percentage(numerator: float, denominator: float) -> float:
        return (numerator / denominator) * 100

    # Convert the raw count into a percentage of all benchmark questions.
    leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].apply(
        lambda x: fraction_to_percentage(float(x), NUMBER_OF_QUESTIONS)
    )

    def get_params(model_name):
        """Look up the model's parameter count in the metadata; NaN if missing."""
        if model_name in metadata:
            return metadata[model_name]
        print(model_name)
        return numpy.nan

    leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(get_params)

    # Reorder columns for display.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Params", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]

    # Failed runs have no usable score.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].replace(
        "FAILED", numpy.nan
    )

    # Scale the score by the fraction of questions that were parseable.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].astype(float) * (
        leaderboard_df["Num Questions Parseable"].astype(float) / 100
    )

    leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].astype(float)

    # Clamp negative scores to 0.
    leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0

    # Sort by score, then by parseable percentage, both descending.
    leaderboard_df = leaderboard_df.sort_values(
        by=["Benchmark Score", "Num Questions Parseable"], ascending=[False, False]
    )

    # Print model names and scores to the console before HTML formatting.
    print("\n===== MODEL RESULTS =====")
    for _, row in leaderboard_df.iterrows():
        print(f"{row['Model Path']}: {row['Benchmark Score']:.2f}")
    print("========================\n")

    # Apply HTML formatting for display.
    leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)

    leaderboard_df = leaderboard_df.rename(
        columns={
            "Model Path": "Model",
            "Num Questions Parseable": "Percentage Questions Parseable",
        }
    )

    # Gradient coloring: green for better scores; the reversed colormap on
    # Params (capped at 150B) marks larger models in red.
    leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
    leaderboard_df_styled = leaderboard_df_styled.background_gradient(
        cmap="RdYlGn_r", subset=["Params"], vmax=150
    )

    rounding = {
        "Benchmark Score": "{:.2f}",
        "Percentage Questions Parseable": "{:.2f}",
        "Params": "{:.0f}",
    }
    leaderboard_df_styled = leaderboard_df_styled.format(rounding)

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        datatype=["markdown", "number", "number", "number", "str"],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

    # Create and show the performance plot below the table.
    fig = create_performance_plot()
    plot = gr.Plot(value=fig, elem_id="performance-plot")

    gr.Markdown(AUTHORS, elem_classes="markdown-text")

demo.queue(default_concurrency_limit=40).launch()
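
# Usage note (an assumption, not confirmed by this repo: the entry-point file
# name and data-file locations may differ). With this script saved as app.py
# and benchmark_results.csv / metadata.json in the working directory, run:
#
#   python app.py
#
# Gradio serves the leaderboard on http://127.0.0.1:7860 by default.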