import argparse

import gradio as gr
import numpy as np
import pandas as pd
def make_default_md():
    leaderboard_md = f"""
# LLMs Benchmark

The main goal of this project is to use Large Language Models (LLMs) to extract specific information from PDF documents and organize it into a structured JSON format.

To achieve this, we evaluate various LLMs on two benchmarks:

1. [Benchmark1](https://huggingface.co/spaces/Nechba/LLms-Benchmark/blob/main/dataset.jsonl):
This benchmark consists of 59 pages of context and the corresponding JSON extracts from the "Interchange and Service Fees Manual: Europe Region".
2. [Benchmark2](https://huggingface.co/datasets/Effyis/Table-Extraction):
This benchmark comprises 16,573 tables as context and the corresponding JSON extracts.
"""
    return leaderboard_md
def make_arena_leaderboard_md(total_models):
    leaderboard_md = f"""
Total #models: **{total_models}**. Last updated: June 01, 2024.
"""
    return leaderboard_md
def model_hyperlink(model_name, link):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">{model_name}</a>'
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    rows = []
    with open(filename, "r") as file:
        lines = file.readlines()
    heads = [v.strip() for v in lines[0].split(",")]
    for line in lines[1:]:
        row = [v.strip() for v in line.split(",")]
        item = {}
        for h, v in zip(heads, row):
            item[h] = v
        if add_hyperlink:
            item["Model"] = model_hyperlink(item["Model"], item["Link"])
            item["Notebook link"] = model_hyperlink("Notebook", item["Notebook link"])
        rows.append(item)
    return rows
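
# A minimal sketch of the CSV layout load_leaderboard_table_csv() assumes.
# The column order and values below are illustrative placeholders, not entries
# from the real leaderboard files; note the naive split(",") above also assumes
# no field contains a comma:
#
#   Model,Percentage of values,Percentage of keys,Average time (s),Notebook link,License,Link
#   SomeModel,85.3,97.1,4.2,https://example.com/notebook.ipynb,Apache-2.0,https://example.com/model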
def get_arena_table(model_table_df):
    # Cast the numeric columns to float so the sort below is numeric, not lexicographic.
    model_table_df["Percentage of values"] = model_table_df["Percentage of values"].astype(float)
    model_table_df["Percentage of keys"] = model_table_df["Percentage of keys"].astype(float)
    model_table_df["Average time (s)"] = model_table_df["Average time (s)"].astype(float)
    arena_df = model_table_df.sort_values(by=["Percentage of values"], ascending=False)
    values = []
    if not arena_df.empty:
        for i in range(len(arena_df)):
            row = []
            model_name = arena_df["Model"].values[i]
            row.append(model_name)
            row.append(arena_df.iloc[i]["Percentage of values"])
            row.append(arena_df.iloc[i]["Percentage of keys"])
            row.append(arena_df.iloc[i]["Average time (s)"])
            row.append(arena_df.iloc[i]["Notebook link"])
            row.append(arena_df.iloc[i]["License"])
            # row.append(arena_df.iloc[i]["Link"])
            values.append(row)
    return values
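
# Each row returned by get_arena_table() is ordered to match the headers of the
# gr.Dataframe components built below. An illustrative (not real) row would be:
#   ['<a ...>SomeModel</a>', 85.3, 97.1, 4.2, '<a ...>Notebook</a>', 'Apache-2.0']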
def build_leaderboard_tab(leaderboard_table_file1, leaderboard_table_file2, show_plot=False):
    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
    if leaderboard_table_file1:
        data1 = load_leaderboard_table_csv(leaderboard_table_file1)
        model_table_df1 = pd.DataFrame(data1)
        data2 = load_leaderboard_table_csv(leaderboard_table_file2)
        model_table_df2 = pd.DataFrame(data2)
        with gr.Tabs() as tabs:
            with gr.Tab("Benchmark 1", id=0):
                arena_table_vals = get_arena_table(model_table_df1)
                md = make_arena_leaderboard_md(len(arena_table_vals))
                gr.Markdown(md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Model",
                        "Percentage of values (%)",
                        "Percentage of keys (%)",
                        "Average time (s)",
                        "Code",
                        "License",
                    ],
                    datatype=[
                        "markdown",
                        "number",
                        "number",
                        "number",
                        "markdown",
                        "str",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    column_widths=[200, 150, 150, 130, 100, 140],
                    wrap=True,
                )
                # Display additional Markdown notes as needed...
            with gr.Tab("Benchmark 2", id=1):
                arena_table_vals = get_arena_table(model_table_df2)
                md = make_arena_leaderboard_md(len(arena_table_vals))
                gr.Markdown(md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Model",
                        "Percentage of values (%)",
                        "Percentage of keys (%)",
                        "Average time (s)",
                        "Code",
                        "License",
                    ],
                    datatype=[
                        "markdown",
                        "number",
                        "number",
                        "number",
                        "markdown",
                        "str",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    column_widths=[200, 150, 150, 130, 100, 140],
                    wrap=True,
                )
    return [md_1]
block_css = """ | |
#notice_markdown { | |
font-size: 104% | |
} | |
#notice_markdown th { | |
display: none; | |
} | |
#notice_markdown td { | |
padding-top: 6px; | |
padding-bottom: 6px; | |
} | |
#leaderboard_markdown { | |
font-size: 104% | |
} | |
#leaderboard_markdown td { | |
padding-top: 6px; | |
padding-bottom: 6px; | |
} | |
#leaderboard_dataframe td { | |
line-height: 0.1em; | |
} | |
footer { | |
display:none !important | |
} | |
.sponsor-image-about img { | |
margin: 0 20px; | |
margin-top: 20px; | |
height: 40px; | |
max-height: 100%; | |
width: auto; | |
float: left; | |
} | |
""" | |
def build_demo(leaderboard_table_file1, leaderboard_table_file2):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="LLMs Benchmark",
        theme=gr.themes.Base(text_size=text_size),
        css=block_css,
    ) as demo:
        leader_components = build_leaderboard_tab(
            leaderboard_table_file1, leaderboard_table_file2, show_plot=True
        )
    return demo
if __name__ == "__main__": | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--share", action="store_true") | |
args = parser.parse_args() | |
leaderboard_table_file1 = "./Benchmark1/leaderboard.csv" | |
leaderboard_table_file2 = "./Benchmark2/leaderboard.csv" | |
demo = build_demo(leaderboard_table_file1,leaderboard_table_file2) | |
demo.launch(share=args.share) | |
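
# Usage sketch, assuming the two leaderboard CSV files exist at the paths above
# and the script is saved as app.py (the file name is an assumption):
#   python app.py            # serve the leaderboard locally
#   python app.py --share    # also create a temporary public Gradio share link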