import copy

import gradio as gr
import pandas as pd
from PIL import Image

from src.display.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    FAQ_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    NUMERIC_INTERVALS,
    TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)
from src.envs import (
    API,
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    H4_TOKEN,
    IS_PUBLIC,
    QUEUE_REPO,
    REPO_ID,
    RESULTS_REPO,
)
from dummydatagen import dummy_data_for_plot, create_metric_plot_obj_1, dummydf


def restart_space():
    """Restart the Hugging Face Space so refreshed data is picked up."""
    API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)


def add_average_col(df):
    """Row-wise mean over the per-game metric columns.

    The four metadata columns are excluded so only game scores enter the
    average, which is rounded to three decimals.
    """
    always_here_cols = [
        "Model", "Agent", "Opponent Model", "Opponent Agent"
    ]
    desired_col = [i for i in list(df.columns) if i not in always_here_cols]
    newdf = df[desired_col].mean(axis=1).round(3)
    return newdf
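
# A minimal sketch of what add_average_col computes; the rows and scores
# below are illustrative, not taken from the real dataset:
#
#   toy = pd.DataFrame({
#       "Model": ["m1"], "Agent": ["prompt"],
#       "Opponent Model": ["m2"], "Opponent Agent": ["prompt"],
#       "Nim": [0.4], "Pig": [0.6],
#   })
#   add_average_col(toy)  # -> Series([0.5]), the mean of the two game columns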


# Metric columns: one per game evaluated in the benchmark (also used by
# select_columns below).
games = ["Breakthrough", "Connect Four", "Blind Auction", "Kuhn Poker",
         "Liar's Dice", "Negotiation", "Nim", "Pig",
         "Iterated Prisoner's Dilemma", "Tic-Tac-Toe"]

gtbench_raw_data = dummydf()
gtbench_raw_data["Average"] = add_average_col(gtbench_raw_data)

# Move "Average" so it sits right after the four metadata columns.
gtbench_raw_data.insert(4, "Average", gtbench_raw_data.pop("Average"))

# Sorted so the checkbox choices render in a stable, deterministic order.
models = sorted(set(gtbench_raw_data['Model']))
opponent_models = sorted(set(gtbench_raw_data['Opponent Model']))
agents = sorted(set(gtbench_raw_data['Agent']))
opponent_agents = sorted(set(gtbench_raw_data['Opponent Agent']))
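
# ``DataFrame.pop`` removes a column and returns it, and ``insert`` re-adds
# it at a given position; index 4 lands it right after the four metadata
# columns. A toy illustration (column names are made up):
#
#   d = pd.DataFrame({"a": [1], "b": [2]})
#   d.insert(0, "b", d.pop("b"))   # column order is now ["b", "a"]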


def update_table(
    hidden_df: pd.DataFrame,
    columns: list,
    model1: list,
    model2: list,
    agent1: list,
    agent2: list,
):
    """Apply the column selection, then every row filter, to the hidden
    full copy of the leaderboard data."""
    filtered_df = select_columns(hidden_df, columns)
    filtered_df = filter_model1(filtered_df, model1)
    filtered_df = filter_model2(filtered_df, model2)
    filtered_df = filter_agent1(filtered_df, agent1)
    filtered_df = filter_agent2(filtered_df, agent2)
    return filtered_df


def load_query(request: gr.Request):
    """Read the ``query`` URL parameter so a search can be shared via link."""
    query = request.query_params.get("query") or ""
    # Returned twice so two components can receive the same value.
    return query, query
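
# Example: opening the Space URL with ``?query=llama`` (illustrative value)
# yields "llama" here, which a search callback such as search_table() below
# could then match against the search column.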


def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Keep only the rows whose search (dummy) column contains ``query``."""
    return df[df[AutoEvalColumn.dummy.name].str.contains(query, case=False)]


def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Project ``df`` onto the metadata columns plus the selected game columns.

    With an empty selection every game column is shown and "Average" is
    recomputed; otherwise "Average" appears only when explicitly selected,
    recomputed over the selected games alone.
    """
    always_here_cols = [
        "Model", "Agent", "Opponent Model", "Opponent Agent"
    ]
    all_columns = games

    if len(columns) == 0:
        filtered_df = df[
            always_here_cols +
            [c for c in all_columns if c in df.columns]
        ].copy()  # copy() avoids pandas' SettingWithCopyWarning below
        filtered_df["Average"] = add_average_col(filtered_df)
        # Move "Average" right after the metadata columns.
        filtered_df.insert(4, "Average", filtered_df.pop("Average"))
        return filtered_df

    filtered_df = df[
        always_here_cols +
        [c for c in all_columns if c in df.columns and c in columns]
    ].copy()
    if "Average" in columns:
        filtered_df["Average"] = add_average_col(filtered_df)
        filtered_df.insert(4, "Average", filtered_df.pop("Average"))
    return filtered_df
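
# Sketch of the selection logic with illustrative inputs: given
# columns=["Nim", "Average"], the result keeps the four metadata columns
# plus "Nim", and "Average" is recomputed over the selected games only
# (here, just "Nim") before being moved to position 4.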


def _filter_by(df: pd.DataFrame, column: str, values: list) -> pd.DataFrame:
    """Keep rows whose ``column`` value is in ``values``; empty = no filter."""
    if len(values) == 0:
        return df
    return df[df[column].isin(values)]


def filter_model1(df: pd.DataFrame, model_query: list) -> pd.DataFrame:
    return _filter_by(df, "Model", model_query)


def filter_model2(df: pd.DataFrame, model_query: list) -> pd.DataFrame:
    return _filter_by(df, "Opponent Model", model_query)


def filter_agent1(df: pd.DataFrame, agent_query: list) -> pd.DataFrame:
    return _filter_by(df, "Agent", agent_query)


def filter_agent2(df: pd.DataFrame, agent_query: list) -> pd.DataFrame:
    return _filter_by(df, "Opponent Agent", agent_query)
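
# Example with an illustrative model name: filter_model1(df, ["model-a"])
# keeps only rows whose "Model" column equals "model-a"; an empty selection
# returns the frame unchanged.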


class LLM_Model:
    """Plain record for one LLM leaderboard row (not referenced elsewhere
    in this module)."""

    def __init__(self, t_value, model_value, average_value, arc_value,
                 hellaSwag_value, mmlu_value) -> None:
        self.t = t_value
        self.model = model_value
        self.average = average_value
        self.arc = arc_value
        self.hellaSwag = hellaSwag_value
        self.mmlu = mmlu_value


demo = gr.Blocks(css=custom_css)


def load_image(image_path):
    """Open an image asset from disk."""
    image = Image.open(image_path)
    return image


with demo:
    with gr.Row():
        # Gradio expects integer pixel sizes and an integer ``scale``,
        # not CSS strings.
        gr.Image("./assets/logo.png", height=200, width=200, scale=0,
                 show_download_button=False, container=False)
        gr.HTML(TITLE, elem_id="title")

    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 UnlearnCanvas Benchmark",
                        elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        shown_columns = gr.CheckboxGroup(
                            choices=['Average'] + games,
                            label="Select columns to show",
                            elem_id="column-select",
                            interactive=True,
                        )
                with gr.Column(min_width=320):
                    model1_column = gr.CheckboxGroup(
                        label="Model",
                        choices=models,
                        interactive=True,
                        elem_id="filter-columns-type",
                    )
                    agent1_column = gr.CheckboxGroup(
                        label="Agents",
                        choices=agents,
                        interactive=True,
                        elem_id="filter-columns-precision",
                    )
                    model2_column = gr.CheckboxGroup(
                        label="Opponent Model",
                        choices=opponent_models,
                        interactive=True,
                        elem_id="filter-columns-type",
                    )
                    agent2_column = gr.CheckboxGroup(
                        label="Opponent Agents",
                        choices=opponent_agents,
                        interactive=True,
                        elem_id="filter-columns-precision",
                    )

            # Visible, filtered view of the leaderboard.
            leaderboard_table = gr.components.Dataframe(
                value=gtbench_raw_data,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )

            # Hidden, unfiltered copy: every filter callback reads from this
            # component so filtering always starts from the full dataset.
            game_bench_df_for_search = gr.components.Dataframe(
                value=gtbench_raw_data,
                elem_id="leaderboard-table",
                interactive=False,
                visible=False,
            )

            # Re-render the visible table whenever any selector changes.
            for selector in [shown_columns, model1_column, model2_column,
                             agent1_column, agent2_column]:
                selector.change(
                    update_table,
                    [
                        game_bench_df_for_search,
                        shown_columns,
                        model1_column,
                        model2_column,
                        agent1_column,
                        agent2_column,
                    ],
                    leaderboard_table,
                    queue=True,
                )
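
            # ``Component.change(fn, inputs, outputs)`` is the standard Gradio
            # event wiring: on every change, update_table() receives the
            # current value of each input component (the hidden Dataframe
            # arrives as a pd.DataFrame) and its return value re-renders the
            # visible leaderboard_table.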

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")

        # The submission workflow below is intentionally disabled: the whole
        # tab is wrapped in a string literal until the evaluation queue is
        # ready.
        '''
        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT,
                                elem_classes="markdown-text")

                with gr.Column():
                    with gr.Accordion(
                        f"✅ Finished Evaluations ({9})",
                        open=False,
                    ):
                        with gr.Row():
                            finished_eval_table = gr.components.Dataframe(
                                value=None,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
                    with gr.Accordion(
                        f"🔄 Running Evaluation Queue ({5})",
                        open=False,
                    ):
                        with gr.Row():
                            running_eval_table = gr.components.Dataframe(
                                value=None,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )

                    with gr.Accordion(
                        f"⏳ Pending Evaluation Queue ({7})",
                        open=False,
                    ):
                        with gr.Row():
                            pending_eval_table = gr.components.Dataframe(
                                value=None,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your Agent here!",
                            elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Agent name")
                    # revision_name_textbox = gr.Textbox(
                    #     label="Revision commit", placeholder="main")
                    # private = gr.Checkbox(
                    #     False, label="Private", visible=not IS_PUBLIC)
                    model_type = gr.Dropdown(
                        choices=[t.to_str(" : ")
                                 for t in ModelType if t != ModelType.Unknown],
                        label="Agent type",
                        multiselect=False,
                        value=ModelType.FT.to_str(" : "),
                        interactive=True,
                    )

                # with gr.Column():
                #     precision = gr.Dropdown(
                #         choices=[i.value.name for i in Precision
                #                  if i != Precision.Unknown],
                #         label="Precision",
                #         multiselect=False,
                #         value="float16",
                #         interactive=True,
                #     )
                #     weight_type = gr.Dropdown(
                #         choices=[i.value.name for i in WeightType],
                #         label="Weights type",
                #         multiselect=False,
                #         value="Original",
                #         interactive=True,
                #     )
                #     base_model_name_textbox = gr.Textbox(
                #         label="Base model (for delta or adapter weights)")

                submit_button = gr.Button("Submit Eval")
                submission_result = gr.Markdown()
                # submit_button.click(
                #     add_new_eval,
                #     [
                #         model_name_textbox,
                #         base_model_name_textbox,
                #         revision_name_textbox,
                #         precision,
                #         private,
                #         weight_type,
                #         model_type,
                #     ],
                #     submission_result,
                # )
        '''

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )


demo.launch()
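
# restart_space() is defined above but never scheduled. A hedged sketch of
# the usual wiring (assumes the ``apscheduler`` package; the scheduler would
# need to start before demo.launch()):
#
#   from apscheduler.schedulers.background import BackgroundScheduler
#   scheduler = BackgroundScheduler()
#   scheduler.add_job(restart_space, "interval", seconds=1800)
#   scheduler.start()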