# Omartificial-Intelligence-Space's picture
# update app.py
# 7beb781 verified
# raw
# history blame
# 10.1 kB
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
COLUMNS,
COLS,
BENCHMARK_COLS,
EVAL_COLS,
EVAL_TYPES,
ModelType,
WeightType,
Precision
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
# Extra rules that lay the column-selection checkboxes out as a compact,
# scrollable four-column grid.
custom_css_additions = """
.select-columns-container {
max-height: 300px;
overflow-y: auto;
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 5px;
}
.select-columns-container label {
font-size: 0.9em;
padding: 2px;
margin: 0;
}
.column-categories {
margin-bottom: 10px;
}
"""

# Append the extra rules to the stylesheet when one is already defined;
# otherwise the extra rules become the whole stylesheet.
try:
    custom_css += custom_css_additions
except NameError:
    custom_css = custom_css_additions
def restart_space():
    """Ask the hub API client to restart the Space identified by ``REPO_ID``."""
    API.restart_space(
        repo_id=REPO_ID,
    )
### Space initialization
def _sync_dataset_snapshot(repo_id, local_dir):
    """Download the dataset repo ``repo_id`` into ``local_dir``.

    The download is best-effort: if ``snapshot_download`` raises, the error
    is printed (so the cause shows up in the logs) and a restart is requested
    through ``restart_space()``. The exception is not re-raised.

    Args:
        repo_id: Identifier of the dataset repository to fetch.
        local_dir: Local directory the snapshot is written to.
    """
    print(local_dir)
    try:
        snapshot_download(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
    except Exception as exc:
        # Report why the download failed instead of swallowing it silently,
        # then fall back to restarting as before.
        print(f"snapshot_download failed for {repo_id!r}: {exc!r}")
        restart_space()


# Fetch the request queue first, then the results, in the original order.
_sync_dataset_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)
_sync_dataset_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
# Build the leaderboard table from the downloaded results and requests.
LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH,
    COLS,
    BENCHMARK_COLS,
)
# Debug output: dimensions and column labels of the loaded table.
print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist())

# Build the three evaluation-queue tables (finished, running, pending).
(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
# Column names grouped under the heading they are listed beneath in the
# column-selection UI. Dict order is the display order.
COLUMN_CATEGORIES = {
    "Model Info": [
        "model_name",
        "model_type",
        "license",
        "likes",
        "base_model",
        "params",
        "precision",
        "weight_type",
        "still_on_hub",
        "average",
    ],
    "Academic Knowledge": [
        "abstract_algebra",
        "anatomy",
        "astronomy",
        "college_biology",
        "college_chemistry",
        "college_computer_science",
        "college_mathematics",
        "college_medicine",
        "college_physics",
    ],
    "General Knowledge": [
        "business_ethics",
        "clinical_knowledge",
        "conceptual_physics",
        "econometrics",
        "electrical_engineering",
        "elementary_mathematics",
        "formal_logic",
        "global_facts",
    ],
    "High School Subjects": [
        "high_school_biology",
        "high_school_chemistry",
        "high_school_computer_science",
        "high_school_european_history",
        "high_school_geography",
        "high_school_government_and_politics",
    ],
}
# Top-level Gradio UI: a title/introduction, three tabs (leaderboard, about,
# submission form) and a collapsible citation box.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            if LEADERBOARD_DF.empty:
                gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
            else:
                default_selection = [col.name for col in COLUMNS if col.displayed_by_default]
                print("Default Selection before ensuring 'model_name':", default_selection)  # Debug

                # Ensure "model_name" is included
                if "model_name" not in default_selection:
                    default_selection.insert(0, "model_name")
                print("Default Selection after ensuring 'model_name':", default_selection)  # Debug

                # Built once so each membership test below is O(1) instead of
                # rebuilding the full name list for every candidate column.
                known_column_names = {col.name for col in COLUMNS}

                # Collapsible panel holding one checkbox group per category.
                with gr.Accordion("Select Columns to Display", open=False):
                    column_selections = {}
                    for category, cols in COLUMN_CATEGORIES.items():
                        # Only offer columns that are actually declared in COLUMNS.
                        available_cols = [c for c in cols if c in known_column_names]
                        if not available_cols:
                            continue
                        with gr.Column(elem_classes="column-categories"):
                            gr.Markdown(f"**{category}**")
                            column_selections[category] = gr.CheckboxGroup(
                                choices=available_cols,
                                value=[c for c in available_cols if c in default_selection],
                                label="",
                            )

                select_columns_kwargs = {
                    "default_selection": default_selection,
                    "cant_deselect": [col.name for col in COLUMNS if col.never_hidden],
                    "label": "Select Columns to Display:",
                }
                try:
                    # Don't render the built-in selector if this option is available.
                    select_columns = SelectColumns(render=False, **select_columns_kwargs)
                except TypeError:
                    # The installed SelectColumns does not accept `render`;
                    # fall back to the plain configuration rather than
                    # failing while the UI is being built.
                    select_columns = SelectColumns(**select_columns_kwargs)

                leaderboard = Leaderboard(
                    value=LEADERBOARD_DF,
                    datatype=[col.type for col in COLUMNS],
                    select_columns=select_columns,
                    search_columns=[col.name for col in COLUMNS if col.name in ["model_name", "license"]],
                    hide_columns=[col.name for col in COLUMNS if col.hidden],
                    filter_columns=[
                        ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
                        ColumnFilter("precision", type="checkboxgroup", label="Precision"),
                        ColumnFilter(
                            "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
                        ),
                    ],
                    bool_checkboxgroup_label="Hide models",
                    interactive=False,
                )

                def _apply_column_selection(*groups):
                    """Merge every category's ticked columns and push them to the leaderboard."""
                    chosen = [name for group in groups for name in (group or [])]
                    # NOTE(review): Gradio 4 appears to have removed the
                    # per-component `.update()` method, and `visible_columns`
                    # is not an argument this file passes to Leaderboard
                    # anywhere else — confirm this call works against the
                    # installed gradio / gradio_leaderboard versions.
                    return leaderboard.update(visible_columns=chosen)

                # Any change in any category re-evaluates the full selection,
                # so every handler receives all checkbox groups as inputs.
                all_checkbox_groups = list(column_selections.values())
                for checkbox_group in all_checkbox_groups:
                    checkbox_group.change(
                        fn=_apply_column_selection,
                        inputs=all_checkbox_groups,
                        outputs=[leaderboard],
                    )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

                # Since the evaluation queues are empty, display a message
                with gr.Column():
                    gr.Markdown("Evaluations are performed immediately upon submission. There are no pending or running evaluations.")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                    model_type = gr.Dropdown(
                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                        label="Model type",
                        multiselect=False,
                        value=None,
                        interactive=True,
                    )

                with gr.Column():
                    precision = gr.Dropdown(
                        choices=[i.value for i in Precision if i != Precision.Unknown],
                        label="Precision",
                        multiselect=False,
                        value="float16",
                        interactive=True,
                    )
                    weight_type = gr.Dropdown(
                        choices=[i.value for i in WeightType],
                        label="Weights type",
                        multiselect=False,
                        value="Original",
                        interactive=True,
                    )
                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")

            submit_button = gr.Button("Submit Eval")
            submission_result = gr.Markdown()
            # The input order below is the positional argument order that
            # add_new_eval is called with.
            submit_button.click(
                add_new_eval,
                [
                    model_name_textbox,
                    base_model_name_textbox,
                    revision_name_textbox,
                    precision,
                    weight_type,
                    model_type,
                ],
                submission_result,
            )

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )
# Request a restart every 30 minutes (1800 s) from a background thread,
# then serve the UI with request queuing enabled.
RESTART_INTERVAL_SECONDS = 1800

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=RESTART_INTERVAL_SECONDS)
scheduler.start()

queued_demo = demo.queue(default_concurrency_limit=40)
queued_demo.launch()