# NOTE(review): the lines that were here ("Spaces:", "Runtime error", file size,
# commit hashes, and a line-number gutter) were Hugging Face Spaces page-scrape
# artifacts, not source code; they have been reduced to this comment so the
# module parses.
import gradio as gr
from gradio_leaderboard import Leaderboard
import pandas as pd
from huggingface_hub import snapshot_download
from src.about import (
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
AutoEvalColumn,
)
from src.envs import API, EVAL_RESULTS_PATH, RESULTS_REPO, TOKEN
from src.populate import get_leaderboard_df
from src.evaluation.dynamic_eval import run_dynamic_perplexity_eval
def init_leaderboard(dataframe):
    """Build the Leaderboard component from a populated results DataFrame.

    Args:
        dataframe: The leaderboard results as a pandas DataFrame.

    Returns:
        A configured ``Leaderboard`` component rendering the model column
        as markdown.

    Raises:
        ValueError: If ``dataframe`` is ``None`` or has no rows.
    """
    if dataframe is not None and not dataframe.empty:
        return Leaderboard(
            dataframe,
            headers=COLS,
            column_config={AutoEvalColumn.model.name: "markdown"},
        )
    raise ValueError("Leaderboard DataFrame is empty or None.")
def run_perplexity_test(model_name, revision, precision):
    """Run an on-demand perplexity evaluation and format a status message.

    Args:
        model_name: Hub model id, e.g. ``"org/model-name"``. Falsy values
            short-circuit with a prompt to enter a name.
        revision: Git revision of the model to evaluate (e.g. ``"main"``).
        precision: Precision string passed through to the evaluator
            (e.g. ``"float16"`` or ``"bfloat16"``).

    Returns:
        A human-readable markdown string describing success (with the
        perplexity value) or failure (with the error from the evaluator).
    """
    if not model_name:
        return "Please enter a model name."
    # run_dynamic_perplexity_eval returns (success: bool, result), where
    # result is the perplexity on success or an error message on failure.
    success, result = run_dynamic_perplexity_eval(model_name, revision, precision)
    if success:
        # NOTE(review): the original literal was split across lines by a page
        # scrape and its emoji was mojibake'd; reconstructed as one f-string.
        return (
            f"✅ Perplexity evaluation completed!\n"
            f"Perplexity: {result:.4f}\n\n"
            "Results have been saved and will appear in the leaderboard shortly."
        )
    return f"❌ Evaluation failed: {result}"
# --- Initialize results directory -------------------------------------------
# Best-effort sync of the results dataset; the app still starts if the
# download fails (e.g. offline), so keep the broad catch but log it.
try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as e:
    print(f"Error initializing results: {e}")

# Load the initial leaderboard data from the synced results directory.
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)

# --- Build the Gradio interface ---------------------------------------------
# NOTE(review): several tab/button labels below were mojibake'd by a page
# scrape (one was even split across two lines, a syntax error); the emoji
# have been reconstructed — confirm they match the originals.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="leaderboard-tab", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🧪 Test Model", elem_id="test-model-tab", id=2):
            with gr.Row():
                with gr.Column():
                    model_name = gr.Textbox(label="Model name", placeholder="org/model-name")
                    revision = gr.Textbox(label="Revision", placeholder="main", value="main")
                    precision = gr.Dropdown(
                        choices=["float16", "bfloat16"],
                        label="Precision",
                        value="float16",
                    )
                with gr.Column():
                    test_button = gr.Button("🚀 Run Perplexity Test", variant="primary")
                    result = gr.Markdown()
            # Wire the button to the on-demand evaluation handler.
            test_button.click(
                run_perplexity_test,
                [model_name, revision, precision],
                result,
            )

# Allow up to 5 concurrent queued events, then start the server.
demo.queue(default_concurrency_limit=5).launch()