import gradio as gr
import pandas as pd
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
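# Hard-coded leaderboard entries: each method paired with its execution accuracy score.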
data = {
"Method": [
"Human Performance (Handwritten LOTUS Llama-3.1-70B)",
"Zero-shot Text2SQL (Llama-3.1-70B)",
"Zero-shot Text2SQL + LM Generation (Llama-3.1-70B)",
"RAG (E5 + Llama-3.1-70B)",
"RAG (E5) + LM Rerank (Llama-3.1-70B)",
"Human Performance (Handwritten LOTUS GPT-4o)",
"Zero-shot Text2SQL (GPT-4o)",
"Zero-shot Text2SQL + LM Generation (GPT-4o)",
"RAG (E5 + GPT-4o)",
"RAG (E5) + LM Rerank (GPT-4o)",
"Human Performance (Handwritten LOTUS o3-mini)",
"Zero-shot Text2SQL (o3-mini)",
"Zero-shot Text2SQL + LM Generation (o3-mini)",
"RAG (E5 + o3-mini)",
"RAG (E5) + LM Rerank (o3-mini)",
"Zero-shot Text2SQL (Deepseek-R1)",
"Zero-shot Text2SQL + LM Generation (Deepseek-R1)",
],
# "Model": ["meta-llama/Llama-3.1-70B"] * 5,
"Execution Accuracy": [55.0, 17.0, 13.0, 0.0, 2.0, 55.0, 18.0, 15.0, 3.0, 3.0, 65.0, 18.0, 30.0, 7.0, 7.0, 12.0, 0.0],
# "Execution Accuracy": [0.0, 2.0, 55.0, 18.0, 3.0, 3.0, 65.0, 18.0, 7.0, 7.0, 12.0],
}
leaderboard_df = pd.DataFrame(data)
leaderboard_df = leaderboard_df.sort_values(
"Execution Accuracy", ascending=False
).reset_index(drop=True)
# The three Human Performance reference rows sort to the top; leave them unranked
# and number the remaining methods from 1. Ranks are stored as strings so the
# blank entries keep a consistent dtype.
leaderboard_df.insert(0, "Rank", (leaderboard_df.index - 2).astype(str))
leaderboard_df.loc[0:2, "Rank"] = ""
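# Render each method name as a link to the TAG-Bench repository.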
def hyperlink_method(method):
base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
return f'<a href="{base_url}" target="_blank">{method}</a>'
def hyperlink_model(model):
base_url = "https://huggingface.co/meta-llama/Llama-3.1-70B"
return f'<a href="{base_url}" target="_blank">{model}</a>'
leaderboard_df["Method"] = leaderboard_df["Method"].apply(hyperlink_method)
def highlight_row(row):
if row["Rank"] == "": # First row
return ["background-color: #d4edda; font-weight: bold;" for _ in row]
return [""] * len(row)
# Apply the style
leaderboard_df = leaderboard_df.style.apply(highlight_row, axis=1)
# leaderboard_df["Model"] = leaderboard_df["Model"].apply(hyperlink_model)
with gr.Blocks() as demo:
gr.HTML(
"""
<div style="text-align: center;">
<h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">TAG Leaderboard</h1>
<p style="font-size: 1.25rem; color: gray;">A benchmark for natural language queries over data</p>
</div>
"""
)
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
with gr.Row():
gr.Dataframe(
value=leaderboard_df,
headers=["Rank", "Method", "Execution Accuracy"],
datatype=["str", "html", "number"],
row_count=(5, "dynamic"),
wrap=True,
elem_id="leaderboard",
type="pandas"
)
with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
with gr.TabItem("πŸš€ Submission Instructions ", elem_id="llm-benchmark-tab-table", id=3):
with gr.Accordion("1️⃣ Required Materials", open=True):
gr.Markdown(
"""
Ensure the following files are included in your submission:
- **output.json**: File containing the evaluation outputs generated by your model. Please refer to [] for format instructions.
- **requirements.txt**: A list of dependencies needed to run your model or script.
- **README.md**: A detailed description of your submission, including:
- Purpose and overview of the submission.
- Instructions to reproduce the results.
- Any additional notes for evaluators.
- **Model/Keys**: Upload your models or API keys to [Hugging Face](https://huggingface.co/) if they are not publicly accessible.
**Note**: Submissions missing any of these materials will not be processed.
"""
)
# Section 2: Submission Frequency
with gr.Accordion("2️⃣ Submission Frequency", open=True):
gr.Markdown(
"""
- Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth.
- Plan your submission timeline accordingly to avoid delays.
"""
)
# Section 3: How to Upload Materials
with gr.Accordion("3️⃣ How to Upload Materials", open=True):
gr.Markdown(
"""
Follow these steps to upload your materials:
                    1. Compress all required files into a single `.zip` file, or provide a link to a public repository.
                    2. Email the `.zip` file or repository link to [email protected].
"""
)
# Section 4: Submission Process
with gr.Accordion("4️⃣ Submission Process", open=True):
gr.Markdown(
"""
After uploading your materials:
- Provide accurate contact information for follow-ups.
- Double-check your materials for completeness to avoid processing delays.
**Important:** Your submission will be added to the evaluation queue. Depending on the queue size, evaluations may take up to a few weeks.
"""
)
# Footer
gr.Markdown(
"""
<div style="text-align: center; margin-top: 2rem;">
        For further assistance, reach out to [email protected].
</div>
"""
)
demo.launch()