import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)

# Static leaderboard results: method label -> execution accuracy (%).
# The three "Human Performance" entries are reference baselines, not ranked
# submissions.
data = {
    "Method": [
        "Human Performance (Handwritten LOTUS Llama-3.1-70B)",
        "Zero-shot Text2SQL (Llama-3.1-70B)",
        "Zero-shot Text2SQL + LM Generation (Llama-3.1-70B)",
        "RAG (E5 + Llama-3.1-70B)",
        "RAG (E5) + LM Rerank (Llama-3.1-70B)",
        "Human Performance (Handwritten LOTUS GPT-4o)",
        "Zero-shot Text2SQL (GPT-4o)",
        "Zero-shot Text2SQL + LM Generation (GPT-4o)",
        "RAG (E5 + GPT-4o)",
        "RAG (E5) + LM Rerank (GPT-4o)",
        "Human Performance (Handwritten LOTUS o3-mini)",
        "Zero-shot Text2SQL (o3-mini)",
        "Zero-shot Text2SQL + LM Generation (o3-mini)",
        "RAG (E5 + o3-mini)",
        "RAG (E5) + LM Rerank (o3-mini)",
        "Zero-shot Text2SQL (Deepseek-R1)",
        "Zero-shot Text2SQL + LM Generation (Deepseek-R1)",
    ],
    "Execution Accuracy": [
        55.0, 17.0, 13.0, 0.0, 2.0,
        55.0, 18.0, 15.0, 3.0, 3.0,
        65.0, 18.0, 30.0, 7.0, 7.0,
        12.0, 0.0,
    ],
}

leaderboard_df = pd.DataFrame(data)
leaderboard_df = leaderboard_df.sort_values(
    "Execution Accuracy", ascending=False
).reset_index(drop=True)

# After sorting, the top NUM_UNRANKED rows are the human-performance
# baselines (65.0, 55.0, 55.0). They get a blank rank; automated methods
# are ranked starting at 1 (index - (NUM_UNRANKED - 1)).
NUM_UNRANKED = 3
leaderboard_df.insert(0, "Rank", leaderboard_df.index - (NUM_UNRANKED - 1))
leaderboard_df.loc[: NUM_UNRANKED - 1, "Rank"] = ""


def hyperlink_method(method: str) -> str:
    """Wrap a method label in an HTML link to the TAG-Bench repository.

    The Method column is rendered with datatype "html", so returning an
    anchor tag displays as a clickable link in the Gradio Dataframe.
    """
    base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    return f'<a href="{base_url}" target="_blank">{method}</a>'


def hyperlink_model(model: str) -> str:
    """Wrap a model name in an HTML link to its Hugging Face page.

    Currently unused (the Model column is commented out below); kept for
    when per-model links are re-enabled.
    """
    base_url = "https://huggingface.co/meta-llama/Llama-3.1-70B"
    return f'<a href="{base_url}" target="_blank">{model}</a>'


leaderboard_df["Method"] = leaderboard_df["Method"].apply(hyperlink_method)


def highlight_row(row):
    """Style human-performance rows (blank Rank) in bold on a green background."""
    if row["Rank"] == "":
        return ["background-color: #d4edda; font-weight: bold;"] * len(row)
    return [""] * len(row)


# Apply the style; leaderboard_df is now a pandas Styler, which gr.Dataframe
# accepts and renders with the per-row CSS above.
leaderboard_df = leaderboard_df.style.apply(highlight_row, axis=1)

# leaderboard_df["Model"] = leaderboard_df["Model"].apply(hyperlink_model)

with gr.Blocks() as demo:
    # NOTE(review): the original header markup was garbled during extraction;
    # this reconstructs a centered title + subtitle — confirm intended styling.
    gr.HTML(
        """
        <div style="text-align: center;">
            <h1>TAG Leaderboard</h1>
            <p>A benchmark for natural language queries over data</p>
        </div>
        """
    )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                gr.Dataframe(
                    value=leaderboard_df,
                    headers=["Rank", "Method", "Execution Accuracy"],
                    # Method is "html" so the anchor tags render as links.
                    datatype=["str", "html", "number"],
                    row_count=(5, "dynamic"),
                    wrap=True,
                    elem_id="leaderboard",
                    type="pandas",
                )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem(
            "🚀 Submission Instructions ", elem_id="llm-benchmark-tab-table", id=3
        ):
            # Section 1: Required Materials
            with gr.Accordion("1️⃣ Required Materials", open=True):
                gr.Markdown(
                    """
                    Ensure the following files are included in your submission:
                    - **output.json**: File containing the evaluation outputs generated by your model. Please refer to [] for format instructions.
                    - **requirements.txt**: A list of dependencies needed to run your model or script.
                    - **README.md**: A detailed description of your submission, including:
                      - Purpose and overview of the submission.
                      - Instructions to reproduce the results.
                      - Any additional notes for evaluators.
                    - **Model/Keys**: Upload your models or API keys to [Hugging Face](https://huggingface.co/) if they are not publicly accessible.

                    **Note**: Submissions missing any of these materials will not be processed.
                    """
                    # TODO(review): the "[]" above is a dangling placeholder —
                    # fill in the link to the output-format instructions.
                )

            # Section 2: Submission Frequency
            with gr.Accordion("2️⃣ Submission Frequency", open=True):
                gr.Markdown(
                    """
                    - Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth.
                    - Plan your submission timeline accordingly to avoid delays.
                    """
                )

            # Section 3: How to Upload Materials
            with gr.Accordion("3️⃣ How to Upload Materials", open=True):
                gr.Markdown(
                    """
                    Follow these steps to upload your materials:
                    1. Compress all files in the code into a single `.zip` file, or provide a public repository to refer to.
                    2. Email the `.zip` file or repository link to our email tagbenchmark@gmail.com.
                    """
                )

            # Section 4: Submission Process
            with gr.Accordion("4️⃣ Submission Process", open=True):
                gr.Markdown(
                    """
                    After uploading your materials:
                    - Provide accurate contact information for follow-ups.
                    - Double-check your materials for completeness to avoid processing delays.

                    **Important:** Your submission will be added to the evaluation queue. Depending on the queue size, evaluations may take up to a few weeks.
                    """
                )

    # Footer
    # NOTE(review): original footer markup was garbled during extraction;
    # rendered here as plain Markdown.
    gr.Markdown(
        """
        For further assistance, reach out to tagbenchmark@gmail.com with questions.
        """
    )

demo.launch()