File size: 5,234 Bytes
0547e3e
 
 
e1247b7
 
 
 
 
 
 
 
 
cacf673
e25cebf
cacf673
e25cebf
 
cacf673
d11d433
cacf673
e25cebf
 
cacf673
 
 
 
d11d433
e25cebf
d11d433
 
c945edb
d11d433
e25cebf
d11d433
e25cebf
 
 
 
7fb3cda
d11d433
 
e25cebf
 
 
 
d11d433
 
 
 
d369ab3
e25cebf
d11d433
 
 
c945edb
e1247b7
 
 
 
 
e25cebf
738c269
e1247b7
 
 
 
 
 
 
 
 
 
e25cebf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c945edb
cacf673
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)

# Static benchmark results: each method paired with its execution accuracy (%)
# on TAG-Bench. Results are hard-coded here rather than loaded from a store.
data = {
    "Method": [
        "Handwritten TAG",
        "Zero-shot Text2SQL (llama-3.1-70B)",
        "Zero-shot Text2SQL + LM Generation (llama-3.1-70B)",
        "RAG (E5)",
        "RAG (E5) + LM Rerank",
    ],
    # "Model": ["meta-llama/Llama-3.1-70B"] * 5,
    "Execution Accuracy": [55.0, 17.0, 13.0, 0.0, 2.0],
}

# Order rows best-first by accuracy, then expose the resulting position as a
# 1-based "Rank" column in front.
leaderboard_df = (
    pd.DataFrame(data)
    .sort_values("Execution Accuracy", ascending=False)
    .reset_index(drop=True)
)
leaderboard_df.insert(0, "Rank", leaderboard_df.index + 1)


def hyperlink_method(method):
    """Return *method* wrapped in an HTML anchor pointing at the TAG-Bench repo.

    Every method links to the same repository URL; only the visible label
    varies. Opens in a new tab via target="_blank".
    """
    repo_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    return '<a href="{}" target="_blank">{}</a>'.format(repo_url, method)

def hyperlink_model(model):
    """Return *model* wrapped in an HTML anchor to its Hugging Face model page.

    Currently unused (the "Model" column is commented out), but kept so the
    column can be re-enabled without changes elsewhere.
    """
    model_url = "https://huggingface.co/meta-llama/Llama-3.1-70B"
    return '<a href="{}" target="_blank">{}</a>'.format(model_url, model)


# Replace each plain method name with its clickable HTML anchor so the
# leaderboard table renders links (the column is declared "html" in the UI).
leaderboard_df["Method"] = leaderboard_df["Method"].map(hyperlink_method)
# leaderboard_df["Model"] = leaderboard_df["Model"].map(hyperlink_model)


# Assemble the Gradio app: a centered header banner plus three tabs —
# the leaderboard table, an About page, and submission instructions.
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center;">
            <h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">TAG Leaderboard</h1>
            <p style="font-size: 1.25rem; color: gray;">Evaluating complex natural language queries over structured data.</p>
        </div>
        """
    )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                # "Method" cells contain pre-rendered anchors, hence "html".
                gr.Dataframe(
                    value=leaderboard_df,
                    headers=["Rank", "Method", "Execution Accuracy"],
                    datatype=["str", "html", "number"],
                    row_count=(5, "dynamic"),
                    wrap=True,
                    elem_id="leaderboard",
                    type="pandas",
                )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Fixed: label previously had a trailing space ("Submission Instructions ").
        with gr.TabItem("🚀 Submission Instructions", elem_id="llm-benchmark-tab-table", id=3):
            # Section 1: Required Materials
            # TODO(review): the "[]" placeholder below still needs a link to the
            # output-format instructions before launch.
            with gr.Accordion("1️⃣ Required Materials", open=True):
                gr.Markdown(
                    """
                    Ensure the following files are included in your submission:
                    - **output.json**: File containing the evaluation outputs generated by your model. Please refer to [] for format instructions.
                    - **requirements.txt**: A list of dependencies needed to run your model or script.
                    - **README.md**: A detailed description of your submission, including:
                        - Purpose and overview of the submission.
                        - Instructions to reproduce the results.
                        - Any additional notes for evaluators.
                    - **Model/Keys**: Upload your models or API keys to [Hugging Face](https://huggingface.co/) if they are not publicly accessible.
                    
                    **Note**: Submissions missing any of these materials will not be processed.
                    """
                )

            # Section 2: Submission Frequency
            with gr.Accordion("2️⃣ Submission Frequency", open=False):
                gr.Markdown(
                    """
                    - Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth.
                    - Plan your submission timeline accordingly to avoid delays.
                    """
                )

            # Section 3: How to Upload Materials
            # Fixed typo: "repositoty" -> "repository".
            # TODO(review): replace the "[email]" placeholder with a real contact address.
            with gr.Accordion("3️⃣ How to Upload Materials", open=False):
                gr.Markdown(
                    """
                    Follow these steps to upload your materials:
                    1. Compress all files in the code into a single `.zip` file, or provide a public repository to refer to.
                    2. Email the `.zip` file or repository link to our email [email].
                    """
                )

            # Section 4: Submission Process
            # Fixed: removed a dangling empty bullet ("- ") left in the list.
            with gr.Accordion("4️⃣ Submission Process", open=False):
                gr.Markdown(
                    """
                    After uploading your materials:
                    - Provide accurate contact information for follow-ups.
                    - Double-check your materials for completeness to avoid processing delays.
                    
                    **Important:** Your submission will be added to the evaluation queue. Depending on the queue size, evaluations may take up to a few weeks.
                    """
                )

            # Footer
            # TODO(review): replace the "[email]" placeholder with a real contact address.
            gr.Markdown(
                """
                <div style="text-align: center; margin-top: 2rem;">
                    For further assistance, reach out to [email] with questions.
                </div>
                """
            )


demo.launch()