File size: 2,701 Bytes
0547e3e
 
 
e1247b7
 
 
 
 
 
 
 
 
 
cacf673
 
 
 
 
 
d11d433
cacf673
d11d433
cacf673
 
d11d433
cacf673
 
d11d433
 
 
 
 
 
 
c945edb
d11d433
 
c945edb
d11d433
 
 
 
 
 
7fb3cda
d11d433
 
 
e1247b7
d11d433
 
 
 
d369ab3
d11d433
 
 
 
e1247b7
c945edb
e1247b7
 
 
 
 
 
 
cbfcc33
e1247b7
 
 
 
 
 
 
 
 
 
 
c945edb
cacf673
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import html

import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)

# Raw leaderboard entries: each model name lines up positionally with its
# reported execution accuracy (kept as a percentage string for display).
data = {
    "Model": [
        "Handwritten TAG",
        "Zero-shot Text2SQL",
        "Zero-shot Text2SQL + LM Generation",
        "RAG (E5)",
        "RAG (E5) + LM Rerank",
    ],
    "Execution Accuracy": ["55%", "17%", "13%", "0%", "2%"],
}

# Build the table and order it from best to worst accuracy. The percentage
# strings are parsed into a temporary float column that serves only as the
# sort key and is removed again before the table is displayed.
leaderboard_df = (
    pd.DataFrame(data)
    .assign(
        **{
            "Execution Accuracy (numeric)": lambda frame: (
                frame["Execution Accuracy"].str.rstrip("%").astype(float)
            )
        }
    )
    .sort_values(by="Execution Accuracy (numeric)", ascending=False)
    .reset_index(drop=True)
    .drop(columns=["Execution Accuracy (numeric)"])
)

# The index is 0-based after the reset, so rank is simply index + 1.
leaderboard_df.insert(loc=0, column="Rank", value=leaderboard_df.index + 1)

# Add hyperlinks to the Model column
def hyperlink_model(model):
    """Wrap a model name in an HTML anchor pointing at the TAG-Bench repository.

    Args:
        model: Display name of the model. Non-string values are converted
            with ``str()`` before being rendered.

    Returns:
        An ``<a>`` element (as a string) that opens the TAG-Bench GitHub tree
        in a new tab and uses the model name as its link text.
    """
    base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    # Escape the label so names containing &, < or > cannot break the markup
    # (or inject tags) when the cell is rendered as HTML.
    label = html.escape(str(model))
    return f'<a href="{base_url}" target="_blank">{label}</a>'

# Swap every plain model name for its HTML anchor so the table shows links.
leaderboard_df["Model"] = leaderboard_df["Model"].map(hyperlink_model)

# Simplified Gradio app: a centered title banner, the introduction text, and
# three tabs (leaderboard table, about page, submission instructions).
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center;">
            <h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">TAG Leaderboard</h1>
            <p style="font-size: 1.25rem; color: gray;">Comparing baseline approaches for structured data queries</p>
        </div>
        """
    )
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # NOTE(review): all three tabs share elem_id "llm-benchmark-tab-table";
    # confirm whether any CSS relies on it before making the ids unique.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            # Leaderboard table, already sorted by execution accuracy.
            with gr.Row():
                gr.Dataframe(
                    value=leaderboard_df,
                    # Headers and datatypes line up positionally with the
                    # DataFrame columns: Rank, Model, Execution Accuracy.
                    headers=["Rank", "Model", "Execution Accuracy"],
                    # "html" lets the anchors in the Model column render as
                    # clickable links instead of raw markup.
                    datatype=["number", "html", "str"],
                    row_count=(5, "dynamic"),
                    wrap=True,
                    elem_id="leaderboard",
                    type="pandas"
                )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submission Instructions ", elem_id="llm-benchmark-tab-table", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")


# Start the web server when the module is executed.
demo.launch()