Spaces:
Running
Running
cleaner update
Browse files
app.py
CHANGED
@@ -1,66 +1,66 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
4 |
-
|
5 |
-
CITATION_BUTTON_LABEL,
|
6 |
-
CITATION_BUTTON_TEXT,
|
7 |
-
EVALUATION_QUEUE_TEXT,
|
8 |
-
INTRODUCTION_TEXT,
|
9 |
-
LLM_BENCHMARKS_TEXT,
|
10 |
-
TITLE,
|
11 |
-
)
|
12 |
-
|
13 |
-
# Simplified DataFrame for the leaderboard
|
14 |
data = {
|
15 |
"Model": [
|
16 |
"Handwritten TAG",
|
17 |
"Zero-shot Text2SQL",
|
18 |
"Zero-shot Text2SQL + LM Generation",
|
19 |
"RAG (E5)",
|
20 |
-
"RAG (E5) + LM Rerank"
|
21 |
-
],
|
22 |
-
"Code": [
|
23 |
-
"", # Handwritten TAG doesn't have a code link
|
24 |
-
"", # Zero-shot Text2SQL doesn't have a code link
|
25 |
-
"", # Zero-shot Text2SQL + LM Generation doesn't have a code link
|
26 |
-
"", # RAG (E5) doesn't have a code link
|
27 |
-
"" # RAG (E5) + LM Rerank doesn't have a code link
|
28 |
],
|
29 |
-
"Execution Accuracy": [
|
30 |
-
"55%", # Handwritten TAG
|
31 |
-
"17%", # Zero-shot Text2SQL
|
32 |
-
"13%", # Zero-shot Text2SQL + LM Generation
|
33 |
-
"0%", # RAG (E5)
|
34 |
-
"2%" # RAG (E5) + LM Rerank
|
35 |
-
]
|
36 |
}
|
37 |
|
|
|
38 |
leaderboard_df = pd.DataFrame(data)
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
# Highlight the top row in green for "Handwritten TAG"
|
48 |
-
with gr.Row():
|
49 |
-
gr.Dataframe(
|
50 |
-
value=leaderboard_df,
|
51 |
-
headers=["Model", "Code", "Execution Accuracy"],
|
52 |
-
datatype=["str", "str", "str"],
|
53 |
-
row_count=(5, "dynamic"),
|
54 |
-
wrap=True,
|
55 |
-
elem_id="leaderboard",
|
56 |
-
type="pandas"
|
57 |
-
)
|
58 |
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
|
|
|
|
|
|
|
|
65 |
|
66 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
4 |
+
# Simplified leaderboard data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
data = {
|
6 |
"Model": [
|
7 |
"Handwritten TAG",
|
8 |
"Zero-shot Text2SQL",
|
9 |
"Zero-shot Text2SQL + LM Generation",
|
10 |
"RAG (E5)",
|
11 |
+
"RAG (E5) + LM Rerank",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
],
|
13 |
+
"Execution Accuracy": ["55%", "17%", "13%", "0%", "2%"],
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
}
|
15 |
|
16 |
+
# Create a DataFrame
|
17 |
leaderboard_df = pd.DataFrame(data)
|
18 |
|
19 |
+
# Convert Execution Accuracy to numeric for sorting
|
20 |
+
leaderboard_df["Execution Accuracy (numeric)"] = (
|
21 |
+
leaderboard_df["Execution Accuracy"].str.rstrip("%").astype(float)
|
22 |
+
)
|
23 |
+
leaderboard_df = leaderboard_df.sort_values(
|
24 |
+
"Execution Accuracy (numeric)", ascending=False
|
25 |
+
).reset_index(drop=True)
|
26 |
|
27 |
+
# Add the Rank column
|
28 |
+
leaderboard_df.insert(0, "Rank", leaderboard_df.index + 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
# Drop the numeric column for display
|
31 |
+
leaderboard_df = leaderboard_df.drop(columns=["Execution Accuracy (numeric)"])
|
32 |
+
|
33 |
+
# Add hyperlinks to the Model column
|
34 |
+
def hyperlink_model(model, base_url="https://github.com/TAG-Research/TAG-Bench/tree/main"):
    """Wrap a model name in an HTML anchor that opens in a new tab.

    The returned markup is placed in the "Model" column, which is rendered
    with ``DataFrame.to_html(escape=False)``; pandas therefore performs no
    escaping of its own, so both the label and the href are escaped here.

    Args:
        model: Display name of the model. Non-string values are converted
            with ``str()`` before escaping.
        base_url: Link target. Defaults to the TAG-Bench repository tree,
            the URL every row pointed at originally.

    Returns:
        An ``<a href="..." target="_blank">...</a>`` string.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import lines.
    import html

    # quote=True also escapes quotes so the value cannot break out of the
    # href attribute.
    href = html.escape(str(base_url), quote=True)
    label = html.escape(str(model), quote=True)
    return f'<a href="{href}" target="_blank">{label}</a>'
|
37 |
+
|
38 |
+
leaderboard_df["Model"] = leaderboard_df["Model"].apply(hyperlink_model)
|
39 |
+
|
40 |
+
# Gradio app
|
41 |
+
with gr.Blocks() as demo:
|
42 |
+
# Title and subtitle
|
43 |
+
gr.HTML(
|
44 |
+
"""
|
45 |
+
<div style="text-align: center;">
|
46 |
+
<h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">Execution Accuracy Leaderboard</h1>
|
47 |
+
<p style="font-size: 1.25rem; color: gray;">Comparing baseline approaches for structured data queries</p>
|
48 |
+
</div>
|
49 |
+
"""
|
50 |
+
)
|
51 |
|
52 |
+
# Leaderboard table
|
53 |
+
gr.HTML(
|
54 |
+
leaderboard_df.to_html(
|
55 |
+
index=False,
|
56 |
+
escape=False,
|
57 |
+
classes="leaderboard-table",
|
58 |
+
)
|
59 |
+
)
|
60 |
|
61 |
+
# Footer or additional info (optional)
|
62 |
+
gr.Markdown(
|
63 |
+
"Note: Execution accuracy is based on the percentage of correctly answered queries."
|
64 |
+
)
|
65 |
|
66 |
demo.launch()
|