leaderboard update
- app.py +17 -7
- src/about.py +1 -1
app.py
CHANGED
@@ -12,7 +12,7 @@ from src.about import (
 
 data = {
     "Method": [
-        "Handwritten
+        "Handwritten LOTUS (llama-3.1-70B)",
         "Zero-shot Text2SQL (llama-3.1-70B)",
         "Zero-shot Text2SQL + LM Generation (llama-3.1-70B)",
         "RAG (E5)",
@@ -27,7 +27,17 @@ leaderboard_df = pd.DataFrame(data)
 leaderboard_df = leaderboard_df.sort_values(
     "Execution Accuracy", ascending=False
 ).reset_index(drop=True)
-leaderboard_df.insert(0, "Rank", leaderboard_df.index +
+leaderboard_df.insert(0, "Rank", leaderboard_df.index + 2)
+leaderboard_df.loc[0, "Rank"] = None
+
+def highlight_row(row):
+    if pd.isna(row["Rank"]):  # First row
+        return ["background-color: #d4edda; font-weight: bold;" for _ in row]
+    return [""] * len(row)
+
+
+# Apply the style
+leaderboard_df = leaderboard_df.style.apply(highlight_row, axis=1)
 
 
 def hyperlink_method(method):
@@ -87,7 +97,7 @@ with gr.Blocks() as demo:
         )
 
     # Section 2: Submission Frequency
-    with gr.Accordion("2️⃣ Submission Frequency", open=
+    with gr.Accordion("2️⃣ Submission Frequency", open=True):
         gr.Markdown(
             """
             - Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth.
@@ -96,17 +106,17 @@ with gr.Blocks() as demo:
         )
 
     # Section 3: How to Upload Materials
-    with gr.Accordion("3️⃣ How to Upload Materials", open=
+    with gr.Accordion("3️⃣ How to Upload Materials", open=True):
         gr.Markdown(
             """
             Follow these steps to upload your materials:
             1. Compress all files in the code into a single `.zip` file, or provide a public repository to refer to.
-            2. Email the `.zip` file or repository link to our email
+            2. Email the `.zip` file or repository link to our email tagbenchmark@gmail.com.
             """
         )
 
     # Section 4: Submission Process
-    with gr.Accordion("4️⃣ Submission Process", open=
+    with gr.Accordion("4️⃣ Submission Process", open=True):
         gr.Markdown(
             """
             After uploading your materials:
@@ -122,7 +132,7 @@ with gr.Blocks() as demo:
     gr.Markdown(
         """
        <div style="text-align: center; margin-top: 2rem;">
-       For further assistance, reach out to
+       For further assistance, reach out to [email protected] with questions.
        </div>
        """
    )
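The ranking and highlighting logic added above can be exercised on its own. The sketch below is a minimal, self-contained version of what the commit does: sort by execution accuracy, insert a Rank column that starts at 2 so the handwritten reference pipeline stays unranked at the top, and highlight that row with a pandas Styler. The method names and scores are placeholder values, and the Rank column is created as floats here (a small deviation from the diff) so that clearing the first rank does not force a dtype upcast.

```python
import pandas as pd

# Placeholder data; the real app lists more methods and their measured accuracies.
leaderboard_df = pd.DataFrame({
    "Method": [
        "Handwritten LOTUS (llama-3.1-70B)",
        "Zero-shot Text2SQL (llama-3.1-70B)",
        "RAG (E5)",
    ],
    "Execution Accuracy": [0.55, 0.20, 0.10],
})

leaderboard_df = leaderboard_df.sort_values(
    "Execution Accuracy", ascending=False
).reset_index(drop=True)

# Ranks start at 2 so submitted methods are numbered below the reference pipeline,
# which sorts first and is shown without a rank of its own.
leaderboard_df.insert(0, "Rank", (leaderboard_df.index + 2).astype(float))
leaderboard_df.loc[0, "Rank"] = float("nan")


def highlight_row(row):
    # The unranked first row (the reference pipeline) is rendered green and bold.
    if pd.isna(row["Rank"]):
        return ["background-color: #d4edda; font-weight: bold;"] * len(row)
    return [""] * len(row)


# Styler.apply with axis=1 hands each row to highlight_row; the returned CSS strings
# are attached cell by cell when the table is rendered (to_html requires jinja2).
styled = leaderboard_df.style.apply(highlight_row, axis=1)
print(styled.to_html())
```

If the app passes the styled frame to a Gradio dataframe component, recent Gradio versions can render the Styler's CSS directly, which is presumably why the leaderboard is rebound to the Styler object here.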
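For the accordion changes in app.py, the sketch below is a stripped-down version of the submission-instructions layout: each numbered section is a gr.Accordion that now renders expanded (open=True), with its Markdown body visible on page load. The Markdown text is abbreviated placeholder copy, not the app's exact wording.

```python
import gradio as gr

with gr.Blocks() as demo:
    # open=True makes each section expanded on load instead of collapsed.
    with gr.Accordion("2️⃣ Submission Frequency", open=True):
        gr.Markdown(
            "- Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth."
        )

    with gr.Accordion("3️⃣ How to Upload Materials", open=True):
        gr.Markdown(
            """
            1. Compress your code into a single `.zip` file, or provide a public repository link.
            2. Email the `.zip` file or the repository link to tagbenchmark@gmail.com.
            """
        )

    with gr.Accordion("4️⃣ Submission Process", open=True):
        gr.Markdown("After uploading your materials:")  # abbreviated placeholder

if __name__ == "__main__":
    demo.launch()
```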
src/about.py
CHANGED
@@ -31,7 +31,7 @@ Intro text
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## What does the TAG leaderboard evaluate?
-In this leaderboard, you'll find execution accuracy comparisons of table question answering approaches on [TAG-Bench]
+In this leaderboard, you'll find execution accuracy comparisons of table question answering approaches on [TAG-Bench](https://github.com/TAG-Research/TAG-Bench/tree/main). TAG-Bench contains complex queries requiring world knowledge or semantic reasoning that goes beyond the information explicitly available in the database.
 
 ## How is accuracy measured?
 Execution accuracy is measured as the number of exact matches to our annotated ground truth answers which are hand-labeled by experts.
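The about text defines the metric as exact matches against expert-labeled ground truth answers. A minimal sketch of that scoring rule is below, reported here as a fraction; the function name and the whitespace trimming are illustrative assumptions, and the official TAG-Bench evaluation code may normalize or compare answers differently.

```python
def execution_accuracy(predictions, ground_truths):
    """Fraction of predictions that exactly match the annotated ground truth."""
    assert len(predictions) == len(ground_truths), "one prediction per question"
    exact = sum(
        str(pred).strip() == str(gold).strip()  # exact string match after trimming
        for pred, gold in zip(predictions, ground_truths)
    )
    return exact / len(ground_truths)


# Example: two of three answers match exactly -> 0.67
print(round(execution_accuracy(["Paris", "42", "blue"], ["Paris", "42", "red"]), 2))
```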