abiswal committed
Commit d3a0f24 · 1 Parent(s): 738c269

leaderboard update

Files changed (2)
  1. app.py +17 -7
  2. src/about.py +1 -1
app.py CHANGED

```diff
@@ -12,7 +12,7 @@ from src.about import (
 
 data = {
     "Method": [
-        "Handwritten TAG",
+        "Handwritten LOTUS (llama-3.1-70B)",
         "Zero-shot Text2SQL (llama-3.1-70B)",
         "Zero-shot Text2SQL + LM Generation (llama-3.1-70B)",
         "RAG (E5)",
@@ -27,7 +27,17 @@ leaderboard_df = pd.DataFrame(data)
 leaderboard_df = leaderboard_df.sort_values(
     "Execution Accuracy", ascending=False
 ).reset_index(drop=True)
-leaderboard_df.insert(0, "Rank", leaderboard_df.index + 1)
+leaderboard_df.insert(0, "Rank", leaderboard_df.index + 2)
+leaderboard_df.loc[0, "Rank"] = None
+
+def highlight_row(row):
+    if pd.isna(row["Rank"]):  # First row
+        return ["background-color: #d4edda; font-weight: bold;" for _ in row]
+    return [""] * len(row)
+
+
+# Apply the style
+leaderboard_df = leaderboard_df.style.apply(highlight_row, axis=1)
 
 
 def hyperlink_method(method):
```
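The highlighting added here uses pandas' `Styler.apply` with `axis=1`, which calls a function once per row and expects one CSS declaration string per cell. Below is a minimal standalone sketch of the same pattern with placeholder rows and a slightly simplified rank offset; the scores are illustrative, not leaderboard results:

```python
import pandas as pd

# Placeholder rows standing in for the leaderboard (not real results).
df = pd.DataFrame({
    "Method": [
        "Handwritten LOTUS (llama-3.1-70B)",
        "RAG (E5)",
        "Zero-shot Text2SQL (llama-3.1-70B)",
    ],
    "Execution Accuracy": [0.55, 0.32, 0.20],
}).sort_values("Execution Accuracy", ascending=False).reset_index(drop=True)

# Float dtype so the top row can hold NaN: the reference method at the
# top is left unranked, and the remaining methods are numbered from 2.
df.insert(0, "Rank", (df.index + 1).astype(float))
df.loc[0, "Rank"] = float("nan")

def highlight_row(row):
    # Called once per row by Styler.apply(axis=1); must return one
    # CSS declaration string per cell in that row.
    if pd.isna(row["Rank"]):  # the unranked reference row
        return ["background-color: #d4edda; font-weight: bold;"] * len(row)
    return [""] * len(row)

styled = df.style.apply(highlight_row, axis=1)
print(styled.to_html())  # Styler renders to HTML for display
```

Keying the style off the NaN rank means the reference row stays highlighted even if the method list or its ordering changes.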
```diff
@@ -87,7 +97,7 @@ with gr.Blocks() as demo:
         )
 
     # Section 2: Submission Frequency
-    with gr.Accordion("2️⃣ Submission Frequency", open=False):
+    with gr.Accordion("2️⃣ Submission Frequency", open=True):
         gr.Markdown(
             """
             - Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth.
@@ -96,17 +106,17 @@ with gr.Blocks() as demo:
         )
 
     # Section 3: How to Upload Materials
-    with gr.Accordion("3️⃣ How to Upload Materials", open=False):
+    with gr.Accordion("3️⃣ How to Upload Materials", open=True):
         gr.Markdown(
             """
             Follow these steps to upload your materials:
             1. Compress all files in the code into a single `.zip` file, or provide a public repository to refer to.
-            2. Email the `.zip` file or repositoty link to our email [email].
+            2. Email the `.zip` file or repository link to our email tagbenchmark@gmail.com.
             """
         )
 
     # Section 4: Submission Process
-    with gr.Accordion("4️⃣ Submission Process", open=False):
+    with gr.Accordion("4️⃣ Submission Process", open=True):
         gr.Markdown(
             """
             After uploading your materials:
@@ -122,7 +132,7 @@ with gr.Blocks() as demo:
     gr.Markdown(
         """
         <div style="text-align: center; margin-top: 2rem;">
-        For further assistance, reach out to [email] with questions.
+        For further assistance, reach out to tagbenchmark@gmail.com with questions.
         </div>
         """
    )
```
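The accordion edits only flip the `open` flag, which controls whether Gradio renders each section expanded when the page loads. A minimal sketch of the behavior (the label text is illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    # open=True expands the accordion on page load; the previous
    # open=False rendered it collapsed until the user clicked it.
    with gr.Accordion("How to Upload Materials", open=True):
        gr.Markdown("These instructions are visible immediately.")

demo.launch()
```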
 
src/about.py CHANGED

```diff
@@ -31,7 +31,7 @@ Intro text
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## What does the TAG leaderboard evaluate?
-In this leaderboard, you'll find execution accuracy comparisons of table question answering approaches on [TAG-Bench] (https://github.com/TAG-Research/TAG-Bench/tree/main). TAG-Bench contains complex queries requiring world knowledge or semantic reasoning that goes beyond the information explicitly available in the database.
+In this leaderboard, you'll find execution accuracy comparisons of table question answering approaches on [TAG-Bench](https://github.com/TAG-Research/TAG-Bench/tree/main). TAG-Bench contains complex queries requiring world knowledge or semantic reasoning that goes beyond the information explicitly available in the database.
 
 ## How is accuracy measured?
 Execution accuracy is measured as the number of exact matches to our annotated ground truth answers which are hand-labeled by experts.
```
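As the text above describes, execution accuracy is plain exact matching against hand-labeled answers. A hedged sketch of that metric (the function name and lack of normalization are illustrative assumptions; this is not the benchmark's evaluation script):

```python
def execution_accuracy(predictions: list[str], ground_truth: list[str]) -> float:
    """Fraction of predictions that exactly match the annotated answers."""
    assert len(predictions) == len(ground_truth)
    matches = sum(p == g for p, g in zip(predictions, ground_truth))
    return matches / len(ground_truth)

# Example: 2 of 3 answers match exactly -> 0.667
print(execution_accuracy(["Tesla", "42", "Paris"], ["Tesla", "42", "London"]))
```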