Jude Khouja committed on
Commit
6226c1b
·
1 Parent(s): 36ce9ab

Change description and change color of baseline scores

Browse files
Files changed (3) hide show
  1. data_loader.py +2 -2
  2. tabs/leaderboard.py +5 -4
  3. utils.py +31 -0
data_loader.py CHANGED
@@ -269,8 +269,8 @@ HEADER_CONTENT = (
269
  </div>
270
 
271
  <div class="description">
272
- LingOly-TOO (L2) is a challenging reasoning benchmark designed to minimize the chance of answering by guessing.
273
- It is developed by rewriting (obfuscating) parts of questions and answers so that the chance of leakage in training data is minimum.
274
  <div class="highlight-question">
275
  "How do top LLMs reason on unseen linguistic questions?"
276
  </div>
 
269
  </div>
270
 
271
  <div class="description">
272
+ LingOly-TOO (L2) is a challenging linguistics reasoning benchmark designed to counteract answering without reasoning (e.g. by guessing or memorizing answers).
273
+ We permute <b>Ling</b>uistics <b>Oly</b>mpiad problems with <b>T</b>emplates and <b>O</b>rthographic <b>O</b>bfuscations. By rewriting (obfuscating) parts of questions and answers, the chance of benchmark leakage in training data is minimized.
274
  <div class="highlight-question">
275
  "How do top LLMs reason on unseen linguistic questions?"
276
  </div>
tabs/leaderboard.py CHANGED
@@ -3,13 +3,14 @@ from data_loader import METHODOLOGY
3
  from utils import (
4
  get_rank_badge,
5
  get_score_bar,
 
6
  get_type_badge,
7
  )
8
 
9
  def filter_leaderboard(df, sort_by):
10
  filtered_df = df.copy()
11
 
12
- if sort_by == "Score after obfuscation":
13
  filtered_df = filtered_df.sort_values(by="Obfuscated score", ascending=False)
14
  else:
15
  filtered_df = filtered_df.sort_values(by="Baseline score", ascending=False)
@@ -129,7 +130,7 @@ def filter_leaderboard(df, sort_by):
129
  <td class="vendor-cell">{row['Provider']}</td>
130
  <td>{get_type_badge(row['Type'])}</td>
131
  <td class="score-cell">{get_score_bar(row['Obfuscated score'])}</td>
132
- <td class="score-cell">{get_score_bar(row['Baseline score'])}</td>
133
  </tr>
134
  """
135
 
@@ -143,8 +144,8 @@ def create_leaderboard_tab(df, HEADER_CONTENT, CARDS):
143
  with gr.Row(equal_height=True):
144
  with gr.Column(scale=0.4):
145
  sort_by = gr.Dropdown(
146
- choices=["Score after obfuscation", "Score on all"],
147
- value="Score after obfuscation",
148
  label="Sort by",
149
  )
150
 
 
3
  from utils import (
4
  get_rank_badge,
5
  get_score_bar,
6
+ get_score_bar_secondary,
7
  get_type_badge,
8
  )
9
 
10
  def filter_leaderboard(df, sort_by):
11
  filtered_df = df.copy()
12
 
13
+ if sort_by == "Score on obfuscated questions":
14
  filtered_df = filtered_df.sort_values(by="Obfuscated score", ascending=False)
15
  else:
16
  filtered_df = filtered_df.sort_values(by="Baseline score", ascending=False)
 
130
  <td class="vendor-cell">{row['Provider']}</td>
131
  <td>{get_type_badge(row['Type'])}</td>
132
  <td class="score-cell">{get_score_bar(row['Obfuscated score'])}</td>
133
+ <td class="score-cell">{get_score_bar_secondary(row['Baseline score'])}</td>
134
  </tr>
135
  """
136
 
 
144
  with gr.Row(equal_height=True):
145
  with gr.Column(scale=0.4):
146
  sort_by = gr.Dropdown(
147
+ choices=["Score on obfuscated questions", "Score on all questions"],
148
+ value="Score on obfuscated questions",
149
  label="Sort by",
150
  )
151
 
utils.py CHANGED
@@ -67,6 +67,37 @@ def get_score_bar(score):
67
  ">{width:.1f}</span>
68
  </div>
69
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def get_chart_colors():
71
  # if is_dark_theme():
72
  # return {
 
67
  ">{width:.1f}</span>
68
  </div>
69
  """
70
+
71
+ def get_score_bar_secondary(score):
72
+ """Generate HTML for score bar with gradient styling"""
73
+ width = score * 100
74
+ return f"""
75
+ <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
76
+ <div style="
77
+ flex-grow: 1;
78
+ height: 8px;
79
+ background: var(--score-bg, rgba(255, 255, 255, 0.1));
80
+ border-radius: 4px;
81
+ overflow: hidden;
82
+ max-width: 200px;
83
+ ">
84
+ <div style="
85
+ width: {width}%;
86
+ height: 100%;
87
+ background: linear-gradient(90deg, var(--accent-gray, #1f2937), var(--accent-gray-light, #9ca3af));
88
+ border-radius: 4px;
89
+ transition: width 0.3s ease;
90
+ "></div>
91
+ </div>
92
+ <span style="
93
+ font-family: 'SF Mono', monospace;
94
+ font-weight: 600;
95
+ color: var(--text-primary, #ffffff);
96
+ min-width: 60px;
97
+ ">{width:.1f}</span>
98
+ </div>
99
+ """
100
+
101
  def get_chart_colors():
102
  # if is_dark_theme():
103
  # return {