Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
727eb6f
1
Parent(s):
f477fda
added Validated tab
Browse files
- app.py +24 -6
- dabstep_benchmark/leaderboard.py +5 -2
app.py
CHANGED
@@ -18,13 +18,31 @@ if __name__ == "__main__":
|
|
18 |
with demo:
|
19 |
gr.Markdown(TITLE)
|
20 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
)
|
29 |
# create a Gradio event listener that runs when the page is loaded to populate the dataframe
|
30 |
demo.load(lambda: generate_leaderboard_df(), None, leaderboard_table)
|
|
|
18 |
with demo:
|
19 |
gr.Markdown(TITLE)
|
20 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
21 |
+
|
22 |
+
# Generate leaderboard data once
|
23 |
+
leaderboard_df = generate_leaderboard_df()
|
24 |
|
25 |
+
# Filter validated and unvalidated
|
26 |
+
validated = leaderboard_df[leaderboard_df["validated"] == True].drop(columns=["validated"]).copy()
|
27 |
+
unvalidated = leaderboard_df[leaderboard_df["validated"] == False].drop(columns=["validated"]).copy()
|
28 |
+
|
29 |
+
|
30 |
+
with gr.Tab("Validated"):
|
31 |
+
leaderboard_table = gr.components.Dataframe(
|
32 |
+
value=validated,
|
33 |
+
datatype=["markdown", "str", "str", "str", "markdown", "str", "str", "str"],
|
34 |
+
interactive=False,
|
35 |
+
column_widths=["20%"],
|
36 |
+
wrap=True,
|
37 |
+
)
|
38 |
+
|
39 |
+
with gr.Tab("Unvalidated"):
|
40 |
+
leaderboard_table = gr.components.Dataframe(
|
41 |
+
value=unvalidated,
|
42 |
+
datatype=["markdown", "str", "str", "str", "markdown", "str", "str", "str"],
|
43 |
+
interactive=False,
|
44 |
+
column_widths=["20%"],
|
45 |
+
wrap=True,
|
46 |
)
|
47 |
# create a Gradio event listener that runs when the page is loaded to populate the dataframe
|
48 |
demo.load(lambda: generate_leaderboard_df(), None, leaderboard_table)
|
dabstep_benchmark/leaderboard.py
CHANGED
@@ -154,6 +154,7 @@ def process_submission(
|
|
154 |
submission_df["organisation"] = f"{organisation} | user {profile.username}"
|
155 |
submission_df["repo_url"] = repo_url
|
156 |
submission_df["date"] = datetime.date.today().strftime("%d-%m-%Y")
|
|
|
157 |
|
158 |
# add empty reasoning trace if one is not provided to not break schema of datasets
|
159 |
if "reasoning_trace" not in submission_df.columns:
|
@@ -242,7 +243,8 @@ def generate_leaderboard_df() -> pd.DataFrame:
|
|
242 |
"model_family",
|
243 |
"organisation",
|
244 |
"repo_url",
|
245 |
-
"date"
|
|
|
246 |
]
|
247 |
]
|
248 |
)
|
@@ -288,7 +290,8 @@ def generate_leaderboard_df() -> pd.DataFrame:
|
|
288 |
"organisation": "Organization",
|
289 |
"repo_url": "Repo URL",
|
290 |
"model_family": "Model Family",
|
291 |
-
"date": "Date"
|
|
|
292 |
}
|
293 |
col_order = [new_col_name for new_col_name in col_map.values()]
|
294 |
leaderboard_df.rename(columns=col_map, inplace=True)
|
|
|
154 |
submission_df["organisation"] = f"{organisation} | user {profile.username}"
|
155 |
submission_df["repo_url"] = repo_url
|
156 |
submission_df["date"] = datetime.date.today().strftime("%d-%m-%Y")
|
157 |
+
submission_df["validated"] = False #unvalidated by default
|
158 |
|
159 |
# add empty reasoning trace if one is not provided to not break schema of datasets
|
160 |
if "reasoning_trace" not in submission_df.columns:
|
|
|
243 |
"model_family",
|
244 |
"organisation",
|
245 |
"repo_url",
|
246 |
+
"date",
|
247 |
+
"validated"
|
248 |
]
|
249 |
]
|
250 |
)
|
|
|
290 |
"organisation": "Organization",
|
291 |
"repo_url": "Repo URL",
|
292 |
"model_family": "Model Family",
|
293 |
+
"date": "Date",
|
294 |
+
"validated": "validated"
|
295 |
}
|
296 |
col_order = [new_col_name for new_col_name in col_map.values()]
|
297 |
leaderboard_df.rename(columns=col_map, inplace=True)
|