Update space
Browse files
- app.py +20 -1
- src/leaderboard/read_evals.py +1 -0
app.py
CHANGED
|
@@ -97,6 +97,25 @@ def init_leaderboard(dataframe):
|
|
| 97 |
)
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
demo = gr.Blocks(css=custom_css)
|
| 101 |
with demo:
|
| 102 |
gr.HTML(TITLE)
|
|
@@ -104,7 +123,7 @@ with demo:
|
|
| 104 |
|
| 105 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 106 |
with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=0):
|
| 107 |
-
leaderboard =
|
| 108 |
|
| 109 |
with gr.TabItem("🔢 Math", elem_id="math-tab-table", id=1):
|
| 110 |
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
|
| 100 |
+
|
| 101 |
+
def overall_leaderboard(dataframe):
|
| 102 |
+
if dataframe is None or dataframe.empty:
|
| 103 |
+
raise ValueError("Leaderboard DataFrame is empty or None.")
|
| 104 |
+
|
| 105 |
+
return Leaderboard(
|
| 106 |
+
value=dataframe,
|
| 107 |
+
datatype=[c.type for c in fields(AutoEvalColumn)],
|
| 108 |
+
select_columns=None,
|
| 109 |
+
search_columns=SearchColumns(primary_column=AutoEvalColumn.model.name, secondary_columns=[],
|
| 110 |
+
placeholder="Search by the model name",
|
| 111 |
+
label="Searching"),
|
| 112 |
+
hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
|
| 113 |
+
filter_columns=None,
|
| 114 |
+
interactive=False,
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
| 119 |
demo = gr.Blocks(css=custom_css)
|
| 120 |
with demo:
|
| 121 |
gr.HTML(TITLE)
|
|
|
|
| 123 |
|
| 124 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 125 |
with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=0):
|
| 126 |
+
leaderboard = overall_leaderboard(LEADERBOARD_DF)
|
| 127 |
|
| 128 |
with gr.TabItem("🔢 Math", elem_id="math-tab-table", id=1):
|
| 129 |
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -194,3 +194,4 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
| 194 |
continue
|
| 195 |
|
| 196 |
return results
|
|
|
|
|
|
| 194 |
continue
|
| 195 |
|
| 196 |
return results
|
| 197 |
+
|