display button unification for benchmarks (#28)
- add button unification (670054face4e2bee73a77f83e6141c1a99e09a7c)
- app.py +17 -4
- src/display/utils.py +10 -10
app.py
CHANGED
@@ -89,6 +89,17 @@ def init_space():
         EVAL_REQUESTS_PATH, EVAL_COLS
     )
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+
+
+def add_benchmark_columns(shown_columns):
+    benchmark_columns = []
+    for benchmark in BENCHMARK_COLS:
+        if benchmark in shown_columns:
+            for c in COLS:
+                if benchmark in c and benchmark != c:
+                    benchmark_columns.append(c)
+    return benchmark_columns
+

 # Searching and filtering
 def update_table(
@@ -96,7 +107,8 @@ def update_table(
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)
     filtered_df = filter_queries(query, filtered_df)
-    df = select_columns(filtered_df, columns)
+    benchmark_columns = add_benchmark_columns(columns)
+    df = select_columns(filtered_df, columns + benchmark_columns)
     return df


@@ -270,18 +282,19 @@ with demo:
             # )

            # breakpoint()
-
+            benchmark_columns = add_benchmark_columns(shown_columns.value)
             leaderboard_table = gr.components.Dataframe(
                 value=(
                     leaderboard_df[
                         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                         + shown_columns.value
+                        + benchmark_columns
                         + [AutoEvalColumn.dummy.name]
                     ]
                     if leaderboard_df.empty is False
                     else leaderboard_df
                 ),
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value + benchmark_columns,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 interactive=False,
@@ -313,7 +326,7 @@ with demo:
     demo.load(load_query, inputs=[], outputs=[search_bar])

     for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
-        selector.
+        selector.change(
             update_table,
             [
                 hidden_leaderboard_table_for_search,
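In short, add_benchmark_columns takes the benchmark columns the user has checked and pulls back in every related per-benchmark metric column (the ones utils.py now marks hidden=True), so selecting a benchmark also shows its system metrics. A minimal, runnable sketch of that lookup follows; the BENCHMARK_COLS and COLS values here are made up for illustration, not the real lists from src/display/utils.py.

# Made-up placeholder values; the real lists are built in src/display/utils.py.
BENCHMARK_COLS = ["MMLU", "GSM8K"]
COLS = [
    "Model", "MMLU", "MMLU E2E (s)", "MMLU GPU Mem (GB)",
    "GSM8K", "GSM8K E2E (s)", "GSM8K Decode (tok/s)",
]

def add_benchmark_columns(shown_columns):
    # For each selected benchmark, collect every column whose name contains the
    # benchmark name but is not the score column itself (i.e. its metric columns).
    benchmark_columns = []
    for benchmark in BENCHMARK_COLS:
        if benchmark in shown_columns:
            for c in COLS:
                if benchmark in c and benchmark != c:
                    benchmark_columns.append(c)
    return benchmark_columns

print(add_benchmark_columns(["MMLU"]))
# ['MMLU E2E (s)', 'MMLU GPU Mem (GB)']

Because the match is a plain substring test (benchmark in c), any column whose header contains the benchmark's display name is treated as one of that benchmark's metric columns.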
src/display/utils.py
CHANGED
@@ -104,16 +104,16 @@ auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnConten
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
     # System performance metrics
-    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
-    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True, hidden=True)])
+    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True, hidden=True)])
     if task.value.benchmark in MULTIPLE_CHOICEs:
         continue
-    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
-    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
+    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])


 # Model information
@@ -242,8 +242,8 @@ class Precision(Enum):


 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
-TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
 COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]

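On the utils.py side, every per-benchmark system metric column is now created with hidden=True, and COLS / TYPES stop filtering hidden columns out, so app.py can still look those columns up by name while the default column picker ignores them. A rough, runnable sketch of that interaction follows; it uses a simplified stand-in for the ColumnContent dataclass and two hypothetical entries rather than the real auto_eval_column_dict / AutoEvalColumn machinery.

from dataclasses import dataclass

# Simplified stand-in for the ColumnContent dataclass defined earlier in utils.py;
# the field names mirror the positional and keyword arguments used in the diff.
@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False

# Two hypothetical columns: a benchmark score and one of its hidden metric columns.
columns = [
    ColumnContent("MMLU", "number", True),
    ColumnContent("MMLU E2Es", "number", True, hidden=True),
]

COLS = [c.name for c in columns]  # no hidden filter: metric columns stay addressable
COLS_LITE = [c.name for c in columns if c.displayed_by_default and not c.hidden]

print(COLS)       # ['MMLU', 'MMLU E2Es']
print(COLS_LITE)  # ['MMLU']

If COLS still excluded hidden columns, the column expansion in app.py would never see the metric columns and would have nothing to add back to the leaderboard view.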