Commit 1b2e131
Parent(s): eef299c
show baseline

Files changed:
- src/display/utils.py (+7 -7)
- src/leaderboard/read_evals.py (+0 -1)
src/display/utils.py

@@ -100,7 +100,7 @@ for task in Tasks:
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
 auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str",
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
 auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])

@@ -128,7 +128,7 @@ class EvalQueueColumn: # Queue column
 baseline_row = {
     AutoEvalColumn.model.name: "<p>Baseline</p>",
     AutoEvalColumn.revision.name: "N/A",
-    AutoEvalColumn.precision.name:
+    AutoEvalColumn.precision.name: "?",
     AutoEvalColumn.merged.name: False,
     #AutoEvalColumn.average.name: 31.0,
     #AutoEvalColumn.arc.name: 25.0,

@@ -140,7 +140,7 @@ baseline_row = {
     AutoEvalColumn.dummy.name: "baseline",
     AutoEvalColumn.model_type.name: "",
     AutoEvalColumn.flagged.name: False,
-    AutoEvalColumn.model_type_symbol.name:
+    AutoEvalColumn.model_type_symbol.name: "?",
     AutoEvalColumn.architecture.name: None,
     AutoEvalColumn.weight_type.name: None,
     AutoEvalColumn.params.name: 0,

@@ -152,7 +152,7 @@ baseline_row = {

 baseline_list = []
 for task in Tasks:
-    baseline_row[task.
+    baseline_row[task.value.col_name] = task.value.baseline
     if task.value.baseline is not None:
         baseline_list.append(task.value.baseline)
 baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)

@@ -168,7 +168,7 @@ baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
 human_baseline_row = {
     AutoEvalColumn.model.name: "<p>Human performance</p>",
     AutoEvalColumn.revision.name: "N/A",
-    AutoEvalColumn.precision.name:
+    AutoEvalColumn.precision.name: "?",
     #AutoEvalColumn.average.name: 92.75,
     AutoEvalColumn.merged.name: False,
     #AutoEvalColumn.arc.name: 80.0,

@@ -180,7 +180,7 @@ human_baseline_row = {
     AutoEvalColumn.dummy.name: "human_baseline",
     AutoEvalColumn.model_type.name: "",
     AutoEvalColumn.flagged.name: False,
-    AutoEvalColumn.model_type_symbol.name:
+    AutoEvalColumn.model_type_symbol.name: "?",
     AutoEvalColumn.architecture.name: None,
     AutoEvalColumn.weight_type.name: None,
     AutoEvalColumn.params.name: 0,

@@ -192,7 +192,7 @@ human_baseline_row = {

 baseline_list = []
 for task in Tasks:
-    human_baseline_row[task.
+    human_baseline_row[task.value.col_name] = task.value.human_baseline
     if task.value.human_baseline is not None:
         baseline_list.append(task.value.human_baseline)
 human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
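The added lines follow one pattern: every task column in the baseline rows receives the task's reference score (which may be None), while the reported average is computed only over tasks that actually define a baseline. The sketch below illustrates that pattern in isolation; the Task fields, the two example tasks, and the plain string column keys are hypothetical stand-ins for the leaderboard's real Tasks and AutoEvalColumn definitions.

from dataclasses import dataclass
from enum import Enum
from typing import Optional

@dataclass
class Task:
    benchmark: str
    col_name: str
    baseline: Optional[float] = None        # reference score, None if unknown
    human_baseline: Optional[float] = None

class Tasks(Enum):
    # Hypothetical tasks for illustration only.
    task0 = Task("arc:challenge", "ARC", baseline=25.0, human_baseline=80.0)
    task1 = Task("hellaswag", "HellaSwag", baseline=None, human_baseline=95.0)

baseline_row = {"model": "<p>Baseline</p>", "precision": "?"}

baseline_list = []
for task in Tasks:
    # Every task column is filled, even when the baseline is None ...
    baseline_row[task.value.col_name] = task.value.baseline
    # ... but only defined baselines count toward the average.
    if task.value.baseline is not None:
        baseline_list.append(task.value.baseline)
baseline_row["average"] = round(sum(baseline_list) / len(baseline_list), 2)

print(baseline_row)
# {'model': '<p>Baseline</p>', 'precision': '?', 'ARC': 25.0, 'HellaSwag': None, 'average': 25.0}

Guarding the average with the None check keeps tasks without a published baseline from dragging the mean down; it does assume at least one task defines a baseline, otherwise the division raises ZeroDivisionError.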
src/leaderboard/read_evals.py

@@ -206,7 +206,6 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        print(model_result_filepath)
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
         if eval_result.full_model in dynamic_data: