Commit 49f3697
1 Parent(s): ea50e77
add
src/leaderboard/read_evals.py
CHANGED
@@ -73,6 +73,7 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
+            print(f"{task}: {accs}")
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -175,7 +176,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-
+        print(f"Model result filepath: {model_result_filepath}")
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         print(eval_result.results)
         # print(eval_result)