Commit 49f3697
1 Parent(s): ea50e77
add
src/leaderboard/read_evals.py
CHANGED
@@ -73,6 +73,7 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
+            print(f"{task}: {accs}")
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -175,7 +176,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-
+        print(f"Model result filepath: {model_result_filepath}")
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         print(eval_result.results)
         # print(eval_result)