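# NOTE: the lines originally found here were residue from the file-hosting web
# page, not part of the script: "alKoGolik's picture", "Upload 169 files",
# "c87c295 verified", "raw", "history blame", "2.41 kB".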
import json
import os
import numpy as np
from evalplus.data import get_human_eval_plus, get_human_eval_plus_inputs
def toggle_failures(bitvec, runs):
    """XOR the failure mask of every run in ``runs`` into ``bitvec`` in place.

    Args:
        bitvec: 1-D boolean numpy array with one slot per test input.
        runs: iterable of ``(status, details)`` pairs. ``details`` is either
            ``None`` or a sequence of flags (presumably per-test pass
            results -- confirm against the producer of eval_results.json);
            its logical negation is treated as the failure mask.

    Runs whose ``details`` is ``None`` are skipped. ``details`` may be shorter
    than ``bitvec``; only the leading ``len(details)`` slots are touched.
    """
    for _status, details in runs:
        if details is None:  # all fail => skip
            continue
        fails = np.logical_not(details)
        span = len(details)
        # NOTE(review): XOR toggles the bit, so a test that fails in an even
        # number of runs ends up cleared again. Confirm that an OR ("failed
        # in at least one run") was not what was intended.
        bitvec[:span] = np.logical_xor(bitvec[:span], fails)


def main():
    """Scan evaluation results under ``--root`` and report selected inputs.

    For every sub-directory of ``--root`` whose name ends in a digit and that
    contains an ``eval_results.json``, the failure masks stored under its
    ``"eval"`` key are folded into one bit vector per task (separately for the
    base and the plus inputs). The inputs whose bit is set at the end are
    gathered per task and the resulting sizes are printed.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--root", type=str, default="/JawTitan/EvalPlus/humaneval")
    args = parser.parse_args()

    plus_inputs = get_human_eval_plus_inputs()
    # Materialize as a list: the problems are indexed by position further
    # down, and a dict ``values()`` view does not support subscripting.
    problems = list(get_human_eval_plus().values())

    base_bvs = {}
    plus_bvs = {}
    id2idx = {}

    for i, problem in enumerate(problems):
        task_id = problem["task_id"]
        id2idx[task_id] = i
        base_bvs[task_id] = np.zeros(len(problem["base_input"]), dtype=bool)
        plus_bvs[task_id] = np.zeros(len(plus_inputs[task_id]), dtype=bool)

    for path in os.listdir(args.root):
        eval_json_path = os.path.join(args.root, path, "eval_results.json")
        if not os.path.isfile(eval_json_path) or not path[-1].isdigit():
            print(f"skip {path}")
            continue
        # Context manager so the file handle is closed deterministically.
        with open(eval_json_path, "r") as f:
            res = json.load(f)["eval"]

        for task_id, v in res.items():
            toggle_failures(base_bvs[task_id], v["base"])
            toggle_failures(plus_bvs[task_id], v["plus"])

    # ``testsuite`` is only collected here; nothing in the visible script
    # writes it out or reads it back.
    testsuite = []
    new_sizes = []
    for task_id, bbv in base_bvs.items():
        base_input = problems[id2idx[task_id]]["base_input"]
        pbv = plus_bvs[task_id]
        # Base inputs first, then plus inputs, each in index order.
        new_inputs = [base_input[i] for i in np.nonzero(bbv)[0]]
        new_inputs.extend(plus_inputs[task_id][i] for i in np.nonzero(pbv)[0])
        testsuite.append({"task_id": task_id, "inputs": new_inputs})
        print(
            task_id, f" org base {len(bbv)}; org plus {len(pbv)}; new {len(new_inputs)}"
        )
        new_sizes.append(len(new_inputs))

    new_sizes = np.array(new_sizes)
    print(f"{new_sizes.mean() = }, {new_sizes.min() = }, {new_sizes.max() = }")


if __name__ == "__main__":
    main()