# Spaces: codellama-CodeLlama-7b-hf (Runtime error)
# Path: Llama2-Code-Interpreter-main/OpenCodeInterpreter/evaluation/evalplus/tools/_experimental/topset_distill.py
"""Collect the HumanEval+ test inputs flagged by recorded evaluation failures.

For every directory under ``--root`` whose name ends in a digit and that
contains an ``eval_results.json``, the per-test pass/fail details of each
sample are folded into one boolean vector per task (kept separately for the
base inputs and the plus inputs).  The inputs whose bit is set afterwards are
gathered per task, and the original and new suite sizes are printed.
"""
import json
import os

import numpy as np

from evalplus.data import get_human_eval_plus, get_human_eval_plus_inputs

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--root", type=str, default="/JawTitan/EvalPlus/humaneval")
    args = parser.parse_args()

    plus_inputs = get_human_eval_plus_inputs()
    # Materialize as a list: problems are looked up by position further down,
    # and a dict ``values()`` view cannot be subscripted.
    problems = list(get_human_eval_plus().values())

    base_bvs = {}  # task_id -> bool vector, one bit per base input
    plus_bvs = {}  # task_id -> bool vector, one bit per plus input
    id2idx = {}  # task_id -> position of the task in ``problems``

    for i, problem in enumerate(problems):
        task_id = problem["task_id"]
        id2idx[task_id] = i
        base_bvs[task_id] = np.zeros(len(problem["base_input"]), dtype=bool)
        plus_bvs[task_id] = np.zeros(len(plus_inputs[task_id]), dtype=bool)

    for path in os.listdir(args.root):
        eval_json_path = os.path.join(args.root, path, "eval_results.json")
        # Only entries that hold a result file and whose name ends in a digit
        # are processed; everything else is reported and skipped.
        if not os.path.isfile(eval_json_path) or not path[-1].isdigit():
            print(f"skip {path}")
            continue
        # Context manager so the handle is closed even if parsing fails.
        with open(eval_json_path, "r", encoding="utf-8") as f:
            res = json.load(f)["eval"]

        for task_id, v in res.items():
            # Base results are folded in first, then plus results.
            for kind, bvs in (("base", base_bvs), ("plus", plus_bvs)):
                for _status, details in v[kind]:
                    if details is None:  # all fail => skip
                        continue
                    fails = np.logical_not(details)
                    # ``details`` may be shorter than the full input list, so
                    # only the leading ``len(details)`` bits are updated.
                    # NOTE(review): XOR toggles a bit every time a sample
                    # fails that test, so a test failed by an even number of
                    # samples ends up cleared -- confirm OR was not intended.
                    bvs[task_id][: len(details)] = np.logical_xor(
                        bvs[task_id][: len(details)], fails
                    )

    # NOTE(review): ``testsuite`` is built but not written anywhere in the
    # code visible here.
    testsuite = []

    new_sizes = []
    for task_id, bbv in base_bvs.items():
        base_inputs = problems[id2idx[task_id]]["base_input"]
        pbv = plus_bvs[task_id]
        # Keep the flagged base inputs first, followed by the flagged plus inputs.
        new_inputs = [base_inputs[i] for i in np.nonzero(bbv)[0]]
        new_inputs.extend(plus_inputs[task_id][i] for i in np.nonzero(pbv)[0])
        testsuite.append({"task_id": task_id, "inputs": new_inputs})
        print(
            task_id, f" org base {len(bbv)}; org plus {len(pbv)}; new {len(new_inputs)}"
        )
        new_sizes.append(len(new_inputs))

    new_sizes = np.array(new_sizes)
    print(f"{new_sizes.mean() = }, {new_sizes.min() = }, {new_sizes.max() = }")