# Provenance (Hugging Face Spaces scrape header, status "Runtime error"):
# codellama/CodeLlama-7b-hf — Llama2-Code-Interpreter-main/
# OpenCodeInterpreter/evaluation/evalplus/tools/_experimental/set_cover.py
import json
import os

from rich.progress import track

from evalplus.data import get_human_eval_plus, get_human_eval_plus_inputs

# Root directory holding one sub-directory per evaluated model; each is
# expected to contain an eval_results.json produced by evalplus.
LLM_HOME_PATH = "/JawTitan/EvalPlus/humaneval"
model_paths = os.listdir(LLM_HOME_PATH)

# list() is required here: dict.values() returns a view that does NOT
# support integer indexing, and the __main__ section below indexes
# problems[i] directly.
problems = list(get_human_eval_plus().values())
new_inputs = get_human_eval_plus_inputs()

# task_id -> {test_id -> [hash of each generated solution that test fails]}
# One dict is super huge, so split them into separate JSON files
cover_info = {f"HumanEval_{i}": {} for i in range(164)}
def get_cover_info():
    """Populate the module-level ``cover_info`` table.

    For every model directory under ``LLM_HOME_PATH``, read its
    eval_results.json and record, per task and per test, the hashes of the
    generated solutions that FAIL that test.  A test "covers" a solution
    when it is able to reject it; these sets feed the greedy set-cover
    solver in ``__main__``.
    """
    for model_path in track(model_paths, description="Collecting sets..."):
        # Model result dirs end with a digit (e.g. a temperature suffix);
        # skip anything else lying around in LLM_HOME_PATH.
        if not model_path[-1].isdigit():
            continue
        eval_json_path = os.path.join(LLM_HOME_PATH, model_path, "eval_results.json")
        if not os.path.exists(eval_json_path):
            continue
        with open(eval_json_path, "r") as f:
            eval_results = json.load(f)["eval"]
        for task_id, v in eval_results.items():
            # Identical bookkeeping for the base and the plus test suites
            # (the original duplicated this loop verbatim for each suite).
            for suite in ("base", "plus"):
                for i_code, (status, res_list) in enumerate(v[suite]):
                    if status == "success":
                        # Solution passes the whole suite: no test covers it.
                        continue
                    code_id = hash(v["files"][i_code])
                    for i_test, test_result in enumerate(res_list):
                        # Keep the original exact comparison: only a literal
                        # False counts as "this test rejects this solution".
                        if test_result == False:  # noqa: E712
                            test_id = f"{suite}_{i_test}"
                            cover_info[task_id].setdefault(test_id, []).append(code_id)
if __name__ == "__main__":
    get_cover_info()
    for i in track(range(164), description="Solving set covering..."):
        task_id = f"HumanEval_{i}"
        tests = cover_info[task_id]

        # Build the universe U of all rejectable-solution ids and the
        # candidate list q of (test_name, covered-set) pairs.
        q, U = [], set()
        for test_name, test_cover in tests.items():
            cover_set = set(test_cover)
            q.append((test_name, cover_set))
            U |= cover_set

        # Greedy set cover: repeatedly pick the test that covers the most
        # still-uncovered solutions (the classic ln(n)-approximation).
        min_cover = []
        while U:
            # Fix: the accumulator must be an empty *set*, not {} (a dict);
            # "U - {}" would raise TypeError if it were ever left unreplaced.
            max_uncover_set, max_test_name = set(), ""
            for test_name, cover_set in q:
                if len(cover_set) > len(max_uncover_set):
                    max_uncover_set = cover_set
                    max_test_name = test_name
            min_cover.append(max_test_name)
            U -= max_uncover_set
            # Shrink every remaining candidate to what it still covers and
            # drop the ones that have become useless.
            remaining = []
            for test_name, cover_set in q:
                still_covered = U & cover_set
                if still_covered:
                    remaining.append((test_name, still_covered))
            q = remaining

        # Materialize the chosen tests' concrete inputs.
        d = {"task_id": task_id, "inputs": []}
        for test in min_cover:
            # Test ids look like "base_3" / "plus_17".
            t, n = test.split("_")
            n = int(n)
            if t == "base":
                d["inputs"].append(problems[i]["base_input"][n])
            else:
                print(task_id, n)
                d["inputs"].append(new_inputs[task_id][n])

        # NOTE(review): mode "a" appends across runs — delete the output
        # file before re-running or entries accumulate.
        with open("HumanEvalPlusInputsMin.jsonl", "a") as f:
            f.write(json.dumps(d) + "\n")