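"""Mutant generation and evaluation for test-suite reduction (TSR).

For every task in the chosen dataset, mutants of the canonical solution are
generated with mutmut and cached under ``<report_dir>/mutation_cache``, then
evaluated with the EvalPlus harness to record which "plus" tests kill which
mutants.
"""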
import argparse
import json
import os
from typing import Any, Dict, List

from rich.progress import track

from evalplus.eval.utils import swallow_io
from evalplus.evaluate import evaluate
from tools.tsr.utils import (
    clean,
    execute_cmd,
    get_cmd_output,
    get_problems,
    get_task_ids,
    to_path,
)


def prepare_mutants(mutation_dir: str, dataset: str):
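    """Generate mutants for every task with mutmut and cache them on disk."""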
pwd = os.getcwd()
task_ids = get_task_ids(dataset)
problems = get_problems(dataset)
os.makedirs(mutation_dir, exist_ok=True)
for task_id in track(task_ids, "Generating mutants"):
task_dir = os.path.join(mutation_dir, to_path(task_id))
os.makedirs(task_dir, exist_ok=True)
        if any(f.startswith("m") for f in os.listdir(task_dir)):
            # Mutants already generated for this task; skip it
            continue
        # Write the ground-truth solution (prompt + canonical solution)
        groundtruth_code = (
            problems[task_id]["prompt"] + problems[task_id]["canonical_solution"]
        )
        with open(os.path.join(task_dir, "gt.py"), "w") as f:
            f.write(groundtruth_code)
        # Write a dummy pytest file so mutmut has a test suite to run against
        with open(os.path.join(task_dir, "test_dummy.py"), "w") as f:
            f.write("def test_dummy():\n    pass\n")
        # Use mutmut to generate mutants; the trailing redirect assumes
        # execute_cmd joins its arguments and runs them through a shell
        os.chdir(task_dir)
        clean(".mutmut-cache")
        execute_cmd(["mutmut", "run", "--paths-to-mutate=gt.py", "1>/dev/null"])
        try:
            # `mutmut results` lists generated mutant ids; its last non-empty
            # line ends with a range such as `1-42`, whose upper bound is
            # taken as the total number of mutants
            total_mutants = int(
                get_cmd_output(["mutmut", "results"]).split("\n")[-2].split("-")[-1]
            )
        except Exception:
            # No mutants were generated (or the output was not parsable)
            total_mutants = 0
        # Dump mutants: `mutmut apply` rewrites gt.py in place, so back up the
        # original, apply mutant i, rename the mutated file, then restore gt.py
        for i in range(1, total_mutants + 1):
            execute_cmd(["cp", "gt.py", "gt_copy.py"])
            execute_cmd(["mutmut", "apply", str(i)])
            execute_cmd(["mv", "gt.py", f"m{i}.py"])
            execute_cmd(["mv", "gt_copy.py", "gt.py"])
# Remove gt and dummy pytest
execute_cmd(["rm", "gt.py"])
execute_cmd(["rm", "test_dummy.py"])
clean(".mutmut-cache")
os.chdir(pwd)


def mutants_eval(mutation_dir: str, dataset: str):
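    """Evaluate all generated mutants with the EvalPlus harness.

    The argparse.Namespace below mirrors the CLI flags that
    evalplus.evaluate expects; swallow_io suppresses its console output.
    """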
args = argparse.Namespace(
dataset=dataset,
samples=mutation_dir,
base_only=False,
parallel=None,
i_just_wanna_run=False,
test_details=True,
min_time_limit=0.2,
gt_time_limit_factor=4.0,
mini=False,
)
print("Evaluating mutants... ", end="", flush=True)
with swallow_io():
evaluate(args)
print("Done")


def collect_mutation_info(
    eval_path: str, dataset: str
) -> Dict[str, Dict[str, List[Any]]]:
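    """Collect, per task, which plus-tests kill which mutants.

    Returns a mapping ``task_id -> test_id -> [("mutant", mutant_index), ...]``.
    """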
mutation_info = {task_id: {} for task_id in get_task_ids(dataset)}
assert os.path.isfile(
eval_path
), f"mutation testing result file {eval_path} missing!"
    with open(eval_path, "r") as f:
        eval_res = json.load(f)["eval"]
    for task_id, v in eval_res.items():
        # v["plus"] holds one (status, per-test results) entry per mutant
        for i_code, (status, res_list) in enumerate(v["plus"]):
            if status == "success":
                # The mutant passed the whole suite, i.e. it survived
                continue
            for i_test, res in enumerate(res_list):
                test_id = f"plus_{i_test}"
                if res is False:
                    # This plus-test kills mutant i_code
                    mutation_info[task_id].setdefault(test_id, []).append(
                        ("mutant", i_code)
                    )
return mutation_info


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset", required=True, type=str, choices=["humaneval", "mbpp"]
    )
parser.add_argument("--report_dir", required=True, type=str)
args = parser.parse_args()
mutation_dir = os.path.join(args.report_dir, "mutation_cache")
prepare_mutants(mutation_dir, args.dataset)
mutants_eval(mutation_dir, args.dataset)
collect_mutation_info(os.path.join(mutation_dir, "eval_results.json"), args.dataset)