Spaces:
Runtime error
Runtime error
codellama-CodeLlama-7b-hf
/
Llama2-Code-Interpreter-main
/OpenCodeInterpreter
/evaluation
/evalplus
/tools
/sanitize.py
"""Purpose of this file: Sanitize the code produced by LLMs for the following reasons. | |
1. Vicuna generated code could miss one white space. We fix the white space to make Vicuna more capable. | |
2. {Our fault lol.} We find more EOFs tokens afterwards and truncate some messy code afterwards. | |
""" | |
import os | |
from tqdm import tqdm | |
from evalplus.data import ( | |
get_human_eval_plus, | |
get_mbpp_plus, | |
load_solutions, | |
write_directory, | |
write_jsonl, | |
) | |
from evalplus.sanitize import sanitize | |
def remove_unindented_lines(code, protect_before, execeptions, trim_tails): | |
lines = code.splitlines() | |
cut_idx = [] | |
cut_enabled = False | |
for i, line in enumerate(lines): | |
if not cut_enabled and line.startswith(protect_before): | |
cut_enabled = True | |
continue | |
if line.strip() == "": | |
continue | |
if any(line.startswith(e) for e in execeptions): | |
continue | |
lspace = len(line) - len(line.lstrip()) | |
if lspace == 0: | |
cut_idx.append(i) | |
if any(line.rstrip().startswith(t) for t in trim_tails): | |
# cut off everything behind | |
cut_idx.extend(list(range(i, len(lines)))) | |
break | |
return "\n".join([line for i, line in enumerate(lines) if i not in cut_idx]) | |
def to_four_space_indents(old_code): | |
new_code = "" | |
for line in old_code.splitlines(): | |
lspace = len(line) - len(line.lstrip()) | |
if lspace == 3: | |
new_code += " " | |
new_code += line + "\n" | |
return new_code | |
if __name__ == "__main__": | |
import argparse | |
import pathlib | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--samples", type=str, required=True) | |
parser.add_argument("--eofs", nargs="+", type=str, default=[]) | |
parser.add_argument("--inplace", action="store_true") | |
parser.add_argument( | |
"--rm-prefix-lines", type=str, help="Remove lines starting with this" | |
) | |
parser.add_argument( | |
"--dataset", required=True, type=str, choices=["humaneval", "mbpp"] | |
) | |
parser.add_argument( | |
"--debug-task", type=str, help="Enter the task ID to only sanitize that task." | |
) | |
args = parser.parse_args() | |
# task_id -> entry_point | |
entry_point = {} | |
if args.dataset == "humaneval": | |
dataset = get_human_eval_plus() | |
elif args.dataset == "mbpp": | |
dataset = get_mbpp_plus() | |
for task_id, problem in dataset.items(): | |
entry_point[task_id] = problem["entry_point"] | |
# make a new folder with "-sanitized" suffix | |
is_folder = os.path.isdir(args.samples) | |
target_path = pathlib.Path(args.samples) | |
if not args.inplace: | |
if is_folder: | |
new_name = target_path.name + "-sanitized" | |
else: | |
new_name = target_path.name.replace(".jsonl", "-sanitized.jsonl") | |
target_path = target_path.parent / new_name | |
target_path = str(target_path) | |
nsan = 0 | |
ntotal = 0 | |
new_solutions = [] | |
for solution in tqdm(load_solutions(args.samples)): | |
task_id = solution["task_id"] | |
dbg_identifier = solution["_identifier"] | |
if args.debug_task is not None and task_id != args.debug_task: | |
continue | |
ntotal += 1 | |
if "solution" in solution: | |
old_code = solution["solution"] | |
else: | |
assert "completion" in solution | |
old_code = dataset[task_id]["prompt"] + "\n" + solution["completion"] | |
old_code = old_code.strip() | |
new_code = sanitize( | |
old_code=old_code, | |
entry_point=entry_point[task_id], | |
rm_prefix_lines=args.rm_prefix_lines, | |
eofs=args.eofs, | |
).strip() | |
# if changed, print the message | |
if new_code != old_code: | |
msg = "Sanitized: " + dbg_identifier | |
if is_folder: | |
msg += " -> " + dbg_identifier.replace(args.samples, target_path) | |
print(msg) | |
nsan += 1 | |
new_solutions.append({"task_id": task_id, "solution": new_code}) | |
if is_folder: | |
write_directory(target_path, new_solutions) | |
else: | |
write_jsonl(target_path, new_solutions) | |
print(f"Sanitized {nsan} out of {ntotal} files.") | |