File size: 2,871 Bytes
246d201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import logging

from utils import check_correctness

from evaluation.benchmarks.mint.tasks.base import Task

# Module-level logger obtained under the fixed name 'MINT' (not ``__name__``).
LOGGER = logging.getLogger('MINT')


class CodeGenTask(Task):
    """Generic code generation task instance.

    Stores the task id, the prompt and the reference test code, and judges a
    candidate solution by running it against that reference.
    """

    # NOTE: the ``id`` parameter shadows the builtin, but it is part of the
    # keyword interface of this constructor, so the name is kept as-is.
    def __init__(self, id: str, prompt: str, reference: str, **kwargs):
        """Store the task fields and forward the remaining kwargs to ``Task``.

        Args:
            id: Identifier stored as ``self._id``.
            prompt: Prompt text stored as ``self._prompt``.
            reference: Test code stored as ``self._reference``; it is passed to
                ``check_correctness`` as ``test_code`` by ``success``.
            **kwargs: Forwarded unchanged to ``Task.__init__``.
        """
        super().__init__(**kwargs)
        self._id = id
        self._prompt = prompt
        self._reference = reference

    def success(self, solution: str) -> bool:
        """Check whether the given solution completes the current task.

        The solution is first passed through ``extract_answer``; the result is
        then handed to ``check_correctness`` together with the stored reference
        test code and ``timeout=10``. Can be used to provide binary feedback.

        Args:
            solution: Candidate solution text.

        Returns:
            The ``'success'`` entry of the ``check_correctness`` result.
        """
        code_to_exec = self.extract_answer(solution)
        # Lazy %-style arguments: the (possibly large) code strings are only
        # interpolated when DEBUG logging is actually enabled.
        LOGGER.debug('CODE_TO_EXEC:\n%s', code_to_exec)
        LOGGER.debug('TEST_CODE:\n%s', self._reference)
        res = check_correctness(
            solution_code=code_to_exec, test_code=self._reference, timeout=10
        )
        return res['success']


class MBPPTask(CodeGenTask):
    """Code generation task registered under the task name ``'mbpp'``."""

    task_name = 'mbpp'

    @property
    def prompt(self) -> str:
        """Return the stored prompt with its triple double-quotes removed.

        Every run of three consecutive double-quote characters is dropped from
        the raw prompt, and leading/trailing whitespace is stripped afterwards.
        """
        without_quotes = self._prompt.replace('"""', '')
        return without_quotes.strip()

    def extract_answer(self, solution: str) -> str | None:
        """Return the solution text unchanged as the code to execute.

        The stop-word based truncation of the upstream MBPP task is kept below
        as commented-out reference only; it is not applied here.

        Upstream reference:
        https://github.com/bigcode-project/bigcode-evaluation-harness/blob/d61afde130005ecc65cf800ad8eca790a9bc2115/lm_eval/tasks/mbpp.py#L67
        """
        # Disabled upstream logic, kept for reference:
        # STOP_WORDS = ["\nclass", "\nassert", '\n"""', "\nprint", "\nif", "\n<|/"]
        # return re.split("|".join(STOP_WORDS), solution)[0].rstrip()
        answer = solution
        return answer


class HumanEvalTask(CodeGenTask):
    """Code generation task registered under the task name ``'humaneval'``."""

    task_name = 'humaneval'

    @property
    def prompt(self) -> str:
        """Return the stored prompt prefixed with a completion instruction.

        The instruction line and a blank line are placed in front of the raw
        prompt; the raw prompt itself is not modified.
        """
        instruction = 'Complete the following code:\n\n'
        return instruction + self._prompt

    def extract_answer(self, solution: str) -> str | None:
        """Return the solution text unchanged as the code to execute.

        The stop-word based truncation of the upstream HumanEval task is kept
        below as commented-out reference only; it is not applied here.

        Upstream reference:
        https://github.com/bigcode-project/bigcode-evaluation-harness/blob/d61afde130005ecc65cf800ad8eca790a9bc2115/lm_eval/tasks/humaneval.py#L56
        """
        # Disabled upstream logic, kept for reference:
        # STOP_WORDS = ["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif"]
        # # Remove the last block of the code containing stop_words for HumanEval
        # string_list = re.split("(%s)" % "|".join(STOP_WORDS), solution)
        # # last string should be ""
        # return "".join(string_list[:-2])
        answer = solution
        return answer