JeffYang52415 committed
Commit e9b694b · unverified · 1 Parent(s): 424ff6a

feat: add humaneval parser

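For orientation, a minimal usage sketch of the parser this commit introduces (it mirrors the __main__ example at the bottom of the new module; the import path, method names, and the attribute-style access to get_parsed_data are taken from the diff below):

    from llmdataparser.humaneval_parser import HumanEvalDatasetParser

    # Download the HumanEval dataset, then convert every row into a parse entry.
    parser = HumanEvalDatasetParser()
    parser.load()
    parser.parse()

    # get_parsed_data is accessed without parentheses, as in the __main__ block below.
    entries = parser.get_parsed_data
    print(entries[0].task_id, entries[0].entry_point)
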
llmdataparser/humaneval_parser.py ADDED
@@ -0,0 +1,145 @@
+from dataclasses import dataclass
+from typing import Any, ClassVar
+
+from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
+from llmdataparser.prompts import HUMANEVAL_SYSTEM_PROMPT
+
+
+@dataclass(frozen=True, kw_only=True, slots=True)
+class HumanEvalParseEntry(HuggingFaceParseEntry):
+    """Custom entry class for HumanEval, with fields specific to this dataset parser."""
+
+    task_id: str
+    task_name: str
+    entry_point: str
+    test: str
+
+    @classmethod
+    def create(
+        cls,
+        prompt: str,
+        answer: str,
+        raw_question: str,
+        task_id: str,
+        entry_point: str,
+        test: str,
+        task_name: str,
+    ) -> "HumanEvalParseEntry":
+        if not task_id:
+            raise ValueError("Task ID cannot be empty")
+        if not entry_point:
+            raise ValueError("Entry point cannot be empty")
+        return cls(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            raw_answer=answer,  # In HumanEval, the canonical solution is the raw answer
+            task_id=task_id,
+            entry_point=entry_point,
+            test=test,
+            task_name=task_name,
+        )
+
+
+class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
+    """Parser for the HumanEval dataset."""
+
+    _data_source: ClassVar[str] = "openai/openai_humaneval"
+    _default_task: ClassVar[str] = "openai_humaneval"
+    _task_names: ClassVar[list[str]] = ["openai_humaneval"]
+    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
+
+    def process_entry(
+        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
+    ) -> HumanEvalParseEntry:
+        """Process a single HumanEval entry."""
+        raw_question = row["prompt"]
+        answer = row["canonical_solution"]
+        task_id = row["task_id"]
+        entry_point = row["entry_point"]
+        test = row["test"]
+
+        # Combine system prompt with the function signature and docstring
+        prompt = f"{self._system_prompt}\n\n{raw_question}"
+
+        # Use task_name if provided, otherwise use default
+        task = task_name or self._get_current_task(row)
+
+        return HumanEvalParseEntry.create(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            task_id=task_id,
+            entry_point=entry_point,
+            test=test,
+            task_name=task,  # Guarantee non-None
+        )
+
+
+class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
+    """Parser for the HumanEval Plus (evalplus/humanevalplus) dataset."""
+
+    _data_source: ClassVar[str] = "evalplus/humanevalplus"
+    _default_task: ClassVar[str] = "default"
+    _task_names: ClassVar[list[str]] = ["default"]
+    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
+
+    def process_entry(
+        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
+    ) -> HumanEvalParseEntry:
+        """Process a single HumanEval Plus entry."""
+        raw_question = row["prompt"]
+        answer = row["canonical_solution"]
+        task_id = row["task_id"]
+        entry_point = row["entry_point"]
+        test = row["test"]
+
+        # Combine system prompt with the function signature and docstring
+        prompt = f"{self._system_prompt}\n\n{raw_question}"
+
+        # Use task_name if provided, otherwise use default
+        task = task_name or self._get_current_task(row)
+
+        return HumanEvalParseEntry.create(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            task_id=task_id,
+            entry_point=entry_point,
+            test=test,
+            task_name=task,  # task is guaranteed to be str from _get_current_task
+        )
+
+
+if __name__ == "__main__":
+    # Example usage
+    parser = HumanEvalDatasetParser()
+
+    # Load the dataset
+    parser.load()
+
+    # Parse all splits
+    parser.parse()
+
+    # Get parsed data
+    parsed_data = parser.get_parsed_data
+
+    # Print example entry
+    if parsed_data:
+        example = parsed_data[0]
+        print("\nExample parsed entry:")
+        print(f"Task ID: {example.task_id}")
+        print(f"Entry Point: {example.entry_point}")
+        print(f"Prompt:\n{example.prompt}")
+        print(f"Solution:\n{example.answer}")
+
+    parser = HumanEvalDatasetPlusParser()
+    parser.load()
+    parser.parse()
+    parsed_data = parser.get_parsed_data
+    if parsed_data:
+        example = parsed_data[0]
+        print("\nExample parsed entry:")
+        print(f"Task: {example.task_name}")
+        print(f"Question: {example.raw_question}")
+        print(f"Correct Answer: {example.answer}")
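Note that HuggingFaceDatasetParser, HuggingFaceParseEntry, and HUMANEVAL_SYSTEM_PROMPT are imported from modules that are not part of this diff. Purely for orientation, here is a hypothetical sketch of the base-class surface the new code appears to rely on; every name below is inferred from the calls above, not copied from the actual base_parser module:

    from dataclasses import dataclass
    from typing import Any, ClassVar, Generic, TypeVar

    @dataclass(frozen=True, kw_only=True)
    class HuggingFaceParseEntry:
        # Base fields that HumanEvalParseEntry.create forwards to cls(...)
        prompt: str
        answer: str
        raw_question: str
        raw_answer: str

    T = TypeVar("T", bound=HuggingFaceParseEntry)

    class HuggingFaceDatasetParser(Generic[T]):
        # Class-level metadata that each concrete parser overrides
        _data_source: ClassVar[str]
        _default_task: ClassVar[str]
        _task_names: ClassVar[list[str]]
        _default_system_prompt: ClassVar[str]

        _system_prompt: str  # instance attribute used when building prompts

        def load(self) -> None: ...   # fetch the dataset splits
        def parse(self) -> None: ...  # run process_entry over every row
        def _get_current_task(self, row: dict[str, Any]) -> str: ...

        @property
        def get_parsed_data(self) -> list[T]: ...

        @property
        def get_huggingface_link(self) -> str: ...
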
tests/test_humaneval_parser.py ADDED
@@ -0,0 +1,173 @@
+import pytest
+
+from llmdataparser.humaneval_parser import (
+    HumanEvalDatasetParser,
+    HumanEvalDatasetPlusParser,
+    HumanEvalParseEntry,
+)
+
+
+@pytest.fixture
+def sample_entry():
+    return {
+        "prompt": 'def add(a, b):\n """Add two numbers."""\n',
+        "canonical_solution": "def add(a, b):\n return a + b\n",
+        "task_id": "HumanEval/0",
+        "entry_point": "add",
+        "test": "def test_add(): assert add(2, 3) == 5",
+    }
+
+
+@pytest.fixture
+def parser():
+    return HumanEvalDatasetParser()
+
+
+@pytest.fixture
+def plus_parser():
+    return HumanEvalDatasetPlusParser()
+
+
+@pytest.fixture
+def plus_sample_entry():
+    return {
+        "prompt": 'def add(a, b):\n """Add two numbers."""\n',
+        "canonical_solution": "def add(a, b):\n return a + b\n",
+        "task_id": "HumanEval/0",
+        "entry_point": "add",
+        "test": "def test_add(): assert add(2, 3) == 5",
+    }
+
+
+def test_humaneval_parse_entry_creation():
+    """Test creation of HumanEvalParseEntry"""
+    entry = HumanEvalParseEntry.create(
+        prompt="test prompt",
+        answer="test answer",
+        raw_question="raw question",
+        task_id="HumanEval/1",
+        entry_point="test_func",
+        test="test case",
+        task_name="openai_humaneval",
+    )
+
+    assert entry.prompt == "test prompt"
+    assert entry.answer == "test answer"
+    assert entry.raw_question == "raw question"
+    assert entry.raw_answer == "test answer"  # Should match answer
+    assert entry.task_id == "HumanEval/1"
+    assert entry.entry_point == "test_func"
+    assert entry.test == "test case"
+    assert entry.task_name == "openai_humaneval"
+
+
+def test_humaneval_parse_entry_validation():
+    """Test validation of required fields"""
+    with pytest.raises(ValueError, match="Task ID cannot be empty"):
+        HumanEvalParseEntry.create(
+            prompt="test",
+            answer="test",
+            raw_question="test",
+            task_id="",  # Empty task_id should raise error
+            entry_point="test",
+            test="test",
+            task_name="test",
+        )
+
+    with pytest.raises(ValueError, match="Entry point cannot be empty"):
+        HumanEvalParseEntry.create(
+            prompt="test",
+            answer="test",
+            raw_question="test",
+            task_id="test",
+            entry_point="",  # Empty entry_point should raise error
+            test="test",
+            task_name="test",
+        )
+
+
+def test_process_entry(parser, sample_entry):
+    """Test processing of a single entry"""
+    result = parser.process_entry(sample_entry, task_name="openai_humaneval")
+
+    assert isinstance(result, HumanEvalParseEntry)
+    assert result.task_id == "HumanEval/0"
+    assert result.entry_point == "add"
+    assert (
+        result.prompt == f"{parser._default_system_prompt}\n\n{sample_entry['prompt']}"
+    )
+    assert result.answer == sample_entry["canonical_solution"]
+    assert result.test == sample_entry["test"]
+    assert result.task_name == "openai_humaneval"
+
+
+def test_parser_initialization(parser):
+    """Test parser initialization and properties"""
+    assert parser._data_source == "openai/openai_humaneval"
+    assert parser._default_task == "openai_humaneval"
+    assert parser._task_names == ["openai_humaneval"]
+    assert (
+        parser.get_huggingface_link
+        == "https://huggingface.co/datasets/openai/openai_humaneval"
+    )
+
+
+@pytest.mark.integration
+def test_parser_load_and_parse(parser):
+    """Integration test for loading and parsing data"""
+    parser.load()
+    parser.parse()
+    parsed_data = parser.get_parsed_data
+
+    assert len(parsed_data) > 0
+    assert all(isinstance(entry, HumanEvalParseEntry) for entry in parsed_data)
+
+
+def test_get_current_task(parser, sample_entry):
+    """Test _get_current_task method"""
+    task = parser._get_current_task(sample_entry)
+    assert task == parser._default_task
+
+
+def test_plus_parser_initialization(plus_parser):
+    """Test HumanEvalDatasetPlusParser initialization and properties"""
+    assert plus_parser._data_source == "evalplus/humanevalplus"
+    assert plus_parser._default_task == "default"
+    assert plus_parser._task_names == ["default"]
+    assert (
+        plus_parser.get_huggingface_link
+        == "https://huggingface.co/datasets/evalplus/humanevalplus"
+    )
+
+
+def test_plus_process_entry(plus_parser, plus_sample_entry):
+    """Test processing of a single entry in HumanEvalDatasetPlusParser"""
+    result = plus_parser.process_entry(plus_sample_entry, task_name="default")
+
+    assert isinstance(result, HumanEvalParseEntry)
+    assert result.task_id == "HumanEval/0"
+    assert result.entry_point == "add"
+    assert (
+        result.prompt
+        == f"{plus_parser._default_system_prompt}\n\n{plus_sample_entry['prompt']}"
+    )
+    assert result.answer == plus_sample_entry["canonical_solution"]
+    assert result.test == plus_sample_entry["test"]
+    assert result.task_name == "default"
+
+
+@pytest.mark.integration
+def test_plus_parser_load_and_parse(plus_parser):
+    """Integration test for loading and parsing data with HumanEvalDatasetPlusParser"""
+    plus_parser.load()
+    plus_parser.parse()
+    parsed_data = plus_parser.get_parsed_data
+
+    assert len(parsed_data) > 0
+    assert all(isinstance(entry, HumanEvalParseEntry) for entry in parsed_data)
+
+
+def test_plus_get_current_task(plus_parser, plus_sample_entry):
+    """Test _get_current_task method for HumanEvalDatasetPlusParser"""
+    task = plus_parser._get_current_task(plus_sample_entry)
+    assert task == plus_parser._default_task
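
The two load-and-parse tests above are tagged with @pytest.mark.integration because they download real datasets. A sketch of how the fast tests alone might be run, assuming the integration marker is registered in the project's pytest configuration (the CLI equivalent would be: pytest tests/test_humaneval_parser.py -m "not integration"):

    import pytest

    # Deselect the tests marked with @pytest.mark.integration.
    pytest.main(["tests/test_humaneval_parser.py", "-m", "not integration"])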