feat: add mbpp parser
Browse files- llmdataparser/__init__.py +24 -1
- llmdataparser/base_parser.py +2 -0
- llmdataparser/mbpp_parser.py +107 -0
- llmdataparser/mmlu_parser.py +2 -1
- llmdataparser/prompts.py +17 -0
- tests/test_mbpp_parser.py +154 -0
- tests/test_mmlu_parser.py +2 -2
llmdataparser/__init__.py
CHANGED
@@ -2,7 +2,19 @@
|
|
2 |
from typing import Type
|
3 |
|
4 |
from .base_parser import DatasetParser
|
5 |
-
from .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
class ParserRegistry:
|
@@ -31,3 +43,14 @@ class ParserRegistry:
|
|
31 |
|
32 |
# Register parsers
|
33 |
ParserRegistry.register_parser("mmlu", MMLUDatasetParser)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from typing import Type
|
3 |
|
4 |
from .base_parser import DatasetParser
|
5 |
+
from .bbh_parser import BBHDatasetParser
|
6 |
+
from .gsm8k_parser import GSM8KDatasetParser
|
7 |
+
from .humaneval_parser import HumanEvalDatasetParser, HumanEvalDatasetPlusParser
|
8 |
+
from .ifeval_parser import IFEvalDatasetParser
|
9 |
+
from .math_parser import MATHDatasetParser
|
10 |
+
from .mbpp_parser import MBPPDatasetParser
|
11 |
+
from .mgsm_parser import MGSMDatasetParser
|
12 |
+
from .mmlu_parser import (
|
13 |
+
MMLUDatasetParser,
|
14 |
+
MMLUProDatasetParser,
|
15 |
+
MMLUReduxDatasetParser,
|
16 |
+
TMMLUPlusDatasetParser,
|
17 |
+
)
|
18 |
|
19 |
|
20 |
class ParserRegistry:
|
|
|
43 |
|
44 |
# Register parsers
|
45 |
ParserRegistry.register_parser("mmlu", MMLUDatasetParser)
|
46 |
+
ParserRegistry.register_parser("mmlupro", MMLUProDatasetParser)
|
47 |
+
ParserRegistry.register_parser("mmluredux", MMLUReduxDatasetParser)
|
48 |
+
ParserRegistry.register_parser("tmmluplus", TMMLUPlusDatasetParser)
|
49 |
+
ParserRegistry.register_parser("gsm8k", GSM8KDatasetParser)
|
50 |
+
ParserRegistry.register_parser("math", MATHDatasetParser)
|
51 |
+
ParserRegistry.register_parser("mgsm", MGSMDatasetParser)
|
52 |
+
ParserRegistry.register_parser("humaneval", HumanEvalDatasetParser)
|
53 |
+
ParserRegistry.register_parser("humanevalplus", HumanEvalDatasetPlusParser)
|
54 |
+
ParserRegistry.register_parser("bbh", BBHDatasetParser)
|
55 |
+
ParserRegistry.register_parser("mbpp", MBPPDatasetParser)
|
56 |
+
ParserRegistry.register_parser("ifeval", IFEvalDatasetParser)
|
llmdataparser/base_parser.py
CHANGED
@@ -80,6 +80,8 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
|
|
80 |
_default_task: ClassVar[str]
|
81 |
# _default_system_prompt is the default system prompt to use if no system prompt is specified
|
82 |
_default_system_prompt: ClassVar[str]
|
|
|
|
|
83 |
|
84 |
def __init__(self, system_prompt: str | None = None, **kwargs):
|
85 |
"""
|
|
|
80 |
_default_task: ClassVar[str]
|
81 |
# _default_system_prompt is the default system prompt to use if no system prompt is specified
|
82 |
_default_system_prompt: ClassVar[str]
|
83 |
+
# _hidden_task_names is the list of task names that are hidden in the dataset, e.g. ["math", "physics", "chemistry"]
|
84 |
+
_hidden_task_names: ClassVar[list[str]] = []
|
85 |
|
86 |
def __init__(self, system_prompt: str | None = None, **kwargs):
|
87 |
"""
|
llmdataparser/mbpp_parser.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Any, ClassVar
|
3 |
+
|
4 |
+
from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
|
5 |
+
from llmdataparser.prompts import MBPP_SYSTEM_PROMPT
|
6 |
+
|
7 |
+
|
8 |
+
@dataclass(frozen=True, kw_only=True, slots=True)
class MBPPParseEntry(HuggingFaceParseEntry):
    """Immutable record describing one parsed MBPP problem.

    Only the MBPP-specific columns are declared here; the remaining fields
    passed by ``create`` (``prompt``, ``answer``, ``raw_question``,
    ``raw_answer`` and ``task_name``) are expected to be declared by
    ``HuggingFaceParseEntry``.
    """

    # Problem identifier; ``create`` rejects anything that is not an int.
    task_id: int
    # Test statements shipped with the problem.
    test_list: list[str]
    # Code to run before the tests (may be an empty string).
    test_setup_code: str
    # Additional test statements (may be an empty list).
    challenge_test_list: list[str]
    # Origin file of the problem (may be an empty string).
    source_file: str

    @classmethod
    def create(
        cls,
        prompt: str,
        answer: str,
        raw_question: str,
        task_id: int,
        test_list: list[str],
        test_setup_code: str,
        challenge_test_list: list[str],
        task_name: str,
        source_file: str,
    ) -> "MBPPParseEntry":
        """Validate ``task_id`` and build an entry.

        The reference solution passed as ``answer`` is stored both as
        ``answer`` and as ``raw_answer``.

        Raises:
            ValueError: If ``task_id`` is not an ``int``.
        """
        if isinstance(task_id, int):
            return cls(
                task_id=task_id,
                task_name=task_name,
                source_file=source_file,
                prompt=prompt,
                raw_question=raw_question,
                answer=answer,
                # MBPP has no separate raw form: the code solution is reused.
                raw_answer=answer,
                test_list=test_list,
                test_setup_code=test_setup_code,
                challenge_test_list=challenge_test_list,
            )

        raise ValueError("Task ID must be an integer")
|
46 |
+
|
47 |
+
|
48 |
+
class MBPPDatasetParser(HuggingFaceDatasetParser[MBPPParseEntry]):
    """Parser for the MBPP (Mostly Basic Python Programming) dataset.

    Converts raw dataset rows into ``MBPPParseEntry`` objects whose prompt is
    the parser's system prompt followed by the task description.
    """

    _data_source: ClassVar[str] = "google-research-datasets/mbpp"
    _default_task: ClassVar[str] = "full"  # Can be 'full' or 'sanitized'
    _task_names: ClassVar[list[str]] = ["full", "sanitized"]
    _default_system_prompt: ClassVar[str] = MBPP_SYSTEM_PROMPT

    def process_entry(
        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
    ) -> MBPPParseEntry:
        """Process a single MBPP entry.

        Args:
            row: Raw dataset row. ``code``, ``task_id`` and ``test_list`` are
                required. The task description is read from ``text`` or,
                failing that, ``prompt``. All other columns are optional.
            task_name: Task label to store on the entry. When omitted (or
                empty) the label comes from ``self._get_current_task(row)``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The parsed ``MBPPParseEntry``.

        Raises:
            KeyError: If ``code``, ``task_id`` or ``test_list`` is missing.
            ValueError: If ``task_id`` is not an integer (raised by
                ``MBPPParseEntry.create``).
        """
        # Fall back to "" rather than None so the prompt never contains the
        # literal text "None" and ``raw_question`` stays a ``str``.
        raw_question = row.get("text") or row.get("prompt") or ""
        answer = row["code"]
        task_id = row["task_id"]
        test_list = row["test_list"]

        test_setup_code = row.get("test_setup_code")
        if test_setup_code is None:
            # Per the dataset card, the 'sanitized' configuration carries its
            # setup as a list of import statements under ``test_imports``.
            test_imports = row.get("test_imports") or []
            if isinstance(test_imports, str):
                test_setup_code = test_imports
            else:
                test_setup_code = "\n".join(test_imports)

        challenge_test_list = row.get("challenge_test_list", [])
        source_file = row.get("source_file", "")

        # Combine system prompt with the task description
        prompt = f"{self._system_prompt}\n\nTask: {raw_question}"

        # Use task_name if provided, otherwise ask the base class
        task = task_name or self._get_current_task(row)

        return MBPPParseEntry.create(
            prompt=prompt,
            answer=answer,
            raw_question=raw_question,
            task_id=task_id,
            test_list=test_list,
            test_setup_code=test_setup_code,
            challenge_test_list=challenge_test_list,
            task_name=task,
            source_file=source_file,
        )
|
85 |
+
|
86 |
+
|
87 |
+
if __name__ == "__main__":
    # Demo: load the dataset, parse every split and show the first entry.
    mbpp_parser = MBPPDatasetParser()
    mbpp_parser.load()
    mbpp_parser.parse()

    entries = mbpp_parser.get_parsed_data

    # Nothing is printed when no entries were produced.
    if entries:
        first = entries[0]
        report_lines = (
            "\nExample parsed entry:",
            f"Task ID: {first.task_id}",
            f"Task: {first.raw_question}",
            f"Solution:\n{first.answer}",
            f"Test Cases:\n{first.test_list}",
        )
        for report_line in report_lines:
            print(report_line)
|
llmdataparser/mmlu_parser.py
CHANGED
@@ -339,7 +339,8 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
|
|
339 |
|
340 |
_data_source = "TIGER-Lab/MMLU-Pro"
|
341 |
_default_task = "default"
|
342 |
-
_task_names = [
|
|
|
343 |
"math",
|
344 |
"physics",
|
345 |
"chemistry",
|
|
|
339 |
|
340 |
_data_source = "TIGER-Lab/MMLU-Pro"
|
341 |
_default_task = "default"
|
342 |
+
_task_names = ["default"]
|
343 |
+
_hidden_task_names = [
|
344 |
"math",
|
345 |
"physics",
|
346 |
"chemistry",
|
llmdataparser/prompts.py
CHANGED
@@ -121,3 +121,20 @@ BBH_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
|
|
121 |
6. Respond with ONLY the letter (A, B, C, etc.) or "True"/"False" or "Yes"/"No" - no explanations or additional text
|
122 |
"""
|
123 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
6. Respond with ONLY the letter (A, B, C, etc.) or "True"/"False" or "Yes"/"No" - no explanations or additional text
|
122 |
"""
|
123 |
)
|
124 |
+
|
125 |
+
MBPP_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
|
126 |
+
"""\
|
127 |
+
You are an expert Python programmer tasked with solving basic programming problems. Your goal is to write clean, efficient, and well-tested Python code that solves the given task.
|
128 |
+
|
129 |
+
Instructions:
|
130 |
+
1. Read the task description carefully
|
131 |
+
2. Write a complete Python solution that solves the problem
|
132 |
+
3. Follow Python best practices and PEP 8 style guidelines
|
133 |
+
4. Write clear, readable code with descriptive variable names
|
134 |
+
5. Handle edge cases and input validation appropriately
|
135 |
+
6. Include docstrings or comments to explain complex logic
|
136 |
+
7. Focus on fundamental programming concepts and standard library usage
|
137 |
+
8. Optimize for readability and maintainability
|
138 |
+
9. Return only the implementation code, no additional text
|
139 |
+
"""
|
140 |
+
)
|
tests/test_mbpp_parser.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
|
3 |
+
from llmdataparser.mbpp_parser import MBPPDatasetParser, MBPPParseEntry
|
4 |
+
|
5 |
+
|
6 |
+
@pytest.fixture
def sample_entry():
    """Provide one raw MBPP row that stores its question under ``text``."""
    question = "Write a function to find the sum of numbers in a list."
    solution = "def sum_list(lst):\n    return sum(lst)"
    row = {
        "text": question,
        "code": solution,
        "task_id": 42,
        "test_list": ["assert sum_list([1, 2, 3]) == 6"],
        "test_setup_code": "",
        "challenge_test_list": ["assert sum_list([4, 5, 6]) == 15"],
    }
    return row
|
16 |
+
|
17 |
+
|
18 |
+
@pytest.fixture
def parser():
    """Provide a fresh ``MBPPDatasetParser`` built with default arguments."""
    default_parser = MBPPDatasetParser()
    return default_parser
|
21 |
+
|
22 |
+
|
23 |
+
def test_mbpp_parse_entry_creation():
    """Test that ``MBPPParseEntry.create`` stores every argument it is given."""
    entry = MBPPParseEntry.create(
        prompt="test prompt",
        answer="test answer",
        raw_question="raw question",
        task_id=42,
        test_list=["test1", "test2"],
        test_setup_code="setup code",
        challenge_test_list=["challenge1"],
        task_name="full",
        source_file="test.pdf",
    )

    assert entry.prompt == "test prompt"
    assert entry.answer == "test answer"
    assert entry.raw_question == "raw question"
    # ``create`` reuses the answer as the raw answer.
    assert entry.raw_answer == "test answer"
    assert entry.task_id == 42
    assert entry.test_list == ["test1", "test2"]
    assert entry.test_setup_code == "setup code"
    assert entry.challenge_test_list == ["challenge1"]
    assert entry.task_name == "full"
    # Previously unchecked: the source file must round-trip as well.
    assert entry.source_file == "test.pdf"
|
46 |
+
|
47 |
+
|
48 |
+
def test_mbpp_parse_entry_validation():
    """Test that a non-integer ``task_id`` is rejected with ``ValueError``."""
    invalid_fields = {
        "prompt": "test",
        "answer": "test",
        "raw_question": "test",
        "task_id": "not_an_int",  # Invalid task_id type
        "test_list": [],
        "test_setup_code": "",
        "challenge_test_list": [],
        "task_name": "full",
        "source_file": "test.pdf",
    }

    with pytest.raises(ValueError, match="Task ID must be an integer"):
        MBPPParseEntry.create(**invalid_fields)
|
62 |
+
|
63 |
+
|
64 |
+
def test_process_entry(parser, sample_entry):
    """Test that ``process_entry`` maps every column of a raw row."""
    result = parser.process_entry(sample_entry, task_name="full")

    assert isinstance(result, MBPPParseEntry)
    assert result.task_id == 42
    assert result.raw_question == sample_entry["text"]
    assert result.answer == sample_entry["code"]
    assert result.test_list == sample_entry["test_list"]
    assert result.challenge_test_list == sample_entry["challenge_test_list"]
    # Previously unchecked: setup code is copied from the row, and a row
    # without ``source_file`` yields an empty string.
    assert result.test_setup_code == sample_entry["test_setup_code"]
    assert result.source_file == ""
    expected_prompt = f"{parser._system_prompt}\n\nTask: {sample_entry['text']}"
    assert result.prompt == expected_prompt
    assert result.task_name == "full"
|
77 |
+
|
78 |
+
|
79 |
+
def test_parser_initialization(parser):
    """Test the class-level configuration exposed by a default parser."""
    expected_source = "google-research-datasets/mbpp"
    expected_link = "https://huggingface.co/datasets/google-research-datasets/mbpp"

    assert parser._data_source == expected_source
    assert parser._default_task == "full"
    assert parser._task_names == ["full", "sanitized"]
    assert parser.get_huggingface_link == expected_link
|
88 |
+
|
89 |
+
|
90 |
+
@pytest.mark.integration
@pytest.mark.skip(reason="Requires access to HuggingFace MBPP dataset")
def test_parser_load_and_parse(parser):
    """Integration test: load the train split, parse it, check entry types."""
    parser.load(split="train")
    parser.parse(force=True)
    entries = parser.get_parsed_data

    assert len(entries) > 0
    entries_are_mbpp = [isinstance(item, MBPPParseEntry) for item in entries]
    assert all(entries_are_mbpp)
|
100 |
+
|
101 |
+
|
102 |
+
def test_get_current_task(parser, sample_entry):
    """Test that ``_get_current_task`` reports the default task for the sample row."""
    assert parser._get_current_task(sample_entry) == parser._default_task
|
106 |
+
|
107 |
+
|
108 |
+
# Marked as an integration test for consistency with the other tests in this
# module that need the Hugging Face dataset, so marker-based selection such as
# ``-m "not integration"`` treats them all alike.
@pytest.mark.integration
@pytest.mark.parametrize("task_name", ["full", "sanitized"])
@pytest.mark.skip(reason="Requires access to HuggingFace MBPP dataset")
def test_different_tasks_loading(parser, task_name):
    """Test loading different tasks of the dataset"""
    parser.load(task_name=task_name, split="train")
    assert parser._current_task == task_name
|
114 |
+
|
115 |
+
|
116 |
+
def test_parser_string_representation(parser):
    """Test that ``str(parser)`` names the class, the data source and the load state."""
    rendered = str(parser)
    expected_fragments = (
        "MBPPDatasetParser",
        "google-research-datasets/mbpp",
        "not loaded",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
122 |
+
|
123 |
+
|
124 |
+
def test_parse_without_loaded_data(parser):
    """Test that calling ``parse`` before ``load`` raises ``ValueError``."""
    expected_message = "No data loaded. Please load the dataset first"
    with pytest.raises(ValueError, match=expected_message):
        parser.parse()
|
130 |
+
|
131 |
+
|
132 |
+
@pytest.mark.integration
@pytest.mark.skip(reason="Requires access to HuggingFace MBPP dataset")
def test_full_workflow_with_different_splits(parser):
    """Integration test: load and parse the train split, then check every entry."""
    parser.load(split="train")
    parser.parse(force=True)
    entries = parser.get_parsed_data

    assert len(entries) > 0
    entries_are_mbpp = [isinstance(item, MBPPParseEntry) for item in entries]
    assert all(entries_are_mbpp)
    entries_are_full = [item.task_name == "full" for item in entries]
    assert all(entries_are_full)
|
143 |
+
|
144 |
+
|
145 |
+
def test_custom_system_prompt():
    """Test that a system prompt passed to the constructor is stored as given."""
    override = "Custom system prompt"
    custom_parser = MBPPDatasetParser(system_prompt=override)
    assert custom_parser._system_prompt == override
|
150 |
+
|
151 |
+
|
152 |
+
def test_default_system_prompt(parser):
    """Test that a parser built without a prompt uses the class default."""
    expected = parser._default_system_prompt
    assert parser._system_prompt == expected
|
tests/test_mmlu_parser.py
CHANGED
@@ -112,7 +112,7 @@ def test_process_entry_base(base_parser, sample_mmlu_entries):
|
|
112 |
assert "D. Madrid" in entry.prompt
|
113 |
assert entry.raw_question == "What is the capital of France?"
|
114 |
assert entry.raw_choices == ["London", "Paris", "Berlin", "Madrid"]
|
115 |
-
assert entry.raw_answer == 1
|
116 |
assert entry.task_name == "geography"
|
117 |
|
118 |
|
@@ -169,7 +169,7 @@ def test_tmmlu_process_entry(tmmlu_parser):
|
|
169 |
("base_parser", 57, "cais/mmlu"),
|
170 |
("redux_parser", 30, "edinburgh-dawg/mmlu-redux"),
|
171 |
("tmmlu_parser", 66, "ikala/tmmluplus"),
|
172 |
-
("mmlu_pro_parser",
|
173 |
],
|
174 |
)
|
175 |
def test_parser_initialization(
|
|
|
112 |
assert "D. Madrid" in entry.prompt
|
113 |
assert entry.raw_question == "What is the capital of France?"
|
114 |
assert entry.raw_choices == ["London", "Paris", "Berlin", "Madrid"]
|
115 |
+
assert entry.raw_answer == "1"
|
116 |
assert entry.task_name == "geography"
|
117 |
|
118 |
|
|
|
169 |
("base_parser", 57, "cais/mmlu"),
|
170 |
("redux_parser", 30, "edinburgh-dawg/mmlu-redux"),
|
171 |
("tmmlu_parser", 66, "ikala/tmmluplus"),
|
172 |
+
("mmlu_pro_parser", 1, "TIGER-Lab/MMLU-Pro"),
|
173 |
],
|
174 |
)
|
175 |
def test_parser_initialization(
|