Spaces:

JeffYang52415
/

LLMEval-Dataset-Parser

Running

App Files Files Community

JeffYang52415 committed on Dec 28, 2024

Commit

18bf871

unverified ·

1 Parent(s): cddf75e

feat: add mbpp parser

Browse files

Files changed (7) hide show

llmdataparser/__init__.py +24 -1
llmdataparser/base_parser.py +2 -0
llmdataparser/mbpp_parser.py +107 -0
llmdataparser/mmlu_parser.py +2 -1
llmdataparser/prompts.py +17 -0
tests/test_mbpp_parser.py +154 -0
tests/test_mmlu_parser.py +2 -2

llmdataparser/__init__.py CHANGED Viewed

@@ -2,7 +2,19 @@
 from typing import Type
 from .base_parser import DatasetParser
-from .mmlu_parser import MMLUDatasetParser
 class ParserRegistry:
@@ -31,3 +43,14 @@ class ParserRegistry:
 # Register parsers
 ParserRegistry.register_parser("mmlu", MMLUDatasetParser)

 from typing import Type
 from .base_parser import DatasetParser
+from .bbh_parser import BBHDatasetParser
+from .gsm8k_parser import GSM8KDatasetParser
+from .humaneval_parser import HumanEvalDatasetParser, HumanEvalDatasetPlusParser
+from .ifeval_parser import IFEvalDatasetParser
+from .math_parser import MATHDatasetParser
+from .mbpp_parser import MBPPDatasetParser
+from .mgsm_parser import MGSMDatasetParser
+from .mmlu_parser import (
+    MMLUDatasetParser,
+    MMLUProDatasetParser,
+    MMLUReduxDatasetParser,
+    TMMLUPlusDatasetParser,
+)
 class ParserRegistry:
 # Register parsers
 ParserRegistry.register_parser("mmlu", MMLUDatasetParser)
+ParserRegistry.register_parser("mmlupro", MMLUProDatasetParser)
+ParserRegistry.register_parser("mmluredux", MMLUReduxDatasetParser)
+ParserRegistry.register_parser("tmmluplus", TMMLUPlusDatasetParser)
+ParserRegistry.register_parser("gsm8k", GSM8KDatasetParser)
+ParserRegistry.register_parser("math", MATHDatasetParser)
+ParserRegistry.register_parser("mgsm", MGSMDatasetParser)
+ParserRegistry.register_parser("humaneval", HumanEvalDatasetParser)
+ParserRegistry.register_parser("humanevalplus", HumanEvalDatasetPlusParser)
+ParserRegistry.register_parser("bbh", BBHDatasetParser)
+ParserRegistry.register_parser("mbpp", MBPPDatasetParser)
+ParserRegistry.register_parser("ifeval", IFEvalDatasetParser)

llmdataparser/base_parser.py CHANGED Viewed

@@ -80,6 +80,8 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
     _default_task: ClassVar[str]
     # _default_system_prompt is the default system prompt to use if no system prompt is specified
     _default_system_prompt: ClassVar[str]
     def __init__(self, system_prompt: str | None = None, **kwargs):
         """

     _default_task: ClassVar[str]
     # _default_system_prompt is the default system prompt to use if no system prompt is specified
     _default_system_prompt: ClassVar[str]
+    # _hidden_task_names is the list of task names that are hidden in the dataset, e.g. ["math", "physics", "chemistry"]
+    _hidden_task_names: ClassVar[list[str]] = []
     def __init__(self, system_prompt: str | None = None, **kwargs):
         """

llmdataparser/mbpp_parser.py ADDED Viewed

	@@ -0,0 +1,107 @@

+from dataclasses import dataclass
+from typing import Any, ClassVar
+from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
+from llmdataparser.prompts import MBPP_SYSTEM_PROMPT
+@dataclass(frozen=True, kw_only=True, slots=True)
+class MBPPParseEntry(HuggingFaceParseEntry):
+    """Parsed MBPP record: the base entry fields plus MBPP-specific metadata.
+    Instances are frozen and keyword-only. Build them with ``create``, which
+    validates ``task_id`` and stores ``answer`` as ``raw_answer`` as well.
+    """
+    # MBPP-specific fields; each value is stored exactly as passed to ``create``.
+    task_id: int
+    test_list: list[str]
+    test_setup_code: str
+    challenge_test_list: list[str]
+    source_file: str
+    @classmethod
+    def create(
+        cls,
+        prompt: str,
+        answer: str,
+        raw_question: str,
+        task_id: int,
+        test_list: list[str],
+        test_setup_code: str,
+        challenge_test_list: list[str],
+        task_name: str,
+        source_file: str,
+    ) -> "MBPPParseEntry":
+        """Validate ``task_id`` and build an ``MBPPParseEntry``.
+        Args:
+            prompt: Full prompt text stored on the entry.
+            answer: Code solution; stored as both ``answer`` and ``raw_answer``.
+            raw_question: Task description as it came from the dataset row.
+            task_id: Task identifier; must be an ``int`` and not a ``bool``.
+            test_list: Test statements for the task.
+            test_setup_code: Setup code string stored alongside the tests.
+            challenge_test_list: Additional test statements for the task.
+            task_name: Name of the task/config the entry belongs to.
+            source_file: Source file string recorded for the entry.
+        Returns:
+            A new frozen ``MBPPParseEntry``.
+        Raises:
+            ValueError: If ``task_id`` is not an integer. ``True``/``False``
+                are rejected as well.
+        """
+        # bool is a subclass of int, so a bare isinstance check would accept
+        # True/False as task IDs; exclude it explicitly.
+        if isinstance(task_id, bool) or not isinstance(task_id, int):
+            raise ValueError("Task ID must be an integer")
+        return cls(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            raw_answer=answer,  # In MBPP, the code solution is the raw answer
+            task_id=task_id,
+            test_list=test_list,
+            test_setup_code=test_setup_code,
+            challenge_test_list=challenge_test_list,
+            task_name=task_name,
+            source_file=source_file,
+        )
+class MBPPDatasetParser(HuggingFaceDatasetParser[MBPPParseEntry]):
+    """Parser for the MBPP (Mostly Basic Python Programming) dataset."""
+    _data_source: ClassVar[str] = "google-research-datasets/mbpp"
+    _default_task: ClassVar[str] = "full"  # Can be 'full' or 'sanitized'
+    _task_names: ClassVar[list[str]] = ["full", "sanitized"]
+    _default_system_prompt: ClassVar[str] = MBPP_SYSTEM_PROMPT
+    def process_entry(
+        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
+    ) -> MBPPParseEntry:
+        """Convert one raw MBPP row into an ``MBPPParseEntry``.
+        The task description is read from ``text`` and, failing that, from
+        ``prompt`` (presumably the key differs between the 'full' and
+        'sanitized' configs; confirm against the dataset card).
+        Args:
+            row: Raw dataset row. ``code``, ``task_id`` and ``test_list`` are
+                required; ``test_setup_code``, ``challenge_test_list`` and
+                ``source_file`` fall back to ``""``, ``[]`` and ``""``.
+            task_name: Task name to record on the entry. When falsy, the value
+                of ``self._get_current_task(row)`` is used instead.
+            **kwargs: Accepted but not used by this method.
+        Returns:
+            The parsed entry, whose prompt is the system prompt followed by
+            ``"Task: <description>"``.
+        Raises:
+            KeyError: If the row lacks ``code``, ``task_id``, ``test_list``,
+                or both ``text`` and ``prompt``.
+            ValueError: If ``task_id`` is not an integer (raised by
+                ``MBPPParseEntry.create``).
+        """
+        raw_question = row.get("text")
+        if raw_question is None:
+            raw_question = row.get("prompt")
+        if raw_question is None:
+            # Fail loudly instead of silently emitting the prompt "Task: None".
+            raise KeyError("MBPP row has neither a 'text' nor a 'prompt' field")
+        answer = row["code"]
+        task_id = row["task_id"]
+        test_list = row["test_list"]
+        test_setup_code = row.get("test_setup_code", "")
+        challenge_test_list = row.get("challenge_test_list", [])
+        # Combine system prompt with the task description
+        prompt = f"{self._system_prompt}\n\nTask: {raw_question}"
+        # Use task_name if provided, otherwise ask the parser for the current task
+        task = task_name or self._get_current_task(row)
+        source_file = row.get("source_file", "")
+        return MBPPParseEntry.create(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            task_id=task_id,
+            test_list=test_list,
+            test_setup_code=test_setup_code,
+            challenge_test_list=challenge_test_list,
+            task_name=task,
+            source_file=source_file,
+        )
+if __name__ == "__main__":
+    # Demo run: load the default MBPP configuration, parse it, and show the
+    # first parsed entry (if there is one).
+    mbpp_parser = MBPPDatasetParser()
+    mbpp_parser.load()
+    mbpp_parser.parse()
+    parsed_data = mbpp_parser.get_parsed_data
+    if parsed_data:
+        example = parsed_data[0]
+        # One print call per line, in the order they should appear.
+        for line in (
+            "\nExample parsed entry:",
+            f"Task ID: {example.task_id}",
+            f"Task: {example.raw_question}",
+            f"Solution:\n{example.answer}",
+            f"Test Cases:\n{example.test_list}",
+        ):
+            print(line)

llmdataparser/mmlu_parser.py CHANGED Viewed

@@ -339,7 +339,8 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
     _data_source = "TIGER-Lab/MMLU-Pro"
     _default_task = "default"
-    _task_names = [
         "math",
         "physics",
         "chemistry",

     _data_source = "TIGER-Lab/MMLU-Pro"
     _default_task = "default"
+    _task_names = ["default"]
+    _hidden_task_names = [
         "math",
         "physics",
         "chemistry",

llmdataparser/prompts.py CHANGED Viewed

@@ -121,3 +121,20 @@ BBH_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     6. Respond with ONLY the letter (A, B, C, etc.) or "True"/"False" or "Yes"/"No" - no explanations or additional text
 """
 )

     6. Respond with ONLY the letter (A, B, C, etc.) or "True"/"False" or "Yes"/"No" - no explanations or additional text
 """
 )
+# Default system prompt for the MBPP parser: MBPPDatasetParser uses it as
+# _default_system_prompt and places it ahead of "Task: <description>" in each
+# prompt. The backslash after the opening quotes drops the leading newline, and
+# textwrap.dedent removes the common indentation of the lines below.
+MBPP_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
+    """\
+    You are an expert Python programmer tasked with solving basic programming problems. Your goal is to write clean, efficient, and well-tested Python code that solves the given task.
+    Instructions:
+    1. Read the task description carefully
+    2. Write a complete Python solution that solves the problem
+    3. Follow Python best practices and PEP 8 style guidelines
+    4. Write clear, readable code with descriptive variable names
+    5. Handle edge cases and input validation appropriately
+    6. Include docstrings or comments to explain complex logic
+    7. Focus on fundamental programming concepts and standard library usage
+    8. Optimize for readability and maintainability
+    9. Return only the implementation code, no additional text
+"""
+)

tests/test_mbpp_parser.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import pytest
+from llmdataparser.mbpp_parser import MBPPDatasetParser, MBPPParseEntry
+@pytest.fixture
+def sample_entry():
+    """Return a minimal raw row that uses the ``text`` key for the task description."""
+    solution = "def sum_list(lst):\n    return sum(lst)"
+    row = dict(
+        text="Write a function to find the sum of numbers in a list.",
+        code=solution,
+        task_id=42,
+        test_list=["assert sum_list([1, 2, 3]) == 6"],
+        test_setup_code="",
+        challenge_test_list=["assert sum_list([4, 5, 6]) == 15"],
+    )
+    return row
+@pytest.fixture
+def parser():
+    """Provide a freshly constructed ``MBPPDatasetParser`` with default arguments."""
+    default_parser = MBPPDatasetParser()
+    return default_parser
+def test_mbpp_parse_entry_creation():
+    """Every argument given to ``create`` must be readable back from the entry."""
+    entry = MBPPParseEntry.create(
+        prompt="test prompt",
+        answer="test answer",
+        raw_question="raw question",
+        task_id=42,
+        test_list=["test1", "test2"],
+        test_setup_code="setup code",
+        challenge_test_list=["challenge1"],
+        task_name="full",
+        source_file="test.pdf",
+    )
+    assert entry.prompt == "test prompt"
+    assert entry.answer == "test answer"
+    assert entry.raw_question == "raw question"
+    # ``create`` mirrors the answer into ``raw_answer``.
+    assert entry.raw_answer == "test answer"
+    assert entry.task_id == 42
+    assert entry.test_list == ["test1", "test2"]
+    assert entry.test_setup_code == "setup code"
+    assert entry.challenge_test_list == ["challenge1"]
+    assert entry.task_name == "full"
+    # Previously passed in but never verified.
+    assert entry.source_file == "test.pdf"
+def test_mbpp_parse_entry_validation():
+    """``create`` must reject a non-integer ``task_id`` with a ``ValueError``."""
+    bad_fields = dict(
+        prompt="test",
+        answer="test",
+        raw_question="test",
+        task_id="not_an_int",  # deliberately the wrong type
+        test_list=[],
+        test_setup_code="",
+        challenge_test_list=[],
+        task_name="full",
+        source_file="test.pdf",
+    )
+    with pytest.raises(ValueError, match="Task ID must be an integer"):
+        MBPPParseEntry.create(**bad_fields)
+def test_process_entry(parser, sample_entry):
+    """``process_entry`` must copy the row fields and build the expected prompt."""
+    result = parser.process_entry(sample_entry, task_name="full")
+    assert isinstance(result, MBPPParseEntry)
+    assert result.task_id == 42
+    assert result.raw_question == sample_entry["text"]
+    assert result.answer == sample_entry["code"]
+    assert result.test_list == sample_entry["test_list"]
+    assert result.test_setup_code == sample_entry["test_setup_code"]
+    assert result.challenge_test_list == sample_entry["challenge_test_list"]
+    # The sample row has no "source_file" key, so the "" fallback applies.
+    assert result.source_file == ""
+    expected_prompt = f"{parser._system_prompt}\n\nTask: {sample_entry['text']}"
+    assert result.prompt == expected_prompt
+    assert result.task_name == "full"
+def test_parser_initialization(parser):
+    """Check the class-level configuration and the reported HuggingFace link."""
+    expected_source = "google-research-datasets/mbpp"
+    expected_link = "https://huggingface.co/datasets/google-research-datasets/mbpp"
+    assert parser._data_source == expected_source
+    assert parser._default_task == "full"
+    assert parser._task_names == ["full", "sanitized"]
+    assert parser.get_huggingface_link == expected_link
+@pytest.mark.integration
+@pytest.mark.skip(reason="Requires access to HuggingFace MBPP dataset")
+def test_parser_load_and_parse(parser):
+    """Load the 'train' split, parse it, and check the parsed entries."""
+    parser.load(split="train")
+    parser.parse(force=True)
+    entries = parser.get_parsed_data
+    assert len(entries) > 0
+    # Every parsed item must be an MBPP entry.
+    for entry in entries:
+        assert isinstance(entry, MBPPParseEntry)
+def test_get_current_task(parser, sample_entry):
+    """On a fresh parser, ``_get_current_task`` must report the default task."""
+    expected_task = parser._default_task
+    assert parser._get_current_task(sample_entry) == expected_task
+@pytest.mark.parametrize("task_name", ["full", "sanitized"])
+@pytest.mark.integration
+@pytest.mark.skip(reason="Requires access to HuggingFace MBPP dataset")
+def test_different_tasks_loading(parser, task_name):
+    """Loading a given task must record it as the parser's current task.
+    Marked ``integration`` like the other tests that need the real dataset.
+    """
+    parser.load(task_name=task_name, split="train")
+    assert parser._current_task == task_name
+def test_parser_string_representation(parser):
+    """``str(parser)`` must name the class, the data source and the load state."""
+    rendered = str(parser)
+    expected_fragments = (
+        "MBPPDatasetParser",
+        "google-research-datasets/mbpp",
+        "not loaded",
+    )
+    for fragment in expected_fragments:
+        assert fragment in rendered
+def test_parse_without_loaded_data(parser):
+    """Calling ``parse`` before ``load`` must raise a ``ValueError``."""
+    # ``match`` is a regular expression, so the period is escaped to require a
+    # literal "." rather than any character.
+    with pytest.raises(
+        ValueError, match=r"No data loaded\. Please load the dataset first"
+    ):
+        parser.parse()
+@pytest.mark.integration
+@pytest.mark.skip(reason="Requires access to HuggingFace MBPP dataset")
+def test_full_workflow_with_different_splits(parser):
+    """Run load and parse on the 'train' split and check type and task name of each entry."""
+    parser.load(split="train")
+    parser.parse(force=True)
+    entries = parser.get_parsed_data
+    assert len(entries) > 0
+    # First confirm the entry type for all items, then the recorded task name.
+    for entry in entries:
+        assert isinstance(entry, MBPPParseEntry)
+    for entry in entries:
+        assert entry.task_name == "full"
+def test_custom_system_prompt():
+    """A ``system_prompt`` given to the constructor must be stored as-is."""
+    prompt_override = "Custom system prompt"
+    configured = MBPPDatasetParser(system_prompt=prompt_override)
+    assert configured._system_prompt == prompt_override
+def test_default_system_prompt(parser):
+    """Without an override, the parser must fall back to its default system prompt."""
+    default_prompt = parser._default_system_prompt
+    assert parser._system_prompt == default_prompt

tests/test_mmlu_parser.py CHANGED Viewed

@@ -112,7 +112,7 @@ def test_process_entry_base(base_parser, sample_mmlu_entries):
     assert "D. Madrid" in entry.prompt
     assert entry.raw_question == "What is the capital of France?"
     assert entry.raw_choices == ["London", "Paris", "Berlin", "Madrid"]
-    assert entry.raw_answer == 1
     assert entry.task_name == "geography"
@@ -169,7 +169,7 @@ def test_tmmlu_process_entry(tmmlu_parser):
         ("base_parser", 57, "cais/mmlu"),
         ("redux_parser", 30, "edinburgh-dawg/mmlu-redux"),
         ("tmmlu_parser", 66, "ikala/tmmluplus"),
-        ("mmlu_pro_parser", 14, "TIGER-Lab/MMLU-Pro"),
     ],
 )
 def test_parser_initialization(

     assert "D. Madrid" in entry.prompt
     assert entry.raw_question == "What is the capital of France?"
     assert entry.raw_choices == ["London", "Paris", "Berlin", "Madrid"]
+    assert entry.raw_answer == "1"
     assert entry.task_name == "geography"
         ("base_parser", 57, "cais/mmlu"),
         ("redux_parser", 30, "edinburgh-dawg/mmlu-redux"),
         ("tmmlu_parser", 66, "ikala/tmmluplus"),
+        ("mmlu_pro_parser", 1, "TIGER-Lab/MMLU-Pro"),
     ],
 )
 def test_parser_initialization(