feat: add tw_legal parser
Browse files- llmdataparser/__init__.py +2 -0
- llmdataparser/prompts.py +13 -0
- llmdataparser/tw_legal_parser.py +92 -0
- tests/test_tw_legal_parser.py +140 -0
llmdataparser/__init__.py
CHANGED
@@ -15,6 +15,7 @@ from .mmlu_parser import (
|
|
15 |
MMLUReduxDatasetParser,
|
16 |
TMMLUPlusDatasetParser,
|
17 |
)
|
|
|
18 |
|
19 |
|
20 |
class ParserRegistry:
|
@@ -54,3 +55,4 @@ ParserRegistry.register_parser("humanevalplus", HumanEvalDatasetPlusParser)
|
|
54 |
ParserRegistry.register_parser("bbh", BBHDatasetParser)
|
55 |
ParserRegistry.register_parser("mbpp", MBPPDatasetParser)
|
56 |
ParserRegistry.register_parser("ifeval", IFEvalDatasetParser)
|
|
|
|
15 |
MMLUReduxDatasetParser,
|
16 |
TMMLUPlusDatasetParser,
|
17 |
)
|
18 |
+
from .tw_legal_parser import TWLegalDatasetParser
|
19 |
|
20 |
|
21 |
class ParserRegistry:
|
|
|
55 |
ParserRegistry.register_parser("bbh", BBHDatasetParser)
|
56 |
ParserRegistry.register_parser("mbpp", MBPPDatasetParser)
|
57 |
ParserRegistry.register_parser("ifeval", IFEvalDatasetParser)
|
58 |
+
ParserRegistry.register_parser("twlegal", TWLegalDatasetParser)
|
llmdataparser/prompts.py
CHANGED
@@ -138,3 +138,16 @@ MBPP_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
|
|
138 |
9. Return only the implementation code, no additional text
|
139 |
"""
|
140 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
9. Return only the implementation code, no additional text
|
139 |
"""
|
140 |
)
|
141 |
+
|
142 |
+
# System prompt for the Taiwan Legal Benchmark (Taiwan Bar Examination,
# 臺灣律師資格考試). Instructs the model to answer each multiple-choice
# question with a single letter (A, B, C, or D) and nothing else, which is
# what the parser's answer validation expects downstream.
TW_LEGAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
    """\
    You are an expert lawyer with deep knowledge of Taiwan's legal system. You are taking the Taiwan Bar Examination (臺灣律師資格考試). For each question, you will analyze legal scenarios or concepts based on Taiwan's laws and regulations. Your task is to select the most appropriate answer that aligns with Taiwan's legal principles.

    Instructions:
    1. Carefully analyze the legal question and all options
    2. Consider Taiwan's specific legal context and terminology
    3. Apply relevant laws, regulations, and legal principles
    4. Select the single most accurate answer
    5. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
    """
)
|
llmdataparser/tw_legal_parser.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Any, Final
|
3 |
+
|
4 |
+
from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
|
5 |
+
from llmdataparser.prompts import TW_LEGAL_SYSTEM_PROMPT
|
6 |
+
|
7 |
+
# Letters accepted as answers for this multiple-choice benchmark.
TW_LEGAL_VALID_ANSWERS: Final[set[str]] = set("ABCD")
# Deterministically ordered, human-readable form used in error messages.
TW_LEGAL_VALID_ANSWER_STR: Final[str] = ", ".join(sorted(TW_LEGAL_VALID_ANSWERS))
|
9 |
+
|
10 |
+
|
11 |
+
@dataclass(frozen=True, kw_only=True, slots=True)
class TWLegalParseEntry(HuggingFaceParseEntry):
    """Parsed entry for the Taiwan Legal Benchmark.

    Extends the generic HuggingFace entry with the dataset's raw option
    texts so the original choices remain inspectable after parsing.
    """

    # Option texts in A-D order, exactly as they appear in the source row.
    raw_choices: list[str]

    @classmethod
    def create(
        cls,
        prompt: str,
        answer: str,
        raw_question: str,
        raw_choices: list[str],
        raw_answer: str,
        task_name: str,
    ) -> "TWLegalParseEntry":
        """Build an entry, rejecting any answer letter outside A-D.

        Raises:
            ValueError: if ``answer`` is not one of the valid letters.
        """
        if answer in TW_LEGAL_VALID_ANSWERS:
            return cls(
                prompt=prompt,
                answer=answer,
                raw_question=raw_question,
                raw_choices=raw_choices,
                raw_answer=raw_answer,
                task_name=task_name,
            )
        raise ValueError(
            f"Invalid answer_letter '{answer}'; must be one of {TW_LEGAL_VALID_ANSWER_STR}"
        )
|
39 |
+
|
40 |
+
|
41 |
+
class TWLegalDatasetParser(HuggingFaceDatasetParser[TWLegalParseEntry]):
    """Parser for the Taiwan Legal Benchmark dataset."""

    _data_source = "lianghsun/tw-legal-benchmark-v1"
    _default_task = "default"
    _task_names = ["default"]
    _default_system_prompt = TW_LEGAL_SYSTEM_PROMPT

    def process_entry(
        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
    ) -> TWLegalParseEntry:
        """Convert one raw dataset row into a TWLegalParseEntry.

        Expects ``row`` to carry a "question", one option per letter key
        ("A"-"D"), and an "answer" holding the correct letter.
        """
        task = task_name or self._get_current_task(row)

        # Pull the option texts in letter order and label each with its key.
        raw_choices = [row[letter] for letter in "ABCD"]
        labeled_options = "\n".join(
            f"{letter}. {option}" for letter, option in zip("ABCD", raw_choices)
        )

        raw_question = row["question"]
        raw_answer = row["answer"]

        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{labeled_options}\nAnswer:"

        return TWLegalParseEntry.create(
            prompt=prompt,
            answer=raw_answer,
            raw_question=raw_question,
            raw_choices=raw_choices,
            raw_answer=raw_answer,
            task_name=task,
        )
|
72 |
+
|
73 |
+
|
74 |
+
if __name__ == "__main__":
    # Demo: load the dataset, parse it, and show the first entry.
    parser = TWLegalDatasetParser()
    parser.load()
    parser.parse()

    parsed_data = parser.get_parsed_data

    if parsed_data:
        example = parsed_data[0]
        print("\nExample parsed entry:")
        print(f"Question: {example.raw_question}")
        print("Choices:")
        for letter, choice in zip("ABCD", example.raw_choices):
            print(f"{letter}. {choice}")
        print(f"Correct Answer: {example.answer}")
        print(f"Task Name: {example.task_name}")
|
tests/test_tw_legal_parser.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
|
3 |
+
from llmdataparser.tw_legal_parser import TWLegalDatasetParser, TWLegalParseEntry
|
4 |
+
|
5 |
+
|
6 |
+
@pytest.fixture
def tw_legal_parser():
    """Provide a fresh TWLegalDatasetParser instance for each test."""
    return TWLegalDatasetParser()
|
10 |
+
|
11 |
+
|
12 |
+
@pytest.fixture
def sample_tw_legal_entries():
    """Create sample Taiwan Legal dataset entries for testing."""
    # Each dict mirrors a raw dataset row: "question", one option text per
    # letter key ("A"-"D"), and "answer" holding the correct letter.
    return [
        {
            "question": "依民法規定,下列關於法人之敘述,何者錯誤?",
            "A": "法人於法令限制內,有享受權利負擔義務之能力",
            "B": "法人因目的之達到而消滅",
            "C": "法人非依法律之規定,不得成立",
            "D": "法人於登記前,即取得權利能力",
            "answer": "D",
        },
        {
            "question": "關於刑法第321條第1項第4款之結夥三人以上而犯竊盜罪,下列敘述何者正確?",
            "A": "須行為人主觀上有結夥犯竊盜之認識",
            "B": "三人以上當場在場實施竊盜行為始足當之",
            "C": "三人以上已達成犯意聯絡即可成立",
            "D": "三人以上須全部在現場實施竊盜行為",
            "answer": "A",
        },
    ]
|
33 |
+
|
34 |
+
|
35 |
+
def test_tw_legal_parse_entry_creation_valid():
    """A valid answer letter should yield a fully populated entry."""
    choices = ["choice1", "choice2", "choice3", "choice4"]
    entry = TWLegalParseEntry.create(
        prompt="Test prompt",
        answer="A",
        raw_question="Test question",
        raw_choices=choices,
        raw_answer="A",
        task_name="default",
    )

    assert isinstance(entry, TWLegalParseEntry)
    assert entry.prompt == "Test prompt"
    assert entry.answer == "A"
    assert entry.raw_choices == choices
|
49 |
+
|
50 |
+
|
51 |
+
@pytest.mark.parametrize("invalid_answer", ["E", "F", "1", "", None])
def test_tw_legal_parse_entry_creation_invalid(invalid_answer):
    """Answers outside A-D must be rejected with a descriptive ValueError."""
    expected_message = "Invalid answer_letter.*must be one of A, B, C, D"
    with pytest.raises(ValueError, match=expected_message):
        TWLegalParseEntry.create(
            prompt="Test prompt",
            answer=invalid_answer,
            raw_question="Test question",
            raw_choices=["choice1", "choice2", "choice3", "choice4"],
            raw_answer=invalid_answer,
            task_name="default",
        )
|
65 |
+
|
66 |
+
|
67 |
+
def test_process_entry(tw_legal_parser, sample_tw_legal_entries):
    """The first sample row should be converted into a well-formed entry."""
    entry = tw_legal_parser.process_entry(sample_tw_legal_entries[0])

    assert isinstance(entry, TWLegalParseEntry)
    assert entry.answer == "D"
    assert entry.raw_question == "依民法規定,下列關於法人之敘述,何者錯誤?"
    assert len(entry.raw_choices) == 4

    # Every labeled option line must appear verbatim in the prompt.
    expected_option_lines = [
        "A. 法人於法令限制內,有享受權利負擔義務之能力",
        "B. 法人因目的之達到而消滅",
        "C. 法人非依法律之規定,不得成立",
        "D. 法人於登記前,即取得權利能力",
    ]
    for option_line in expected_option_lines:
        assert option_line in entry.prompt
|
79 |
+
|
80 |
+
|
81 |
+
def test_tw_legal_parser_initialization(tw_legal_parser):
    """Check the parser's static configuration and derived properties."""
    expected_source = "lianghsun/tw-legal-benchmark-v1"

    assert isinstance(tw_legal_parser.task_names, list)
    assert len(tw_legal_parser.task_names) == 1  # Only default task
    assert tw_legal_parser._data_source == expected_source
    assert tw_legal_parser._default_task == "default"
    assert (
        tw_legal_parser.get_huggingface_link
        == f"https://huggingface.co/datasets/{expected_source}"
    )
|
91 |
+
|
92 |
+
|
93 |
+
@pytest.mark.integration
def test_load_dataset(tw_legal_parser):
    """Loading the train split should populate raw data and task state."""
    tw_legal_parser.load(split="train")

    assert tw_legal_parser.raw_data is not None
    assert tw_legal_parser.split_names == ["train"]
    assert tw_legal_parser._current_task == "default"
|
100 |
+
|
101 |
+
|
102 |
+
def test_parser_string_representation(tw_legal_parser):
    """The parser's string form should mention class, source, and load state."""
    text = str(tw_legal_parser)
    expected_fragments = (
        "TWLegalDatasetParser",
        "lianghsun/tw-legal-benchmark-v1",
        "not loaded",
    )
    for fragment in expected_fragments:
        assert fragment in text
|
108 |
+
|
109 |
+
|
110 |
+
@pytest.mark.integration
def test_data_parsing(tw_legal_parser):
    """Parsing the train split should yield valid, correctly typed entries."""
    tw_legal_parser.load(split="train")
    tw_legal_parser.parse(split_names="train", force=True)

    parsed_data = tw_legal_parser.get_parsed_data
    assert len(parsed_data) > 0

    # Every parsed entry must be our entry type with a valid answer letter.
    for entry in parsed_data:
        assert isinstance(entry, TWLegalParseEntry)
        assert entry.answer in {"A", "B", "C", "D"}
|
123 |
+
|
124 |
+
|
125 |
+
def test_system_prompt_override(tw_legal_parser):
    """A caller-supplied system prompt should appear in generated prompts."""
    custom_prompt = "Custom system prompt for testing"
    parser = TWLegalDatasetParser(system_prompt=custom_prompt)

    # Minimal synthetic row matching the dataset's schema.
    row = {"question": "Test question", "answer": "A"}
    row.update({letter: f"Choice {letter}" for letter in "ABCD"})

    entry = parser.process_entry(row)
    assert custom_prompt in entry.prompt