JeffYang52415 committed (unverified)
Commit dd0b07f · 1 Parent(s): 18bf871

feat: add tw_legal parser
llmdataparser/__init__.py CHANGED

@@ -15,6 +15,7 @@ from .mmlu_parser import (
     MMLUReduxDatasetParser,
     TMMLUPlusDatasetParser,
 )
+from .tw_legal_parser import TWLegalDatasetParser


 class ParserRegistry:
@@ -54,3 +55,4 @@ ParserRegistry.register_parser("humanevalplus", HumanEvalDatasetPlusParser)
 ParserRegistry.register_parser("bbh", BBHDatasetParser)
 ParserRegistry.register_parser("mbpp", MBPPDatasetParser)
 ParserRegistry.register_parser("ifeval", IFEvalDatasetParser)
+ParserRegistry.register_parser("twlegal", TWLegalDatasetParser)
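
For context, a minimal usage sketch of what this change enables: the parser is now importable from the package root and registered under the key "twlegal". It mirrors the `__main__` block added in tw_legal_parser.py below and assumes network access to Hugging Face for the dataset download.

    # Minimal sketch; load()/parse()/get_parsed_data follow the example usage in tw_legal_parser.py.
    from llmdataparser import TWLegalDatasetParser

    parser = TWLegalDatasetParser()
    parser.load()   # fetches lianghsun/tw-legal-benchmark-v1 from Hugging Face
    parser.parse()

    entry = parser.get_parsed_data[0]
    print(entry.prompt)
    print(entry.answer)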
llmdataparser/prompts.py CHANGED

@@ -138,3 +138,16 @@ MBPP_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     9. Return only the implementation code, no additional text
     """
 )
+
+TW_LEGAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
+    """\
+    You are an expert lawyer with deep knowledge of Taiwan's legal system. You are taking the Taiwan Bar Examination (臺灣律師資格考試). For each question, you will analyze legal scenarios or concepts based on Taiwan's laws and regulations. Your task is to select the most appropriate answer that aligns with Taiwan's legal principles.
+
+    Instructions:
+    1. Carefully analyze the legal question and all options
+    2. Consider Taiwan's specific legal context and terminology
+    3. Apply relevant laws, regulations, and legal principles
+    4. Select the single most accurate answer
+    5. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    """
+)
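
A small sketch of why the prompt constants use this pattern: the backslash after the opening triple quotes suppresses the leading newline, and textwrap.dedent strips the common indentation, so the constant starts directly with the prompt text. This is standard-library behavior, not part of this diff; the constant name below is illustrative only.

    import textwrap
    from typing import Final

    # EXAMPLE_PROMPT is a stand-in, not a constant from prompts.py.
    EXAMPLE_PROMPT: Final[str] = textwrap.dedent(
        """\
        Line one of the prompt.
        1. A numbered instruction keeps its relative indentation.
        """
    )
    assert not EXAMPLE_PROMPT.startswith("\n")   # "\" removed the leading newline
    assert EXAMPLE_PROMPT.startswith("Line one")  # dedent removed the indentation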
llmdataparser/tw_legal_parser.py ADDED

@@ -0,0 +1,92 @@
+from dataclasses import dataclass
+from typing import Any, Final
+
+from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
+from llmdataparser.prompts import TW_LEGAL_SYSTEM_PROMPT
+
+TW_LEGAL_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
+TW_LEGAL_VALID_ANSWER_STR: Final[str] = ", ".join(sorted(TW_LEGAL_VALID_ANSWERS))
+
+
+@dataclass(frozen=True, kw_only=True, slots=True)
+class TWLegalParseEntry(HuggingFaceParseEntry):
+    """Custom entry class for Taiwan Legal Benchmark, with fields specific to this dataset parser."""
+
+    raw_choices: list[str]
+
+    @classmethod
+    def create(
+        cls,
+        prompt: str,
+        answer: str,
+        raw_question: str,
+        raw_choices: list[str],
+        raw_answer: str,
+        task_name: str,
+    ) -> "TWLegalParseEntry":
+        if answer not in TW_LEGAL_VALID_ANSWERS:
+            raise ValueError(
+                f"Invalid answer_letter '{answer}'; must be one of {TW_LEGAL_VALID_ANSWER_STR}"
+            )
+        return cls(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            raw_answer=raw_answer,
+            raw_choices=raw_choices,
+            task_name=task_name,
+        )
+
+
+class TWLegalDatasetParser(HuggingFaceDatasetParser[TWLegalParseEntry]):
+    """Parser for the Taiwan Legal Benchmark dataset."""
+
+    _data_source = "lianghsun/tw-legal-benchmark-v1"
+    _default_task = "default"
+    _task_names = ["default"]
+    _default_system_prompt = TW_LEGAL_SYSTEM_PROMPT
+
+    def process_entry(
+        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
+    ) -> TWLegalParseEntry:
+        """Process a single Taiwan Legal Benchmark entry."""
+        # Extract choices in order
+        task = task_name or self._get_current_task(row)
+        raw_choices = [row["A"], row["B"], row["C"], row["D"]]
+        choices = "\n".join(
+            f"{chr(65 + i)}. {choice}" for i, choice in enumerate(raw_choices)
+        )
+        raw_question = row["question"]
+        raw_answer = row["answer"]
+
+        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+
+        return TWLegalParseEntry.create(
+            prompt=prompt,
+            answer=raw_answer,
+            raw_question=raw_question,
+            raw_choices=raw_choices,
+            raw_answer=raw_answer,
+            task_name=task,
+        )
+
+
+if __name__ == "__main__":
+    # Example usage
+    parser = TWLegalDatasetParser()
+    parser.load()
+    parser.parse()
+
+    # Get parsed data with correct type
+    parsed_data = parser.get_parsed_data
+
+    # Print example entry
+    if parsed_data:
+        example = parsed_data[0]
+        print("\nExample parsed entry:")
+        print(f"Question: {example.raw_question}")
+        print("Choices:")
+        for i, choice in enumerate(example.raw_choices):
+            print(f"{chr(65 + i)}. {choice}")
+        print(f"Correct Answer: {example.answer}")
+        print(f"Task Name: {example.task_name}")
tests/test_tw_legal_parser.py ADDED

@@ -0,0 +1,140 @@
+import pytest
+
+from llmdataparser.tw_legal_parser import TWLegalDatasetParser, TWLegalParseEntry
+
+
+@pytest.fixture
+def tw_legal_parser():
+    """Create a Taiwan Legal parser instance for testing."""
+    return TWLegalDatasetParser()
+
+
+@pytest.fixture
+def sample_tw_legal_entries():
+    """Create sample Taiwan Legal dataset entries for testing."""
+    return [
+        {
+            "question": "依民法規定,下列關於法人之敘述,何者錯誤?",
+            "A": "法人於法令限制內,有享受權利負擔義務之能力",
+            "B": "法人因目的之達到而消滅",
+            "C": "法人非依法律之規定,不得成立",
+            "D": "法人於登記前,即取得權利能力",
+            "answer": "D",
+        },
+        {
+            "question": "關於刑法第321條第1項第4款之結夥三人以上而犯竊盜罪,下列敘述何者正確?",
+            "A": "須行為人主觀上有結夥犯竊盜之認識",
+            "B": "三人以上當場在場實施竊盜行為始足當之",
+            "C": "三人以上已達成犯意聯絡即可成立",
+            "D": "三人以上須全部在現場實施竊盜行為",
+            "answer": "A",
+        },
+    ]
+
+
+def test_tw_legal_parse_entry_creation_valid():
+    """Test valid creation of TWLegalParseEntry."""
+    entry = TWLegalParseEntry.create(
+        prompt="Test prompt",
+        answer="A",
+        raw_question="Test question",
+        raw_choices=["choice1", "choice2", "choice3", "choice4"],
+        raw_answer="A",
+        task_name="default",
+    )
+    assert isinstance(entry, TWLegalParseEntry)
+    assert entry.prompt == "Test prompt"
+    assert entry.answer == "A"
+    assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]
+
+
+@pytest.mark.parametrize("invalid_answer", ["E", "F", "1", "", None])
+def test_tw_legal_parse_entry_creation_invalid(invalid_answer):
+    """Test invalid answer handling in TWLegalParseEntry creation."""
+    with pytest.raises(
+        ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
+    ):
+        TWLegalParseEntry.create(
+            prompt="Test prompt",
+            answer=invalid_answer,
+            raw_question="Test question",
+            raw_choices=["choice1", "choice2", "choice3", "choice4"],
+            raw_answer=invalid_answer,
+            task_name="default",
+        )
+
+
+def test_process_entry(tw_legal_parser, sample_tw_legal_entries):
+    """Test processing entries in Taiwan Legal parser."""
+    entry = tw_legal_parser.process_entry(sample_tw_legal_entries[0])
+
+    assert isinstance(entry, TWLegalParseEntry)
+    assert entry.answer == "D"
+    assert "A. 法人於法令限制內,有享受權利負擔義務之能力" in entry.prompt
+    assert "B. 法人因目的之達到而消滅" in entry.prompt
+    assert "C. 法人非依法律之規定,不得成立" in entry.prompt
+    assert "D. 法人於登記前,即取得權利能力" in entry.prompt
+    assert entry.raw_question == "依民法規定,下列關於法人之敘述,何者錯誤?"
+    assert len(entry.raw_choices) == 4
+
+
+def test_tw_legal_parser_initialization(tw_legal_parser):
+    """Test Taiwan Legal parser initialization and properties."""
+    assert isinstance(tw_legal_parser.task_names, list)
+    assert len(tw_legal_parser.task_names) == 1  # Only default task
+    assert tw_legal_parser._data_source == "lianghsun/tw-legal-benchmark-v1"
+    assert tw_legal_parser._default_task == "default"
+    assert (
+        tw_legal_parser.get_huggingface_link
+        == "https://huggingface.co/datasets/lianghsun/tw-legal-benchmark-v1"
+    )
+
+
+@pytest.mark.integration
+def test_load_dataset(tw_legal_parser):
+    """Test loading the Taiwan Legal dataset."""
+    tw_legal_parser.load(split="train")
+    assert tw_legal_parser.raw_data is not None
+    assert tw_legal_parser.split_names == ["train"]
+    assert tw_legal_parser._current_task == "default"
+
+
+def test_parser_string_representation(tw_legal_parser):
+    """Test string representation of Taiwan Legal parser."""
+    repr_str = str(tw_legal_parser)
+    assert "TWLegalDatasetParser" in repr_str
+    assert "lianghsun/tw-legal-benchmark-v1" in repr_str
+    assert "not loaded" in repr_str
+
+
+@pytest.mark.integration
+def test_data_parsing(tw_legal_parser):
+    """Test parsing the dataset."""
+    # Load and parse train split
+    tw_legal_parser.load(split="train")
+    tw_legal_parser.parse(split_names="train", force=True)
+    train_count = len(tw_legal_parser.get_parsed_data)
+
+    assert train_count > 0
+    # Additional assertions about the parsed data
+    parsed_data = tw_legal_parser.get_parsed_data
+    assert all(isinstance(entry, TWLegalParseEntry) for entry in parsed_data)
+    assert all(entry.answer in {"A", "B", "C", "D"} for entry in parsed_data)
+
+
+def test_system_prompt_override(tw_legal_parser):
+    """Test overriding the default system prompt."""
+    custom_prompt = "Custom system prompt for testing"
+    parser = TWLegalDatasetParser(system_prompt=custom_prompt)
+
+    test_entry = {
+        "question": "Test question",
+        "A": "Choice A",
+        "B": "Choice B",
+        "C": "Choice C",
+        "D": "Choice D",
+        "answer": "A",
+    }
+
+    entry = parser.process_entry(test_entry)
+    assert custom_prompt in entry.prompt
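
To run only the offline tests from this file, the dataset-download cases marked with @pytest.mark.integration can be deselected. A small sketch, assuming the integration marker is registered in the project's pytest configuration; running `pytest -m "not integration" tests/test_tw_legal_parser.py` from the shell is equivalent.

    # Convenience sketch for invoking pytest programmatically.
    import sys

    import pytest

    if __name__ == "__main__":
        # Deselect tests carrying the integration marker so no network access is needed.
        sys.exit(pytest.main(["-m", "not integration", "tests/test_tw_legal_parser.py"]))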