LLMEval-Dataset-Parser / llmdataparser /humaneval_parser.py
JeffYang52415's picture
feat: add humaneval parser
e9b694b unverified
raw
history blame
4.65 kB
from dataclasses import dataclass
from typing import Any, ClassVar
from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
from llmdataparser.prompts import HUMANEVAL_SYSTEM_PROMPT
@dataclass(frozen=True, kw_only=True, slots=True)
class HumanEvalParseEntry(HuggingFaceParseEntry):
    """Parsed HumanEval record: the base entry fields plus HumanEval metadata.

    The dataclass is frozen, keyword-only and slotted. Prefer building
    instances through :meth:`create`, which rejects an empty task id or
    entry point before the entry is constructed.
    """

    # Identifier of the problem; ``create`` refuses an empty value.
    task_id: str
    # Name of the task this entry was parsed under.
    task_name: str
    # Entry point of the problem; ``create`` refuses an empty value.
    entry_point: str
    # Test string associated with the problem, stored as given.
    test: str

    @classmethod
    def create(
        cls,
        prompt: str,
        answer: str,
        raw_question: str,
        task_id: str,
        entry_point: str,
        test: str,
        task_name: str,
    ) -> "HumanEvalParseEntry":
        """Validate the identifying fields and build an entry.

        Args:
            prompt: Full prompt text stored on the entry.
            answer: Solution text; also stored as ``raw_answer``.
            raw_question: Unmodified question text.
            task_id: Problem identifier; must not be empty.
            entry_point: Entry point of the problem; must not be empty.
            test: Test string stored on the entry.
            task_name: Name of the task the entry belongs to.

        Returns:
            A new ``HumanEvalParseEntry`` populated from the arguments.

        Raises:
            ValueError: If ``task_id`` or ``entry_point`` is falsy. The task
                id is checked first.
        """
        required_fields = (
            (task_id, "Task ID cannot be empty"),
            (entry_point, "Entry point cannot be empty"),
        )
        for value, message in required_fields:
            if not value:
                raise ValueError(message)

        # In HumanEval the canonical solution doubles as the raw answer, so
        # ``answer`` is stored under both fields.
        return cls(
            task_id=task_id,
            task_name=task_name,
            entry_point=entry_point,
            test=test,
            prompt=prompt,
            raw_question=raw_question,
            answer=answer,
            raw_answer=answer,
        )
class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
    """Dataset parser that turns HumanEval rows into ``HumanEvalParseEntry`` objects."""

    _data_source: ClassVar[str] = "openai/openai_humaneval"
    _default_task: ClassVar[str] = "openai_humaneval"
    _task_names: ClassVar[list[str]] = ["openai_humaneval"]
    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT

    def process_entry(
        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
    ) -> HumanEvalParseEntry:
        """Convert one dataset row into a ``HumanEvalParseEntry``.

        Args:
            row: Mapping that must contain the keys ``"prompt"``,
                ``"canonical_solution"``, ``"task_id"``, ``"entry_point"``
                and ``"test"``.
            task_name: Task name to record on the entry. When falsy, the
                name comes from ``self._get_current_task(row)``.
            **kwargs: Accepted but not used by this method.

        Returns:
            The entry built by ``HumanEvalParseEntry.create``.

        Raises:
            KeyError: If ``row`` lacks one of the required keys.
            ValueError: Propagated from ``HumanEvalParseEntry.create`` when
                the task id or entry point is empty.
        """
        question = row["prompt"]
        solution = row["canonical_solution"]
        identifier = row["task_id"]
        function_name = row["entry_point"]
        test_code = row["test"]

        # The model-facing prompt is the system prompt, a blank line, then
        # the row's own prompt text.
        full_prompt = "{}\n\n{}".format(self._system_prompt, question)

        # An explicit task name wins; only fall back to the parser's current
        # task when none was supplied.
        if task_name:
            resolved_task = task_name
        else:
            resolved_task = self._get_current_task(row)

        return HumanEvalParseEntry.create(
            prompt=full_prompt,
            answer=solution,
            raw_question=question,
            task_id=identifier,
            entry_point=function_name,
            test=test_code,
            task_name=resolved_task,
        )
class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
    """Parser for the HumanEvalPlus dataset (``evalplus/humanevalplus``).

    Only the data source and the task names differ from
    ``HumanEvalDatasetParser``. Rows are handled by the ``process_entry``
    inherited from that class, which reads the ``"prompt"``,
    ``"canonical_solution"``, ``"task_id"``, ``"entry_point"`` and ``"test"``
    keys. The method is deliberately not re-declared here, so the two parsers
    cannot drift apart.
    """

    _data_source: ClassVar[str] = "evalplus/humanevalplus"
    _default_task: ClassVar[str] = "default"
    _task_names: ClassVar[list[str]] = ["default"]
    # Same system prompt as the parent; kept explicit for readability.
    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
if __name__ == "__main__":
    # Demo 1: load and parse the original HumanEval dataset, then show the
    # first parsed entry.
    humaneval_parser = HumanEvalDatasetParser()
    humaneval_parser.load()
    humaneval_parser.parse()

    # NOTE(review): get_parsed_data is read without being called, so it is
    # presumably a property on the base parser; confirm.
    humaneval_entries = humaneval_parser.get_parsed_data
    if humaneval_entries:
        first_entry = humaneval_entries[0]
        print("\nExample parsed entry:")
        print(f"Task ID: {first_entry.task_id}")
        print(f"Entry Point: {first_entry.entry_point}")
        print(f"Prompt:\n{first_entry.prompt}")
        print(f"Solution:\n{first_entry.answer}")

    # Demo 2: same flow for the HumanEvalPlus variant, printing a different
    # selection of fields.
    plus_parser = HumanEvalDatasetPlusParser()
    plus_parser.load()
    plus_parser.parse()

    plus_entries = plus_parser.get_parsed_data
    if plus_entries:
        first_plus_entry = plus_entries[0]
        print("\nExample parsed entry:")
        print(f"Task: {first_plus_entry.task_name}")
        print(f"Question: {first_plus_entry.raw_question}")
        print(f"Correct Answer: {first_plus_entry.answer}")