import pytest

from llmdataparser.ifeval_parser import IFEvalDatasetParser, IFEvalParseEntry


@pytest.fixture
def sample_ifeval_entries():
    """Create sample IFEval dataset entries for testing."""
    return [
        {
            "key": 1,
            "prompt": "Write a function to calculate factorial.",
            "instruction_id_list": ["math_001", "programming_001"],
            "kwargs": {"difficulty": "medium", "category": "mathematics"},
        },
        {
            "key": 2,
            "prompt": "Explain quantum computing.",
            "instruction_id_list": ["physics_001"],
            "kwargs": {"difficulty": "hard", "category": "physics"},
        },
    ]


@pytest.fixture
def ifeval_parser():
    """Create an IFEval parser instance."""
    return IFEvalDatasetParser()


def test_ifeval_parse_entry_creation_valid():
    """Test valid creation of IFEvalParseEntry."""
    entry = IFEvalParseEntry.create(
        prompt="Test system prompt\n\nTest instruction",
        answer="",  # IFEval doesn't have answers
        raw_question="Test instruction",
        raw_answer="",
        key=1,
        instruction_id_list=["test_001", "test_002"],
        kwargs={"difficulty": "easy"},
        task_name="default",
    )

    assert isinstance(entry, IFEvalParseEntry)
    assert entry.prompt == "Test system prompt\n\nTest instruction"
    assert entry.answer == ""
    assert entry.key == 1
    assert entry.instruction_id_list == ["test_001", "test_002"]
    assert entry.kwargs == {"difficulty": "easy"}
    assert entry.task_name == "default"


def test_process_entry_ifeval(ifeval_parser, sample_ifeval_entries):
    """Test processing entries in IFEval parser."""
    entry = ifeval_parser.process_entry(sample_ifeval_entries[0])

    assert isinstance(entry, IFEvalParseEntry)
    assert entry.key == 1
    assert entry.instruction_id_list == ["math_001", "programming_001"]
    assert entry.kwargs == {"difficulty": "medium", "category": "mathematics"}
    assert entry.raw_question == "Write a function to calculate factorial."
    assert entry.answer == ""  # IFEval doesn't have answers
    assert entry.task_name == "default"


def test_parser_initialization(ifeval_parser):
    """Test initialization of IFEval parser."""
    assert ifeval_parser._data_source == "google/IFEval"
    assert ifeval_parser._default_task == "default"
    assert ifeval_parser.task_names == ["default"]
    assert (
        ifeval_parser.get_huggingface_link
        == "https://huggingface.co/datasets/google/IFEval"
    )


@pytest.mark.integration
def test_load_dataset(ifeval_parser):
    """Test loading the IFEval dataset."""
    ifeval_parser.load(split="train")
    assert ifeval_parser.raw_data is not None
    assert ifeval_parser.split_names == ["train"]
    assert ifeval_parser._current_task == "default"


def test_parser_string_representation(ifeval_parser):
    """Test string representation of IFEval parser."""
    repr_str = str(ifeval_parser)
    assert "IFEvalDatasetParser" in repr_str
    assert "google/IFEval" in repr_str
    assert "not loaded" in repr_str