File size: 2,561 Bytes
fea07c2
5d76917
 
7758a19
5d76917
 
 
 
 
 
 
 
 
ffb4e87
 
5d76917
eef7dd3
5d76917
 
 
eef7dd3
5d76917
 
 
 
 
eef7dd3
5d76917
7758a19
5d76917
94a64b0
 
5d76917
 
fea07c2
5d76917
ef766f7
5d76917
eef7dd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b1aa61
 
 
 
 
 
 
 
 
5d76917
94a64b0
7758a19
 
5d76917
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from typing import Dict, List, Any
from pydantic import BaseModel, Field, ConfigDict
import yaml
from litellm import validate_environment


class InputModel(BaseModel):
    """Input configuration for the surf spot finder test case.

    All three fields are required. Keys other than the ones declared here
    are rejected at validation time because of ``extra="forbid"``.
    """

    # Reject unknown keys instead of silently ignoring them, so typos in a
    # test-case file surface as validation errors.
    model_config = ConfigDict(extra="forbid")
    # Free-form location string; presumably the search origin -- confirm
    # against the code that consumes this model.
    location: str
    # Kept as a plain string; no date format is enforced here.
    date: str
    # Whole hours only (int). Presumably an upper bound on travel time.
    max_driving_hours: int


class CheckpointCriteria(BaseModel):
    """A single criterion (free-text statement) paired with a point value.

    Both fields are required. Keys other than ``criteria`` and ``points``
    are rejected at validation time because of ``extra="forbid"``.
    """

    # Reject unknown keys instead of silently ignoring them.
    model_config = ConfigDict(extra="forbid")
    # Human-readable statement describing what should be checked.
    criteria: str
    # Integer weight attached to the criterion; presumably the score awarded
    # when it is met -- confirm against the evaluation code.
    points: int


class TestCase(BaseModel):
    """A test case definition, normally built from a YAML file.

    Use :meth:`from_yaml` to load one; it derives ``final_answer_criteria``
    from ``ground_truth`` and records the path the test case was read from.
    Unknown keys are rejected at validation time (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")
    input: InputModel
    ground_truth: List[Dict[str, Any]] = Field(default_factory=list)
    checkpoints: List[CheckpointCriteria] = Field(default_factory=list)
    # Model identifier handed to litellm's ``validate_environment``.
    llm_judge: str
    final_answer_criteria: List[CheckpointCriteria] = Field(default_factory=list)
    test_case_path: str
    output_path: str = "output/results.json"

    @classmethod
    def from_yaml(cls, test_case_path: str) -> "TestCase":
        """Load a test case from a YAML file and process it.

        When the document has a ``ground_truth`` key, every dict entry that
        carries both ``name`` and ``value`` is turned into one final-answer
        criterion (worth ``points``, defaulting to 1), and
        ``final_answer_criteria`` is replaced by that generated list.

        Args:
            test_case_path: Path of the YAML file; also stored on the
                returned instance as ``test_case_path``.

        Returns:
            The validated ``TestCase``.

        Raises:
            OSError: If the file cannot be opened.
            yaml.YAMLError: If the file is not valid YAML.
            TypeError: If the YAML document is not a mapping (for example,
                the file is empty).
            KeyError: If the document has no ``llm_judge`` key.
            pydantic.ValidationError: If the content does not match the
                model schema.
        """
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # locale encoding.
        with open(test_case_path, "r", encoding="utf-8") as f:
            test_case_dict = yaml.safe_load(f)

        # safe_load yields None for an empty file and may yield a list or a
        # scalar; fail with a clear message instead of an obscure TypeError
        # further down.
        if not isinstance(test_case_dict, dict):
            raise TypeError(
                f"Test case file {test_case_path!r} must contain a YAML mapping, "
                f"got {type(test_case_dict).__name__}"
            )

        if "ground_truth" in test_case_dict:
            ground_truth = test_case_dict["ground_truth"]
            if ground_truth is None:
                # A bare `ground_truth:` key parses to None; treat it as empty.
                ground_truth = []

            # One criterion per ground-truth entry that has a name and a
            # value. This overwrites any `final_answer_criteria` that was
            # present in the YAML document.
            test_case_dict["final_answer_criteria"] = [
                {
                    "points": item.get("points", 1),  # default to 1 point
                    "criteria": f"Check if {item['name']} is approximately '{item['value']}'.",
                }
                for item in ground_truth
                if isinstance(item, dict) and "name" in item and "value" in item
            ]
            # Drop entries that are not dicts. Dict entries are kept as-is,
            # including any `points` key they carry.
            test_case_dict["ground_truth"] = [
                item for item in ground_truth if isinstance(item, dict)
            ]

        test_case_dict["test_case_path"] = test_case_path
        # Ask litellm about the judge model's environment.
        # NOTE(review): the return value is discarded. litellm appears to
        # report missing API keys through the returned dict rather than by
        # raising -- confirm whether the result should be checked here.
        validate_environment(test_case_dict["llm_judge"])
        return cls.model_validate(test_case_dict)