File size: 1,334 Bytes
7758a19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from typing import Dict, List, Any

from surf_spot_finder.evaluation.evaluators.LLMEvaluator import LLMEvaluator
from surf_spot_finder.evaluation.evaluators.schemas import EvaluationResult
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
from surf_spot_finder.evaluation.test_case import CheckpointCriteria


class CheckpointEvaluator(LLMEvaluator):
    """LLM-backed evaluator that judges checkpoint criteria against telemetry."""

    def evaluate(
        self,
        telemetry: List[Dict[str, Any]],
        checkpoints: List[CheckpointCriteria],
        processor: TelemetryProcessor,
    ) -> List[EvaluationResult]:
        """
        Judge every checkpoint against evidence extracted from the telemetry.

        The evidence is extracted a single time through
        ``processor.extract_evidence`` and the same value is handed to
        ``llm_evaluate_with_criterion`` for each checkpoint.

        Args:
            telemetry: The telemetry data to evaluate
            checkpoints: Checkpoint criteria to verify; each one supplies the
                ``criteria`` and ``points`` passed to the LLM evaluation
            processor: Telemetry processor used to extract the evidence

        Returns:
            One evaluation result per checkpoint, in the same order as
            ``checkpoints``
        """
        # Extract once up front so every checkpoint is judged on the same evidence.
        shared_evidence = processor.extract_evidence(telemetry)

        return [
            self.llm_evaluate_with_criterion(
                criteria=item.criteria,
                points=item.points,
                evidence=shared_evidence,
            )
            for item in checkpoints
        ]