Nathan Brake
Add SquadQA metric, split out the files and support for ollama LLM-as-judge (#35)
7758a19 unverified
raw
history blame
252 Bytes
from pydantic import BaseModel, ConfigDict
class EvaluationResult(BaseModel):
    """Outcome of evaluating one criterion.

    The model is strict about its shape: ``extra="forbid"`` makes pydantic
    raise a validation error when input carries keys that are not declared
    as fields here.
    """

    # Reject undeclared keys instead of silently ignoring them.
    model_config = ConfigDict(extra="forbid")

    # Field order is kept as declared; pydantic uses it for serialization.
    passed: bool  # presumably True when the criterion was satisfied
    reason: str  # presumably the justification for the verdict -- confirm with producer
    criteria: str  # presumably the text of the criterion that was evaluated
    points: int  # NOTE(review): awarded vs. available points is not visible here -- confirm