add evaluation model record
src/know_lang_bot/config.py
CHANGED

@@ -119,8 +119,7 @@ class LLMConfig(BaseSettings):
         elif info.data['model_provider'] == ModelProvider.ANTHROPIC:
             os.environ["ANTHROPIC_API_KEY"] = v
         elif info.data['model_provider'] == ModelProvider.OPENAI:
-
-            openai.api_key = v
+            os.environ["OPENAI_API_KEY"] = v
 
         return v
 
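Replacing the module-level assignment openai.api_key = v with an environment variable brings the OpenAI branch in line with the Anthropic one and avoids mutating the openai package's global state. Below is a minimal sketch of the validator after this change, assuming pydantic v2 with pydantic-settings; the field names, defaults, and the set_api_key validator name are illustrative guesses, not taken from the repository.

    import os
    from enum import Enum
    from typing import Optional

    from pydantic import ValidationInfo, field_validator
    from pydantic_settings import BaseSettings


    class ModelProvider(str, Enum):
        ANTHROPIC = "anthropic"
        OPENAI = "openai"


    class LLMConfig(BaseSettings):
        # Field names and defaults are hypothetical stand-ins.
        model_provider: ModelProvider = ModelProvider.OPENAI
        model_name: str = "gpt-4o"
        api_key: Optional[str] = None

        @field_validator("api_key")
        @classmethod
        def set_api_key(cls, v: Optional[str], info: ValidationInfo) -> Optional[str]:
            if v is None:
                return v
            # Export the key through the environment for both providers
            # instead of assigning to the openai module's global attribute.
            if info.data['model_provider'] == ModelProvider.ANTHROPIC:
                os.environ["ANTHROPIC_API_KEY"] = v
            elif info.data['model_provider'] == ModelProvider.OPENAI:
                os.environ["OPENAI_API_KEY"] = v
            return v

Since both official client libraries read ANTHROPIC_API_KEY and OPENAI_API_KEY from the environment by default, any client constructed after validation picks the key up without extra wiring.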
src/know_lang_bot/evaluation/chatbot_evaluation.py
CHANGED

@@ -22,6 +22,7 @@ class EvalCase(BaseModel):
 
 class EvalResult(BaseModel):
     """Evaluation result with scores and feedback"""
+    evaluator_model: str
     case: EvalCase
     metrics: Dict[EvalMetric, float]
     total_score: float

@@ -117,6 +118,7 @@ Format your response as JSON:
             metrics=metrics,
             total_score=total_score,
             feedback=eval_response.feedback,
+            evaluator_model=f"{self.config.evaluator.model_provider}:{self.config.evaluator.model_name}"
         )
 
     async def evaluate_batch(
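Stamping each result with a provider:model string makes evaluation records self-describing, so scores produced by different judge models can be filtered and compared later. Below is a minimal sketch of the extended model in use; the EvalMetric members, the EvalCase field, and the concrete model string are placeholders, and the feedback field is inferred from the constructor call in the diff above rather than shown in the hunk.

    from enum import Enum
    from typing import Dict

    from pydantic import BaseModel


    class EvalMetric(str, Enum):
        # Placeholder members; the real enum is defined earlier in the module.
        RELEVANCE = "relevance"
        CORRECTNESS = "correctness"


    class EvalCase(BaseModel):
        # Placeholder field; the real model is defined earlier in the module.
        question: str


    class EvalResult(BaseModel):
        """Evaluation result with scores and feedback"""
        evaluator_model: str  # new field: records which judge produced the scores
        case: EvalCase
        metrics: Dict[EvalMetric, float]
        total_score: float
        feedback: str


    result = EvalResult(
        evaluator_model="anthropic:claude-3-5-sonnet",  # hypothetical provider:model pair
        case=EvalCase(question="How are code chunks embedded?"),
        metrics={EvalMetric.RELEVANCE: 0.9, EvalMetric.CORRECTNESS: 0.8},
        total_score=0.85,
        feedback="Answer cites the correct chunk.",
    )
    print(result.evaluator_model)  # anthropic:claude-3-5-sonnet

One caveat worth noting: if model_provider is an Enum rather than a plain string, the f-string in the diff may render the member name instead of its value depending on the Python version and enum definition; formatting with .value would pin the provider:model format explicitly.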