Spaces:
Running
Running
Nathan Brake
daavoo
committed on
Messaging capabilities come to tools (#37)
Browse files
* Messaging capabilities come to tools
* Remove print statement from QuestionAnsweringSquadEvaluator to clean up output
* docs use ssh token
* Update config
---------
Co-authored-by: daavoo <[email protected]>
- examples/langchain_single_agent_user_confirmation.yaml +21 -0
- examples/openai_single_agent_user_confirmation.yaml +20 -0
- examples/smolagents_single_agent_user_confirmation.yaml +22 -0
- src/surf_spot_finder/config.py +3 -1
- src/surf_spot_finder/evaluation/evaluate.py +1 -0
- src/surf_spot_finder/evaluation/evaluators/QuestionAnsweringSquadEvaluator.py +0 -1
- src/surf_spot_finder/evaluation/test_cases/alpha.yaml +3 -3
examples/langchain_single_agent_user_confirmation.yaml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
input_prompt_template: |
|
6 |
+
According to the forecast, what will be the best spot to surf around {LOCATION},
|
7 |
+
in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
|
8 |
+
Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
|
9 |
+
and then confirm the final selection with him.
|
10 |
+
agent:
|
11 |
+
model_id: openai/gpt-4o
|
12 |
+
# model_id: ollama/llama3.1:latest
|
13 |
+
api_key_var: OPENAI_API_KEY
|
14 |
+
agent_type: langchain
|
15 |
+
tools:
|
16 |
+
- "surf_spot_finder.tools.driving_hours_to_meters"
|
17 |
+
- "surf_spot_finder.tools.get_area_lat_lon"
|
18 |
+
- "surf_spot_finder.tools.get_surfing_spots"
|
19 |
+
- "surf_spot_finder.tools.get_wave_forecast"
|
20 |
+
- "surf_spot_finder.tools.get_wind_forecast"
|
21 |
+
- "any_agent.tools.send_slack_message"
|
examples/openai_single_agent_user_confirmation.yaml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
input_prompt_template: |
|
6 |
+
According to the forecast, what will be the best spot to surf around {LOCATION},
|
7 |
+
in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
|
8 |
+
Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
|
9 |
+
and then confirm the final selection with him.
|
10 |
+
agent:
|
11 |
+
model_id: gpt-4o
|
12 |
+
api_key_var: OPENAI_API_KEY
|
13 |
+
agent_type: openai
|
14 |
+
tools:
|
15 |
+
- "surf_spot_finder.tools.driving_hours_to_meters"
|
16 |
+
- "surf_spot_finder.tools.get_area_lat_lon"
|
17 |
+
- "surf_spot_finder.tools.get_surfing_spots"
|
18 |
+
- "surf_spot_finder.tools.get_wave_forecast"
|
19 |
+
- "surf_spot_finder.tools.get_wind_forecast"
|
20 |
+
- "any_agent.tools.send_slack_message"
|
examples/smolagents_single_agent_user_confirmation.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
input_prompt_template: |
|
6 |
+
According to the forecast, what will be the best spot to surf around {LOCATION},
|
7 |
+
in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
|
8 |
+
Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
|
9 |
+
and then confirm the final selection with him.
|
10 |
+
agent:
|
11 |
+
model_id: openai/gpt-4o
|
12 |
+
# model_id: ollama/llama3.1:latest
|
13 |
+
api_key_var: OPENAI_API_KEY
|
14 |
+
agent_type: smolagents
|
15 |
+
tools:
|
16 |
+
- "surf_spot_finder.tools.driving_hours_to_meters"
|
17 |
+
- "surf_spot_finder.tools.get_area_lat_lon"
|
18 |
+
- "surf_spot_finder.tools.get_surfing_spots"
|
19 |
+
- "surf_spot_finder.tools.get_wave_forecast"
|
20 |
+
- "surf_spot_finder.tools.get_wind_forecast"
|
21 |
+
- "any_agent.tools.send_slack_message"
|
22 |
+
- "smolagents.FinalAnswerTool"
|
src/surf_spot_finder/config.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from typing import Annotated
|
2 |
|
3 |
from any_agent.schema import AgentSchema
|
4 |
-
from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
|
5 |
|
6 |
|
7 |
INPUT_PROMPT_TEMPLATE = """
|
@@ -19,6 +19,8 @@ def validate_prompt(value) -> str:
|
|
19 |
|
20 |
|
21 |
class Config(BaseModel):
|
|
|
|
|
22 |
location: str
|
23 |
max_driving_hours: PositiveInt
|
24 |
date: FutureDatetime
|
|
|
1 |
from typing import Annotated
|
2 |
|
3 |
from any_agent.schema import AgentSchema
|
4 |
+
from pydantic import AfterValidator, BaseModel, ConfigDict, FutureDatetime, PositiveInt
|
5 |
|
6 |
|
7 |
INPUT_PROMPT_TEMPLATE = """
|
|
|
19 |
|
20 |
|
21 |
class Config(BaseModel):
|
22 |
+
model_config = ConfigDict(extra="forbid")
|
23 |
+
|
24 |
location: str
|
25 |
max_driving_hours: PositiveInt
|
26 |
date: FutureDatetime
|
src/surf_spot_finder/evaluation/evaluate.py
CHANGED
@@ -42,6 +42,7 @@ def run_agent(test_case: TestCase, agent_config_path: str) -> str:
|
|
42 |
json_tracer=config.json_tracer,
|
43 |
api_base=config.api_base,
|
44 |
tools=config.tools,
|
|
|
45 |
)
|
46 |
|
47 |
|
|
|
42 |
json_tracer=config.json_tracer,
|
43 |
api_base=config.api_base,
|
44 |
tools=config.tools,
|
45 |
+
input_prompt_template=config.input_prompt_template,
|
46 |
)
|
47 |
|
48 |
|
src/surf_spot_finder/evaluation/evaluators/QuestionAnsweringSquadEvaluator.py
CHANGED
@@ -30,7 +30,6 @@ class QuestionAnsweringSquadEvaluator:
|
|
30 |
result = self.metric.compute(
|
31 |
predictions=hypothesis_answer, references=ground_truth_answer
|
32 |
)
|
33 |
-
print(result)
|
34 |
|
35 |
match = EvaluationResult(
|
36 |
passed=True if int(result["exact_match"]) == 1 else False,
|
|
|
30 |
result = self.metric.compute(
|
31 |
predictions=hypothesis_answer, references=ground_truth_answer
|
32 |
)
|
|
|
33 |
|
34 |
match = EvaluationResult(
|
35 |
passed=True if int(result["exact_match"]) == 1 else False,
|
src/surf_spot_finder/evaluation/test_cases/alpha.yaml
CHANGED
@@ -18,8 +18,6 @@ ground_truth:
|
|
18 |
# These evaluators for these checkpoints
|
19 |
# will not consider the hypothesis answer or final answer in their decision making
|
20 |
checkpoints:
|
21 |
-
- points: 1
|
22 |
-
criteria: "Check if the agent did a web search for nearby surf locations."
|
23 |
- points: 1
|
24 |
criteria: "Check if the agent used the get_surfing_spots tool and it succeeded"
|
25 |
- points: 1
|
@@ -28,8 +26,10 @@ checkpoints:
|
|
28 |
criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
|
29 |
- points: 1
|
30 |
criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
|
|
|
|
|
31 |
- points: 1
|
32 |
criteria: "Check if the final answer contains any description about the weather at the chosen location"
|
33 |
|
34 |
|
35 |
-
llm_judge: "
|
|
|
18 |
# These evaluators for these checkpoints
|
19 |
# will not consider the hypothesis answer or final answer in their decision making
|
20 |
checkpoints:
|
|
|
|
|
21 |
- points: 1
|
22 |
criteria: "Check if the agent used the get_surfing_spots tool and it succeeded"
|
23 |
- points: 1
|
|
|
26 |
criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
|
27 |
- points: 1
|
28 |
criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
|
29 |
+
- points: 1
|
30 |
+
criteria: "Check if the agent confirmed the selection with David de la Iglesia Castro"
|
31 |
- points: 1
|
32 |
criteria: "Check if the final answer contains any description about the weather at the chosen location"
|
33 |
|
34 |
|
35 |
+
llm_judge: "openai/gpt-4o"
|