Nathan Brake daavoo committed on
Commit
6646f61
·
unverified ·
1 Parent(s): 1cb4327

Messaging capabilities come to tools (#37)

Browse files

* Messaging capabilities come to tools

* Remove print statement from QuestionAnsweringSquadEvaluator to clean up output

* docs use ssh token

* Update config

---------

Co-authored-by: daavoo <[email protected]>

examples/langchain_single_agent_user_confirmation.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ input_prompt_template: |
6
+ According to the forecast, what will be the best spot to surf around {LOCATION},
7
+ in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
8
+ Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
9
+ and then confirm the final selection with him.
10
+ agent:
11
+ model_id: openai/gpt-4o
12
+ # model_id: ollama/llama3.1:latest
13
+ api_key_var: OPENAI_API_KEY
14
+ agent_type: langchain
15
+ tools:
16
+ - "surf_spot_finder.tools.driving_hours_to_meters"
17
+ - "surf_spot_finder.tools.get_area_lat_lon"
18
+ - "surf_spot_finder.tools.get_surfing_spots"
19
+ - "surf_spot_finder.tools.get_wave_forecast"
20
+ - "surf_spot_finder.tools.get_wind_forecast"
21
+ - "any_agent.tools.send_slack_message"
examples/openai_single_agent_user_confirmation.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ input_prompt_template: |
6
+ According to the forecast, what will be the best spot to surf around {LOCATION},
7
+ in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
8
+ Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
9
+ and then confirm the final selection with him.
10
+ agent:
11
+ model_id: gpt-4o
12
+ api_key_var: OPENAI_API_KEY
13
+ agent_type: openai
14
+ tools:
15
+ - "surf_spot_finder.tools.driving_hours_to_meters"
16
+ - "surf_spot_finder.tools.get_area_lat_lon"
17
+ - "surf_spot_finder.tools.get_surfing_spots"
18
+ - "surf_spot_finder.tools.get_wave_forecast"
19
+ - "surf_spot_finder.tools.get_wind_forecast"
20
+ - "any_agent.tools.send_slack_message"
examples/smolagents_single_agent_user_confirmation.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ input_prompt_template: |
6
+ According to the forecast, what will be the best spot to surf around {LOCATION},
7
+ in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
8
+ Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
9
+ and then confirm the final selection with him.
10
+ agent:
11
+ model_id: openai/gpt-4o
12
+ # model_id: ollama/llama3.1:latest
13
+ api_key_var: OPENAI_API_KEY
14
+ agent_type: smolagents
15
+ tools:
16
+ - "surf_spot_finder.tools.driving_hours_to_meters"
17
+ - "surf_spot_finder.tools.get_area_lat_lon"
18
+ - "surf_spot_finder.tools.get_surfing_spots"
19
+ - "surf_spot_finder.tools.get_wave_forecast"
20
+ - "surf_spot_finder.tools.get_wind_forecast"
21
+ - "any_agent.tools.send_slack_message"
22
+ - "smolagents.FinalAnswerTool"
src/surf_spot_finder/config.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import Annotated
2
 
3
  from any_agent.schema import AgentSchema
4
- from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
5
 
6
 
7
  INPUT_PROMPT_TEMPLATE = """
@@ -19,6 +19,8 @@ def validate_prompt(value) -> str:
19
 
20
 
21
  class Config(BaseModel):
 
 
22
  location: str
23
  max_driving_hours: PositiveInt
24
  date: FutureDatetime
 
1
  from typing import Annotated
2
 
3
  from any_agent.schema import AgentSchema
4
+ from pydantic import AfterValidator, BaseModel, ConfigDict, FutureDatetime, PositiveInt
5
 
6
 
7
  INPUT_PROMPT_TEMPLATE = """
 
19
 
20
 
21
  class Config(BaseModel):
22
+ model_config = ConfigDict(extra="forbid")
23
+
24
  location: str
25
  max_driving_hours: PositiveInt
26
  date: FutureDatetime
src/surf_spot_finder/evaluation/evaluate.py CHANGED
@@ -42,6 +42,7 @@ def run_agent(test_case: TestCase, agent_config_path: str) -> str:
42
  json_tracer=config.json_tracer,
43
  api_base=config.api_base,
44
  tools=config.tools,
 
45
  )
46
 
47
 
 
42
  json_tracer=config.json_tracer,
43
  api_base=config.api_base,
44
  tools=config.tools,
45
+ input_prompt_template=config.input_prompt_template,
46
  )
47
 
48
 
src/surf_spot_finder/evaluation/evaluators/QuestionAnsweringSquadEvaluator.py CHANGED
@@ -30,7 +30,6 @@ class QuestionAnsweringSquadEvaluator:
30
  result = self.metric.compute(
31
  predictions=hypothesis_answer, references=ground_truth_answer
32
  )
33
- print(result)
34
 
35
  match = EvaluationResult(
36
  passed=True if int(result["exact_match"]) == 1 else False,
 
30
  result = self.metric.compute(
31
  predictions=hypothesis_answer, references=ground_truth_answer
32
  )
 
33
 
34
  match = EvaluationResult(
35
  passed=True if int(result["exact_match"]) == 1 else False,
src/surf_spot_finder/evaluation/test_cases/alpha.yaml CHANGED
@@ -18,8 +18,6 @@ ground_truth:
18
  # These evaluators for these checkpoints
19
  # will not consider the hypothesis answer or final answer in their decision making
20
  checkpoints:
21
- - points: 1
22
- criteria: "Check if the agent did a web search for nearby surf locations."
23
  - points: 1
24
  criteria: "Check if the agent used the get_surfing_spots tool and it succeeded"
25
  - points: 1
@@ -28,8 +26,10 @@ checkpoints:
28
  criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
29
  - points: 1
30
  criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
 
 
31
  - points: 1
32
  criteria: "Check if the final answer contains any description about the weather at the chosen location"
33
 
34
 
35
- llm_judge: "ollama/gemma3:4b-it-fp16"
 
18
  # These evaluators for these checkpoints
19
  # will not consider the hypothesis answer or final answer in their decision making
20
  checkpoints:
 
 
21
  - points: 1
22
  criteria: "Check if the agent used the get_surfing_spots tool and it succeeded"
23
  - points: 1
 
26
  criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
27
  - points: 1
28
  criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
29
+ - points: 1
30
+ criteria: "Check if the agent confirmed the selection with David de la Iglesia Castro"
31
  - points: 1
32
  criteria: "Check if the final answer contains any description about the weather at the chosen location"
33
 
34
 
35
+ llm_judge: "openai/gpt-4o"