Spaces:

mozilla-ai
/

surf-spot-finder

Running

Nathan Brake and daavoo committed on Mar 26

Commit

6646f61

unverified ·

1 Parent(s): 1cb4327

Messaging capabilities come to tools (#37)

* Messaging capabilities come to tools

* Remove print statement from QuestionAnsweringSquadEvaluator to clean up output

* docs use ssh token

* Update config

---------

Co-authored-by: daavoo <[email protected]>

Files changed (7)

examples/langchain_single_agent_user_confirmation.yaml +21 -0
examples/openai_single_agent_user_confirmation.yaml +20 -0
examples/smolagents_single_agent_user_confirmation.yaml +22 -0
src/surf_spot_finder/config.py +3 -1
src/surf_spot_finder/evaluation/evaluate.py +1 -0
src/surf_spot_finder/evaluation/evaluators/QuestionAnsweringSquadEvaluator.py +0 -1
src/surf_spot_finder/evaluation/test_cases/alpha.yaml +3 -3

examples/langchain_single_agent_user_confirmation.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+input:
+  location: Pontevedra
+  date: 2025-03-27 12:00
+  max_driving_hours: 2
+  input_prompt_template: |
+    According to the forecast, what will be the best spot to surf around {LOCATION},
+    in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+    Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
+    and then confirm the final selection with him.
+agent:
+  model_id: openai/gpt-4o
+  # model_id: ollama/llama3.1:latest
+  api_key_var: OPENAI_API_KEY
+  agent_type: langchain
+  tools:
+  - "surf_spot_finder.tools.driving_hours_to_meters"
+  - "surf_spot_finder.tools.get_area_lat_lon"
+  - "surf_spot_finder.tools.get_surfing_spots"
+  - "surf_spot_finder.tools.get_wave_forecast"
+  - "surf_spot_finder.tools.get_wind_forecast"
+  - "any_agent.tools.send_slack_message"

examples/openai_single_agent_user_confirmation.yaml ADDED Viewed

	@@ -0,0 +1,20 @@

+input:
+  location: Pontevedra
+  date: 2025-03-27 12:00
+  max_driving_hours: 2
+  input_prompt_template: |
+    According to the forecast, what will be the best spot to surf around {LOCATION},
+    in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+    Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
+    and then confirm the final selection with him.
+agent:
+  model_id: gpt-4o
+  api_key_var: OPENAI_API_KEY
+  agent_type: openai
+  tools:
+  - "surf_spot_finder.tools.driving_hours_to_meters"
+  - "surf_spot_finder.tools.get_area_lat_lon"
+  - "surf_spot_finder.tools.get_surfing_spots"
+  - "surf_spot_finder.tools.get_wave_forecast"
+  - "surf_spot_finder.tools.get_wind_forecast"
+  - "any_agent.tools.send_slack_message"

examples/smolagents_single_agent_user_confirmation.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+input:
+  location: Pontevedra
+  date: 2025-03-27 12:00
+  max_driving_hours: 2
+  input_prompt_template: |
+    According to the forecast, what will be the best spot to surf around {LOCATION},
+    in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+    Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
+    and then confirm the final selection with him.
+agent:
+  model_id: openai/gpt-4o
+  # model_id: ollama/llama3.1:latest
+  api_key_var: OPENAI_API_KEY
+  agent_type: smolagents
+  tools:
+  - "surf_spot_finder.tools.driving_hours_to_meters"
+  - "surf_spot_finder.tools.get_area_lat_lon"
+  - "surf_spot_finder.tools.get_surfing_spots"
+  - "surf_spot_finder.tools.get_wave_forecast"
+  - "surf_spot_finder.tools.get_wind_forecast"
+  - "any_agent.tools.send_slack_message"
+  - "smolagents.FinalAnswerTool"

src/surf_spot_finder/config.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from typing import Annotated
 from any_agent.schema import AgentSchema
-from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
 INPUT_PROMPT_TEMPLATE = """
@@ -19,6 +19,8 @@ def validate_prompt(value) -> str:
 class Config(BaseModel):
     location: str
     max_driving_hours: PositiveInt
     date: FutureDatetime

 from typing import Annotated
 from any_agent.schema import AgentSchema
+from pydantic import AfterValidator, BaseModel, ConfigDict, FutureDatetime, PositiveInt
 INPUT_PROMPT_TEMPLATE = """
 class Config(BaseModel):
+    model_config = ConfigDict(extra="forbid")
     location: str
     max_driving_hours: PositiveInt
     date: FutureDatetime

src/surf_spot_finder/evaluation/evaluate.py CHANGED Viewed

@@ -42,6 +42,7 @@ def run_agent(test_case: TestCase, agent_config_path: str) -> str:
         json_tracer=config.json_tracer,
         api_base=config.api_base,
         tools=config.tools,
     )

         json_tracer=config.json_tracer,
         api_base=config.api_base,
         tools=config.tools,
+        input_prompt_template=config.input_prompt_template,
     )

src/surf_spot_finder/evaluation/evaluators/QuestionAnsweringSquadEvaluator.py CHANGED Viewed

@@ -30,7 +30,6 @@ class QuestionAnsweringSquadEvaluator:
         result = self.metric.compute(
             predictions=hypothesis_answer, references=ground_truth_answer
         )
-        print(result)
         match = EvaluationResult(
             passed=True if int(result["exact_match"]) == 1 else False,

         result = self.metric.compute(
             predictions=hypothesis_answer, references=ground_truth_answer
         )
         match = EvaluationResult(
             passed=True if int(result["exact_match"]) == 1 else False,

src/surf_spot_finder/evaluation/test_cases/alpha.yaml CHANGED Viewed

@@ -18,8 +18,6 @@ ground_truth:
 # These evaluators for these checkpoints
 # will not consider the hypothesis answer or final answer in their decision making
 checkpoints:
-  - points: 1
-    criteria: "Check if the agent did a web search for nearby surf locations."
   - points: 1
     criteria: "Check if the agent used the get_surfing_spots tool and it succeeded"
   - points: 1
@@ -28,8 +26,10 @@ checkpoints:
     criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
   - points: 1
     criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
   - points: 1
     criteria: "Check if the final answer contains any description about the weather at the chosen location"
-llm_judge: "ollama/gemma3:4b-it-fp16"

 # These evaluators for these checkpoints
 # will not consider the hypothesis answer or final answer in their decision making
 checkpoints:
   - points: 1
     criteria: "Check if the agent used the get_surfing_spots tool and it succeeded"
   - points: 1
     criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
   - points: 1
     criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
+  - points: 1
+    criteria: "Check if the agent confirmed the selection with David de la Iglesia Castro"
   - points: 1
     criteria: "Check if the final answer contains any description about the weather at the chosen location"
+llm_judge: "openai/gpt-4o"