Spaces:

mozilla-ai
/

surf-spot-finder

Running

Nathan Brake Copilot committed on Mar 28

Commit

9ba790f

unverified ·

1 Parent(s): 4dc38cc

Test Case can set input template (#50)

* use input prompt template

* Remove unnecessary agent deletion from run function

* Update src/surf_spot_finder/evaluation/test_cases/alpha.yaml

Co-authored-by: Copilot <[email protected]>

---------

Co-authored-by: Copilot <[email protected]>

Files changed (4) hide show

pyproject.toml +1 -1
src/surf_spot_finder/evaluation/{evaluate.py → main.py} +5 -0
src/surf_spot_finder/evaluation/test_case.py +1 -0
src/surf_spot_finder/evaluation/test_cases/alpha.yaml +18 -8

pyproject.toml CHANGED Viewed

@@ -55,4 +55,4 @@ namespaces = false
 [project.scripts]
 surf-spot-finder = "surf_spot_finder.cli:main"
 surf-spot-finder-no-framework = "surf_spot_finder.no_framework:main"
-surf-spot-finder-evaluate = "surf_spot_finder.evaluation.evaluate:main"

 [project.scripts]
 surf-spot-finder = "surf_spot_finder.cli:main"
 surf-spot-finder-no-framework = "surf_spot_finder.no_framework:main"
+surf-spot-finder-evaluate = "surf_spot_finder.evaluation.main:main"

src/surf_spot_finder/evaluation/{evaluate.py → main.py} RENAMED Viewed

@@ -28,9 +28,14 @@ def run(test_case: TestCase, agent_config_path: str) -> str:
     logger.info("Loading config")
     config = Config.from_yaml(agent_config_path)
     config.location = input_data.location
     config.date = input_data.date
     config.max_driving_hours = input_data.max_driving_hours
     logger.info("Setting up tracing")
     tracer_provider, tracing_path = get_tracer_provider(
         project_name="surf-spot-finder", agent_framework=config.framework

     logger.info("Loading config")
     config = Config.from_yaml(agent_config_path)
+    # pretty print
+    logger.info(
+        f"Overriding config with test case input:\n{json.dumps(input_data.model_dump(), indent=2)}"
+    )
     config.location = input_data.location
     config.date = input_data.date
     config.max_driving_hours = input_data.max_driving_hours
+    config.input_prompt_template = input_data.input_prompt_template
     logger.info("Setting up tracing")
     tracer_provider, tracing_path = get_tracer_provider(
         project_name="surf-spot-finder", agent_framework=config.framework

src/surf_spot_finder/evaluation/test_case.py CHANGED Viewed

@@ -11,6 +11,7 @@ class InputModel(BaseModel):
     location: str
     date: str
     max_driving_hours: int
 class CheckpointCriteria(BaseModel):

     location: str
     date: str
     max_driving_hours: int
+    input_prompt_template: str
 class CheckpointCriteria(BaseModel):

src/surf_spot_finder/evaluation/test_cases/alpha.yaml CHANGED Viewed

@@ -3,15 +3,21 @@
 # You only need this input data if you want to run the test case; if you pass in a path to a telemetry file, this
 # is ignored
 input:
-  location: "Vigo"
-  date: "2025-03-27 22:00"
-  max_driving_hours: 3
-# ground_truth:
-#   - name: "Surf location"
-#     points: 5
-#     value: "Playa de Samil"
 # Base checkpoints for agent behavior
 # These evaluators for these checkpoints
@@ -25,10 +31,14 @@ checkpoints:
     criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
   - points: 1
     criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
   - points: 1
     criteria: "Check if the agent confirmed the selection with David de la Iglesia Castro"
   - points: 1
     criteria: "Check if the final answer contains any description about the weather at the chosen location"
 llm_judge: "openai/gpt-4o"

 # You only need this input data if you want to run the test case; if you pass in a path to a telemetry file, this
 # is ignored
 input:
+  location: "Huntington Beach, California"
+  date: "2025-03-29 22:00"
+  max_driving_hours: 1
+  input_prompt_template: |
+    Please help find the best place to surf around {LOCATION},
+    in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+    Find a few options and then message David de la Iglesia Castro to discuss it with him.
+    Make sure he agrees with the choice. Your final answer should be a detailed description of the surf location, wind, wave, and weather conditions.
+    In addition to making it your final answer, also include description of the weather at the chosen location by writing to the file /projects/final_answer.txt
+# Optionally I can check whether it picked the right response
+ground_truth:
+  - name: "Surf location"
+    points: 5
+    value: "Bolsa Chica State Beach"
 # Base checkpoints for agent behavior
 # These evaluators for these checkpoints
     criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
   - points: 1
     criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
+  - points: 1
+    criteria: "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded"
   - points: 1
     criteria: "Check if the agent confirmed the selection with David de la Iglesia Castro"
   - points: 1
     criteria: "Check if the final answer contains any description about the weather at the chosen location"
+  - points: 1
+    criteria: "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool"
 llm_judge: "openai/gpt-4o"