surf-spot-finder / examples /single_agent_with_tools.yaml
Nathan Brake
Optionally Evaluate Cases after generating trace (#57)
515cbf5 unverified
raw
history blame
1.48 kB
# Settings for the main agent: the model it uses and the tools it may call.
main_agent:
  # Optional; will prompt for it if not provided.
  model_id: null
  # Tools made available to the agent, listed by dotted name.
  # NOTE(review): these look like importable Python paths -- confirm with the
  # loader that consumes this file.
  tools:
    - "surf_spot_finder.tools.driving_hours_to_meters"
    - "surf_spot_finder.tools.get_area_lat_lon"
    - "surf_spot_finder.tools.get_surfing_spots"
    - "surf_spot_finder.tools.get_wave_forecast"
    - "surf_spot_finder.tools.get_wind_forecast"
    - "any_agent.tools.search_web"
    - "any_agent.tools.visit_webpage"
# Evaluation cases. Each case names the LLM used as judge and a list of
# checkpoints; every checkpoint pairs a natural-language criteria with the
# points attached to it.
# NOTE(review): presumably points are awarded when the judge deems the criteria
# met -- confirm against the evaluation code.
evaluation_cases:
  - llm_judge: "openai/gpt-4.1-mini"
    checkpoints:
      # Tool-usage checks.
      - criteria: >-
          Check if the agent used the get_surfing_spots tool and it succeeded,
          and that the tool was used before the get_wave_forecast and
          get_wind_forecast tools
        points: 1
      - criteria: >-
          Check if the agent used the get_wave_forecast tool and it succeeded
        points: 1
      - criteria: >-
          Check if the agent used the get_wind_forecast tool and it succeeded
        points: 1
      - criteria: >-
          Check if the agent used the get_area_lat_lon tool and it succeeded
        points: 1
      - criteria: >-
          Check if the agent used the driving_hours_to_meters tool to convert
          the driving hours to meters and it succeeded
        points: 1
      # Final-answer checks.
      - criteria: >-
          Check if the final answer contains any description about the weather
          at the chosen location
        points: 1
      - criteria: >-
          Check if the final answer contains one of the surf spots found by a
          call of the get_surfing_spots tool
        points: 1
      # Efficiency check.
      - criteria: >-
          Check that the agent completed in fewer than 10 steps
        points: 1