Spaces:
Running
Running
File size: 1,483 Bytes
1cb4327 ba24588 ffb4e87 cdd4ebc 515cbf5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# Agent definition plus the evaluation cases used to score a run.
# NOTE(review): nesting below was reconstructed from flattened text —
# confirm against the consumer's schema.

main_agent:
  # Optional; will prompt for it if not provided.
  model_id: null
  # Tools made available to the agent, given as dotted paths.
  tools:
    - "surf_spot_finder.tools.driving_hours_to_meters"
    - "surf_spot_finder.tools.get_area_lat_lon"
    - "surf_spot_finder.tools.get_surfing_spots"
    - "surf_spot_finder.tools.get_wave_forecast"
    - "surf_spot_finder.tools.get_wind_forecast"
    - "any_agent.tools.search_web"
    - "any_agent.tools.visit_webpage"

evaluation_cases:
  # Single case: eight checkpoints, one point each.
  # presumably `llm_judge` names the model that grades each criterion — verify
  - llm_judge: openai/gpt-4.1-mini
    checkpoints:
      # Tool-usage checks.
      - criteria: "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools"
        points: 1
      - criteria: "Check if the agent used the get_wave_forecast tool and it succeeded"
        points: 1
      - criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
        points: 1
      - criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
        points: 1
      - criteria: "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded"
        points: 1
      # Final-answer content checks.
      - criteria: "Check if the final answer contains any description about the weather at the chosen location"
        points: 1
      - criteria: "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool"
        points: 1
      # Step-budget check.
      - criteria: "Check that the agent completed in fewer than 10 steps"
        points: 1
|