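# NOTE(review): "Spaces: Running" looks like page-status residue from a web
# capture rather than configuration — confirm and remove.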
# Configuration for the main agent: which model it runs on and which tools it
# may call. Each tool is given as a dotted import path.
main_agent:
  # Explicit null (same parsed value as the original bare key).
  model_id: null  # optional, will prompt for it if not provided
  tools:
    - "surf_spot_finder.tools.driving_hours_to_meters"
    - "surf_spot_finder.tools.get_area_lat_lon"
    - "surf_spot_finder.tools.get_surfing_spots"
    - "surf_spot_finder.tools.get_wave_forecast"
    - "surf_spot_finder.tools.get_wind_forecast"
    - "any_agent.tools.search_web"
    - "any_agent.tools.visit_webpage"

# Evaluation cases: each case names an LLM judge and a list of checkpoints.
# Every checkpoint pairs a natural-language criterion with a point value.
# NOTE(review): original indentation was lost; this key is assumed to be
# top-level (a sibling of main_agent) — confirm against the consumer's schema.
evaluation_cases:
  - llm_judge: openai/gpt-4.1-mini
    checkpoints:
      # Tool-usage checks.
      - criteria: "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools"
        points: 1
      - criteria: "Check if the agent used the get_wave_forecast tool and it succeeded"
        points: 1
      - criteria: "Check if the agent used the get_wind_forecast tool and it succeeded"
        points: 1
      - criteria: "Check if the agent used the get_area_lat_lon tool and it succeeded"
        points: 1
      - criteria: "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded"
        points: 1
      # Final-answer content checks.
      - criteria: "Check if the final answer contains any description about the weather at the chosen location"
        points: 1
      - criteria: "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool"
        points: 1
      # Step-count check.
      - criteria: "Check that the agent completed in fewer than 10 steps"
        points: 1