File size: 1,483 Bytes
1cb4327
ba24588
ffb4e87
 
 
 
 
 
cdd4ebc
 
515cbf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Settings for the main agent: a model identifier and the tools it is given.
main_agent:
  # Optional; will prompt for it if not provided. Spelled as an explicit null
  # (instead of a bare empty value) so the "not set" intent is unambiguous.
  model_id: null
  # Tool references, each written as a dotted path.
  tools:
    - "surf_spot_finder.tools.driving_hours_to_meters"
    - "surf_spot_finder.tools.get_area_lat_lon"
    - "surf_spot_finder.tools.get_surfing_spots"
    - "surf_spot_finder.tools.get_wave_forecast"
    - "surf_spot_finder.tools.get_wind_forecast"
    - "any_agent.tools.search_web"
    - "any_agent.tools.visit_webpage"


# Evaluation cases. Each case names a judge model and a list of checkpoints;
# every checkpoint pairs a natural-language criterion with a point value.
# NOTE(review): presumably the judge model scores the agent run against each
# criterion -- confirm against the consumer of this file.
evaluation_cases:
  - llm_judge: 'openai/gpt-4.1-mini'
    checkpoints:
      # Tool usage and ordering: spots must be fetched before the forecasts.
      - criteria: >-
          Check if the agent used the get_surfing_spots tool and it succeeded,
          and that the tool was used before the get_wave_forecast and
          get_wind_forecast tools
        points: 1
      - criteria: >-
          Check if the agent used the get_wave_forecast tool and it succeeded
        points: 1
      - criteria: >-
          Check if the agent used the get_wind_forecast tool and it succeeded
        points: 1
      - criteria: >-
          Check if the agent used the get_area_lat_lon tool and it succeeded
        points: 1
      - criteria: >-
          Check if the agent used the driving_hours_to_meters tool to convert
          the driving hours to meters and it succeeded
        points: 1
      # Content of the final answer.
      - criteria: >-
          Check if the final answer contains any description about the weather
          at the chosen location
        points: 1
      - criteria: >-
          Check if the final answer contains one of the surf spots found by a
          call of the get_surfing_spots tool
        points: 1
      # Step budget.
      - criteria: >-
          Check that the agent completed in fewer than 10 steps
        points: 1