Commit by Nathan Brake: support for anyagent (#41)
Files changed:
- .gitignore +0 -1
- examples/langchain_single_agent_user_confirmation.yaml +15 -12
- examples/openai_single_agent_user_confirmation.yaml +14 -11
- examples/smolagents_single_agent_user_confirmation.yaml +14 -11
- pyproject.toml +0 -2
- src/surf_spot_finder/config.py +15 -0
- src/surf_spot_finder/evaluation/__init__.py +8 -0
- src/surf_spot_finder/evaluation/evaluate.py +34 -21
- src/surf_spot_finder/evaluation/telemetry/langchain_telemetry.py +1 -1
- src/surf_spot_finder/evaluation/telemetry/openai_telemetry.py +1 -1
- src/surf_spot_finder/evaluation/telemetry/smolagents_telemetry.py +1 -1
- src/surf_spot_finder/evaluation/telemetry/telemetry.py +2 -1
- src/surf_spot_finder/evaluation/test_case.py +9 -7
.gitignore (CHANGED)
@@ -168,4 +168,3 @@ cython_debug/
 .vscode/

 output
-telemetry_output
examples/langchain_single_agent_user_confirmation.yaml (CHANGED)
@@ -1,14 +1,17 @@
-(11 removed lines; content not captured in this view)
+
+location: Pontevedra
+date: 2025-03-27 12:00
+max_driving_hours: 2
+input_prompt_template: |
+  According to the forecast, what will be the best spot to surf around {LOCATION},
+  in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+  Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
+  and then confirm the final selection with him.
+
+framework: langchain
+
+main_agent:
+  model_id: gpt-4o
   # model_id: ollama/llama3.1:latest
   api_key_var: OPENAI_API_KEY
   agent_type: langchain
@@ -18,4 +21,4 @@ agent:
   - "surf_spot_finder.tools.get_surfing_spots"
   - "surf_spot_finder.tools.get_wave_forecast"
   - "surf_spot_finder.tools.get_wind_forecast"
-  - "any_agent.tools.
+  - "any_agent.tools.send_console_message"
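Note: the {LOCATION}, {MAX_DRIVING_HOURS} and {DATE} placeholders in input_prompt_template are plain str.format fields, which the new run() in evaluate.py (further down) fills from the config. A minimal sketch of that substitution, reusing the values from this example file:

# Sketch only: mirrors the config.input_prompt_template.format(...) call added
# in evaluate.py; template text and values are copied from this example config.
template = (
    "According to the forecast, what will be the best spot to surf around {LOCATION},\n"
    "in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?"
)
query = template.format(
    LOCATION="Pontevedra",
    MAX_DRIVING_HOURS=2,
    DATE="2025-03-27 12:00",
)
print(query)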
examples/openai_single_agent_user_confirmation.yaml (CHANGED)
@@ -1,13 +1,16 @@
-(10 removed lines; content not captured in this view)
+
+location: Pontevedra
+date: 2025-03-27 12:00
+max_driving_hours: 2
+input_prompt_template: |
+  According to the forecast, what will be the best spot to surf around {LOCATION},
+  in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+  Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
+  and then confirm the final selection with him.
+
+framework: openai
+
+main_agent:
   model_id: gpt-4o
   api_key_var: OPENAI_API_KEY
   agent_type: openai
@@ -17,4 +20,4 @@ agent:
   - "surf_spot_finder.tools.get_surfing_spots"
   - "surf_spot_finder.tools.get_wave_forecast"
   - "surf_spot_finder.tools.get_wind_forecast"
-  - "any_agent.tools.
+  - "any_agent.tools.send_console_message"
examples/smolagents_single_agent_user_confirmation.yaml (CHANGED)
@@ -1,13 +1,16 @@
-(10 removed lines; content not captured in this view)
+
+location: Pontevedra
+date: 2025-03-27 12:00
+max_driving_hours: 2
+input_prompt_template: |
+  According to the forecast, what will be the best spot to surf around {LOCATION},
+  in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
+  Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
+  and then confirm the final selection with him.
+
+framework: smolagents
+
+main_agent:
   model_id: openai/gpt-4o
   # model_id: ollama/llama3.1:latest
   api_key_var: OPENAI_API_KEY
@@ -18,5 +21,5 @@ agent:
   - "surf_spot_finder.tools.get_surfing_spots"
   - "surf_spot_finder.tools.get_wave_forecast"
   - "surf_spot_finder.tools.get_wind_forecast"
-  - "any_agent.tools.
+  - "any_agent.tools.send_console_message"
   - "smolagents.FinalAnswerTool"
pyproject.toml (CHANGED)
@@ -55,5 +55,3 @@ dev = [
 [project.scripts]
 surf-spot-finder = "surf_spot_finder.cli:main"
 surf-spot-finder-evaluate = "surf_spot_finder.evaluation.evaluate:main"
-# TODO maybe this would be lumigator
-start-phoenix = "phoenix.server.main:main"
src/surf_spot_finder/config.py (CHANGED)
@@ -2,6 +2,7 @@ from typing import Annotated

 from any_agent.schema import AgentSchema
 from pydantic import AfterValidator, BaseModel, ConfigDict, FutureDatetime, PositiveInt
+import yaml


 INPUT_PROMPT_TEMPLATE = """
@@ -32,3 +33,17 @@ class Config(BaseModel):

     main_agent: AgentSchema
     managed_agents: list[AgentSchema] | None = None
+
+    @classmethod
+    def from_yaml(cls, yaml_path: str) -> "Config":
+        """Create a Config instance from a YAML file.
+
+        Args:
+            yaml_path: Path to the YAML configuration file
+
+        Returns:
+            Config: A new Config instance populated with values from the YAML file
+        """
+        with open(yaml_path, "r") as f:
+            data = yaml.safe_load(f)
+        return cls(**data)
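Note: a minimal usage sketch of the new Config.from_yaml classmethod against one of the example files in this commit; it assumes the script runs from the repository root and that AgentSchema exposes the model_id field seen in the YAML:

# Sketch only: load an example config with the new classmethod.
from surf_spot_finder.config import Config

config = Config.from_yaml("examples/openai_single_agent_user_confirmation.yaml")
print(config.framework)            # "openai"
print(config.main_agent.model_id)  # "gpt-4o" (assumes AgentSchema has a model_id field)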
src/surf_spot_finder/evaluation/__init__.py (CHANGED)
@@ -0,0 +1,8 @@
+from enum import Enum
+
+
+class AgentType(str, Enum):
+    LANGCHAIN = "langchain"
+    OPENAI = "openai"
+    OPENAI_MULTI_AGENT = "openai_multi_agent"
+    SMOLAGENTS = "smolagents"
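Note: because AgentType subclasses str, the framework strings used in the YAML configs map directly onto enum members. A small illustration:

# Sketch only: str-backed enum lookup and comparison.
from surf_spot_finder.evaluation import AgentType

agent_type = AgentType("langchain")   # built from the raw config string
assert agent_type is AgentType.LANGCHAIN
assert agent_type == "langchain"      # str subclass, so it compares equal to the plain string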
src/surf_spot_finder/evaluation/evaluate.py (CHANGED)
@@ -6,7 +6,6 @@ from typing import Any, Dict, List, Optional
 from loguru import logger
 from fire import Fire
 import pandas as pd
-from surf_spot_finder.cli import find_surf_spot
 from surf_spot_finder.config import (
     Config,
 )
@@ -17,13 +16,15 @@ from surf_spot_finder.evaluation.evaluators import (
     HypothesisEvaluator,
 )
 from surf_spot_finder.evaluation.test_case import TestCase
+from any_agent import load_agent, run_agent
+from any_agent.tracing import get_tracer_provider, setup_tracing

 logger.remove()
 logger = logger.opt(ansi=True)
 logger.add(sys.stdout, colorize=True, format="{message}")


-def run_agent(test_case: TestCase, agent_config_path: str) -> str:
+def run(test_case: TestCase, agent_config_path: str) -> str:
     input_data = test_case.input

     logger.info("Loading config")
@@ -31,20 +32,30 @@ def run_agent(test_case: TestCase, agent_config_path: str) -> str:
     config.location = input_data.location
     config.date = input_data.date
     config.max_driving_hours = input_data.max_driving_hours
-(10 removed lines; content not captured in this view)
-        tools=config.tools,
-        input_prompt_template=config.input_prompt_template,
+    logger.info("Setting up tracing")
+    tracer_provider, tracing_path = get_tracer_provider(project_name="surf-spot-finder")
+    setup_tracing(tracer_provider, config.framework)
+
+    logger.info(f"Loading {config.framework} agent")
+    logger.info(f"{config.managed_agents}")
+    agent = load_agent(
+        framework=config.framework,
+        main_agent=config.main_agent,
+        managed_agents=config.managed_agents,
     )

+    query = config.input_prompt_template.format(
+        LOCATION=config.location,
+        MAX_DRIVING_HOURS=config.max_driving_hours,
+        DATE=config.date,
+    )
+    logger.info(f"Running agent with query:\n{query}")
+    run_agent(agent, query)
+
+    logger.success("Done!")
+
+    return tracing_path
+

 def evaluate_telemetry(test_case: TestCase, telemetry_path: str) -> bool:
     # load the json file
@@ -75,12 +86,14 @@ def evaluate_telemetry(test_case: TestCase, telemetry_path: str) -> bool:
     )

     # Direct answer evaluation (new)
-(6 removed lines; content not captured in this view)
+    if test_case.ground_truth:
+        direct_evaluator = QuestionAnsweringSquadEvaluator()
+        direct_results = direct_evaluator.evaluate(
+            hypothesis_answer=hypothesis_answer,
+            ground_truth_answer=test_case.ground_truth,
+        )
+    else:
+        direct_results = []
     # Combine all results
     verification_results = (
         checkpoint_results + hypothesis_answer_results + direct_results
@@ -171,7 +184,7 @@ def evaluate(
         assert (
             agent_config_path is not None
         ), "Agent config path must be provided if running agent"
-        telemetry_path =
+        telemetry_path = run(test_case, agent_config_path)
     else:
         logger.info(f"Using provided telemetry file: {telemetry_path}")
     logger.info(
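Note: the new flow in this file is run-the-agent, then score its trace. A rough sketch of how the two functions compose; how a TestCase instance is constructed is not shown in this diff, so that part is left as a placeholder:

# Sketch only: run() returns the tracing path, which evaluate_telemetry() consumes.
from surf_spot_finder.evaluation.evaluate import run, evaluate_telemetry

test_case = ...  # build a TestCase here; its loader is not part of this diff
telemetry_path = run(test_case, agent_config_path="examples/openai_single_agent_user_confirmation.yaml")
evaluate_telemetry(test_case, telemetry_path)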
src/surf_spot_finder/evaluation/telemetry/langchain_telemetry.py (CHANGED)
@@ -2,7 +2,7 @@ from typing import Any, Dict, List
 import json
 from langchain_core.messages import BaseMessage

-from surf_spot_finder.
+from surf_spot_finder.evaluation import AgentType
 from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
src/surf_spot_finder/evaluation/telemetry/openai_telemetry.py (CHANGED)
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List
 import json

-from surf_spot_finder.
+from surf_spot_finder.evaluation import AgentType
 from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
src/surf_spot_finder/evaluation/telemetry/smolagents_telemetry.py (CHANGED)
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List
 import json

-from surf_spot_finder.
+from surf_spot_finder.evaluation import AgentType
 from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
src/surf_spot_finder/evaluation/telemetry/telemetry.py (CHANGED)
@@ -3,7 +3,8 @@ import json
 import re
 from abc import ABC, abstractmethod
 from loguru import logger
-(removed line; content not captured in this view)
+
+from surf_spot_finder.evaluation import AgentType


 class TelemetryProcessor(ABC):
src/surf_spot_finder/evaluation/test_case.py (CHANGED)
@@ -11,7 +11,6 @@ class InputModel(BaseModel):
     location: str
     date: str
     max_driving_hours: int
-    json_tracer: bool


 class CheckpointCriteria(BaseModel):
@@ -53,12 +52,15 @@ class TestCase(BaseModel):
            }
        )

-(6 removed lines; content not captured in this view)
+        if "ground_truth" in test_case_dict:
+            add_gt_final_answer_criteria(test_case_dict["ground_truth"])
+            test_case_dict["final_answer_criteria"] = final_answer_criteria
+            # remove the points from the ground_truth list but keep the name and value
+            test_case_dict["ground_truth"] = [
+                item
+                for item in test_case_dict["ground_truth"]
+                if isinstance(item, dict)
+            ]

         test_case_dict["test_case_path"] = test_case_path
         # verify that the llm_judge is a valid litellm model
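Note: the list comprehension added above only filters by type: dict entries in ground_truth are kept as-is (name, value, and any points they carry), while anything else is dropped. A tiny illustration with made-up data:

# Illustration only: the ground_truth entries below are invented; the real schema
# is not fully shown in this diff. The filter keeps dicts and drops other items.
ground_truth = [
    {"name": "surf spot", "value": "Playa de Patos", "points": 1},
    "a stray non-dict entry",
]
ground_truth = [item for item in ground_truth if isinstance(item, dict)]
print(ground_truth)  # -> [{'name': 'surf spot', 'value': 'Playa de Patos', 'points': 1}]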