Spaces:
Running
Running
Nathan Brake
committed on
Refactor agent type handling to use AgentFramework enum. Remove agent_type from YAML configurations and update telemetry processing to accommodate the new framework structure. Enhance smolagents configuration with file saving capabilities. (#43)
Browse files
- examples/langchain_single_agent_user_confirmation.yaml +0 -1
- examples/openai_single_agent_user_confirmation.yaml +0 -1
- examples/smolagents_single_agent_user_confirmation.yaml +16 -2
- src/surf_spot_finder/config.py +2 -1
- src/surf_spot_finder/evaluation/__init__.py +0 -8
- src/surf_spot_finder/evaluation/evaluate.py +2 -2
- src/surf_spot_finder/evaluation/telemetry/langchain_telemetry.py +4 -3
- src/surf_spot_finder/evaluation/telemetry/openai_telemetry.py +10 -7
- src/surf_spot_finder/evaluation/telemetry/smolagents_telemetry.py +4 -3
- src/surf_spot_finder/evaluation/telemetry/telemetry.py +12 -13
- src/surf_spot_finder/evaluation/test_cases/alpha.yaml +4 -5
examples/langchain_single_agent_user_confirmation.yaml
CHANGED
@@ -14,7 +14,6 @@ main_agent:
|
|
14 |
model_id: gpt-4o
|
15 |
# model_id: ollama/llama3.1:latest
|
16 |
api_key_var: OPENAI_API_KEY
|
17 |
-
agent_type: langchain
|
18 |
tools:
|
19 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
20 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
|
|
14 |
model_id: gpt-4o
|
15 |
# model_id: ollama/llama3.1:latest
|
16 |
api_key_var: OPENAI_API_KEY
|
|
|
17 |
tools:
|
18 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
19 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
examples/openai_single_agent_user_confirmation.yaml
CHANGED
@@ -13,7 +13,6 @@ framework: openai
|
|
13 |
main_agent:
|
14 |
model_id: gpt-4o
|
15 |
api_key_var: OPENAI_API_KEY
|
16 |
-
agent_type: openai
|
17 |
tools:
|
18 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
19 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
|
|
13 |
main_agent:
|
14 |
model_id: gpt-4o
|
15 |
api_key_var: OPENAI_API_KEY
|
|
|
16 |
tools:
|
17 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
18 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
examples/smolagents_single_agent_user_confirmation.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
location: Pontevedra
|
3 |
date: 2025-03-27 12:00
|
4 |
max_driving_hours: 2
|
@@ -7,6 +6,8 @@ input_prompt_template: |
|
|
7 |
in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
|
8 |
Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
|
9 |
and then confirm the final selection with him.
|
|
|
|
|
10 |
|
11 |
framework: smolagents
|
12 |
|
@@ -14,7 +15,6 @@ main_agent:
|
|
14 |
model_id: openai/gpt-4o
|
15 |
# model_id: ollama/llama3.1:latest
|
16 |
api_key_var: OPENAI_API_KEY
|
17 |
-
agent_type: smolagents
|
18 |
tools:
|
19 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
20 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
@@ -23,3 +23,17 @@ main_agent:
|
|
23 |
- "surf_spot_finder.tools.get_wind_forecast"
|
24 |
- "any_agent.tools.send_console_message"
|
25 |
- "smolagents.FinalAnswerTool"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
location: Pontevedra
|
2 |
date: 2025-03-27 12:00
|
3 |
max_driving_hours: 2
|
|
|
6 |
in a {MAX_DRIVING_HOURS} hour driving radius, at {DATE}?
|
7 |
Find a few options and then discuss it with David de la Iglesia Castro. You should recommend him some choices,
|
8 |
and then confirm the final selection with him.
|
9 |
+
Once he gives the final selection, save a detailed description of the weather at the chosen location into a file
|
10 |
+
named "final_answer.txt". Also save a file called "history.txt" which has a list of your thought process in the choice.
|
11 |
|
12 |
framework: smolagents
|
13 |
|
|
|
15 |
model_id: openai/gpt-4o
|
16 |
# model_id: ollama/llama3.1:latest
|
17 |
api_key_var: OPENAI_API_KEY
|
|
|
18 |
tools:
|
19 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
20 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
|
|
23 |
- "surf_spot_finder.tools.get_wind_forecast"
|
24 |
- "any_agent.tools.send_console_message"
|
25 |
- "smolagents.FinalAnswerTool"
|
26 |
+
- command: "docker"
|
27 |
+
args:
|
28 |
+
- "run"
|
29 |
+
- "-i"
|
30 |
+
- "--rm"
|
31 |
+
- "--mount"
|
32 |
+
- "type=bind,src=/tmp/surf-spot-finder,dst=/projects"
|
33 |
+
- "mcp/filesystem"
|
34 |
+
- "/projects"
|
35 |
+
tools:
|
36 |
+
- "read_file"
|
37 |
+
- "write_file"
|
38 |
+
- "directory_tree"
|
39 |
+
- "list_allowed_directories"
|
src/surf_spot_finder/config.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from typing import Annotated
|
2 |
|
|
|
3 |
from any_agent.schema import AgentSchema
|
4 |
from pydantic import AfterValidator, BaseModel, ConfigDict, FutureDatetime, PositiveInt
|
5 |
import yaml
|
@@ -29,7 +30,7 @@ class Config(BaseModel):
|
|
29 |
INPUT_PROMPT_TEMPLATE
|
30 |
)
|
31 |
|
32 |
-
framework:
|
33 |
|
34 |
main_agent: AgentSchema
|
35 |
managed_agents: list[AgentSchema] | None = None
|
|
|
1 |
from typing import Annotated
|
2 |
|
3 |
+
from any_agent import AgentFramework
|
4 |
from any_agent.schema import AgentSchema
|
5 |
from pydantic import AfterValidator, BaseModel, ConfigDict, FutureDatetime, PositiveInt
|
6 |
import yaml
|
|
|
30 |
INPUT_PROMPT_TEMPLATE
|
31 |
)
|
32 |
|
33 |
+
framework: AgentFramework
|
34 |
|
35 |
main_agent: AgentSchema
|
36 |
managed_agents: list[AgentSchema] | None = None
|
src/surf_spot_finder/evaluation/__init__.py
CHANGED
@@ -1,8 +0,0 @@
|
|
1 |
-
from enum import Enum
|
2 |
-
|
3 |
-
|
4 |
-
class AgentType(str, Enum):
|
5 |
-
LANGCHAIN = "langchain"
|
6 |
-
OPENAI = "openai"
|
7 |
-
OPENAI_MULTI_AGENT = "openai_multi_agent"
|
8 |
-
SMOLAGENTS = "smolagents"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/surf_spot_finder/evaluation/evaluate.py
CHANGED
@@ -63,10 +63,10 @@ def evaluate_telemetry(test_case: TestCase, telemetry_path: str) -> bool:
|
|
63 |
telemetry: List[Dict[str, Any]] = json.loads(f.read())
|
64 |
logger.info(f"Telemetry loaded from {telemetry_path}")
|
65 |
|
66 |
-
|
67 |
|
68 |
# Extract the final answer from the telemetry
|
69 |
-
processor = TelemetryProcessor.create(
|
70 |
hypothesis_answer = processor.extract_hypothesis_answer(trace=telemetry)
|
71 |
|
72 |
# Checkpoint evaluation
|
|
|
63 |
telemetry: List[Dict[str, Any]] = json.loads(f.read())
|
64 |
logger.info(f"Telemetry loaded from {telemetry_path}")
|
65 |
|
66 |
+
agent_framework = TelemetryProcessor.determine_agent_framework(telemetry)
|
67 |
|
68 |
# Extract the final answer from the telemetry
|
69 |
+
processor = TelemetryProcessor.create(agent_framework)
|
70 |
hypothesis_answer = processor.extract_hypothesis_answer(trace=telemetry)
|
71 |
|
72 |
# Checkpoint evaluation
|
src/surf_spot_finder/evaluation/telemetry/langchain_telemetry.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
from typing import Any, Dict, List
|
2 |
import json
|
|
|
3 |
from langchain_core.messages import BaseMessage
|
4 |
|
5 |
-
|
6 |
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
7 |
|
8 |
|
9 |
class LangchainTelemetryProcessor(TelemetryProcessor):
|
10 |
"""Processor for Langchain agent telemetry data."""
|
11 |
|
12 |
-
def
|
13 |
-
return
|
14 |
|
15 |
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
16 |
for span in reversed(trace):
|
|
|
1 |
from typing import Any, Dict, List
|
2 |
import json
|
3 |
+
from any_agent import AgentFramework
|
4 |
from langchain_core.messages import BaseMessage
|
5 |
|
6 |
+
|
7 |
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
8 |
|
9 |
|
10 |
class LangchainTelemetryProcessor(TelemetryProcessor):
|
11 |
"""Processor for Langchain agent telemetry data."""
|
12 |
|
13 |
+
def _get_agent_framework(self) -> AgentFramework:
|
14 |
+
return AgentFramework.LANGCHAIN
|
15 |
|
16 |
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
17 |
for span in reversed(trace):
|
src/surf_spot_finder/evaluation/telemetry/openai_telemetry.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
from typing import Any, Dict, List
|
2 |
import json
|
3 |
|
4 |
-
from
|
|
|
5 |
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
6 |
|
7 |
|
8 |
class OpenAITelemetryProcessor(TelemetryProcessor):
|
9 |
"""Processor for OpenAI agent telemetry data."""
|
10 |
|
11 |
-
def
|
12 |
-
return
|
13 |
|
14 |
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
15 |
for span in reversed(trace):
|
@@ -82,10 +83,10 @@ class OpenAITelemetryProcessor(TelemetryProcessor):
|
|
82 |
|
83 |
# Backward compatibility functions that use the new class structure
|
84 |
def extract_hypothesis_answer(
|
85 |
-
trace: List[Dict[str, Any]],
|
86 |
) -> str:
|
87 |
"""Extract the hypothesis agent final answer from the trace"""
|
88 |
-
processor = TelemetryProcessor.create(
|
89 |
return processor.extract_hypothesis_answer(trace)
|
90 |
|
91 |
|
@@ -97,7 +98,9 @@ def parse_generic_key_value_string(text: str) -> Dict[str, str]:
|
|
97 |
return TelemetryProcessor.parse_generic_key_value_string(text)
|
98 |
|
99 |
|
100 |
-
def extract_evidence(
|
|
|
|
|
101 |
"""Extract relevant telemetry evidence based on the agent type."""
|
102 |
-
processor = TelemetryProcessor.create(
|
103 |
return processor.extract_evidence(telemetry)
|
|
|
1 |
from typing import Any, Dict, List
|
2 |
import json
|
3 |
|
4 |
+
from any_agent import AgentFramework
|
5 |
+
|
6 |
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
7 |
|
8 |
|
9 |
class OpenAITelemetryProcessor(TelemetryProcessor):
|
10 |
"""Processor for OpenAI agent telemetry data."""
|
11 |
|
12 |
+
def _get_agent_framework(self) -> AgentFramework:
|
13 |
+
return AgentFramework.OPENAI
|
14 |
|
15 |
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
16 |
for span in reversed(trace):
|
|
|
83 |
|
84 |
# Backward compatibility functions that use the new class structure
|
85 |
def extract_hypothesis_answer(
|
86 |
+
trace: List[Dict[str, Any]], agent_framework: AgentFramework
|
87 |
) -> str:
|
88 |
"""Extract the hypothesis agent final answer from the trace"""
|
89 |
+
processor = TelemetryProcessor.create(agent_framework)
|
90 |
return processor.extract_hypothesis_answer(trace)
|
91 |
|
92 |
|
|
|
98 |
return TelemetryProcessor.parse_generic_key_value_string(text)
|
99 |
|
100 |
|
101 |
+
def extract_evidence(
|
102 |
+
telemetry: List[Dict[str, Any]], agent_framework: AgentFramework
|
103 |
+
) -> str:
|
104 |
"""Extract relevant telemetry evidence based on the agent type."""
|
105 |
+
processor = TelemetryProcessor.create(agent_framework)
|
106 |
return processor.extract_evidence(telemetry)
|
src/surf_spot_finder/evaluation/telemetry/smolagents_telemetry.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
from typing import Any, Dict, List
|
2 |
import json
|
3 |
|
4 |
-
from
|
|
|
5 |
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
6 |
|
7 |
|
8 |
class SmolagentsTelemetryProcessor(TelemetryProcessor):
|
9 |
"""Processor for SmoL Agents telemetry data."""
|
10 |
|
11 |
-
def
|
12 |
-
return
|
13 |
|
14 |
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
15 |
for span in reversed(trace):
|
|
|
1 |
from typing import Any, Dict, List
|
2 |
import json
|
3 |
|
4 |
+
from any_agent import AgentFramework
|
5 |
+
|
6 |
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
7 |
|
8 |
|
9 |
class SmolagentsTelemetryProcessor(TelemetryProcessor):
|
10 |
"""Processor for SmoL Agents telemetry data."""
|
11 |
|
12 |
+
def _get_agent_framework(self) -> AgentFramework:
|
13 |
+
return AgentFramework.SMOLAGENTS
|
14 |
|
15 |
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
16 |
for span in reversed(trace):
|
src/surf_spot_finder/evaluation/telemetry/telemetry.py
CHANGED
@@ -2,10 +2,9 @@ from typing import Any, Dict, List, ClassVar
|
|
2 |
import json
|
3 |
import re
|
4 |
from abc import ABC, abstractmethod
|
|
|
5 |
from loguru import logger
|
6 |
|
7 |
-
from surf_spot_finder.evaluation import AgentType
|
8 |
-
|
9 |
|
10 |
class TelemetryProcessor(ABC):
|
11 |
"""Base class for processing telemetry data from different agent types."""
|
@@ -13,31 +12,31 @@ class TelemetryProcessor(ABC):
|
|
13 |
MAX_EVIDENCE_LENGTH: ClassVar[int] = 400
|
14 |
|
15 |
@classmethod
|
16 |
-
def create(cls,
|
17 |
"""Factory method to create the appropriate telemetry processor."""
|
18 |
-
if
|
19 |
from surf_spot_finder.evaluation.telemetry.langchain_telemetry import (
|
20 |
LangchainTelemetryProcessor,
|
21 |
)
|
22 |
|
23 |
return LangchainTelemetryProcessor()
|
24 |
-
elif
|
25 |
from surf_spot_finder.evaluation.telemetry.smolagents_telemetry import (
|
26 |
SmolagentsTelemetryProcessor,
|
27 |
)
|
28 |
|
29 |
return SmolagentsTelemetryProcessor()
|
30 |
-
elif
|
31 |
from surf_spot_finder.evaluation.telemetry.openai_telemetry import (
|
32 |
OpenAITelemetryProcessor,
|
33 |
)
|
34 |
|
35 |
return OpenAITelemetryProcessor()
|
36 |
else:
|
37 |
-
raise ValueError(f"Unsupported agent type {
|
38 |
|
39 |
@staticmethod
|
40 |
-
def
|
41 |
"""Determine the agent type based on the trace.
|
42 |
These are not really stable ways to find it, because we're waiting on some
|
43 |
reliable method for determining the agent type. This is a temporary solution.
|
@@ -45,15 +44,15 @@ class TelemetryProcessor(ABC):
|
|
45 |
for span in trace:
|
46 |
if "langchain" in span.get("attributes", {}).get("input.value", ""):
|
47 |
logger.info("Agent type is LANGCHAIN")
|
48 |
-
return
|
49 |
if span.get("attributes", {}).get("smolagents.max_steps"):
|
50 |
logger.info("Agent type is SMOLAGENTS")
|
51 |
-
return
|
52 |
# This is extremely fragile but there currently isn't
|
53 |
# any specific key to indicate the agent type
|
54 |
if span.get("name") == "response":
|
55 |
logger.info("Agent type is OPENAI")
|
56 |
-
return
|
57 |
raise ValueError(
|
58 |
"Could not determine agent type from trace, or agent type not supported"
|
59 |
)
|
@@ -75,7 +74,7 @@ class TelemetryProcessor(ABC):
|
|
75 |
|
76 |
def _format_evidence(self, calls: List[Dict]) -> str:
|
77 |
"""Format extracted data into a standardized output format."""
|
78 |
-
evidence = f"## {self.
|
79 |
|
80 |
for idx, call in enumerate(calls, start=1):
|
81 |
evidence += f"### Call {idx}\n"
|
@@ -96,7 +95,7 @@ class TelemetryProcessor(ABC):
|
|
96 |
return evidence
|
97 |
|
98 |
@abstractmethod
|
99 |
-
def
|
100 |
"""Get the agent type associated with this processor."""
|
101 |
pass
|
102 |
|
|
|
2 |
import json
|
3 |
import re
|
4 |
from abc import ABC, abstractmethod
|
5 |
+
from any_agent import AgentFramework
|
6 |
from loguru import logger
|
7 |
|
|
|
|
|
8 |
|
9 |
class TelemetryProcessor(ABC):
|
10 |
"""Base class for processing telemetry data from different agent types."""
|
|
|
12 |
MAX_EVIDENCE_LENGTH: ClassVar[int] = 400
|
13 |
|
14 |
@classmethod
|
15 |
+
def create(cls, agent_framework: AgentFramework) -> "TelemetryProcessor":
|
16 |
"""Factory method to create the appropriate telemetry processor."""
|
17 |
+
if agent_framework == AgentFramework.LANGCHAIN:
|
18 |
from surf_spot_finder.evaluation.telemetry.langchain_telemetry import (
|
19 |
LangchainTelemetryProcessor,
|
20 |
)
|
21 |
|
22 |
return LangchainTelemetryProcessor()
|
23 |
+
elif agent_framework == AgentFramework.SMOLAGENTS:
|
24 |
from surf_spot_finder.evaluation.telemetry.smolagents_telemetry import (
|
25 |
SmolagentsTelemetryProcessor,
|
26 |
)
|
27 |
|
28 |
return SmolagentsTelemetryProcessor()
|
29 |
+
elif agent_framework == AgentFramework.OPENAI:
|
30 |
from surf_spot_finder.evaluation.telemetry.openai_telemetry import (
|
31 |
OpenAITelemetryProcessor,
|
32 |
)
|
33 |
|
34 |
return OpenAITelemetryProcessor()
|
35 |
else:
|
36 |
+
raise ValueError(f"Unsupported agent type {agent_framework}")
|
37 |
|
38 |
@staticmethod
|
39 |
+
def determine_agent_framework(trace: List[Dict[str, Any]]) -> AgentFramework:
|
40 |
"""Determine the agent type based on the trace.
|
41 |
These are not really stable ways to find it, because we're waiting on some
|
42 |
reliable method for determining the agent type. This is a temporary solution.
|
|
|
44 |
for span in trace:
|
45 |
if "langchain" in span.get("attributes", {}).get("input.value", ""):
|
46 |
logger.info("Agent type is LANGCHAIN")
|
47 |
+
return AgentFramework.LANGCHAIN
|
48 |
if span.get("attributes", {}).get("smolagents.max_steps"):
|
49 |
logger.info("Agent type is SMOLAGENTS")
|
50 |
+
return AgentFramework.SMOLAGENTS
|
51 |
# This is extremely fragile but there currently isn't
|
52 |
# any specific key to indicate the agent type
|
53 |
if span.get("name") == "response":
|
54 |
logger.info("Agent type is OPENAI")
|
55 |
+
return AgentFramework.OPENAI
|
56 |
raise ValueError(
|
57 |
"Could not determine agent type from trace, or agent type not supported"
|
58 |
)
|
|
|
74 |
|
75 |
def _format_evidence(self, calls: List[Dict]) -> str:
|
76 |
"""Format extracted data into a standardized output format."""
|
77 |
+
evidence = f"## {self._get_agent_framework().name} Agent Execution\n\n"
|
78 |
|
79 |
for idx, call in enumerate(calls, start=1):
|
80 |
evidence += f"### Call {idx}\n"
|
|
|
95 |
return evidence
|
96 |
|
97 |
@abstractmethod
|
98 |
+
def _get_agent_framework(self) -> AgentFramework:
|
99 |
"""Get the agent type associated with this processor."""
|
100 |
pass
|
101 |
|
src/surf_spot_finder/evaluation/test_cases/alpha.yaml
CHANGED
@@ -6,13 +6,12 @@ input:
|
|
6 |
location: "Vigo"
|
7 |
date: "2025-03-27 22:00"
|
8 |
max_driving_hours: 3
|
9 |
-
json_tracer: true
|
10 |
|
11 |
|
12 |
-
ground_truth:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
|
17 |
# Base checkpoints for agent behavior
|
18 |
# These evaluators for these checkpoints
|
|
|
6 |
location: "Vigo"
|
7 |
date: "2025-03-27 22:00"
|
8 |
max_driving_hours: 3
|
|
|
9 |
|
10 |
|
11 |
+
# ground_truth:
|
12 |
+
# - name: "Surf location"
|
13 |
+
# points: 5
|
14 |
+
# value: "Playa de Samil"
|
15 |
|
16 |
# Base checkpoints for agent behavior
|
17 |
# These evaluators for these checkpoints
|