Spaces:
Running
Running
Nathan Brake
commited on
telemetry (#48)
Browse files- pyproject.toml +4 -5
- src/surf_spot_finder/evaluation/evaluate.py +8 -6
- src/surf_spot_finder/evaluation/telemetry/__init__.py +0 -3
- src/surf_spot_finder/evaluation/telemetry/langchain_telemetry.py +0 -88
- src/surf_spot_finder/evaluation/telemetry/openai_telemetry.py +0 -106
- src/surf_spot_finder/evaluation/telemetry/smolagents_telemetry.py +0 -104
- src/surf_spot_finder/evaluation/telemetry/telemetry.py +0 -125
pyproject.toml
CHANGED
@@ -35,6 +35,10 @@ tests = [
|
|
35 |
"evaluate>=0.4.3",
|
36 |
]
|
37 |
|
|
|
|
|
|
|
|
|
38 |
[project.urls]
|
39 |
Documentation = "https://mozilla-ai.github.io/surf-spot-finder/"
|
40 |
Issues = "https://github.com/mozilla-ai/surf-spot-finder/issues"
|
@@ -47,11 +51,6 @@ namespaces = false
|
|
47 |
|
48 |
[tool.setuptools_scm]
|
49 |
|
50 |
-
[dependency-groups]
|
51 |
-
dev = [
|
52 |
-
"pre-commit>=4.1.0",
|
53 |
-
]
|
54 |
-
|
55 |
[project.scripts]
|
56 |
surf-spot-finder = "surf_spot_finder.cli:main"
|
57 |
surf-spot-finder-no-framework = "surf_spot_finder.no_framework:main"
|
|
|
35 |
"evaluate>=0.4.3",
|
36 |
]
|
37 |
|
38 |
+
dev = [
|
39 |
+
"pre-commit>=4.1.0",
|
40 |
+
]
|
41 |
+
|
42 |
[project.urls]
|
43 |
Documentation = "https://mozilla-ai.github.io/surf-spot-finder/"
|
44 |
Issues = "https://github.com/mozilla-ai/surf-spot-finder/issues"
|
|
|
51 |
|
52 |
[tool.setuptools_scm]
|
53 |
|
|
|
|
|
|
|
|
|
|
|
54 |
[project.scripts]
|
55 |
surf-spot-finder = "surf_spot_finder.cli:main"
|
56 |
surf-spot-finder-no-framework = "surf_spot_finder.no_framework:main"
|
src/surf_spot_finder/evaluation/evaluate.py
CHANGED
@@ -3,21 +3,23 @@ import os
|
|
3 |
import sys
|
4 |
from textwrap import dedent
|
5 |
from typing import Any, Dict, List, Optional
|
6 |
-
|
7 |
-
from fire import Fire
|
8 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
from surf_spot_finder.config import (
|
10 |
Config,
|
11 |
)
|
12 |
-
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
13 |
from surf_spot_finder.evaluation.evaluators import (
|
14 |
CheckpointEvaluator,
|
15 |
-
QuestionAnsweringSquadEvaluator,
|
16 |
HypothesisEvaluator,
|
|
|
17 |
)
|
18 |
from surf_spot_finder.evaluation.test_case import TestCase
|
19 |
-
from any_agent import AnyAgent
|
20 |
-
from any_agent.tracing import get_tracer_provider, setup_tracing
|
21 |
|
22 |
logger.remove()
|
23 |
logger = logger.opt(ansi=True)
|
|
|
3 |
import sys
|
4 |
from textwrap import dedent
|
5 |
from typing import Any, Dict, List, Optional
|
6 |
+
|
|
|
7 |
import pandas as pd
|
8 |
+
from any_agent import AnyAgent
|
9 |
+
from any_agent.telemetry import TelemetryProcessor
|
10 |
+
from any_agent.tracing import get_tracer_provider, setup_tracing
|
11 |
+
from fire import Fire
|
12 |
+
from loguru import logger
|
13 |
+
|
14 |
from surf_spot_finder.config import (
|
15 |
Config,
|
16 |
)
|
|
|
17 |
from surf_spot_finder.evaluation.evaluators import (
|
18 |
CheckpointEvaluator,
|
|
|
19 |
HypothesisEvaluator,
|
20 |
+
QuestionAnsweringSquadEvaluator,
|
21 |
)
|
22 |
from surf_spot_finder.evaluation.test_case import TestCase
|
|
|
|
|
23 |
|
24 |
logger.remove()
|
25 |
logger = logger.opt(ansi=True)
|
src/surf_spot_finder/evaluation/telemetry/__init__.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
from .telemetry import TelemetryProcessor
|
2 |
-
|
3 |
-
__all__ = ["TelemetryProcessor"]
|
|
|
|
|
|
|
|
src/surf_spot_finder/evaluation/telemetry/langchain_telemetry.py
DELETED
@@ -1,88 +0,0 @@
|
|
1 |
-
from typing import Any, Dict, List
|
2 |
-
import json
|
3 |
-
from any_agent import AgentFramework
|
4 |
-
from langchain_core.messages import BaseMessage
|
5 |
-
|
6 |
-
|
7 |
-
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
8 |
-
|
9 |
-
|
10 |
-
class LangchainTelemetryProcessor(TelemetryProcessor):
|
11 |
-
"""Processor for Langchain agent telemetry data."""
|
12 |
-
|
13 |
-
def _get_agent_framework(self) -> AgentFramework:
|
14 |
-
return AgentFramework.LANGCHAIN
|
15 |
-
|
16 |
-
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
17 |
-
for span in reversed(trace):
|
18 |
-
if span["attributes"]["openinference.span.kind"] == "AGENT":
|
19 |
-
content = span["attributes"]["output.value"]
|
20 |
-
# Extract content from serialized langchain message
|
21 |
-
message = json.loads(content)["messages"][0]
|
22 |
-
message = self.parse_generic_key_value_string(message)
|
23 |
-
base_message = BaseMessage(content=message["content"], type="AGENT")
|
24 |
-
# Use the interpreted string for printing
|
25 |
-
final_text = base_message.text()
|
26 |
-
# Either decode escape sequences if they're present
|
27 |
-
try:
|
28 |
-
final_text = final_text.encode().decode("unicode_escape")
|
29 |
-
except UnicodeDecodeError:
|
30 |
-
# If that fails, the escape sequences might already be interpreted
|
31 |
-
pass
|
32 |
-
return final_text
|
33 |
-
|
34 |
-
raise ValueError("No agent final answer found in trace")
|
35 |
-
|
36 |
-
def _extract_telemetry_data(self, telemetry: List[Dict[str, Any]]) -> List[Dict]:
|
37 |
-
"""Extract LLM calls and tool calls from LangChain telemetry."""
|
38 |
-
calls = []
|
39 |
-
|
40 |
-
for span in telemetry:
|
41 |
-
if "attributes" not in span:
|
42 |
-
continue
|
43 |
-
|
44 |
-
attributes = span.get("attributes", {})
|
45 |
-
span_kind = attributes.get("openinference.span.kind", "")
|
46 |
-
|
47 |
-
# Collect LLM calls
|
48 |
-
if (
|
49 |
-
span_kind == "LLM"
|
50 |
-
and "llm.output_messages.0.message.content" in attributes
|
51 |
-
):
|
52 |
-
llm_info = {
|
53 |
-
"model": attributes.get("llm.model_name", "Unknown model"),
|
54 |
-
"input": attributes.get("llm.input_messages.0.message.content", ""),
|
55 |
-
"output": attributes.get(
|
56 |
-
"llm.output_messages.0.message.content", ""
|
57 |
-
),
|
58 |
-
"type": "reasoning",
|
59 |
-
}
|
60 |
-
calls.append(llm_info)
|
61 |
-
|
62 |
-
# Try to find tool calls
|
63 |
-
if "tool.name" in attributes or span.get("name", "").endswith("Tool"):
|
64 |
-
tool_info = {
|
65 |
-
"tool_name": attributes.get(
|
66 |
-
"tool.name", span.get("name", "Unknown tool")
|
67 |
-
),
|
68 |
-
"status": "success"
|
69 |
-
if span.get("status", {}).get("status_code") == "OK"
|
70 |
-
else "error",
|
71 |
-
"error": span.get("status", {}).get("description", None),
|
72 |
-
}
|
73 |
-
|
74 |
-
if "input.value" in attributes:
|
75 |
-
try:
|
76 |
-
input_value = json.loads(attributes["input.value"])
|
77 |
-
tool_info["input"] = input_value
|
78 |
-
except Exception:
|
79 |
-
tool_info["input"] = attributes["input.value"]
|
80 |
-
|
81 |
-
if "output.value" in attributes:
|
82 |
-
tool_info["output"] = self.parse_generic_key_value_string(
|
83 |
-
json.loads(attributes["output.value"])["output"]
|
84 |
-
)["content"]
|
85 |
-
|
86 |
-
calls.append(tool_info)
|
87 |
-
|
88 |
-
return calls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/surf_spot_finder/evaluation/telemetry/openai_telemetry.py
DELETED
@@ -1,106 +0,0 @@
|
|
1 |
-
from typing import Any, Dict, List
|
2 |
-
import json
|
3 |
-
|
4 |
-
from any_agent import AgentFramework
|
5 |
-
from loguru import logger
|
6 |
-
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
7 |
-
|
8 |
-
|
9 |
-
class OpenAITelemetryProcessor(TelemetryProcessor):
|
10 |
-
"""Processor for OpenAI agent telemetry data."""
|
11 |
-
|
12 |
-
def _get_agent_framework(self) -> AgentFramework:
|
13 |
-
return AgentFramework.OPENAI
|
14 |
-
|
15 |
-
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
16 |
-
for span in reversed(trace):
|
17 |
-
# Looking for the final response that has the summary answer
|
18 |
-
if (
|
19 |
-
"attributes" in span
|
20 |
-
and span.get("attributes", {}).get("openinference.span.kind") == "LLM"
|
21 |
-
):
|
22 |
-
output_key = (
|
23 |
-
"llm.output_messages.0.message.contents.0.message_content.text"
|
24 |
-
)
|
25 |
-
if output_key in span["attributes"]:
|
26 |
-
return span["attributes"][output_key]
|
27 |
-
logger.warning("No agent final answer found in trace")
|
28 |
-
return "NO FINAL ANSWER FOUND"
|
29 |
-
|
30 |
-
def _extract_telemetry_data(self, telemetry: List[Dict[str, Any]]) -> list:
|
31 |
-
"""Extract LLM calls and tool calls from OpenAI telemetry."""
|
32 |
-
calls = []
|
33 |
-
|
34 |
-
for span in telemetry:
|
35 |
-
if "attributes" not in span:
|
36 |
-
continue
|
37 |
-
|
38 |
-
attributes = span.get("attributes", {})
|
39 |
-
span_kind = attributes.get("openinference.span.kind", "")
|
40 |
-
|
41 |
-
# Collect LLM interactions - look for direct message content first
|
42 |
-
if span_kind == "LLM":
|
43 |
-
# Initialize the LLM info dictionary
|
44 |
-
span_info = {}
|
45 |
-
|
46 |
-
# Try to get input message
|
47 |
-
input_key = "llm.input_messages.1.message.content" # User message is usually at index 1
|
48 |
-
if input_key in attributes:
|
49 |
-
span_info["input"] = attributes[input_key]
|
50 |
-
|
51 |
-
# Try to get output message directly
|
52 |
-
output_content = None
|
53 |
-
# Try in multiple possible locations
|
54 |
-
for key in [
|
55 |
-
"llm.output_messages.0.message.content",
|
56 |
-
"llm.output_messages.0.message.contents.0.message_content.text",
|
57 |
-
]:
|
58 |
-
if key in attributes:
|
59 |
-
output_content = attributes[key]
|
60 |
-
break
|
61 |
-
|
62 |
-
# If we found direct output content, use it
|
63 |
-
if output_content:
|
64 |
-
span_info["output"] = output_content
|
65 |
-
calls.append(span_info)
|
66 |
-
elif span_kind == "TOOL":
|
67 |
-
tool_name = attributes.get("tool.name", "Unknown tool")
|
68 |
-
tool_output = attributes.get("output.value", "")
|
69 |
-
|
70 |
-
span_info = {
|
71 |
-
"tool_name": tool_name,
|
72 |
-
"input": attributes.get("input.value", ""),
|
73 |
-
"output": tool_output,
|
74 |
-
# Can't add status yet because it isn't being set by openinference
|
75 |
-
# "status": span.get("status", {}).get("status_code"),
|
76 |
-
}
|
77 |
-
span_info["input"] = json.loads(span_info["input"])
|
78 |
-
|
79 |
-
calls.append(span_info)
|
80 |
-
|
81 |
-
return calls
|
82 |
-
|
83 |
-
|
84 |
-
# Backward compatibility functions that use the new class structure
|
85 |
-
def extract_hypothesis_answer(
|
86 |
-
trace: List[Dict[str, Any]], agent_framework: AgentFramework
|
87 |
-
) -> str:
|
88 |
-
"""Extract the hypothesis agent final answer from the trace"""
|
89 |
-
processor = TelemetryProcessor.create(agent_framework)
|
90 |
-
return processor.extract_hypothesis_answer(trace)
|
91 |
-
|
92 |
-
|
93 |
-
def parse_generic_key_value_string(text: str) -> Dict[str, str]:
|
94 |
-
"""
|
95 |
-
Parse a string that has items of a dict with key-value pairs separated by '='.
|
96 |
-
Only splits on '=' signs, handling quoted strings properly.
|
97 |
-
"""
|
98 |
-
return TelemetryProcessor.parse_generic_key_value_string(text)
|
99 |
-
|
100 |
-
|
101 |
-
def extract_evidence(
|
102 |
-
telemetry: List[Dict[str, Any]], agent_framework: AgentFramework
|
103 |
-
) -> str:
|
104 |
-
"""Extract relevant telemetry evidence based on the agent type."""
|
105 |
-
processor = TelemetryProcessor.create(agent_framework)
|
106 |
-
return processor.extract_evidence(telemetry)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/surf_spot_finder/evaluation/telemetry/smolagents_telemetry.py
DELETED
@@ -1,104 +0,0 @@
|
|
1 |
-
from typing import Any, Dict, List
|
2 |
-
import json
|
3 |
-
|
4 |
-
from any_agent import AgentFramework
|
5 |
-
|
6 |
-
from surf_spot_finder.evaluation.telemetry import TelemetryProcessor
|
7 |
-
|
8 |
-
|
9 |
-
class SmolagentsTelemetryProcessor(TelemetryProcessor):
|
10 |
-
"""Processor for SmoL Agents telemetry data."""
|
11 |
-
|
12 |
-
def _get_agent_framework(self) -> AgentFramework:
|
13 |
-
return AgentFramework.SMOLAGENTS
|
14 |
-
|
15 |
-
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
16 |
-
for span in reversed(trace):
|
17 |
-
if span["attributes"]["openinference.span.kind"] == "AGENT":
|
18 |
-
content = span["attributes"]["output.value"]
|
19 |
-
return content
|
20 |
-
|
21 |
-
raise ValueError("No agent final answer found in trace")
|
22 |
-
|
23 |
-
def _extract_telemetry_data(self, telemetry: List[Dict[str, Any]]) -> List[Dict]:
|
24 |
-
"""Extract LLM calls and tool calls from SmoL Agents telemetry."""
|
25 |
-
calls = []
|
26 |
-
|
27 |
-
for span in telemetry:
|
28 |
-
# Skip spans without attributes
|
29 |
-
if "attributes" not in span:
|
30 |
-
continue
|
31 |
-
|
32 |
-
attributes = span["attributes"]
|
33 |
-
|
34 |
-
# Extract tool information
|
35 |
-
if "tool.name" in attributes or span.get("name", "").startswith(
|
36 |
-
"SimpleTool"
|
37 |
-
):
|
38 |
-
tool_info = {
|
39 |
-
"tool_name": attributes.get(
|
40 |
-
"tool.name", span.get("name", "Unknown tool")
|
41 |
-
),
|
42 |
-
"status": "success"
|
43 |
-
if span.get("status", {}).get("status_code") == "OK"
|
44 |
-
else "error",
|
45 |
-
"error": span.get("status", {}).get("description", None),
|
46 |
-
}
|
47 |
-
|
48 |
-
# Extract input if available
|
49 |
-
if "input.value" in attributes:
|
50 |
-
try:
|
51 |
-
input_value = json.loads(attributes["input.value"])
|
52 |
-
if "kwargs" in input_value:
|
53 |
-
# For SmoLAgents, the actual input is often in the kwargs field
|
54 |
-
tool_info["input"] = input_value["kwargs"]
|
55 |
-
else:
|
56 |
-
tool_info["input"] = input_value
|
57 |
-
except (json.JSONDecodeError, TypeError):
|
58 |
-
tool_info["input"] = attributes["input.value"]
|
59 |
-
|
60 |
-
# Extract output if available
|
61 |
-
if "output.value" in attributes:
|
62 |
-
try:
|
63 |
-
# Try to parse JSON output
|
64 |
-
output_value = (
|
65 |
-
json.loads(attributes["output.value"])
|
66 |
-
if isinstance(attributes["output.value"], str)
|
67 |
-
else attributes["output.value"]
|
68 |
-
)
|
69 |
-
tool_info["output"] = output_value
|
70 |
-
except (json.JSONDecodeError, TypeError):
|
71 |
-
tool_info["output"] = attributes["output.value"]
|
72 |
-
else:
|
73 |
-
tool_info["output"] = "No output found"
|
74 |
-
|
75 |
-
calls.append(tool_info)
|
76 |
-
|
77 |
-
# Extract LLM calls to see reasoning
|
78 |
-
elif "LiteLLMModel.__call__" in span.get("name", ""):
|
79 |
-
# The LLM output may be in different places depending on the implementation
|
80 |
-
output_content = None
|
81 |
-
|
82 |
-
# Try to get the output from the llm.output_messages.0.message.content attribute
|
83 |
-
if "llm.output_messages.0.message.content" in attributes:
|
84 |
-
output_content = attributes["llm.output_messages.0.message.content"]
|
85 |
-
|
86 |
-
# Or try to parse it from the output.value as JSON
|
87 |
-
elif "output.value" in attributes:
|
88 |
-
try:
|
89 |
-
output_value = json.loads(attributes["output.value"])
|
90 |
-
if "content" in output_value:
|
91 |
-
output_content = output_value["content"]
|
92 |
-
except (json.JSONDecodeError, TypeError):
|
93 |
-
pass
|
94 |
-
|
95 |
-
if output_content:
|
96 |
-
calls.append(
|
97 |
-
{
|
98 |
-
"model": attributes.get("llm.model_name", "Unknown model"),
|
99 |
-
"output": output_content,
|
100 |
-
"type": "reasoning",
|
101 |
-
}
|
102 |
-
)
|
103 |
-
|
104 |
-
return calls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/surf_spot_finder/evaluation/telemetry/telemetry.py
DELETED
@@ -1,125 +0,0 @@
|
|
1 |
-
from typing import Any, Dict, List, ClassVar
|
2 |
-
import json
|
3 |
-
import re
|
4 |
-
from abc import ABC, abstractmethod
|
5 |
-
from any_agent import AgentFramework
|
6 |
-
from loguru import logger
|
7 |
-
|
8 |
-
|
9 |
-
class TelemetryProcessor(ABC):
|
10 |
-
"""Base class for processing telemetry data from different agent types."""
|
11 |
-
|
12 |
-
MAX_EVIDENCE_LENGTH: ClassVar[int] = 400
|
13 |
-
|
14 |
-
@classmethod
|
15 |
-
def create(cls, agent_framework: AgentFramework) -> "TelemetryProcessor":
|
16 |
-
"""Factory method to create the appropriate telemetry processor."""
|
17 |
-
if agent_framework == AgentFramework.LANGCHAIN:
|
18 |
-
from surf_spot_finder.evaluation.telemetry.langchain_telemetry import (
|
19 |
-
LangchainTelemetryProcessor,
|
20 |
-
)
|
21 |
-
|
22 |
-
return LangchainTelemetryProcessor()
|
23 |
-
elif agent_framework == AgentFramework.SMOLAGENTS:
|
24 |
-
from surf_spot_finder.evaluation.telemetry.smolagents_telemetry import (
|
25 |
-
SmolagentsTelemetryProcessor,
|
26 |
-
)
|
27 |
-
|
28 |
-
return SmolagentsTelemetryProcessor()
|
29 |
-
elif agent_framework == AgentFramework.OPENAI:
|
30 |
-
from surf_spot_finder.evaluation.telemetry.openai_telemetry import (
|
31 |
-
OpenAITelemetryProcessor,
|
32 |
-
)
|
33 |
-
|
34 |
-
return OpenAITelemetryProcessor()
|
35 |
-
else:
|
36 |
-
raise ValueError(f"Unsupported agent type {agent_framework}")
|
37 |
-
|
38 |
-
@staticmethod
|
39 |
-
def determine_agent_framework(trace: List[Dict[str, Any]]) -> AgentFramework:
|
40 |
-
"""Determine the agent type based on the trace.
|
41 |
-
These are not really stable ways to find it, because we're waiting on some
|
42 |
-
reliable method for determining the agent type. This is a temporary solution.
|
43 |
-
"""
|
44 |
-
for span in trace:
|
45 |
-
if "langchain" in span.get("attributes", {}).get("input.value", ""):
|
46 |
-
logger.info("Agent type is LANGCHAIN")
|
47 |
-
return AgentFramework.LANGCHAIN
|
48 |
-
if span.get("attributes", {}).get("smolagents.max_steps"):
|
49 |
-
logger.info("Agent type is SMOLAGENTS")
|
50 |
-
return AgentFramework.SMOLAGENTS
|
51 |
-
# This is extremely fragile but there currently isn't
|
52 |
-
# any specific key to indicate the agent type
|
53 |
-
if span.get("name") == "response":
|
54 |
-
logger.info("Agent type is OPENAI")
|
55 |
-
return AgentFramework.OPENAI
|
56 |
-
raise ValueError(
|
57 |
-
"Could not determine agent type from trace, or agent type not supported"
|
58 |
-
)
|
59 |
-
|
60 |
-
@abstractmethod
|
61 |
-
def extract_hypothesis_answer(self, trace: List[Dict[str, Any]]) -> str:
|
62 |
-
"""Extract the hypothesis agent final answer from the trace."""
|
63 |
-
pass
|
64 |
-
|
65 |
-
@abstractmethod
|
66 |
-
def _extract_telemetry_data(self, telemetry: List[Dict[str, Any]]) -> List[Dict]:
|
67 |
-
"""Extract the agent-specific data from telemetry."""
|
68 |
-
pass
|
69 |
-
|
70 |
-
def extract_evidence(self, telemetry: List[Dict[str, Any]]) -> str:
|
71 |
-
"""Extract relevant telemetry evidence."""
|
72 |
-
calls = self._extract_telemetry_data(telemetry)
|
73 |
-
return self._format_evidence(calls)
|
74 |
-
|
75 |
-
def _format_evidence(self, calls: List[Dict]) -> str:
|
76 |
-
"""Format extracted data into a standardized output format."""
|
77 |
-
evidence = f"## {self._get_agent_framework().name} Agent Execution\n\n"
|
78 |
-
|
79 |
-
for idx, call in enumerate(calls, start=1):
|
80 |
-
evidence += f"### Call {idx}\n"
|
81 |
-
|
82 |
-
# Truncate any values that are too long
|
83 |
-
call = {
|
84 |
-
k: (
|
85 |
-
v[: self.MAX_EVIDENCE_LENGTH] + "..."
|
86 |
-
if isinstance(v, str) and len(v) > self.MAX_EVIDENCE_LENGTH
|
87 |
-
else v
|
88 |
-
)
|
89 |
-
for k, v in call.items()
|
90 |
-
}
|
91 |
-
|
92 |
-
# Use ensure_ascii=False to prevent escaping Unicode characters
|
93 |
-
evidence += json.dumps(call, indent=2, ensure_ascii=False) + "\n\n"
|
94 |
-
|
95 |
-
return evidence
|
96 |
-
|
97 |
-
@abstractmethod
|
98 |
-
def _get_agent_framework(self) -> AgentFramework:
|
99 |
-
"""Get the agent type associated with this processor."""
|
100 |
-
pass
|
101 |
-
|
102 |
-
@staticmethod
|
103 |
-
def parse_generic_key_value_string(text: str) -> Dict[str, str]:
|
104 |
-
"""
|
105 |
-
Parse a string that has items of a dict with key-value pairs separated by '='.
|
106 |
-
Only splits on '=' signs, handling quoted strings properly.
|
107 |
-
"""
|
108 |
-
pattern = r"(\w+)=('.*?'|\".*?\"|[^'\"=]*?)(?=\s+\w+=|\s*$)"
|
109 |
-
result = {}
|
110 |
-
|
111 |
-
matches = re.findall(pattern, text)
|
112 |
-
for key, value in matches:
|
113 |
-
# Clean up the key
|
114 |
-
key = key.strip()
|
115 |
-
|
116 |
-
# Clean up the value - remove surrounding quotes if present
|
117 |
-
if (value.startswith("'") and value.endswith("'")) or (
|
118 |
-
value.startswith('"') and value.endswith('"')
|
119 |
-
):
|
120 |
-
value = value[1:-1]
|
121 |
-
|
122 |
-
# Store in result dictionary
|
123 |
-
result[key] = value
|
124 |
-
|
125 |
-
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|