Spaces:
Running
Running
github-actions[bot]
commited on
Commit
Β·
178738b
1
Parent(s):
89845e5
Sync with https://github.com/mozilla-ai/surf-spot-finder
Browse files- components/agent_status.py +47 -0
- constants.py +5 -5
- services/agent.py +63 -19
components/agent_status.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from any_agent import AnyAgent
|
2 |
+
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
3 |
+
from collections.abc import Sequence
|
4 |
+
from typing import TYPE_CHECKING, Callable
|
5 |
+
|
6 |
+
from opentelemetry.sdk.trace.export import (
|
7 |
+
SpanExporter,
|
8 |
+
SpanExportResult,
|
9 |
+
)
|
10 |
+
|
11 |
+
from any_agent import AgentFramework
|
12 |
+
|
13 |
+
from any_agent.tracing import TracingProcessor
|
14 |
+
from any_agent.tracing.trace import AgentSpan
|
15 |
+
|
16 |
+
if TYPE_CHECKING:
|
17 |
+
from opentelemetry.sdk.trace import ReadableSpan
|
18 |
+
|
19 |
+
|
20 |
+
class StreamlitExporter(SpanExporter):
|
21 |
+
"""Build an `AgentTrace` and export to the different outputs."""
|
22 |
+
|
23 |
+
def __init__( # noqa: D107
|
24 |
+
self, agent_framework: AgentFramework, callback: Callable
|
25 |
+
):
|
26 |
+
self.agent_framework = agent_framework
|
27 |
+
self.processor: TracingProcessor | None = TracingProcessor.create(
|
28 |
+
agent_framework
|
29 |
+
)
|
30 |
+
self.callback = callback
|
31 |
+
|
32 |
+
def export(self, spans: Sequence["ReadableSpan"]) -> SpanExportResult: # noqa: D102
|
33 |
+
if not self.processor:
|
34 |
+
return SpanExportResult.SUCCESS
|
35 |
+
|
36 |
+
for readable_span in spans:
|
37 |
+
# Check if this span belongs to our run
|
38 |
+
span = AgentSpan.from_readable_span(readable_span)
|
39 |
+
self.callback(span)
|
40 |
+
|
41 |
+
return SpanExportResult.SUCCESS
|
42 |
+
|
43 |
+
|
44 |
+
def export_logs(agent: AnyAgent, callback: Callable) -> None:
|
45 |
+
exporter = StreamlitExporter(agent.framework, callback)
|
46 |
+
span_processor = SimpleSpanProcessor(exporter)
|
47 |
+
agent._tracer_provider.add_span_processor(span_processor)
|
constants.py
CHANGED
@@ -31,7 +31,11 @@ DEFAULT_EVALUATION_CASE = EvaluationCase(
|
|
31 |
"points": 1,
|
32 |
},
|
33 |
{
|
34 |
-
"criteria": "Check if the agent gathered wind
|
|
|
|
|
|
|
|
|
35 |
"points": 1,
|
36 |
},
|
37 |
{
|
@@ -50,10 +54,6 @@ DEFAULT_EVALUATION_CASE = EvaluationCase(
|
|
50 |
"criteria": "Check if the final answer includes information about some alternative surf spots if the user is not satisfied with the chosen one",
|
51 |
"points": 1,
|
52 |
},
|
53 |
-
{
|
54 |
-
"criteria": "Check that the agent completed in fewer than 10 calls",
|
55 |
-
"points": 1,
|
56 |
-
},
|
57 |
],
|
58 |
)
|
59 |
|
|
|
31 |
"points": 1,
|
32 |
},
|
33 |
{
|
34 |
+
"criteria": "Check if the agent gathered wind forecasts for each surf spot being evaluated.",
|
35 |
+
"points": 1,
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"criteria": "Check if the agent gathered wave forecasts for each surf spot being evaluated.",
|
39 |
"points": 1,
|
40 |
},
|
41 |
{
|
|
|
54 |
"criteria": "Check if the final answer includes information about some alternative surf spots if the user is not satisfied with the chosen one",
|
55 |
"points": 1,
|
56 |
},
|
|
|
|
|
|
|
|
|
57 |
],
|
58 |
)
|
59 |
|
services/agent.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
import json
|
2 |
from components.inputs import UserInputs
|
3 |
from constants import DEFAULT_TOOLS
|
|
|
4 |
import streamlit as st
|
5 |
import time
|
6 |
from surf_spot_finder.config import Config
|
7 |
from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
|
8 |
-
from any_agent.tracing.trace import AgentTrace, TotalTokenUseAndCost
|
9 |
from any_agent.tracing.otel_types import StatusCode
|
10 |
from any_agent.evaluation import evaluate, TraceEvaluationResult
|
11 |
|
@@ -102,18 +103,6 @@ async def configure_agent(user_inputs: UserInputs) -> tuple[AnyAgent, Config]:
|
|
102 |
|
103 |
|
104 |
async def display_output(agent_trace: AgentTrace, execution_time: float):
|
105 |
-
cost: TotalTokenUseAndCost = agent_trace.get_total_cost()
|
106 |
-
with st.expander("### π Results", expanded=True):
|
107 |
-
time_col, cost_col, tokens_col = st.columns(3)
|
108 |
-
with time_col:
|
109 |
-
st.info(f"β±οΈ Execution Time: {execution_time:.2f} seconds")
|
110 |
-
with cost_col:
|
111 |
-
st.info(f"π° Estimated Cost: ${cost.total_cost:.6f}")
|
112 |
-
with tokens_col:
|
113 |
-
st.info(f"π¦ Total Tokens: {cost.total_tokens:,}")
|
114 |
-
st.markdown("#### Final Output")
|
115 |
-
st.info(agent_trace.final_output)
|
116 |
-
|
117 |
# Display the agent trace in a more organized way
|
118 |
with st.expander("### π§© Agent Trace"):
|
119 |
for span in agent_trace.spans:
|
@@ -150,6 +139,18 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
|
|
150 |
unsafe_allow_html=True,
|
151 |
)
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
async def run_agent(agent, config) -> tuple[AgentTrace, float]:
|
155 |
st.markdown("#### π Running Surf Spot Finder with query")
|
@@ -178,11 +179,54 @@ async def run_agent(agent, config) -> tuple[AgentTrace, float]:
|
|
178 |
|
179 |
kwargs["run_config"] = RunConfig(max_llm_calls=20)
|
180 |
|
181 |
-
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
|
184 |
-
|
|
|
185 |
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
1 |
import json
|
2 |
from components.inputs import UserInputs
|
3 |
from constants import DEFAULT_TOOLS
|
4 |
+
from components.agent_status import export_logs
|
5 |
import streamlit as st
|
6 |
import time
|
7 |
from surf_spot_finder.config import Config
|
8 |
from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
|
9 |
+
from any_agent.tracing.trace import AgentTrace, TotalTokenUseAndCost, AgentSpan
|
10 |
from any_agent.tracing.otel_types import StatusCode
|
11 |
from any_agent.evaluation import evaluate, TraceEvaluationResult
|
12 |
|
|
|
103 |
|
104 |
|
105 |
async def display_output(agent_trace: AgentTrace, execution_time: float):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
# Display the agent trace in a more organized way
|
107 |
with st.expander("### π§© Agent Trace"):
|
108 |
for span in agent_trace.spans:
|
|
|
139 |
unsafe_allow_html=True,
|
140 |
)
|
141 |
|
142 |
+
cost: TotalTokenUseAndCost = agent_trace.get_total_cost()
|
143 |
+
with st.expander("### π Results", expanded=True):
|
144 |
+
time_col, cost_col, tokens_col = st.columns(3)
|
145 |
+
with time_col:
|
146 |
+
st.info(f"β±οΈ Execution Time: {execution_time:.2f} seconds")
|
147 |
+
with cost_col:
|
148 |
+
st.info(f"π° Estimated Cost: ${cost.total_cost:.6f}")
|
149 |
+
with tokens_col:
|
150 |
+
st.info(f"π¦ Total Tokens: {cost.total_tokens:,}")
|
151 |
+
st.markdown("#### Final Output")
|
152 |
+
st.info(agent_trace.final_output)
|
153 |
+
|
154 |
|
155 |
async def run_agent(agent, config) -> tuple[AgentTrace, float]:
|
156 |
st.markdown("#### π Running Surf Spot Finder with query")
|
|
|
179 |
|
180 |
kwargs["run_config"] = RunConfig(max_llm_calls=20)
|
181 |
|
182 |
+
with st.status("Agent is running...", expanded=False, state="running") as status:
|
183 |
+
|
184 |
+
def update_span(span: AgentSpan):
|
185 |
+
# Process input value
|
186 |
+
input_value = span.attributes.get("input.value", "")
|
187 |
+
if input_value:
|
188 |
+
try:
|
189 |
+
parsed_input = json.loads(input_value)
|
190 |
+
if isinstance(parsed_input, list) and len(parsed_input) > 0:
|
191 |
+
input_value = str(parsed_input[-1])
|
192 |
+
except Exception:
|
193 |
+
pass
|
194 |
+
|
195 |
+
# Process output value
|
196 |
+
output_value = span.attributes.get("output.value", "")
|
197 |
+
if output_value:
|
198 |
+
try:
|
199 |
+
parsed_output = json.loads(output_value)
|
200 |
+
if isinstance(parsed_output, list) and len(parsed_output) > 0:
|
201 |
+
output_value = str(parsed_output[-1])
|
202 |
+
except Exception:
|
203 |
+
pass
|
204 |
+
|
205 |
+
# Truncate long values
|
206 |
+
max_length = 800
|
207 |
+
if len(input_value) > max_length:
|
208 |
+
input_value = f"[Truncated]...{input_value[-max_length:]}"
|
209 |
+
if len(output_value) > max_length:
|
210 |
+
output_value = f"[Truncated]...{output_value[-max_length:]}"
|
211 |
+
|
212 |
+
# Create a cleaner message format
|
213 |
+
if input_value or output_value:
|
214 |
+
message = f"Step: {span.name}\n"
|
215 |
+
if input_value:
|
216 |
+
message += f"Input: {input_value}\n"
|
217 |
+
if output_value:
|
218 |
+
message += f"Output: {output_value}"
|
219 |
+
else:
|
220 |
+
message = f"Step: {span.name}\n{span}"
|
221 |
+
|
222 |
+
status.update(label=message, expanded=False, state="running")
|
223 |
+
|
224 |
+
export_logs(agent, update_span)
|
225 |
+
start_time = time.time()
|
226 |
agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
|
227 |
+
status.update(label="Finished!", expanded=False, state="complete")
|
228 |
+
end_time = time.time()
|
229 |
|
230 |
+
agent.exit()
|
231 |
+
execution_time = end_time - start_time
|
232 |
+
return agent_trace, execution_time
|