Spaces:
Build error
Build error
new v
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🕵🏻‍♂️
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
|
|
1 |
---
|
2 |
+
title: HF Agents Course Final Assignment
|
3 |
emoji: 🕵🏻‍♂️
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# app.py (
|
2 |
|
3 |
import os
|
4 |
import re
|
@@ -9,28 +9,48 @@ import logging
|
|
9 |
import numexpr
|
10 |
from typing import TypedDict, Annotated
|
11 |
|
12 |
-
# --- Langchain & HF Imports ---
|
13 |
from langchain_huggingface import HuggingFaceEndpoint
|
14 |
from langchain_community.tools import DuckDuckGoSearchRun
|
15 |
from langchain_core.prompts import PromptTemplate
|
16 |
from langchain_core.output_parsers import StrOutputParser
|
17 |
from langchain_core.tools import tool
|
18 |
from langgraph.graph import StateGraph, END
|
|
|
19 |
from langchain_community.document_loaders.youtube import YoutubeLoader
|
20 |
-
from transformers
|
21 |
|
22 |
# --- Constants ---
|
23 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
24 |
-
|
25 |
-
1
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
image_to_text_pipeline = None
|
35 |
|
36 |
|
@@ -52,7 +72,6 @@ def math_calculator(expression: str) -> str:
|
|
52 |
result = numexpr.evaluate(expression).item()
|
53 |
return str(result)
|
54 |
except Exception as e:
|
55 |
-
logging.error(f"Calculator error: {e}")
|
56 |
return f"Error: {e}"
|
57 |
|
58 |
|
@@ -63,50 +82,33 @@ def image_analyzer(image_url: str) -> str:
|
|
63 |
logging.info(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
|
64 |
try:
|
65 |
if image_to_text_pipeline is None:
|
66 |
-
logging.info(
|
67 |
-
"--- Initializing Image Analyzer pipeline (lazy loading)... ---"
|
68 |
-
)
|
69 |
image_to_text_pipeline = hf_pipeline(
|
70 |
"image-to-text", model="Salesforce/blip-image-captioning-base"
|
71 |
)
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
pipeline_output
|
76 |
-
and isinstance(pipeline_output, list)
|
77 |
-
and len(pipeline_output) > 0
|
78 |
-
):
|
79 |
-
description = pipeline_output[0].get(
|
80 |
-
"generated_text", "Error: Could not generate text."
|
81 |
-
)
|
82 |
-
else:
|
83 |
-
description = "Error: Could not analyze image."
|
84 |
return description
|
85 |
except Exception as e:
|
86 |
-
logging.error(f"Error analyzing image: {e}")
|
87 |
return f"Error analyzing image: {e}"
|
88 |
|
89 |
|
90 |
@tool
|
91 |
def youtube_transcript_reader(youtube_url: str) -> str:
|
92 |
"""Reads the transcript of a YouTube video from its URL."""
|
93 |
-
logging.info(
|
94 |
-
f"--- Calling YouTube Transcript Reader Tool with URL: {youtube_url} ---"
|
95 |
-
)
|
96 |
try:
|
97 |
loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
|
98 |
-
|
99 |
-
transcript = " ".join([doc.page_content for doc in docs])
|
100 |
-
return transcript[:4000]
|
101 |
except Exception as e:
|
102 |
-
|
103 |
-
return f"Error: {e}"
|
104 |
|
105 |
|
|
|
106 |
class AgentState(TypedDict):
|
107 |
question: str
|
108 |
messages: Annotated[list, lambda x, y: x + y]
|
109 |
-
sender: str
|
110 |
|
111 |
|
112 |
class GaiaAgent:
|
@@ -119,54 +121,27 @@ class GaiaAgent:
|
|
119 |
youtube_transcript_reader,
|
120 |
]
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
llm = HuggingFaceEndpoint(
|
126 |
-
repo_id="
|
127 |
temperature=0.1,
|
128 |
max_new_tokens=1024,
|
129 |
-
|
130 |
)
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
# The rest of the class remains the same
|
135 |
-
prompt = PromptTemplate(
|
136 |
-
template=SYSTEM_PROMPT
|
137 |
-
+ "\nHere is the current conversation:\n{messages}\n\nQuestion: {question}",
|
138 |
-
input_variables=["messages", "question"],
|
139 |
)
|
140 |
self.agent = prompt | llm | StrOutputParser()
|
141 |
self.graph = self._create_graph()
|
142 |
logging.info("GaiaAgent initialized successfully.")
|
143 |
|
144 |
-
def _create_graph(self):
|
145 |
-
graph = StateGraph(AgentState)
|
146 |
-
graph.add_node("agent", self._call_agent)
|
147 |
-
graph.add_node("tools", self._call_tools)
|
148 |
-
graph.add_conditional_edges(
|
149 |
-
"agent", self._decide_action, {END: END, "tools": "tools"}
|
150 |
-
)
|
151 |
-
graph.add_edge("tools", "agent")
|
152 |
-
graph.set_entry_point("agent")
|
153 |
-
return graph.compile()
|
154 |
-
|
155 |
def _call_agent(self, state: AgentState):
|
156 |
logging.info("--- Calling Agent ---")
|
157 |
-
|
158 |
-
|
159 |
-
{"messages": message_history, "question": state["question"]}
|
160 |
-
)
|
161 |
-
return {"messages": [response], "sender": "agent"}
|
162 |
-
|
163 |
-
def _decide_action(self, state: AgentState):
|
164 |
-
logging.info("--- Deciding Action ---")
|
165 |
-
response = state["messages"][-1]
|
166 |
-
if "FINAL ANSWER:" in response:
|
167 |
-
return END
|
168 |
-
else:
|
169 |
-
return "tools"
|
170 |
|
171 |
def _call_tools(self, state: AgentState):
|
172 |
logging.info("--- Calling Tools ---")
|
@@ -176,57 +151,66 @@ class GaiaAgent:
|
|
176 |
logging.warning("No valid tool call found in agent response.")
|
177 |
return {
|
178 |
"messages": [
|
179 |
-
|
180 |
-
]
|
181 |
-
"sender": "tools",
|
182 |
}
|
|
|
183 |
tool_name = tool_call_match.group(1).strip()
|
184 |
-
tool_input_str = tool_call_match.group(2).strip()
|
185 |
-
|
186 |
-
tool_input_str.startswith("'") and tool_input_str.endswith("'")
|
187 |
-
):
|
188 |
-
tool_input = tool_input_str[1:-1]
|
189 |
-
else:
|
190 |
-
tool_input = tool_input_str
|
191 |
tool_to_call = next((t for t in self.tools if t.name == tool_name), None)
|
192 |
if tool_to_call:
|
193 |
try:
|
194 |
-
result = tool_to_call.run(
|
195 |
-
return {"messages": [str(result)]
|
196 |
except Exception as e:
|
197 |
-
|
198 |
-
return {
|
199 |
-
"messages": [f"Error executing tool {tool_name}: {e}"],
|
200 |
-
"sender": "tools",
|
201 |
-
}
|
202 |
else:
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
def __call__(self, question: str) -> str:
|
207 |
logging.info(f"Agent received question: {question[:100]}...")
|
208 |
try:
|
209 |
-
initial_state = {"question": question, "messages": []
|
|
|
210 |
final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
|
211 |
final_response = final_state["messages"][-1]
|
212 |
match = re.search(
|
213 |
r"FINAL ANSWER:\s*(.*)", final_response, re.IGNORECASE | re.DOTALL
|
214 |
)
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
"Agent could not find a final answer. Returning the last message."
|
222 |
-
)
|
223 |
-
return final_response
|
224 |
except Exception as e:
|
225 |
logging.error(f"Error during agent invocation: {e}", exc_info=True)
|
226 |
-
return f"Error
|
227 |
|
228 |
|
|
|
229 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
|
230 |
if not profile:
|
231 |
return "Please Login to Hugging Face.", None
|
232 |
username = profile.username
|
@@ -249,13 +233,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
249 |
response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
|
250 |
response.raise_for_status()
|
251 |
questions_data = response.json()
|
252 |
-
if not questions_data:
|
253 |
-
return "Fetched questions list is empty.", None
|
254 |
-
logging.info(f"Successfully fetched {len(questions_data)} questions.")
|
255 |
except Exception as e:
|
256 |
return f"Error fetching questions: {e}", None
|
257 |
|
258 |
-
# The loop will now process the full 'questions_data' list
|
259 |
logging.info(
|
260 |
f"FULL EVALUATION MODE: Processing all {len(questions_data)} questions..."
|
261 |
)
|
@@ -304,34 +284,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
304 |
)
|
305 |
response.raise_for_status()
|
306 |
result_data = response.json()
|
307 |
-
status = (
|
308 |
-
f"Submission Successful!\n"
|
309 |
-
f"User: {result_data.get('username')}\n"
|
310 |
-
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
311 |
-
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
312 |
-
f"Message: {result_data.get('message', 'No message received.')}"
|
313 |
-
)
|
314 |
return status, pd.DataFrame(results_log)
|
315 |
except Exception as e:
|
316 |
return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
317 |
|
318 |
|
|
|
319 |
with gr.Blocks() as demo:
|
320 |
gr.Markdown("# GAIA Agent Evaluation Runner")
|
321 |
-
gr.Markdown(
|
322 |
-
"This agent uses LangGraph and Mistral-7B to answer questions from the GAIA benchmark."
|
323 |
-
)
|
324 |
gr.LoginButton()
|
325 |
-
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
326 |
-
status_output = gr.Textbox(
|
327 |
-
|
328 |
-
)
|
329 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
330 |
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
331 |
|
332 |
if __name__ == "__main__":
|
333 |
logging.basicConfig(
|
334 |
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
335 |
)
|
336 |
-
logging.info("App Starting (Final Version)...")
|
337 |
demo.launch()
|
|
|
1 |
+
# app.py (Refactored for Improved Performance)
|
2 |
|
3 |
import os
|
4 |
import re
|
|
|
9 |
import numexpr
|
10 |
from typing import TypedDict, Annotated
|
11 |
|
12 |
+
# --- Langchain & HF Imports (Modern and Correct) ---
|
13 |
from langchain_huggingface import HuggingFaceEndpoint
|
14 |
from langchain_community.tools import DuckDuckGoSearchRun
|
15 |
from langchain_core.prompts import PromptTemplate
|
16 |
from langchain_core.output_parsers import StrOutputParser
|
17 |
from langchain_core.tools import tool
|
18 |
from langgraph.graph import StateGraph, END
|
19 |
+
from langgraph.errors import GraphRecursionError
|
20 |
from langchain_community.document_loaders.youtube import YoutubeLoader
|
21 |
+
from transformers import pipeline as hf_pipeline # Renamed to avoid conflict
|
22 |
|
23 |
# --- Constants ---
|
24 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
25 |
+
|
26 |
+
### --- REFACTOR 1: A much stricter and more detailed System Prompt --- ###
|
27 |
+
SYSTEM_PROMPT = """You are GAIA, a powerful expert assistant. You are designed to answer questions accurately and efficiently by using a set of available tools.
|
28 |
+
|
29 |
+
**Your STRICT Process:**
|
30 |
+
|
31 |
+
1. **Analyze the User's Question:** Carefully determine the user's intent and what information is needed.
|
32 |
+
|
33 |
+
2. **Tool Selection and Execution:**
|
34 |
+
* **Is a tool necessary?**
|
35 |
+
* For questions about public information, facts, current events, statistics, people, companies, etc., you **MUST** use the `web_search` tool. Do not rely on your internal knowledge.
|
36 |
+
* If the question includes a URL pointing to an image (`.png`, `.jpg`, etc.), you **MUST** use the `image_analyzer` tool.
|
37 |
+
* If the question includes a YouTube URL, you **MUST** use the `youtube_transcript_reader` tool.
|
38 |
+
* If the question requires a calculation, you **MUST** use the `math_calculator` tool.
|
39 |
+
* If the question is a simple logic puzzle, riddle, or language task you can solve directly, you do not need a tool.
|
40 |
+
* **Tool Call Format:** To use a tool, you **MUST** respond with **only** the tool call on a single line. Do not add any other text or explanation.
|
41 |
+
* Example: `web_search("How many albums did Mercedes Sosa release after 2000?")`
|
42 |
+
|
43 |
+
3. **Analyze Tool Output:**
|
44 |
+
* Review the information returned by the tool.
|
45 |
+
* If you have enough information to answer the user's question, proceed to the final step.
|
46 |
+
* If you need more information, you may use another tool.
|
47 |
+
|
48 |
+
4. **Final Answer:**
|
49 |
+
* Once you have a definitive answer, you **MUST** format it as follows, and nothing else:
|
50 |
+
`FINAL ANSWER: [Your concise and accurate answer]`
|
51 |
+
"""
|
52 |
+
|
53 |
+
# --- Tool Definitions (Unchanged) ---
|
54 |
image_to_text_pipeline = None
|
55 |
|
56 |
|
|
|
72 |
result = numexpr.evaluate(expression).item()
|
73 |
return str(result)
|
74 |
except Exception as e:
|
|
|
75 |
return f"Error: {e}"
|
76 |
|
77 |
|
|
|
82 |
logging.info(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
|
83 |
try:
|
84 |
if image_to_text_pipeline is None:
|
85 |
+
logging.info("--- Initializing Image Analyzer pipeline... ---")
|
|
|
|
|
86 |
image_to_text_pipeline = hf_pipeline(
|
87 |
"image-to-text", model="Salesforce/blip-image-captioning-base"
|
88 |
)
|
89 |
+
description = image_to_text_pipeline(image_url)[0].get(
|
90 |
+
"generated_text", "Error"
|
91 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
return description
|
93 |
except Exception as e:
|
|
|
94 |
return f"Error analyzing image: {e}"
|
95 |
|
96 |
|
97 |
@tool
|
98 |
def youtube_transcript_reader(youtube_url: str) -> str:
|
99 |
"""Reads the transcript of a YouTube video from its URL."""
|
100 |
+
logging.info(f"--- Calling YouTube Transcript Reader with URL: {youtube_url} ---")
|
|
|
|
|
101 |
try:
|
102 |
loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
|
103 |
+
return " ".join([doc.page_content for doc in loader.load()])[:4000]
|
|
|
|
|
104 |
except Exception as e:
|
105 |
+
return f"Error reading YouTube transcript: {e}"
|
|
|
106 |
|
107 |
|
108 |
+
# --- Agent State & Graph (Unchanged) ---
|
109 |
class AgentState(TypedDict):
|
110 |
question: str
|
111 |
messages: Annotated[list, lambda x, y: x + y]
|
|
|
112 |
|
113 |
|
114 |
class GaiaAgent:
|
|
|
121 |
youtube_transcript_reader,
|
122 |
]
|
123 |
|
124 |
+
### --- REFACTOR 2: Switched to the more powerful Mistral-7B model --- ###
|
125 |
+
# IMPORTANT: Make sure you have accepted the terms of use for this model on the Hugging Face Hub!
|
126 |
+
logging.info("Initializing LLM with Mistral-7B...")
|
127 |
llm = HuggingFaceEndpoint(
|
128 |
+
repo_id="mistralai/Mistral-7B-Instruct-v0.2",
|
129 |
temperature=0.1,
|
130 |
max_new_tokens=1024,
|
131 |
+
huggingface_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
|
132 |
)
|
133 |
|
134 |
+
prompt = PromptTemplate.from_template(
|
135 |
+
SYSTEM_PROMPT + "\n{messages}\n\nQuestion: {question}"
|
|
|
|
|
|
|
|
|
|
|
136 |
)
|
137 |
self.agent = prompt | llm | StrOutputParser()
|
138 |
self.graph = self._create_graph()
|
139 |
logging.info("GaiaAgent initialized successfully.")
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
def _call_agent(self, state: AgentState):
|
142 |
logging.info("--- Calling Agent ---")
|
143 |
+
response = self.agent.invoke(state)
|
144 |
+
return {"messages": [response]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
def _call_tools(self, state: AgentState):
|
147 |
logging.info("--- Calling Tools ---")
|
|
|
151 |
logging.warning("No valid tool call found in agent response.")
|
152 |
return {
|
153 |
"messages": [
|
154 |
+
"No valid tool call found. Please try again or provide a FINAL ANSWER."
|
155 |
+
]
|
|
|
156 |
}
|
157 |
+
|
158 |
tool_name = tool_call_match.group(1).strip()
|
159 |
+
tool_input_str = tool_call_match.group(2).strip().strip("'\"")
|
160 |
+
|
|
|
|
|
|
|
|
|
|
|
161 |
tool_to_call = next((t for t in self.tools if t.name == tool_name), None)
|
162 |
if tool_to_call:
|
163 |
try:
|
164 |
+
result = tool_to_call.run(tool_input_str)
|
165 |
+
return {"messages": [str(result)]}
|
166 |
except Exception as e:
|
167 |
+
return {"messages": [f"Error executing tool {tool_name}: {e}"]}
|
|
|
|
|
|
|
|
|
168 |
else:
|
169 |
+
return {
|
170 |
+
"messages": [
|
171 |
+
f"Tool '{tool_name}' not found. Available tools: web_search, math_calculator, image_analyzer, youtube_transcript_reader."
|
172 |
+
]
|
173 |
+
}
|
174 |
+
|
175 |
+
def _decide_action(self, state: AgentState):
|
176 |
+
return "tools" if "FINAL ANSWER:" not in state["messages"][-1] else END
|
177 |
+
|
178 |
+
def _create_graph(self):
|
179 |
+
graph = StateGraph(AgentState)
|
180 |
+
graph.add_node("agent", self._call_agent)
|
181 |
+
graph.add_node("tools", self._call_tools)
|
182 |
+
graph.add_conditional_edges(
|
183 |
+
"agent", self._decide_action, {"tools": "tools", END: END}
|
184 |
+
)
|
185 |
+
graph.add_edge("tools", "agent")
|
186 |
+
graph.set_entry_point("agent")
|
187 |
+
return graph.compile()
|
188 |
|
189 |
def __call__(self, question: str) -> str:
|
190 |
logging.info(f"Agent received question: {question[:100]}...")
|
191 |
try:
|
192 |
+
initial_state = {"question": question, "messages": []}
|
193 |
+
### --- REFACTOR 3: Gracefully handle recursion errors --- ###
|
194 |
final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
|
195 |
final_response = final_state["messages"][-1]
|
196 |
match = re.search(
|
197 |
r"FINAL ANSWER:\s*(.*)", final_response, re.IGNORECASE | re.DOTALL
|
198 |
)
|
199 |
+
return (
|
200 |
+
match.group(1).strip() if match else "Could not determine final answer."
|
201 |
+
)
|
202 |
+
except GraphRecursionError:
|
203 |
+
logging.error("Agent got stuck in a loop.")
|
204 |
+
return "Agent Error: Stuck in a loop."
|
|
|
|
|
|
|
205 |
except Exception as e:
|
206 |
logging.error(f"Error during agent invocation: {e}", exc_info=True)
|
207 |
+
return f"Error: {e}"
|
208 |
|
209 |
|
210 |
+
# --- Main Application Logic (Unchanged) ---
|
211 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
212 |
+
# This function is restored to run all questions.
|
213 |
+
# ... (The rest of this function and the Gradio UI code is the same as the last working version) ...
|
214 |
if not profile:
|
215 |
return "Please Login to Hugging Face.", None
|
216 |
username = profile.username
|
|
|
233 |
response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
|
234 |
response.raise_for_status()
|
235 |
questions_data = response.json()
|
|
|
|
|
|
|
236 |
except Exception as e:
|
237 |
return f"Error fetching questions: {e}", None
|
238 |
|
|
|
239 |
logging.info(
|
240 |
f"FULL EVALUATION MODE: Processing all {len(questions_data)} questions..."
|
241 |
)
|
|
|
284 |
)
|
285 |
response.raise_for_status()
|
286 |
result_data = response.json()
|
287 |
+
status = f"Submission Successful!\nScore: {result_data.get('score', 'N/A')}%"
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
return status, pd.DataFrame(results_log)
|
289 |
except Exception as e:
|
290 |
return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
291 |
|
292 |
|
293 |
+
# --- Gradio Interface (Unchanged) ---
|
294 |
with gr.Blocks() as demo:
|
295 |
gr.Markdown("# GAIA Agent Evaluation Runner")
|
|
|
|
|
|
|
296 |
gr.LoginButton()
|
297 |
+
run_button = gr.Button("Run Full Evaluation & Submit All Answers")
|
298 |
+
status_output = gr.Textbox(label="Run Status / Result", lines=4)
|
299 |
+
results_table = gr.DataFrame(label="Questions and Answers", wrap=True)
|
|
|
|
|
300 |
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
301 |
|
302 |
if __name__ == "__main__":
|
303 |
logging.basicConfig(
|
304 |
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
305 |
)
|
|
|
306 |
demo.launch()
|