Spaces: Build error
lesgo
app.py CHANGED
@@ -3,20 +3,29 @@ import gradio as gr
 import requests
 import pandas as pd
 import torch
+import base64
+from io import BytesIO

 from llama_index.core.tools import FunctionTool
 from llama_index.llms.huggingface import HuggingFaceLLM
-
-# This import is correct and works when `llama-index` is installed
 from llama_index.core.agent import ReActAgent
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
+from llama_index.tools.python_repl import PythonREPLTool
 from youtube_transcript_api import YouTubeTranscriptApi
+from PIL import Image

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
+IMAGE_ANALYSIS_API_URL = (
+    "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
+)

 # --- Helper Functions for Tools ---
+
+# HF_TOKEN must be set as a Space Secret in Hugging Face
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+
 def get_video_transcript(youtube_url: str):
     """Fetches the transcript of a YouTube video given its URL."""
     try:
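Note: the IMAGE_ANALYSIS_API_URL added above points at the serverless Inference API, which accepts the raw image bytes as the request body. A minimal smoke test of that call, as a sketch: it assumes HF_TOKEN is exported in the environment and that a local test.png exists (both are placeholders, not part of this commit).

    import os
    import requests

    API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
    headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}

    # POST the raw bytes; the endpoint responds with a JSON list of generations
    with open("test.png", "rb") as f:
        response = requests.post(API_URL, headers=headers, data=f.read())
    print(response.json())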
@@ -30,33 +39,82 @@ def get_video_transcript(youtube_url: str):
         return f"Error fetching transcript: {e}"


+def analyze_image_url(image_url: str, question: str):
+    """Analyzes an image from a URL using the Hugging Face Inference API."""
+    if not HF_TOKEN:
+        return (
+            "Error: Hugging Face token is not set. Cannot use the image analysis tool."
+        )
+
+    try:
+        # Download the image
+        response = requests.get(image_url)
+        response.raise_for_status()
+        image_bytes = BytesIO(response.content).getvalue()
+
+        headers = {
+            "Authorization": f"Bearer {HF_TOKEN}",
+            "Content-Type": "image/png",
+        }
+
+        # The Inference API's image-to-text endpoint takes raw image bytes and does
+        # not cleanly support a separate text prompt in the same request. Rather
+        # than force the question into the payload, ask for a general description
+        # and let the agent reason over it together with the original question.
+        response = requests.post(
+            IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
+        )
+        response.raise_for_status()
+
+        result = response.json()
+        generated_text = result[0].get("generated_text", "").strip()
+        final_answer = generated_text.split("ASSISTANT:")[-1].strip()
+
+        # The agent gets the description, then re-evaluates the original question.
+        return f"The image description is: {final_answer}. Now, answer the original question based on this."
+
+    except Exception as e:
+        return f"Error analyzing image: {e}"
+
+
 # --- Tool Definitions ---
 youtube_tool = FunctionTool.from_defaults(
     fn=get_video_transcript,
     name="youtube_transcript_tool",
-    description="Use this tool to get the transcript of a YouTube video.
+    description="Use this tool to get the transcript of a YouTube video.",
 )
+image_analyzer_tool = FunctionTool.from_defaults(
+    fn=analyze_image_url,
+    name="image_analyzer_tool",
+    description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
+)
+python_repl_tool = PythonREPLTool()


 # --- LlamaIndex Agent Definition ---
 class LlamaIndexAgent:
     def __init__(self):
-        print("Initializing LlamaIndexAgent with Tools...")
+        print("Initializing LlamaIndexAgent with Final Tools...")

-        # Initialize the DuckDuckGo search tool
         ddg_spec = DuckDuckGoSearchToolSpec()

-        self.tools = [
+        self.tools = [
+            youtube_tool,
+            image_analyzer_tool,
+            python_repl_tool,
+        ] + ddg_spec.to_tool_list()

         system_prompt = """
         You are a helpful assistant tasked with answering questions.
-        You have access to a set of tools
-
-
-
+        You have access to a set of tools to help you. These tools include:
+        - A web search tool.
+        - A YouTube video transcriber.
+        - An image analyzer for URLs (this tool provides a description of the image).
+        - A Python code interpreter for math and calculations.
+        Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+        YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
         """

-        # Load the primary language model for reasoning
         self.llm = HuggingFaceLLM(
             model_name="HuggingFaceH4/zephyr-7b-beta",
             tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
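Note: the commit imports base64 and PIL but never uses them, and the comment in analyze_image_url explains the underlying limitation: this endpoint cannot take a text prompt alongside raw image bytes. If pairing the question with the image ever becomes necessary, one option is a model served under the visual-question-answering task, which takes a JSON payload with a base64-encoded image. A sketch only; the model name is illustrative and the payload format should be checked against the current Inference API docs.

    import base64
    import os
    import requests

    VQA_API_URL = "https://api-inference.huggingface.co/models/dandelin/vilt-b32-finetuned-vqa"

    def ask_about_image(image_url: str, question: str) -> str:
        image_bytes = requests.get(image_url, timeout=30).content
        payload = {
            "inputs": {
                "question": question,
                # The VQA task takes the image as a base64-encoded string
                "image": base64.b64encode(image_bytes).decode("utf-8"),
            }
        }
        headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
        response = requests.post(VQA_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        # The response is a list of {"answer", "score"} candidates, best first
        return response.json()[0]["answer"]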
@@ -64,14 +122,13 @@ class LlamaIndexAgent:
             model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
         )

-        # Initialize the ReAct Agent
         self.agent = ReActAgent.from_tools(
             tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
         )
         print("LlamaIndexAgent initialized successfully.")

     def __call__(self, question: str) -> str:
-        print(f"Agent received question
+        print(f"Agent received question: {question[:80]}...")
         response = self.agent.chat(question)
         answer = str(response).strip()

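Note: load_in_8bit=True in model_kwargs requires the bitsandbytes package (and a GPU) in the Space, a common source of build errors like the one this commit is fixing. Recent transformers versions express the same intent through an explicit quantization config. A sketch, assuming a transformers version that exposes BitsAndBytesConfig:

    import torch
    from transformers import BitsAndBytesConfig
    from llama_index.llms.huggingface import HuggingFaceLLM

    llm = HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        model_kwargs={
            "torch_dtype": torch.float16,
            # Explicit 8-bit quantization config instead of the bare flag
            "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
        },
    )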
@@ -82,17 +139,21 @@ class LlamaIndexAgent:
                 f"Warning: Agent did not use the 'FINAL ANSWER:' template. Raw output: {answer}"
             )
             final_answer = answer
-        return
+        return final_answer


 # --- Main Gradio App Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    if not HF_TOKEN:
+        return (
+            "ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
+            None,
+        )
+
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
-        print(f"User logged in: {username}")
     else:
-        print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None

     api_url = DEFAULT_API_URL
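Note: __call__ only warns when the model skips the 'FINAL ANSWER:' template and then returns the raw text unchanged. A small helper that tolerates both shapes; a sketch, and extract_final_answer is a hypothetical name, not part of this commit:

    import re

    def extract_final_answer(raw: str) -> str:
        # Keep everything after the template if present; otherwise return the raw text
        match = re.search(r"FINAL ANSWER:\s*(.+)", raw, flags=re.IGNORECASE | re.DOTALL)
        return (match.group(1) if match else raw).strip()

    assert extract_final_answer("thinking...\nFINAL ANSWER: 42") == "42"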
@@ -102,21 +163,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     try:
         agent = LlamaIndexAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)

-    print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
-        if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None

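Note: this hunk deletes the old empty-list guard along with the debug prints. Re-adding just the guard (taken from the removed lines) would keep the run from proceeding with zero questions:

    if not questions_data:
        return "Fetched questions list is empty or invalid format.", None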
@@ -130,17 +184,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = agent(question_text)
-            answer_for_submission = submitted_answer.replace(
-                "FINAL ANSWER:", ""
-            ).strip()
             answers_payload.append(
-                {"task_id": task_id, "submitted_answer":
+                {"task_id": task_id, "submitted_answer": submitted_answer}
             )
             results_log.append(
                 {
                     "Task ID": task_id,
                     "Question": question_text,
-                    "Submitted Answer":
+                    "Submitted Answer": submitted_answer,
                 }
             )
         except Exception as e:
@@ -160,7 +211,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         "agent_code": agent_code,
         "answers": answers_payload,
     }
-
+
     try:
         response = requests.post(submit_url, json=submission_data, timeout=180)
         response.raise_for_status()
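Note: this POST runs once, after the entire evaluation loop, so a transient network error here loses the whole run. A retry wrapper is cheap insurance; a sketch, where post_with_retries is a hypothetical helper, not part of this commit:

    import time
    import requests

    def post_with_retries(url: str, payload: dict, attempts: int = 3) -> requests.Response:
        for attempt in range(attempts):
            try:
                response = requests.post(url, json=payload, timeout=180)
                response.raise_for_status()
                return response
            except requests.RequestException:
                if attempt == attempts - 1:
                    raise
                time.sleep(2 ** attempt)  # back off 1s, then 2s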
@@ -181,13 +232,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):

 # --- Build Gradio Interface ---
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown("# Final, Fully-Featured GAIA Agent")
     gr.Markdown(
         """
-        **
-        1. This
-        2.
-        3.
+        **Agent Capabilities:** Web Search, YouTube Analysis, Image Analysis (via API), and Python Code Execution.
+        1. **IMPORTANT**: This Space requires a Hugging Face Token to be set in the secrets as `HF_TOKEN` for the image analysis tool to work.
+        2. Log in to your Hugging Face account using the button below.
+        3. Click 'Run Evaluation & Submit All Answers'. This process is complex and will take a very long time.
         """
     )
     gr.LoginButton()
@@ -200,6 +251,11 @@ with gr.Blocks() as demo:

 if __name__ == "__main__":
     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-
+    if not HF_TOKEN:
+        print(
+            "⚠️ WARNING: The `HF_TOKEN` secret is not set. The image analysis tool will be unavailable."
+        )
+    else:
+        print("✅ `HF_TOKEN` secret is set.")
     print("Launching Gradio Interface...")
     demo.launch(debug=True, share=False)