Spaces:
Build error
Build error
fix reqs & app
Browse files- app.py +35 -64
- requirements.txt +3 -3
app.py
CHANGED
@@ -5,12 +5,12 @@ import pandas as pd
|
|
5 |
import torch
|
6 |
import base64
|
7 |
from io import BytesIO
|
|
|
8 |
|
9 |
from llama_index.core.tools import FunctionTool
|
10 |
from llama_index.llms.huggingface import HuggingFaceLLM
|
11 |
from llama_index.core.agent import ReActAgent
|
12 |
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
|
13 |
-
from llama_index.tools.python_repl import PythonREPLTool
|
14 |
from youtube_transcript_api import YouTubeTranscriptApi
|
15 |
from PIL import Image
|
16 |
|
@@ -45,60 +45,34 @@ def analyze_image_url(image_url: str, question: str):
|
|
45 |
return (
|
46 |
"Error: Hugging Face token is not set. Cannot use the image analysis tool."
|
47 |
)
|
48 |
-
|
49 |
try:
|
50 |
-
# Download image
|
51 |
response = requests.get(image_url)
|
52 |
response.raise_for_status()
|
53 |
-
|
54 |
-
# Prepare data for the Inference API
|
55 |
image_bytes = BytesIO(response.content).getvalue()
|
56 |
-
|
57 |
-
# Call Inference API
|
58 |
-
headers = {
|
59 |
-
"Authorization": f"Bearer {HF_TOKEN}",
|
60 |
-
"Content-Type": "image/png", # Specify content type
|
61 |
-
}
|
62 |
-
|
63 |
-
# The Llava prompt format is specific
|
64 |
-
prompt = f"USER: <image>\n{question}\nASSISTANT:"
|
65 |
-
|
66 |
-
# To send both image and text, we can't use a simple JSON payload.
|
67 |
-
# A common approach is to use a multi-part form, but the HF API
|
68 |
-
# can be tricky. Let's try a different model that supports image url directly if available,
|
69 |
-
# or stick to a method that works with its API.
|
70 |
-
# For llava, sending the raw image data is the documented way.
|
71 |
-
|
72 |
-
# Re-checking llava API documentation for combined prompt/image...
|
73 |
-
# The API doesn't cleanly support separate text prompts with raw image data posts.
|
74 |
-
# A workaround is to embed the prompt in the image or use a model designed for this API format.
|
75 |
-
# Let's pivot to a model that explicitly takes a URL or a simpler payload.
|
76 |
-
# However, to keep it simple, we'll assume the prompt is simple enough.
|
77 |
-
|
78 |
-
# Let's simplify the tool's goal: describe the image, then the LLM can reason on the description.
|
79 |
-
# This is a more robust pattern than trying to force a complex prompt into an API.
|
80 |
-
|
81 |
-
description_prompt = "USER: <image>\nDescribe this image in detail.\nASSISTANT:"
|
82 |
-
|
83 |
-
# For the sake of this example, we will stick to the documented behavior
|
84 |
-
# and assume the `question` can be answered from a general description.
|
85 |
response = requests.post(
|
86 |
IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
|
87 |
)
|
88 |
response.raise_for_status()
|
89 |
-
|
90 |
result = response.json()
|
91 |
generated_text = result[0].get("generated_text", "").strip()
|
92 |
-
|
93 |
final_answer = generated_text.split("ASSISTANT:")[-1].strip()
|
94 |
-
|
95 |
-
# The agent will get the description, then re-evaluate with the original question.
|
96 |
return f"The image description is: {final_answer}. Now, answer the original question based on this."
|
97 |
-
|
98 |
except Exception as e:
|
99 |
return f"Error analyzing image: {e}"
|
100 |
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
# --- Tool Definitions ---
|
103 |
youtube_tool = FunctionTool.from_defaults(
|
104 |
fn=get_video_transcript,
|
@@ -110,40 +84,39 @@ image_analyzer_tool = FunctionTool.from_defaults(
|
|
110 |
name="image_analyzer_tool",
|
111 |
description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
|
112 |
)
|
113 |
-
|
|
|
|
|
|
|
|
|
114 |
|
115 |
|
116 |
# --- LlamaIndex Agent Definition ---
|
117 |
class LlamaIndexAgent:
|
118 |
def __init__(self):
|
119 |
print("Initializing LlamaIndexAgent with Final Tools...")
|
120 |
-
|
121 |
ddg_spec = DuckDuckGoSearchToolSpec()
|
122 |
-
|
123 |
self.tools = [
|
124 |
youtube_tool,
|
125 |
image_analyzer_tool,
|
126 |
-
|
127 |
] + ddg_spec.to_tool_list()
|
128 |
-
|
129 |
system_prompt = """
|
130 |
You are a helpful assistant tasked with answering questions.
|
131 |
You have access to a set of tools to help you. These tools include:
|
132 |
- A web search tool.
|
133 |
- A YouTube video transcriber.
|
134 |
-
- An image analyzer for URLs
|
135 |
-
- A
|
136 |
Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
|
137 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
|
138 |
"""
|
139 |
-
|
140 |
self.llm = HuggingFaceLLM(
|
141 |
model_name="HuggingFaceH4/zephyr-7b-beta",
|
142 |
tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
|
143 |
device_map="auto",
|
144 |
model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
|
145 |
)
|
146 |
-
|
147 |
self.agent = ReActAgent.from_tools(
|
148 |
tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
|
149 |
)
|
@@ -153,7 +126,6 @@ class LlamaIndexAgent:
|
|
153 |
print(f"Agent received question: {question[:80]}...")
|
154 |
response = self.agent.chat(question)
|
155 |
answer = str(response).strip()
|
156 |
-
|
157 |
if "FINAL ANSWER:" in answer:
|
158 |
final_answer = answer.split("FINAL ANSWER:")[-1].strip()
|
159 |
else:
|
@@ -171,31 +143,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
171 |
"ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
|
172 |
None,
|
173 |
)
|
174 |
-
|
175 |
space_id = os.getenv("SPACE_ID")
|
176 |
if profile:
|
177 |
username = f"{profile.username}"
|
178 |
else:
|
179 |
return "Please Login to Hugging Face with the button.", None
|
180 |
-
|
181 |
api_url = DEFAULT_API_URL
|
182 |
questions_url = f"{api_url}/questions"
|
183 |
submit_url = f"{api_url}/submit"
|
184 |
-
|
185 |
try:
|
|
|
186 |
agent = LlamaIndexAgent()
|
187 |
except Exception as e:
|
188 |
return f"Error initializing agent: {e}", None
|
189 |
-
|
190 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
191 |
-
|
192 |
try:
|
193 |
response = requests.get(questions_url, timeout=15)
|
194 |
response.raise_for_status()
|
195 |
questions_data = response.json()
|
196 |
except Exception as e:
|
197 |
return f"Error fetching questions: {e}", None
|
198 |
-
|
199 |
results_log = []
|
200 |
answers_payload = []
|
201 |
print(f"Running agent on {len(questions_data)} questions...")
|
@@ -224,16 +191,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
224 |
"Submitted Answer": f"AGENT ERROR: {e}",
|
225 |
}
|
226 |
)
|
227 |
-
|
228 |
if not answers_payload:
|
229 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
230 |
-
|
231 |
submission_data = {
|
232 |
"username": username.strip(),
|
233 |
"agent_code": agent_code,
|
234 |
"answers": answers_payload,
|
235 |
}
|
236 |
-
|
237 |
try:
|
238 |
response = requests.post(submit_url, json=submission_data, timeout=180)
|
239 |
response.raise_for_status()
|
@@ -252,15 +216,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
252 |
)
|
253 |
|
254 |
|
255 |
-
# --- Build Gradio Interface ---
|
|
|
256 |
with gr.Blocks() as demo:
|
257 |
-
gr.Markdown("#
|
258 |
gr.Markdown(
|
259 |
"""
|
260 |
-
**
|
261 |
-
|
262 |
-
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
"""
|
265 |
)
|
266 |
gr.LoginButton()
|
|
|
5 |
import torch
|
6 |
import base64
|
7 |
from io import BytesIO
|
8 |
+
import numexpr # Using a dedicated and safe math library
|
9 |
|
10 |
from llama_index.core.tools import FunctionTool
|
11 |
from llama_index.llms.huggingface import HuggingFaceLLM
|
12 |
from llama_index.core.agent import ReActAgent
|
13 |
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
|
|
|
14 |
from youtube_transcript_api import YouTubeTranscriptApi
|
15 |
from PIL import Image
|
16 |
|
|
|
45 |
return (
|
46 |
"Error: Hugging Face token is not set. Cannot use the image analysis tool."
|
47 |
)
|
|
|
48 |
try:
|
|
|
49 |
response = requests.get(image_url)
|
50 |
response.raise_for_status()
|
|
|
|
|
51 |
image_bytes = BytesIO(response.content).getvalue()
|
52 |
+
headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "image/png"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
response = requests.post(
|
54 |
IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
|
55 |
)
|
56 |
response.raise_for_status()
|
|
|
57 |
result = response.json()
|
58 |
generated_text = result[0].get("generated_text", "").strip()
|
|
|
59 |
final_answer = generated_text.split("ASSISTANT:")[-1].strip()
|
|
|
|
|
60 |
return f"The image description is: {final_answer}. Now, answer the original question based on this."
|
|
|
61 |
except Exception as e:
|
62 |
return f"Error analyzing image: {e}"
|
63 |
|
64 |
|
65 |
+
# NEW: A custom, reliable math tool using a safe evaluator
|
66 |
+
def evaluate_math_expression(expression: str):
    """Evaluate a numerical expression with numexpr and return the result.

    The expression comes from the LLM agent and must be treated as
    untrusted input, so it is validated before being handed to numexpr and
    is evaluated with empty namespaces.

    Args:
        expression: A math expression such as ``"3 * (4 + 2)"``.

    Returns:
        The evaluated value as a plain Python scalar on success, otherwise a
        string starting with ``"Error evaluating expression:"`` describing
        the failure. Errors are returned (not raised) so the agent can read
        them as a tool observation.
    """
    if not isinstance(expression, str):
        return (
            "Error evaluating expression: expression must be a string, "
            f"got {type(expression).__name__}"
        )

    cleaned = expression.strip()
    if not cleaned:
        return "Error evaluating expression: expression is empty"

    # Dunder names (e.g. __import__, __class__) are never needed for
    # arithmetic and are the usual vector for escaping an evaluator;
    # older numexpr releases pass the expression through eval().
    if "__" in cleaned:
        return "Error evaluating expression: double-underscore names are not allowed"

    try:
        # Empty dicts stop numexpr from resolving bare names against the
        # calling frame's locals/globals (which would expose module-level
        # values such as tokens to the expression).
        result = numexpr.evaluate(cleaned, local_dict={}, global_dict={}).item()
        return result
    except Exception as e:
        return f"Error evaluating expression: {e}"
|
74 |
+
|
75 |
+
|
76 |
# --- Tool Definitions ---
|
77 |
youtube_tool = FunctionTool.from_defaults(
|
78 |
fn=get_video_transcript,
|
|
|
84 |
name="image_analyzer_tool",
|
85 |
description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
|
86 |
)
|
87 |
+
# Register the calculator function as an agent tool; the description is what
# the agent reads when deciding whether to call it.
math_tool = FunctionTool.from_defaults(
    fn=evaluate_math_expression,
    name="math_evaluator_tool",
    description="Use this tool to evaluate simple mathematical expressions (e.g., '3 * (4 + 2)').",
)
|
92 |
|
93 |
|
94 |
# --- LlamaIndex Agent Definition ---
|
95 |
class LlamaIndexAgent:
|
96 |
def __init__(self):
|
97 |
print("Initializing LlamaIndexAgent with Final Tools...")
|
|
|
98 |
ddg_spec = DuckDuckGoSearchToolSpec()
|
|
|
99 |
self.tools = [
|
100 |
youtube_tool,
|
101 |
image_analyzer_tool,
|
102 |
+
math_tool,
|
103 |
] + ddg_spec.to_tool_list()
|
|
|
104 |
system_prompt = """
|
105 |
You are a helpful assistant tasked with answering questions.
|
106 |
You have access to a set of tools to help you. These tools include:
|
107 |
- A web search tool.
|
108 |
- A YouTube video transcriber.
|
109 |
+
- An image analyzer for URLs.
|
110 |
+
- A safe calculator for mathematical expressions.
|
111 |
Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
|
112 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
|
113 |
"""
|
|
|
114 |
self.llm = HuggingFaceLLM(
|
115 |
model_name="HuggingFaceH4/zephyr-7b-beta",
|
116 |
tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
|
117 |
device_map="auto",
|
118 |
model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
|
119 |
)
|
|
|
120 |
self.agent = ReActAgent.from_tools(
|
121 |
tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
|
122 |
)
|
|
|
126 |
print(f"Agent received question: {question[:80]}...")
|
127 |
response = self.agent.chat(question)
|
128 |
answer = str(response).strip()
|
|
|
129 |
if "FINAL ANSWER:" in answer:
|
130 |
final_answer = answer.split("FINAL ANSWER:")[-1].strip()
|
131 |
else:
|
|
|
143 |
"ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
|
144 |
None,
|
145 |
)
|
|
|
146 |
space_id = os.getenv("SPACE_ID")
|
147 |
if profile:
|
148 |
username = f"{profile.username}"
|
149 |
else:
|
150 |
return "Please Login to Hugging Face with the button.", None
|
|
|
151 |
api_url = DEFAULT_API_URL
|
152 |
questions_url = f"{api_url}/questions"
|
153 |
submit_url = f"{api_url}/submit"
|
|
|
154 |
try:
|
155 |
+
# We instantiate our new powerful agent instead of the BasicAgent
|
156 |
agent = LlamaIndexAgent()
|
157 |
except Exception as e:
|
158 |
return f"Error initializing agent: {e}", None
|
|
|
159 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
|
160 |
try:
|
161 |
response = requests.get(questions_url, timeout=15)
|
162 |
response.raise_for_status()
|
163 |
questions_data = response.json()
|
164 |
except Exception as e:
|
165 |
return f"Error fetching questions: {e}", None
|
|
|
166 |
results_log = []
|
167 |
answers_payload = []
|
168 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
|
191 |
"Submitted Answer": f"AGENT ERROR: {e}",
|
192 |
}
|
193 |
)
|
|
|
194 |
if not answers_payload:
|
195 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
196 |
submission_data = {
|
197 |
"username": username.strip(),
|
198 |
"agent_code": agent_code,
|
199 |
"answers": answers_payload,
|
200 |
}
|
|
|
201 |
try:
|
202 |
response = requests.post(submit_url, json=submission_data, timeout=180)
|
203 |
response.raise_for_status()
|
|
|
216 |
)
|
217 |
|
218 |
|
219 |
+
# --- Build Gradio Interface using Blocks ---
|
220 |
+
# UI HAS BEEN REVERTED TO THE INITIAL TEMPLATE AS REQUESTED
|
221 |
with gr.Blocks() as demo:
|
222 |
+
gr.Markdown("# Basic Agent Evaluation Runner")
|
223 |
gr.Markdown(
|
224 |
"""
|
225 |
+
**Instructions:**
|
226 |
+
|
227 |
+
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
228 |
+
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
229 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
230 |
+
|
231 |
+
---
|
232 |
+
**Disclaimers:**
|
233 |
+
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
|
234 |
+
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
|
235 |
"""
|
236 |
)
|
237 |
gr.LoginButton()
|
requirements.txt
CHANGED
@@ -6,9 +6,9 @@ torch
|
|
6 |
transformers
|
7 |
accelerate
|
8 |
bitsandbytes
|
9 |
-
# Dependencies for tools
|
10 |
youtube-transcript-api
|
11 |
beautifulsoup4
|
12 |
llama-index-tools-duckduckgo
|
13 |
-
|
14 |
-
|
|
|
|
6 |
transformers
|
7 |
accelerate
|
8 |
bitsandbytes
|
|
|
9 |
youtube-transcript-api
|
10 |
beautifulsoup4
|
11 |
llama-index-tools-duckduckgo
|
12 |
+
llama-index-llms-huggingface
|
13 |
+
# A reliable library for safe math evaluation
|
14 |
+
numexpr
|