leofltt committed on
Commit
97a46b7
·
1 Parent(s): 070630f

fix reqs & app

Browse files
Files changed (2) hide show
  1. app.py +35 -64
  2. requirements.txt +3 -3
app.py CHANGED
@@ -5,12 +5,12 @@ import pandas as pd
5
  import torch
6
  import base64
7
  from io import BytesIO
 
8
 
9
  from llama_index.core.tools import FunctionTool
10
  from llama_index.llms.huggingface import HuggingFaceLLM
11
  from llama_index.core.agent import ReActAgent
12
  from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
13
- from llama_index.tools.python_repl import PythonREPLTool
14
  from youtube_transcript_api import YouTubeTranscriptApi
15
  from PIL import Image
16
 
@@ -45,60 +45,34 @@ def analyze_image_url(image_url: str, question: str):
45
  return (
46
  "Error: Hugging Face token is not set. Cannot use the image analysis tool."
47
  )
48
-
49
  try:
50
- # Download image
51
  response = requests.get(image_url)
52
  response.raise_for_status()
53
-
54
- # Prepare data for the Inference API
55
  image_bytes = BytesIO(response.content).getvalue()
56
-
57
- # Call Inference API
58
- headers = {
59
- "Authorization": f"Bearer {HF_TOKEN}",
60
- "Content-Type": "image/png", # Specify content type
61
- }
62
-
63
- # The Llava prompt format is specific
64
- prompt = f"USER: <image>\n{question}\nASSISTANT:"
65
-
66
- # To send both image and text, we can't use a simple JSON payload.
67
- # A common approach is to use a multi-part form, but the HF API
68
- # can be tricky. Let's try a different model that supports image url directly if available,
69
- # or stick to a method that works with its API.
70
- # For llava, sending the raw image data is the documented way.
71
-
72
- # Re-checking llava API documentation for combined prompt/image...
73
- # The API doesn't cleanly support separate text prompts with raw image data posts.
74
- # A workaround is to embed the prompt in the image or use a model designed for this API format.
75
- # Let's pivot to a model that explicitly takes a URL or a simpler payload.
76
- # However, to keep it simple, we'll assume the prompt is simple enough.
77
-
78
- # Let's simplify the tool's goal: describe the image, then the LLM can reason on the description.
79
- # This is a more robust pattern than trying to force a complex prompt into an API.
80
-
81
- description_prompt = "USER: <image>\nDescribe this image in detail.\nASSISTANT:"
82
-
83
- # For the sake of this example, we will stick to the documented behavior
84
- # and assume the `question` can be answered from a general description.
85
  response = requests.post(
86
  IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
87
  )
88
  response.raise_for_status()
89
-
90
  result = response.json()
91
  generated_text = result[0].get("generated_text", "").strip()
92
-
93
  final_answer = generated_text.split("ASSISTANT:")[-1].strip()
94
-
95
- # The agent will get the description, then re-evaluate with the original question.
96
  return f"The image description is: {final_answer}. Now, answer the original question based on this."
97
-
98
  except Exception as e:
99
  return f"Error analyzing image: {e}"
100
 
101
 
 
 
 
 
 
 
 
 
 
 
 
102
  # --- Tool Definitions ---
103
  youtube_tool = FunctionTool.from_defaults(
104
  fn=get_video_transcript,
@@ -110,40 +84,39 @@ image_analyzer_tool = FunctionTool.from_defaults(
110
  name="image_analyzer_tool",
111
  description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
112
  )
113
- python_repl_tool = PythonREPLTool()
 
 
 
 
114
 
115
 
116
  # --- LlamaIndex Agent Definition ---
117
  class LlamaIndexAgent:
118
  def __init__(self):
119
  print("Initializing LlamaIndexAgent with Final Tools...")
120
-
121
  ddg_spec = DuckDuckGoSearchToolSpec()
122
-
123
  self.tools = [
124
  youtube_tool,
125
  image_analyzer_tool,
126
- python_repl_tool,
127
  ] + ddg_spec.to_tool_list()
128
-
129
  system_prompt = """
130
  You are a helpful assistant tasked with answering questions.
131
  You have access to a set of tools to help you. These tools include:
132
  - A web search tool.
133
  - A YouTube video transcriber.
134
- - An image analyzer for URLs (this tool provides a description of the image).
135
- - A Python code interpreter for math and calculations.
136
  Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
137
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
138
  """
139
-
140
  self.llm = HuggingFaceLLM(
141
  model_name="HuggingFaceH4/zephyr-7b-beta",
142
  tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
143
  device_map="auto",
144
  model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
145
  )
146
-
147
  self.agent = ReActAgent.from_tools(
148
  tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
149
  )
@@ -153,7 +126,6 @@ class LlamaIndexAgent:
153
  print(f"Agent received question: {question[:80]}...")
154
  response = self.agent.chat(question)
155
  answer = str(response).strip()
156
-
157
  if "FINAL ANSWER:" in answer:
158
  final_answer = answer.split("FINAL ANSWER:")[-1].strip()
159
  else:
@@ -171,31 +143,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
171
  "ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
172
  None,
173
  )
174
-
175
  space_id = os.getenv("SPACE_ID")
176
  if profile:
177
  username = f"{profile.username}"
178
  else:
179
  return "Please Login to Hugging Face with the button.", None
180
-
181
  api_url = DEFAULT_API_URL
182
  questions_url = f"{api_url}/questions"
183
  submit_url = f"{api_url}/submit"
184
-
185
  try:
 
186
  agent = LlamaIndexAgent()
187
  except Exception as e:
188
  return f"Error initializing agent: {e}", None
189
-
190
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
191
-
192
  try:
193
  response = requests.get(questions_url, timeout=15)
194
  response.raise_for_status()
195
  questions_data = response.json()
196
  except Exception as e:
197
  return f"Error fetching questions: {e}", None
198
-
199
  results_log = []
200
  answers_payload = []
201
  print(f"Running agent on {len(questions_data)} questions...")
@@ -224,16 +191,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
224
  "Submitted Answer": f"AGENT ERROR: {e}",
225
  }
226
  )
227
-
228
  if not answers_payload:
229
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
230
-
231
  submission_data = {
232
  "username": username.strip(),
233
  "agent_code": agent_code,
234
  "answers": answers_payload,
235
  }
236
-
237
  try:
238
  response = requests.post(submit_url, json=submission_data, timeout=180)
239
  response.raise_for_status()
@@ -252,15 +216,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
252
  )
253
 
254
 
255
- # --- Build Gradio Interface ---
 
256
  with gr.Blocks() as demo:
257
- gr.Markdown("# Final, Fully-Featured GAIA Agent")
258
  gr.Markdown(
259
  """
260
- **Agent Capabilities:** Web Search, YouTube Analysis, Image Analysis (via API), and Python Code Execution.
261
- 1. **IMPORTANT**: This Space requires a Hugging Face Token to be set in the secrets as `HF_TOKEN` for the image analysis tool to work.
262
- 2. Log in to your Hugging Face account using the button below.
263
- 3. Click 'Run Evaluation & Submit All Answers'. This process is complex and will take a very long time.
 
 
 
 
 
 
264
  """
265
  )
266
  gr.LoginButton()
 
5
  import torch
6
  import base64
7
  from io import BytesIO
8
+ import numexpr # Using a dedicated and safe math library
9
 
10
  from llama_index.core.tools import FunctionTool
11
  from llama_index.llms.huggingface import HuggingFaceLLM
12
  from llama_index.core.agent import ReActAgent
13
  from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 
14
  from youtube_transcript_api import YouTubeTranscriptApi
15
  from PIL import Image
16
 
 
45
  return (
46
  "Error: Hugging Face token is not set. Cannot use the image analysis tool."
47
  )
 
48
  try:
 
49
  response = requests.get(image_url)
50
  response.raise_for_status()
 
 
51
  image_bytes = BytesIO(response.content).getvalue()
52
+ headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "image/png"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  response = requests.post(
54
  IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
55
  )
56
  response.raise_for_status()
 
57
  result = response.json()
58
  generated_text = result[0].get("generated_text", "").strip()
 
59
  final_answer = generated_text.split("ASSISTANT:")[-1].strip()
 
 
60
  return f"The image description is: {final_answer}. Now, answer the original question based on this."
 
61
  except Exception as e:
62
  return f"Error analyzing image: {e}"
63
 
64
 
65
+ # NEW: A custom, reliable math tool using a safe evaluator
66
+ def evaluate_math_expression(expression: str):
67
+ """Evaluates a mathematical expression safely."""
68
+ try:
69
+ # Using numexpr for safe evaluation of numerical expressions
70
+ result = numexpr.evaluate(expression).item()
71
+ return result
72
+ except Exception as e:
73
+ return f"Error evaluating expression: {e}"
74
+
75
+
76
  # --- Tool Definitions ---
77
  youtube_tool = FunctionTool.from_defaults(
78
  fn=get_video_transcript,
 
84
  name="image_analyzer_tool",
85
  description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
86
  )
87
+ math_tool = FunctionTool.from_defaults(
88
+ fn=evaluate_math_expression,
89
+ name="math_evaluator_tool",
90
+ description="Use this tool to evaluate simple mathematical expressions (e.g., '3 * (4 + 2)').",
91
+ )
92
 
93
 
94
  # --- LlamaIndex Agent Definition ---
95
  class LlamaIndexAgent:
96
  def __init__(self):
97
  print("Initializing LlamaIndexAgent with Final Tools...")
 
98
  ddg_spec = DuckDuckGoSearchToolSpec()
 
99
  self.tools = [
100
  youtube_tool,
101
  image_analyzer_tool,
102
+ math_tool,
103
  ] + ddg_spec.to_tool_list()
 
104
  system_prompt = """
105
  You are a helpful assistant tasked with answering questions.
106
  You have access to a set of tools to help you. These tools include:
107
  - A web search tool.
108
  - A YouTube video transcriber.
109
+ - An image analyzer for URLs.
110
+ - A safe calculator for mathematical expressions.
111
  Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
112
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
113
  """
 
114
  self.llm = HuggingFaceLLM(
115
  model_name="HuggingFaceH4/zephyr-7b-beta",
116
  tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
117
  device_map="auto",
118
  model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
119
  )
 
120
  self.agent = ReActAgent.from_tools(
121
  tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
122
  )
 
126
  print(f"Agent received question: {question[:80]}...")
127
  response = self.agent.chat(question)
128
  answer = str(response).strip()
 
129
  if "FINAL ANSWER:" in answer:
130
  final_answer = answer.split("FINAL ANSWER:")[-1].strip()
131
  else:
 
143
  "ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
144
  None,
145
  )
 
146
  space_id = os.getenv("SPACE_ID")
147
  if profile:
148
  username = f"{profile.username}"
149
  else:
150
  return "Please Login to Hugging Face with the button.", None
 
151
  api_url = DEFAULT_API_URL
152
  questions_url = f"{api_url}/questions"
153
  submit_url = f"{api_url}/submit"
 
154
  try:
155
+ # We instantiate our new powerful agent instead of the BasicAgent
156
  agent = LlamaIndexAgent()
157
  except Exception as e:
158
  return f"Error initializing agent: {e}", None
 
159
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
160
  try:
161
  response = requests.get(questions_url, timeout=15)
162
  response.raise_for_status()
163
  questions_data = response.json()
164
  except Exception as e:
165
  return f"Error fetching questions: {e}", None
 
166
  results_log = []
167
  answers_payload = []
168
  print(f"Running agent on {len(questions_data)} questions...")
 
191
  "Submitted Answer": f"AGENT ERROR: {e}",
192
  }
193
  )
 
194
  if not answers_payload:
195
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
196
  submission_data = {
197
  "username": username.strip(),
198
  "agent_code": agent_code,
199
  "answers": answers_payload,
200
  }
 
201
  try:
202
  response = requests.post(submit_url, json=submission_data, timeout=180)
203
  response.raise_for_status()
 
216
  )
217
 
218
 
219
+ # --- Build Gradio Interface using Blocks ---
220
+ # UI HAS BEEN REVERTED TO THE INITIAL TEMPLATE AS REQUESTED
221
  with gr.Blocks() as demo:
222
+ gr.Markdown("# Basic Agent Evaluation Runner")
223
  gr.Markdown(
224
  """
225
+ **Instructions:**
226
+
227
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
228
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
229
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
230
+
231
+ ---
232
+ **Disclaimers:**
233
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
234
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
235
  """
236
  )
237
  gr.LoginButton()
requirements.txt CHANGED
@@ -6,9 +6,9 @@ torch
6
  transformers
7
  accelerate
8
  bitsandbytes
9
- # Dependencies for tools
10
  youtube-transcript-api
11
  beautifulsoup4
12
  llama-index-tools-duckduckgo
13
- # CORRECTED: The missing package for HuggingFaceLLM
14
- llama-index-llms-huggingface
 
 
6
  transformers
7
  accelerate
8
  bitsandbytes
 
9
  youtube-transcript-api
10
  beautifulsoup4
11
  llama-index-tools-duckduckgo
12
+ llama-index-llms-huggingface
13
+ # A reliable library for safe math evaluation
14
+ numexpr