leofltt committed
Commit f85ab70 · 1 Parent(s): 82109f5

new day new attempt

Files changed (1)
  1. app.py +110 -97
app.py CHANGED
@@ -1,38 +1,44 @@
 
 
  import os
  import re
  import gradio as gr
  import requests
  import pandas as pd
- import torch
  import logging
- from transformers.pipelines import pipeline
  from langchain_community.tools import DuckDuckGoSearchRun
- from langchain_core.prompts import ChatPromptTemplate
- from langchain.prompts import PromptTemplate
- from langchain_huggingface import HuggingFacePipeline
  from langchain_core.output_parsers import StrOutputParser
  from langchain_core.tools import tool
  from langgraph.graph import StateGraph, END
- from typing import TypedDict, Annotated, List
  from langchain_community.document_loaders.youtube import YoutubeLoader
- import numexpr

  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
- SYSTEM_PROMPT = """You are a helpful assistant tasked with answering questions.
-
- You have access to a set of tools to help you. The question you receive may require you to use these tools.
- When you receive a question, you should first think about what steps you need to take.
- Based on your plan, you can then call the necessary tools.
- After calling a tool, you will get a result. You should analyze the result and decide if you need to call another tool or if you have enough information to answer the question.
-
- When you have the final answer, you must output it in the following format:
  FINAL ANSWER: [YOUR FINAL ANSWER]

- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings.
- - If you are asked for a number, do not use commas for thousands separators or units like '$' or '%' unless specified.
- - If you are asked for a string, do not use articles or abbreviations (e.g., for cities).
- - If you are asked for a comma-separated list, apply the above rules to each element.

  Example:
  Question: What is the capital of France?
@@ -44,48 +50,51 @@ Your final answer: FINAL ANSWER: Paris
 
  # --- Tool Definitions ---
 
- # Global variable to cache the image-to-text pipeline. This allows for "lazy loading".
  image_to_text_pipeline = None
 
 
  @tool
- def web_search(query: str):
-     """Searches the web using DuckDuckGo."""
      logging.info(f"--- Calling Web Search Tool with query: {query} ---")
      search = DuckDuckGoSearchRun()
      return search.run(query)
 
 
  @tool
- def math_calculator(expression: str):
-     """Calculates the result of a mathematical expression."""
      logging.info(f"--- Calling Math Calculator Tool with expression: {expression} ---")
      try:
-         # Use numexpr for safe evaluation
          result = numexpr.evaluate(expression).item()
-         return result
      except Exception as e:
-         logging.error(f"Error evaluating expression: {e}")
-         return f"Error evaluating expression: {e}"
 
 
  @tool
- def image_analyzer(image_url: str):
-     """Analyzes an image and returns a description. Loads the model on first use."""
      global image_to_text_pipeline
      logging.info(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
      try:
          if image_to_text_pipeline is None:
              logging.info(
-                 "--- Initializing Image Analyzer pipeline for the first time... ---"
              )
-             # Lazy-load the pipeline to conserve memory on startup
              image_to_text_pipeline = pipeline(
                  "image-to-text", model="Salesforce/blip-image-captioning-base"
              )
              logging.info("--- Image Analyzer pipeline initialized. ---")
 
-         # More robustly handle the pipeline output
          pipeline_output = image_to_text_pipeline(image_url)
          if (
              pipeline_output
@@ -93,14 +102,10 @@ def image_analyzer(image_url: str):
              and len(pipeline_output) > 0
          ):
              description = pipeline_output[0].get(
-                 "generated_text", "Error: Could not find text in image analysis result."
              )
          else:
-             logging.error(
-                 f"Image analyzer returned no or invalid output for URL: {image_url}"
-             )
              description = "Error: Could not analyze image."
-
          return description
      except Exception as e:
          logging.error(f"Error analyzing image: {e}")
@@ -108,8 +113,8 @@ def image_analyzer(image_url: str):
 
 
  @tool
- def youtube_transcript_reader(youtube_url: str):
-     """Reads the transcript of a YouTube video."""
      logging.info(
          f"--- Calling YouTube Transcript Reader Tool with URL: {youtube_url} ---"
      )
@@ -117,11 +122,11 @@ def youtube_transcript_reader(youtube_url: str):
          loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
          docs = loader.load()
          transcript = " ".join([doc.page_content for doc in docs])
-         # Return a manageable chunk of the transcript
          return transcript[:4000]
      except Exception as e:
          logging.error(f"Error reading YouTube transcript: {e}")
-         return f"Error reading YouTube transcript: {e}"
 
 
  # --- Agent State Definition ---
@@ -142,32 +147,22 @@ class GaiaAgent:
              youtube_transcript_reader,
          ]
 
-         # Initialize the LLM
-         logging.info("Loading LLM... This may take a few minutes on first startup.")
-         # Removing device_map for better compatibility with ZeroGPU environments
-         llm = HuggingFacePipeline.from_model_id(
-             model_id="microsoft/Phi-3-mini-4k-instruct",
              task="text-generation",
-             pipeline_kwargs={
-                 "max_new_tokens": 512,
-                 "top_k": 50,
-                 "temperature": 0.1,
-                 "do_sample": False,
-             },
-             torch_dtype="auto",
-             trust_remote_code=True,  # Required for Phi-3
          )
-         logging.info("LLM loaded successfully.")
 
          # Create the agent graph
          prompt = PromptTemplate(
              template=SYSTEM_PROMPT
-             + """
- Here is the current conversation:
- {messages}
-
- Question: {question}
- """,
              input_variables=["messages", "question"],
          )
 
@@ -180,7 +175,7 @@ Question: {question}
          graph.add_node("agent", self._call_agent)
          graph.add_node("tools", self._call_tools)
          graph.add_conditional_edges(
-             "agent", self._decide_action, {"tools": "tools", END: END}
          )
          graph.add_edge("tools", "agent")
          graph.set_entry_point("agent")
@@ -206,11 +201,15 @@ Question: {question}
          logging.info("--- Calling Tools ---")
          raw_tool_call = state["messages"][-1]
 
-         # Simple regex to find tool calls like tool_name("argument") or tool_name(argument)
          tool_call_match = re.search(r"(\w+)\s*\((.*?)\)", raw_tool_call, re.DOTALL)
          if not tool_call_match:
              logging.warning("No valid tool call found in agent response.")
-             return {"messages": ["No valid tool call found."], "sender": "tools"}
 
          tool_name = tool_call_match.group(1).strip()
          tool_input_str = tool_call_match.group(2).strip()
@@ -237,31 +236,40 @@ Question: {question}
              }
          else:
              logging.warning(f"Tool '{tool_name}' not found.")
-             return {"messages": [f"Tool '{tool_name}' not found."], "sender": "tools"}
 
      def __call__(self, question: str) -> str:
-         logging.info(f"Agent received question: {question[:100]}...")
-
-         initial_state = {"question": question, "messages": [], "sender": "user"}
 
-         final_state = self.graph.invoke(initial_state, {"recursion_limit": 10})
 
-         final_answer = final_state["messages"][-1]
 
-         # Extract the answer after "FINAL ANSWER:"
-         match = re.search(
-             r"FINAL ANSWER:\s*(.*)", final_answer, re.IGNORECASE | re.DOTALL
-         )
-         if match:
-             extracted_answer = match.group(1).strip()
-             logging.info(f"Agent returning final answer: {extracted_answer}")
-             return extracted_answer
-         else:
-             logging.warning(
-                 "Agent could not find a final answer in the required format."
-             )
-             # Return a fallback answer if parsing fails
-             return "Could not determine the final answer."
 
 
  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -279,7 +287,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
      space_id = os.getenv("SPACE_ID")
      if not space_id:
          logging.error("SPACE_ID environment variable is not set. Cannot proceed.")
-         return "SPACE_ID environment variable is not set. Cannot proceed.", None
 
      api_url = DEFAULT_API_URL
      questions_url = f"{api_url}/questions"
@@ -289,8 +300,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
      try:
          agent = GaiaAgent()
      except Exception as e:
-         logging.critical(f"Error instantiating agent: {e}", exc_info=True)
-         return f"Error initializing agent: {e}", None
 
      agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
      logging.info(f"Agent code URL: {agent_code}")
@@ -305,7 +316,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
              logging.warning("Fetched questions list is empty.")
              return "Fetched questions list is empty.", None
          logging.info(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
          logging.error(f"Error fetching questions: {e}")
          return f"Error fetching questions: {e}", None
 
@@ -313,9 +324,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
      results_log = []
      answers_payload = []
      logging.info(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
          task_id = item.get("task_id")
          question_text = item.get("question")
          if not task_id or question_text is None:
              continue
 
@@ -378,18 +392,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
      )
 
 
- # --- Build Gradio Interface ---
  with gr.Blocks() as demo:
      gr.Markdown("# GAIA Agent Evaluation Runner")
      gr.Markdown(
          """
          **Instructions:**
          1. This Space contains a `langgraph`-based agent equipped with tools for web search, math, image analysis, and YouTube transcript reading.
          2. Log in to your Hugging Face account using the button below. Your HF username is used for the submission.
          3. Click 'Run Evaluation & Submit All Answers' to fetch the questions, run the agent, submit the answers, and see your score.
          ---
          **Disclaimer:**
-         Once you click the submit button, please be patient. The agent needs time to process all the questions, which can take several minutes depending on the model and hardware.
          """
      )
 
@@ -408,11 +424,8 @@ with gr.Blocks() as demo:
      )
 
  if __name__ == "__main__":
-     # Configure logging
      logging.basicConfig(
-         level=logging.INFO,
-         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
-         datefmt="%Y-%m-%d %H:%M:%S",
      )
-     logging.info("\n" + "-" * 30 + " App Starting " + "-" * 30)
      demo.launch(debug=True, share=False)
 
+ # app.py
+
  import os
  import re
  import gradio as gr
  import requests
  import pandas as pd
  import logging
+ import numexpr
+ from typing import TypedDict, Annotated
+
+ # --- Langchain & HF Imports ---
+ # CHANGED: Swapped the local pipeline for the serverless Inference API and removed torch
+ from langchain_huggingface import HuggingFaceEndpoint
  from langchain_community.tools import DuckDuckGoSearchRun
+ from langchain_core.prompts import PromptTemplate
  from langchain_core.output_parsers import StrOutputParser
  from langchain_core.tools import tool
  from langgraph.graph import StateGraph, END
  from langchain_community.document_loaders.youtube import YoutubeLoader
+

  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+ # ADDED: A more robust prompt tailored for tool use with Llama 3
+ SYSTEM_PROMPT = """You are a helpful and expert assistant named GAIA, designed to answer questions accurately.
+
+ To do this, you have access to a set of tools. Based on the user's question, you must decide which tool to use, if any.
+ Your process is:
+ 1. **Analyze the Question**: Understand what is being asked.
+ 2. **Select a Tool**: If necessary, choose the best tool for the job. Your available tools are: `web_search`, `math_calculator`, `image_analyzer`, `youtube_transcript_reader`.
+ 3. **Call the Tool**: Output a tool call in the format `tool_name("argument")`. For example: `web_search("what is the weather in Paris?")`.
+ 4. **Analyze the Result**: Look at the tool's output.
+ 5. **Final Answer**: If you have enough information, provide the final answer. If not, you can use another tool.
+
+ When you have the final answer, you **must** output it in the following format, and nothing else:
  FINAL ANSWER: [YOUR FINAL ANSWER]

+ - YOUR FINAL ANSWER should be a number, a short string, or a comma-separated list.
+ - Do not use formatting like thousands separators or units unless the question explicitly asks for it.
+ - Do not add explanations or prose in the final answer.

  Example:
  Question: What is the capital of France?
 
 
  # --- Tool Definitions ---
 
+ # Global variable for lazy loading the image pipeline
  image_to_text_pipeline = None
 
 
  @tool
+ def web_search(query: str) -> str:
+     """Searches the web using DuckDuckGo for up-to-date information."""
      logging.info(f"--- Calling Web Search Tool with query: {query} ---")
      search = DuckDuckGoSearchRun()
      return search.run(query)
 
 
  @tool
+ def math_calculator(expression: str) -> str:
+     """Calculates the result of a mathematical expression. Use it for any math operation."""
      logging.info(f"--- Calling Math Calculator Tool with expression: {expression} ---")
      try:
+         # Sanitize expression: allow only numbers, basic operators, and parentheses
+         if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
+             return "Error: Invalid characters in expression."
          result = numexpr.evaluate(expression).item()
+         return str(result)
      except Exception as e:
+         logging.error(f"Calculator error: {e}")
+         return f"Error: {e}"
 
 
  @tool
+ def image_analyzer(image_url: str) -> str:
+     """Analyzes an image from a URL and returns a text description."""
      global image_to_text_pipeline
      logging.info(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
      try:
          if image_to_text_pipeline is None:
              logging.info(
+                 "--- Initializing Image Analyzer pipeline (lazy loading)... ---"
              )
+             # This pipeline is small enough to be loaded on demand in a ZeroGPU space
+             from transformers import pipeline
+
              image_to_text_pipeline = pipeline(
                  "image-to-text", model="Salesforce/blip-image-captioning-base"
              )
              logging.info("--- Image Analyzer pipeline initialized. ---")
 
          pipeline_output = image_to_text_pipeline(image_url)
          if (
              pipeline_output
 
              and len(pipeline_output) > 0
          ):
              description = pipeline_output[0].get(
+                 "generated_text", "Error: Could not generate text."
              )
          else:
              description = "Error: Could not analyze image."
          return description
      except Exception as e:
          logging.error(f"Error analyzing image: {e}")
 
 
  @tool
+ def youtube_transcript_reader(youtube_url: str) -> str:
+     """Reads the transcript of a YouTube video from its URL."""
      logging.info(
          f"--- Calling YouTube Transcript Reader Tool with URL: {youtube_url} ---"
      )
 
          loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
          docs = loader.load()
          transcript = " ".join([doc.page_content for doc in docs])
+         # Return a manageable chunk
          return transcript[:4000]
      except Exception as e:
          logging.error(f"Error reading YouTube transcript: {e}")
+         return f"Error: {e}"
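
A quick way to sanity-check these tools in isolation — a hypothetical snippet, not part of the commit — is through the runnable interface that the `@tool` decorator provides:

    # Hypothetical smoke test (assumes the tool definitions above are importable).
    # @tool wraps each function in a LangChain runnable, so .invoke() works directly.
    print(math_calculator.invoke("2 * (3 + 4)"))   # -> "14"
    print(web_search.invoke("capital of France"))  # requires network access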
 
 
  # --- Agent State Definition ---
 
              youtube_transcript_reader,
          ]
 
+         # CHANGED: Replaced the local HuggingFacePipeline with HuggingFaceEndpoint,
+         # which uses the Hugging Face serverless Inference API, offloading memory and compute.
+         # It requires a HUGGING_FACE_HUB_TOKEN to be set in the Space secrets.
+         logging.info("Initializing LLM via Inference API...")
+         llm = HuggingFaceEndpoint(
+             repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
              task="text-generation",
+             huggingfacehub_api_token=os.getenv("HUGGING_FACE_HUB_TOKEN"),
          )
+         logging.info("LLM initialized successfully.")
 
          # Create the agent graph
          prompt = PromptTemplate(
              template=SYSTEM_PROMPT
+             + "\nHere is the current conversation:\n{messages}\n\nQuestion: {question}",
              input_variables=["messages", "question"],
          )
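
For reference, a minimal standalone sketch of the same serverless setup, assuming `langchain_huggingface`'s `HuggingFaceEndpoint` and a valid `HUGGING_FACE_HUB_TOKEN` (the `max_new_tokens` value is illustrative):

    # Minimal sketch: calling the HF serverless Inference API outside the agent.
    import os
    from langchain_huggingface import HuggingFaceEndpoint

    llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        task="text-generation",
        huggingfacehub_api_token=os.getenv("HUGGING_FACE_HUB_TOKEN"),
        max_new_tokens=512,  # illustrative generation cap
    )
    print(llm.invoke("Question: What is the capital of France?\nAnswer:"))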
 
 
          graph.add_node("agent", self._call_agent)
          graph.add_node("tools", self._call_tools)
          graph.add_conditional_edges(
+             "agent", self._decide_action, {END: END, "tools": "tools"}
          )
          graph.add_edge("tools", "agent")
          graph.set_entry_point("agent")
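
This wiring is the standard LangGraph agent/tools loop: conditional edges route from `agent` either to `tools` or to `END`, and a fixed edge feeds tool results back to `agent`. A self-contained toy version of the same pattern (all names hypothetical):

    # Toy LangGraph loop: "agent" increments a counter until "decide" routes to END.
    from typing import TypedDict
    from langgraph.graph import StateGraph, END

    class State(TypedDict):
        n: int

    def agent(state: State) -> State:
        return {"n": state["n"] + 1}

    def tools(state: State) -> State:
        return {"n": state["n"]}  # no-op stand-in for real tool execution

    def decide(state: State) -> str:
        return END if state["n"] >= 3 else "tools"

    g = StateGraph(State)
    g.add_node("agent", agent)
    g.add_node("tools", tools)
    g.add_conditional_edges("agent", decide, {"tools": "tools", END: END})
    g.add_edge("tools", "agent")
    g.set_entry_point("agent")
    print(g.compile().invoke({"n": 0}))  # -> {'n': 3}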
 
          logging.info("--- Calling Tools ---")
          raw_tool_call = state["messages"][-1]
 
          tool_call_match = re.search(r"(\w+)\s*\((.*?)\)", raw_tool_call, re.DOTALL)
          if not tool_call_match:
              logging.warning("No valid tool call found in agent response.")
+             return {
+                 "messages": [
+                     'No valid tool call found. Please format your response as `tool_name("argument")` or provide a `FINAL ANSWER:`.'
+                 ],
+                 "sender": "tools",
+             }
 
          tool_name = tool_call_match.group(1).strip()
          tool_input_str = tool_call_match.group(2).strip()
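
To illustrate the parsing step above on a hypothetical model reply: the regex grabs the first `name(...)` pair, and `re.DOTALL` lets the argument span multiple lines.

    import re

    reply = 'I should look this up.\nweb_search("capital of France")'
    m = re.search(r"(\w+)\s*\((.*?)\)", reply, re.DOTALL)
    print(m.group(1))  # web_search
    print(m.group(2))  # "capital of France"  (quotes still attached at this point)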
 
              }
          else:
              logging.warning(f"Tool '{tool_name}' not found.")
+             return {
+                 "messages": [
+                     f"Tool '{tool_name}' not found. Available tools are: web_search, math_calculator, image_analyzer, youtube_transcript_reader."
+                 ],
+                 "sender": "tools",
+             }
 
      def __call__(self, question: str) -> str:
+         logging.info(f"Agent received question (first 100 chars): {question[:100]}...")
+         try:
+             initial_state = {"question": question, "messages": [], "sender": "user"}
+             # Increased recursion limit for potentially complex questions
+             final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
+             final_response = final_state["messages"][-1]

+             match = re.search(
+                 r"FINAL ANSWER:\s*(.*)", final_response, re.IGNORECASE | re.DOTALL
+             )
+             if match:
+                 extracted_answer = match.group(1).strip()
+                 logging.info(f"Agent returning final answer: {extracted_answer}")
+                 return extracted_answer
+             else:
+                 logging.warning(
+                     "Agent could not find a final answer. Returning the last message."
+                 )
+                 # Fallback: return the last piece of the conversation if parsing fails
+                 return final_response
+         except Exception as e:
+             logging.error(f"Error during agent invocation: {e}", exc_info=True)
+             return f"Error during agent invocation: {e}"
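
On a conforming reply, the final-answer extraction in `__call__` behaves like this (hypothetical text):

    import re

    final_response = "Paris is the capital.\nFINAL ANSWER: Paris"
    m = re.search(r"FINAL ANSWER:\s*(.*)", final_response, re.IGNORECASE | re.DOTALL)
    print(m.group(1).strip())  # Paris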
 
 
 
+ # --- Gradio App Logic (largely unchanged, but with enhanced logging) ---
 
 
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
      space_id = os.getenv("SPACE_ID")
      if not space_id:
          logging.error("SPACE_ID environment variable is not set. Cannot proceed.")
+         return (
+             "CRITICAL ERROR: SPACE_ID environment variable is not set. Cannot generate submission.",
+             None,
+         )
 
      api_url = DEFAULT_API_URL
      questions_url = f"{api_url}/questions"
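
The questions endpoint can also be probed on its own; the payload shape (a list of objects with `task_id` and `question` keys) is inferred from the loop further down — a hypothetical check:

    import requests

    resp = requests.get("https://agents-course-unit4-scoring.hf.space/questions", timeout=15)
    resp.raise_for_status()
    questions = resp.json()  # expected: [{"task_id": ..., "question": ...}, ...]
    print(f"fetched {len(questions)} questions")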
 
      try:
          agent = GaiaAgent()
      except Exception as e:
+         logging.critical(f"Fatal error instantiating agent: {e}", exc_info=True)
+         return f"Fatal error initializing agent: {e}", None
 
      agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
      logging.info(f"Agent code URL: {agent_code}")
 
              logging.warning("Fetched questions list is empty.")
              return "Fetched questions list is empty.", None
          logging.info(f"Fetched {len(questions_data)} questions.")
+     except Exception as e:
          logging.error(f"Error fetching questions: {e}")
          return f"Error fetching questions: {e}", None
 
 
 
      results_log = []
      answers_payload = []
      logging.info(f"Running agent on {len(questions_data)} questions...")
+     for i, item in enumerate(questions_data):
          task_id = item.get("task_id")
          question_text = item.get("question")
+         logging.info(
+             f"--- Processing question {i+1}/{len(questions_data)} (Task ID: {task_id}) ---"
+         )
          if not task_id or question_text is None:
              continue
 
 
      )
 
 
+ # --- Build Gradio Interface (UI text is maintained as requested) ---
  with gr.Blocks() as demo:
      gr.Markdown("# GAIA Agent Evaluation Runner")
      gr.Markdown(
          """
          **Instructions:**
+
          1. This Space contains a `langgraph`-based agent equipped with tools for web search, math, image analysis, and YouTube transcript reading.
          2. Log in to your Hugging Face account using the button below. Your HF username is used for the submission.
          3. Click 'Run Evaluation & Submit All Answers' to fetch the questions, run the agent, submit the answers, and see your score.
+
          ---
          **Disclaimer:**
+         Once you click the submit button, please be patient. The agent needs time to process all the questions, which can take several minutes.
          """
      )
 
 
      )
 
  if __name__ == "__main__":
      logging.basicConfig(
+         level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
      )
+     logging.info("App Starting...")
      demo.launch(debug=True, share=False)