leofltt committed on
Commit
8a4a946
·
1 Parent(s): 9fcd442

import fixes

Browse files
Files changed (1) hide show
  1. app.py +13 -43
app.py CHANGED
@@ -10,8 +10,8 @@ import numexpr
10
  from typing import TypedDict, Annotated
11
 
12
  # --- Langchain & HF Imports ---
13
- # CHANGED: Swapped local pipeline for Inference API and removed torch
14
- from langchain_huggingface import HuggingFaceInferenceAPI
15
  from langchain_community.tools import DuckDuckGoSearchRun
16
  from langchain_core.prompts import PromptTemplate
17
  from langchain_core.output_parsers import StrOutputParser
@@ -22,7 +22,6 @@ from langchain_community.document_loaders.youtube import YoutubeLoader
22
 
23
  # --- Constants ---
24
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
25
- # ADDED: A more robust prompt tailored for tool use with Llama 3
26
  SYSTEM_PROMPT = """You are a helpful and expert assistant named GAIA, designed to answer questions accurately.
27
 
28
  To do this, you have access to a set of tools. Based on the user's question, you must decide which tool to use, if any.
@@ -67,7 +66,6 @@ def math_calculator(expression: str) -> str:
67
  """Calculates the result of a mathematical expression. Use it for any math operation."""
68
  logging.info(f"--- Calling Math Calculator Tool with expression: {expression} ---")
69
  try:
70
- # Sanitize expression: allow only numbers, basic operators, and parentheses
71
  if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
72
  return "Error: Invalid characters in expression."
73
  result = numexpr.evaluate(expression).item()
@@ -87,8 +85,7 @@ def image_analyzer(image_url: str) -> str:
87
  logging.info(
88
  "--- Initializing Image Analyzer pipeline (lazy loading)... ---"
89
  )
90
- # This pipeline is small enough to be loaded on demand in a ZeroGPU space
91
- from transformers import pipeline
92
 
93
  image_to_text_pipeline = pipeline(
94
  "image-to-text", model="Salesforce/blip-image-captioning-base"
@@ -122,7 +119,6 @@ def youtube_transcript_reader(youtube_url: str) -> str:
122
  loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
123
  docs = loader.load()
124
  transcript = " ".join([doc.page_content for doc in docs])
125
- # Return a manageable chunk
126
  return transcript[:4000]
127
  except Exception as e:
128
  logging.error(f"Error reading YouTube transcript: {e}")
@@ -147,19 +143,17 @@ class GaiaAgent:
147
  youtube_transcript_reader,
148
  ]
149
 
150
- # CHANGED: Replaced local HuggingFacePipeline with HuggingFaceInferenceAPI
151
- # This uses the Hugging Face Serverless API, offloading the memory and compute.
152
- # It requires a HUGGING_FACE_HUB_TOKEN to be set in the Space secrets.
153
- logging.info("Initializing LLM via Inference API...")
154
- llm = HuggingFaceInferenceAPI(
155
- model_id="meta-llama/Meta-Llama-3-8B-Instruct",
156
- # repo_id="meta-llama/Meta-Llama-3-8B-Instruct", # Use repo_id if model_id gives issues
157
- task="text-generation",
158
- token=os.getenv("HUGGING_FACE_HUB_TOKEN"),
159
  )
160
  logging.info("LLM initialized successfully.")
161
 
162
- # Create the agent graph
163
  prompt = PromptTemplate(
164
  template=SYSTEM_PROMPT
165
  + "\nHere is the current conversation:\n{messages}\n\nQuestion: {question}",
@@ -214,7 +208,6 @@ class GaiaAgent:
214
  tool_name = tool_call_match.group(1).strip()
215
  tool_input_str = tool_call_match.group(2).strip()
216
 
217
- # Remove quotes from the input string if they exist
218
  if (tool_input_str.startswith('"') and tool_input_str.endswith('"')) or (
219
  tool_input_str.startswith("'") and tool_input_str.endswith("'")
220
  ):
@@ -247,7 +240,6 @@ class GaiaAgent:
247
  logging.info(f"Agent received question (first 100 chars): {question[:100]}...")
248
  try:
249
  initial_state = {"question": question, "messages": [], "sender": "user"}
250
- # Increased recursion limit for potentially complex questions
251
  final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
252
  final_response = final_state["messages"][-1]
253
 
@@ -262,28 +254,21 @@ class GaiaAgent:
262
  logging.warning(
263
  "Agent could not find a final answer. Returning the last message."
264
  )
265
- # Fallback: return the last piece of the conversation if parsing fails
266
  return final_response
267
  except Exception as e:
268
  logging.error(f"Error during agent invocation: {e}", exc_info=True)
269
  return f"Error during agent invocation: {e}"
270
 
271
 
272
- # --- Gradio App Logic (largely unchanged, but with enhanced logging) ---
273
 
274
 
275
  def run_and_submit_all(profile: gr.OAuthProfile | None):
276
- """
277
- Fetches all questions, runs the GaiaAgent on them, submits all answers,
278
- and displays the results.
279
- """
280
  if not profile:
281
  logging.warning("User not logged in.")
282
  return "Please Login to Hugging Face with the button.", None
283
-
284
  username = profile.username
285
  logging.info(f"User logged in: {username}")
286
-
287
  space_id = os.getenv("SPACE_ID")
288
  if not space_id:
289
  logging.error("SPACE_ID environment variable is not set. Cannot proceed.")
@@ -291,22 +276,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
291
  "CRITICAL ERROR: SPACE_ID environment variable is not set. Cannot generate submission.",
292
  None,
293
  )
294
-
295
  api_url = DEFAULT_API_URL
296
  questions_url = f"{api_url}/questions"
297
  submit_url = f"{api_url}/submit"
298
-
299
- # 1. Instantiate Agent
300
  try:
301
  agent = GaiaAgent()
302
  except Exception as e:
303
  logging.critical(f"Fatal error instantiating agent: {e}", exc_info=True)
304
  return f"Fatal error initializing agent: {e}", None
305
-
306
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
307
  logging.info(f"Agent code URL: {agent_code}")
308
-
309
- # 2. Fetch Questions
310
  logging.info(f"Fetching questions from: {questions_url}")
311
  try:
312
  response = requests.get(questions_url, timeout=20)
@@ -319,8 +298,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
319
  except Exception as e:
320
  logging.error(f"Error fetching questions: {e}")
321
  return f"Error fetching questions: {e}", None
322
-
323
- # 3. Run your Agent
324
  results_log = []
325
  answers_payload = []
326
  logging.info(f"Running agent on {len(questions_data)} questions...")
@@ -332,7 +309,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
332
  )
333
  if not task_id or question_text is None:
334
  continue
335
-
336
  try:
337
  submitted_answer = agent(question_text)
338
  answers_payload.append(
@@ -354,12 +330,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
354
  "Submitted Answer": f"AGENT ERROR: {e}",
355
  }
356
  )
357
-
358
  if not answers_payload:
359
  logging.warning("Agent did not produce any answers.")
360
  return "Agent did not produce any answers.", pd.DataFrame(results_log)
361
-
362
- # 4. Prepare and Submit
363
  submission_data = {
364
  "username": username.strip(),
365
  "agent_code": agent_code,
@@ -392,7 +365,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
392
  )
393
 
394
 
395
- # --- Build Gradio Interface (UI text is maintained as requested) ---
396
  with gr.Blocks() as demo:
397
  gr.Markdown("# GAIA Agent Evaluation Runner")
398
  gr.Markdown(
@@ -408,15 +381,12 @@ with gr.Blocks() as demo:
408
  Once you click the submit button, please be patient. The agent needs time to process all the questions, which can take several minutes.
409
  """
410
  )
411
-
412
  gr.LoginButton()
413
-
414
  run_button = gr.Button("Run Evaluation & Submit All Answers")
415
  status_output = gr.Textbox(
416
  label="Run Status / Submission Result", lines=5, interactive=False
417
  )
418
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
419
-
420
  run_button.click(
421
  fn=run_and_submit_all,
422
  outputs=[status_output, results_table],
 
10
  from typing import TypedDict, Annotated
11
 
12
  # --- Langchain & HF Imports ---
13
+ # VERIFIED AND CORRECT FINAL IMPORT
14
+ from langchain_community.llms import HuggingFaceHub
15
  from langchain_community.tools import DuckDuckGoSearchRun
16
  from langchain_core.prompts import PromptTemplate
17
  from langchain_core.output_parsers import StrOutputParser
 
22
 
23
  # --- Constants ---
24
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
25
  SYSTEM_PROMPT = """You are a helpful and expert assistant named GAIA, designed to answer questions accurately.
26
 
27
  To do this, you have access to a set of tools. Based on the user's question, you must decide which tool to use, if any.
 
66
  """Calculates the result of a mathematical expression. Use it for any math operation."""
67
  logging.info(f"--- Calling Math Calculator Tool with expression: {expression} ---")
68
  try:
 
69
  if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
70
  return "Error: Invalid characters in expression."
71
  result = numexpr.evaluate(expression).item()
 
85
  logging.info(
86
  "--- Initializing Image Analyzer pipeline (lazy loading)... ---"
87
  )
88
+ from transformers.pipelines import pipeline
 
89
 
90
  image_to_text_pipeline = pipeline(
91
  "image-to-text", model="Salesforce/blip-image-captioning-base"
 
119
  loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
120
  docs = loader.load()
121
  transcript = " ".join([doc.page_content for doc in docs])
 
122
  return transcript[:4000]
123
  except Exception as e:
124
  logging.error(f"Error reading YouTube transcript: {e}")
 
143
  youtube_transcript_reader,
144
  ]
145
 
146
+ # --- THIS SECTION IS NOW CORRECT ---
147
+ logging.info("Initializing LLM via HuggingFaceHub...")
148
+ # We use HuggingFaceHub which is the correct class for this job.
149
+ # Note the parameter names: repo_id, model_kwargs, and huggingfacehub_api_token.
150
+ llm = HuggingFaceHub(
151
+ repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
152
+ model_kwargs={"temperature": 0.1, "max_new_tokens": 1024},
153
+ huggingfacehub_api_token=os.getenv("HUGGING_FACE_HUB_TOKEN"),
 
154
  )
155
  logging.info("LLM initialized successfully.")
156
 
 
157
  prompt = PromptTemplate(
158
  template=SYSTEM_PROMPT
159
  + "\nHere is the current conversation:\n{messages}\n\nQuestion: {question}",
 
208
  tool_name = tool_call_match.group(1).strip()
209
  tool_input_str = tool_call_match.group(2).strip()
210
 
 
211
  if (tool_input_str.startswith('"') and tool_input_str.endswith('"')) or (
212
  tool_input_str.startswith("'") and tool_input_str.endswith("'")
213
  ):
 
240
  logging.info(f"Agent received question (first 100 chars): {question[:100]}...")
241
  try:
242
  initial_state = {"question": question, "messages": [], "sender": "user"}
 
243
  final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
244
  final_response = final_state["messages"][-1]
245
 
 
254
  logging.warning(
255
  "Agent could not find a final answer. Returning the last message."
256
  )
 
257
  return final_response
258
  except Exception as e:
259
  logging.error(f"Error during agent invocation: {e}", exc_info=True)
260
  return f"Error during agent invocation: {e}"
261
 
262
 
263
+ # --- Gradio App Logic (Unchanged) ---
264
 
265
 
266
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
267
  if not profile:
268
  logging.warning("User not logged in.")
269
  return "Please Login to Hugging Face with the button.", None
 
270
  username = profile.username
271
  logging.info(f"User logged in: {username}")
 
272
  space_id = os.getenv("SPACE_ID")
273
  if not space_id:
274
  logging.error("SPACE_ID environment variable is not set. Cannot proceed.")
 
276
  "CRITICAL ERROR: SPACE_ID environment variable is not set. Cannot generate submission.",
277
  None,
278
  )
 
279
  api_url = DEFAULT_API_URL
280
  questions_url = f"{api_url}/questions"
281
  submit_url = f"{api_url}/submit"
 
 
282
  try:
283
  agent = GaiaAgent()
284
  except Exception as e:
285
  logging.critical(f"Fatal error instantiating agent: {e}", exc_info=True)
286
  return f"Fatal error initializing agent: {e}", None
 
287
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
288
  logging.info(f"Agent code URL: {agent_code}")
 
 
289
  logging.info(f"Fetching questions from: {questions_url}")
290
  try:
291
  response = requests.get(questions_url, timeout=20)
 
298
  except Exception as e:
299
  logging.error(f"Error fetching questions: {e}")
300
  return f"Error fetching questions: {e}", None
 
 
301
  results_log = []
302
  answers_payload = []
303
  logging.info(f"Running agent on {len(questions_data)} questions...")
 
309
  )
310
  if not task_id or question_text is None:
311
  continue
 
312
  try:
313
  submitted_answer = agent(question_text)
314
  answers_payload.append(
 
330
  "Submitted Answer": f"AGENT ERROR: {e}",
331
  }
332
  )
 
333
  if not answers_payload:
334
  logging.warning("Agent did not produce any answers.")
335
  return "Agent did not produce any answers.", pd.DataFrame(results_log)
 
 
336
  submission_data = {
337
  "username": username.strip(),
338
  "agent_code": agent_code,
 
365
  )
366
 
367
 
368
+ # --- Build Gradio Interface (Unchanged) ---
369
  with gr.Blocks() as demo:
370
  gr.Markdown("# GAIA Agent Evaluation Runner")
371
  gr.Markdown(
 
381
  Once you click the submit button, please be patient. The agent needs time to process all the questions, which can take several minutes.
382
  """
383
  )
 
384
  gr.LoginButton()
 
385
  run_button = gr.Button("Run Evaluation & Submit All Answers")
386
  status_output = gr.Textbox(
387
  label="Run Status / Submission Result", lines=5, interactive=False
388
  )
389
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
390
  run_button.click(
391
  fn=run_and_submit_all,
392
  outputs=[status_output, results_table],