LamiaYT committed on
Commit
672de84
·
1 Parent(s): 4e482b6
Files changed (1) hide show
  1. app.py +149 -151
app.py CHANGED
@@ -19,7 +19,14 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
  @tool
21
  def serper_search(query: str) -> str:
22
- """Enhanced search tool optimized for GAIA question types"""
 
 
 
 
 
 
 
23
  try:
24
  api_key = os.getenv("SERPER_API_KEY")
25
  if not api_key:
@@ -75,7 +82,14 @@ def serper_search(query: str) -> str:
75
 
76
  @tool
77
  def math_solver(problem: str) -> str:
78
- """Enhanced math solver for GAIA questions"""
 
 
 
 
 
 
 
79
  try:
80
  # Handle chess-related questions
81
  if "chess" in problem.lower():
@@ -105,7 +119,15 @@ def math_solver(problem: str) -> str:
105
 
106
  @tool
107
  def text_processor(text: str, operation: str = "reverse") -> str:
108
- """Enhanced text processing for GAIA questions"""
 
 
 
 
 
 
 
 
109
  try:
110
  # Handle specific reversed text question
111
  if "ecnetnes siht dnatsrednu uoy fi" in text.lower():
@@ -130,7 +152,15 @@ def text_processor(text: str, operation: str = "reverse") -> str:
130
 
131
  @tool
132
  def data_extractor(source: str, target: str) -> str:
133
- """Enhanced data extraction for GAIA questions"""
 
 
 
 
 
 
 
 
134
  try:
135
  # Handle botanical classification questions
136
  if "botanical" in target.lower() or "vegetable" in target.lower():
@@ -231,167 +261,135 @@ with gr.Blocks() as demo:
231
  test_btn.click(test_agent, inputs=question_input, outputs=output)
232
 
233
  # Full evaluation handler
234
- run_btn.click(run_and_submit_all, outputs=[status, results])
235
- def run_and_submit_all(profile: gr.OAuthProfile | None):
236
- """
237
- Fetches all questions, runs the GAIA Agent on them, submits all answers,
238
- and displays the results.
239
- """
240
- space_id = os.getenv("SPACE_ID")
241
 
242
- if profile:
243
- username = f"{profile.username}"
244
- print(f"User logged in: {username}")
245
- else:
246
- print("User not logged in.")
247
- return "Please Login to Hugging Face with the button.", None
248
 
249
- api_url = DEFAULT_API_URL
250
- questions_url = f"{api_url}/questions"
251
- submit_url = f"{api_url}/submit"
252
 
253
- # 1. Instantiate Agent
254
- try:
255
- agent = GAIAAgent()
256
- except Exception as e:
257
- print(f"Error instantiating agent: {e}")
258
- return f"Error initializing agent: {e}", None
259
-
260
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
261
- print(agent_code)
262
-
263
- # 2. Fetch Questions
264
- print(f"Fetching questions from: {questions_url}")
265
- try:
266
- response = requests.get(questions_url, timeout=15)
267
- response.raise_for_status()
268
- questions_data = response.json()
269
- if not questions_data:
270
- print("Fetched questions list is empty.")
271
- return "Fetched questions list is empty or invalid format.", None
272
- print(f"Fetched {len(questions_data)} questions.")
273
- except requests.exceptions.RequestException as e:
274
- print(f"Error fetching questions: {e}")
275
- return f"Error fetching questions: {e}", None
276
- except requests.exceptions.JSONDecodeError as e:
277
- print(f"Error decoding JSON response from questions endpoint: {e}")
278
- print(f"Response text: {response.text[:500]}")
279
- return f"Error decoding server response for questions: {e}", None
280
- except Exception as e:
281
- print(f"An unexpected error occurred fetching questions: {e}")
282
- return f"An unexpected error occurred fetching questions: {e}", None
283
-
284
- # 3. Run Agent
285
- results_log = []
286
- answers_payload = []
287
- print(f"Running agent on {len(questions_data)} questions...")
288
-
289
- for i, item in enumerate(questions_data):
290
- task_id = item.get("task_id")
291
- question_text = item.get("question")
292
- if not task_id or question_text is None:
293
- print(f"Skipping item with missing task_id or question: {item}")
294
- continue
295
-
296
- print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
297
  try:
298
- submitted_answer = agent(question_text)
299
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
300
- results_log.append({"Task ID": task_id, "Question": question_text[:100] + "...", "Submitted Answer": submitted_answer[:200] + "..."})
301
-
302
- # Add small delay to avoid rate limiting
303
- time.sleep(1)
304
-
305
  except Exception as e:
306
- print(f"Error running agent on task {task_id}: {e}")
307
- results_log.append({"Task ID": task_id, "Question": question_text[:100] + "...", "Submitted Answer": f"AGENT ERROR: {e}"})
308
 
309
- if not answers_payload:
310
- print("Agent did not produce any answers to submit.")
311
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
312
 
313
- # 4. Prepare Submission
314
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
315
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
316
- print(status_update)
317
-
318
- # 5. Submit
319
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
320
- try:
321
- response = requests.post(submit_url, json=submission_data, timeout=60)
322
- response.raise_for_status()
323
- result_data = response.json()
324
- final_status = (
325
- f"Submission Successful!\n"
326
- f"User: {result_data.get('username')}\n"
327
- f"Overall Score: {result_data.get('score', 'N/A')}% "
328
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
329
- f"Message: {result_data.get('message', 'No message received.')}"
330
- )
331
- print("Submission successful.")
332
- results_df = pd.DataFrame(results_log)
333
- return final_status, results_df
334
- except requests.exceptions.HTTPError as e:
335
- error_detail = f"Server responded with status {e.response.status_code}."
336
  try:
337
- error_json = e.response.json()
338
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
339
- except requests.exceptions.JSONDecodeError:
340
- error_detail += f" Response: {e.response.text[:500]}"
341
- status_message = f"Submission Failed: {error_detail}"
342
- print(status_message)
343
- results_df = pd.DataFrame(results_log)
344
- return status_message, results_df
345
- except requests.exceptions.Timeout:
346
- status_message = "Submission Failed: The request timed out."
347
- print(status_message)
348
- results_df = pd.DataFrame(results_log)
349
- return status_message, results_df
350
- except requests.exceptions.RequestException as e:
351
- status_message = f"Submission Failed: Network error - {e}"
352
- print(status_message)
353
- results_df = pd.DataFrame(results_log)
354
- return status_message, results_df
355
- except Exception as e:
356
- status_message = f"An unexpected error occurred during submission: {e}"
357
- print(status_message)
358
- results_df = pd.DataFrame(results_log)
359
- return status_message, results_df
360
 
361
- # --- Build Gradio Interface ---
362
- with gr.Blocks() as demo:
363
- gr.Markdown("# GAIA Benchmark Agent")
364
- gr.Markdown(
365
- """
366
- **Enhanced Agent for GAIA Benchmark**
367
-
368
- This agent uses multiple specialized tools to handle diverse question types:
369
- - Web search (Serper API + DuckDuckGo)
370
- - Wikipedia search
371
- - YouTube video analysis
372
- - Text processing and reversal
373
- - Mathematical problem solving
374
- - Data extraction and botanical classification
375
-
376
- **Instructions:**
377
- 1. Log in to your Hugging Face account
378
- 2. Click 'Run Evaluation & Submit All Answers' to start the benchmark
379
- 3. The agent will process all questions and submit results automatically
380
 
381
- **Note:** Processing may take several minutes due to the complexity of questions.
382
- """
383
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
- gr.LoginButton()
 
 
386
 
387
- run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
 
 
 
388
 
389
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
390
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
- run_button.click(
393
- fn=run_and_submit_all,
394
- outputs=[status_output, results_table]
395
  )
396
 
397
  if __name__ == "__main__":
 
19
 
20
  @tool
21
  def serper_search(query: str) -> str:
22
+ """Enhanced search tool optimized for GAIA question types
23
+
24
+ Args:
25
+ query: The search query to execute
26
+
27
+ Returns:
28
+ Search results as a formatted string
29
+ """
30
  try:
31
  api_key = os.getenv("SERPER_API_KEY")
32
  if not api_key:
 
82
 
83
  @tool
84
  def math_solver(problem: str) -> str:
85
+ """Enhanced math solver for GAIA questions
86
+
87
+ Args:
88
+ problem: The mathematical problem to solve
89
+
90
+ Returns:
91
+ Solution or analysis of the problem
92
+ """
93
  try:
94
  # Handle chess-related questions
95
  if "chess" in problem.lower():
 
119
 
120
  @tool
121
  def text_processor(text: str, operation: str = "reverse") -> str:
122
+ """Enhanced text processing for GAIA questions
123
+
124
+ Args:
125
+ text: The text to process
126
+ operation: The operation to perform (reverse, extract, etc.)
127
+
128
+ Returns:
129
+ Processed text result
130
+ """
131
  try:
132
  # Handle specific reversed text question
133
  if "ecnetnes siht dnatsrednu uoy fi" in text.lower():
 
152
 
153
  @tool
154
  def data_extractor(source: str, target: str) -> str:
155
+ """Enhanced data extraction for GAIA questions
156
+
157
+ Args:
158
+ source: The source data to extract from
159
+ target: The type of data to extract
160
+
161
+ Returns:
162
+ Extracted data as a string
163
+ """
164
  try:
165
  # Handle botanical classification questions
166
  if "botanical" in target.lower() or "vegetable" in target.lower():
 
261
  test_btn.click(test_agent, inputs=question_input, outputs=output)
262
 
263
  # Full evaluation handler
264
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
265
+ """
266
+ Fetches all questions, runs the GAIA Agent on them, submits all answers,
267
+ and displays the results.
268
+ """
269
+ space_id = os.getenv("SPACE_ID")
 
270
 
271
+ if profile:
272
+ username = f"{profile.username}"
273
+ print(f"User logged in: {username}")
274
+ else:
275
+ print("User not logged in.")
276
+ return "Please Login to Hugging Face with the button.", None
277
 
278
+ api_url = DEFAULT_API_URL
279
+ questions_url = f"{api_url}/questions"
280
+ submit_url = f"{api_url}/submit"
281
 
282
+ # 1. Instantiate Agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  try:
284
+ agent = GAIAAgent()
 
 
 
 
 
 
285
  except Exception as e:
286
+ print(f"Error instantiating agent: {e}")
287
+ return f"Error initializing agent: {e}", None
288
 
289
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
290
+ print(agent_code)
 
291
 
292
+ # 2. Fetch Questions
293
+ print(f"Fetching questions from: {questions_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  try:
295
+ response = requests.get(questions_url, timeout=15)
296
+ response.raise_for_status()
297
+ questions_data = response.json()
298
+ if not questions_data:
299
+ print("Fetched questions list is empty.")
300
+ return "Fetched questions list is empty or invalid format.", None
301
+ print(f"Fetched {len(questions_data)} questions.")
302
+ except requests.exceptions.RequestException as e:
303
+ print(f"Error fetching questions: {e}")
304
+ return f"Error fetching questions: {e}", None
305
+ except requests.exceptions.JSONDecodeError as e:
306
+ print(f"Error decoding JSON response from questions endpoint: {e}")
307
+ print(f"Response text: {response.text[:500]}")
308
+ return f"Error decoding server response for questions: {e}", None
309
+ except Exception as e:
310
+ print(f"An unexpected error occurred fetching questions: {e}")
311
+ return f"An unexpected error occurred fetching questions: {e}", None
 
 
 
 
 
 
312
 
313
+ # 3. Run Agent
314
+ results_log = []
315
+ answers_payload = []
316
+ print(f"Running agent on {len(questions_data)} questions...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
 
318
+ for i, item in enumerate(questions_data):
319
+ task_id = item.get("task_id")
320
+ question_text = item.get("question")
321
+ if not task_id or question_text is None:
322
+ print(f"Skipping item with missing task_id or question: {item}")
323
+ continue
324
+
325
+ print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
326
+ try:
327
+ submitted_answer = agent(question_text)
328
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
329
+ results_log.append({"Task ID": task_id, "Question": question_text[:100] + "...", "Submitted Answer": submitted_answer[:200] + "..."})
330
+
331
+ # Add small delay to avoid rate limiting
332
+ time.sleep(1)
333
+
334
+ except Exception as e:
335
+ print(f"Error running agent on task {task_id}: {e}")
336
+ results_log.append({"Task ID": task_id, "Question": question_text[:100] + "...", "Submitted Answer": f"AGENT ERROR: {e}"})
337
 
338
+ if not answers_payload:
339
+ print("Agent did not produce any answers to submit.")
340
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
341
 
342
+ # 4. Prepare Submission
343
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
344
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
345
+ print(status_update)
346
 
347
+ # 5. Submit
348
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
349
+ try:
350
+ response = requests.post(submit_url, json=submission_data, timeout=60)
351
+ response.raise_for_status()
352
+ result_data = response.json()
353
+ final_status = (
354
+ f"Submission Successful!\n"
355
+ f"User: {result_data.get('username')}\n"
356
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
357
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
358
+ f"Message: {result_data.get('message', 'No message received.')}"
359
+ )
360
+ print("Submission successful.")
361
+ results_df = pd.DataFrame(results_log)
362
+ return final_status, results_df
363
+ except requests.exceptions.HTTPError as e:
364
+ error_detail = f"Server responded with status {e.response.status_code}."
365
+ try:
366
+ error_json = e.response.json()
367
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
368
+ except requests.exceptions.JSONDecodeError:
369
+ error_detail += f" Response: {e.response.text[:500]}"
370
+ status_message = f"Submission Failed: {error_detail}"
371
+ print(status_message)
372
+ results_df = pd.DataFrame(results_log)
373
+ return status_message, results_df
374
+ except requests.exceptions.Timeout:
375
+ status_message = "Submission Failed: The request timed out."
376
+ print(status_message)
377
+ results_df = pd.DataFrame(results_log)
378
+ return status_message, results_df
379
+ except requests.exceptions.RequestException as e:
380
+ status_message = f"Submission Failed: Network error - {e}"
381
+ print(status_message)
382
+ results_df = pd.DataFrame(results_log)
383
+ return status_message, results_df
384
+ except Exception as e:
385
+ status_message = f"An unexpected error occurred during submission: {e}"
386
+ print(status_message)
387
+ results_df = pd.DataFrame(results_log)
388
+ return status_message, results_df
389
 
390
+ run_btn.click(
391
+ run_and_submit_all,
392
+ outputs=[status, results]
393
  )
394
 
395
  if __name__ == "__main__":