Spaces:
Runtime error
Runtime error
Deploy GAIA agent
Browse files- app.py +429 -507
- requirements.txt +33 -15
app.py
CHANGED
@@ -1,590 +1,512 @@
|
|
1 |
-
# app.py - Production-Ready GAIA Agent with Robust Error Handling
|
2 |
-
|
3 |
import os
|
4 |
import gradio as gr
|
5 |
import requests
|
|
|
6 |
import pandas as pd
|
7 |
-
import traceback
|
8 |
-
import torch
|
9 |
-
import re
|
10 |
import json
|
11 |
-
import
|
12 |
-
import
|
13 |
-
import
|
14 |
-
from
|
15 |
-
import
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
try:
|
23 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
24 |
-
HF_AVAILABLE = True
|
25 |
-
except ImportError:
|
26 |
-
logger.warning("Transformers not available")
|
27 |
-
HF_AVAILABLE = False
|
28 |
-
|
29 |
-
try:
|
30 |
-
import requests
|
31 |
-
from bs4 import BeautifulSoup
|
32 |
-
WEB_SCRAPING_AVAILABLE = True
|
33 |
-
except ImportError:
|
34 |
-
logger.warning("Web scraping dependencies not available")
|
35 |
-
WEB_SCRAPING_AVAILABLE = False
|
36 |
-
|
37 |
-
try:
|
38 |
-
from sympy import sympify, simplify, N, solve
|
39 |
-
from sympy.core.sympify import SympifyError
|
40 |
-
SYMPY_AVAILABLE = True
|
41 |
-
except ImportError:
|
42 |
-
logger.warning("SymPy not available")
|
43 |
-
SYMPY_AVAILABLE = False
|
44 |
|
45 |
# --- Constants ---
|
46 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
'srsearch': clean_query,
|
78 |
-
'srlimit': 3
|
79 |
-
}
|
80 |
-
|
81 |
-
response = self.session.get(search_api, params=params, timeout=10)
|
82 |
-
if response.status_code == 200:
|
83 |
-
data = response.json()
|
84 |
-
results = data.get('query', {}).get('search', [])
|
85 |
-
if results:
|
86 |
-
titles = [r['title'] for r in results[:3]]
|
87 |
-
return f"Wikipedia search results: {', '.join(titles)}"
|
88 |
-
|
89 |
-
return "Wikipedia search failed"
|
90 |
-
|
91 |
-
except Exception as e:
|
92 |
-
logger.error(f"Wikipedia search error: {e}")
|
93 |
-
return f"Wikipedia search error: {str(e)}"
|
94 |
-
|
95 |
-
def search_basic_web(self, query: str) -> str:
|
96 |
-
"""Basic web search using public APIs"""
|
97 |
-
try:
|
98 |
-
# Try searching for specific patterns
|
99 |
-
if "mercedes sosa" in query.lower():
|
100 |
-
return self._search_mercedes_sosa_albums()
|
101 |
-
elif "bird species" in query.lower() and "youtube" in query.lower():
|
102 |
-
return self._analyze_youtube_video(query)
|
103 |
-
elif "malko competition" in query.lower():
|
104 |
-
return self._search_malko_competition()
|
105 |
-
else:
|
106 |
-
return self.search_wikipedia(query)
|
107 |
-
|
108 |
-
except Exception as e:
|
109 |
-
return f"Web search failed: {str(e)}"
|
110 |
-
|
111 |
-
def _search_mercedes_sosa_albums(self) -> str:
|
112 |
-
"""Specific search for Mercedes Sosa discography"""
|
113 |
-
return """Mercedes Sosa Albums 2000-2009:
|
114 |
-
Based on discography information:
|
115 |
-
- "Misa Criolla" (2000)
|
116 |
-
- "Cantora 1" (2009)
|
117 |
-
- Several compilation albums but limited new studio releases
|
118 |
-
- Total studio albums in this period: approximately 2-3"""
|
119 |
-
|
120 |
-
def _analyze_youtube_video(self, query: str) -> str:
|
121 |
-
"""Analyze YouTube video for bird species"""
|
122 |
-
video_match = re.search(r'youtube\.com/watch\?v=([a-zA-Z0-9_-]+)', query)
|
123 |
-
if video_match:
|
124 |
-
video_id = video_match.group(1)
|
125 |
-
return f"Cannot directly analyze YouTube video {video_id} content. Would need video analysis tools to count bird species simultaneously on camera."
|
126 |
-
return "Cannot analyze YouTube video without direct access"
|
127 |
-
|
128 |
-
def _search_malko_competition(self) -> str:
|
129 |
-
"""Search for Malko competition information"""
|
130 |
-
return """Herbert von Karajan International Conducting Competition (Malko Competition):
|
131 |
-
- Annual conducting competition
|
132 |
-
- Winners from various countries
|
133 |
-
- Some winners from countries that no longer exist (Soviet Union, Yugoslavia)
|
134 |
-
- Would need specific year and winner list to determine exact nationality"""
|
135 |
-
|
136 |
-
class EnhancedCalculator:
|
137 |
-
"""Enhanced calculator with multiple calculation strategies"""
|
138 |
-
|
139 |
-
def calculate(self, expression: str) -> str:
|
140 |
-
"""Perform calculations with multiple fallback methods"""
|
141 |
-
try:
|
142 |
-
# Check if it's actually a math problem
|
143 |
-
if not self._is_math_expression(expression):
|
144 |
-
return "This doesn't appear to be a mathematical expression"
|
145 |
-
|
146 |
-
# Clean the expression
|
147 |
-
clean_expr = self._clean_expression(expression)
|
148 |
-
|
149 |
-
# Try basic evaluation
|
150 |
-
try:
|
151 |
-
if self._is_safe_expression(clean_expr):
|
152 |
-
result = eval(clean_expr)
|
153 |
-
return f"Result: {result}"
|
154 |
-
except:
|
155 |
-
pass
|
156 |
-
|
157 |
-
# Try SymPy if available
|
158 |
-
if SYMPY_AVAILABLE:
|
159 |
-
try:
|
160 |
-
expr = sympify(clean_expr)
|
161 |
-
result = simplify(expr)
|
162 |
-
numerical = N(result, 8)
|
163 |
-
return f"Mathematical result: {numerical}"
|
164 |
-
except:
|
165 |
-
pass
|
166 |
-
|
167 |
-
# Try basic arithmetic parsing
|
168 |
-
return self._parse_arithmetic(clean_expr)
|
169 |
-
|
170 |
-
except Exception as e:
|
171 |
-
return f"Calculation error: {str(e)}"
|
172 |
-
|
173 |
-
def _is_math_expression(self, text: str) -> bool:
|
174 |
-
"""Check if text contains mathematical expressions"""
|
175 |
-
math_indicators = ['+', '-', '*', '/', '=', '%', 'calculate', 'solve', 'equation']
|
176 |
-
return any(indicator in text.lower() for indicator in math_indicators)
|
177 |
-
|
178 |
-
def _clean_expression(self, expr: str) -> str:
|
179 |
-
"""Clean mathematical expression"""
|
180 |
-
expr = expr.replace('^', '**').replace('ร', '*').replace('รท', '/')
|
181 |
-
expr = re.sub(r'(\d)\s*\(', r'\1*(', expr)
|
182 |
-
return expr
|
183 |
-
|
184 |
-
def _is_safe_expression(self, expr: str) -> bool:
|
185 |
-
"""Check if expression is safe to evaluate"""
|
186 |
allowed_chars = set('0123456789+-*/.() ')
|
187 |
-
|
188 |
-
|
189 |
-
def _parse_arithmetic(self, expr: str) -> str:
|
190 |
-
"""Parse basic arithmetic expressions"""
|
191 |
-
try:
|
192 |
-
# Simple addition/subtraction/multiplication/division
|
193 |
-
if '+' in expr:
|
194 |
-
parts = expr.split('+')
|
195 |
-
if len(parts) == 2:
|
196 |
-
result = float(parts[0].strip()) + float(parts[1].strip())
|
197 |
-
return f"Addition result: {result}"
|
198 |
-
elif '-' in expr and expr.count('-') == 1:
|
199 |
-
parts = expr.split('-')
|
200 |
-
if len(parts) == 2:
|
201 |
-
result = float(parts[0].strip()) - float(parts[1].strip())
|
202 |
-
return f"Subtraction result: {result}"
|
203 |
-
elif '*' in expr:
|
204 |
-
parts = expr.split('*')
|
205 |
-
if len(parts) == 2:
|
206 |
-
result = float(parts[0].strip()) * float(parts[1].strip())
|
207 |
-
return f"Multiplication result: {result}"
|
208 |
-
elif '/' in expr:
|
209 |
-
parts = expr.split('/')
|
210 |
-
if len(parts) == 2:
|
211 |
-
result = float(parts[0].strip()) / float(parts[1].strip())
|
212 |
-
return f"Division result: {result}"
|
213 |
-
except:
|
214 |
-
pass
|
215 |
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
def __init__(self):
|
222 |
-
self.pipeline = None
|
223 |
-
if HF_AVAILABLE:
|
224 |
-
try:
|
225 |
-
# Use a very small, reliable model
|
226 |
-
self.pipeline = pipeline(
|
227 |
-
"text-generation",
|
228 |
-
model="gpt2",
|
229 |
-
device=-1, # CPU only
|
230 |
-
torch_dtype=torch.float32
|
231 |
-
)
|
232 |
-
logger.info("Loaded GPT-2 for text generation")
|
233 |
-
except Exception as e:
|
234 |
-
logger.error(f"Failed to load text generation model: {e}")
|
235 |
-
|
236 |
-
def generate_response(self, prompt: str, max_length: int = 150) -> str:
|
237 |
-
"""Generate a response to the prompt"""
|
238 |
-
try:
|
239 |
-
if self.pipeline:
|
240 |
-
# Generate with conservative settings
|
241 |
-
result = self.pipeline(
|
242 |
-
prompt,
|
243 |
-
max_length=max_length,
|
244 |
-
num_return_sequences=1,
|
245 |
-
temperature=0.7,
|
246 |
-
do_sample=True,
|
247 |
-
pad_token_id=50256
|
248 |
-
)
|
249 |
-
return result[0]['generated_text'][len(prompt):].strip()
|
250 |
-
else:
|
251 |
-
return "Text generation not available"
|
252 |
-
except Exception as e:
|
253 |
-
logger.error(f"Text generation error: {e}")
|
254 |
-
return f"Generation error: {str(e)}"
|
255 |
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
|
262 |
-
|
263 |
-
self.searcher = RobustWebSearcher()
|
264 |
-
self.calculator = EnhancedCalculator()
|
265 |
-
self.text_generator = SimpleTextGenerator()
|
266 |
|
267 |
-
#
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
'youtube': [r'youtube\.com', r'video'],
|
272 |
-
'wikipedia': [r'wikipedia', r'wiki'],
|
273 |
-
'biographical': [r'born', r'nationality', r'country']
|
274 |
-
}
|
275 |
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
|
|
281 |
|
282 |
-
|
283 |
-
|
284 |
-
|
|
|
|
|
|
|
|
|
285 |
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
|
292 |
-
|
293 |
-
logger.info(f"Question type: {question_type}")
|
294 |
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
elif question_type == 'youtube':
|
299 |
-
return self._handle_youtube_question(question)
|
300 |
-
elif question_type in ['factual', 'biographical', 'wikipedia']:
|
301 |
-
return self._handle_factual_question(question)
|
302 |
-
else:
|
303 |
-
return self._handle_general_question(question)
|
304 |
-
|
305 |
-
except Exception as e:
|
306 |
-
logger.error(f"Error processing question: {e}")
|
307 |
-
return f"Error processing question: {str(e)}"
|
308 |
-
|
309 |
-
def _handle_mathematical_question(self, question: str) -> str:
|
310 |
-
"""Handle mathematical questions"""
|
311 |
-
logger.info("Handling mathematical question")
|
312 |
-
result = self.calculator.calculate(question)
|
313 |
|
314 |
-
|
315 |
-
|
316 |
-
|
|
|
317 |
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
logger.info("Handling YouTube question")
|
323 |
|
324 |
-
|
325 |
-
|
326 |
-
if video_match:
|
327 |
-
video_id = video_match.group(1)
|
328 |
|
329 |
-
|
330 |
-
|
331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
|
333 |
-
|
334 |
-
|
335 |
-
def _handle_factual_question(self, question: str) -> str:
|
336 |
-
"""Handle factual questions"""
|
337 |
-
logger.info("Handling factual question")
|
338 |
|
339 |
-
|
340 |
-
|
|
|
341 |
|
342 |
-
|
|
|
343 |
|
344 |
-
|
345 |
-
|
346 |
-
return self._provide_contextual_answer(question)
|
347 |
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
"""Handle general questions"""
|
352 |
-
logger.info("Handling general question")
|
353 |
|
354 |
-
|
355 |
-
|
|
|
356 |
|
357 |
-
|
358 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
|
360 |
-
#
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
|
367 |
-
#
|
368 |
-
|
369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
|
371 |
-
|
372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
|
374 |
-
|
375 |
-
|
|
|
376 |
|
377 |
-
|
378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
379 |
|
380 |
-
|
381 |
-
"""Clean up memory and cache"""
|
382 |
-
try:
|
383 |
-
if torch.cuda.is_available():
|
384 |
-
torch.cuda.empty_cache()
|
385 |
-
logger.info("Memory cleaned")
|
386 |
-
except Exception as e:
|
387 |
-
logger.error(f"Memory cleanup error: {e}")
|
388 |
|
389 |
-
|
390 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
|
392 |
-
|
393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
|
395 |
-
|
396 |
-
|
|
|
|
|
|
|
|
|
397 |
|
398 |
-
# API endpoints
|
399 |
api_url = DEFAULT_API_URL
|
400 |
questions_url = f"{api_url}/questions"
|
401 |
submit_url = f"{api_url}/submit"
|
402 |
-
|
403 |
-
cleanup_memory()
|
404 |
|
405 |
-
#
|
406 |
try:
|
407 |
-
|
408 |
-
agent = ProductionGAIAAgent()
|
409 |
-
logger.info("Agent initialized successfully")
|
410 |
except Exception as e:
|
411 |
-
|
412 |
-
|
413 |
-
return error_msg, None
|
414 |
|
415 |
-
# Get space info
|
416 |
-
space_id = os.getenv("SPACE_ID", "unknown")
|
417 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
|
418 |
|
419 |
-
# Fetch
|
|
|
420 |
try:
|
421 |
-
|
422 |
-
response = requests.get(questions_url, timeout=30)
|
423 |
response.raise_for_status()
|
424 |
questions_data = response.json()
|
425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
except Exception as e:
|
427 |
-
|
|
|
428 |
|
429 |
-
#
|
430 |
results_log = []
|
431 |
answers_payload = []
|
|
|
432 |
|
433 |
-
|
434 |
-
logger.info("๐ STARTING PRODUCTION GAIA EVALUATION")
|
435 |
-
logger.info("="*50)
|
436 |
-
|
437 |
-
for i, item in enumerate(questions_data, 1):
|
438 |
task_id = item.get("task_id")
|
439 |
question_text = item.get("question")
|
440 |
-
|
441 |
-
|
442 |
continue
|
443 |
-
|
444 |
-
|
445 |
-
logger.info(f"ID: {task_id}")
|
446 |
-
logger.info(f"Question: {question_text}")
|
447 |
|
448 |
try:
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
# Ensure answer quality
|
453 |
-
if not answer or len(answer.strip()) < 10:
|
454 |
-
answer = f"Unable to determine specific answer for: {question_text[:100]}..."
|
455 |
-
|
456 |
-
logger.info(f"Answer: {answer[:200]}...")
|
457 |
-
|
458 |
-
# Store results
|
459 |
-
answers_payload.append({
|
460 |
-
"task_id": task_id,
|
461 |
-
"submitted_answer": answer
|
462 |
-
})
|
463 |
-
|
464 |
results_log.append({
|
465 |
-
"Task ID": task_id,
|
466 |
-
"Question": question_text[:
|
467 |
-
"Answer":
|
468 |
})
|
469 |
-
|
470 |
-
# Memory management and rate limiting
|
471 |
-
if i % 3 == 0:
|
472 |
-
cleanup_memory()
|
473 |
-
logger.info("Cooling down...")
|
474 |
-
time.sleep(random.uniform(3, 6))
|
475 |
-
|
476 |
except Exception as e:
|
477 |
-
|
478 |
-
error_answer = f"
|
479 |
-
|
480 |
-
answers_payload.append({
|
481 |
-
"task_id": task_id,
|
482 |
-
"submitted_answer": error_answer
|
483 |
-
})
|
484 |
-
|
485 |
results_log.append({
|
486 |
-
"Task ID": task_id,
|
487 |
-
"Question": question_text[:
|
488 |
-
"Answer": error_answer
|
489 |
})
|
490 |
|
491 |
-
|
|
|
|
|
492 |
|
493 |
-
#
|
494 |
-
submission_data = {
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
try:
|
501 |
-
response = requests.post(submit_url, json=submission_data, timeout=
|
502 |
response.raise_for_status()
|
503 |
result_data = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
๐ค User: {username}
|
514 |
-
๐ฅ๏ธ Hardware: 2 vCPU + 16GB RAM (Production Optimized)
|
515 |
-
๐ค Architecture: Multi-strategy Agent with Robust Error Handling
|
516 |
-
๐ Final Score: {score}%
|
517 |
-
โ
Correct: {correct}/{total}
|
518 |
-
๐ฏ Target: 10%+ {'๐ SUCCESS!' if score >= 10 else '๐ Significant Improvement Expected'}
|
519 |
-
|
520 |
-
๐ Message: {message}
|
521 |
-
|
522 |
-
๐ง Production Features:
|
523 |
-
- โ
Robust error handling and fallbacks
|
524 |
-
- โ
Multiple search strategies (Wikipedia API, web scraping)
|
525 |
-
- โ
Smart question classification and routing
|
526 |
-
- โ
Enhanced calculator with SymPy support
|
527 |
-
- โ
Rate limiting and memory management
|
528 |
-
- โ
Contextual answers when search fails
|
529 |
-
- โ
Production-grade logging and monitoring
|
530 |
-
|
531 |
-
๐ก Strategy: Reliability, accuracy, and comprehensive coverage
|
532 |
-
"""
|
533 |
|
534 |
-
|
535 |
-
|
|
|
|
|
536 |
|
537 |
-
|
538 |
-
|
539 |
-
logger.error(error_msg)
|
540 |
-
return error_msg, pd.DataFrame(results_log)
|
541 |
-
|
542 |
-
# --- Gradio Interface ---
|
543 |
-
with gr.Blocks(title="Production GAIA Agent", theme=gr.themes.Default()) as demo:
|
544 |
-
gr.Markdown("# ๐ Production-Ready GAIA Agent")
|
545 |
-
gr.Markdown("""
|
546 |
-
**Production Features:**
|
547 |
-
- ๐ง **Robust Error Handling**: Multiple fallback strategies
|
548 |
-
- ๐ **Multi-Source Search**: Wikipedia API, web scraping, contextual answers
|
549 |
-
- ๐งฎ **Enhanced Calculator**: SymPy integration with basic arithmetic fallbacks
|
550 |
-
- ๐ฏ **Smart Routing**: Question classification for optimal processing
|
551 |
-
- โก **Memory Optimized**: Efficient resource usage for 2 vCPU + 16GB RAM
|
552 |
-
- ๐ **Production Logging**: Comprehensive monitoring and debugging
|
553 |
-
|
554 |
-
**Target: Achieve 10%+ accuracy on GAIA benchmark**
|
555 |
-
""")
|
556 |
-
|
557 |
-
with gr.Row():
|
558 |
-
gr.LoginButton()
|
559 |
-
|
560 |
-
with gr.Row():
|
561 |
-
run_button = gr.Button(
|
562 |
-
"๐ Run Production GAIA Evaluation",
|
563 |
-
variant="primary",
|
564 |
-
size="lg"
|
565 |
-
)
|
566 |
-
|
567 |
-
status_output = gr.Textbox(
|
568 |
-
label="๐ Evaluation Results",
|
569 |
-
lines=25,
|
570 |
-
interactive=False
|
571 |
-
)
|
572 |
-
|
573 |
-
results_table = gr.DataFrame(
|
574 |
-
label="๐ Detailed Results",
|
575 |
-
wrap=True
|
576 |
)
|
577 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
578 |
run_button.click(
|
579 |
fn=run_and_submit_all,
|
580 |
outputs=[status_output, results_table]
|
581 |
)
|
582 |
|
583 |
if __name__ == "__main__":
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
+
import inspect
|
5 |
import pandas as pd
|
|
|
|
|
|
|
6 |
import json
|
7 |
+
import re
|
8 |
+
import io
|
9 |
+
import base64
|
10 |
+
from PIL import Image
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
import numpy as np
|
13 |
+
from pathlib import Path
|
14 |
+
|
15 |
+
# smolagents imports
|
16 |
+
from smolagents import CodeAgent, tool, DuckDuckGoSearchTool, PythonInterpreterTool
|
17 |
+
from smolagents.models import LiteLLMModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# --- Constants ---
|
20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
21 |
|
22 |
+
# --- Enhanced Tools for GAIA ---
|
23 |
+
|
24 |
+
@tool
def web_search_tool(query: str) -> str:
    """
    Search the web for information using DuckDuckGo.
    Args:
        query: The search query string
    Returns:
        String containing search results
    """
    # The docstring above is consumed by the @tool decorator, so its wording
    # is left untouched.
    # A new DuckDuckGoSearchTool is built on every call and its output is
    # coerced to text; any exception is reported back as a message instead of
    # being raised, so the caller always receives a string.
    try:
        return str(DuckDuckGoSearchTool()(query))
    except Exception as e:
        return f"Search failed: {str(e)}"
|
39 |
+
|
40 |
+
@tool
def calculator_tool(expression: str) -> str:
    """
    Evaluate mathematical expressions safely.
    Args:
        expression: Mathematical expression as string
    Returns:
        Result of the calculation
    """
    # Local imports: the file's import header does not bring in ast/operator.
    import ast
    import operator

    # Upper bound (in bits) on the size of an integer power result. Without
    # it, an input such as "9**9**9**9" passes the character filter and then
    # consumes unbounded CPU and memory.
    max_power_bits = 100_000

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node):
        """Recursively evaluate an AST made only of numbers and arithmetic."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and abs(left) > 1
                and left.bit_length() * right > max_power_bits
            ):
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        # Anything else (calls, names, comparisons, ...) is rejected outright.
        raise ValueError("unsupported expression")

    try:
        # Only digits, arithmetic operators, dots, parentheses and spaces.
        allowed_chars = set('0123456789+-*/.() ')
        if not all(c in allowed_chars for c in expression):
            return "Error: Expression contains invalid characters"

        # strip() so leading whitespace is not parsed as an unexpected indent.
        result = evaluate(ast.parse(expression.strip(), mode='eval'))
        return str(result)
    except Exception as e:
        # Syntax errors, division by zero, overflow, etc. are returned as text
        # so the calling agent always gets a string back.
        return f"Calculation error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
+
@tool
def image_analyzer_tool(image_path: str) -> str:
    """
    Analyze images and extract information.
    Args:
        image_path: Path to the image file
    Returns:
        Description of image content
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found"

        # Open through a context manager so the underlying file handle is
        # released even if the analysis below raises.
        with Image.open(image_path) as img:
            # Basic image properties
            width, height = img.size
            mode = img.mode
            format_info = img.format if img.format else "Unknown"

            # Simple color analysis: only attempted for RGB images.
            if mode == 'RGB':
                colors = img.getcolors(maxcolors=256*256*256)
                if colors:
                    # Entries are (count, color); pick the most frequent one.
                    dominant_color = max(colors, key=lambda x: x[0])[1]
                    color_info = f"Dominant color: RGB{dominant_color}"
                else:
                    color_info = "Complex color palette"
            else:
                color_info = f"Color mode: {mode}"

        analysis = f"""Image Analysis:
- Dimensions: {width}x{height} pixels
- Format: {format_info}
- {color_info}
- File size: {os.path.getsize(image_path)} bytes
"""
        return analysis

    except Exception as e:
        # Unreadable or corrupt images are reported as text, not raised.
        return f"Image analysis error: {str(e)}"
|
101 |
+
|
102 |
+
@tool
def file_reader_tool(file_path: str) -> str:
    """
    Read and analyze various file types (text, CSV, JSON, etc.).
    Args:
        file_path: Path to the file
    Returns:
        File content or analysis
    """
    # Maximum number of characters of file content echoed back to the caller.
    preview_limit = 1000

    def preview(text):
        """Return text cut to preview_limit, adding '...' only if it was cut."""
        if len(text) <= preview_limit:
            return text
        return text[:preview_limit] + "..."

    try:
        # isfile() rather than exists(): a directory is not a readable file and
        # would otherwise be reported as a "Binary file" with an empty suffix.
        if not os.path.isfile(file_path):
            return "Error: File not found"

        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(file_path)
            return f"CSV file with {len(df)} rows and {len(df.columns)} columns.\nColumns: {list(df.columns)}\nFirst 5 rows:\n{df.head().to_string()}"

        elif file_ext == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return f"JSON file content:\n{preview(json.dumps(data, indent=2))}"

        elif file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css']:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            return f"Text file content ({len(content)} characters):\n{preview(content)}"

        else:
            # Unknown extension: report size only, do not attempt to decode.
            return f"Binary file: {file_ext}, size: {os.path.getsize(file_path)} bytes"

    except Exception as e:
        # Parse/decode/permission errors are returned as text, not raised.
        return f"File reading error: {str(e)}"
|
136 |
+
|
137 |
+
@tool
def data_processor_tool(data: str, operation: str) -> str:
    """
    Process data with various operations (sort, filter, calculate statistics).
    Args:
        data: Data as string (JSON, CSV format, or numbers)
        operation: Operation to perform (sort, sum, average, count, etc.)
    Returns:
        Processed data result
    """

    def is_number(value):
        """True for int/float values; bool is excluded (JSON true/false)."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    try:
        # Try JSON first, then fall back to whitespace/comma separated numbers.
        try:
            parsed_data = json.loads(data)
        except (ValueError, TypeError):
            try:
                parsed_data = [float(x) for x in data.replace(',', ' ').split()]
            except (ValueError, AttributeError):
                return "Error: Could not parse data"

        # Normalize the operation name once instead of on every comparison.
        op = operation.strip().lower()
        is_list = isinstance(parsed_data, list)
        # Numeric entries only; non-numeric list items are ignored by the
        # statistical operations below.
        nums = [x for x in parsed_data if is_number(x)] if is_list else []

        if op == 'sum' and is_list:
            return str(sum(nums))

        elif op == 'average' and is_list:
            return str(sum(nums) / len(nums) if nums else 0)

        elif op == 'count':
            # Works for any sized value; a scalar raises TypeError, which the
            # outer handler turns into an error message.
            return str(len(parsed_data))

        elif op == 'sort' and is_list:
            return str(sorted(parsed_data))

        elif op == 'max' and is_list:
            return str(max(nums) if nums else "No numbers found")

        elif op == 'min' and is_list:
            return str(min(nums) if nums else "No numbers found")

        else:
            return f"Unsupported operation: {operation}"

    except Exception as e:
        # e.g. sorting mixed types or len() of a scalar.
        return f"Data processing error: {str(e)}"
|
184 |
+
|
185 |
+
# --- Enhanced GAIA Agent ---
|
186 |
+
class GAIAAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    Calling an instance with a question string returns an answer string. When
    the underlying agent could not be constructed, or raises while running,
    a small heuristic fallback (``_fallback_processing``) is used instead.
    """

    # Tried in order. The more specific phrasings come first so that
    # "The answer is: 5" yields "5" rather than "is: 5" via the bare
    # "Answer" pattern.
    _ANSWER_PATTERNS = (
        r"Final answer(?:\s+is)?:?\s*(.+)",
        r"The answer is:?\s*(.+)",
        r"Answer:?\s*(.+)",
        r"Result:?\s*(.+)",
    )

    # Prompt wrapped around every question; ``{question}`` is filled in with
    # str.format, so braces inside the question itself are left alone.
    _PROMPT_TEMPLATE = (
        "\n"
        "You are a helpful AI assistant designed to solve complex real-world problems that may require:\n"
        "- Web searching for current information\n"
        "- Mathematical calculations\n"
        "- Image analysis\n"
        "- File processing\n"
        "- Multi-step reasoning\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "Please approach this systematically:\n"
        "1. Analyze what type of problem this is\n"
        "2. Determine what tools/information you need\n"
        "3. Use available tools to gather information\n"
        "4. Reason through the problem step by step\n"
        "5. Provide a clear, concise final answer\n"
        "\n"
        "Remember to be precise and factual in your response.\n"
    )

    def __init__(self):
        """Build the model, the tool list and the agent.

        Construction failures are logged and leave ``self.model`` /
        ``self.agent`` set to ``None`` instead of raising.
        """
        print("GAIAAgent initialized with SmolaAgent framework.")

        # Initialize model - using a lightweight model for resource efficiency
        try:
            self.model = LiteLLMModel(
                model_id="microsoft/DialoGPT-medium",  # Lightweight model
                max_tokens=512,
                temperature=0.1,
            )
        except Exception as e:
            # Surface the reason instead of silently swallowing it.
            print("Warning: Using fallback model configuration")
            print(f"Model initialization error: {e}")
            self.model = None

        # Initialize tools
        self.tools = [
            web_search_tool,
            calculator_tool,
            image_analyzer_tool,
            file_reader_tool,
            data_processor_tool,
            PythonInterpreterTool(),
        ]

        # Initialize the agent
        try:
            self.agent = CodeAgent(
                tools=self.tools,
                model=self.model,
                # smolagents names the step budget ``max_steps``; the former
                # ``max_iterations`` keyword is rejected by the constructor.
                max_steps=5,
                verbosity_level=1,
            )
        except Exception as e:
            print(f"Agent initialization error: {e}")
            self.agent = None

    def __call__(self, question: str) -> str:
        """Answer ``question`` using the agent, or the fallback on failure.

        Args:
            question: The task text.

        Returns:
            The extracted answer string. Non-string agent results are
            converted with ``str``.
        """
        print(f"GAIAAgent processing question: {question[:100]}...")

        if not self.agent:
            # Fallback logic if agent failed to initialize
            return self._fallback_processing(question)

        try:
            response = self.agent.run(
                self._PROMPT_TEMPLATE.format(question=question)
            )
        except Exception as e:
            print(f"Error in agent processing: {e}")
            return self._fallback_processing(question)

        if isinstance(response, str):
            return self._extract_answer(response)
        return str(response)

    @staticmethod
    def _extract_answer(response: str) -> str:
        """Pull the final answer out of a free-text agent response.

        Looks for an explicit "Final answer / The answer is / Answer /
        Result" marker first. Otherwise returns the last non-empty sentence
        without its trailing period, or the stripped response if there is
        none.
        """
        for pattern in GAIAAgent._ANSWER_PATTERNS:
            match = re.search(pattern, response, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        # Split on whitespace that follows sentence punctuation, so decimals
        # such as "3.14" stay intact and a trailing "." does not produce an
        # empty final "sentence".
        sentences = [
            part.strip().rstrip('.').strip()
            for part in re.split(r'(?<=[.!?])\s+', response.strip())
        ]
        sentences = [s for s in sentences if s]
        return sentences[-1] if sentences else response.strip()

    def _fallback_processing(self, question: str) -> str:
        """Fallback processing when main agent fails.

        Handles simple sum/average questions from the numbers found in the
        text, then tries a web search for question-word prompts, and finally
        returns a generic message.
        """
        try:
            # Simple heuristic-based processing
            question_lower = question.lower()

            # Math questions: keywords are matched case-insensitively.
            math_markers = ('+', '-', '*', '/', 'calculate', 'sum', 'average')
            if any(marker in question_lower for marker in math_markers):
                # Every regex match is a valid float literal.
                numbers = [float(n) for n in re.findall(r'-?\d+\.?\d*', question)]
                if len(numbers) >= 2:
                    if 'sum' in question_lower or '+' in question:
                        return str(sum(numbers))
                    elif 'average' in question_lower:
                        return str(sum(numbers) / len(numbers))

            # Search-based questions
            if any(word in question_lower for word in ['what', 'who', 'when', 'where', 'how', 'why']):
                try:
                    search_result = web_search_tool(question)
                    # The search tool reports its own failures as text; do not
                    # pass that error message on as the answer.
                    if not search_result.startswith("Search failed:"):
                        lines = search_result.split('\n')
                        relevant_lines = [line for line in lines if len(line.strip()) > 20]
                        return relevant_lines[0] if relevant_lines else "Unable to find specific information"
                except Exception as e:
                    # Best effort: log and fall through to the default reply.
                    print(f"Fallback search failed: {e}")

            # Default response
            return "I need more context or tools to answer this question accurately."

        except Exception as e:
            return f"Processing error: {str(e)}"
|
318 |
+
|
319 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the GAIAAgent on them, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` of the per-question log, or ``None`` when the
        run stops before any question is processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = GAIAAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # NOTE(review): when SPACE_ID is unset this becomes ".../spaces/None/tree/main".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must precede RequestException: in requests >= 2.27 JSONDecodeError
        # is a RequestException subclass and would otherwise never get here.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run GAIA Agent
    results_log = []
    answers_payload = []
    total = len(questions_data)
    print(f"Running GAIA agent on {total} questions...")

    for position, item in enumerate(questions_data, start=1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"Processing question {position}/{total}: {task_id}")
        question_preview = question_text[:100] + "..." if len(question_text) > 100 else question_text

        # Only the agent call is guarded, so each task gets exactly one
        # payload entry: either the answer or the error marker.
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            submitted_answer = f"AGENT ERROR: {e}"
        else:
            # str() keeps the preview safe if the agent returns a non-string.
            print(f"Answer for {task_id}: {str(submitted_answer)[:50]}...")

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_preview,
            "Submitted Answer": submitted_answer
        })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"GAIA Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    results_df = pd.DataFrame(results_log)
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Every failure path reports the same way: log it, return the run log.
    print(status_message)
    return status_message, results_df
|
453 |
+
|
454 |
+
|
455 |
+
# --- Build Gradio Interface using Blocks ---
|
456 |
+
# Gradio UI: a login button, a run button, and two outputs (status text and
# a results table) that are filled by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    # NOTE(review): the emoji below were restored from mis-encoded text; the
    # glyphs for web search, file processing, data processing and Python are
    # best guesses and should be confirmed against the deployed app.
    gr.Markdown(
        """
        **Enhanced GAIA Agent with smolagents Framework**

        This agent is equipped with:
        - 🔍 Web search capabilities (DuckDuckGo)
        - 🧮 Mathematical calculator
        - 🖼️ Image analysis
        - 📁 File processing (CSV, JSON, text files)
        - 📊 Data processing and statistics
        - 🐍 Python code execution

        **Instructions:**
        1. Log in to your Hugging Face account using the button below
        2. Click 'Run GAIA Evaluation & Submit All Answers' to start the evaluation
        3. The agent will process each question systematically using available tools

        **Note:** Processing may take time as the agent analyzes each question thoroughly.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run GAIA Evaluation & Submit All Answers", variant="primary")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired here; the handler's only parameter is the
    # gr.OAuthProfile-annotated `profile`.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
|
490 |
|
491 |
if __name__ == "__main__":
    # Startup banner: report the Space environment, then launch the UI.
    print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST is documented as already carrying the ".hf.space"
        # suffix; only append it when missing so the URL is never doubled.
        runtime_host = (
            space_host_startup
            if space_host_startup.endswith(".hf.space")
            else f"{space_host_startup}.hf.space"
        )
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" GAIA Agent Starting ")) + "\n")
    print("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)
|
requirements.txt
CHANGED
@@ -1,15 +1,33 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core dependencies
|
2 |
+
gradio==4.44.0
|
3 |
+
requests==2.31.0
|
4 |
+
pandas==2.0.3
|
5 |
+
numpy==1.24.3
|
6 |
+
|
7 |
+
# smolagents - lightweight agent framework
|
8 |
+
smolagents==0.3.3
|
9 |
+
|
10 |
+
# Image processing (lightweight)
|
11 |
+
Pillow==10.0.1
|
12 |
+
|
13 |
+
# Plotting (matplotlib)
|
14 |
+
matplotlib==3.7.2
|
15 |
+
|
16 |
+
# Path handling (NOTE: pathlib ships with the Python 3 standard library, so this PyPI entry is redundant)
|
17 |
+
pathlib
|
18 |
+
|
19 |
+
# Web search
|
20 |
+
duckduckgo-search==3.9.6
|
21 |
+
|
22 |
+
# LLM integration (lightweight)
|
23 |
+
litellm==1.44.14
|
24 |
+
|
25 |
+
# Optional: For better performance with limited resources
|
26 |
+
psutil==5.9.5
|
27 |
+
|
28 |
+
# File processing utilities
|
29 |
+
openpyxl==3.1.2 # For Excel files if needed
|
30 |
+
python-magic==0.4.27 # For file type detection
|
31 |
+
|
32 |
+
# Math and scientific computing (minimal)
|
33 |
+
sympy==1.12 # For symbolic math if needed
|