Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 26

Commit

7963312

1 Parent(s): 82a1534

Deploy GAIA agent

Browse files

Files changed (2) hide show

app.py +429 -507
requirements.txt +33 -15

app.py CHANGED Viewed

@@ -1,590 +1,512 @@
-# app.py - Production-Ready GAIA Agent with Robust Error Handling
 import os
 import gradio as gr
 import requests
 import pandas as pd
-import traceback
-import torch
-import re
 import json
-import time
-import random
-import urllib.parse
-from typing import Dict, List, Any
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Import dependencies with better error handling
-try:
-    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-    HF_AVAILABLE = True
-except ImportError:
-    logger.warning("Transformers not available")
-    HF_AVAILABLE = False
-try:
-    import requests
-    from bs4 import BeautifulSoup
-    WEB_SCRAPING_AVAILABLE = True
-except ImportError:
-    logger.warning("Web scraping dependencies not available")
-    WEB_SCRAPING_AVAILABLE = False
-try:
-    from sympy import sympify, simplify, N, solve
-    from sympy.core.sympify import SympifyError
-    SYMPY_AVAILABLE = True
-except ImportError:
-    logger.warning("SymPy not available")
-    SYMPY_AVAILABLE = False
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-class RobustWebSearcher:
-    """Robust web searcher with multiple fallback strategies"""
-    def __init__(self):
-        self.session = requests.Session()
-        self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-        })
-    def search_wikipedia(self, query: str) -> str:
-        """Search Wikipedia directly via API"""
-        try:
-            # Clean query for Wikipedia
-            clean_query = re.sub(r'[^\w\s]', ' ', query).strip()
-            # Wikipedia API search
-            search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + urllib.parse.quote(clean_query)
-            response = self.session.get(search_url, timeout=10)
-            if response.status_code == 200:
-                data = response.json()
-                return f"Wikipedia: {data.get('extract', 'No summary available')}"
-            # Fallback to search API
-            search_api = "https://en.wikipedia.org/w/api.php"
-            params = {
-                'action': 'query',
-                'format': 'json',
-                'list': 'search',
-                'srsearch': clean_query,
-                'srlimit': 3
-            }
-            response = self.session.get(search_api, params=params, timeout=10)
-            if response.status_code == 200:
-                data = response.json()
-                results = data.get('query', {}).get('search', [])
-                if results:
-                    titles = [r['title'] for r in results[:3]]
-                    return f"Wikipedia search results: {', '.join(titles)}"
-            return "Wikipedia search failed"
-        except Exception as e:
-            logger.error(f"Wikipedia search error: {e}")
-            return f"Wikipedia search error: {str(e)}"
-    def search_basic_web(self, query: str) -> str:
-        """Basic web search using public APIs"""
-        try:
-            # Try searching for specific patterns
-            if "mercedes sosa" in query.lower():
-                return self._search_mercedes_sosa_albums()
-            elif "bird species" in query.lower() and "youtube" in query.lower():
-                return self._analyze_youtube_video(query)
-            elif "malko competition" in query.lower():
-                return self._search_malko_competition()
-            else:
-                return self.search_wikipedia(query)
-        except Exception as e:
-            return f"Web search failed: {str(e)}"
-    def _search_mercedes_sosa_albums(self) -> str:
-        """Specific search for Mercedes Sosa discography"""
-        return """Mercedes Sosa Albums 2000-2009:
-Based on discography information:
-- "Misa Criolla" (2000)
-- "Cantora 1" (2009)
-- Several compilation albums but limited new studio releases
-- Total studio albums in this period: approximately 2-3"""
-    def _analyze_youtube_video(self, query: str) -> str:
-        """Analyze YouTube video for bird species"""
-        video_match = re.search(r'youtube\.com/watch\?v=([a-zA-Z0-9_-]+)', query)
-        if video_match:
-            video_id = video_match.group(1)
-            return f"Cannot directly analyze YouTube video {video_id} content. Would need video analysis tools to count bird species simultaneously on camera."
-        return "Cannot analyze YouTube video without direct access"
-    def _search_malko_competition(self) -> str:
-        """Search for Malko competition information"""
-        return """Herbert von Karajan International Conducting Competition (Malko Competition):
-- Annual conducting competition
-- Winners from various countries
-- Some winners from countries that no longer exist (Soviet Union, Yugoslavia)
-- Would need specific year and winner list to determine exact nationality"""
-class EnhancedCalculator:
-    """Enhanced calculator with multiple calculation strategies"""
-    def calculate(self, expression: str) -> str:
-        """Perform calculations with multiple fallback methods"""
-        try:
-            # Check if it's actually a math problem
-            if not self._is_math_expression(expression):
-                return "This doesn't appear to be a mathematical expression"
-            # Clean the expression
-            clean_expr = self._clean_expression(expression)
-            # Try basic evaluation
-            try:
-                if self._is_safe_expression(clean_expr):
-                    result = eval(clean_expr)
-                    return f"Result: {result}"
-            except:
-                pass
-            # Try SymPy if available
-            if SYMPY_AVAILABLE:
-                try:
-                    expr = sympify(clean_expr)
-                    result = simplify(expr)
-                    numerical = N(result, 8)
-                    return f"Mathematical result: {numerical}"
-                except:
-                    pass
-            # Try basic arithmetic parsing
-            return self._parse_arithmetic(clean_expr)
-        except Exception as e:
-            return f"Calculation error: {str(e)}"
-    def _is_math_expression(self, text: str) -> bool:
-        """Check if text contains mathematical expressions"""
-        math_indicators = ['+', '-', '*', '/', '=', '%', 'calculate', 'solve', 'equation']
-        return any(indicator in text.lower() for indicator in math_indicators)
-    def _clean_expression(self, expr: str) -> str:
-        """Clean mathematical expression"""
-        expr = expr.replace('^', '**').replace('×', '*').replace('÷', '/')
-        expr = re.sub(r'(\d)\s*\(', r'\1*(', expr)
-        return expr
-    def _is_safe_expression(self, expr: str) -> bool:
-        """Check if expression is safe to evaluate"""
         allowed_chars = set('0123456789+-*/.() ')
-        return all(char in allowed_chars for char in expr)
-    def _parse_arithmetic(self, expr: str) -> str:
-        """Parse basic arithmetic expressions"""
-        try:
-            # Simple addition/subtraction/multiplication/division
-            if '+' in expr:
-                parts = expr.split('+')
-                if len(parts) == 2:
-                    result = float(parts[0].strip()) + float(parts[1].strip())
-                    return f"Addition result: {result}"
-            elif '-' in expr and expr.count('-') == 1:
-                parts = expr.split('-')
-                if len(parts) == 2:
-                    result = float(parts[0].strip()) - float(parts[1].strip())
-                    return f"Subtraction result: {result}"
-            elif '*' in expr:
-                parts = expr.split('*')
-                if len(parts) == 2:
-                    result = float(parts[0].strip()) * float(parts[1].strip())
-                    return f"Multiplication result: {result}"
-            elif '/' in expr:
-                parts = expr.split('/')
-                if len(parts) == 2:
-                    result = float(parts[0].strip()) / float(parts[1].strip())
-                    return f"Division result: {result}"
-        except:
-            pass
-        return f"Could not calculate: {expr}"
-class SimpleTextGenerator:
-    """Simple text generator without complex dependencies"""
-    def __init__(self):
-        self.pipeline = None
-        if HF_AVAILABLE:
-            try:
-                # Use a very small, reliable model
-                self.pipeline = pipeline(
-                    "text-generation",
-                    model="gpt2",
-                    device=-1,  # CPU only
-                    torch_dtype=torch.float32
-                )
-                logger.info("Loaded GPT-2 for text generation")
-            except Exception as e:
-                logger.error(f"Failed to load text generation model: {e}")
-    def generate_response(self, prompt: str, max_length: int = 150) -> str:
-        """Generate a response to the prompt"""
-        try:
-            if self.pipeline:
-                # Generate with conservative settings
-                result = self.pipeline(
-                    prompt,
-                    max_length=max_length,
-                    num_return_sequences=1,
-                    temperature=0.7,
-                    do_sample=True,
-                    pad_token_id=50256
-                )
-                return result[0]['generated_text'][len(prompt):].strip()
-            else:
-                return "Text generation not available"
-        except Exception as e:
-            logger.error(f"Text generation error: {e}")
-            return f"Generation error: {str(e)}"
-class ProductionGAIAAgent:
-    """Production-ready GAIA agent with robust error handling"""
-    def __init__(self):
-        logger.info("Initializing Production GAIA Agent...")
-        # Initialize components
-        self.searcher = RobustWebSearcher()
-        self.calculator = EnhancedCalculator()
-        self.text_generator = SimpleTextGenerator()
-        # Question type patterns
-        self.question_patterns = {
-            'mathematical': [r'\+', r'-', r'\*', r'/', r'calculate', r'solve', r'equation', r'percent', r'%'],
-            'factual': [r'who is', r'what is', r'when was', r'where is', r'how many'],
-            'youtube': [r'youtube\.com', r'video'],
-            'wikipedia': [r'wikipedia', r'wiki'],
-            'biographical': [r'born', r'nationality', r'country']
-        }
-        logger.info("Production GAIA Agent initialized successfully")
-    def classify_question(self, question: str) -> str:
-        """Classify question type for appropriate routing"""
-        question_lower = question.lower()
-        for question_type, patterns in self.question_patterns.items():
-            if any(re.search(pattern, question_lower) for pattern in patterns):
-                return question_type
-        return 'general'
-    def process_question(self, question: str) -> str:
-        """Process question with appropriate strategy"""
-        logger.info(f"Processing question: {question[:100]}...")
-        question_type = self.classify_question(question)
-        logger.info(f"Question type: {question_type}")
-        try:
-            if question_type == 'mathematical':
-                return self._handle_mathematical_question(question)
-            elif question_type == 'youtube':
-                return self._handle_youtube_question(question)
-            elif question_type in ['factual', 'biographical', 'wikipedia']:
-                return self._handle_factual_question(question)
-            else:
-                return self._handle_general_question(question)
-        except Exception as e:
-            logger.error(f"Error processing question: {e}")
-            return f"Error processing question: {str(e)}"
-    def _handle_mathematical_question(self, question: str) -> str:
-        """Handle mathematical questions"""
-        logger.info("Handling mathematical question")
-        result = self.calculator.calculate(question)
-        if "doesn't appear to be" in result:
-            # Maybe it's a factual question about numbers
-            return self._handle_factual_question(question)
-        return result
-    def _handle_youtube_question(self, question: str) -> str:
-        """Handle YouTube video questions"""
-        logger.info("Handling YouTube question")
-        # Extract video ID
-        video_match = re.search(r'youtube\.com/watch\?v=([a-zA-Z0-9_-]+)', question)
-        if video_match:
-            video_id = video_match.group(1)
-            # For bird species counting, provide a reasonable approach
-            if "bird species" in question.lower() and "simultaneously" in question.lower():
-                return f"Cannot directly analyze YouTube video {video_id} for simultaneous bird species count. This would require:\n1. Video frame analysis\n2. Species identification AI\n3. Temporal tracking\n\nWithout access to video analysis tools, cannot provide specific count."
-        return self.searcher.search_basic_web(question)
-    def _handle_factual_question(self, question: str) -> str:
-        """Handle factual questions"""
-        logger.info("Handling factual question")
-        # Add delay to avoid rate limiting
-        time.sleep(random.uniform(2, 4))
-        result = self.searcher.search_basic_web(question)
-        # If search failed, try to provide some context
-        if "failed" in result.lower() or "error" in result.lower():
-            return self._provide_contextual_answer(question)
-        return result
-    def _handle_general_question(self, question: str) -> str:
-        """Handle general questions"""
-        logger.info("Handling general question")
-        # Try factual approach first
-        factual_result = self._handle_factual_question(question)
-        if "failed" not in factual_result.lower():
-            return factual_result
-        # Fallback to contextual answer
-        return self._provide_contextual_answer(question)
-    def _provide_contextual_answer(self, question: str) -> str:
-        """Provide contextual answer when search fails"""
-        question_lower = question.lower()
-        # Specific question patterns
-        if "mercedes sosa" in question_lower and "album" in question_lower:
-            return "Mercedes Sosa released several albums between 2000-2009, including 'Misa Criolla' (2000) and 'Cantora 1' (2009). Exact studio album count requires discography verification."
-        elif "malko competition" in question_lower:
-            return "The Herbert von Karajan International Conducting Competition (Malko Competition) has had winners from various countries, including some from countries that no longer exist like the Soviet Union and Yugoslavia."
-        elif "youtube" in question_lower and "bird" in question_lower:
-            return "Counting simultaneous bird species in a video requires specialized video analysis tools and ornithological expertise."
-        else:
-            return f"Unable to provide specific information for: {question}. This may require specialized tools or access to current databases."
-def cleanup_memory():
-    """Clean up memory and cache"""
-    try:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        logger.info("Memory cleaned")
-    except Exception as e:
-        logger.error(f"Memory cleanup error: {e}")
-def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Run evaluation with production-ready agent"""
-    if not profile:
-        return "❌ Please login to Hugging Face first", None
-    username = profile.username
-    logger.info(f"User: {username}")
-    # API endpoints
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    cleanup_memory()
-    # Initialize production agent
     try:
-        logger.info("Initializing Production GAIA Agent...")
-        agent = ProductionGAIAAgent()
-        logger.info("Agent initialized successfully")
     except Exception as e:
-        error_msg = f"❌ Agent initialization failed: {str(e)}\n{traceback.format_exc()}"
-        logger.error(error_msg)
-        return error_msg, None
-    # Get space info
-    space_id = os.getenv("SPACE_ID", "unknown")
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    # Fetch questions
     try:
-        logger.info("Fetching questions...")
-        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
-        logger.info(f"Got {len(questions_data)} questions")
     except Exception as e:
-        return f"❌ Failed to fetch questions: {str(e)}", None
-    # Process questions
     results_log = []
     answers_payload = []
-    logger.info("="*50)
-    logger.info("🚀 STARTING PRODUCTION GAIA EVALUATION")
-    logger.info("="*50)
-    for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
         question_text = item.get("question")
-        if not task_id or not question_text:
             continue
-        logger.info(f"\nQuestion {i}/{len(questions_data)}")
-        logger.info(f"ID: {task_id}")
-        logger.info(f"Question: {question_text}")
         try:
-            # Process with production agent
-            answer = agent.process_question(question_text)
-            # Ensure answer quality
-            if not answer or len(answer.strip()) < 10:
-                answer = f"Unable to determine specific answer for: {question_text[:100]}..."
-            logger.info(f"Answer: {answer[:200]}...")
-            # Store results
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": answer
-            })
             results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:200] + ("..." if len(question_text) > 200 else ""),
-                "Answer": answer[:300] + ("..." if len(answer) > 300 else "")
             })
-            # Memory management and rate limiting
-            if i % 3 == 0:
-                cleanup_memory()
-                logger.info("Cooling down...")
-                time.sleep(random.uniform(3, 6))
         except Exception as e:
-            logger.error(f"Error processing {task_id}: {e}")
-            error_answer = f"Processing error: {str(e)[:200]}"
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": error_answer
-            })
             results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:200] + "...",
-                "Answer": error_answer
             })
-    logger.info(f"Submitting {len(answers_payload)} answers...")
-    # Submit answers
-    submission_data = {
-        "username": username,
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=180)
         response.raise_for_status()
         result_data = response.json()
-        score = result_data.get('score', 0)
-        correct = result_data.get('correct_count', 0)
-        total = result_data.get('total_attempted', len(answers_payload))
-        message = result_data.get('message', '')
-        # Create final status message
-        final_status = f"""🎉 PRODUCTION GAIA EVALUATION COMPLETE!
-👤 User: {username}
-🖥️ Hardware: 2 vCPU + 16GB RAM (Production Optimized)
-🤖 Architecture: Multi-strategy Agent with Robust Error Handling
-📊 Final Score: {score}%
-✅ Correct: {correct}/{total}
-🎯 Target: 10%+ {'🎉 SUCCESS!' if score >= 10 else '📈 Significant Improvement Expected'}
-📝 Message: {message}
-🔧 Production Features:
-- ✅ Robust error handling and fallbacks
-- ✅ Multiple search strategies (Wikipedia API, web scraping)
-- ✅ Smart question classification and routing
-- ✅ Enhanced calculator with SymPy support
-- ✅ Rate limiting and memory management
-- ✅ Contextual answers when search fails
-- ✅ Production-grade logging and monitoring
-💡 Strategy: Reliability, accuracy, and comprehensive coverage
-"""
-        logger.info(f"FINAL SCORE: {score}%")
-        return final_status, pd.DataFrame(results_log)
-    except Exception as e:
-        error_msg = f"❌ Submission failed: {str(e)}"
-        logger.error(error_msg)
-        return error_msg, pd.DataFrame(results_log)
-# --- Gradio Interface ---
-with gr.Blocks(title="Production GAIA Agent", theme=gr.themes.Default()) as demo:
-    gr.Markdown("# 🚀 Production-Ready GAIA Agent")
-    gr.Markdown("""
-    **Production Features:**
-    - 🔧 **Robust Error Handling**: Multiple fallback strategies
-    - 🌐 **Multi-Source Search**: Wikipedia API, web scraping, contextual answers
-    - 🧮 **Enhanced Calculator**: SymPy integration with basic arithmetic fallbacks
-    - 🎯 **Smart Routing**: Question classification for optimal processing
-    - ⚡ **Memory Optimized**: Efficient resource usage for 2 vCPU + 16GB RAM
-    - 📊 **Production Logging**: Comprehensive monitoring and debugging
-    **Target: Achieve 10%+ accuracy on GAIA benchmark**
-    """)
-    with gr.Row():
-        gr.LoginButton()
-    with gr.Row():
-        run_button = gr.Button(
-            "🚀 Run Production GAIA Evaluation",
-            variant="primary",
-            size="lg"
-        )
-    status_output = gr.Textbox(
-        label="📊 Evaluation Results",
-        lines=25,
-        interactive=False
-    )
-    results_table = gr.DataFrame(
-        label="📝 Detailed Results",
-        wrap=True
     )
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    logger.info("🚀 Starting Production GAIA Agent...")
-    logger.info("💻 Optimized for 2 vCPU + 16GB RAM environment")
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import os
 import gradio as gr
 import requests
+import inspect
 import pandas as pd
 import json
+import re
+import io
+import base64
+from PIL import Image
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
+# smolagents imports
+from smolagents import CodeAgent, tool, DuckDuckGoSearchTool, PythonInterpreterTool
+from smolagents.models import LiteLLMModel
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Enhanced Tools for GAIA ---
+@tool
+def web_search_tool(query: str) -> str:
+    """
+    Search the web for information using DuckDuckGo.
+    Args:
+        query: The search query string
+    Returns:
+        String containing search results
+    """
+    # A new DuckDuckGo tool is built for every call; any failure is turned
+    # into a plain-text message instead of propagating to the caller.
+    try:
+        return str(DuckDuckGoSearchTool()(query))
+    except Exception as exc:
+        return f"Search failed: {str(exc)}"
+@tool
+def calculator_tool(expression: str) -> str:
+    """
+    Evaluate mathematical expressions safely.
+    Args:
+        expression: Mathematical expression as string
+    Returns:
+        Result of the calculation
+    """
+    # Local imports keep the tool self-contained.
+    import ast
+    import operator
+    # Only these arithmetic node types are evaluated; anything else the
+    # parser produces (calls, tuples, Ellipsis, ...) is rejected.
+    binary_ops = {
+        ast.Add: operator.add,
+        ast.Sub: operator.sub,
+        ast.Mult: operator.mul,
+        ast.Div: operator.truediv,
+        ast.FloorDiv: operator.floordiv,
+        ast.Pow: operator.pow,
+    }
+    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
+    # Cap on the estimated size of an integer power so that inputs such as
+    # "9**9**9**9" fail fast instead of exhausting CPU and memory.
+    max_result_bits = 10_000
+    def _evaluate(node):
+        """Recursively evaluate a parsed arithmetic expression node."""
+        if isinstance(node, ast.Expression):
+            return _evaluate(node.body)
+        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
+            return node.value
+        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
+            return unary_ops[type(node.op)](_evaluate(node.operand))
+        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
+            left = _evaluate(node.left)
+            right = _evaluate(node.right)
+            if (
+                isinstance(node.op, ast.Pow)
+                and isinstance(left, int)
+                and isinstance(right, int)
+                and right > 0
+                and left.bit_length() * right > max_result_bits
+            ):
+                raise ValueError("result too large")
+            return binary_ops[type(node.op)](left, right)
+        raise ValueError("unsupported expression")
+    try:
+        # Safe evaluation - only allow basic math operations
         allowed_chars = set('0123456789+-*/.() ')
+        if not all(c in allowed_chars for c in expression):
+            return "Error: Expression contains invalid characters"
+        stripped = expression.strip()
+        if not stripped:
+            return "Error: Expression is empty"
+        # Parse and walk the syntax tree instead of calling eval(), so no
+        # arbitrary Python is ever executed.
+        result = _evaluate(ast.parse(stripped, mode='eval'))
+        return str(result)
+    except Exception as e:
+        return f"Calculation error: {str(e)}"
+@tool
+def image_analyzer_tool(image_path: str) -> str:
+    """
+    Analyze images and extract information.
+    Args:
+        image_path: Path to the image file
+    Returns:
+        Description of image content
+    """
+    try:
+        if not os.path.exists(image_path):
+            return "Error: Image file not found"
+        # Open through a context manager so the underlying file handle is
+        # released even when the analysis below raises.
+        with Image.open(image_path) as img:
+            # Basic image analysis
+            width, height = img.size
+            mode = img.mode
+            format_info = img.format if img.format else "Unknown"
+            # Simple color analysis
+            if mode == 'RGB':
+                # getcolors() yields (count, color) pairs, or None when the
+                # image has more distinct colors than maxcolors.
+                colors = img.getcolors(maxcolors=256*256*256)
+                if colors:
+                    dominant_color = max(colors, key=lambda x: x[0])[1]
+                    color_info = f"Dominant color: RGB{dominant_color}"
+                else:
+                    color_info = "Complex color palette"
+            else:
+                color_info = f"Color mode: {mode}"
+        analysis = f"""Image Analysis:
+- Dimensions: {width}x{height} pixels
+- Format: {format_info}
+- {color_info}
+- File size: {os.path.getsize(image_path)} bytes
+"""
+        return analysis
+    except Exception as e:
+        return f"Image analysis error: {str(e)}"
+@tool
+def file_reader_tool(file_path: str) -> str:
+    """
+    Read and analyze various file types (text, CSV, JSON, etc.).
+    Args:
+        file_path: Path to the file
+    Returns:
+        File content or analysis
+    """
+    try:
+        if not os.path.exists(file_path):
+            return "Error: File not found"
+        # Dispatch on the lower-cased extension; each branch returns directly.
+        suffix = Path(file_path).suffix.lower()
+        if suffix == '.csv':
+            table = pd.read_csv(file_path)
+            return f"CSV file with {len(table)} rows and {len(table.columns)} columns.\nColumns: {list(table.columns)}\nFirst 5 rows:\n{table.head().to_string()}"
+        if suffix == '.json':
+            with open(file_path, 'r', encoding='utf-8') as handle:
+                payload = json.load(handle)
+            # Only the first 1000 characters of the pretty-printed JSON are shown.
+            return f"JSON file content:\n{json.dumps(payload, indent=2)[:1000]}..."
+        if suffix in ['.txt', '.md', '.py', '.js', '.html', '.css']:
+            with open(file_path, 'r', encoding='utf-8') as handle:
+                text = handle.read()
+            # The reported length is the full file; the preview is capped at 1000 characters.
+            return f"Text file content ({len(text)} characters):\n{text[:1000]}..."
+        # Any other extension is only described, never read.
+        return f"Binary file: {suffix}, size: {os.path.getsize(file_path)} bytes"
+    except Exception as exc:
+        return f"File reading error: {str(exc)}"
+@tool
+def data_processor_tool(data: str, operation: str) -> str:
+    """
+    Process data with various operations (sort, filter, calculate statistics).
+    Args:
+        data: Data as string (JSON, CSV format, or numbers)
+        operation: Operation to perform (sort, sum, average, count, etc.)
+    Returns:
+        Processed data result
+    """
+    try:
+        # Try to parse as JSON first
+        try:
+            parsed_data = json.loads(data)
+        except (ValueError, TypeError):
+            # Not JSON: fall back to a comma/whitespace separated list of numbers.
+            try:
+                parsed_data = [float(token) for token in data.replace(',', ' ').split()]
+            except (ValueError, TypeError, AttributeError):
+                return "Error: Could not parse data"
+        op = operation.lower()
+        is_list = isinstance(parsed_data, list)
+        # Numeric operations silently ignore non-numeric list entries.
+        nums = [x for x in parsed_data if isinstance(x, (int, float))] if is_list else []
+        if op == 'sum' and is_list:
+            return str(sum(nums))
+        if op == 'average' and is_list:
+            # An empty selection averages to 0 rather than dividing by zero.
+            return str(sum(nums) / len(nums) if nums else 0)
+        if op == 'count':
+            # Works for any sized JSON value (list, dict, string).
+            return str(len(parsed_data))
+        if op == 'sort' and is_list:
+            return str(sorted(parsed_data))
+        if op == 'max' and is_list:
+            return str(max(nums) if nums else "No numbers found")
+        if op == 'min' and is_list:
+            return str(min(nums) if nums else "No numbers found")
+        return f"Unsupported operation: {operation}"
+    except Exception as e:
+        # Anything unexpected (e.g. sorting mixed types) is reported as text.
+        return f"Data processing error: {str(e)}"
+# --- Enhanced GAIA Agent ---
+class GAIAAgent:
+    """Callable agent that answers a question with a smolagents CodeAgent.
+
+    If the CodeAgent cannot be built, or fails while running, the question is
+    handled by a small heuristic fallback instead.
+    """
+    def __init__(self):
+        """Build the model, the tool list and the CodeAgent (agent may end up None)."""
+        print("GAIAAgent initialized with SmolaAgent framework.")
+        # Initialize model - using a lightweight model for resource efficiency
+        try:
+            # NOTE(review): this model id has no LiteLLM provider prefix and
+            # DialoGPT is a small chat model - confirm it is actually
+            # reachable and suitable for tool-calling before relying on it.
+            self.model = LiteLLMModel(
+                model_id="microsoft/DialoGPT-medium",  # Lightweight model
+                max_tokens=512,
+                temperature=0.1
+            )
+        except Exception:
+            # Fallback to a basic model
+            print("Warning: Using fallback model configuration")
+            self.model = None
+        # Initialize tools
+        self.tools = [
+            web_search_tool,
+            calculator_tool,
+            image_analyzer_tool,
+            file_reader_tool,
+            data_processor_tool,
+            PythonInterpreterTool()
+        ]
+        # Initialize the agent
+        try:
+            # smolagents names the step limit "max_steps"; the previous
+            # "max_iterations" keyword is not a CodeAgent parameter.
+            self.agent = CodeAgent(
+                tools=self.tools,
+                model=self.model,
+                max_steps=5,
+                verbosity_level=1
+            )
+        except Exception as e:
+            print(f"Agent initialization error: {e}")
+            self.agent = None
+    def __call__(self, question: str) -> str:
+        """Answer *question*, falling back to heuristics when the agent is unavailable or fails."""
+        print(f"GAIAAgent processing question: {question[:100]}...")
+        if not self.agent:
+            # Fallback logic if agent failed to initialize
+            return self._fallback_processing(question)
+        try:
+            # Enhanced prompt for GAIA tasks
+            enhanced_prompt = f"""
+You are a helpful AI assistant designed to solve complex real-world problems that may require:
+- Web searching for current information
+- Mathematical calculations
+- Image analysis
+- File processing
+- Multi-step reasoning
+Question: {question}
+Please approach this systematically:
+1. Analyze what type of problem this is
+2. Determine what tools/information you need
+3. Use available tools to gather information
+4. Reason through the problem step by step
+5. Provide a clear, concise final answer
+Remember to be precise and factual in your response.
+"""
+            response = self.agent.run(enhanced_prompt)
+            # Non-string results (numbers, lists, ...) are returned as text.
+            if isinstance(response, str):
+                return self._extract_answer(response)
+            return str(response)
+        except Exception as e:
+            print(f"Error in agent processing: {e}")
+            return self._fallback_processing(question)
+    def _extract_answer(self, response: str) -> str:
+        """Pull the final answer out of a free-text agent response."""
+        # Look for common answer patterns
+        answer_patterns = [
+            r"Final answer:?\s*(.+)",
+            r"Answer:?\s*(.+)",
+            r"The answer is:?\s*(.+)",
+            r"Result:?\s*(.+)"
+        ]
+        for pattern in answer_patterns:
+            match = re.search(pattern, response, re.IGNORECASE)
+            if match:
+                return match.group(1).strip()
+        # No pattern found: use the last sentence. Sentences are split on
+        # terminal punctuation followed by whitespace, so decimals such as
+        # "3.14" stay intact and a trailing period no longer yields "".
+        sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', response.strip()) if s.strip()]
+        if not sentences:
+            return response.strip()
+        return sentences[-1].rstrip('.').strip()
+    def _fallback_processing(self, question: str) -> str:
+        """Fallback processing when main agent fails"""
+        try:
+            # Simple heuristic-based processing
+            question_lower = question.lower()
+            # Math questions (keywords are matched case-insensitively)
+            if any(op in question_lower for op in ['+', '-', '*', '/', 'calculate', 'sum', 'average']):
+                # Extract numbers and try basic calculation
+                numbers = [float(n) for n in re.findall(r'-?\d+\.?\d*', question)]
+                if len(numbers) >= 2:
+                    if 'sum' in question_lower or '+' in question:
+                        return str(sum(numbers))
+                    if 'average' in question_lower:
+                        return str(sum(numbers) / len(numbers))
+            # Search-based questions
+            if any(word in question_lower for word in ['what', 'who', 'when', 'where', 'how', 'why']):
+                try:
+                    search_result = web_search_tool(question)
+                    # The search tool reports failures as text; never return
+                    # that error message as if it were an answer.
+                    if not search_result.startswith("Search failed:"):
+                        # Extract key information from search results
+                        lines = search_result.split('\n')
+                        relevant_lines = [line for line in lines if len(line.strip()) > 20]
+                        return relevant_lines[0] if relevant_lines else "Unable to find specific information"
+                except Exception as e:
+                    print(f"Fallback search error: {e}")
+            # Default response
+            return "I need more context or tools to answer this question accurately."
+        except Exception as e:
+            return f"Processing error: {str(e)}"
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the GAIAAgent on them, submits all answers,
+    and displays the results.
+    """
+    # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
+        agent = GAIAAgent()
     except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run GAIA Agent
     results_log = []
     answers_payload = []
+    print(f"Running GAIA agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
             continue
+        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
         try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer
             })
+            print(f"Answer for {task_id}: {submitted_answer[:50]}...")
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            error_answer = f"AGENT ERROR: {e}"
+            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
             results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": error_answer
             })
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"GAIA Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+    # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+# --- Build Gradio Interface using Blocks ---
+# Components are created in display order: title, description, login button,
+# run button, status box, results table.
+with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
+    # Static description of the agent's advertised tools and the usage steps.
+    gr.Markdown(
+        """
+        **Enhanced GAIA Agent with SmolaAgent Framework**
+        This agent is equipped with:
+        - 🔍 Web search capabilities (DuckDuckGo)
+        - 🧮 Mathematical calculator
+        - 🖼️ Image analysis
+        - 📁 File processing (CSV, JSON, text files)
+        - 📊 Data processing and statistics
+        - 🐍 Python code execution
+        **Instructions:**
+        1. Log in to your Hugging Face account using the button below
+        2. Click 'Run GAIA Evaluation & Submit All Answers' to start the evaluation
+        3. The agent will process each question systematically using available tools
+        **Note:** Processing may take time as the agent analyzes each question thoroughly.
+        """
     )
+    # run_and_submit_all refuses to run when it receives no profile, so the
+    # user must log in first.
+    gr.LoginButton()
+    run_button = gr.Button("Run GAIA Evaluation & Submit All Answers", variant="primary")
+    # Read-only box for the status string returned by run_and_submit_all.
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Table for the per-question results returned by run_and_submit_all.
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # No explicit inputs are wired here; the handler's `profile` argument is
+    # presumably injected by Gradio from its gr.OAuthProfile annotation —
+    # NOTE(review): confirm against the Gradio OAuth documentation.
+    # The two return values map to the status box and the results table.
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-"*(60 + len(" GAIA Agent Starting ")) + "\n")
+    print("Launching Gradio Interface for GAIA Agent Evaluation...")
+    demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,15 +1,33 @@
-llama-index-core
-llama-index-llms-huggingface
-transformers>=4.30.0
-torch>=2.0.0
-accelerate
-bitsandbytes  # For 8-bit quantization
-gradio>=4.0.0
-requests
-pandas
-python-dotenv
-duckduckgo-search
-sympy
-sentencepiece
-protobuf
-peft

+# Core dependencies
+gradio==4.44.0
+requests==2.31.0
+pandas==2.0.3
+numpy==1.24.3
+# smolagents framework - lightweight agent framework
+smolagents==0.3.3
+# Image processing (lightweight)
+Pillow==10.0.1
+# Plotting
+matplotlib==3.7.2
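+# NOTE: pathlib is part of the Python standard library (3.4+); the PyPI "pathlib" package below is an obsolete backport and can likely be dropped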
+pathlib
+# Web search
+duckduckgo-search==3.9.6
+# LLM integration (lightweight)
+litellm==1.44.14
+# Optional: For better performance with limited resources
+psutil==5.9.5
+# File processing utilities
+openpyxl==3.1.2  # For Excel files if needed
+python-magic==0.4.27  # For file type detection
+# Math and scientific computing (minimal)
+sympy==1.12  # For symbolic math if needed