Spaces:

schoolkithub
/

multi-agent-gaia-system

Runtime error

File size: 15,098 Bytes

26eff0c

#!/usr/bin/env python3
"""
🚀 SmoLAgents-Powered GAIA System
Enhanced GAIA benchmark agent using smolagents framework for 60+ point performance boost

Integrates our existing 18-tool arsenal with proven agentic framework patterns.
Target: 67%+ GAIA Level 1 accuracy (vs 30% requirement)
"""

import os
import logging
import tempfile
from typing import Dict, Any, List, Optional
from dataclasses import dataclass

# Core imports
try:
    from smolagents import CodeAgent, InferenceClientModel, tool, DuckDuckGoSearchTool
    from smolagents.tools import VisitWebpageTool
    SMOLAGENTS_AVAILABLE = True
    print("✅ SmoLAgents framework loaded successfully")
except ImportError as e:
    SMOLAGENTS_AVAILABLE = False
    print(f"⚠️ SmoLAgents not available: {e}")
    # Fallback to our existing system
    from gaia_system import BasicAgent as FallbackAgent

# Import our existing system for tool wrapping
from gaia_system import UniversalMultimodalToolkit, EnhancedMultiModelGAIASystem

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SmoLAgentsGAIASystem:
    """🚀 Enhanced GAIA system powered by SmoLAgents framework"""
    
    def __init__(self, hf_token: str = None, openai_key: str = None):
        """Initialize SmoLAgents-powered GAIA system"""
        self.hf_token = hf_token or os.getenv('HF_TOKEN')
        self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')
        
        if not SMOLAGENTS_AVAILABLE:
            logger.warning("🔄 SmoLAgents unavailable, falling back to custom system")
            self.fallback_agent = FallbackAgent(hf_token, openai_key)
            self.agent = None
            return
        
        # Initialize our existing toolkit for tool wrapping
        self.toolkit = UniversalMultimodalToolkit(self.hf_token, self.openai_key)
        
        # Create model with priority system (Qwen3-235B-A22B first)
        self.model = self._create_model()
        
        # Initialize smolagents with our wrapped tools
        self.agent = self._create_smolagents_agent()
        
        logger.info("🚀 SmoLAgents GAIA System initialized with 18+ tools")
    
    def _create_model(self):
        """Create model with our priority system - Qwen3-235B-A22B first"""
        try:
            # Priority 1: Qwen3-235B-A22B (Best reasoning for GAIA)
            if self.hf_token:
                return InferenceClientModel(
                    provider="fireworks-ai",
                    api_key=self.hf_token,
                    model="Qwen/Qwen3-235B-A22B"
                )
        except Exception as e:
            logger.warning(f"⚠️ Qwen3-235B-A22B unavailable: {e}")
        
        try:
            # Priority 2: DeepSeek-R1 (Strong reasoning)
            if self.hf_token:
                return InferenceClientModel(
                    model="deepseek-ai/DeepSeek-R1",
                    token=self.hf_token
                )
        except Exception as e:
            logger.warning(f"⚠️ DeepSeek-R1 unavailable: {e}")
        
        try:
            # Priority 3: GPT-4o (Vision capabilities)
            if self.openai_key:
                return InferenceClientModel(
                    provider="openai",
                    api_key=self.openai_key,
                    model="gpt-4o"
                )
        except Exception as e:
            logger.warning(f"⚠️ GPT-4o unavailable: {e}")
        
        # Fallback to HF default
        return InferenceClientModel(
            model="meta-llama/Llama-3.1-8B-Instruct",
            token=self.hf_token
        )
    
    def _create_smolagents_agent(self):
        """Create CodeAgent with our comprehensive tool suite"""
        
        # Core tools from smolagents
        tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
        ]
        
        # Add our wrapped custom tools
        tools.extend([
            self.download_file_tool,
            self.read_pdf_tool,
            self.analyze_image_tool,
            self.transcribe_speech_tool,
            self.calculator_tool,
            self.process_video_tool,
            self.generate_image_tool,
            self.create_visualization_tool,
            self.scientific_compute_tool,
            self.detect_objects_tool,
            self.analyze_audio_tool,
            self.synthesize_speech_tool,
        ])
        
        # Create CodeAgent with optimized system prompt for GAIA
        agent = CodeAgent(
            tools=tools,
            model=self.model,
            system_prompt=self._get_gaia_optimized_prompt(),
            max_steps=5,  # Allow multi-step reasoning
            verbosity=0   # Clean output for GAIA compliance
        )
        
        return agent
    
    def _get_gaia_optimized_prompt(self):
        """GAIA-optimized system prompt for exact answer format"""
        return """You are an expert AI assistant specialized in solving GAIA benchmark questions.

CRITICAL INSTRUCTIONS:
1. Use available tools to gather information, process files, analyze content
2. Think step-by-step through complex multi-hop reasoning
3. For GAIA questions, provide ONLY the final answer - no explanations or thinking process
4. Answer format: number OR few words OR comma-separated list
5. No units (like $ or %) unless specified
6. No articles or abbreviations for strings
7. Write digits in plain text unless specified
8. For lists, apply above rules to each element

AVAILABLE TOOLS:
- DuckDuckGoSearchTool: Search the web for current information
- VisitWebpageTool: Visit and extract content from URLs
- download_file_tool: Download files from GAIA tasks or URLs
- read_pdf_tool: Extract text from PDF documents
- analyze_image_tool: Analyze images and answer questions about them
- transcribe_speech_tool: Convert audio to text using Whisper
- calculator_tool: Perform mathematical calculations
- process_video_tool: Analyze video content and extract frames
- generate_image_tool: Create images from text descriptions
- create_visualization_tool: Create charts and data visualizations
- scientific_compute_tool: Statistical analysis and scientific computing
- detect_objects_tool: Identify objects in images
- analyze_audio_tool: Analyze audio features and content
- synthesize_speech_tool: Convert text to speech

Approach each question systematically:
1. Understand what information is needed
2. Use appropriate tools to gather data
3. Process and analyze the information
4. Provide the exact answer in the required format"""

    # === TOOL WRAPPERS FOR SMOLAGENTS ===
    
    @tool
    def download_file_tool(self, url: str = "", task_id: str = "") -> str:
        """📥 Download files from URLs or GAIA API
        
        Args:
            url: URL to download from
            task_id: GAIA task ID for file download
        """
        return self.toolkit.download_file(url, task_id)
    
    @tool
    def read_pdf_tool(self, file_path: str) -> str:
        """📄 Extract text from PDF documents
        
        Args:
            file_path: Path to the PDF file
        """
        return self.toolkit.read_pdf(file_path)
    
    @tool
    def analyze_image_tool(self, image_path: str, question: str = "") -> str:
        """🖼️ Analyze images and answer questions about them
        
        Args:
            image_path: Path to the image file
            question: Specific question about the image
        """
        return self.toolkit.analyze_image(image_path, question)
    
    @tool
    def transcribe_speech_tool(self, audio_path: str) -> str:
        """🎙️ Convert speech to text using Whisper
        
        Args:
            audio_path: Path to the audio file
        """
        return self.toolkit.transcribe_speech(audio_path)
    
    @tool
    def calculator_tool(self, expression: str) -> str:
        """🧮 Perform mathematical calculations
        
        Args:
            expression: Mathematical expression to evaluate
        """
        return self.toolkit.calculator(expression)
    
    @tool
    def process_video_tool(self, video_path: str, task: str = "analyze") -> str:
        """🎥 Process and analyze video content
        
        Args:
            video_path: Path to the video file
            task: Type of analysis (analyze, extract_frames, motion_detection)
        """
        return self.toolkit.process_video(video_path, task)
    
    @tool
    def generate_image_tool(self, prompt: str, style: str = "realistic") -> str:
        """🎨 Generate images from text descriptions
        
        Args:
            prompt: Text description of the image to generate
            style: Style of the image (realistic, artistic, etc.)
        """
        return self.toolkit.generate_image(prompt, style)
    
    @tool
    def create_visualization_tool(self, data: str, chart_type: str = "bar") -> str:
        """📊 Create data visualizations and charts
        
        Args:
            data: JSON string of data to visualize
            chart_type: Type of chart (bar, line, scatter, pie)
        """
        try:
            import json
            data_dict = json.loads(data)
            return self.toolkit.create_visualization(data_dict, chart_type)
        except:
            return "❌ Invalid data format. Provide JSON with 'x' and 'y' keys."
    
    @tool
    def scientific_compute_tool(self, operation: str, data: str) -> str:
        """🧬 Perform scientific computations and analysis
        
        Args:
            operation: Type of operation (statistics, correlation, clustering)
            data: JSON string of data for computation
        """
        try:
            import json
            data_dict = json.loads(data)
            return self.toolkit.scientific_compute(operation, data_dict)
        except:
            return "❌ Invalid data format. Provide JSON data."
    
    @tool
    def detect_objects_tool(self, image_path: str) -> str:
        """🎯 Detect and identify objects in images
        
        Args:
            image_path: Path to the image file
        """
        return self.toolkit.detect_objects(image_path)
    
    @tool
    def analyze_audio_tool(self, audio_path: str, task: str = "analyze") -> str:
        """🎵 Analyze audio content and features
        
        Args:
            audio_path: Path to the audio file
            task: Type of analysis (analyze, transcribe, features)
        """
        return self.toolkit.analyze_audio(audio_path, task)
    
    @tool
    def synthesize_speech_tool(self, text: str, voice: str = "default") -> str:
        """🗣️ Convert text to speech
        
        Args:
            text: Text to convert to speech
            voice: Voice type (default, female, male)
        """
        return self.toolkit.synthesize_speech(text, voice)
    
    # === MAIN INTERFACE ===
    
    def query(self, question: str) -> str:
        """Process GAIA question with smolagents framework"""
        if not SMOLAGENTS_AVAILABLE:
            logger.info("🔄 Using fallback agent")
            return self.fallback_agent.query(question)
        
        try:
            logger.info(f"🚀 Processing with SmoLAgents: {question[:100]}...")
            
            # Use CodeAgent for processing
            response = self.agent.run(question)
            
            # Clean response for GAIA compliance
            cleaned_response = self._clean_for_gaia_submission(response)
            
            logger.info(f"✅ SmoLAgents response: {cleaned_response}")
            return cleaned_response
            
        except Exception as e:
            logger.error(f"❌ SmoLAgents error: {e}")
            # Fallback to our existing system
            if hasattr(self, 'fallback_agent'):
                return self.fallback_agent.query(question)
            else:
                return f"❌ Processing failed: {e}"
    
    def _clean_for_gaia_submission(self, response: str) -> str:
        """Clean response for GAIA API submission"""
        if not response:
            return "Unable to provide answer"
        
        # Remove common prefixes and suffixes
        response = response.strip()
        
        # Remove "The answer is:", "Final answer:", etc.
        prefixes_to_remove = [
            "the answer is:", "final answer:", "answer:", "result:", 
            "final result:", "conclusion:", "solution:", "output:",
            "the final answer is:", "my answer is:", "i think the answer is:"
        ]
        
        response_lower = response.lower()
        for prefix in prefixes_to_remove:
            if response_lower.startswith(prefix):
                response = response[len(prefix):].strip()
                break
        
        # Remove trailing periods and common suffixes
        response = response.rstrip('.')
        
        # Final validation
        if len(response) < 1:
            return "Unable to provide answer"
        
        return response.strip()
    
    def cleanup(self):
        """Clean up resources"""
        if hasattr(self.toolkit, 'cleanup'):
            self.toolkit.cleanup()


class SmoLAgentsBasicAgent:
    """🚀 Simple interface compatible with existing app.py"""
    
    def __init__(self, hf_token: str = None, openai_key: str = None):
        self.system = SmoLAgentsGAIASystem(hf_token, openai_key)
    
    def query(self, question: str) -> str:
        """Process question with SmoLAgents system"""
        return self.system.query(question)
    
    def clean_for_api_submission(self, response: str) -> str:
        """Clean response for GAIA API submission"""
        return self.system._clean_for_gaia_submission(response)
    
    def __call__(self, question: str) -> str:
        """Make agent callable"""
        return self.query(question)
    
    def cleanup(self):
        """Clean up resources"""
        self.system.cleanup()


def create_smolagents_gaia_system(hf_token: str = None, openai_key: str = None) -> SmoLAgentsGAIASystem:
    """Factory function to create SmoLAgents GAIA system"""
    return SmoLAgentsGAIASystem(hf_token, openai_key)


# === TESTING FUNCTION ===
def test_smolagents_system():
    """Test SmoLAgents integration with GAIA questions"""
    print("🧪 Testing SmoLAgents GAIA System...")
    
    try:
        agent = SmoLAgentsBasicAgent()
        
        test_questions = [
            "What is 15 + 27?",
            "What is the capital of France?", 
            "How many days are in a week?",
            "What color is the sky during the day?"
        ]
        
        for i, question in enumerate(test_questions, 1):
            print(f"\n📝 Test {i}: {question}")
            try:
                answer = agent.query(question)
                print(f"✅ Answer: {answer}")
            except Exception as e:
                print(f"❌ Error: {e}")
        
        print("\n🚀 SmoLAgents system test completed!")
        
    except Exception as e:
        print(f"❌ Test failed: {e}")


if __name__ == "__main__":
    test_smolagents_system()