File size: 15,098 Bytes
26eff0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
#!/usr/bin/env python3
"""
๐Ÿš€ SmoLAgents-Powered GAIA System
Enhanced GAIA benchmark agent using smolagents framework for 60+ point performance boost

Integrates our existing 18-tool arsenal with proven agentic framework patterns.
Target: 67%+ GAIA Level 1 accuracy (vs 30% requirement)
"""

import os
import logging
import tempfile
from typing import Dict, Any, List, Optional
from dataclasses import dataclass

# Core imports
try:
    from smolagents import CodeAgent, InferenceClientModel, tool, DuckDuckGoSearchTool
    from smolagents.tools import VisitWebpageTool
    SMOLAGENTS_AVAILABLE = True
    print("โœ… SmoLAgents framework loaded successfully")
except ImportError as e:
    SMOLAGENTS_AVAILABLE = False
    print(f"โš ๏ธ SmoLAgents not available: {e}")
    # Fallback to our existing system
    from gaia_system import BasicAgent as FallbackAgent

# Import our existing system for tool wrapping
from gaia_system import UniversalMultimodalToolkit, EnhancedMultiModelGAIASystem

# Configure the root logger at INFO level as an import-time side effect, then
# create the module-level logger used by every class and function below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SmoLAgentsGAIASystem:
    """GAIA question-answering system built around a smolagents ``CodeAgent``.

    When the smolagents import succeeded, the instance owns a
    ``UniversalMultimodalToolkit``, a model and a ``CodeAgent`` whose tool list
    combines two stock smolagents tools with the toolkit wrappers defined
    below.  Otherwise every query is delegated to ``FallbackAgent``.

    The ``*_tool`` methods are deliberately plain methods.  They are converted
    into smolagents tools per instance (see ``_build_custom_tools``) instead of
    being decorated with ``@tool`` in the class body: the name ``tool`` is only
    bound when the smolagents import succeeded, so a class-level decorator
    made the class statement itself fail on the fallback path, and it handed
    the decorator an unbound function with an unannotated ``self`` parameter.
    """

    # Names of the toolkit-backed methods exposed to the agent, in the order
    # they are appended to the agent's tool list.
    _CUSTOM_TOOL_NAMES = (
        "download_file_tool",
        "read_pdf_tool",
        "analyze_image_tool",
        "transcribe_speech_tool",
        "calculator_tool",
        "process_video_tool",
        "generate_image_tool",
        "create_visualization_tool",
        "scientific_compute_tool",
        "detect_objects_tool",
        "analyze_audio_tool",
        "synthesize_speech_tool",
    )

    def __init__(self, hf_token: str = None, openai_key: str = None):
        """Resolve credentials and build either the agent or the fallback.

        Args:
            hf_token: Hugging Face token; the ``HF_TOKEN`` environment
                variable is used when this is falsy.
            openai_key: OpenAI key; the ``OPENAI_API_KEY`` environment
                variable is used when this is falsy.
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN')
        self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')

        if not SMOLAGENTS_AVAILABLE:
            logger.warning("๐Ÿ”„ SmoLAgents unavailable, falling back to custom system")
            # Hand over the resolved credentials so values taken from the
            # environment also reach the fallback agent.
            self.fallback_agent = FallbackAgent(self.hf_token, self.openai_key)
            self.agent = None
            return

        # Toolkit that the *_tool methods below delegate to.
        self.toolkit = UniversalMultimodalToolkit(self.hf_token, self.openai_key)

        # Model chosen by the priority list in _create_model.
        self.model = self._create_model()

        # CodeAgent wired with the stock tools plus the wrapped toolkit tools.
        self.agent = self._create_smolagents_agent()

        logger.info("๐Ÿš€ SmoLAgents GAIA System initialized with 18+ tools")

    def _create_model(self):
        """Return the first model that can be constructed, in priority order.

        Order: Qwen3-235B-A22B and DeepSeek-R1 (both need ``hf_token``), then
        GPT-4o (needs ``openai_key``), then Llama-3.1-8B-Instruct as the
        unconditional last resort.  A constructor failure is logged and the
        next candidate is tried; a failure of the last resort propagates.
        """
        candidates = []
        if self.hf_token:
            candidates.append((
                "Qwen3-235B-A22B",
                dict(
                    provider="fireworks-ai",
                    api_key=self.hf_token,
                    model_id="Qwen/Qwen3-235B-A22B",
                ),
            ))
            candidates.append((
                "DeepSeek-R1",
                dict(model_id="deepseek-ai/DeepSeek-R1", token=self.hf_token),
            ))
        if self.openai_key:
            # NOTE(review): "openai" is passed as an inference *provider* here;
            # confirm InferenceClientModel accepts it, otherwise this candidate
            # needs a different model class.
            candidates.append((
                "GPT-4o",
                dict(provider="openai", api_key=self.openai_key, model_id="gpt-4o"),
            ))

        # NOTE(review): presumably constructing the model does not contact the
        # remote service, so only constructor-time errors are caught here and a
        # model that is unreachable at run time is not skipped - confirm.
        for label, kwargs in candidates:
            try:
                # The model name goes in `model_id`; a `model=` keyword is not
                # the constructor parameter that selects the model.
                return InferenceClientModel(**kwargs)
            except Exception as e:
                logger.warning(f"โš ๏ธ {label} unavailable: {e}")

        # Fallback to HF default
        return InferenceClientModel(
            model_id="meta-llama/Llama-3.1-8B-Instruct",
            token=self.hf_token,
        )

    def _build_custom_tools(self) -> List[Any]:
        """Wrap every method named in ``_CUSTOM_TOOL_NAMES`` as a smolagents tool.

        The bound methods are passed to ``tool`` so that the decorator sees a
        signature without ``self`` while calls still reach this instance's
        toolkit.  Must only be called when smolagents was imported.
        """
        return [tool(getattr(self, name)) for name in self._CUSTOM_TOOL_NAMES]

    def _create_smolagents_agent(self):
        """Build the ``CodeAgent`` with the stock and toolkit-backed tools."""
        # Core tools from smolagents
        tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
        ]

        # Add our wrapped custom tools
        tools.extend(self._build_custom_tools())

        agent = CodeAgent(
            tools=tools,
            model=self.model,
            max_steps=5,        # Allow multi-step reasoning
            verbosity_level=0,  # Clean output for GAIA compliance
        )

        # Append the GAIA answer-format rules to the agent's own system prompt
        # instead of replacing it, so the built-in instructions on how to write
        # and run code stay intact.
        templates = getattr(agent, "prompt_templates", None)
        if templates is not None and "system_prompt" in templates:
            templates["system_prompt"] = (
                templates["system_prompt"] + "\n\n" + self._get_gaia_optimized_prompt()
            )

        return agent

    def _get_gaia_optimized_prompt(self):
        """Return the GAIA instruction text (answer format rules and tool list)."""
        return """You are an expert AI assistant specialized in solving GAIA benchmark questions.

CRITICAL INSTRUCTIONS:
1. Use available tools to gather information, process files, analyze content
2. Think step-by-step through complex multi-hop reasoning
3. For GAIA questions, provide ONLY the final answer - no explanations or thinking process
4. Answer format: number OR few words OR comma-separated list
5. No units (like $ or %) unless specified
6. No articles or abbreviations for strings
7. Write digits in plain text unless specified
8. For lists, apply above rules to each element

AVAILABLE TOOLS:
- DuckDuckGoSearchTool: Search the web for current information
- VisitWebpageTool: Visit and extract content from URLs
- download_file_tool: Download files from GAIA tasks or URLs
- read_pdf_tool: Extract text from PDF documents
- analyze_image_tool: Analyze images and answer questions about them
- transcribe_speech_tool: Convert audio to text using Whisper
- calculator_tool: Perform mathematical calculations
- process_video_tool: Analyze video content and extract frames
- generate_image_tool: Create images from text descriptions
- create_visualization_tool: Create charts and data visualizations
- scientific_compute_tool: Statistical analysis and scientific computing
- detect_objects_tool: Identify objects in images
- analyze_audio_tool: Analyze audio features and content
- synthesize_speech_tool: Convert text to speech

Approach each question systematically:
1. Understand what information is needed
2. Use appropriate tools to gather data
3. Process and analyze the information
4. Provide the exact answer in the required format"""

    # === TOOL WRAPPERS FOR SMOLAGENTS ===
    # Each wrapper is a plain method that forwards to the same-purpose method
    # of self.toolkit.  The docstrings double as the tool descriptions handed
    # to the agent, so every argument keeps a one-line entry under "Args:".

    def download_file_tool(self, url: str = "", task_id: str = "") -> str:
        """Download files from URLs or the GAIA API.

        Args:
            url: URL to download from
            task_id: GAIA task ID for file download
        """
        return self.toolkit.download_file(url, task_id)

    def read_pdf_tool(self, file_path: str) -> str:
        """Extract text from PDF documents.

        Args:
            file_path: Path to the PDF file
        """
        return self.toolkit.read_pdf(file_path)

    def analyze_image_tool(self, image_path: str, question: str = "") -> str:
        """Analyze images and answer questions about them.

        Args:
            image_path: Path to the image file
            question: Specific question about the image
        """
        return self.toolkit.analyze_image(image_path, question)

    def transcribe_speech_tool(self, audio_path: str) -> str:
        """Convert speech to text using Whisper.

        Args:
            audio_path: Path to the audio file
        """
        return self.toolkit.transcribe_speech(audio_path)

    def calculator_tool(self, expression: str) -> str:
        """Perform mathematical calculations.

        Args:
            expression: Mathematical expression to evaluate
        """
        return self.toolkit.calculator(expression)

    def process_video_tool(self, video_path: str, task: str = "analyze") -> str:
        """Process and analyze video content.

        Args:
            video_path: Path to the video file
            task: Type of analysis (analyze, extract_frames, motion_detection)
        """
        return self.toolkit.process_video(video_path, task)

    def generate_image_tool(self, prompt: str, style: str = "realistic") -> str:
        """Generate images from text descriptions.

        Args:
            prompt: Text description of the image to generate
            style: Style of the image (realistic, artistic, etc.)
        """
        return self.toolkit.generate_image(prompt, style)

    def create_visualization_tool(self, data: str, chart_type: str = "bar") -> str:
        """Create data visualizations and charts.

        Args:
            data: JSON string of data to visualize
            chart_type: Type of chart (bar, line, scatter, pie)
        """
        import json

        # Best effort: a parse failure or a toolkit error yields the same hint
        # message.  `except Exception` (not a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        try:
            data_dict = json.loads(data)
            return self.toolkit.create_visualization(data_dict, chart_type)
        except Exception:
            return "โŒ Invalid data format. Provide JSON with 'x' and 'y' keys."

    def scientific_compute_tool(self, operation: str, data: str) -> str:
        """Perform scientific computations and analysis.

        Args:
            operation: Type of operation (statistics, correlation, clustering)
            data: JSON string of data for computation
        """
        import json

        # Same best-effort policy as create_visualization_tool.
        try:
            data_dict = json.loads(data)
            return self.toolkit.scientific_compute(operation, data_dict)
        except Exception:
            return "โŒ Invalid data format. Provide JSON data."

    def detect_objects_tool(self, image_path: str) -> str:
        """Detect and identify objects in images.

        Args:
            image_path: Path to the image file
        """
        return self.toolkit.detect_objects(image_path)

    def analyze_audio_tool(self, audio_path: str, task: str = "analyze") -> str:
        """Analyze audio content and features.

        Args:
            audio_path: Path to the audio file
            task: Type of analysis (analyze, transcribe, features)
        """
        return self.toolkit.analyze_audio(audio_path, task)

    def synthesize_speech_tool(self, text: str, voice: str = "default") -> str:
        """Convert text to speech.

        Args:
            text: Text to convert to speech
            voice: Voice type (default, female, male)
        """
        return self.toolkit.synthesize_speech(text, voice)

    # === MAIN INTERFACE ===

    def query(self, question: str) -> str:
        """Answer ``question`` and return the cleaned answer string.

        Delegates to the fallback agent when no ``CodeAgent`` was built.
        Otherwise runs the agent and normalises its result with
        ``_clean_for_gaia_submission``.  An exception raised by the agent is
        logged and turned into the fallback agent's answer when one exists, or
        into an error string otherwise.
        """
        # `agent` is None exactly when __init__ took the fallback branch.
        if getattr(self, 'agent', None) is None:
            logger.info("๐Ÿ”„ Using fallback agent")
            return self.fallback_agent.query(question)

        try:
            logger.info(f"๐Ÿš€ Processing with SmoLAgents: {question[:100]}...")

            # Use CodeAgent for processing
            response = self.agent.run(question)

            # Clean response for GAIA compliance
            cleaned_response = self._clean_for_gaia_submission(response)

            logger.info(f"โœ… SmoLAgents response: {cleaned_response}")
            return cleaned_response

        except Exception as e:
            logger.error(f"โŒ SmoLAgents error: {e}")
            fallback = getattr(self, 'fallback_agent', None)
            if fallback is not None:
                return fallback.query(question)
            return f"โŒ Processing failed: {e}"

    def _clean_for_gaia_submission(self, response: str) -> str:
        """Normalise an agent answer for submission.

        Strips surrounding whitespace, removes one leading phrase such as
        "Final answer:" (case-insensitive) and trailing periods.

        Args:
            response: The raw answer.  Non-string values (for example an int
                returned by the agent) are converted with ``str()``.

        Returns:
            The cleaned answer, or "Unable to provide answer" when the input is
            ``None`` or nothing remains after cleaning.
        """
        if response is None:
            return "Unable to provide answer"

        # The agent may hand back a non-string object; convert it instead of
        # crashing on .strip(), and keep falsy-but-valid answers such as 0.
        text = response if isinstance(response, str) else str(response)
        text = text.strip()
        if not text:
            return "Unable to provide answer"

        # Remove "The answer is:", "Final answer:", etc.
        prefixes_to_remove = (
            "the answer is:", "final answer:", "answer:", "result:",
            "final result:", "conclusion:", "solution:", "output:",
            "the final answer is:", "my answer is:", "i think the answer is:",
        )

        lowered = text.lower()
        for prefix in prefixes_to_remove:
            if lowered.startswith(prefix):
                # Only the first matching prefix is removed.
                text = text[len(prefix):].strip()
                break

        # Remove trailing periods, then any whitespace they exposed.
        text = text.rstrip('.').strip()

        return text or "Unable to provide answer"

    def cleanup(self):
        """Release toolkit resources, if a toolkit exists and supports it.

        In fallback mode no toolkit is ever created, so this is a no-op there.
        """
        toolkit = getattr(self, 'toolkit', None)
        if hasattr(toolkit, 'cleanup'):
            toolkit.cleanup()


class SmoLAgentsBasicAgent:
    """Thin facade over ``SmoLAgentsGAIASystem`` for callers that expect a basic agent.

    Every method forwards to the wrapped system stored in ``self.system``.
    """

    def __init__(self, hf_token: str = None, openai_key: str = None):
        # Both credentials are passed through untouched; the wrapped system
        # decides how to resolve missing values.
        wrapped = SmoLAgentsGAIASystem(hf_token, openai_key)
        self.system = wrapped

    def query(self, question: str) -> str:
        """Return the wrapped system's answer to ``question``."""
        answer = self.system.query(question)
        return answer

    def clean_for_api_submission(self, response: str) -> str:
        """Return ``response`` normalised by the wrapped system's submission cleaner."""
        cleaner = self.system._clean_for_gaia_submission
        return cleaner(response)

    def __call__(self, question: str) -> str:
        """Allow ``agent(question)`` as shorthand for ``agent.query(question)``."""
        answer = self.query(question)
        return answer

    def cleanup(self):
        """Ask the wrapped system to release its resources."""
        system = self.system
        system.cleanup()


def create_smolagents_gaia_system(hf_token: str = None, openai_key: str = None) -> SmoLAgentsGAIASystem:
    """Build and return a ``SmoLAgentsGAIASystem`` from the two optional credentials.

    Args:
        hf_token: Passed through as the system's first constructor argument.
        openai_key: Passed through as the system's second constructor argument.
    """
    system = SmoLAgentsGAIASystem(hf_token, openai_key)
    return system


# === TESTING FUNCTION ===
def test_smolagents_system():
    """Smoke-test the agent on a few trivial questions, printing each outcome.

    Nothing is asserted or returned: each answer or per-question error is
    printed, and a failure outside the per-question handling (for example
    while constructing the agent) is printed as a test failure, not raised.
    """
    print("๐Ÿงช Testing SmoLAgents GAIA System...")

    # Fixed sample prompts; building this tuple cannot fail, so it lives
    # outside the guarded section.
    sample_questions = (
        "What is 15 + 27?",
        "What is the capital of France?",
        "How many days are in a week?",
        "What color is the sky during the day?",
    )

    try:
        basic_agent = SmoLAgentsBasicAgent()

        for number, prompt in enumerate(sample_questions, start=1):
            print(f"\n๐Ÿ“ Test {number}: {prompt}")
            # One failing question must not abort the remaining ones.
            try:
                reply = basic_agent.query(prompt)
                print(f"โœ… Answer: {reply}")
            except Exception as err:
                print(f"โŒ Error: {err}")

        print("\n๐Ÿš€ SmoLAgents system test completed!")

    except Exception as err:
        print(f"โŒ Test failed: {err}")


# Run the smoke test only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    test_smolagents_system()