Spaces:

schoolkithub
/

multi-agent-gaia-system

Runtime error

multi-agent-gaia-system / smolagents_gaia_system.py

Omachoko

🚀 ULTIMATE GAIA Enhancement: 25+ Tool Arsenal

26eff0c about 2 months ago

15.1 kB

	#!/usr/bin/env python3
	"""
	🚀 SmoLAgents-Powered GAIA System
	Enhanced GAIA benchmark agent using smolagents framework for 60+ point performance boost

	Integrates our existing 18-tool arsenal with proven agentic framework patterns.
	Target: 67%+ GAIA Level 1 accuracy (vs 30% requirement)
	"""

	import os
	import logging
	import tempfile
	from typing import Dict, Any, List, Optional
	from dataclasses import dataclass

	# Core imports
	try:
	    # Import every smolagents name from the package root: the built-in tools
	    # (DuckDuckGoSearchTool, VisitWebpageTool) are exported there, whereas
	    # importing VisitWebpageTool from the `smolagents.tools` submodule can
	    # raise ImportError and silently push the whole system into fallback mode.
	    from smolagents import (
	        CodeAgent,
	        DuckDuckGoSearchTool,
	        InferenceClientModel,
	        VisitWebpageTool,
	        tool,
	    )
	    SMOLAGENTS_AVAILABLE = True
	    print("✅ SmoLAgents framework loaded successfully")
	except ImportError as e:
	    SMOLAGENTS_AVAILABLE = False
	    print(f"⚠️ SmoLAgents not available: {e}")
	    # Fallback to our existing system
	    from gaia_system import BasicAgent as FallbackAgent

	    def tool(func):
	        """No-op stand-in so any ``@tool`` usage still resolves without smolagents."""
	        return func

	# Import our existing system for tool wrapping
	from gaia_system import UniversalMultimodalToolkit, EnhancedMultiModelGAIASystem

	# Set up logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	class SmoLAgentsGAIASystem:
	    """🚀 Enhanced GAIA system powered by SmoLAgents framework.

	    Wraps the project's ``UniversalMultimodalToolkit`` methods as smolagents
	    tools and drives them with a ``CodeAgent``. When smolagents could not be
	    imported (``SMOLAGENTS_AVAILABLE`` is false) every query is delegated to
	    ``FallbackAgent`` instead.

	    Attributes:
	        hf_token: Hugging Face token (argument, else ``HF_TOKEN`` env var).
	        openai_key: OpenAI key (argument, else ``OPENAI_API_KEY`` env var).
	        toolkit: ``UniversalMultimodalToolkit`` instance, or ``None`` in
	            fallback mode.
	        model: smolagents model used by the agent, or ``None`` in fallback mode.
	        agent: the ``CodeAgent``, or ``None`` in fallback mode.
	        fallback_agent: ``FallbackAgent`` instance, or ``None`` when smolagents
	            is in use.
	    """

	    # Lower-case lead-ins stripped from the start of an answer (first match wins).
	    _ANSWER_PREFIXES = (
	        "the answer is:", "final answer:", "answer:", "result:",
	        "final result:", "conclusion:", "solution:", "output:",
	        "the final answer is:", "my answer is:", "i think the answer is:",
	    )

	    # Returned whenever no usable answer text is left after cleaning.
	    _NO_ANSWER = "Unable to provide answer"

	    def __init__(self, hf_token: str = None, openai_key: str = None):
	        """Initialize SmoLAgents-powered GAIA system.

	        Args:
	            hf_token: Hugging Face token; falls back to the ``HF_TOKEN``
	                environment variable when omitted.
	            openai_key: OpenAI key; falls back to ``OPENAI_API_KEY`` when
	                omitted.
	        """
	        self.hf_token = hf_token or os.getenv('HF_TOKEN')
	        self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')

	        # Define every attribute up front so query()/cleanup() never hit an
	        # AttributeError, whichever mode the system ends up in.
	        self.toolkit = None
	        self.model = None
	        self.agent = None
	        self.fallback_agent = None

	        if not SMOLAGENTS_AVAILABLE:
	            logger.warning("🔄 SmoLAgents unavailable, falling back to custom system")
	            # Pass the env-resolved credentials, same as the toolkit receives.
	            self.fallback_agent = FallbackAgent(self.hf_token, self.openai_key)
	            return

	        # Initialize our existing toolkit for tool wrapping
	        self.toolkit = UniversalMultimodalToolkit(self.hf_token, self.openai_key)

	        # Create model with priority system (Qwen3-235B-A22B first)
	        self.model = self._create_model()

	        # Initialize smolagents with our wrapped tools
	        self.agent = self._create_smolagents_agent()

	        logger.info("🚀 SmoLAgents GAIA System initialized with 18+ tools")

	    def _create_model(self):
	        """Create the model, trying candidates in priority order.

	        Order: Qwen3-235B-A22B, DeepSeek-R1 (both need ``hf_token``), GPT-4o
	        (needs ``openai_key``), then Llama-3.1-8B-Instruct as the last resort.
	        A candidate is skipped when its credential is missing or when its
	        constructor raises.

	        Returns:
	            An ``InferenceClientModel`` instance.
	        """
	        # NOTE(review): building the client is not expected to contact the
	        # provider, so in practice the first candidate with a credential
	        # wins — confirm if a real availability probe is wanted.
	        candidates = (
	            # Priority 1: Qwen3-235B-A22B (Best reasoning for GAIA)
	            ("Qwen3-235B-A22B", self.hf_token, {
	                "model_id": "Qwen/Qwen3-235B-A22B",
	                "provider": "fireworks-ai",
	                "token": self.hf_token,
	            }),
	            # Priority 2: DeepSeek-R1 (Strong reasoning)
	            ("DeepSeek-R1", self.hf_token, {
	                "model_id": "deepseek-ai/DeepSeek-R1",
	                "token": self.hf_token,
	            }),
	            # Priority 3: GPT-4o (Vision capabilities)
	            ("GPT-4o", self.openai_key, {
	                "model_id": "gpt-4o",
	                "provider": "openai",
	                "token": self.openai_key,
	            }),
	        )

	        for label, credential, model_kwargs in candidates:
	            if not credential:
	                continue
	            try:
	                return InferenceClientModel(**model_kwargs)
	            except Exception as e:
	                logger.warning("⚠️ %s unavailable: %s", label, e)

	        # Fallback to HF default
	        return InferenceClientModel(
	            model_id="meta-llama/Llama-3.1-8B-Instruct",
	            token=self.hf_token,
	        )

	    def _build_custom_tools(self) -> list:
	        """Wrap the toolkit-backed methods as smolagents tools.

	        The ``tool`` decorator needs a type hint and an ``Args:`` entry for
	        every parameter, so it cannot decorate methods that take ``self``.
	        Decorating at class-definition time would also require ``tool`` to
	        exist when smolagents is missing. The tools are therefore plain
	        closures created here, only on the smolagents path.

	        Returns:
	            The custom tools, in the order they are registered with the agent.
	        """
	        system = self

	        @tool
	        def download_file_tool(url: str = "", task_id: str = "") -> str:
	            """📥 Download files from URLs or GAIA API

	            Args:
	                url: URL to download from
	                task_id: GAIA task ID for file download
	            """
	            return system.download_file_tool(url, task_id)

	        @tool
	        def read_pdf_tool(file_path: str) -> str:
	            """📄 Extract text from PDF documents

	            Args:
	                file_path: Path to the PDF file
	            """
	            return system.read_pdf_tool(file_path)

	        @tool
	        def analyze_image_tool(image_path: str, question: str = "") -> str:
	            """🖼️ Analyze images and answer questions about them

	            Args:
	                image_path: Path to the image file
	                question: Specific question about the image
	            """
	            return system.analyze_image_tool(image_path, question)

	        @tool
	        def transcribe_speech_tool(audio_path: str) -> str:
	            """🎙️ Convert speech to text using Whisper

	            Args:
	                audio_path: Path to the audio file
	            """
	            return system.transcribe_speech_tool(audio_path)

	        @tool
	        def calculator_tool(expression: str) -> str:
	            """🧮 Perform mathematical calculations

	            Args:
	                expression: Mathematical expression to evaluate
	            """
	            return system.calculator_tool(expression)

	        @tool
	        def process_video_tool(video_path: str, task: str = "analyze") -> str:
	            """🎥 Process and analyze video content

	            Args:
	                video_path: Path to the video file
	                task: Type of analysis (analyze, extract_frames, motion_detection)
	            """
	            return system.process_video_tool(video_path, task)

	        @tool
	        def generate_image_tool(prompt: str, style: str = "realistic") -> str:
	            """🎨 Generate images from text descriptions

	            Args:
	                prompt: Text description of the image to generate
	                style: Style of the image (realistic, artistic, etc.)
	            """
	            return system.generate_image_tool(prompt, style)

	        @tool
	        def create_visualization_tool(data: str, chart_type: str = "bar") -> str:
	            """📊 Create data visualizations and charts

	            Args:
	                data: JSON string of data to visualize
	                chart_type: Type of chart (bar, line, scatter, pie)
	            """
	            return system.create_visualization_tool(data, chart_type)

	        @tool
	        def scientific_compute_tool(operation: str, data: str) -> str:
	            """🧬 Perform scientific computations and analysis

	            Args:
	                operation: Type of operation (statistics, correlation, clustering)
	                data: JSON string of data for computation
	            """
	            return system.scientific_compute_tool(operation, data)

	        @tool
	        def detect_objects_tool(image_path: str) -> str:
	            """🎯 Detect and identify objects in images

	            Args:
	                image_path: Path to the image file
	            """
	            return system.detect_objects_tool(image_path)

	        @tool
	        def analyze_audio_tool(audio_path: str, task: str = "analyze") -> str:
	            """🎵 Analyze audio content and features

	            Args:
	                audio_path: Path to the audio file
	                task: Type of analysis (analyze, transcribe, features)
	            """
	            return system.analyze_audio_tool(audio_path, task)

	        @tool
	        def synthesize_speech_tool(text: str, voice: str = "default") -> str:
	            """🗣️ Convert text to speech

	            Args:
	                text: Text to convert to speech
	                voice: Voice type (default, female, male)
	            """
	            return system.synthesize_speech_tool(text, voice)

	        return [
	            download_file_tool,
	            read_pdf_tool,
	            analyze_image_tool,
	            transcribe_speech_tool,
	            calculator_tool,
	            process_video_tool,
	            generate_image_tool,
	            create_visualization_tool,
	            scientific_compute_tool,
	            detect_objects_tool,
	            analyze_audio_tool,
	            synthesize_speech_tool,
	        ]

	    def _create_smolagents_agent(self):
	        """Create CodeAgent with our comprehensive tool suite.

	        Returns:
	            A ``CodeAgent`` holding the two smolagents web tools plus the
	            custom toolkit tools, limited to 5 steps with quiet logging.
	        """
	        tools = [
	            # Core tools from smolagents
	            DuckDuckGoSearchTool(),
	            VisitWebpageTool(),
	            # Our wrapped custom tools
	            *self._build_custom_tools(),
	        ]

	        agent = CodeAgent(
	            tools=tools,
	            model=self.model,
	            max_steps=5,  # Allow multi-step reasoning
	            verbosity_level=0,  # Clean output for GAIA compliance
	        )

	        # CodeAgent takes no `system_prompt` keyword. Its built-in template also
	        # carries the code-format instructions and the tool listing, so the
	        # GAIA rules are appended to it rather than replacing it.
	        agent.prompt_templates["system_prompt"] = (
	            agent.prompt_templates["system_prompt"]
	            + "\n\n"
	            + self._get_gaia_optimized_prompt()
	        )

	        return agent

	    def _get_gaia_optimized_prompt(self):
	        """GAIA-optimized system prompt for exact answer format.

	        Returns:
	            The prompt text. It is assembled from individual lines so that the
	            source file's indentation can never leak into the prompt.
	        """
	        prompt_lines = (
	            "You are an expert AI assistant specialized in solving GAIA benchmark questions.",
	            "",
	            "CRITICAL INSTRUCTIONS:",
	            "1. Use available tools to gather information, process files, analyze content",
	            "2. Think step-by-step through complex multi-hop reasoning",
	            "3. For GAIA questions, provide ONLY the final answer - no explanations or thinking process",
	            "4. Answer format: number OR few words OR comma-separated list",
	            "5. No units (like $ or %) unless specified",
	            "6. No articles or abbreviations for strings",
	            "7. Write digits in plain text unless specified",
	            "8. For lists, apply above rules to each element",
	            "",
	            "AVAILABLE TOOLS:",
	            "- DuckDuckGoSearchTool: Search the web for current information",
	            "- VisitWebpageTool: Visit and extract content from URLs",
	            "- download_file_tool: Download files from GAIA tasks or URLs",
	            "- read_pdf_tool: Extract text from PDF documents",
	            "- analyze_image_tool: Analyze images and answer questions about them",
	            "- transcribe_speech_tool: Convert audio to text using Whisper",
	            "- calculator_tool: Perform mathematical calculations",
	            "- process_video_tool: Analyze video content and extract frames",
	            "- generate_image_tool: Create images from text descriptions",
	            "- create_visualization_tool: Create charts and data visualizations",
	            "- scientific_compute_tool: Statistical analysis and scientific computing",
	            "- detect_objects_tool: Identify objects in images",
	            "- analyze_audio_tool: Analyze audio features and content",
	            "- synthesize_speech_tool: Convert text to speech",
	            "",
	            "Approach each question systematically:",
	            "1. Understand what information is needed",
	            "2. Use appropriate tools to gather data",
	            "3. Process and analyze the information",
	            "4. Provide the exact answer in the required format",
	        )
	        return "\n".join(prompt_lines)

	    # === TOOL WRAPPERS FOR SMOLAGENTS ===
	    # Plain methods delegating to the toolkit; _build_custom_tools() exposes
	    # them to the agent. They require self.toolkit to be set (smolagents mode).

	    def download_file_tool(self, url: str = "", task_id: str = "") -> str:
	        """📥 Download a file from ``url`` or, via ``task_id``, from the GAIA API."""
	        return self.toolkit.download_file(url, task_id)

	    def read_pdf_tool(self, file_path: str) -> str:
	        """📄 Extract text from the PDF at ``file_path``."""
	        return self.toolkit.read_pdf(file_path)

	    def analyze_image_tool(self, image_path: str, question: str = "") -> str:
	        """🖼️ Analyze the image at ``image_path``, optionally answering ``question``."""
	        return self.toolkit.analyze_image(image_path, question)

	    def transcribe_speech_tool(self, audio_path: str) -> str:
	        """🎙️ Convert the speech in ``audio_path`` to text."""
	        return self.toolkit.transcribe_speech(audio_path)

	    def calculator_tool(self, expression: str) -> str:
	        """🧮 Evaluate the mathematical ``expression``."""
	        return self.toolkit.calculator(expression)

	    def process_video_tool(self, video_path: str, task: str = "analyze") -> str:
	        """🎥 Run ``task`` (analyze, extract_frames, motion_detection) on a video."""
	        return self.toolkit.process_video(video_path, task)

	    def generate_image_tool(self, prompt: str, style: str = "realistic") -> str:
	        """🎨 Generate an image from ``prompt`` in the given ``style``."""
	        return self.toolkit.generate_image(prompt, style)

	    def create_visualization_tool(self, data: str, chart_type: str = "bar") -> str:
	        """📊 Create a chart from a JSON string.

	        Args:
	            data: JSON string of data to visualize.
	            chart_type: Type of chart (bar, line, scatter, pie).

	        Returns:
	            The toolkit's result, or a "❌ ..." message when ``data`` is not
	            valid JSON or the toolkit call fails. Never raises.
	        """
	        import json

	        # Only the parse step is a "bad input" error; keep it separate so a
	        # toolkit failure is not misreported as invalid JSON.
	        try:
	            data_dict = json.loads(data)
	        except (TypeError, ValueError):
	            return "❌ Invalid data format. Provide JSON with 'x' and 'y' keys."

	        try:
	            return self.toolkit.create_visualization(data_dict, chart_type)
	        except Exception as e:
	            return f"❌ Visualization failed: {e}"

	    def scientific_compute_tool(self, operation: str, data: str) -> str:
	        """🧬 Run a scientific computation on JSON-encoded data.

	        Args:
	            operation: Type of operation (statistics, correlation, clustering).
	            data: JSON string of data for computation.

	        Returns:
	            The toolkit's result, or a "❌ ..." message when ``data`` is not
	            valid JSON or the toolkit call fails. Never raises.
	        """
	        import json

	        try:
	            data_dict = json.loads(data)
	        except (TypeError, ValueError):
	            return "❌ Invalid data format. Provide JSON data."

	        try:
	            return self.toolkit.scientific_compute(operation, data_dict)
	        except Exception as e:
	            return f"❌ Scientific computation failed: {e}"

	    def detect_objects_tool(self, image_path: str) -> str:
	        """🎯 Detect and identify objects in the image at ``image_path``."""
	        return self.toolkit.detect_objects(image_path)

	    def analyze_audio_tool(self, audio_path: str, task: str = "analyze") -> str:
	        """🎵 Run ``task`` (analyze, transcribe, features) on an audio file."""
	        return self.toolkit.analyze_audio(audio_path, task)

	    def synthesize_speech_tool(self, text: str, voice: str = "default") -> str:
	        """🗣️ Convert ``text`` to speech using ``voice``."""
	        return self.toolkit.synthesize_speech(text, voice)

	    # === MAIN INTERFACE ===

	    def query(self, question: str) -> str:
	        """Process GAIA question with smolagents framework.

	        Args:
	            question: The question text.

	        Returns:
	            The cleaned answer. If the agent raises, the fallback agent's
	            answer is returned when one exists, otherwise a
	            "❌ Processing failed: ..." message.
	        """
	        if self.agent is None:
	            # Fallback mode: __init__ created fallback_agent instead of an agent.
	            logger.info("🔄 Using fallback agent")
	            return self.fallback_agent.query(question)

	        try:
	            logger.info("🚀 Processing with SmoLAgents: %s...", question[:100])

	            # Use CodeAgent for processing
	            response = self.agent.run(question)

	            # Clean response for GAIA compliance
	            cleaned_response = self._clean_for_gaia_submission(response)

	            logger.info("✅ SmoLAgents response: %s", cleaned_response)
	            return cleaned_response

	        except Exception as e:
	            logger.error("❌ SmoLAgents error: %s", e)
	            # Fallback to our existing system
	            if self.fallback_agent is not None:
	                return self.fallback_agent.query(question)
	            return f"❌ Processing failed: {e}"

	    def _clean_for_gaia_submission(self, response: Any) -> str:
	        """Clean response for GAIA API submission.

	        Args:
	            response: The agent's answer. Non-string values (for example a
	                number passed to the agent's final answer) are converted with
	                ``str`` so that ``0`` or ``42`` survive as answers.

	        Returns:
	            The answer with surrounding whitespace, one leading
	            "answer is:"-style prefix and trailing periods removed, or
	            "Unable to provide answer" when nothing is left.
	        """
	        if response is None:
	            return self._NO_ANSWER

	        text = str(response).strip()

	        # Remove "The answer is:", "Final answer:", etc. (first match only)
	        lowered = text.lower()
	        for prefix in self._ANSWER_PREFIXES:
	            if lowered.startswith(prefix):
	                text = text[len(prefix):].strip()
	                break

	        # Remove trailing periods, then any whitespace they exposed
	        text = text.rstrip('.').strip()

	        return text or self._NO_ANSWER

	    def cleanup(self):
	        """Clean up resources.

	        Calls ``cleanup()`` on the toolkit and on the fallback agent when they
	        exist and provide one; safe to call in either mode.
	        """
	        for resource in (
	            getattr(self, "toolkit", None),
	            getattr(self, "fallback_agent", None),
	        ):
	            if resource is not None and hasattr(resource, 'cleanup'):
	                resource.cleanup()


	class SmoLAgentsBasicAgent:
	    """🚀 Simple interface compatible with existing app.py

	    Thin facade over ``SmoLAgentsGAIASystem``: every method forwards to the
	    wrapped system stored in ``self.system``.
	    """

	    def __init__(self, hf_token: str = None, openai_key: str = None):
	        """Build the underlying system from the given (optional) credentials."""
	        self.system = SmoLAgentsGAIASystem(hf_token, openai_key)

	    def query(self, question: str) -> str:
	        """Answer ``question`` by delegating to the wrapped system."""
	        answer = self.system.query(question)
	        return answer

	    def clean_for_api_submission(self, response: str) -> str:
	        """Normalize ``response`` with the wrapped system's GAIA cleaner."""
	        cleaned = self.system._clean_for_gaia_submission(response)
	        return cleaned

	    def __call__(self, question: str) -> str:
	        """Allow ``agent(question)`` as shorthand for ``agent.query(question)``."""
	        return self.query(question)

	    def cleanup(self):
	        """Release whatever resources the wrapped system holds."""
	        self.system.cleanup()


	def create_smolagents_gaia_system(hf_token: str = None, openai_key: str = None) -> SmoLAgentsGAIASystem:
	    """Build and return a ``SmoLAgentsGAIASystem`` from the given credentials.

	    Both arguments are optional and are passed straight to the constructor.
	    """
	    system = SmoLAgentsGAIASystem(hf_token=hf_token, openai_key=openai_key)
	    return system


	# === TESTING FUNCTION ===
	def test_smolagents_system():
	    """Smoke-test the agent by printing its answers to a few simple questions.

	    Nothing is asserted and nothing is raised: a failure for one question, or
	    for the whole run, is printed instead.
	    """
	    print("🧪 Testing SmoLAgents GAIA System...")

	    sample_questions = (
	        "What is 15 + 27?",
	        "What is the capital of France?",
	        "How many days are in a week?",
	        "What color is the sky during the day?",
	    )

	    try:
	        gaia_agent = SmoLAgentsBasicAgent()

	        for number, prompt in enumerate(sample_questions, start=1):
	            print(f"\n📝 Test {number}: {prompt}")
	            try:
	                reply = gaia_agent.query(prompt)
	                print(f"✅ Answer: {reply}")
	            except Exception as question_error:
	                # One failing question must not stop the remaining ones.
	                print(f"❌ Error: {question_error}")

	        print("\n🚀 SmoLAgents system test completed!")

	    except Exception as run_error:
	        print(f"❌ Test failed: {run_error}")


	# Run the smoke test only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	test_smolagents_system()