import os
import sys
import json
import traceback
from typing import List, Dict
import warnings
# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
import gradio as gr
# --- Environment variable setup to fix permission issues ---
def setup_environment():
env_vars = {
"NLTK_DATA": "/tmp/nltk_data",
"MPLCONFIGDIR": "/tmp/matplotlib_cache",
"HF_HOME": "/tmp/huggingface_cache",
"TORCH_HOME": "/tmp/torch_cache",
"TRANSFORMERS_CACHE": "/tmp/huggingface_cache"
}
for var, path in env_vars.items():
os.environ[var] = path
# Create directory if it doesn't exist
try:
os.makedirs(path, exist_ok=True)
print(f"βœ… Created/verified directory: {path}")
except PermissionError:
print(f"⚠️ Permission denied for {path}, using /tmp fallback")
fallback_path = f"/tmp/{var.lower()}"
os.environ[var] = fallback_path
os.makedirs(fallback_path, exist_ok=True)
except Exception as e:
print(f"❌ Error setting up {var}: {e}")
# Setup environment first
setup_environment()
# Import nltk AFTER setting environment variables
try:
import nltk
# Download required NLTK data upfront
nltk.download('punkt', download_dir=os.environ["NLTK_DATA"], quiet=True)
nltk.download('stopwords', download_dir=os.environ["NLTK_DATA"], quiet=True)
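    # Downloads land in NLTK_DATA (set above), so later nltk lookups resolve without needing write access elsewhere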
print("βœ… NLTK data downloaded successfully")
except Exception as e:
print(f"⚠️ NLTK setup warning: {e}")
# Add current directory to path for local imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Import dependencies with better error handling
try:
from utils.gaia_api import GaiaAPI
print("βœ… GaiaAPI imported successfully")
except ImportError as e:
print(f"⚠️ Failed to import GaiaAPI: {e}")
# Create a fallback GaiaAPI
class GaiaAPI:
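        """Minimal stub so the UI can still start when utils.gaia_api is unavailable."""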
@classmethod
def get_questions(cls):
return [{"task_id": "fallback", "question": "What is 2+2?"}]
@classmethod
def get_random_question(cls):
return {"task_id": "fallback", "question": "What is 2+2?"}
@classmethod
def submit_answers(cls, username, code_url, answers):
return {"error": "GaiaAPI not available", "score": 0}
# Initialize global agent state
AGENT_READY = False
agent = None
initialization_error = None
agent_info = {}
def initialize_agent():
"""Initialize the LlamaIndex agent with comprehensive error handling"""
global agent, AGENT_READY, initialization_error, agent_info
try:
print("πŸ”„ Starting agent initialization...")
# Import agent-related modules
print("πŸ“¦ Importing modules...")
from agent.local_llm import LocalLLM
from agent.tools import gaia_tools
from llama_index.core.agent import ReActAgent
from llama_index.core.memory import ChatMemoryBuffer
agent_info["modules_imported"] = True
print("βœ… All modules imported successfully!")
print("πŸ€– Initializing Local LLM...")
local_llm = LocalLLM()
llm = local_llm.get_llm()
agent_info["llm_type"] = llm.__class__.__name__
print("🧠 Creating ReAct Agent...")
memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
# Check if we have a proper LLM or mock
if hasattr(llm, 'chat') and llm.__class__.__name__ != 'MockLLM':
agent = ReActAgent.from_tools(
tools=gaia_tools,
llm=llm,
memory=memory,
verbose=True,
max_iterations=3
)
agent_info["agent_type"] = "ReActAgent"
print("βœ… ReAct Agent initialized successfully!")
else:
agent = llm # Use the mock LLM directly
agent_info["agent_type"] = "MockLLM"
print("⚠️ Using mock mode - agent partially ready")
agent_info["tools_count"] = len(gaia_tools) if 'gaia_tools' in locals() else 0
AGENT_READY = True
print("πŸŽ‰ Agent initialization complete!")
except Exception as e:
error_msg = f"Failed to initialize agent: {str(e)}"
print(f"❌ {error_msg}")
traceback.print_exc()
AGENT_READY = False
agent = None
initialization_error = error_msg
agent_info["error"] = error_msg
# Initialize agent
initialize_agent()
def process_single_question(question_text: str) -> str:
"""Process a single GAIA question through the agent"""
if not AGENT_READY:
error_msg = "❌ Agent not ready. "
if initialization_error:
error_msg += f"Error: {initialization_error}"
return error_msg
if not question_text.strip():
return "❌ Please enter a question."
try:
enhanced_prompt = f"""
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.
Question: {question_text}
"""
print(f"πŸ€” Processing question: {question_text[:50]}...")
# FIXED: Use .complete() instead of .chat() to avoid chat template errors
if hasattr(agent, 'query'):
response = agent.query(enhanced_prompt)
elif hasattr(agent, 'complete'):
# Use complete() method for models without chat templates
response = agent.complete(enhanced_prompt)
answer = response.text if hasattr(response, 'text') else str(response)
        elif hasattr(agent, 'chat'):
            # Only use chat if it's the MockLLM or a proper chat model
            try:
                # LlamaIndex chat() expects ChatMessage objects rather than plain dicts
                from llama_index.core.llms import ChatMessage
                response = agent.chat([ChatMessage(role="user", content=enhanced_prompt)])
                answer = response.message.content if hasattr(response, 'message') else str(response)
            except Exception as chat_error:
                # Fall back to complete() if chat() fails
                print(f"⚠️ Chat method failed, trying complete: {chat_error}")
                if hasattr(agent, 'complete'):
                    response = agent.complete(enhanced_prompt)
                    answer = response.text if hasattr(response, 'text') else str(response)
                else:
                    raise
else:
answer = "Mock response: I would analyze this question and provide an answer."
        # The query() branch above sets `response` but not `answer`; derive it here
        if 'answer' not in locals():
            answer = str(response).strip()
# Remove common prefixes from the answer
for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
if answer.startswith(prefix):
answer = answer[len(prefix):].strip()
print(f"βœ… Generated answer: {answer[:50]}...")
return answer
except Exception as e:
error_msg = f"❌ Error processing question: {str(e)}"
print(error_msg)
return error_msg
def process_all_questions() -> str:
"""Process all GAIA questions and prepare answers for submission"""
if not AGENT_READY:
return "❌ Agent not ready."
try:
print("πŸ“₯ Fetching all GAIA questions...")
questions = GaiaAPI.get_questions()
processed_answers = []
print(f"πŸ”„ Processing {len(questions)} questions...")
for i, question in enumerate(questions):
print(f"Processing question {i + 1}/{len(questions)}: {question['task_id']}")
answer = process_single_question(question['question'])
processed_answers.append({
"task_id": question['task_id'],
"submitted_answer": answer
})
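        # Each entry follows the {"task_id": ..., "submitted_answer": ...} shape that submit_to_gaia later passes to GaiaAPI.submit_answers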
# Save answers to file
output_file = "/app/gaia_answers.json"
with open(output_file, "w") as f:
json.dump(processed_answers, f, indent=2)
summary = f"βœ… Processed {len(processed_answers)} questions.\n"
summary += f"πŸ’Ύ Answers saved to {output_file}\n"
summary += "πŸ“‹ First 3 answers:\n"
for ans in processed_answers[:3]:
summary += f"- {ans['task_id']}: {ans['submitted_answer'][:50]}...\n"
print(summary)
return summary
except Exception as e:
error_msg = f"❌ Error processing questions: {str(e)}"
print(error_msg)
traceback.print_exc()
return error_msg
def submit_to_gaia(username: str, code_url: str) -> str:
"""Submit answers to GAIA benchmark"""
if not AGENT_READY:
return "❌ Agent not ready."
if not username or not code_url:
return "❌ Please provide both username and code URL."
try:
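        # Must match the output_file path written by process_all_questions()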
answers_file = "/app/gaia_answers.json"
with open(answers_file, "r") as f:
answers = json.load(f)
print(f"πŸ“€ Submitting {len(answers)} answers...")
except FileNotFoundError:
return "❌ No processed answers found. Please process them first."
try:
result = GaiaAPI.submit_answers(username, code_url, answers)
if "error" in result:
return f"❌ Submission failed: {result['error']}"
score = result.get("score", "Unknown")
success_msg = f"βœ… Submission successful!\nπŸ“Š Score: {score}"
print(success_msg)
return success_msg
except Exception as e:
error_msg = f"❌ Submission error: {str(e)}"
print(error_msg)
return error_msg
def get_sample_question() -> str:
"""Load a sample question for testing"""
try:
question = GaiaAPI.get_random_question()
return question['question']
except Exception as e:
return f"Error loading sample question: {str(e)}"
def get_system_status() -> str:
"""Get detailed system status for debugging"""
status = "πŸ” System Status:\n\n"
# Agent status
status += f"πŸ€– Agent Ready: {'βœ… Yes' if AGENT_READY else '❌ No'}\n"
if initialization_error:
status += f"❌ Error: {initialization_error}\n"
# Agent info
status += f"🧠 LLM Type: {agent_info.get('llm_type', 'Unknown')}\n"
status += f"πŸ”§ Agent Type: {agent_info.get('agent_type', 'Unknown')}\n"
status += f"πŸ› οΈ Tools Count: {agent_info.get('tools_count', 0)}\n"
# Environment
status += "\nπŸ“ Environment Variables:\n"
for var in ["NLTK_DATA", "HF_HOME", "MPLCONFIGDIR", "TORCH_HOME"]:
path = os.environ.get(var, 'Not set')
exists = "βœ…" if os.path.exists(path) else "❌"
status += f" {var}: {path} {exists}\n"
# Directory permissions
status += "\nπŸ“‚ Directory Status:\n"
for path in ["/app", "/tmp"]:
        try:
            writable = os.access(path, os.W_OK)
            status += f" {path}: {'✅ Writable' if writable else '❌ Not writable'}\n"
        except Exception:
            status += f" {path}: ❌ Error checking\n"
return status
# ---------- Gradio UI ----------
with gr.Blocks(title="πŸ¦™ GAIA LlamaIndex Agent", theme=gr.themes.Soft()) as demo:
gr.Markdown(f"""
# πŸ¦™ GAIA Benchmark Agent with LlamaIndex
This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.
**Status:** {"βœ… Ready" if AGENT_READY else "❌ Not Ready"}
{f"**Error:** {initialization_error}" if initialization_error else ""}
""")
with gr.Tab("πŸ”¬ Test Single Question"):
gr.Markdown("Test the agent with individual questions")
with gr.Row():
with gr.Column():
question_input = gr.Textbox(
label="Question",
placeholder="Enter a GAIA question or click 'Load Sample'",
lines=3
)
with gr.Row():
sample_btn = gr.Button("🎲 Load Sample Question")
process_btn = gr.Button("πŸš€ Process Question", variant="primary")
with gr.Column():
answer_output = gr.Textbox(
label="Agent Answer",
lines=5,
interactive=False
)
sample_btn.click(get_sample_question, outputs=question_input)
process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)
with gr.Tab("πŸ“Š Full Evaluation"):
gr.Markdown("Process all GAIA questions and prepare for submission")
process_all_btn = gr.Button("πŸ”„ Process All Questions", variant="primary")
processing_output = gr.Textbox(label="Processing Status", lines=10, interactive=False)
process_all_btn.click(process_all_questions, outputs=processing_output)
with gr.Tab("πŸ† Submit to GAIA"):
gr.Markdown("""
Submit your processed answers to the GAIA benchmark for official scoring.
**Requirements:**
1. Your Hugging Face username
2. Link to your Space code (e.g., https://huggingface.co/spaces/your-username/gaia-agent)
""")
with gr.Row():
with gr.Column():
username_input = gr.Textbox(label="HF Username", placeholder="your-username")
code_url_input = gr.Textbox(label="Space Code URL", placeholder="https://huggingface.co/spaces/your-username/gaia-agent")
submit_btn = gr.Button("🎯 Submit to GAIA", variant="primary")
with gr.Column():
submission_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)
submit_btn.click(submit_to_gaia, inputs=[username_input, code_url_input], outputs=submission_output)
with gr.Tab("ℹ️ System Status"):
gr.Markdown("## System Information and Debugging")
refresh_btn = gr.Button("πŸ”„ Refresh Status")
status_output = gr.Textbox(label="System Status", lines=20, interactive=False)
# Load initial status
demo.load(get_system_status, outputs=status_output)
refresh_btn.click(get_system_status, outputs=status_output)
if __name__ == "__main__":
print("πŸš€ Starting Gradio interface...")
demo.launch(
server_name="0.0.0.0",
server_port=7860,
show_error=True
)