"""GAIA benchmark agent served through a Gradio UI.

Redirects library cache directories to writable /tmp paths, initializes a
LlamaIndex ReAct agent backed by a local LLM (with graceful fallbacks when
dependencies are unavailable), and exposes tabs for testing single questions,
batch-processing all GAIA questions, and submitting answers for scoring.
"""

import os
import sys
import json
import traceback
from typing import List, Dict
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

import gradio as gr

# Single source of truth for where processed answers are persisted between
# the "process all questions" step and the "submit" step.
ANSWERS_FILE = "/app/gaia_answers.json"


# --- Environment variable setup to fix permission issues ---
def setup_environment():
    """Point cache/data env vars at writable /tmp paths and create them.

    Must run BEFORE importing nltk/transformers/torch so those libraries pick
    up the redirected cache locations. Directory creation is guarded so a
    permission problem degrades to a fallback path instead of crashing the
    app at import time.
    """
    env_vars = {
        "NLTK_DATA": "/tmp/nltk_data",
        "MPLCONFIGDIR": "/tmp/matplotlib_cache",
        "HF_HOME": "/tmp/huggingface_cache",
        "TORCH_HOME": "/tmp/torch_cache",
        "TRANSFORMERS_CACHE": "/tmp/huggingface_cache",
    }
    for var, path in env_vars.items():
        os.environ[var] = path
        # Create directory if it doesn't exist
        try:
            os.makedirs(path, exist_ok=True)
            print(f"✅ Created/verified directory: {path}")
        except PermissionError:
            print(f"⚠️ Permission denied for {path}, using /tmp fallback")
            fallback_path = f"/tmp/{var.lower()}"
            os.environ[var] = fallback_path
            os.makedirs(fallback_path, exist_ok=True)
        except Exception as e:
            print(f"❌ Error setting up {var}: {e}")


# Setup environment first
setup_environment()

# Import nltk AFTER setting environment variables so it honors NLTK_DATA
try:
    import nltk

    # Download required NLTK data upfront
    nltk.download('punkt', download_dir=os.environ["NLTK_DATA"], quiet=True)
    nltk.download('stopwords', download_dir=os.environ["NLTK_DATA"], quiet=True)
    print("✅ NLTK data downloaded successfully")
except Exception as e:
    print(f"⚠️ NLTK setup warning: {e}")

# Add current directory to path for local imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import dependencies with better error handling
try:
    from utils.gaia_api import GaiaAPI
    print("✅ GaiaAPI imported successfully")
except ImportError as e:
    print(f"⚠️ Failed to import GaiaAPI: {e}")

    # Fallback GaiaAPI so the UI stays usable for local testing when the
    # real client is unavailable.
    class GaiaAPI:
        @classmethod
        def get_questions(cls):
            return [{"task_id": "fallback", "question": "What is 2+2?"}]

        @classmethod
        def get_random_question(cls):
            return {"task_id": "fallback", "question": "What is 2+2?"}

        @classmethod
        def submit_answers(cls, username, code_url, answers):
            return {"error": "GaiaAPI not available", "score": 0}


# Initialize global agent state
AGENT_READY = False
agent = None
initialization_error = None
agent_info = {}


def initialize_agent():
    """Initialize the LlamaIndex agent with comprehensive error handling.

    Populates the module-level globals ``agent``, ``AGENT_READY``,
    ``initialization_error`` and ``agent_info``. Falls back to the LLM's
    mock mode when a proper chat-capable model is unavailable.
    """
    global agent, AGENT_READY, initialization_error, agent_info

    try:
        print("🔄 Starting agent initialization...")

        # Import agent-related modules lazily so a missing dependency is
        # reported as an initialization error rather than killing the app.
        print("📦 Importing modules...")
        from agent.local_llm import LocalLLM
        from agent.tools import gaia_tools
        from llama_index.core.agent import ReActAgent
        from llama_index.core.memory import ChatMemoryBuffer

        agent_info["modules_imported"] = True
        print("✅ All modules imported successfully!")

        print("🤖 Initializing Local LLM...")
        local_llm = LocalLLM()
        llm = local_llm.get_llm()
        agent_info["llm_type"] = llm.__class__.__name__

        print("🧠 Creating ReAct Agent...")
        memory = ChatMemoryBuffer.from_defaults(token_limit=2000)

        # Check if we have a proper LLM or mock
        if hasattr(llm, 'chat') and llm.__class__.__name__ != 'MockLLM':
            agent = ReActAgent.from_tools(
                tools=gaia_tools,
                llm=llm,
                memory=memory,
                verbose=True,
                max_iterations=3
            )
            agent_info["agent_type"] = "ReActAgent"
            print("✅ ReAct Agent initialized successfully!")
        else:
            agent = llm  # Use the mock LLM directly
            agent_info["agent_type"] = "MockLLM"
            print("⚠️ Using mock mode - agent partially ready")

        # gaia_tools was imported above, so it is always bound here.
        agent_info["tools_count"] = len(gaia_tools)
        AGENT_READY = True
        print("🎉 Agent initialization complete!")

    except Exception as e:
        error_msg = f"Failed to initialize agent: {str(e)}"
        print(f"❌ {error_msg}")
        traceback.print_exc()
        AGENT_READY = False
        agent = None
        initialization_error = error_msg
        agent_info["error"] = error_msg


# Initialize agent
initialize_agent()


def process_single_question(question_text: str) -> str:
    """Process a single GAIA question through the agent.

    Returns the agent's answer with common "final answer" prefixes stripped,
    or a human-readable error string when the agent is not ready or fails.
    """
    if not AGENT_READY:
        error_msg = "❌ Agent not ready. "
        if initialization_error:
            error_msg += f"Error: {initialization_error}"
        return error_msg

    if not question_text.strip():
        return "❌ Please enter a question."

    try:
        enhanced_prompt = f"""
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.

Question: {question_text}
"""
        print(f"🤔 Processing question: {question_text[:50]}...")

        # Prefer query() (ReActAgent), then complete() (models without chat
        # templates), then chat() as a last resort. Every branch assigns
        # `answer` so no post-hoc locals() inspection is needed.
        if hasattr(agent, 'query'):
            response = agent.query(enhanced_prompt)
            answer = str(response)
        elif hasattr(agent, 'complete'):
            # Use complete() method for models without chat templates
            response = agent.complete(enhanced_prompt)
            answer = response.text if hasattr(response, 'text') else str(response)
        elif hasattr(agent, 'chat'):
            # Only use chat if it's the MockLLM or a proper chat model
            try:
                response = agent.chat([{"role": "user", "content": enhanced_prompt}])
                answer = response.message.content if hasattr(response, 'message') else str(response)
            except Exception as chat_error:
                # Fallback to complete if chat fails
                print(f"⚠️ Chat method failed, trying complete: {chat_error}")
                if hasattr(agent, 'complete'):
                    response = agent.complete(enhanced_prompt)
                    answer = response.text if hasattr(response, 'text') else str(response)
                else:
                    raise chat_error
        else:
            answer = "Mock response: I would analyze this question and provide an answer."

        # Clean up the answer
        answer = str(answer).strip()

        # Remove common prefixes from the answer
        for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
            if answer.startswith(prefix):
                answer = answer[len(prefix):].strip()

        print(f"✅ Generated answer: {answer[:50]}...")
        return answer

    except Exception as e:
        error_msg = f"❌ Error processing question: {str(e)}"
        print(error_msg)
        return error_msg


def process_all_questions() -> str:
    """Process all GAIA questions and prepare answers for submission.

    Writes the answers to ``ANSWERS_FILE`` and returns a human-readable
    summary (or an error string).
    """
    if not AGENT_READY:
        return "❌ Agent not ready."

    try:
        print("📥 Fetching all GAIA questions...")
        questions = GaiaAPI.get_questions()
        processed_answers = []

        print(f"🔄 Processing {len(questions)} questions...")
        for i, question in enumerate(questions):
            print(f"Processing question {i + 1}/{len(questions)}: {question['task_id']}")
            answer = process_single_question(question['question'])
            processed_answers.append({
                "task_id": question['task_id'],
                "submitted_answer": answer
            })

        # Save answers to file for the submission step
        with open(ANSWERS_FILE, "w") as f:
            json.dump(processed_answers, f, indent=2)

        summary = f"✅ Processed {len(processed_answers)} questions.\n"
        summary += f"💾 Answers saved to {ANSWERS_FILE}\n"
        summary += "📋 First 3 answers:\n"
        for ans in processed_answers[:3]:
            summary += f"- {ans['task_id']}: {ans['submitted_answer'][:50]}...\n"

        print(summary)
        return summary

    except Exception as e:
        error_msg = f"❌ Error processing questions: {str(e)}"
        print(error_msg)
        traceback.print_exc()
        return error_msg


def submit_to_gaia(username: str, code_url: str) -> str:
    """Submit previously processed answers to the GAIA benchmark.

    Requires ``process_all_questions`` to have been run first so that
    ``ANSWERS_FILE`` exists. Returns a human-readable result string.
    """
    if not AGENT_READY:
        return "❌ Agent not ready."

    if not username or not code_url:
        return "❌ Please provide both username and code URL."

    try:
        with open(ANSWERS_FILE, "r") as f:
            answers = json.load(f)
        print(f"📤 Submitting {len(answers)} answers...")
    except FileNotFoundError:
        return "❌ No processed answers found. Please process them first."
    except json.JSONDecodeError as e:
        # A partial/corrupted write should be reported, not crash the UI.
        return f"❌ Answers file is corrupted ({e}). Please re-process the questions."

    try:
        result = GaiaAPI.submit_answers(username, code_url, answers)
        if "error" in result:
            return f"❌ Submission failed: {result['error']}"

        score = result.get("score", "Unknown")
        success_msg = f"✅ Submission successful!\n📊 Score: {score}"
        print(success_msg)
        return success_msg

    except Exception as e:
        error_msg = f"❌ Submission error: {str(e)}"
        print(error_msg)
        return error_msg


def get_sample_question() -> str:
    """Load a sample question for testing."""
    try:
        question = GaiaAPI.get_random_question()
        return question['question']
    except Exception as e:
        return f"Error loading sample question: {str(e)}"


def get_system_status() -> str:
    """Get detailed system status for debugging."""
    status = "🔍 System Status:\n\n"

    # Agent status
    status += f"🤖 Agent Ready: {'✅ Yes' if AGENT_READY else '❌ No'}\n"
    if initialization_error:
        status += f"❌ Error: {initialization_error}\n"

    # Agent info
    status += f"🧠 LLM Type: {agent_info.get('llm_type', 'Unknown')}\n"
    status += f"🔧 Agent Type: {agent_info.get('agent_type', 'Unknown')}\n"
    status += f"🛠️ Tools Count: {agent_info.get('tools_count', 0)}\n"

    # Environment
    status += "\n📁 Environment Variables:\n"
    for var in ["NLTK_DATA", "HF_HOME", "MPLCONFIGDIR", "TORCH_HOME"]:
        path = os.environ.get(var, 'Not set')
        exists = "✅" if os.path.exists(path) else "❌"
        status += f"  {var}: {path} {exists}\n"

    # Directory permissions
    status += "\n📂 Directory Status:\n"
    for path in ["/app", "/tmp"]:
        try:
            writable = os.access(path, os.W_OK)
            status += f"  {path}: {'✅ Writable' if writable else '❌ Not writable'}\n"
        except OSError:
            status += f"  {path}: ❌ Error checking\n"

    return status


# ---------- Gradio UI ----------
with gr.Blocks(title="🦙 GAIA LlamaIndex Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
# 🦙 GAIA Benchmark Agent with LlamaIndex

This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.

**Status:** {"✅ Ready" if AGENT_READY else "❌ Not Ready"}
{f"**Error:** {initialization_error}" if initialization_error else ""}
""")

    with gr.Tab("🔬 Test Single Question"):
        gr.Markdown("Test the agent with individual questions")
        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter a GAIA question or click 'Load Sample'",
                    lines=3
                )
                with gr.Row():
                    sample_btn = gr.Button("🎲 Load Sample Question")
                    process_btn = gr.Button("🚀 Process Question", variant="primary")
            with gr.Column():
                answer_output = gr.Textbox(
                    label="Agent Answer",
                    lines=5,
                    interactive=False
                )

        sample_btn.click(get_sample_question, outputs=question_input)
        process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)

    with gr.Tab("📊 Full Evaluation"):
        gr.Markdown("Process all GAIA questions and prepare for submission")
        process_all_btn = gr.Button("🔄 Process All Questions", variant="primary")
        processing_output = gr.Textbox(label="Processing Status", lines=10, interactive=False)
        process_all_btn.click(process_all_questions, outputs=processing_output)

    with gr.Tab("🏆 Submit to GAIA"):
        gr.Markdown("""
Submit your processed answers to the GAIA benchmark for official scoring.

**Requirements:**
1. Your Hugging Face username
2. Link to your Space code (e.g., https://huggingface.co/spaces/your-username/gaia-agent)
""")
        with gr.Row():
            with gr.Column():
                username_input = gr.Textbox(label="HF Username", placeholder="your-username")
                code_url_input = gr.Textbox(label="Space Code URL", placeholder="https://huggingface.co/spaces/your-username/gaia-agent")
                submit_btn = gr.Button("🎯 Submit to GAIA", variant="primary")
            with gr.Column():
                submission_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)

        submit_btn.click(submit_to_gaia, inputs=[username_input, code_url_input], outputs=submission_output)

    with gr.Tab("ℹ️ System Status"):
        gr.Markdown("## System Information and Debugging")
        refresh_btn = gr.Button("🔄 Refresh Status")
        status_output = gr.Textbox(label="System Status", lines=20, interactive=False)

        # Load initial status
        demo.load(get_system_status, outputs=status_output)
        refresh_btn.click(get_system_status, outputs=status_output)


if __name__ == "__main__":
    print("🚀 Starting Gradio interface...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )