File size: 14,447 Bytes
580bcf5
e51386e
580bcf5
0fda38b
e51386e
f1c2e53
 
 
 
 
e51386e
0fda38b
 
72146a4
f1c2e53
 
bbe4b6b
 
 
 
 
f1c2e53
bbe4b6b
 
 
 
 
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fda38b
72146a4
 
 
 
 
 
f1c2e53
72146a4
f1c2e53
0fda38b
 
e51386e
8ac5ef4
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e51386e
0fda38b
e51386e
 
72146a4
f1c2e53
e51386e
f1c2e53
 
 
72146a4
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
e51386e
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2828102
f1c2e53
 
0fda38b
580bcf5
 
 
72146a4
 
 
 
e51386e
 
 
 
8ac5ef4
580bcf5
0fda38b
e51386e
0fda38b
 
72146a4
f1c2e53
 
c2e1cfe
72146a4
 
c2e1cfe
 
 
 
72146a4
c2e1cfe
 
 
 
 
 
 
 
 
 
 
 
72146a4
c2e1cfe
72146a4
c2e1cfe
 
 
e51386e
0fda38b
e51386e
580bcf5
 
e51386e
f1c2e53
580bcf5
e51386e
8ac5ef4
f1c2e53
 
 
0fda38b
580bcf5
 
 
e51386e
 
8ac5ef4
f1c2e53
8ac5ef4
580bcf5
e51386e
f1c2e53
580bcf5
0fda38b
580bcf5
 
 
8ac5ef4
 
e51386e
f1c2e53
 
 
580bcf5
e51386e
580bcf5
f1c2e53
 
580bcf5
 
e51386e
f1c2e53
580bcf5
e51386e
580bcf5
f1c2e53
 
 
 
0fda38b
580bcf5
 
 
e51386e
 
580bcf5
 
e51386e
 
f1c2e53
 
e51386e
f1c2e53
e51386e
 
 
580bcf5
8ac5ef4
580bcf5
 
e51386e
f1c2e53
 
 
580bcf5
f1c2e53
 
 
0fda38b
580bcf5
 
 
 
 
8ac5ef4
580bcf5
8ac5ef4
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fda38b
e51386e
72146a4
e51386e
0fda38b
 
 
 
 
72146a4
0fda38b
e51386e
580bcf5
 
e51386e
580bcf5
 
 
 
 
 
 
 
 
 
e51386e
580bcf5
 
 
 
 
 
e51386e
580bcf5
 
e51386e
580bcf5
 
e51386e
 
 
 
580bcf5
e51386e
580bcf5
 
0fda38b
e51386e
0fda38b
 
 
 
e51386e
580bcf5
 
e51386e
 
580bcf5
e51386e
580bcf5
e51386e
 
 
 
f1c2e53
 
 
 
 
 
 
 
 
8ac5ef4
580bcf5
f1c2e53
 
 
 
bbe4b6b
 
f1c2e53
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
import os
import sys
import json
import traceback
from typing import List, Dict
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

import gradio as gr

# --- Environment variable setup to fix permission issues ---
def setup_environment():
    """Point library cache/data env vars at writable /tmp paths and create them.

    Must run before nltk/matplotlib/huggingface/torch are imported so those
    libraries never try to write to read-only default locations.  If a
    preferred path is not writable, falls back to ``/tmp/<varname>``.
    """
    env_vars = {
        "NLTK_DATA": "/tmp/nltk_data",
        "MPLCONFIGDIR": "/tmp/matplotlib_cache",
        "HF_HOME": "/tmp/huggingface_cache",
        "TORCH_HOME": "/tmp/torch_cache",
        "TRANSFORMERS_CACHE": "/tmp/huggingface_cache"
    }

    # Single guarded pass.  (A previous unguarded duplicate loop ran first
    # and would crash on PermissionError before this fallback logic.)
    for var, path in env_vars.items():
        os.environ[var] = path
        # Create directory if it doesn't exist
        try:
            os.makedirs(path, exist_ok=True)
            print(f"βœ… Created/verified directory: {path}")
        except PermissionError:
            print(f"⚠️ Permission denied for {path}, using /tmp fallback")
            fallback_path = f"/tmp/{var.lower()}"
            os.environ[var] = fallback_path
            os.makedirs(fallback_path, exist_ok=True)
        except Exception as e:
            print(f"❌ Error setting up {var}: {e}")

# Setup environment first
setup_environment()

# Import nltk AFTER setting environment variables
# (nltk reads NLTK_DATA at import time, so ordering matters here)
try:
    import nltk
    # Download required NLTK data upfront
    # quiet=True keeps the downloader from spamming the startup log
    nltk.download('punkt', download_dir=os.environ["NLTK_DATA"], quiet=True)
    nltk.download('stopwords', download_dir=os.environ["NLTK_DATA"], quiet=True)
    print("βœ… NLTK data downloaded successfully")
except Exception as e:
    # Non-fatal: the app can still run; tools needing NLTK may degrade
    print(f"⚠️ NLTK setup warning: {e}")

# Add current directory to path for local imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import dependencies with better error handling
try:
    from utils.gaia_api import GaiaAPI
    print("βœ… GaiaAPI imported successfully")
except ImportError as e:
    print(f"⚠️ Failed to import GaiaAPI: {e}")
    # Create a fallback GaiaAPI
    # Stub with the same three classmethods so the UI keeps working
    # (returns canned data / an error result instead of real API calls).
    class GaiaAPI:
        @classmethod
        def get_questions(cls):
            return [{"task_id": "fallback", "question": "What is 2+2?"}]
        @classmethod 
        def get_random_question(cls):
            return {"task_id": "fallback", "question": "What is 2+2?"}
        @classmethod
        def submit_answers(cls, username, code_url, answers):
            return {"error": "GaiaAPI not available", "score": 0}

# Initialize global agent state (mutated by initialize_agent() below)
AGENT_READY = False           # True once an agent (real or mock) is usable
agent = None                  # ReActAgent instance, or the mock LLM, or None
initialization_error = None   # human-readable failure reason, if any
agent_info = {}               # diagnostics shown in the System Status tab

def initialize_agent():
    """Initialize the LlamaIndex agent with comprehensive error handling.

    Populates the module globals: sets ``agent`` and ``AGENT_READY`` on
    success, or ``initialization_error`` on failure.  Progress/diagnostic
    details are recorded in ``agent_info`` for the status tab.  Never raises.
    """
    global agent, AGENT_READY, initialization_error, agent_info
    
    try:
        print("πŸ”„ Starting agent initialization...")
        
        # Import agent-related modules
        # (imported lazily here so a missing dependency is reported in the
        # UI instead of crashing the whole app at module import time)
        print("πŸ“¦ Importing modules...")
        from agent.local_llm import LocalLLM
        from agent.tools import gaia_tools
        from llama_index.core.agent import ReActAgent
        from llama_index.core.memory import ChatMemoryBuffer
        
        agent_info["modules_imported"] = True
        print("βœ… All modules imported successfully!")

        print("πŸ€– Initializing Local LLM...")
        local_llm = LocalLLM()
        llm = local_llm.get_llm()
        agent_info["llm_type"] = llm.__class__.__name__
        
        print("🧠 Creating ReAct Agent...")
        memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
        
        # Check if we have a proper LLM or mock
        if hasattr(llm, 'chat') and llm.__class__.__name__ != 'MockLLM':
            agent = ReActAgent.from_tools(
                tools=gaia_tools,
                llm=llm,
                memory=memory,
                verbose=True,
                max_iterations=3
            )
            agent_info["agent_type"] = "ReActAgent"
            print("βœ… ReAct Agent initialized successfully!")
        else:
            agent = llm  # Use the mock LLM directly
            agent_info["agent_type"] = "MockLLM"
            print("⚠️ Using mock mode - agent partially ready")
        
        # gaia_tools may be absent from locals() if its import failed above
        agent_info["tools_count"] = len(gaia_tools) if 'gaia_tools' in locals() else 0
        AGENT_READY = True
        print("πŸŽ‰ Agent initialization complete!")
        
    except Exception as e:
        # Record the failure so the UI can display it; leave app running
        error_msg = f"Failed to initialize agent: {str(e)}"
        print(f"❌ {error_msg}")
        traceback.print_exc()
        AGENT_READY = False
        agent = None
        initialization_error = error_msg
        agent_info["error"] = error_msg

# Initialize agent
initialize_agent()

def process_single_question(question_text: str) -> str:
    """Process a single GAIA question through the agent.

    Args:
        question_text: The raw question text to answer.

    Returns:
        The cleaned answer string, or a human-readable "❌ ..." message
        when the agent is unavailable or processing fails.  Never raises.
    """
    if not AGENT_READY:
        error_msg = "❌ Agent not ready. "
        if initialization_error:
            error_msg += f"Error: {initialization_error}"
        return error_msg

    if not question_text.strip():
        return "❌ Please enter a question."

    try:
        enhanced_prompt = f"""
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.

Question: {question_text}
"""
        
        print(f"πŸ€” Processing question: {question_text[:50]}...")
        
        # Dispatch by capability: query() for ReActAgent, then complete()
        # for models without chat templates, then chat() with a complete()
        # fallback.  Every branch assigns `answer` explicitly (the previous
        # version relied on a fragile `'answer' not in locals()` check).
        if hasattr(agent, 'query'):
            response = agent.query(enhanced_prompt)
            answer = str(response).strip()
        elif hasattr(agent, 'complete'):
            # Use complete() method for models without chat templates
            response = agent.complete(enhanced_prompt)
            answer = response.text if hasattr(response, 'text') else str(response)
        elif hasattr(agent, 'chat'):
            # Only use chat if it's the MockLLM or a proper chat model
            try:
                response = agent.chat([{"role": "user", "content": enhanced_prompt}])
                answer = response.message.content if hasattr(response, 'message') else str(response)
            except Exception as chat_error:
                # Fallback to complete if chat fails
                print(f"⚠️ Chat method failed, trying complete: {chat_error}")
                if hasattr(agent, 'complete'):
                    response = agent.complete(enhanced_prompt)
                    answer = response.text if hasattr(response, 'text') else str(response)
                else:
                    raise chat_error
        else:
            answer = "Mock response: I would analyze this question and provide an answer."

        answer = str(answer).strip()

        # Remove common prefixes from the answer
        for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
            if answer.startswith(prefix):
                answer = answer[len(prefix):].strip()

        print(f"βœ… Generated answer: {answer[:50]}...")
        return answer

    except Exception as e:
        error_msg = f"❌ Error processing question: {str(e)}"
        print(error_msg)
        return error_msg

def process_all_questions() -> str:
    """Run the agent over every GAIA question and save answers to disk.

    Answers are written to /app/gaia_answers.json in the submission format
    ({"task_id", "submitted_answer"}).  Returns a human-readable summary,
    or an "❌ ..." message on failure.  Never raises.
    """
    if not AGENT_READY:
        return "❌ Agent not ready."

    try:
        print("πŸ“₯ Fetching all GAIA questions...")
        questions = GaiaAPI.get_questions()
        total = len(questions)

        print(f"πŸ”„ Processing {total} questions...")
        processed_answers = []
        for idx, item in enumerate(questions, start=1):
            print(f"Processing question {idx}/{total}: {item['task_id']}")
            processed_answers.append({
                "task_id": item['task_id'],
                "submitted_answer": process_single_question(item['question']),
            })

        # Persist for the separate "Submit to GAIA" step
        output_file = "/app/gaia_answers.json"
        with open(output_file, "w") as f:
            json.dump(processed_answers, f, indent=2)

        parts = [
            f"βœ… Processed {len(processed_answers)} questions.\n",
            f"πŸ’Ύ Answers saved to {output_file}\n",
            "πŸ“‹ First 3 answers:\n",
        ]
        parts.extend(
            f"- {entry['task_id']}: {entry['submitted_answer'][:50]}...\n"
            for entry in processed_answers[:3]
        )
        summary = "".join(parts)

        print(summary)
        return summary

    except Exception as e:
        error_msg = f"❌ Error processing questions: {str(e)}"
        print(error_msg)
        traceback.print_exc()
        return error_msg

def submit_to_gaia(username: str, code_url: str) -> str:
    """Submit previously saved answers to the GAIA benchmark.

    Args:
        username: Hugging Face username for attribution.
        code_url: Public URL of the Space's source code.

    Returns:
        A success message with the score, or an "❌ ..." message.
    """
    if not AGENT_READY:
        return "❌ Agent not ready."

    if not username or not code_url:
        return "❌ Please provide both username and code URL."

    # Answers must have been produced by process_all_questions() first
    answers_file = "/app/gaia_answers.json"
    try:
        with open(answers_file, "r") as f:
            answers = json.load(f)
        print(f"πŸ“€ Submitting {len(answers)} answers...")
    except FileNotFoundError:
        return "❌ No processed answers found. Please process them first."

    try:
        result = GaiaAPI.submit_answers(username, code_url, answers)
        if "error" in result:
            return f"❌ Submission failed: {result['error']}"
        success_msg = f"βœ… Submission successful!\nπŸ“Š Score: {result.get('score', 'Unknown')}"
        print(success_msg)
        return success_msg
    except Exception as e:
        error_msg = f"❌ Submission error: {str(e)}"
        print(error_msg)
        return error_msg

def get_sample_question() -> str:
    """Fetch one random GAIA question's text for the single-question tab."""
    try:
        return GaiaAPI.get_random_question()['question']
    except Exception as e:
        return f"Error loading sample question: {str(e)}"

def get_system_status() -> str:
    """Get detailed system status for debugging.

    Reports agent readiness, LLM/agent metadata from ``agent_info``, the
    cache environment variables, and directory writability.  Returns a
    multi-line string; never raises.
    """
    status = "πŸ” System Status:\n\n"
    
    # Agent status
    status += f"πŸ€– Agent Ready: {'βœ… Yes' if AGENT_READY else '❌ No'}\n"
    if initialization_error:
        status += f"❌ Error: {initialization_error}\n"
    
    # Agent info (populated during initialize_agent)
    status += f"🧠 LLM Type: {agent_info.get('llm_type', 'Unknown')}\n"
    status += f"πŸ”§ Agent Type: {agent_info.get('agent_type', 'Unknown')}\n"
    status += f"πŸ› οΈ Tools Count: {agent_info.get('tools_count', 0)}\n"
    
    # Environment
    status += "\nπŸ“ Environment Variables:\n"
    for var in ["NLTK_DATA", "HF_HOME", "MPLCONFIGDIR", "TORCH_HOME"]:
        path = os.environ.get(var, 'Not set')
        exists = "βœ…" if os.path.exists(path) else "❌"
        status += f"  {var}: {path} {exists}\n"
    
    # Directory permissions
    status += "\nπŸ“‚ Directory Status:\n"
    for path in ["/app", "/tmp"]:
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; os.access failures are OSError.
        try:
            writable = os.access(path, os.W_OK)
            status += f"  {path}: {'βœ… Writable' if writable else '❌ Not writable'}\n"
        except OSError:
            status += f"  {path}: ❌ Error checking\n"
    
    return status

# ---------- Gradio UI ----------
# Four tabs: single-question test, batch evaluation, submission, and a
# debugging/status view.  Status shown in the header reflects module-load
# time (initialize_agent() has already run by this point).
with gr.Blocks(title="πŸ¦™ GAIA LlamaIndex Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
# πŸ¦™ GAIA Benchmark Agent with LlamaIndex

This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.

**Status:** {"βœ… Ready" if AGENT_READY else "❌ Not Ready"}
{f"**Error:** {initialization_error}" if initialization_error else ""}
""")

    # Tab 1: try the agent on one question at a time
    with gr.Tab("πŸ”¬ Test Single Question"):
        gr.Markdown("Test the agent with individual questions")

        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter a GAIA question or click 'Load Sample'",
                    lines=3
                )
                with gr.Row():
                    sample_btn = gr.Button("🎲 Load Sample Question")
                    process_btn = gr.Button("πŸš€ Process Question", variant="primary")

            with gr.Column():
                answer_output = gr.Textbox(
                    label="Agent Answer",
                    lines=5,
                    interactive=False
                )

        sample_btn.click(get_sample_question, outputs=question_input)
        process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)

    # Tab 2: run the whole benchmark and save answers to disk
    with gr.Tab("πŸ“Š Full Evaluation"):
        gr.Markdown("Process all GAIA questions and prepare for submission")

        process_all_btn = gr.Button("πŸ”„ Process All Questions", variant="primary")
        processing_output = gr.Textbox(label="Processing Status", lines=10, interactive=False)

        process_all_btn.click(process_all_questions, outputs=processing_output)

    # Tab 3: submit the saved answers for official scoring
    with gr.Tab("πŸ† Submit to GAIA"):
        gr.Markdown("""
Submit your processed answers to the GAIA benchmark for official scoring.

**Requirements:**
1. Your Hugging Face username
2. Link to your Space code (e.g., https://huggingface.co/spaces/your-username/gaia-agent)
""")

        with gr.Row():
            with gr.Column():
                username_input = gr.Textbox(label="HF Username", placeholder="your-username")
                code_url_input = gr.Textbox(label="Space Code URL", placeholder="https://huggingface.co/spaces/your-username/gaia-agent")
                submit_btn = gr.Button("🎯 Submit to GAIA", variant="primary")

            with gr.Column():
                submission_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)

        submit_btn.click(submit_to_gaia, inputs=[username_input, code_url_input], outputs=submission_output)

    # Tab 4: live diagnostics for debugging permission/initialization issues
    with gr.Tab("ℹ️ System Status"):
        gr.Markdown("## System Information and Debugging")
        
        refresh_btn = gr.Button("πŸ”„ Refresh Status")
        status_output = gr.Textbox(label="System Status", lines=20, interactive=False)
        
        # Load initial status
        demo.load(get_system_status, outputs=status_output)
        refresh_btn.click(get_system_status, outputs=status_output)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the standard HF Spaces setup.
    print("πŸš€ Starting Gradio interface...")
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)