Spaces:

LamiaYT
/

gaia-llamaindex-agent

Sleeping

File size: 8,637 Bytes

# app.py
import gradio as gr
import os
from typing import List, Dict
import json

# Import project modules and LlamaIndex, then build the agent.
# Everything sits inside one try block so that a failed import or a failed
# model/agent construction does not abort module loading: the rest of the file
# still runs and the handlers below check AGENT_READY before using the agent.
try:
    from agent.local_llm import LocalLLM
    from agent.tools import gaia_tools
    from utils.gaia_api import GaiaAPI
    from llama_index.core.agent import ReActAgent
    from llama_index.core.memory import ChatMemoryBuffer
    
    # Initialize components
    print("Initializing Local LLM...")
    local_llm = LocalLLM()
    llm = local_llm.get_llm()
    
    print("Creating ReAct Agent...")
    # Chat memory is created with token_limit=2000 and handed to the agent below
    memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
    agent = ReActAgent.from_tools(
        tools=gaia_tools,
        llm=llm,
        memory=memory,
        verbose=True,
        max_iterations=3  # Limit iterations to avoid long processing
    )
    
    print("Agent initialized successfully!")
    # Module-level flag read by the handlers and the UI
    AGENT_READY = True
    
except Exception as e:
    # Any import or construction error lands here; only the message is printed
    # (no traceback). NOTE: if one of the imports above failed, the names
    # imported after it (possibly including GaiaAPI) are left undefined.
    print(f"Failed to initialize agent: {str(e)}")
    AGENT_READY = False
    agent = None

def _strip_answer_prefixes(answer: str) -> str:
    """Remove leading answer labels such as "FINAL ANSWER:" from *answer*.

    Matching is case-insensitive and is repeated until no known label is left,
    so stacked labels (e.g. "Answer: FINAL ANSWER: 42") are removed completely.
    Surrounding whitespace is stripped as well.
    """
    labels = ("final answer:", "the answer is:", "answer:")
    cleaned = answer.strip()
    while True:
        for label in labels:
            # Compare only the leading slice so the removed length always
            # matches the characters actually present in the answer.
            if cleaned[:len(label)].lower() == label:
                cleaned = cleaned[len(label):].strip()
                break
        else:
            # No label matched on this pass: nothing left to strip.
            return cleaned


def process_single_question(question_text: str) -> str:
    """Process a single GAIA question through the agent.

    Args:
        question_text: The question to send to the agent.

    Returns:
        The agent's answer with known answer labels removed, or a message
        starting with "❌" when the agent is not ready, the question is empty,
        or the agent raises while answering.
    """
    if not AGENT_READY:
        return "❌ Agent not ready. Please check the logs for initialization errors."

    # Do not spend an agent run on an empty or whitespace-only question.
    if not question_text or not question_text.strip():
        return "❌ Please enter a question."

    try:
        # Add instruction to give direct answers only
        enhanced_prompt = f"""
        Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.
        
        Question: {question_text}
        """

        response = agent.query(enhanced_prompt)

        # The model may still prepend a label despite the instruction above.
        return _strip_answer_prefixes(str(response))

    except Exception as e:
        # Best-effort handler for the UI: report the failure as text.
        return f"❌ Error processing question: {e}"

def process_all_questions() -> str:
    """Run the agent over every GAIA question and store the answers on disk.

    Each question returned by ``GaiaAPI.get_questions()`` is answered with
    ``process_single_question``; the resulting ``task_id`` /
    ``submitted_answer`` records are written to ``gaia_answers.json``.

    Returns:
        A summary with the number of processed questions and a preview of the
        first three answers, or an error message when the agent is not ready
        or any step raises.
    """
    if not AGENT_READY:
        return "❌ Agent not ready. Cannot process questions."

    try:
        questions = GaiaAPI.get_questions()
        processed_answers = []

        for position, item in enumerate(questions, start=1):
            print(f"Processing question {position}/{len(questions)}: {item['task_id']}")
            reply = process_single_question(item['question'])
            processed_answers.append(
                {"task_id": item['task_id'], "submitted_answer": reply}
            )

        # Persist the answers so they can be reviewed and submitted later
        with open("gaia_answers.json", "w") as out_file:
            json.dump(processed_answers, out_file, indent=2)

        # Preview shows at most the first 50 characters of the first 3 answers
        preview = "".join(
            f"- {entry['task_id']}: {entry['submitted_answer'][:50]}...\n"
            for entry in processed_answers[:3]
        )
        return (
            f"✅ Processed {len(processed_answers)} questions.\n"
            "Answers saved to gaia_answers.json\n"
            "First 3 answers:\n"
            f"{preview}"
        )

    except Exception as err:
        return f"❌ Error processing all questions: {err!s}"

def submit_to_gaia(username: str, code_url: str) -> str:
    """Submit the answers stored in ``gaia_answers.json`` to the GAIA benchmark.

    Args:
        username: Hugging Face username to submit under. Surrounding
            whitespace is ignored.
        code_url: URL of the Space code. Surrounding whitespace is ignored.

    Returns:
        A success message including the reported score, or a message starting
        with "❌" when the agent is not ready, an input is blank, no answers
        are available, or the submission fails.
    """
    if not AGENT_READY:
        return "❌ Agent not ready. Cannot submit."

    # Normalize the inputs so whitespace-only values are rejected and padded
    # values (e.g. a pasted trailing space) are not submitted verbatim.
    username = (username or "").strip()
    code_url = (code_url or "").strip()
    if not username or not code_url:
        return "❌ Please provide both username and code URL."

    try:
        # Load processed answers
        try:
            with open("gaia_answers.json", "r") as f:
                answers = json.load(f)
        except FileNotFoundError:
            return "❌ No processed answers found. Please process questions first."

        # An empty answers file carries nothing worth submitting.
        if not answers:
            return "❌ No processed answers found. Please process questions first."

        # Submit to GAIA
        result = GaiaAPI.submit_answers(username, code_url, answers)

        if "error" in result:
            return f"❌ Submission failed: {result['error']}"

        score = result.get('score', 'Unknown')
        return f"✅ Submission successful!\n📊 Score: {score}\n🎯 Check the leaderboard for your ranking!"

    except Exception as e:
        # Covers unreadable/corrupt answer files as well as API failures.
        return f"❌ Submission error: {e}"

def get_sample_question() -> str:
    """Return the text of one random GAIA question for the test tab.

    Any failure while fetching the question is returned as an error string
    instead of being raised.
    """
    try:
        return GaiaAPI.get_random_question()['question']
    except Exception as err:
        return f"Error loading sample question: {err!s}"

# Create Gradio interface
# The status label is computed up front and interpolated into the header
# Markdown; a plain (non-f) string would show the placeholder expression
# literally instead of the agent status.
status_text = "✅ Ready" if AGENT_READY else "❌ Not Ready"

with gr.Blocks(title="🦙 GAIA LlamaIndex Agent") as demo:
    gr.Markdown(f"""
    # 🦙 GAIA Benchmark Agent with LlamaIndex
    
    This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.
    
    **Status:** {status_text}
    """)
    
    # Tab 1: run the agent on one question typed by the user or loaded as a sample
    with gr.Tab("🔬 Test Single Question"):
        gr.Markdown("Test the agent with individual questions")
        
        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter a GAIA question or click 'Load Sample'",
                    lines=3
                )
                with gr.Row():
                    sample_btn = gr.Button("🎲 Load Sample Question")
                    process_btn = gr.Button("🚀 Process Question", variant="primary")
            
            with gr.Column():
                answer_output = gr.Textbox(
                    label="Agent Answer",
                    lines=5,
                    interactive=False
                )
        
        sample_btn.click(get_sample_question, outputs=question_input)
        process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)
    
    # Tab 2: answer every question and write the answers to gaia_answers.json
    with gr.Tab("📊 Full Evaluation"):
        gr.Markdown("Process all GAIA questions and prepare for submission")
        
        with gr.Row():
            process_all_btn = gr.Button("🔄 Process All Questions", variant="primary")
            
        processing_output = gr.Textbox(
            label="Processing Status",
            lines=10,
            interactive=False
        )
        
        process_all_btn.click(process_all_questions, outputs=processing_output)
    
    # Tab 3: submit the previously saved answers
    with gr.Tab("🏆 Submit to GAIA"):
        gr.Markdown("""
        Submit your processed answers to the GAIA benchmark for official scoring.
        
        **Requirements:**
        1. Your Hugging Face username
        2. Link to your Space code (e.g., `https://huggingface.co/spaces/YOUR_USERNAME/gaia-llamaindex-agent/tree/main`)
        3. Questions must be processed first in the "Full Evaluation" tab
        """)
        
        with gr.Row():
            with gr.Column():
                username_input = gr.Textbox(
                    label="HF Username",
                    placeholder="your-username"
                )
                code_url_input = gr.Textbox(
                    label="Space Code URL",
                    placeholder="https://huggingface.co/spaces/your-username/gaia-llamaindex-agent/tree/main"
                )
                submit_btn = gr.Button("🎯 Submit to GAIA", variant="primary")
            
            with gr.Column():
                submission_output = gr.Textbox(
                    label="Submission Result",
                    lines=5,
                    interactive=False
                )
        
        submit_btn.click(
            submit_to_gaia,
            inputs=[username_input, code_url_input],
            outputs=submission_output
        )
    
    # Tab 4: static help text
    with gr.Tab("ℹ️ Info"):
        gr.Markdown("""
        ## About This Agent
        
        This agent combines:
        - **LlamaIndex**: For orchestrating the agent workflow
        - **Local LLM**: Running entirely on Hugging Face Spaces  
        - **ReAct Framework**: For reasoning and acting iteratively
        - **GAIA Tools**: Web search, calculation, file reading, etc.
        
        ## Usage Tips
        
        1. **Start with single questions** to test the agent
        2. **Process all questions** when ready for full evaluation
        3. **Submit to GAIA** for official scoring
        
        ## Troubleshooting
        
        - If agent fails to initialize, check the model loading
        - For memory issues, try restarting the Space
        - For API errors, verify the GAIA endpoint URL
        """)

# Start the Gradio app only when this file is executed as a script (not when
# it is imported). show_error=True is forwarded to Gradio's launch();
# presumably it surfaces handler errors in the UI — confirm in the Gradio docs.
if __name__ == "__main__":
    demo.launch(show_error=True)