# Source: Hugging Face Space file by LamiaYT (commit 580bcf5, 8.64 kB)
# Commit message: "Complete GAIA agent with LlamaIndex - fixed all issues"
# app.py
import gradio as gr
import os
from typing import List, Dict
import json
# Import our modules
# Build the agent once at import time. Any failure (missing module, model
# load error, ...) is caught so the UI can still start and report the problem.
try:
    from agent.local_llm import LocalLLM
    from agent.tools import gaia_tools
    from utils.gaia_api import GaiaAPI
    from llama_index.core.agent import ReActAgent
    from llama_index.core.memory import ChatMemoryBuffer

    # Initialize components
    print("Initializing Local LLM...")
    local_llm = LocalLLM()
    llm = local_llm.get_llm()

    print("Creating ReAct Agent...")
    memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
    agent = ReActAgent.from_tools(
        tools=gaia_tools,
        llm=llm,
        memory=memory,
        verbose=True,
        max_iterations=3,  # Limit iterations to avoid long processing
    )
    print("Agent initialized successfully!")
    AGENT_READY = True
except Exception as e:
    import traceback

    print(f"Failed to initialize agent: {str(e)}")
    # The UI tells users to "check the logs", so log where the failure
    # happened, not just the exception message.
    traceback.print_exc()
    AGENT_READY = False
    agent = None
def _strip_answer_prefixes(answer: str) -> str:
    """Return *answer* without leading "FINAL ANSWER:"-style labels.

    Matching is case-insensitive and repeats until no label remains, so
    stacked labels such as "Answer: FINAL ANSWER: 42" reduce to "42".
    """
    labels = ("final answer:", "answer:", "the answer is:")
    cleaned = answer.strip()
    removed_label = True
    while removed_label:
        removed_label = False
        for label in labels:
            # Compare only the leading slice so lower-casing can never shift
            # the cut position for the rest of the text.
            if cleaned[:len(label)].lower() == label:
                cleaned = cleaned[len(label):].strip()
                removed_label = True
                break
    return cleaned


def process_single_question(question_text: str) -> str:
    """Process a single GAIA question through the agent.

    Args:
        question_text: The question to send to the agent.

    Returns:
        The agent's answer with surrounding whitespace and known answer
        labels removed, or a message starting with "❌" when the agent is
        not ready, the question is blank, or the query raises.
    """
    if not AGENT_READY:
        return "❌ Agent not ready. Please check the logs for initialization errors."
    if not question_text or not question_text.strip():
        # Avoid spending an agent run on an empty prompt.
        return "❌ Please enter a question."

    try:
        # Add instruction to give direct answers only
        enhanced_prompt = f"""
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.
Question: {question_text}
"""
        response = agent.query(enhanced_prompt)
        # The model may still emit a label despite the instruction above.
        return _strip_answer_prefixes(str(response))
    except Exception as e:
        return f"❌ Error processing question: {str(e)}"
def process_all_questions() -> str:
    """Process all GAIA questions and prepare answers for submission.

    Fetches the questions via ``GaiaAPI.get_questions()``, runs each one
    through ``process_single_question`` and writes the resulting list of
    ``{"task_id", "submitted_answer"}`` records to ``gaia_answers.json``.
    Whatever string ``process_single_question`` returns is stored as the
    answer for that task.

    Returns:
        A summary with the number of processed questions and a preview of the
        first three answers, or a message starting with "❌" on failure.
    """
    if not AGENT_READY:
        return "❌ Agent not ready. Cannot process questions."

    try:
        questions = GaiaAPI.get_questions()
        if not questions:
            # Do not overwrite a previous answers file with an empty list.
            return "❌ No questions were returned by the GAIA API."

        total = len(questions)
        processed_answers = []
        for position, question in enumerate(questions, start=1):
            task_id = question['task_id']
            print(f"Processing question {position}/{total}: {task_id}")
            processed_answers.append({
                "task_id": task_id,
                "submitted_answer": process_single_question(question['question']),
            })

        # Save answers to file for review
        with open("gaia_answers.json", "w", encoding="utf-8") as f:
            json.dump(processed_answers, f, indent=2)

        summary_lines = [
            f"✅ Processed {len(processed_answers)} questions.",
            "Answers saved to gaia_answers.json",
            "First 3 answers:",
        ]
        summary_lines.extend(
            f"- {ans['task_id']}: {ans['submitted_answer'][:50]}..."
            for ans in processed_answers[:3]
        )
        return "\n".join(summary_lines) + "\n"
    except Exception as e:
        return f"❌ Error processing all questions: {str(e)}"
def submit_to_gaia(username: str, code_url: str) -> str:
    """Submit the answers stored in ``gaia_answers.json`` to the GAIA benchmark.

    Args:
        username: Hugging Face username to submit under.
        code_url: URL of the Space code accompanying the submission.

    Returns:
        A success message including the reported score, or a message starting
        with "❌" describing why the submission could not be made.
    """
    if not AGENT_READY:
        return "❌ Agent not ready. Cannot submit."

    # Treat whitespace-only input the same as missing input.
    username = (username or "").strip()
    code_url = (code_url or "").strip()
    if not username or not code_url:
        return "❌ Please provide both username and code URL."

    try:
        # Load processed answers
        try:
            with open("gaia_answers.json", "r", encoding="utf-8") as f:
                answers = json.load(f)
        except FileNotFoundError:
            return "❌ No processed answers found. Please process questions first."
        except json.JSONDecodeError:
            return "❌ Saved answers file is corrupted. Please process questions again."
        if not answers:
            # An empty file would otherwise be sent as an empty submission.
            return "❌ No processed answers found. Please process questions first."

        # Submit to GAIA
        result = GaiaAPI.submit_answers(username, code_url, answers)
        if "error" in result:
            return f"❌ Submission failed: {result['error']}"

        score = result.get('score', 'Unknown')
        return f"✅ Submission successful!\n📊 Score: {score}\n🎯 Check the leaderboard for your ranking!"
    except Exception as e:
        return f"❌ Submission error: {str(e)}"
def get_sample_question() -> str:
    """Load a sample question for testing.

    Returns:
        The ``'question'`` field of the item returned by
        ``GaiaAPI.get_random_question()``, or a message starting with
        "Error loading sample question:" when it cannot be obtained.
    """
    try:
        sample = GaiaAPI.get_random_question()
        if not sample or "question" not in sample:
            # A bare KeyError/TypeError message would be cryptic in the UI.
            return "Error loading sample question: the API response did not include a question."
        return sample["question"]
    except Exception as e:
        return f"Error loading sample question: {str(e)}"
# Create Gradio interface
# NOTE(review): the container nesting below (Row/Column placement) was
# reconstructed from a flattened listing - confirm against the deployed layout.

# Resolve the status once; the header needs an f-string to show it, otherwise
# the raw "{... if AGENT_READY else ...}" expression is displayed verbatim.
_status_label = "✅ Ready" if AGENT_READY else "❌ Not Ready"

_HEADER_MD = f"""
# 🦙 GAIA Benchmark Agent with LlamaIndex
This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.
**Status:** {_status_label}
"""

_SUBMIT_MD = """
Submit your processed answers to the GAIA benchmark for official scoring.
**Requirements:**
1. Your Hugging Face username
2. Link to your Space code (e.g., `https://huggingface.co/spaces/YOUR_USERNAME/gaia-llamaindex-agent/tree/main`)
3. Questions must be processed first in the "Full Evaluation" tab
"""

_INFO_MD = """
## About This Agent
This agent combines:
- **LlamaIndex**: For orchestrating the agent workflow
- **Local LLM**: Running entirely on Hugging Face Spaces
- **ReAct Framework**: For reasoning and acting iteratively
- **GAIA Tools**: Web search, calculation, file reading, etc.
## Usage Tips
1. **Start with single questions** to test the agent
2. **Process all questions** when ready for full evaluation
3. **Submit to GAIA** for official scoring
## Troubleshooting
- If agent fails to initialize, check the model loading
- For memory issues, try restarting the Space
- For API errors, verify the GAIA endpoint URL
"""

with gr.Blocks(title="🦙 GAIA LlamaIndex Agent") as demo:
    gr.Markdown(_HEADER_MD)

    # Tab 1: run one question through the agent.
    with gr.Tab("🔬 Test Single Question"):
        gr.Markdown("Test the agent with individual questions")
        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter a GAIA question or click 'Load Sample'",
                    lines=3,
                )
                with gr.Row():
                    sample_btn = gr.Button("🎲 Load Sample Question")
                    process_btn = gr.Button("🚀 Process Question", variant="primary")
            with gr.Column():
                answer_output = gr.Textbox(
                    label="Agent Answer",
                    lines=5,
                    interactive=False,
                )
        sample_btn.click(get_sample_question, outputs=question_input)
        process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)

    # Tab 2: answer every benchmark question and save the results.
    with gr.Tab("📊 Full Evaluation"):
        gr.Markdown("Process all GAIA questions and prepare for submission")
        with gr.Row():
            process_all_btn = gr.Button("🔄 Process All Questions", variant="primary")
        processing_output = gr.Textbox(
            label="Processing Status",
            lines=10,
            interactive=False,
        )
        process_all_btn.click(process_all_questions, outputs=processing_output)

    # Tab 3: send the saved answers for scoring.
    with gr.Tab("🏆 Submit to GAIA"):
        gr.Markdown(_SUBMIT_MD)
        with gr.Row():
            with gr.Column():
                username_input = gr.Textbox(
                    label="HF Username",
                    placeholder="your-username",
                )
                code_url_input = gr.Textbox(
                    label="Space Code URL",
                    placeholder="https://huggingface.co/spaces/your-username/gaia-llamaindex-agent/tree/main",
                )
                submit_btn = gr.Button("🎯 Submit to GAIA", variant="primary")
            with gr.Column():
                submission_output = gr.Textbox(
                    label="Submission Result",
                    lines=5,
                    interactive=False,
                )
        submit_btn.click(
            submit_to_gaia,
            inputs=[username_input, code_url_input],
            outputs=submission_output,
        )

    # Tab 4: static help text.
    with gr.Tab("ℹ️ Info"):
        gr.Markdown(_INFO_MD)

if __name__ == "__main__":
    demo.launch(show_error=True)