Spaces:
Sleeping
Sleeping
File size: 8,637 Bytes
580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 e0860a0 580bcf5 e0860a0 580bcf5 e0860a0 2828102 580bcf5 2828102 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 8ac5ef4 580bcf5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
# app.py
import gradio as gr
import os
from typing import List, Dict
import json
# Import our modules
# Build the agent once, at import time. Every step (project imports, model
# load, agent construction) sits inside one try so that any failure leaves the
# module importable with AGENT_READY = False and agent = None.
try:
    from agent.local_llm import LocalLLM
    from agent.tools import gaia_tools
    from utils.gaia_api import GaiaAPI
    from llama_index.core.agent import ReActAgent
    from llama_index.core.memory import ChatMemoryBuffer

    # Language model backing the agent
    print("Initializing Local LLM...")
    local_llm = LocalLLM()
    llm = local_llm.get_llm()

    # ReAct agent wired to the GAIA tools and a chat memory buffer
    print("Creating ReAct Agent...")
    memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
    agent = ReActAgent.from_tools(
        tools=gaia_tools,
        llm=llm,
        memory=memory,
        verbose=True,
        max_iterations=3,  # Limit iterations to avoid long processing
    )

    print("Agent initialized successfully!")
    AGENT_READY = True
except Exception as exc:
    # Report the failure and fall back to the "not ready" state that the
    # handler functions check before touching the agent.
    print(f"Failed to initialize agent: {exc!s}")
    agent = None
    AGENT_READY = False
def process_single_question(question_text: str) -> str:
    """Run one GAIA question through the agent and return a bare answer.

    The question is wrapped in an instruction asking for a direct answer,
    passed to ``agent.query``, and the stringified response is stripped of
    boilerplate lead-ins such as "FINAL ANSWER:".

    Args:
        question_text: The question to answer.

    Returns:
        The cleaned answer text. Problems (agent not initialized, blank
        question, any exception raised while querying) are returned as a
        message string starting with the failure marker instead of being
        raised.
    """
    # NOTE(review): the "❌" marker was restored from mis-encoded text ("β");
    # confirm the glyph against the upstream copy of this file.
    if not AGENT_READY:
        return "❌ Agent not ready. Please check the logs for initialization errors."

    # Don't spend an agent run on an empty prompt.
    if not question_text or not question_text.strip():
        return "❌ Please enter a question."

    try:
        # Add instruction to give direct answers only
        enhanced_prompt = f"""
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.
Question: {question_text}
"""
        response = agent.query(enhanced_prompt)

        # Clean the response to ensure it's just the answer
        answer = str(response).strip()

        # Remove common prefixes that might appear. The comparison ignores
        # case so variants like "Final Answer:" or "answer:" are caught too.
        prefixes_to_remove = ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]
        for prefix in prefixes_to_remove:
            if answer[:len(prefix)].lower() == prefix.lower():
                answer = answer[len(prefix):].strip()
        return answer
    except Exception as e:
        return f"❌ Error processing question: {e}"
def process_all_questions() -> str:
    """Answer every GAIA question and save the answers for submission.

    Questions come from ``GaiaAPI.get_questions()``; each one is answered with
    ``process_single_question``. The resulting list of
    ``{"task_id": ..., "submitted_answer": ...}`` dicts is written to
    ``gaia_answers.json`` in the current working directory.

    Returns:
        A multi-line summary with the number of processed questions and a
        preview (first 50 characters) of the first three answers, or a
        failure message if the agent is not ready or anything raises.
    """
    # NOTE(review): the "✅"/"❌" markers were restored from mis-encoded text;
    # confirm the glyphs against the upstream copy of this file.
    if not AGENT_READY:
        return "❌ Agent not ready. Cannot process questions."

    try:
        questions = GaiaAPI.get_questions()
        total = len(questions)

        processed_answers = []
        for position, question in enumerate(questions, start=1):
            print(f"Processing question {position}/{total}: {question['task_id']}")
            answer = process_single_question(question['question'])
            processed_answers.append({
                "task_id": question['task_id'],
                "submitted_answer": answer
            })

        # Save answers to file for review
        with open("gaia_answers.json", "w", encoding="utf-8") as f:
            json.dump(processed_answers, f, indent=2)

        summary_lines = [
            f"✅ Processed {len(processed_answers)} questions.",
            "Answers saved to gaia_answers.json",
            "First 3 answers:",
        ]
        for ans in processed_answers[:3]:
            text = ans['submitted_answer']
            # Only mark the preview as truncated when something was cut off.
            ellipsis = "..." if len(text) > 50 else ""
            summary_lines.append(f"- {ans['task_id']}: {text[:50]}{ellipsis}")
        return "\n".join(summary_lines) + "\n"
    except Exception as e:
        return f"❌ Error processing all questions: {e}"
def submit_to_gaia(username: str, code_url: str) -> str:
    """Submit the saved answers to the GAIA benchmark.

    Reads ``gaia_answers.json`` from the current working directory and passes
    its contents to ``GaiaAPI.submit_answers`` together with the username and
    code URL.

    Args:
        username: Hugging Face username; must not be blank.
        code_url: URL of the Space code; must not be blank.

    Returns:
        A success message including the ``score`` entry of the API result
        ("Unknown" if absent), or a failure message when the agent is not
        ready, an input is blank, no answers are saved, the result contains
        an ``"error"`` entry, or anything raises.
    """
    # NOTE(review): the "✅"/"❌" markers were restored from mis-encoded text.
    # The "π" / "π―" glyphs in the success message are also mis-encoded, but
    # the originals cannot be determined from here, so they are left as found.
    if not AGENT_READY:
        return "❌ Agent not ready. Cannot submit."

    # Treat whitespace-only input the same as missing input.
    username = (username or "").strip()
    code_url = (code_url or "").strip()
    if not username or not code_url:
        return "❌ Please provide both username and code URL."

    try:
        # Load processed answers
        try:
            with open("gaia_answers.json", "r", encoding="utf-8") as f:
                answers = json.load(f)
        except FileNotFoundError:
            return "❌ No processed answers found. Please process questions first."

        # An empty answers file is as useless as a missing one.
        if not answers:
            return "❌ No processed answers found. Please process questions first."

        # Submit to GAIA
        result = GaiaAPI.submit_answers(username, code_url, answers)
        if "error" in result:
            return f"❌ Submission failed: {result['error']}"

        score = result.get('score', 'Unknown')
        return f"✅ Submission successful!\nπ Score: {score}\nπ― Check the leaderboard for your ranking!"
    except Exception as e:
        return f"❌ Submission error: {e}"
def get_sample_question() -> str:
    """Fetch the text of one random GAIA question for the test tab.

    Returns:
        The ``'question'`` entry of whatever
        ``GaiaAPI.get_random_question()`` returns, or an
        "Error loading sample question: ..." message if anything raises.
    """
    try:
        return GaiaAPI.get_random_question()['question']
    except Exception as err:
        return f"Error loading sample question: {err!s}"
# Create Gradio interface
#
# NOTE(review): several labels below contain mis-encoded glyphs (e.g. "π¦",
# "π¬", "βΉοΈ"). The originals cannot be recovered with certainty from this
# copy, so they are left as found; restore them from the upstream file.
# NOTE(review): the nesting of rows/columns was reconstructed from the layout
# calls because the indentation was lost; confirm against the upstream file.

# Evaluated here and interpolated via an f-string below; inside a plain string
# the `{... if AGENT_READY else ...}` expression would be shown verbatim.
status_text = "✅ Ready" if AGENT_READY else "❌ Not Ready"

with gr.Blocks(title="π¦ GAIA LlamaIndex Agent") as demo:
    gr.Markdown(f"""
# π¦ GAIA Benchmark Agent with LlamaIndex
This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.
**Status:** {status_text}
""")

    # Tab 1: run a single question through the agent
    with gr.Tab("π¬ Test Single Question"):
        gr.Markdown("Test the agent with individual questions")
        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter a GAIA question or click 'Load Sample'",
                    lines=3
                )
                with gr.Row():
                    sample_btn = gr.Button("π² Load Sample Question")
                    process_btn = gr.Button("π Process Question", variant="primary")
            with gr.Column():
                answer_output = gr.Textbox(
                    label="Agent Answer",
                    lines=5,
                    interactive=False
                )
        sample_btn.click(get_sample_question, outputs=question_input)
        process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)

    # Tab 2: answer every question and save the results
    with gr.Tab("π Full Evaluation"):
        gr.Markdown("Process all GAIA questions and prepare for submission")
        with gr.Row():
            process_all_btn = gr.Button("π Process All Questions", variant="primary")
        processing_output = gr.Textbox(
            label="Processing Status",
            lines=10,
            interactive=False
        )
        process_all_btn.click(process_all_questions, outputs=processing_output)

    # Tab 3: submit the saved answers
    with gr.Tab("π Submit to GAIA"):
        gr.Markdown("""
Submit your processed answers to the GAIA benchmark for official scoring.
**Requirements:**
1. Your Hugging Face username
2. Link to your Space code (e.g., `https://huggingface.co/spaces/YOUR_USERNAME/gaia-llamaindex-agent/tree/main`)
3. Questions must be processed first in the "Full Evaluation" tab
""")
        with gr.Row():
            with gr.Column():
                username_input = gr.Textbox(
                    label="HF Username",
                    placeholder="your-username"
                )
                code_url_input = gr.Textbox(
                    label="Space Code URL",
                    placeholder="https://huggingface.co/spaces/your-username/gaia-llamaindex-agent/tree/main"
                )
                submit_btn = gr.Button("π― Submit to GAIA", variant="primary")
            with gr.Column():
                submission_output = gr.Textbox(
                    label="Submission Result",
                    lines=5,
                    interactive=False
                )
        submit_btn.click(
            submit_to_gaia,
            inputs=[username_input, code_url_input],
            outputs=submission_output
        )

    # Tab 4: static help text
    with gr.Tab("βΉοΈ Info"):
        gr.Markdown("""
## About This Agent
This agent combines:
- **LlamaIndex**: For orchestrating the agent workflow
- **Local LLM**: Running entirely on Hugging Face Spaces
- **ReAct Framework**: For reasoning and acting iteratively
- **GAIA Tools**: Web search, calculation, file reading, etc.
## Usage Tips
1. **Start with single questions** to test the agent
2. **Process all questions** when ready for full evaluation
3. **Submit to GAIA** for official scoring
## Troubleshooting
- If agent fails to initialize, check the model loading
- For memory issues, try restarting the Space
- For API errors, verify the GAIA endpoint URL
""")
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(show_error=True)