Spaces:
Sleeping
Sleeping
Fix GAIA API
Browse files- agent/local_llm.py +18 -7
- agent/tools.py +71 -42
- app.py +88 -114
- utils/__init__.py +0 -0
- utils/gaia_api.py +3 -3
agent/local_llm.py
CHANGED
@@ -1,19 +1,28 @@
|
|
1 |
-
# agent/local_llm.py
|
2 |
-
|
3 |
-
from
|
4 |
-
import
|
|
|
|
|
|
|
|
|
5 |
|
6 |
class LocalLLM:
|
7 |
def __init__(self):
|
8 |
# Use smaller model that works reliably
|
9 |
-
self.model_name = "microsoft/DialoGPT-medium"
|
|
|
10 |
self.llm = self._create_llama_index_llm()
|
11 |
|
12 |
def _create_llama_index_llm(self):
|
13 |
"""Create LlamaIndex compatible LLM"""
|
14 |
try:
|
15 |
-
|
16 |
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
|
|
|
|
|
|
|
|
17 |
model = AutoModelForCausalLM.from_pretrained(
|
18 |
self.model_name,
|
19 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
@@ -21,7 +30,7 @@ class LocalLLM:
|
|
21 |
low_cpu_mem_usage=True
|
22 |
)
|
23 |
|
24 |
-
|
25 |
llm = HuggingFaceLLM(
|
26 |
model=model,
|
27 |
tokenizer=tokenizer,
|
@@ -33,6 +42,7 @@ class LocalLLM:
|
|
33 |
}
|
34 |
)
|
35 |
|
|
|
36 |
return llm
|
37 |
|
38 |
except Exception as e:
|
@@ -42,6 +52,7 @@ class LocalLLM:
|
|
42 |
|
43 |
def _create_fallback_llm(self):
|
44 |
"""Fallback to a very basic model"""
|
|
|
45 |
model_name = "gpt2"
|
46 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
47 |
tokenizer.pad_token = tokenizer.eos_token
|
|
|
1 |
+
# File: agent/local_llm.py
|
2 |
+
try:
|
3 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
+
from llama_index.llms.huggingface import HuggingFaceLLM
|
5 |
+
import torch
|
6 |
+
except ImportError as e:
|
7 |
+
print(f"Import error in local_llm.py: {e}")
|
8 |
+
raise
|
9 |
|
10 |
class LocalLLM:
|
11 |
def __init__(self):
|
12 |
# Use smaller model that works reliably
|
13 |
+
self.model_name = "microsoft/DialoGPT-medium"
|
14 |
+
print(f"Initializing LocalLLM with model: {self.model_name}")
|
15 |
self.llm = self._create_llama_index_llm()
|
16 |
|
17 |
def _create_llama_index_llm(self):
|
18 |
"""Create LlamaIndex compatible LLM"""
|
19 |
try:
|
20 |
+
print("Loading tokenizer...")
|
21 |
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
22 |
+
if tokenizer.pad_token is None:
|
23 |
+
tokenizer.pad_token = tokenizer.eos_token
|
24 |
+
|
25 |
+
print("Loading model...")
|
26 |
model = AutoModelForCausalLM.from_pretrained(
|
27 |
self.model_name,
|
28 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
|
|
30 |
low_cpu_mem_usage=True
|
31 |
)
|
32 |
|
33 |
+
print("Creating LlamaIndex LLM...")
|
34 |
llm = HuggingFaceLLM(
|
35 |
model=model,
|
36 |
tokenizer=tokenizer,
|
|
|
42 |
}
|
43 |
)
|
44 |
|
45 |
+
print("LLM created successfully!")
|
46 |
return llm
|
47 |
|
48 |
except Exception as e:
|
|
|
52 |
|
53 |
def _create_fallback_llm(self):
|
54 |
"""Fallback to a very basic model"""
|
55 |
+
print("Using fallback model: gpt2")
|
56 |
model_name = "gpt2"
|
57 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
58 |
tokenizer.pad_token = tokenizer.eos_token
|
agent/tools.py
CHANGED
@@ -1,22 +1,47 @@
|
|
1 |
-
# agent/tools.py
|
2 |
-
|
3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import requests
|
5 |
from typing import Optional
|
6 |
import json
|
7 |
|
8 |
def get_gaia_questions() -> str:
|
9 |
"""Fetch all GAIA benchmark questions for reference"""
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
15 |
|
16 |
def get_random_gaia_question() -> str:
|
17 |
"""Get a single random GAIA question to work on"""
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
20 |
|
21 |
def search_web(query: str) -> str:
|
22 |
"""Search the web for information (mock implementation)"""
|
@@ -54,35 +79,39 @@ def get_current_info(topic: str) -> str:
|
|
54 |
return f"Current information about '{topic}': This is a mock response. In a real implementation, this would fetch current information from reliable sources."
|
55 |
|
56 |
# Create the tools list for the agent
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
# File: agent/tools.py
|
2 |
+
try:
|
3 |
+
from llama_index.core.tools import FunctionTool
|
4 |
+
import sys
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Add the parent directory to the path so we can import utils
|
8 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
9 |
+
|
10 |
+
from utils.gaia_api import GaiaAPI
|
11 |
+
except ImportError as e:
|
12 |
+
print(f"Import error in tools.py: {e}")
|
13 |
+
# Create a fallback GaiaAPI class if import fails
|
14 |
+
class GaiaAPI:
|
15 |
+
@classmethod
|
16 |
+
def get_questions(cls):
|
17 |
+
return [{"task_id": "fallback", "question": "What is 2+2?"}]
|
18 |
+
|
19 |
+
@classmethod
|
20 |
+
def get_random_question(cls):
|
21 |
+
return {"task_id": "fallback", "question": "What is 2+2?"}
|
22 |
+
|
23 |
import requests
|
24 |
from typing import Optional
|
25 |
import json
|
26 |
|
27 |
def get_gaia_questions() -> str:
|
28 |
"""Fetch all GAIA benchmark questions for reference"""
|
29 |
+
try:
|
30 |
+
questions = GaiaAPI.get_questions()
|
31 |
+
result = "Available GAIA Questions:\n"
|
32 |
+
for q in questions[:5]: # Show first 5 questions
|
33 |
+
result += f"ID: {q['task_id']} - {q['question'][:100]}...\n"
|
34 |
+
return result
|
35 |
+
except Exception as e:
|
36 |
+
return f"Error fetching questions: {str(e)}"
|
37 |
|
38 |
def get_random_gaia_question() -> str:
|
39 |
"""Get a single random GAIA question to work on"""
|
40 |
+
try:
|
41 |
+
question = GaiaAPI.get_random_question()
|
42 |
+
return f"Task ID: {question['task_id']}\nQuestion: {question['question']}"
|
43 |
+
except Exception as e:
|
44 |
+
return f"Error getting random question: {str(e)}"
|
45 |
|
46 |
def search_web(query: str) -> str:
|
47 |
"""Search the web for information (mock implementation)"""
|
|
|
79 |
return f"Current information about '{topic}': This is a mock response. In a real implementation, this would fetch current information from reliable sources."
|
80 |
|
81 |
# Create the tools list for the agent
|
82 |
+
try:
|
83 |
+
gaia_tools = [
|
84 |
+
FunctionTool.from_defaults(
|
85 |
+
fn=get_gaia_questions,
|
86 |
+
name="get_gaia_questions",
|
87 |
+
description="Fetch all available GAIA benchmark questions"
|
88 |
+
),
|
89 |
+
FunctionTool.from_defaults(
|
90 |
+
fn=get_random_gaia_question,
|
91 |
+
name="get_random_question",
|
92 |
+
description="Get a single random GAIA question to work on"
|
93 |
+
),
|
94 |
+
FunctionTool.from_defaults(
|
95 |
+
fn=search_web,
|
96 |
+
name="search_web",
|
97 |
+
description="Search the web for information about a topic"
|
98 |
+
),
|
99 |
+
FunctionTool.from_defaults(
|
100 |
+
fn=calculate,
|
101 |
+
name="calculate",
|
102 |
+
description="Perform mathematical calculations safely"
|
103 |
+
),
|
104 |
+
FunctionTool.from_defaults(
|
105 |
+
fn=read_file_content,
|
106 |
+
name="read_file",
|
107 |
+
description="Read content from a file associated with GAIA tasks"
|
108 |
+
),
|
109 |
+
FunctionTool.from_defaults(
|
110 |
+
fn=get_current_info,
|
111 |
+
name="get_current_info",
|
112 |
+
description="Get current information about a specific topic"
|
113 |
+
)
|
114 |
+
]
|
115 |
+
except Exception as e:
|
116 |
+
print(f"Error creating tools: {e}")
|
117 |
+
gaia_tools = []
|
app.py
CHANGED
@@ -1,22 +1,34 @@
|
|
1 |
-
# app.py
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
-
|
5 |
import json
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
try:
|
|
|
9 |
from agent.local_llm import LocalLLM
|
10 |
from agent.tools import gaia_tools
|
11 |
-
from utils.gaia_api import GaiaAPI
|
12 |
from llama_index.core.agent import ReActAgent
|
13 |
from llama_index.core.memory import ChatMemoryBuffer
|
14 |
-
|
|
|
|
|
15 |
# Initialize components
|
16 |
print("Initializing Local LLM...")
|
17 |
local_llm = LocalLLM()
|
18 |
llm = local_llm.get_llm()
|
19 |
-
|
20 |
print("Creating ReAct Agent...")
|
21 |
memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
|
22 |
agent = ReActAgent.from_tools(
|
@@ -24,14 +36,16 @@ try:
|
|
24 |
llm=llm,
|
25 |
memory=memory,
|
26 |
verbose=True,
|
27 |
-
max_iterations=3
|
28 |
)
|
29 |
-
|
30 |
print("Agent initialized successfully!")
|
31 |
AGENT_READY = True
|
32 |
-
|
33 |
except Exception as e:
|
34 |
print(f"Failed to initialize agent: {str(e)}")
|
|
|
|
|
35 |
AGENT_READY = False
|
36 |
agent = None
|
37 |
|
@@ -39,91 +53,81 @@ def process_single_question(question_text: str) -> str:
|
|
39 |
"""Process a single GAIA question through the agent"""
|
40 |
if not AGENT_READY:
|
41 |
return "β Agent not ready. Please check the logs for initialization errors."
|
42 |
-
|
|
|
|
|
|
|
43 |
try:
|
44 |
-
# Add instruction to give direct answers only
|
45 |
enhanced_prompt = f"""
|
46 |
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.
|
47 |
-
|
48 |
Question: {question_text}
|
49 |
"""
|
50 |
-
|
51 |
response = agent.query(enhanced_prompt)
|
52 |
-
|
53 |
-
# Clean the response to ensure it's just the answer
|
54 |
answer = str(response).strip()
|
55 |
-
|
56 |
-
# Remove common prefixes
|
57 |
-
|
58 |
-
for prefix in prefixes_to_remove:
|
59 |
if answer.startswith(prefix):
|
60 |
answer = answer[len(prefix):].strip()
|
61 |
-
|
62 |
return answer
|
63 |
-
|
64 |
except Exception as e:
|
65 |
-
|
|
|
66 |
|
67 |
def process_all_questions() -> str:
|
68 |
"""Process all GAIA questions and prepare answers for submission"""
|
69 |
if not AGENT_READY:
|
70 |
-
return "β Agent not ready.
|
71 |
-
|
72 |
try:
|
73 |
questions = GaiaAPI.get_questions()
|
74 |
processed_answers = []
|
75 |
-
|
76 |
for i, question in enumerate(questions):
|
77 |
print(f"Processing question {i+1}/{len(questions)}: {question['task_id']}")
|
78 |
-
|
79 |
answer = process_single_question(question['question'])
|
80 |
-
|
81 |
processed_answers.append({
|
82 |
"task_id": question['task_id'],
|
83 |
"submitted_answer": answer
|
84 |
})
|
85 |
-
|
86 |
-
# Save answers to file for review
|
87 |
with open("gaia_answers.json", "w") as f:
|
88 |
json.dump(processed_answers, f, indent=2)
|
89 |
-
|
90 |
summary = f"β
Processed {len(processed_answers)} questions.\n"
|
91 |
-
summary +=
|
92 |
-
summary += f"First 3 answers:\n"
|
93 |
-
|
94 |
for ans in processed_answers[:3]:
|
95 |
summary += f"- {ans['task_id']}: {ans['submitted_answer'][:50]}...\n"
|
96 |
-
|
97 |
return summary
|
98 |
-
|
99 |
except Exception as e:
|
100 |
-
|
|
|
101 |
|
102 |
def submit_to_gaia(username: str, code_url: str) -> str:
|
103 |
"""Submit answers to GAIA benchmark"""
|
104 |
if not AGENT_READY:
|
105 |
-
return "β Agent not ready.
|
106 |
-
|
107 |
if not username or not code_url:
|
108 |
return "β Please provide both username and code URL."
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
try:
|
111 |
-
# Load processed answers
|
112 |
-
try:
|
113 |
-
with open("gaia_answers.json", "r") as f:
|
114 |
-
answers = json.load(f)
|
115 |
-
except FileNotFoundError:
|
116 |
-
return "β No processed answers found. Please process questions first."
|
117 |
-
|
118 |
-
# Submit to GAIA
|
119 |
result = GaiaAPI.submit_answers(username, code_url, answers)
|
120 |
-
|
121 |
if "error" in result:
|
122 |
return f"β Submission failed: {result['error']}"
|
123 |
-
|
124 |
-
|
125 |
-
return f"β
Submission successful!\nπ Score: {score}\nπ― Check the leaderboard for your ranking!"
|
126 |
-
|
127 |
except Exception as e:
|
128 |
return f"β Submission error: {str(e)}"
|
129 |
|
@@ -135,19 +139,19 @@ def get_sample_question() -> str:
|
|
135 |
except Exception as e:
|
136 |
return f"Error loading sample question: {str(e)}"
|
137 |
|
138 |
-
#
|
139 |
with gr.Blocks(title="π¦ GAIA LlamaIndex Agent") as demo:
|
140 |
-
gr.Markdown("""
|
141 |
# π¦ GAIA Benchmark Agent with LlamaIndex
|
142 |
|
143 |
This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.
|
144 |
|
145 |
**Status:** {"β
Ready" if AGENT_READY else "β Not Ready"}
|
146 |
""")
|
147 |
-
|
148 |
with gr.Tab("π¬ Test Single Question"):
|
149 |
gr.Markdown("Test the agent with individual questions")
|
150 |
-
|
151 |
with gr.Row():
|
152 |
with gr.Column():
|
153 |
question_input = gr.Textbox(
|
@@ -158,88 +162,58 @@ with gr.Blocks(title="π¦ GAIA LlamaIndex Agent") as demo:
|
|
158 |
with gr.Row():
|
159 |
sample_btn = gr.Button("π² Load Sample Question")
|
160 |
process_btn = gr.Button("π Process Question", variant="primary")
|
161 |
-
|
162 |
with gr.Column():
|
163 |
answer_output = gr.Textbox(
|
164 |
label="Agent Answer",
|
165 |
lines=5,
|
166 |
interactive=False
|
167 |
)
|
168 |
-
|
169 |
sample_btn.click(get_sample_question, outputs=question_input)
|
170 |
process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)
|
171 |
-
|
172 |
with gr.Tab("π Full Evaluation"):
|
173 |
gr.Markdown("Process all GAIA questions and prepare for submission")
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
processing_output = gr.Textbox(
|
179 |
-
label="Processing Status",
|
180 |
-
lines=10,
|
181 |
-
interactive=False
|
182 |
-
)
|
183 |
-
|
184 |
process_all_btn.click(process_all_questions, outputs=processing_output)
|
185 |
-
|
186 |
with gr.Tab("π Submit to GAIA"):
|
187 |
gr.Markdown("""
|
188 |
Submit your processed answers to the GAIA benchmark for official scoring.
|
189 |
-
|
190 |
**Requirements:**
|
191 |
1. Your Hugging Face username
|
192 |
-
2. Link to your Space code (e.g.,
|
193 |
-
3. Questions must be processed first in the "Full Evaluation" tab
|
194 |
""")
|
195 |
-
|
196 |
with gr.Row():
|
197 |
with gr.Column():
|
198 |
-
username_input = gr.Textbox(
|
199 |
-
|
200 |
-
placeholder="your-username"
|
201 |
-
)
|
202 |
-
code_url_input = gr.Textbox(
|
203 |
-
label="Space Code URL",
|
204 |
-
placeholder="https://huggingface.co/spaces/your-username/gaia-llamaindex-agent/tree/main"
|
205 |
-
)
|
206 |
submit_btn = gr.Button("π― Submit to GAIA", variant="primary")
|
207 |
-
|
208 |
with gr.Column():
|
209 |
-
submission_output = gr.Textbox(
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
submit_btn.click(
|
216 |
-
submit_to_gaia,
|
217 |
-
inputs=[username_input, code_url_input],
|
218 |
-
outputs=submission_output
|
219 |
-
)
|
220 |
-
|
221 |
-
with gr.Tab("βΉοΈ Info"):
|
222 |
-
gr.Markdown("""
|
223 |
## About This Agent
|
224 |
|
225 |
-
This agent
|
226 |
-
- **LlamaIndex
|
227 |
-
- **Local LLM
|
228 |
-
- **
|
229 |
-
- **GAIA Tools**: Web search, calculation, file reading, etc.
|
230 |
-
|
231 |
-
## Usage Tips
|
232 |
-
|
233 |
-
1. **Start with single questions** to test the agent
|
234 |
-
2. **Process all questions** when ready for full evaluation
|
235 |
-
3. **Submit to GAIA** for official scoring
|
236 |
-
|
237 |
-
## Troubleshooting
|
238 |
|
239 |
-
|
240 |
-
-
|
241 |
-
-
|
242 |
""")
|
243 |
|
244 |
if __name__ == "__main__":
|
245 |
-
demo.launch(show_error=True)
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
+
import sys
|
4 |
import json
|
5 |
+
from typing import List, Dict
|
6 |
+
|
7 |
+
# Add the current directory to Python path
|
8 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
9 |
|
10 |
+
# β
Ensure GaiaAPI is imported at the top level
|
11 |
+
from utils.gaia_api import GaiaAPI
|
12 |
+
|
13 |
+
# Initialize variables
|
14 |
+
AGENT_READY = False
|
15 |
+
agent = None
|
16 |
+
|
17 |
+
# Import other agent modules
|
18 |
try:
|
19 |
+
print("Importing modules...")
|
20 |
from agent.local_llm import LocalLLM
|
21 |
from agent.tools import gaia_tools
|
|
|
22 |
from llama_index.core.agent import ReActAgent
|
23 |
from llama_index.core.memory import ChatMemoryBuffer
|
24 |
+
|
25 |
+
print("All imports successful!")
|
26 |
+
|
27 |
# Initialize components
|
28 |
print("Initializing Local LLM...")
|
29 |
local_llm = LocalLLM()
|
30 |
llm = local_llm.get_llm()
|
31 |
+
|
32 |
print("Creating ReAct Agent...")
|
33 |
memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
|
34 |
agent = ReActAgent.from_tools(
|
|
|
36 |
llm=llm,
|
37 |
memory=memory,
|
38 |
verbose=True,
|
39 |
+
max_iterations=3
|
40 |
)
|
41 |
+
|
42 |
print("Agent initialized successfully!")
|
43 |
AGENT_READY = True
|
44 |
+
|
45 |
except Exception as e:
|
46 |
print(f"Failed to initialize agent: {str(e)}")
|
47 |
+
import traceback
|
48 |
+
traceback.print_exc()
|
49 |
AGENT_READY = False
|
50 |
agent = None
|
51 |
|
|
|
53 |
"""Process a single GAIA question through the agent"""
|
54 |
if not AGENT_READY:
|
55 |
return "β Agent not ready. Please check the logs for initialization errors."
|
56 |
+
|
57 |
+
if not question_text.strip():
|
58 |
+
return "β Please enter a question."
|
59 |
+
|
60 |
try:
|
|
|
61 |
enhanced_prompt = f"""
|
62 |
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.
|
63 |
+
|
64 |
Question: {question_text}
|
65 |
"""
|
|
|
66 |
response = agent.query(enhanced_prompt)
|
|
|
|
|
67 |
answer = str(response).strip()
|
68 |
+
|
69 |
+
# Remove common prefixes
|
70 |
+
for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
|
|
|
71 |
if answer.startswith(prefix):
|
72 |
answer = answer[len(prefix):].strip()
|
73 |
+
|
74 |
return answer
|
75 |
+
|
76 |
except Exception as e:
|
77 |
+
import traceback
|
78 |
+
return f"β Error: {str(e)}\n\n{traceback.format_exc()}"
|
79 |
|
80 |
def process_all_questions() -> str:
|
81 |
"""Process all GAIA questions and prepare answers for submission"""
|
82 |
if not AGENT_READY:
|
83 |
+
return "β Agent not ready."
|
84 |
+
|
85 |
try:
|
86 |
questions = GaiaAPI.get_questions()
|
87 |
processed_answers = []
|
88 |
+
|
89 |
for i, question in enumerate(questions):
|
90 |
print(f"Processing question {i+1}/{len(questions)}: {question['task_id']}")
|
|
|
91 |
answer = process_single_question(question['question'])
|
|
|
92 |
processed_answers.append({
|
93 |
"task_id": question['task_id'],
|
94 |
"submitted_answer": answer
|
95 |
})
|
96 |
+
|
|
|
97 |
with open("gaia_answers.json", "w") as f:
|
98 |
json.dump(processed_answers, f, indent=2)
|
99 |
+
|
100 |
summary = f"β
Processed {len(processed_answers)} questions.\n"
|
101 |
+
summary += "First 3 answers:\n"
|
|
|
|
|
102 |
for ans in processed_answers[:3]:
|
103 |
summary += f"- {ans['task_id']}: {ans['submitted_answer'][:50]}...\n"
|
104 |
+
|
105 |
return summary
|
106 |
+
|
107 |
except Exception as e:
|
108 |
+
import traceback
|
109 |
+
return f"β Error: {str(e)}\n\n{traceback.format_exc()}"
|
110 |
|
111 |
def submit_to_gaia(username: str, code_url: str) -> str:
|
112 |
"""Submit answers to GAIA benchmark"""
|
113 |
if not AGENT_READY:
|
114 |
+
return "β Agent not ready."
|
115 |
+
|
116 |
if not username or not code_url:
|
117 |
return "β Please provide both username and code URL."
|
118 |
+
|
119 |
+
try:
|
120 |
+
with open("gaia_answers.json", "r") as f:
|
121 |
+
answers = json.load(f)
|
122 |
+
except FileNotFoundError:
|
123 |
+
return "β No processed answers found. Please process them first."
|
124 |
+
|
125 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
result = GaiaAPI.submit_answers(username, code_url, answers)
|
|
|
127 |
if "error" in result:
|
128 |
return f"β Submission failed: {result['error']}"
|
129 |
+
score = result.get("score", "Unknown")
|
130 |
+
return f"β
Submission successful!\nπ Score: {score}"
|
|
|
|
|
131 |
except Exception as e:
|
132 |
return f"β Submission error: {str(e)}"
|
133 |
|
|
|
139 |
except Exception as e:
|
140 |
return f"Error loading sample question: {str(e)}"
|
141 |
|
142 |
+
# ---------- Gradio UI ----------
|
143 |
with gr.Blocks(title="π¦ GAIA LlamaIndex Agent") as demo:
|
144 |
+
gr.Markdown(f"""
|
145 |
# π¦ GAIA Benchmark Agent with LlamaIndex
|
146 |
|
147 |
This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.
|
148 |
|
149 |
**Status:** {"β
Ready" if AGENT_READY else "β Not Ready"}
|
150 |
""")
|
151 |
+
|
152 |
with gr.Tab("π¬ Test Single Question"):
|
153 |
gr.Markdown("Test the agent with individual questions")
|
154 |
+
|
155 |
with gr.Row():
|
156 |
with gr.Column():
|
157 |
question_input = gr.Textbox(
|
|
|
162 |
with gr.Row():
|
163 |
sample_btn = gr.Button("π² Load Sample Question")
|
164 |
process_btn = gr.Button("π Process Question", variant="primary")
|
165 |
+
|
166 |
with gr.Column():
|
167 |
answer_output = gr.Textbox(
|
168 |
label="Agent Answer",
|
169 |
lines=5,
|
170 |
interactive=False
|
171 |
)
|
172 |
+
|
173 |
sample_btn.click(get_sample_question, outputs=question_input)
|
174 |
process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)
|
175 |
+
|
176 |
with gr.Tab("π Full Evaluation"):
|
177 |
gr.Markdown("Process all GAIA questions and prepare for submission")
|
178 |
+
|
179 |
+
process_all_btn = gr.Button("π Process All Questions", variant="primary")
|
180 |
+
processing_output = gr.Textbox(label="Processing Status", lines=10, interactive=False)
|
181 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
process_all_btn.click(process_all_questions, outputs=processing_output)
|
183 |
+
|
184 |
with gr.Tab("π Submit to GAIA"):
|
185 |
gr.Markdown("""
|
186 |
Submit your processed answers to the GAIA benchmark for official scoring.
|
187 |
+
|
188 |
**Requirements:**
|
189 |
1. Your Hugging Face username
|
190 |
+
2. Link to your Space code (e.g., https://huggingface.co/spaces/your-username/gaia-agent)
|
|
|
191 |
""")
|
192 |
+
|
193 |
with gr.Row():
|
194 |
with gr.Column():
|
195 |
+
username_input = gr.Textbox(label="HF Username", placeholder="your-username")
|
196 |
+
code_url_input = gr.Textbox(label="Space Code URL", placeholder="https://huggingface.co/spaces/your-username/gaia-agent")
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
submit_btn = gr.Button("π― Submit to GAIA", variant="primary")
|
198 |
+
|
199 |
with gr.Column():
|
200 |
+
submission_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)
|
201 |
+
|
202 |
+
submit_btn.click(submit_to_gaia, inputs=[username_input, code_url_input], outputs=submission_output)
|
203 |
+
|
204 |
+
with gr.Tab("βΉοΈ Info & Debug"):
|
205 |
+
gr.Markdown(f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
## About This Agent
|
207 |
|
208 |
+
This agent uses:
|
209 |
+
- **LlamaIndex** (ReAct Agent + Tools)
|
210 |
+
- **Local LLM** (e.g., DialoGPT or fallback GPT2)
|
211 |
+
- **GAIA Tools** (question fetch, file reader, math, etc.)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
+
## Current Status
|
214 |
+
- Agent Ready: {"β
Yes" if AGENT_READY else "β No"}
|
215 |
+
- Tools Loaded: {len(gaia_tools) if 'gaia_tools' in globals() else 0}
|
216 |
""")
|
217 |
|
218 |
if __name__ == "__main__":
|
219 |
+
demo.launch(show_error=True)
|
utils/__init__.py
ADDED
File without changes
|
utils/gaia_api.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# utils/gaia_api.py
|
2 |
import requests
|
3 |
from typing import List, Dict, Optional
|
4 |
import json
|
@@ -7,7 +7,7 @@ class GaiaAPI:
|
|
7 |
"""Client for interacting with GAIA Benchmark API"""
|
8 |
|
9 |
# Fixed API endpoint
|
10 |
-
BASE_URL = "https://agents-course-unit4-scoring.hf.space
|
11 |
|
12 |
@classmethod
|
13 |
def get_questions(cls) -> List[Dict]:
|
@@ -81,4 +81,4 @@ class GaiaAPI:
|
|
81 |
"level": 1,
|
82 |
"final_answer": "JavaScript, Python, PHP"
|
83 |
}
|
84 |
-
]
|
|
|
1 |
+
# File: utils/gaia_api.py
|
2 |
import requests
|
3 |
from typing import List, Dict, Optional
|
4 |
import json
|
|
|
7 |
"""Client for interacting with GAIA Benchmark API"""
|
8 |
|
9 |
# Fixed API endpoint
|
10 |
+
BASE_URL = "https://agents-course-unit4-scoring.hf.space"
|
11 |
|
12 |
@classmethod
|
13 |
def get_questions(cls) -> List[Dict]:
|
|
|
81 |
"level": 1,
|
82 |
"final_answer": "JavaScript, Python, PHP"
|
83 |
}
|
84 |
+
]
|