Spaces:
Runtime error
Runtime error
Deploy GAIA agent
Browse files
app.py
CHANGED
@@ -1,353 +1,59 @@
|
|
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
6 |
-
import json
|
7 |
-
import re
|
8 |
-
import io
|
9 |
-
import base64
|
10 |
-
from PIL import Image
|
11 |
-
import matplotlib.pyplot as plt
|
12 |
-
import numpy as np
|
13 |
-
from pathlib import Path
|
14 |
-
from duckduckgo_search import DDGS
|
15 |
|
16 |
-
#
|
17 |
-
from smolagents import CodeAgent,
|
18 |
-
from smolagents.models import LiteLLMModel
|
19 |
|
20 |
# --- Constants ---
|
21 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
22 |
|
23 |
-
# --- Enhanced
|
24 |
-
|
25 |
-
from smolagents import tool
|
26 |
-
from duckduckgo_search import DDGS
|
27 |
-
|
28 |
-
@tool
def web_search_tool(query: str) -> str:
    """
    Perform a web search using DuckDuckGo and return the top results.

    Args:
        query (str): Search query.

    Returns:
        str: Up to three results formatted as "Title / URL / Snippet" blocks separated by blank lines, a "No results found" notice when the search yields nothing, or a "Web search failed: ..." message when the search raises.
    """
    try:
        with DDGS() as ddgs:
            # Materialize while the session is open, in case ddgs.text()
            # hands back a lazy iterator (or None) instead of a list.
            results = list(ddgs.text(query, max_results=3) or [])

        if not results:
            # An empty string would be indistinguishable from a blank answer.
            return f"No results found for query: {query}"

        # .get() keeps one malformed hit from discarding the whole result set.
        return "\n\n".join(
            f"Title: {r.get('title', '')}\nURL: {r.get('href', '')}\nSnippet: {r.get('body', '')}"
            for r in results
        )
    except Exception as e:
        # Tool boundary: report the failure as text so the agent can react.
        return f"Web search failed: {str(e)}"
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
@tool
def calculator_tool(expression: str) -> str:
    """
    Evaluate a basic arithmetic expression and return the result as text.

    Supports numeric literals, parentheses, unary +/- and the binary
    operators +, -, *, /, // and **. The expression is parsed with ``ast``
    and walked node by node rather than handed to ``eval``.

    Args:
        expression: Mathematical expression as string

    Returns:
        Result of the calculation, or a message starting with "Error:" or "Calculation error:" when the input is rejected or cannot be evaluated
    """
    # Function-scope imports keep the tool self-contained.
    import ast
    import operator

    # Same character whitelist as before: digits, operators, dot, parentheses, space.
    allowed_chars = set('0123456789+-*/.() ')
    if not all(c in allowed_chars for c in expression):
        return "Error: Expression contains invalid characters"

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Upper bound on the size of an integer power; stops inputs such as
    # "9**9**9**9" from consuming unbounded CPU and memory.
    max_result_bits = 100_000

    def _evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = _evaluate(node.left)
            right = _evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > max_result_bits
            ):
                raise ValueError("result of exponentiation is too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("unsupported expression")

    try:
        # strip(): leading whitespace would otherwise be an indentation error.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(_evaluate(tree))
    except Exception as e:
        # Covers syntax errors, division by zero, overflow and rejected nodes.
        return f"Calculation error: {str(e)}"
|
70 |
-
|
71 |
-
@tool
def image_analyzer_tool(image_path: str) -> str:
    """
    Report basic properties of an image file.

    Args:
        image_path: Path to the image file

    Returns:
        Text listing dimensions, format, a colour summary and file size, or an error message when the file is missing or cannot be read
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found"

        # The context manager closes the underlying file handle, which a bare
        # Image.open() call would leave open.
        with Image.open(image_path) as img:
            width, height = img.size
            mode = img.mode
            format_info = img.format if img.format else "Unknown"

            if mode == 'RGB':
                # getcolors() yields (count, colour) pairs, or None when the
                # image holds more distinct colours than maxcolors.
                colors = img.getcolors(maxcolors=256*256*256)
                if colors:
                    dominant_color = max(colors, key=lambda x: x[0])[1]
                    color_info = f"Dominant color: RGB{dominant_color}"
                else:
                    color_info = "Complex color palette"
            else:
                color_info = f"Color mode: {mode}"

        analysis = (
            "Image Analysis:\n"
            f"- Dimensions: {width}x{height} pixels\n"
            f"- Format: {format_info}\n"
            f"- {color_info}\n"
            f"- File size: {os.path.getsize(image_path)} bytes\n"
        )
        return analysis

    except Exception as e:
        # Tool boundary: surface decoding/IO problems as text for the agent.
        return f"Image analysis error: {str(e)}"
|
112 |
-
|
113 |
-
@tool
def file_reader_tool(file_path: str) -> str:
    """
    Read and summarize a file based on its extension (CSV, JSON, text; other types are only sized).

    Args:
        file_path: Path to the file

    Returns:
        A summary for CSV files, a preview of at most 1000 characters for JSON and text files, the size for any other extension, or an error message
    """
    preview_limit = 1000

    def _preview(text):
        """Return text unchanged, or cut to the limit and marked with '...'."""
        # Only add the ellipsis when something was actually cut off, so short
        # files are not reported as truncated.
        if len(text) <= preview_limit:
            return text
        return f"{text[:preview_limit]}..."

    try:
        if not os.path.exists(file_path):
            return "Error: File not found"

        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(file_path)
            return f"CSV file with {len(df)} rows and {len(df.columns)} columns.\nColumns: {list(df.columns)}\nFirst 5 rows:\n{df.head().to_string()}"

        if file_ext == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return f"JSON file content:\n{_preview(json.dumps(data, indent=2))}"

        if file_ext in ('.txt', '.md', '.py', '.js', '.html', '.css'):
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            return f"Text file content ({len(content)} characters):\n{_preview(content)}"

        # Any other extension is treated as opaque binary data.
        return f"Binary file: {file_ext}, size: {os.path.getsize(file_path)} bytes"

    except Exception as e:
        # Tool boundary: parse/IO/decoding failures are reported as text.
        return f"File reading error: {str(e)}"
|
147 |
-
|
148 |
-
@tool
def data_processor_tool(data: str, operation: str) -> str:
    """
    Apply a simple aggregate or ordering operation to data given as text.

    The input is parsed as JSON first; if that fails it is treated as a
    comma- and/or whitespace-separated list of numbers.

    Args:
        data: Data as string (JSON, CSV format, or numbers)
        operation: Operation to perform (sum, average, count, sort, max, min); case-insensitive

    Returns:
        Processed data result as text, or an error message
    """
    try:
        try:
            parsed_data = json.loads(data)
        except (ValueError, TypeError):
            # Not JSON: fall back to a plain list of numbers.
            try:
                parsed_data = [float(x) for x in data.replace(',', ' ').split()]
            except (ValueError, AttributeError):
                return "Error: Could not parse data"

        # A lone JSON number (e.g. "5") parses to a scalar; wrap it so the
        # list operations below apply instead of reporting "Unsupported".
        if isinstance(parsed_data, (int, float)) and not isinstance(parsed_data, bool):
            parsed_data = [parsed_data]

        op = operation.lower()

        # 'count' works on any sized JSON value (list, dict, string).
        if op == 'count':
            return str(len(parsed_data))

        if isinstance(parsed_data, list):
            if op == 'sort':
                return str(sorted(parsed_data))

            # Aggregates ignore non-numeric entries.
            nums = [x for x in parsed_data if isinstance(x, (int, float))]
            if op == 'sum':
                return str(sum(nums))
            if op == 'average':
                return str(sum(nums) / len(nums) if nums else 0)
            if op == 'max':
                return str(max(nums) if nums else "No numbers found")
            if op == 'min':
                return str(min(nums) if nums else "No numbers found")

        return f"Unsupported operation: {operation}"

    except Exception as e:
        # Tool boundary: e.g. sorting mixed types or len() of an unsized value.
        return f"Data processing error: {str(e)}"
|
195 |
-
|
196 |
-
# --- Enhanced GAIA Agent ---
|
197 |
-
class GAIAAgent:
|
198 |
def __init__(self):
|
199 |
-
print("
|
200 |
-
|
201 |
-
# Initialize
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
)
|
209 |
-
except:
|
210 |
-
# Fallback to a basic model
|
211 |
-
print("Warning: Using fallback model configuration")
|
212 |
-
self.model = None
|
213 |
-
|
214 |
-
# Initialize tools
|
215 |
-
self.tools = [
|
216 |
-
web_search_tool,
|
217 |
-
calculator_tool,
|
218 |
-
image_analyzer_tool,
|
219 |
-
file_reader_tool,
|
220 |
-
data_processor_tool,
|
221 |
-
PythonInterpreterTool()
|
222 |
-
]
|
223 |
-
|
224 |
-
# Initialize the agent
|
225 |
-
try:
|
226 |
-
self.agent = CodeAgent(
|
227 |
-
tools=self.tools,
|
228 |
-
model=self.model,
|
229 |
-
verbosity_level=1
|
230 |
-
)
|
231 |
-
except Exception as e:
|
232 |
-
print(f"Agent initialization error: {e}")
|
233 |
-
self.agent = None
|
234 |
-
|
235 |
-
def __call__(self, question: str) -> str:
|
236 |
-
print(f"GAIAAgent processing question: {question[:100]}...")
|
237 |
-
|
238 |
-
if not self.agent:
|
239 |
-
# Fallback logic if agent failed to initialize
|
240 |
-
return self._fallback_processing(question)
|
241 |
-
|
242 |
-
try:
|
243 |
-
# Enhanced prompt for GAIA tasks
|
244 |
-
enhanced_prompt = f"""
|
245 |
-
You are a helpful AI assistant designed to solve complex real-world problems that may require:
|
246 |
-
- Web searching for current information
|
247 |
-
- Mathematical calculations
|
248 |
-
- Image analysis
|
249 |
-
- File processing
|
250 |
-
- Multi-step reasoning
|
251 |
-
|
252 |
-
Question: {question}
|
253 |
-
|
254 |
-
Please approach this systematically:
|
255 |
-
1. Analyze what type of problem this is
|
256 |
-
2. Determine what tools/information you need
|
257 |
-
3. Use available tools to gather information
|
258 |
-
4. Reason through the problem step by step
|
259 |
-
5. Provide a clear, concise final answer
|
260 |
|
261 |
-
|
262 |
-
""
|
263 |
-
|
264 |
-
response = self.agent.run(enhanced_prompt)
|
265 |
-
|
266 |
-
# Extract the final answer if it's in the response
|
267 |
-
if isinstance(response, str):
|
268 |
-
# Look for common answer patterns
|
269 |
-
answer_patterns = [
|
270 |
-
r"Final answer:?\s*(.+)",
|
271 |
-
r"Answer:?\s*(.+)",
|
272 |
-
r"The answer is:?\s*(.+)",
|
273 |
-
r"Result:?\s*(.+)"
|
274 |
-
]
|
275 |
-
|
276 |
-
for pattern in answer_patterns:
|
277 |
-
match = re.search(pattern, response, re.IGNORECASE)
|
278 |
-
if match:
|
279 |
-
return match.group(1).strip()
|
280 |
-
|
281 |
-
# If no pattern found, return the last sentence or the whole response
|
282 |
-
sentences = response.split('.')
|
283 |
-
return sentences[-1].strip() if sentences else response
|
284 |
-
|
285 |
-
return str(response)
|
286 |
-
|
287 |
-
except Exception as e:
|
288 |
-
print(f"Error in agent processing: {e}")
|
289 |
-
return self._fallback_processing(question)
|
290 |
-
|
291 |
-
def _fallback_processing(self, question: str) -> str:
|
292 |
-
"""Fallback processing when main agent fails"""
|
293 |
try:
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
# Math questions
|
298 |
-
if any(op in question for op in ['+', '-', '*', '/', 'calculate', 'sum', 'average']):
|
299 |
-
# Extract numbers and try basic calculation
|
300 |
-
numbers = re.findall(r'-?\d+\.?\d*', question)
|
301 |
-
if len(numbers) >= 2:
|
302 |
-
try:
|
303 |
-
if 'sum' in question_lower or '+' in question:
|
304 |
-
result = sum(float(n) for n in numbers)
|
305 |
-
return str(result)
|
306 |
-
elif 'average' in question_lower:
|
307 |
-
result = sum(float(n) for n in numbers) / len(numbers)
|
308 |
-
return str(result)
|
309 |
-
except:
|
310 |
-
pass
|
311 |
-
|
312 |
-
# Search-based questions
|
313 |
-
if any(word in question_lower for word in ['what', 'who', 'when', 'where', 'how', 'why']):
|
314 |
-
try:
|
315 |
-
search_result = web_search_tool(question)
|
316 |
-
# Extract key information from search results
|
317 |
-
lines = search_result.split('\n')
|
318 |
-
relevant_lines = [line for line in lines if len(line.strip()) > 20]
|
319 |
-
return relevant_lines[0] if relevant_lines else "Unable to find specific information"
|
320 |
-
except:
|
321 |
-
pass
|
322 |
-
|
323 |
-
# Default response
|
324 |
-
return "I need more context or tools to answer this question accurately."
|
325 |
-
|
326 |
except Exception as e:
|
327 |
-
|
|
|
328 |
|
329 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
330 |
"""
|
331 |
-
Fetches all questions, runs the
|
332 |
and displays the results.
|
333 |
"""
|
334 |
-
# --- Determine HF Space Runtime URL and Repo URL ---
|
335 |
space_id = os.getenv("SPACE_ID")
|
336 |
|
337 |
if profile:
|
338 |
-
username =
|
339 |
print(f"User logged in: {username}")
|
340 |
else:
|
341 |
print("User not logged in.")
|
342 |
-
return "Please
|
343 |
|
344 |
-
|
345 |
-
|
346 |
-
submit_url = f"{api_url}/submit"
|
347 |
|
348 |
-
# 1. Instantiate Agent
|
349 |
try:
|
350 |
-
agent =
|
351 |
except Exception as e:
|
352 |
print(f"Error instantiating agent: {e}")
|
353 |
return f"Error initializing agent: {e}", None
|
@@ -355,168 +61,68 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
355 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
356 |
print(agent_code)
|
357 |
|
358 |
-
#
|
359 |
-
print(f"Fetching questions from: {questions_url}")
|
360 |
try:
|
361 |
-
|
362 |
-
|
363 |
-
questions_data =
|
364 |
if not questions_data:
|
365 |
-
|
366 |
-
return "Fetched questions list is empty or invalid format.", None
|
367 |
print(f"Fetched {len(questions_data)} questions.")
|
368 |
-
except
|
369 |
print(f"Error fetching questions: {e}")
|
370 |
return f"Error fetching questions: {e}", None
|
371 |
-
except requests.exceptions.JSONDecodeError as e:
|
372 |
-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
373 |
-
print(f"Response text: {response.text[:500]}")
|
374 |
-
return f"Error decoding server response for questions: {e}", None
|
375 |
-
except Exception as e:
|
376 |
-
print(f"An unexpected error occurred fetching questions: {e}")
|
377 |
-
return f"An unexpected error occurred fetching questions: {e}", None
|
378 |
|
379 |
-
#
|
380 |
results_log = []
|
381 |
answers_payload = []
|
382 |
-
|
383 |
-
|
384 |
-
for i, item in enumerate(questions_data):
|
385 |
task_id = item.get("task_id")
|
386 |
question_text = item.get("question")
|
387 |
if not task_id or question_text is None:
|
388 |
-
print(f"Skipping item with missing task_id or question: {item}")
|
389 |
continue
|
390 |
-
|
391 |
-
print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
|
392 |
-
|
393 |
try:
|
394 |
-
|
395 |
-
answers_payload.append({"task_id": task_id, "submitted_answer":
|
396 |
-
results_log.append({
|
397 |
-
"Task ID": task_id,
|
398 |
-
"Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
|
399 |
-
"Submitted Answer": submitted_answer
|
400 |
-
})
|
401 |
-
print(f"Answer for {task_id}: {submitted_answer[:50]}...")
|
402 |
except Exception as e:
|
403 |
-
print(f"Error
|
404 |
-
|
405 |
-
answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
|
406 |
-
results_log.append({
|
407 |
-
"Task ID": task_id,
|
408 |
-
"Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
|
409 |
-
"Submitted Answer": error_answer
|
410 |
-
})
|
411 |
|
412 |
if not answers_payload:
|
413 |
-
|
414 |
-
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
415 |
-
|
416 |
-
# 4. Prepare Submission
|
417 |
-
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
418 |
-
status_update = f"GAIA Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
419 |
-
print(status_update)
|
420 |
|
421 |
-
#
|
422 |
-
|
423 |
try:
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
final_status = (
|
428 |
f"Submission Successful!\n"
|
429 |
-
f"User: {
|
430 |
-
f"
|
431 |
-
f"({
|
432 |
-
f"Message: {
|
433 |
)
|
434 |
-
|
435 |
-
results_df = pd.DataFrame(results_log)
|
436 |
-
return final_status, results_df
|
437 |
-
except requests.exceptions.HTTPError as e:
|
438 |
-
error_detail = f"Server responded with status {e.response.status_code}."
|
439 |
-
try:
|
440 |
-
error_json = e.response.json()
|
441 |
-
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
442 |
-
except requests.exceptions.JSONDecodeError:
|
443 |
-
error_detail += f" Response: {e.response.text[:500]}"
|
444 |
-
status_message = f"Submission Failed: {error_detail}"
|
445 |
-
print(status_message)
|
446 |
-
results_df = pd.DataFrame(results_log)
|
447 |
-
return status_message, results_df
|
448 |
-
except requests.exceptions.Timeout:
|
449 |
-
status_message = "Submission Failed: The request timed out."
|
450 |
-
print(status_message)
|
451 |
-
results_df = pd.DataFrame(results_log)
|
452 |
-
return status_message, results_df
|
453 |
-
except requests.exceptions.RequestException as e:
|
454 |
-
status_message = f"Submission Failed: Network error - {e}"
|
455 |
-
print(status_message)
|
456 |
-
results_df = pd.DataFrame(results_log)
|
457 |
-
return status_message, results_df
|
458 |
except Exception as e:
|
459 |
-
|
460 |
-
print(status_message)
|
461 |
-
results_df = pd.DataFrame(results_log)
|
462 |
-
return status_message, results_df
|
463 |
-
|
464 |
|
465 |
-
# ---
|
466 |
with gr.Blocks() as demo:
|
467 |
-
gr.Markdown("#
|
468 |
-
gr.Markdown(
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
This agent is equipped with:
|
473 |
-
- 🔍 Web search capabilities (DuckDuckGo) - **FIXED**
|
474 |
-
- 🧮 Mathematical calculator
|
475 |
-
- 🖼️ Image analysis
|
476 |
-
- 📁 File processing (CSV, JSON, text files)
|
477 |
-
- 📊 Data processing and statistics
|
478 |
-
- 🐍 Python code execution
|
479 |
-
|
480 |
-
**Instructions:**
|
481 |
-
1. Log in to your Hugging Face account using the button below
|
482 |
-
2. Click 'Run GAIA Evaluation & Submit All Answers' to start the evaluation
|
483 |
-
3. The agent will process each question systematically using available tools
|
484 |
-
|
485 |
-
**Note:** Processing may take time as the agent analyzes each question thoroughly.
|
486 |
-
"""
|
487 |
-
)
|
488 |
-
|
489 |
gr.LoginButton()
|
|
|
|
|
|
|
490 |
|
491 |
-
|
492 |
-
|
493 |
-
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
494 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
495 |
-
|
496 |
-
run_button.click(
|
497 |
-
fn=run_and_submit_all,
|
498 |
-
outputs=[status_output, results_table]
|
499 |
-
)
|
500 |
|
501 |
if __name__ == "__main__":
|
502 |
-
print("
|
503 |
-
|
504 |
-
space_host_startup = os.getenv("SPACE_HOST")
|
505 |
-
space_id_startup = os.getenv("SPACE_ID")
|
506 |
-
|
507 |
-
if space_host_startup:
|
508 |
-
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
509 |
-
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
510 |
-
else:
|
511 |
-
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
512 |
-
|
513 |
-
if space_id_startup:
|
514 |
-
print(f"✅ SPACE_ID found: {space_id_startup}")
|
515 |
-
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
516 |
-
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
517 |
-
else:
|
518 |
-
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
519 |
-
|
520 |
-
print("-"*(60 + len(" GAIA Agent Starting ")) + "\n")
|
521 |
-
print("Launching Gradio Interface for GAIA Agent Evaluation...")
|
522 |
-
demo.launch(debug=True, share=False)
|
|
|
1 |
+
# app.py
|
2 |
+
|
3 |
import os
|
4 |
import gradio as gr
|
5 |
import requests
|
6 |
import inspect
|
7 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# SmolAgents imports
|
10 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
|
|
|
11 |
|
12 |
# --- Constants ---
|
13 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
14 |
|
15 |
+
# --- Enhanced Agent Definition ---
|
16 |
+
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with a web-search tool.

    Calling an instance runs the agent on a question and always returns a
    string: the agent's answer, or an "Error in agent: ..." message.
    """

    def __init__(self):
        """Create the search tool, the model and the underlying ``CodeAgent``."""
        print("BasicAgent initialized with real agentic capabilities.")

        # Initialize tools and model
        self.search_tool = DuckDuckGoSearchTool()
        self.model = InferenceClientModel()
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool],
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question to hand to the agent.

        Returns:
            The agent's answer converted to ``str``, or
            ``"Error in agent: <exception>"`` if the run raised.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            response = self.agent.run(question)
        except Exception as e:
            print(f"Agent error during run: {e}")
            return f"Error in agent: {e}"

        # The agent's result is not guaranteed to be a string (it can be a
        # number, list, ...). Slicing such a value for the log line would
        # raise and turn a valid answer into an error message, so normalize
        # to text first.
        answer = response if isinstance(response, str) else str(response)
        print(f"Agent response (first 50 chars): {answer[:50]}...")
        return answer
|
37 |
|
38 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
        the per-question log once the agent has run, and None when the
        function stops earlier (no login, agent init failure, fetch failure).
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please login to Hugging Face to submit answers.", None

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # Fetch questions
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
        # A non-list payload (e.g. an error object) would otherwise pass the
        # truthiness check and crash in the loop below.
        if not questions_data or not isinstance(questions_data, list):
            return "Empty or invalid question list.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # Run agent on questions
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted = agent(question_text)
            # The payload is sent as JSON; force text so an unusual answer
            # object cannot make the whole submission fail to serialize.
            if not isinstance(submitted, str):
                submitted = str(submitted)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted})
        except Exception as e:
            print(f"Error on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    # Prepare & submit
    payload = {"username": username, "agent_code": agent_code, "answers": answers_payload}
    try:
        submit_resp = requests.post(submit_url, json=payload, timeout=60)
        submit_resp.raise_for_status()
        result_json = submit_resp.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_json.get('username')}\n"
            f"Score: {result_json.get('score', 'N/A')}% "
            f"({result_json.get('correct_count', '?')}/{result_json.get('total_attempted', '?')} correct)\n"
            f"Message: {result_json.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
|
|
|
|
|
|
|
|
|
111 |
|
112 |
+
# --- Gradio UI ---
|
113 |
# Page layout: heading, short instructions, login, a run button and the two
# output widgets that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
    Modify `BasicAgent` to add more tools or logic.
    Log in, click **Run Evaluation & Submit All Answers**, and watch it process automatically.
    """
    )

    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(
        label="Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler's (status message, results table) tuple maps onto the two
    # output widgets in order.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, results],
    )

# Start the web app only when executed as a script.
if __name__ == "__main__":
    print("Launching app...")
    demo.launch(debug=True, share=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|