LamiaYT committed on
Commit
a39e119
·
1 Parent(s): 7dacad3

Deploy GAIA agent

Browse files
Files changed (1) hide show
  1. app.py +65 -459
app.py CHANGED
@@ -1,353 +1,59 @@
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import json
7
- import re
8
- import io
9
- import base64
10
- from PIL import Image
11
- import matplotlib.pyplot as plt
12
- import numpy as np
13
- from pathlib import Path
14
- from duckduckgo_search import DDGS
15
 
16
- # smolagents imports
17
- from smolagents import CodeAgent, tool, PythonInterpreterTool
18
- from smolagents.models import LiteLLMModel
19
 
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- # --- Enhanced Tools for GAIA ---
24
-
25
- from smolagents import tool
26
- from duckduckgo_search import DDGS
27
-
28
@tool
def web_search_tool(query: str) -> str:
    """
    Perform a web search using DuckDuckGo and return top results.

    Args:
        query (str): Search query.

    Returns:
        str: Up to three results, each formatted as Title / URL / Snippet and
            separated by a blank line. Returns "No results found." when the
            search yields nothing, or "Web search failed: ..." when the
            search raises.
    """
    try:
        with DDGS() as ddgs:
            # Materialise inside the `with` so the session is still open in
            # case the client hands back a lazy iterator.
            hits = list(ddgs.text(query, max_results=3) or [])

        if not hits:
            # An empty string gives the agent nothing to act on.
            return "No results found."

        # `.get` keeps one malformed hit from discarding the whole result set.
        return "\n\n".join(
            f"Title: {hit.get('title', '')}\n"
            f"URL: {hit.get('href', '')}\n"
            f"Snippet: {hit.get('body', '')}"
            for hit in hits
        )
    except Exception as e:
        # Tools report failures as text so the agent loop keeps running.
        return f"Web search failed: {str(e)}"
48
-
49
-
50
-
51
@tool
def calculator_tool(expression: str) -> str:
    """
    Evaluate a basic arithmetic expression without executing arbitrary code.

    Args:
        expression: Mathematical expression as string. Only digits, decimal points, parentheses, spaces and the operators + - * / (including // and **) are accepted.

    Returns:
        Result of the calculation as a string, or an error message starting with "Error:" or "Calculation error:".
    """
    # Local imports keep the tool self-contained.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # The character whitelist still admits towers such as 9**9**9**9, which
    # would pin the CPU; cap the exponent instead of trusting the input.
    max_exponent = 1000

    def evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if (
            isinstance(node, ast.Constant)
            and isinstance(node.value, (int, float))
            and not isinstance(node.value, bool)
        ):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported expression")

    try:
        allowed_chars = set('0123456789+-*/.() ')
        if not all(c in allowed_chars for c in expression):
            return "Error: Expression contains invalid characters"

        # Parse to an AST and walk it instead of calling eval(): nothing but
        # numeric literals and the operators listed above can execute.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree))
    except Exception as e:
        return f"Calculation error: {str(e)}"
70
-
71
@tool
def image_analyzer_tool(image_path: str) -> str:
    """
    Analyze images and extract information.

    Args:
        image_path: Path to the image file

    Returns:
        Description of image content (dimensions, format, colour summary and file size), or an error message string.
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found"

        # The context manager closes the underlying file handle; a bare
        # Image.open() keeps it open until garbage collection.
        with Image.open(image_path) as img:
            width, height = img.size
            mode = img.mode
            format_info = img.format if img.format else "Unknown"

            # Simple color analysis
            if mode == 'RGB':
                # maxcolors covers every possible 24-bit colour, so
                # getcolors() only returns None for an empty result.
                colors = img.getcolors(maxcolors=256*256*256)
                if colors:
                    # Entries are (count, colour); pick the highest count.
                    dominant_color = max(colors, key=lambda x: x[0])[1]
                    color_info = f"Dominant color: RGB{dominant_color}"
                else:
                    color_info = "Complex color palette"
            else:
                color_info = f"Color mode: {mode}"

        return (
            "Image Analysis:\n"
            f"- Dimensions: {width}x{height} pixels\n"
            f"- Format: {format_info}\n"
            f"- {color_info}\n"
            f"- File size: {os.path.getsize(image_path)} bytes\n"
        )

    except Exception as e:
        return f"Image analysis error: {str(e)}"
112
-
113
@tool
def file_reader_tool(file_path: str) -> str:
    """
    Read and analyze various file types (text, CSV, JSON, etc.).

    Args:
        file_path: Path to the file

    Returns:
        File content or analysis. Text and JSON previews are limited to 1000 characters and end with "..." only when content was actually cut off.
    """
    preview_limit = 1000

    try:
        if not os.path.exists(file_path):
            return "Error: File not found"

        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(file_path)
            return (
                f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n"
                f"Columns: {list(df.columns)}\n"
                f"First 5 rows:\n{df.head().to_string()}"
            )

        if file_ext == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            rendered = json.dumps(data, indent=2)
            # Only signal truncation when something was really dropped.
            suffix = "..." if len(rendered) > preview_limit else ""
            return f"JSON file content:\n{rendered[:preview_limit]}{suffix}"

        if file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css']:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            suffix = "..." if len(content) > preview_limit else ""
            return (
                f"Text file content ({len(content)} characters):\n"
                f"{content[:preview_limit]}{suffix}"
            )

        # Any other extension is reported by size only, never decoded.
        return f"Binary file: {file_ext}, size: {os.path.getsize(file_path)} bytes"

    except Exception as e:
        return f"File reading error: {str(e)}"
147
-
148
@tool
def data_processor_tool(data: str, operation: str) -> str:
    """
    Process data with various operations (sort, filter, calculate statistics).

    Args:
        data: Data as string (JSON, or numbers separated by commas and/or whitespace)
        operation: Operation to perform, case-insensitive: sum, average, count, sort, max or min

    Returns:
        Processed data result as a string, or an error message string.
    """
    try:
        # Try JSON first, then fall back to a flat list of numbers.
        try:
            parsed_data = json.loads(data)
        except (ValueError, TypeError):
            try:
                parsed_data = [float(token) for token in data.replace(',', ' ').split()]
            except (ValueError, TypeError, AttributeError):
                return "Error: Could not parse data"

        op = operation.lower()
        is_list = isinstance(parsed_data, list)
        # Non-numeric list entries are ignored by the numeric operations.
        nums = (
            [x for x in parsed_data if isinstance(x, (int, float))]
            if is_list
            else []
        )

        if op == 'sum' and is_list:
            return str(sum(nums))

        if op == 'average' and is_list:
            return str(sum(nums) / len(nums) if nums else 0)

        if op == 'count':
            # len() of a scalar raises; the outer handler reports that.
            return str(len(parsed_data))

        if op == 'sort' and is_list:
            return str(sorted(parsed_data))

        if op == 'max' and is_list:
            return str(max(nums) if nums else "No numbers found")

        if op == 'min' and is_list:
            return str(min(nums) if nums else "No numbers found")

        return f"Unsupported operation: {operation}"

    except Exception as e:
        return f"Data processing error: {str(e)}"
195
-
196
- # --- Enhanced GAIA Agent ---
197
class GAIAAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    Calling an instance with a question returns a short answer string. If the
    ``CodeAgent`` could not be built, or raises while running, a heuristic
    fallback (arithmetic on numbers in the question, then a web search) is
    used instead.
    """

    # Prompt wrapped around every question before it is sent to the agent.
    _PROMPT_TEMPLATE = (
        "\n"
        "You are a helpful AI assistant designed to solve complex real-world problems that may require:\n"
        "- Web searching for current information\n"
        "- Mathematical calculations\n"
        "- Image analysis\n"
        "- File processing\n"
        "- Multi-step reasoning\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "Please approach this systematically:\n"
        "1. Analyze what type of problem this is\n"
        "2. Determine what tools/information you need\n"
        "3. Use available tools to gather information\n"
        "4. Reason through the problem step by step\n"
        "5. Provide a clear, concise final answer\n"
        "\n"
        "Remember to be precise and factual in your response.\n"
    )

    # Ordered most-specific first: a bare "Answer" pattern placed earlier
    # would match inside "The answer is 42" and capture "is 42".
    _ANSWER_PATTERNS = (
        r"Final answer:?\s*(.+)",
        r"The answer is:?\s*(.+)",
        r"Answer:?\s*(.+)",
        r"Result:?\s*(.+)",
    )

    def __init__(self):
        """Build the model, the tool list and the underlying ``CodeAgent``."""
        print("GAIAAgent initialized with SmolaAgent framework.")

        # NOTE(review): "microsoft/DialoGPT-medium" may not be a model id
        # LiteLLM can route - confirm it is actually served.
        try:
            self.model = LiteLLMModel(
                model_id="microsoft/DialoGPT-medium",  # Lightweight model
                max_tokens=512,
                temperature=0.1
            )
        except Exception as e:
            # A bare `except:` here would also swallow KeyboardInterrupt.
            print(f"Warning: Using fallback model configuration ({e})")
            self.model = None

        self.tools = [
            web_search_tool,
            calculator_tool,
            image_analyzer_tool,
            file_reader_tool,
            data_processor_tool,
            PythonInterpreterTool()
        ]

        try:
            self.agent = CodeAgent(
                tools=self.tools,
                model=self.model,
                verbosity_level=1
            )
        except Exception as e:
            print(f"Agent initialization error: {e}")
            # __call__ checks for None and routes to the fallback.
            self.agent = None

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the extracted final answer text.

        Args:
            question: The task text to answer.

        Returns:
            The agent's extracted answer, or the fallback heuristic's result
            when the agent is unavailable or raises.
        """
        print(f"GAIAAgent processing question: {question[:100]}...")

        if not self.agent:
            return self._fallback_processing(question)

        try:
            response = self.agent.run(
                self._PROMPT_TEMPLATE.format(question=question)
            )
            if isinstance(response, str):
                return self._extract_answer(response)
            return str(response)
        except Exception as e:
            print(f"Error in agent processing: {e}")
            return self._fallback_processing(question)

    @staticmethod
    def _extract_answer(response: str) -> str:
        """Pull the final answer out of a free-text agent response.

        Tries the known "Final answer: ..." style markers first. Otherwise
        returns the last non-empty sentence with its trailing period removed.
        """
        for pattern in GAIAAgent._ANSWER_PATTERNS:
            match = re.search(pattern, response, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        # Split on sentence-ending punctuation followed by whitespace, so
        # decimals ("3.14") stay intact and a trailing "." does not leave an
        # empty last sentence.
        sentences = [
            part.strip()
            for part in re.split(r"(?<=[.!?])\s+", response.strip())
            if part.strip()
        ]
        if not sentences:
            return response.strip()
        return sentences[-1].rstrip('.').strip()

    def _fallback_processing(self, question: str) -> str:
        """Fallback processing when main agent fails.

        Handles sum/average questions from the numbers in the text, then
        question-word queries via ``web_search_tool``. Otherwise returns a
        generic "need more context" message.
        """
        try:
            question_lower = question.lower()

            # Math questions. Keywords are matched case-insensitively so
            # "Sum of ..." is recognised as well as "sum of ...".
            math_markers = ['+', '-', '*', '/', 'calculate', 'sum', 'average']
            if any(marker in question_lower for marker in math_markers):
                numbers = [
                    float(n) for n in re.findall(r'-?\d+\.?\d*', question)
                ]
                if len(numbers) >= 2:
                    if 'sum' in question_lower or '+' in question:
                        return str(sum(numbers))
                    if 'average' in question_lower:
                        return str(sum(numbers) / len(numbers))

            # Search-based questions
            question_words = ['what', 'who', 'when', 'where', 'how', 'why']
            if any(word in question_lower for word in question_words):
                try:
                    search_result = web_search_tool(question)
                    # First line long enough to carry content.
                    relevant_lines = [
                        line for line in search_result.split('\n')
                        if len(line.strip()) > 20
                    ]
                    return (
                        relevant_lines[0]
                        if relevant_lines
                        else "Unable to find specific information"
                    )
                except Exception as e:
                    # Best effort: fall through to the default response.
                    print(f"Fallback search failed: {e}")

            return "I need more context or tools to answer this question accurately."

        except Exception as e:
            return f"Processing error: {str(e)}"
 
328
 
329
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user
            has not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
        per-question answers, or None when the run stopped before any
        question was processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = GAIAAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must precede RequestException: JSONDecodeError is a subclass of
        # it, so in the opposite order this handler can never run.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run GAIA Agent
    results_log = []
    answers_payload = []
    print(f"Running GAIA agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")

        try:
            submitted_answer = agent(question_text)
            print(f"Answer for {task_id}: {str(submitted_answer)[:50]}...")
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            submitted_answer = f"AGENT ERROR: {e}"

        # Record once, after the try/except, so a failure can never leave
        # two entries for the same task in the payload.
        question_preview = (
            question_text[:100] + "..."
            if len(question_text) > 100
            else question_text
        )
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_preview,
            "Submitted Answer": submitted_answer
        })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"GAIA Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Every failure path reports the same way.
    print(status_message)
    return status_message, pd.DataFrame(results_log)
463
-
464
 
465
- # --- Build Gradio Interface using Blocks ---
466
with gr.Blocks() as demo:
    # Page header followed by the feature list and usage instructions.
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Enhanced GAIA Agent with SmolaAgent Framework**

        This agent is equipped with:
        - 🔍 Web search capabilities (DuckDuckGo) - **FIXED**
        - 🧮 Mathematical calculator
        - 🖼️ Image analysis
        - 📁 File processing (CSV, JSON, text files)
        - 📊 Data processing and statistics
        - 🐍 Python code execution

        **Instructions:**
        1. Log in to your Hugging Face account using the button below
        2. Click 'Run GAIA Evaluation & Submit All Answers' to start the evaluation
        3. The agent will process each question systematically using available tools

        **Note:** Processing may take time as the agent analyzes each question thoroughly.
        """
    )

    gr.LoginButton()

    # Controls: one trigger button and two read-only outputs.
    run_button = gr.Button(
        "Run GAIA Evaluation & Submit All Answers",
        variant="primary",
    )
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No `inputs` are wired; the handler's only parameter is the profile.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
500
 
501
if __name__ == "__main__":
    # Startup banner; the closing rule is sized to match the opening line.
    banner_title = " GAIA Agent Starting "
    print("\n" + "-"*30 + banner_title + "-"*30)

    # Both variables are read from the environment; they are absent when
    # not set (the messages below assume that means a local run).
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    print("-"*(60 + len(banner_title)) + "\n")
    print("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)
 
1
+ # app.py
2
+
3
  import os
4
  import gradio as gr
5
  import requests
6
  import inspect
7
  import pandas as pd
 
 
 
 
 
 
 
 
 
8
 
9
+ # SmolAgents imports
10
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
 
11
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
+ # --- Enhanced Agent Definition ---
16
class BasicAgent:
    """Thin wrapper around a smolagents ``CodeAgent`` with web search.

    Instances are callable: ``agent(question)`` returns the answer as a
    string and does not raise. Failures come back as an
    ``"Error in agent: ..."`` message.
    """

    def __init__(self):
        """Create the search tool, the inference model and the agent."""
        print("BasicAgent initialized with real agentic capabilities.")

        # Initialize tools and model
        self.search_tool = DuckDuckGoSearchTool()
        self.model = InferenceClientModel()
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool]
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The task text to answer.

        Returns:
            The agent's answer converted to ``str``, or an error message if
            the run raised.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            # The run result is not guaranteed to be a string; slicing a
            # number would raise and replace a valid answer with an error.
            answer = str(self.agent.run(question))
            print(f"Agent response (first 50 chars): {answer[:50]}...")
            return answer
        except Exception as e:
            print(f"Agent error during run: {e}")
            return f"Error in agent: {e}"
37
 
38
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user
            has not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
        per-question answers, or None when the run stopped before any
        question was processed.
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please login to Hugging Face to submit answers.", None

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # Fetch questions
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
        if not questions_data:
            return "Empty or invalid question list.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # Run agent on questions
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted})
        except Exception as e:
            # Failed tasks are shown in the table but not submitted.
            print(f"Error on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    # Prepare & submit
    payload = {"username": username, "agent_code": agent_code, "answers": answers_payload}
    try:
        submit_resp = requests.post(submit_url, json=payload, timeout=60)
        submit_resp.raise_for_status()
        result_json = submit_resp.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_json.get('username')}\n"
            f"Score: {result_json.get('score', 'N/A')}% "
            f"({result_json.get('correct_count', '?')}/{result_json.get('total_attempted', '?')} correct)\n"
            f"Message: {result_json.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        # Surface the server's own explanation; the bare exception text only
        # carries the status line.
        detail = f"Server responded with status {e.response.status_code}."
        try:
            detail += f" Detail: {e.response.json().get('detail', e.response.text)}"
        except (ValueError, AttributeError):
            # Body was not JSON, or not a JSON object.
            detail += f" Response: {e.response.text[:500]}"
        print(f"Submission failed: {detail}")
        return f"Submission failed: {detail}", pd.DataFrame(results_log)
    except Exception as e:
        print(f"Submission failed: {e}")
        return f"Submission failed: {e}", pd.DataFrame(results_log)
 
 
 
 
111
 
112
+ # --- Gradio UI ---
113
with gr.Blocks() as demo:
    # Title and a short usage note.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        Modify `BasicAgent` to add more tools or logic.
        Log in, click **Run Evaluation & Submit All Answers**, and watch it process automatically.
        """
    )
    gr.LoginButton()

    # One trigger button and two read-only outputs.
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(
        label="Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No `inputs` are wired; the handler's only parameter is the profile.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, results],
    )
 
 
 
 
 
 
 
 
125
 
126
if __name__ == "__main__":
    # Script entry point: announce start-up, then launch the Gradio app
    # with debug on and no public share link.
    print("Launching app...")
    demo.launch(
        debug=True,
        share=False,
    )