LamiaYT committed on
Commit
cd4ed8b
·
1 Parent(s): 556a849
Files changed (2) hide show
  1. app.py +115 -401
  2. txt.txt +1 -0
app.py CHANGED
@@ -1,348 +1,43 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
- import json
6
- import re
7
- import time
8
- from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, tool
9
- from typing import Dict, Any, List
10
- import base64
11
- from io import BytesIO
12
- from PIL import Image
13
- import numpy as np
14
 
15
# --- Constants ---
# Base URL of the remote scoring API.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Enhanced Knowledge Base ---
# Hand-curated facts, grouped by topic, that the tools and the agent below read
# instead of searching the web. Keys are looked up literally, so renaming one
# requires updating every reader.
KNOWLEDGE_BASE = {
    # Biographical facts read by knowledge_lookup().
    "mercedes_sosa": {
        "birthplace": "Tucumán",
        "province": "Tucumán",
        "country": "Argentina",
        "nickname": "La Negra",
        "birth_year": 1935,
        "death_year": 2009,
        "genre": "Nueva Canción folk music"
    },
    # NOTE(review): no reader of "geography" is visible in this part of the file.
    "geography": {
        "tucuman": "Tucumán is a province in northwestern Argentina, capital San Miguel de Tucumán",
        "argentina_provinces": ["Buenos Aires", "Catamarca", "Chaco", "Chubut", "Córdoba", "Corrientes", "Entre Ríos", "Formosa", "Jujuy", "La Pampa", "La Rioja", "Mendoza", "Misiones", "Neuquén", "Río Negro", "Salta", "San Juan", "San Luis", "Santa Cruz", "Santa Fe", "Santiago del Estero", "Tierra del Fuego", "Tucumán"]
    },
    # botanical_classifier() substring-matches food names against "true_vegetables";
    # knowledge_lookup() quotes the first entries of both lists.
    "botanical": {
        "true_vegetables": ["artichoke", "asparagus", "beet", "broccoli", "brussels sprouts", "cabbage", "carrot", "cauliflower", "celery", "chard", "collard", "kale", "lettuce", "onion", "parsnip", "potato", "radish", "spinach", "sweet potato", "turnip"],
        "fruits_used_as_vegetables": ["tomato", "pepper", "eggplant", "cucumber", "zucchini", "squash", "pumpkin", "okra", "avocado"]
    },
    # knowledge_lookup() quotes "non_commutative_examples" verbatim.
    "mathematics": {
        "non_commutative_examples": ["matrix multiplication", "subtraction", "division", "function composition", "cross product"],
        "commutative_examples": ["addition", "multiplication", "union", "intersection"]
    }
}

# System prompt for better reasoning
# NOTE(review): no code visible here passes SYSTEM_PROMPT to the model or the
# agent — confirm whether it is actually used.
SYSTEM_PROMPT = """You are an expert AI agent solving GAIA benchmark questions.

CRITICAL RULES:
1. For reversed text questions, ALWAYS reverse the text first to understand it
2. For botanical questions, distinguish true vegetables from fruits used as vegetables
3. For factual questions, use your knowledge base first, then search if needed
4. For mathematical problems, provide concrete examples
5. Give direct, precise answers - no unnecessary explanation

KNOWLEDGE:
- Mercedes Sosa was born in Tucumán province, Argentina
- True vegetables: broccoli, celery, lettuce, carrot, onion, potato, etc.
- Fruits used as vegetables: tomato, pepper, eggplant, cucumber
- Non-commutative operations: subtraction, division, matrix multiplication
"""
59
-
60
- # --- Enhanced Custom Tools ---
61
-
62
@tool
def enhanced_web_search(query: str) -> str:
    """Advanced web search using Serper API with intelligent result processing

    Args:
        query: The search query string

    Returns:
        Processed search results with key information extracted
    """
    # NOTE: the docstring wording is kept as-is on purpose; smolagents' @tool
    # derives the tool and argument descriptions from it.
    try:
        serper_key = os.getenv("SERPER_API_KEY")
        if not serper_key:
            return "SERPER_API_KEY not found - using fallback search"

        reply = requests.post(
            "https://google.serper.dev/search",
            headers={
                'X-API-KEY': serper_key,
                'Content-Type': 'application/json',
            },
            data=json.dumps({"q": query, "num": 8}),
            timeout=30,
        )
        reply.raise_for_status()
        body = reply.json()

        lines = []

        # The knowledge-graph entry, when present, leads the output.
        if 'knowledgeGraph' in body:
            graph = body['knowledgeGraph']
            lines.append(f"FACT: {graph.get('title', '')} - {graph.get('description', '')}")

        # Only the first four organic hits are reported.
        if 'organic' in body:
            lines.extend(
                f"{hit.get('title', '')}: {hit.get('snippet', '')}"
                for hit in body['organic'][:4]
            )

        if not lines:
            return "No search results found"
        return "\n".join(lines)

    except Exception as err:
        # Any failure (network, HTTP status, JSON shape) is reported as text.
        return f"Search failed: {str(err)}"
105
-
106
@tool
def knowledge_lookup(topic: str) -> str:
    """Look up information from curated knowledge base

    Args:
        topic: Topic to search for in knowledge base

    Returns:
        Relevant information from knowledge base
    """
    # NOTE: the docstring wording is kept as-is on purpose; smolagents' @tool
    # derives the tool and argument descriptions from it.
    needle = topic.lower()

    # Mercedes Sosa: birthplace-style questions get the specific sentence,
    # anything else about her gets the short biography.
    if "mercedes sosa" in needle:
        if any(word in needle for word in ("born", "birthplace", "province")):
            singer = KNOWLEDGE_BASE['mercedes_sosa']
            return f"Mercedes Sosa was born in {singer['province']} province, Argentina in {singer['birth_year']}"
        return "Mercedes Sosa (1935-2009) was an Argentine folk singer known as 'La Negra', born in Tucumán province"

    # Botanical classification: both keywords must be present.
    if "botanical" in needle and "vegetable" in needle:
        plants = KNOWLEDGE_BASE['botanical']
        vegetables = ', '.join(plants['true_vegetables'][:10])
        culinary = ', '.join(plants['fruits_used_as_vegetables'][:5])
        return f"True vegetables: {vegetables}. Fruits used as vegetables: {culinary}"

    # Commutativity questions.
    if "commutative" in needle:
        examples = ', '.join(KNOWLEDGE_BASE['mathematics']['non_commutative_examples'])
        return f"Non-commutative operations: {examples}. Example: 5-3=2 but 3-5=-2"

    return f"No specific knowledge found for: {topic}"
136
-
137
@tool
def text_reverser(text: str) -> str:
    """Reverse text to decode reversed questions

    Args:
        text: Text to reverse

    Returns:
        Reversed text
    """
    # NOTE: the docstring wording is kept as-is on purpose; smolagents' @tool
    # derives the tool and argument descriptions from it.
    # Walk the characters back to front and stitch them together again.
    return "".join(reversed(text))
148
 
149
@tool
def botanical_classifier(food_list: str) -> str:
    """Classify foods into botanical categories

    Args:
        food_list: Comma-separated list of foods

    Returns:
        Botanically correct classification
    """
    # NOTE: the docstring wording is kept as-is on purpose; smolagents' @tool
    # derives the tool and argument descriptions from it.
    known_vegetables = KNOWLEDGE_BASE['botanical']['true_vegetables']

    # Normalise each entry, keep those containing a known vegetable name as a
    # substring, and return them alphabetically.
    normalised = (raw.strip().lower() for raw in food_list.split(','))
    matches = sorted(
        entry
        for entry in normalised
        if any(name in entry for name in known_vegetables)
    )
    return ', '.join(matches)
169
 
170
@tool
def math_analyzer(problem: str) -> str:
    """Analyze mathematical problems and provide solutions

    Args:
        problem: Mathematical problem description

    Returns:
        Mathematical analysis and solution
    """
    # NOTE: the docstring wording is kept as-is on purpose; smolagents' @tool
    # derives the tool and argument descriptions from it.
    lowered = problem.lower()

    # Keyword -> canned reply, checked in order; the first hit wins.
    canned_replies = (
        (
            "commutative",
            "Matrix multiplication is not commutative. Example: If A=[[1,2],[3,4]] and B=[[5,6],[7,8]], then AB ≠ BA. Generally: AB ≠ BA for matrices.",
        ),
        (
            "chess",
            "In chess analysis: 1) Check for immediate threats 2) Look for tactical motifs (pins, forks, skewers) 3) Evaluate material and position 4) Calculate forcing moves",
        ),
    )
    for keyword, reply in canned_replies:
        if keyword in lowered:
            return reply

    # Nothing matched: echo at most the first 100 characters of the problem.
    return f"Mathematical analysis needed for: {problem[:100]}"
189
 
190
@tool
def youtube_content_analyzer(url: str) -> str:
    """Analyze YouTube video content and metadata

    Args:
        url: YouTube video URL

    Returns:
        Video analysis results
    """
    # NOTE: the docstring wording is kept as-is on purpose; smolagents' @tool
    # derives the tool and argument descriptions from it.
    try:
        # An 11-character id following either "v=" or a slash.
        found = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)
        if found is None:
            return "Invalid YouTube URL format"

        video_id = found.group(1)

        # oEmbed returns title/author metadata.
        reply = requests.get(
            f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json",
            timeout=15,
        )
        if reply.status_code != 200:
            return f"Could not analyze video {video_id}"

        meta = reply.json()
        return f"Video: {meta.get('title', 'Unknown')} by {meta.get('author_name', 'Unknown')}"

    except Exception as err:
        return f"YouTube analysis error: {str(err)}"
220
-
221
- # --- Enhanced GAIA Agent ---
222
class EnhancedGAIAAgent:
    """Question answerer that tries hard-coded shortcuts before an LLM agent.

    ``__call__`` routes a question through keyword-based handlers (reversed
    text, Mercedes Sosa, botanical lists, commutativity, YouTube links). Only
    when none applies is the question handed to a smolagents ``CodeAgent``,
    with a plain web search as the last resort.
    """

    def __init__(self):
        """Create the model, the tool list and the underlying CodeAgent."""
        print("Initializing Enhanced GAIA Agent...")

        # Use a more reliable model
        try:
            self.model = InferenceClientModel(
                model_id="HuggingFaceH4/zephyr-7b-beta",
                token=os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
            )
        except Exception as e:
            print(f"Model initialization warning: {e}")
            # Fallback model
            self.model = InferenceClientModel(model_id="microsoft/DialoGPT-medium")

        # Define tools
        self.tools = [
            enhanced_web_search,
            knowledge_lookup,
            text_reverser,
            botanical_classifier,
            math_analyzer,
            youtube_content_analyzer,
            DuckDuckGoSearchTool()
        ]

        # Create agent
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model
        )

        print("Enhanced GAIA Agent initialized.")

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the answer as a string.

        Never raises for ordinary errors: any ``Exception`` is logged and
        turned into an ``"Error processing question: ..."`` string.
        """
        print(f"Processing: {question[:80]}...")

        try:
            # Pre-process question
            question_lower = question.lower()

            # Handle reversed text immediately
            if self._is_reversed_text(question):
                return self._handle_reversed_text(question)

            # Handle specific question types
            if "mercedes sosa" in question_lower and ("born" in question_lower or "province" in question_lower):
                return knowledge_lookup("mercedes sosa birthplace")

            if "botanical" in question_lower and "vegetable" in question_lower:
                return self._handle_botanical_question(question)

            if "commutative" in question_lower:
                return math_analyzer("commutative operation example")

            if "youtube.com" in question:
                return self._handle_youtube_question(question)

            # Default: use agent with search
            try:
                result = self.agent.run(question)
                return str(result)
            except Exception as e:
                # Log why the agent failed instead of discarding the error,
                # then fall back to a direct search.
                print(f"Agent run failed, falling back to web search: {e}")
                return enhanced_web_search(question)

        except Exception as e:
            print(f"Agent error: {e}")
            return f"Error processing question: {question[:50]}..."

    def _is_reversed_text(self, text: str) -> bool:
        """Return True when ``text`` contains a known backwards-written phrase."""
        reversed_indicators = ["ecnetnes", "dnatsrednu", "uoy fi", "thgir ro tfel"]
        lowered = text.lower()
        return any(indicator in lowered for indicator in reversed_indicators)

    def _handle_reversed_text(self, question: str) -> str:
        """Decode a reversed question and answer the left/right variant.

        Only the text before the first comma or question mark is reversed. If
        the decoded text mentions "left" the answer is "right" and vice versa;
        otherwise the decoded text itself is returned.
        """
        try:
            # Find the reversed part (usually before a comma or question mark)
            reversed_part = question.split(',')[0].split('?')[0]
            normal_text = text_reverser(reversed_part.strip())

            # Check if it asks about left or right
            decoded_lower = normal_text.lower()
            if "left" in decoded_lower:
                return "right"
            if "right" in decoded_lower:
                return "left"

            return normal_text
        except Exception:
            # Was a bare ``except:``, which also trapped KeyboardInterrupt and
            # SystemExit; only ordinary errors become the fallback string.
            return "Could not process reversed text"

    def _handle_botanical_question(self, question: str) -> str:
        """Extract the food list from ``question`` and classify it.

        If no list follows a "list:"/"item(s):" marker, a fixed set of common
        grocery items is classified instead.
        """
        try:
            # Extract food list from question
            list_pattern = r'(?:list|items?).*?:(.*?)(?:\.|$)'
            match = re.search(list_pattern, question, re.IGNORECASE | re.DOTALL)

            if match:
                food_list = match.group(1)
                return botanical_classifier(food_list)

            # Fallback: common grocery items
            common_items = "milk, tomatoes, bread, lettuce, peppers, eggs, broccoli, cheese, eggplant, celery"
            return botanical_classifier(common_items)

        except Exception:
            # Was a bare ``except:``; see _handle_reversed_text.
            return "broccoli, celery, lettuce"  # Safe fallback

    def _handle_youtube_question(self, question: str) -> str:
        """Find a youtube.com watch URL in ``question`` and describe the video."""
        try:
            url_match = re.search(r'https://www\.youtube\.com/watch\?v=[^\s,?.]+', question)
            if url_match:
                return youtube_content_analyzer(url_match.group(0))
            return "No valid YouTube URL found"
        except Exception:
            # Was a bare ``except:``; see _handle_reversed_text.
            return "Could not analyze YouTube video"
341
 
342
  def run_and_submit_all(profile: gr.OAuthProfile | None):
343
- """Run evaluation and submit all answers"""
 
 
 
344
  space_id = os.getenv("SPACE_ID")
345
-
346
  if profile:
347
  username = f"{profile.username}"
348
  print(f"User logged in: {username}")
@@ -354,14 +49,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
354
  questions_url = f"{api_url}/questions"
355
  submit_url = f"{api_url}/submit"
356
 
357
- # Initialize Enhanced Agent
358
  try:
359
- agent = EnhancedGAIAAgent()
360
  except Exception as e:
361
- print(f"Agent initialization error: {e}")
362
  return f"Error initializing agent: {e}", None
363
 
364
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
365
 
366
  # Fetch Questions
367
  print(f"Fetching questions from: {questions_url}")
@@ -370,111 +66,129 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
370
  response.raise_for_status()
371
  questions_data = response.json()
372
  if not questions_data:
373
- return "No questions received from server.", None
 
374
  print(f"Fetched {len(questions_data)} questions.")
375
- except Exception as e:
376
  print(f"Error fetching questions: {e}")
377
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
378
 
379
- # Process Questions
380
  results_log = []
381
  answers_payload = []
382
- print(f"Processing {len(questions_data)} questions...")
383
-
384
- for i, item in enumerate(questions_data):
385
  task_id = item.get("task_id")
386
  question_text = item.get("question")
387
-
388
  if not task_id or question_text is None:
389
- print(f"Skipping invalid item: {item}")
390
  continue
391
-
392
- print(f"Question {i+1}/{len(questions_data)}: {task_id}")
393
-
394
  try:
395
- # Process with enhanced agent
396
- answer = agent(question_text)
397
-
398
- answers_payload.append({
399
- "task_id": task_id,
400
- "submitted_answer": str(answer)
401
- })
402
-
403
- results_log.append({
404
- "Task ID": task_id,
405
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
406
- "Answer": str(answer)[:200] + "..." if len(str(answer)) > 200 else str(answer)
407
- })
408
-
409
- # Rate limiting
410
- time.sleep(0.5)
411
-
412
  except Exception as e:
413
- print(f"Error processing {task_id}: {e}")
414
- results_log.append({
415
- "Task ID": task_id,
416
- "Question": question_text[:100] + "...",
417
- "Answer": f"ERROR: {str(e)}"
418
- })
419
 
420
  if not answers_payload:
421
- return "No answers generated to submit.", pd.DataFrame(results_log)
 
422
 
423
- # Submit Results
424
  submission_data = {
425
  "username": username.strip(),
426
  "agent_code": agent_code,
427
  "answers": answers_payload
428
  }
429
-
430
- print(f"Submitting {len(answers_payload)} answers...")
 
 
 
431
  try:
432
- response = requests.post(submit_url, json=submission_data, timeout=120)
433
  response.raise_for_status()
434
  result_data = response.json()
435
-
436
  final_status = (
437
- f"Submission Successful!\n"
438
- f"User: {result_data.get('username', username)}\n"
439
- f"Score: {result_data.get('score', 'Unknown')}% "
440
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
441
- f"Message: {result_data.get('message', 'Submission completed')}"
442
  )
443
-
444
- print("Submission successful!")
445
- return final_status, pd.DataFrame(results_log)
446
-
 
 
 
 
 
 
 
 
 
 
 
 
447
  except Exception as e:
448
- error_msg = f" Submission Failed: {str(e)}"
449
- print(error_msg)
450
- return error_msg, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
- # --- Gradio Interface (Simple as requested) ---
453
with gr.Blocks(title="GAIA Agent") as demo:
    # Components are created in display order, so the statement order below
    # is the page layout.
    gr.Markdown("# 🧠 Enhanced GAIA Benchmark Agent")
    gr.Markdown("**Improved agent with better reasoning and knowledge base**")

    # run_and_submit_all takes a `gr.OAuthProfile | None` argument;
    # presumably Gradio fills it from this login state — confirm.
    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary", size="lg")

    # Targets for the two values returned by run_and_submit_all
    # (a status string and a results table).
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results")

    # No `inputs` are declared; the handler's only parameter is the profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
468
 
 
 
469
if __name__ == "__main__":
    print("🚀 Starting Enhanced GAIA Agent...")

    # Report which environment variables are set; a missing one is only
    # reported and does not stop the launch.
    for env_name in ("SPACE_ID", "SERPER_API_KEY", "HUGGINGFACE_INFERENCE_TOKEN"):
        status_line = (
            f"✅ {env_name} found"
            if os.getenv(env_name)
            else f"⚠️ {env_name} missing"
        )
        print(status_line)

    demo.launch(debug=True, share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # --- Basic Agent Definition ---
11
class BasicAgent:
    """Thin wrapper around a smolagents ``CodeAgent`` with web search.

    The instance is callable: ``agent(question)`` returns the answer string.
    """

    def __init__(self):
        """Build the search tool, the default inference model and the agent."""
        # Imported here so that importing this module does not require
        # smolagents until an agent is actually instantiated.
        from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel

        # Initialize the search tool
        search_tool = DuckDuckGoSearchTool()

        # Initialize the model
        model = InferenceClientModel()

        # Create the agent
        self.agent = CodeAgent(
            model=model,
            tools=[search_tool],
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        Previously this ignored ``self.agent`` and returned the same fixed
        string for every question. Exceptions from the agent are not caught
        here; they propagate to the caller.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # The agent may return a non-string object; the declared return type
        # is ``str``, so coerce it.
        answer = str(self.agent.run(question))
        print(f"Agent returning answer: {answer}")
        return answer
32
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def run_and_submit_all(profile: gr.OAuthProfile | None):
35
+ """
36
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
37
+ and displays the results.
38
+ """
39
  space_id = os.getenv("SPACE_ID")
40
+
41
  if profile:
42
  username = f"{profile.username}"
43
  print(f"User logged in: {username}")
 
49
  questions_url = f"{api_url}/questions"
50
  submit_url = f"{api_url}/submit"
51
 
52
+ # Instantiate Agent
53
  try:
54
+ agent = BasicAgent()
55
  except Exception as e:
56
+ print(f"Error instantiating agent: {e}")
57
  return f"Error initializing agent: {e}", None
58
 
59
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
60
+ print(agent_code)
61
 
62
  # Fetch Questions
63
  print(f"Fetching questions from: {questions_url}")
 
66
  response.raise_for_status()
67
  questions_data = response.json()
68
  if not questions_data:
69
+ print("Fetched questions list is empty.")
70
+ return "Fetched questions list is empty or invalid format.", None
71
  print(f"Fetched {len(questions_data)} questions.")
72
+ except requests.exceptions.RequestException as e:
73
  print(f"Error fetching questions: {e}")
74
  return f"Error fetching questions: {e}", None
75
+ except requests.exceptions.JSONDecodeError as e:
76
+ print(f"Error decoding JSON response: {e}")
77
+ print(f"Response text: {response.text[:500]}")
78
+ return f"Error decoding server response: {e}", None
79
+ except Exception as e:
80
+ print(f"Unexpected error: {e}")
81
+ return f"Unexpected error fetching questions: {e}", None
82
 
83
+ # Run Agent
84
  results_log = []
85
  answers_payload = []
86
+ print(f"Running agent on {len(questions_data)} questions...")
87
+ for item in questions_data:
 
88
  task_id = item.get("task_id")
89
  question_text = item.get("question")
 
90
  if not task_id or question_text is None:
91
+ print(f"Skipping item with missing task_id or question: {item}")
92
  continue
 
 
 
93
  try:
94
+ submitted_answer = agent(question_text)
95
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
96
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  except Exception as e:
98
+ print(f"Error running agent on task {task_id}: {e}")
99
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
100
 
101
  if not answers_payload:
102
+ print("Agent did not produce any answers to submit.")
103
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
104
 
105
+ # Prepare Submission
106
  submission_data = {
107
  "username": username.strip(),
108
  "agent_code": agent_code,
109
  "answers": answers_payload
110
  }
111
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
112
+ print(status_update)
113
+
114
+ # Submit Answers
115
+ print(f"Submitting to: {submit_url}")
116
  try:
117
+ response = requests.post(submit_url, json=submission_data, timeout=60)
118
  response.raise_for_status()
119
  result_data = response.json()
 
120
  final_status = (
121
+ f"Submission Successful!\n"
122
+ f"User: {result_data.get('username')}\n"
123
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
124
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
125
+ f"Message: {result_data.get('message', 'No message received.')}"
126
  )
127
+ print("Submission successful.")
128
+ results_df = pd.DataFrame(results_log)
129
+ return final_status, results_df
130
+ except requests.exceptions.HTTPError as e:
131
+ error_detail = f"HTTP {e.response.status_code}: "
132
+ try:
133
+ error_json = e.response.json()
134
+ error_detail += f"{error_json.get('detail', e.response.text)}"
135
+ except:
136
+ error_detail += f"{e.response.text[:500]}"
137
+ print(error_detail)
138
+ return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
139
+ except requests.exceptions.Timeout:
140
+ return "Submission Failed: Request timed out.", pd.DataFrame(results_log)
141
+ except requests.exceptions.RequestException as e:
142
+ return f"Submission Failed: Network error - {e}", pd.DataFrame(results_log)
143
  except Exception as e:
144
+ return f"Unexpected error during submission: {e}", pd.DataFrame(results_log)
145
+
146
+
147
+ # --- Build Gradio Interface using Blocks ---
148
with gr.Blocks() as demo:
    # Components are created in display order, so the statement order below
    # is the page layout.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and modify the code to define your agent's logic, tools, and dependencies.
        2. Log in using the button below. Your Hugging Face username is required for submission.
        3. Click 'Run Evaluation & Submit All Answers' to test your agent and get a score.
        ---
        **Note:** The submission process may take time. You are encouraged to optimize your implementation.
        """
    )

    # run_and_submit_all takes a `gr.OAuthProfile | None` argument;
    # presumably Gradio fills it from this login state — confirm.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Targets for the two values returned by run_and_submit_all
    # (a status string and a results table).
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are declared; the handler's only parameter is the profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
170
 
171
+
172
+ # --- Entry Point ---
173
if __name__ == "__main__":
    # Startup banner: print where the app is running, then launch the UI.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)

    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
        # Hugging Face documents SPACE_HOST as already ending in ".hf.space";
        # appending the suffix again printed "...hf.space.hf.space". Strip it
        # first so the URL is correct whether or not the suffix is present.
        host_label = space_host.removesuffix(".hf.space")
        print(f" Runtime URL: https://{host_label}.hf.space")
    else:
        print("ℹ️ SPACE_HOST not found (running locally?).")

    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
    else:
        print("ℹ️ SPACE_ID not found (running locally?).")

    # Closing rule, same width as the opening banner line.
    print("-" * (60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
txt.txt CHANGED
@@ -1,5 +1,6 @@
1
  "90f426e61bed9f1ffce51a95b98945531c35279a"
2
 
 
3
  import os
4
  import gradio as gr
5
  import requests
 
1
  "90f426e61bed9f1ffce51a95b98945531c35279a"
2
 
3
+
4
  import os
5
  import gradio as gr
6
  import requests