LamiaYT commited on
Commit
19b7914
·
1 Parent(s): 34105a6

fixing ver3

Browse files
Files changed (1) hide show
  1. app.py +136 -240
app.py CHANGED
@@ -27,22 +27,23 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
27
  load_dotenv()
28
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
29
 
30
- # --- Balanced Constants ---
31
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
- MAX_STEPS = 4 # Reasonable steps
33
- MAX_TOKENS = 150 # Enough for reasoning
34
  MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
35
- TIMEOUT_PER_QUESTION = 25 # 25 seconds - enough time
36
- MAX_CONTEXT = 1500 # Reasonable context
37
 
38
  # --- Configure Environment ---
39
  os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
40
  os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
41
  os.environ["BITSANDBYTES_NOWELCOME"] = "1"
42
 
43
- print("Loading model (BALANCED FAST mode)...")
44
  start_time = time.time()
45
 
 
46
  model = AutoModelForCausalLM.from_pretrained(
47
  MODEL_NAME,
48
  trust_remote_code=True,
@@ -55,83 +56,80 @@ model = AutoModelForCausalLM.from_pretrained(
55
  tokenizer = AutoTokenizer.from_pretrained(
56
  MODEL_NAME,
57
  use_fast=True,
58
- trust_remote_code=True
 
59
  )
60
 
61
  if tokenizer.pad_token is None:
62
  tokenizer.pad_token = tokenizer.eos_token
63
 
 
 
 
 
 
 
 
 
 
 
 
64
  load_time = time.time() - start_time
65
  print(f"Model loaded in {load_time:.2f} seconds")
66
 
67
- # --- Reliable Tools ---
68
  def web_search(query: str) -> str:
69
- """Fast but reliable web search"""
70
  try:
71
  if SERPER_API_KEY:
72
- params = {'q': query[:150], 'num': 2}
73
  headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
74
  response = requests.post(
75
  'https://google.serper.dev/search',
76
  headers=headers,
77
  json=params,
78
- timeout=8
79
  )
80
  results = response.json()
81
  if 'organic' in results and results['organic']:
82
- output = []
83
- for r in results['organic'][:2]:
84
- output.append(f"{r['title']}: {r['snippet']}")
85
- return " | ".join(output)
86
- return "No search results found"
87
  else:
88
  with DDGS() as ddgs:
89
- results = []
90
- for r in ddgs.text(query, max_results=2):
91
- results.append(f"{r['title']}: {r['body'][:200]}")
92
- return " | ".join(results) if results else "No search results"
93
- except Exception as e:
94
- return f"Search failed: {str(e)}"
95
 
96
  def calculator(expression: str) -> str:
97
- """Reliable calculator"""
98
  try:
99
- # Clean the expression but keep more characters
100
- clean_expr = re.sub(r'[^0-9+\-*/().\s]', '', str(expression))
101
  if not clean_expr.strip():
102
- return "Invalid mathematical expression"
103
-
104
- # Use numexpr for safety
105
- result = numexpr.evaluate(clean_expr)
106
  return str(float(result))
107
- except Exception as e:
108
- return f"Calculation error: {str(e)}"
109
 
110
  def read_pdf(file_path: str) -> str:
111
- """PDF reader with better error handling"""
112
  try:
113
  text = extract_text(file_path)
114
- if text:
115
- return text[:800] # More text for context
116
- return "No text could be extracted from PDF"
117
- except Exception as e:
118
- return f"PDF reading error: {str(e)}"
119
 
120
  def read_webpage(url: str) -> str:
121
- """Reliable webpage reader"""
122
  try:
123
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
124
- response = requests.get(url, timeout=8, headers=headers)
125
- response.raise_for_status()
126
-
127
  soup = BeautifulSoup(response.text, 'html.parser')
128
- for script in soup(["script", "style"]):
129
- script.decompose()
130
-
131
  text = soup.get_text(separator=' ', strip=True)
132
- return text[:800] if text else "No content found on webpage"
133
- except Exception as e:
134
- return f"Webpage error: {str(e)}"
135
 
136
  TOOLS = {
137
  "web_search": web_search,
@@ -140,74 +138,55 @@ TOOLS = {
140
  "read_webpage": read_webpage
141
  }
142
 
143
- # --- Balanced GAIA Agent ---
144
- class BalancedGAIA_Agent:
145
  def __init__(self):
146
  self.tools = TOOLS
147
- self.system_prompt = (
148
- "You are a GAIA problem solver. Available tools: web_search, calculator, read_pdf, read_webpage.\n"
149
- "Think step by step and use tools when needed.\n\n"
150
- "Tool usage format:\n"
151
- "```json\n{\"tool\": \"tool_name\", \"args\": {\"parameter\": \"value\"}}\n```\n\n"
152
- "Always end with: Final Answer: [your exact answer]\n\n"
153
- "Example:\n"
154
- "Question: What is 15 * 23?\n"
155
- "I need to calculate 15 * 23.\n"
156
- "```json\n{\"tool\": \"calculator\", \"args\": {\"expression\": \"15 * 23\"}}\n```\n"
157
- "Final Answer: 345"
158
  )
159
 
160
  def __call__(self, question: str) -> str:
161
  start_time = time.time()
162
- print(f"πŸ€” Solving: {question[:60]}...")
163
 
164
  try:
165
- conversation = [f"Question: {question}"]
166
 
167
  for step in range(MAX_STEPS):
168
- # Check timeout but be more generous
169
  if time.time() - start_time > TIMEOUT_PER_QUESTION:
170
- print(f"⏰ Timeout after {TIMEOUT_PER_QUESTION}s")
171
- return "TIMEOUT: Question took too long to solve"
172
 
173
- # Generate response
174
- response = self._generate_response(conversation)
175
- print(f"Step {step+1}: {response[:80]}...")
176
 
177
- # Check for final answer
178
  if "Final Answer:" in response:
179
- answer = self._extract_final_answer(response)
180
- elapsed = time.time() - start_time
181
- print(f"βœ… Solved in {elapsed:.1f}s: {answer[:50]}...")
182
- return answer
183
 
184
- # Try to use tools
185
- tool_result = self._execute_tools(response)
186
  if tool_result:
187
- conversation.append(f"Tool used: {tool_result}")
188
- print(f"πŸ”§ Tool result: {tool_result[:60]}...")
189
  else:
190
- conversation.append(f"Reasoning: {response}")
191
 
192
- # Keep conversation manageable
193
- if len(" ".join(conversation)) > 1200:
194
- conversation = conversation[-3:] # Keep last 3 entries
195
 
196
- print("❌ No solution found within step limit")
197
- return "Could not solve within step limit"
198
 
199
  except Exception as e:
200
- print(f"πŸ’₯ Agent error: {str(e)}")
201
- return f"Agent error: {str(e)}"
202
 
203
- def _generate_response(self, conversation: List[str]) -> str:
204
  try:
205
- # Build prompt
206
- prompt = f"<|system|>\n{self.system_prompt}<|end|>\n"
207
- prompt += f"<|user|>\n{chr(10).join(conversation)}<|end|>\n"
208
- prompt += "<|assistant|>"
209
 
210
- # Tokenize
211
  inputs = tokenizer(
212
  prompt,
213
  return_tensors="pt",
@@ -216,108 +195,72 @@ class BalancedGAIA_Agent:
216
  padding=False
217
  )
218
 
219
- # Generate
220
- generation_config = GenerationConfig(
221
- max_new_tokens=MAX_TOKENS,
222
- temperature=0.2, # Lower temperature for more focused responses
223
- do_sample=True,
224
- pad_token_id=tokenizer.pad_token_id,
225
- eos_token_id=tokenizer.eos_token_id,
226
- use_cache=False
227
- )
228
-
229
  with torch.no_grad():
230
  outputs = model.generate(
231
  inputs.input_ids,
232
- generation_config=generation_config,
233
  attention_mask=inputs.attention_mask
234
  )
235
 
236
- # Decode
237
- full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
238
- response = full_response.split("<|assistant|>")[-1].strip()
239
 
240
- # Cleanup
241
  del inputs, outputs
242
  gc.collect()
243
 
244
  return response
245
 
246
  except Exception as e:
247
- return f"Generation error: {str(e)}"
248
 
249
- def _extract_final_answer(self, text: str) -> str:
250
- """Extract the final answer more reliably"""
251
  try:
252
- if "Final Answer:" in text:
253
- answer_part = text.split("Final Answer:")[-1].strip()
254
- # Take first line of the answer
255
- answer = answer_part.split('\n')[0].strip()
256
- return answer if answer else "No answer provided"
257
- return "No final answer found"
258
- except:
259
- return "Answer extraction failed"
260
-
261
- def _execute_tools(self, text: str) -> str:
262
- """Execute tools found in the response"""
263
- try:
264
- # Look for JSON tool calls
265
- json_pattern = r'```json\s*(\{[^}]*\})\s*```'
266
- matches = re.findall(json_pattern, text, re.DOTALL)
267
-
268
- for match in matches:
269
- try:
270
- tool_call = json.loads(match)
271
- tool_name = tool_call.get("tool")
272
- args = tool_call.get("args", {})
273
-
274
- if tool_name in self.tools:
275
- print(f"πŸ”§ Executing {tool_name} with {args}")
276
- result = self.tools[tool_name](**args)
277
- return f"{tool_name}: {str(result)[:400]}"
278
-
279
- except json.JSONDecodeError:
280
- continue
281
- except Exception as e:
282
- return f"Tool execution error: {str(e)}"
283
 
284
- return None
 
 
285
 
286
- except Exception as e:
287
- return f"Tool parsing error: {str(e)}"
 
288
 
289
- # --- Efficient Runner ---
290
  def run_and_submit_all(profile: gr.OAuthProfile | None):
291
  if not profile:
292
- return "❌ Please login to Hugging Face first", None
293
 
294
  username = profile.username
295
- print(f"πŸš€ Starting evaluation for user: {username}")
296
-
297
- # Initialize agent
298
- try:
299
- agent = BalancedGAIA_Agent()
300
- except Exception as e:
301
- return f"❌ Failed to initialize agent: {e}", None
302
 
303
- # Setup
 
304
  api_url = DEFAULT_API_URL
305
  space_id = os.getenv("SPACE_ID", "unknown")
306
 
307
- # Fetch questions
 
 
308
  try:
309
- print("πŸ“₯ Fetching questions...")
310
- response = requests.get(f"{api_url}/questions", timeout=15)
311
- response.raise_for_status()
312
  questions = response.json()
313
- print(f"πŸ“ Retrieved {len(questions)} questions")
314
  except Exception as e:
315
- return f"❌ Failed to fetch questions: {e}", None
316
 
317
- # Process questions
318
  results = []
319
  answers = []
320
- total_start = time.time()
321
 
322
  for i, item in enumerate(questions):
323
  task_id = item.get("task_id")
@@ -326,125 +269,78 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
326
  if not task_id:
327
  continue
328
 
329
- print(f"\nπŸ“‹ [{i+1}/{len(questions)}] Task: {task_id}")
330
 
331
  try:
332
  answer = agent(question)
333
  answers.append({"task_id": task_id, "submitted_answer": answer})
334
-
335
- # Truncate for display
336
- q_display = question[:80] + "..." if len(question) > 80 else question
337
- a_display = answer[:100] + "..." if len(answer) > 100 else answer
338
-
339
  results.append({
340
- "Task": task_id[:8] + "...",
341
- "Question": q_display,
342
- "Answer": a_display,
343
- "Status": "βœ…" if "error" not in answer.lower() and "timeout" not in answer.lower() else "❌"
344
  })
345
-
346
  except Exception as e:
347
- error_answer = f"PROCESSING_ERROR: {str(e)}"
348
- answers.append({"task_id": task_id, "submitted_answer": error_answer})
349
  results.append({
350
- "Task": task_id[:8] + "...",
351
- "Question": question[:80] + "..." if len(question) > 80 else question,
352
- "Answer": error_answer,
353
- "Status": "πŸ’₯"
354
  })
355
 
356
- # Memory cleanup
357
- if i % 3 == 0:
358
  gc.collect()
359
 
360
- total_time = time.time() - total_start
361
- avg_time = total_time / len(questions)
362
- print(f"\n⏱️ Total processing time: {total_time:.1f}s ({avg_time:.1f}s per question)")
363
 
364
  # Submit results
365
  try:
366
- print("πŸ“€ Submitting results...")
367
  submission = {
368
  "username": username,
369
  "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
370
  "answers": answers
371
  }
372
 
373
- response = requests.post(f"{api_url}/submit", json=submission, timeout=60)
374
- response.raise_for_status()
375
  result = response.json()
376
 
377
- # Calculate success rate
378
- successful = sum(1 for r in results if r["Status"] == "βœ…")
379
- success_rate = (successful / len(results)) * 100
380
-
381
  status = (
382
- f"🎯 EVALUATION COMPLETED\n"
383
  f"πŸ‘€ User: {result.get('username', username)}\n"
384
  f"πŸ“Š Score: {result.get('score', 'N/A')}% "
385
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
386
- f"⚑ Processing: {total_time:.1f}s total, {avg_time:.1f}s/question\n"
387
- f"βœ… Success Rate: {success_rate:.1f}% ({successful}/{len(results)} processed)\n"
388
- f"πŸ’¬ Message: {result.get('message', 'Evaluation completed!')}"
389
  )
390
 
391
  return status, pd.DataFrame(results)
392
 
393
  except Exception as e:
394
- error_status = (
395
- f"❌ SUBMISSION FAILED\n"
396
- f"Error: {str(e)}\n"
397
- f"⏱️ Processing completed in {total_time:.1f}s\n"
398
- f"βœ… Questions processed: {len(results)}"
399
- )
400
  return error_status, pd.DataFrame(results)
401
 
402
- # --- Clean UI ---
403
- with gr.Blocks(title="GAIA Agent - Balanced Fast") as demo:
404
- gr.Markdown("# ⚑ GAIA Agent - Balanced Fast Mode")
405
- gr.Markdown(
406
- """
407
- **Optimized for reliability and speed:**
408
- - 4 reasoning steps max
409
- - 25 second timeout per question
410
- - 150 token responses
411
- - Enhanced error handling
412
- """
413
- )
414
 
415
- with gr.Row():
416
- gr.LoginButton()
417
 
418
- with gr.Row():
419
- run_btn = gr.Button("οΏ½οΏ½οΏ½ Run Balanced Evaluation", variant="primary", size="lg")
420
 
421
- with gr.Row():
422
- status = gr.Textbox(
423
- label="πŸ“Š Evaluation Status & Results",
424
- lines=8,
425
- interactive=False,
426
- placeholder="Ready to run evaluation. Please login first."
427
- )
428
-
429
- with gr.Row():
430
- table = gr.DataFrame(
431
- label="πŸ“‹ Question Results",
432
- interactive=False,
433
- wrap=True
434
- )
435
 
436
- run_btn.click(
437
- fn=run_and_submit_all,
438
- outputs=[status, table],
439
- show_progress=True
440
- )
441
 
442
  if __name__ == "__main__":
443
- print("⚑ GAIA Agent - Balanced Fast Mode Starting...")
444
- print(f"βš™οΈ Settings: {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
445
 
446
  demo.launch(
447
- share=True,
448
  server_name="0.0.0.0",
449
  server_port=7860,
450
  debug=False,
 
27
  load_dotenv()
28
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
29
 
30
# --- Constants (ULTRA FAST MODE) ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # GAIA scoring API base URL
MAX_STEPS = 5  # max reasoning/tool-use iterations per question
MAX_TOKENS = 100  # max new tokens per generation (kept short for speed)
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
TIMEOUT_PER_QUESTION = 20  # per-question wall-clock limit, in seconds
MAX_CONTEXT = 1024  # prompt token budget — presumably passed as tokenizer max_length; TODO confirm
37
 
38
  # --- Configure Environment ---
39
  os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
40
  os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
41
  os.environ["BITSANDBYTES_NOWELCOME"] = "1"
42
 
43
+ print("Loading model (ULTRA FAST mode)...")
44
  start_time = time.time()
45
 
46
+ # Minimal model loading
47
  model = AutoModelForCausalLM.from_pretrained(
48
  MODEL_NAME,
49
  trust_remote_code=True,
 
56
  tokenizer = AutoTokenizer.from_pretrained(
57
  MODEL_NAME,
58
  use_fast=True,
59
+ trust_remote_code=True,
60
+ padding_side="left"
61
  )
62
 
63
  if tokenizer.pad_token is None:
64
  tokenizer.pad_token = tokenizer.eos_token
65
 
66
# Pre-compiled generation config, shared by every model.generate() call so it
# is built once at startup instead of per question.
GENERATION_CONFIG = GenerationConfig(
    max_new_tokens=MAX_TOKENS,          # keep responses short for speed
    temperature=0.3,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    # BUG FIX: was use_cache=False, which disables the KV cache and forces
    # every decode step to recompute attention over the whole prefix —
    # identical output, dramatically slower generation.
    use_cache=True,
    repetition_penalty=1.1,
)
76
+
77
  load_time = time.time() - start_time
78
  print(f"Model loaded in {load_time:.2f} seconds")
79
 
80
# --- Lightning Fast Tools ---
def web_search(query: str) -> str:
    """Ultra-fast web search.

    Uses the Serper API when SERPER_API_KEY is set, otherwise falls back to
    DuckDuckGo. Returns a single "title: snippet" line, or a short error
    string — this tool must never raise into the agent loop.
    """
    try:
        if SERPER_API_KEY:
            params = {'q': query[:100], 'num': 1}  # single result keeps latency low
            headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
            response = requests.post(
                'https://google.serper.dev/search',
                headers=headers,
                json=params,
                timeout=3
            )
            response.raise_for_status()  # surface HTTP errors instead of parsing error bodies
            results = response.json()
            organic = results.get('organic') or []
            if organic:
                top = organic[0]
                # .get() guards against results missing 'title'/'snippet'
                return f"{top.get('title', '')}: {top.get('snippet', '')[:200]}"
            return "No results"
        else:
            with DDGS() as ddgs:
                for result in ddgs.text(query, max_results=1):
                    return f"{result['title']}: {result['body'][:200]}"
            return "No results"
    except Exception:
        # network failure, bad JSON, missing keys — degrade to a short marker
        return "Search failed"
 
104
 
105
import ast
import operator as _op

# Whitelisted arithmetic operators for calculator expressions.
_CALC_OPS = {
    ast.Add: _op.add,
    ast.Sub: _op.sub,
    ast.Mult: _op.mul,
    ast.Div: _op.truediv,
    ast.Pow: _op.pow,       # '**' survives the character filter below
    ast.USub: _op.neg,
    ast.UAdd: _op.pos,
}


def _calc_eval(node):
    """Recursively evaluate a restricted arithmetic AST node; raise on anything else."""
    if isinstance(node, ast.Expression):
        return _calc_eval(node.body)
    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
        return node.value
    if isinstance(node, ast.BinOp) and type(node.op) in _CALC_OPS:
        return _CALC_OPS[type(node.op)](_calc_eval(node.left), _calc_eval(node.right))
    if isinstance(node, ast.UnaryOp) and type(node.op) in _CALC_OPS:
        return _CALC_OPS[type(node.op)](_calc_eval(node.operand))
    raise ValueError("unsupported expression")


def calculator(expression: str) -> str:
    """Safely evaluate a basic arithmetic expression.

    Returns the result as str(float), "Invalid expression" when nothing
    evaluable remains after filtering, or "Calc error" on any failure
    (division by zero, malformed syntax, ...).
    """
    try:
        # Keep only digits, arithmetic operators, parentheses, dot, whitespace.
        clean_expr = re.sub(r'[^\d+\-*/().\s]', '', str(expression))
        if not clean_expr.strip():
            return "Invalid expression"
        # SECURITY FIX: the previous version ran eval() on model-generated
        # text. Parse with ast and evaluate only whitelisted operators.
        result = _calc_eval(ast.parse(clean_expr, mode="eval"))
        return str(float(result))
    except Exception:
        return "Calc error"
115
 
116
def read_pdf(file_path: str) -> str:
    """Extract text from a PDF, truncated to 500 chars for the prompt budget.

    Returns a short error string instead of raising so the agent loop keeps
    going on unreadable files.
    """
    try:
        text = extract_text(file_path)
        return text[:500] if text else "No PDF text"
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. pdfminer raises many exception
        # types; catch Exception and degrade to a marker string.
        return "PDF error"
 
 
123
 
124
def read_webpage(url: str) -> str:
    """Fetch a URL and return its visible text (first 500 chars).

    Returns "Webpage error" on any network/HTTP/parsing failure rather
    than raising into the agent loop.
    """
    try:
        response = requests.get(url, timeout=3, headers={'User-Agent': 'Bot'})
        response.raise_for_status()  # treat HTTP 4xx/5xx as errors, not content
        soup = BeautifulSoup(response.text, 'html.parser')
        # Strip script/style so get_text() returns only human-visible text
        # (this guard existed in an earlier revision and was lost).
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = soup.get_text(separator=' ', strip=True)
        return text[:500] if text else "No webpage text"
    except Exception:
        return "Webpage error"
133
 
134
  TOOLS = {
135
  "web_search": web_search,
 
138
  "read_webpage": read_webpage
139
  }
140
 
141
# --- Ultra Fast Agent ---
class FastGAIA_Agent:
    """Minimal ReAct-style agent: generate, optionally run one tool, repeat.

    Budget per question: MAX_STEPS generations and TIMEOUT_PER_QUESTION
    seconds of wall-clock time. Relies on the module-level `model`,
    `tokenizer`, `GENERATION_CONFIG` and `TOOLS` globals.
    """

    def __init__(self):
        self.tools = TOOLS
        # NOTE: this template contains literal JSON braces, so it must be
        # filled with str.replace("{history}", ...) — str.format() would
        # raise KeyError on {"tool": ...}.
        self.prompt_template = (
            "<|system|>You solve GAIA questions fast. Tools: web_search, calculator, read_pdf, read_webpage.\n"
            "Format: ```json\n{\"tool\": \"name\", \"args\": {\"key\": \"value\"}}```\n"
            "Always end with: Final Answer: [answer]<|end|>\n"
            "<|user|>{history}<|end|>\n<|assistant|>"
        )

    def __call__(self, question: str) -> str:
        """Answer one question; returns the answer text or an error marker."""
        start_time = time.time()
        try:
            history = f"Question: {question}"

            for step in range(MAX_STEPS):
                if time.time() - start_time > TIMEOUT_PER_QUESTION:
                    return "TIMEOUT"

                response = self._fast_generate(history)

                # Stop as soon as the model declares a final answer.
                if "Final Answer:" in response:
                    answer = response.split("Final Answer:")[-1].strip().split('\n')[0]
                    return answer[:200]  # limit answer length

                # Otherwise try to execute a tool call from the response.
                tool_result = self._quick_tool_use(response)
                if tool_result:
                    history += f"\nAction: {tool_result}"
                else:
                    history += f"\nThought: {response[:100]}"

                # Keep history short so the prompt stays inside MAX_CONTEXT.
                if len(history) > 800:
                    history = history[-800:]

            return "No solution found"

        except Exception as e:
            return f"Error: {str(e)[:50]}"

    def _fast_generate(self, history: str) -> str:
        """Run one model generation over the current history string."""
        try:
            # BUG FIX: str.format() raised KeyError on the template's literal
            # JSON braces; replace() only touches the {history} placeholder.
            prompt = self.prompt_template.replace("{history}", history)

            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=MAX_CONTEXT,
                padding=False
            )

            with torch.no_grad():
                outputs = model.generate(
                    inputs.input_ids,
                    generation_config=GENERATION_CONFIG,
                    attention_mask=inputs.attention_mask
                )

            # BUG FIX: decode only the newly generated tokens. Decoding the
            # full sequence and splitting on "<|assistant|>" never worked,
            # because skip_special_tokens=True strips that marker, leaving
            # the whole prompt in the "response".
            new_tokens = outputs[0][inputs.input_ids.shape[1]:]
            response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

            # Immediate cleanup — keep peak memory low per step.
            del inputs, outputs
            gc.collect()

            return response

        except Exception as e:
            return f"Gen error: {str(e)}"

    def _quick_tool_use(self, text: str) -> str:
        """Find and execute the first ```json tool call in `text`.

        Returns "Used <tool>: <result>" on success, "" when no valid call
        is found (malformed JSON / unknown tool / bad args).
        """
        try:
            # BUG FIX: the old pattern {[^}]*} could not match the nested
            # braces of {"args": {...}} (the exact format the prompt asks
            # for); capture the whole fenced body and let json.loads judge.
            json_match = re.search(r'```json\s*(.*?)```', text, re.DOTALL)
            if not json_match:
                return ""

            tool_data = json.loads(json_match.group(1).strip())
            tool_name = tool_data.get("tool", "")
            args = tool_data.get("args", {})

            if tool_name in self.tools:
                result = self.tools[tool_name](**args)
                return f"Used {tool_name}: {str(result)[:150]}"

        except Exception:
            # Treat any parsing/execution failure as "no tool used".
            pass
        return ""
237
 
238
# --- Lightning Fast Runner ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer them with FastGAIA_Agent, and submit.

    Returns a (status_message, results_dataframe) pair for the Gradio UI;
    the dataframe may be None when nothing was processed.
    """
    if not profile:
        return "❌ Please login first", None

    username = profile.username

    # Quick setup
    agent = FastGAIA_Agent()
    api_url = DEFAULT_API_URL
    space_id = os.getenv("SPACE_ID", "unknown")

    print(f"🚀 ULTRA FAST mode - User: {username}")

    # Fetch questions quickly
    try:
        response = requests.get(f"{api_url}/questions", timeout=10)
        response.raise_for_status()  # BUG FIX: was missing — error pages parsed as JSON
        questions = response.json()
        print(f"📝 Got {len(questions)} questions")
    except Exception as e:
        return f"❌ Failed to get questions: {e}", None

    # BUG FIX: guard against an empty question list (the timing summary
    # below would otherwise divide by zero).
    if not questions:
        return "❌ No questions returned by the server", None

    results = []
    answers = []
    start_time = time.time()

    for i, item in enumerate(questions):
        task_id = item.get("task_id")
        question = item.get("question", "")

        if not task_id:
            continue

        print(f"⚡ [{i+1}/{len(questions)}] {task_id[:8]}...")

        try:
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            results.append({
                "ID": task_id[:8],
                # BUG FIX: truncation ellipsis was appended unconditionally
                "Question": question[:60] + "..." if len(question) > 60 else question,
                "Answer": answer[:80] + "..." if len(answer) > 80 else answer
            })
        except Exception as e:
            error_ans = f"ERROR: {str(e)[:30]}"
            answers.append({"task_id": task_id, "submitted_answer": error_ans})
            results.append({
                "ID": task_id[:8],
                "Question": question[:60] + "..." if len(question) > 60 else question,
                "Answer": error_ans
            })

        # Quick periodic memory cleanup
        if i % 5 == 0:
            gc.collect()

    total_time = time.time() - start_time
    avg_time = total_time / len(questions)
    print(f"⏱️ Completed in {total_time:.1f}s ({avg_time:.1f}s per question)")

    # Submit results
    try:
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
            "answers": answers
        }

        response = requests.post(f"{api_url}/submit", json=submission, timeout=30)
        response.raise_for_status()  # BUG FIX: was missing — failed submits looked like success
        result = response.json()

        status = (
            f"🎯 ULTRA FAST RESULTS\n"
            f"👤 User: {result.get('username', username)}\n"
            f"📊 Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
            f"⏱️ Time: {total_time:.1f}s ({avg_time:.1f}s/question)\n"
            f"💬 {result.get('message', 'Completed!')}"
        )

        return status, pd.DataFrame(results)

    except Exception as e:
        error_status = f"❌ Submission failed: {str(e)}\n⏱️ Processing time: {total_time:.1f}s"
        return error_status, pd.DataFrame(results)
323
 
324
# --- Ultra Simple UI ---
with gr.Blocks(title="GAIA Agent - ULTRA FAST") as demo:
    gr.Markdown("# ⚡ GAIA Agent - ULTRA FAST MODE")
    # BUG FIX: the banner hard-coded "3 steps max • 64 tokens • 15s timeout",
    # contradicting the actual constants (5 / 100 / 20). Derive it from the
    # constants so it cannot drift out of sync again.
    gr.Markdown(
        f"**Speed settings:** {MAX_STEPS} steps max • {MAX_TOKENS} tokens • "
        f"{TIMEOUT_PER_QUESTION}s timeout • Lightning tools"
    )

    gr.LoginButton()

    run_btn = gr.Button("🚀 RUN ULTRA FAST", variant="primary", size="lg")

    status = gr.Textbox(label="📊 Results", lines=6, interactive=False)
    table = gr.DataFrame(label="📋 Answers", interactive=False)

    run_btn.click(run_and_submit_all, outputs=[status, table], show_progress=True)
 
 
 
 
337
 
338
  if __name__ == "__main__":
339
+ print("⚑ ULTRA FAST GAIA Agent Starting...")
340
+ print(f"βš™οΈ {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
341
 
342
  demo.launch(
343
+ share=True, # Added share=True for public link
344
  server_name="0.0.0.0",
345
  server_port=7860,
346
  debug=False,