LamiaYT committed on
Commit
34105a6
·
1 Parent(s): d3c0517

fixing ver3

Browse files
Files changed (1) hide show
  1. app.py +240 -136
app.py CHANGED
@@ -27,23 +27,22 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
27
  load_dotenv()
28
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
29
 
30
- # --- Constants (ULTRA FAST MODE) ---
31
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
- MAX_STEPS = 3 # Reduced to 3
33
- MAX_TOKENS = 64 # Very short responses
34
  MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
35
- TIMEOUT_PER_QUESTION = 15 # 15 seconds max
36
- MAX_CONTEXT = 1024 # Very short context
37
 
38
  # --- Configure Environment ---
39
  os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
40
  os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
41
  os.environ["BITSANDBYTES_NOWELCOME"] = "1"
42
 
43
- print("Loading model (ULTRA FAST mode)...")
44
  start_time = time.time()
45
 
46
- # Minimal model loading
47
  model = AutoModelForCausalLM.from_pretrained(
48
  MODEL_NAME,
49
  trust_remote_code=True,
@@ -56,80 +55,83 @@ model = AutoModelForCausalLM.from_pretrained(
56
  tokenizer = AutoTokenizer.from_pretrained(
57
  MODEL_NAME,
58
  use_fast=True,
59
- trust_remote_code=True,
60
- padding_side="left"
61
  )
62
 
63
  if tokenizer.pad_token is None:
64
  tokenizer.pad_token = tokenizer.eos_token
65
 
66
- # Pre-compile generation config
67
- GENERATION_CONFIG = GenerationConfig(
68
- max_new_tokens=MAX_TOKENS,
69
- temperature=0.3,
70
- do_sample=True,
71
- pad_token_id=tokenizer.pad_token_id,
72
- eos_token_id=tokenizer.eos_token_id,
73
- use_cache=False,
74
- repetition_penalty=1.1
75
- )
76
-
77
  load_time = time.time() - start_time
78
  print(f"Model loaded in {load_time:.2f} seconds")
79
 
80
- # --- Lightning Fast Tools ---
81
  def web_search(query: str) -> str:
82
- """Ultra-fast web search"""
83
  try:
84
  if SERPER_API_KEY:
85
- params = {'q': query[:100], 'num': 1} # Single result
86
  headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
87
  response = requests.post(
88
  'https://google.serper.dev/search',
89
  headers=headers,
90
  json=params,
91
- timeout=3
92
  )
93
  results = response.json()
94
  if 'organic' in results and results['organic']:
95
- return f"{results['organic'][0]['title']}: {results['organic'][0]['snippet'][:200]}"
96
- return "No results"
 
 
 
97
  else:
98
  with DDGS() as ddgs:
99
- for result in ddgs.text(query, max_results=1):
100
- return f"{result['title']}: {result['body'][:200]}"
101
- return "No results"
102
- except:
103
- return "Search failed"
 
104
 
105
  def calculator(expression: str) -> str:
106
- """Lightning calculator"""
107
  try:
108
- clean_expr = re.sub(r'[^\d+\-*/().\s]', '', str(expression))
 
109
  if not clean_expr.strip():
110
- return "Invalid expression"
111
- result = eval(clean_expr) # Simple eval for speed
 
 
112
  return str(float(result))
113
- except:
114
- return "Calc error"
115
 
116
  def read_pdf(file_path: str) -> str:
117
- """Fast PDF reader"""
118
  try:
119
  text = extract_text(file_path)
120
- return text[:500] if text else "No PDF text"
121
- except:
122
- return "PDF error"
 
 
123
 
124
  def read_webpage(url: str) -> str:
125
- """Fast webpage reader"""
126
  try:
127
- response = requests.get(url, timeout=3, headers={'User-Agent': 'Bot'})
 
 
 
128
  soup = BeautifulSoup(response.text, 'html.parser')
 
 
 
129
  text = soup.get_text(separator=' ', strip=True)
130
- return text[:500] if text else "No webpage text"
131
- except:
132
- return "Webpage error"
133
 
134
  TOOLS = {
135
  "web_search": web_search,
@@ -138,55 +140,74 @@ TOOLS = {
138
  "read_webpage": read_webpage
139
  }
140
 
141
- # --- Ultra Fast Agent ---
142
- class FastGAIA_Agent:
143
  def __init__(self):
144
  self.tools = TOOLS
145
- self.prompt_template = (
146
- "<|system|>You solve GAIA questions fast. Tools: web_search, calculator, read_pdf, read_webpage.\n"
147
- "Format: ```json\n{\"tool\": \"name\", \"args\": {\"key\": \"value\"}}```\n"
148
- "Always end with: Final Answer: [answer]<|end|>\n"
149
- "<|user|>{history}<|end|>\n<|assistant|>"
 
 
 
 
 
 
150
  )
151
 
152
  def __call__(self, question: str) -> str:
153
  start_time = time.time()
 
154
 
155
  try:
156
- history = f"Question: {question}"
157
 
158
  for step in range(MAX_STEPS):
 
159
  if time.time() - start_time > TIMEOUT_PER_QUESTION:
160
- return "TIMEOUT"
 
161
 
162
- response = self._fast_generate(history)
 
 
163
 
164
- # Quick final answer check
165
  if "Final Answer:" in response:
166
- answer = response.split("Final Answer:")[-1].strip().split('\n')[0]
167
- return answer[:200] # Limit answer length
 
 
168
 
169
- # Quick tool parsing
170
- tool_result = self._quick_tool_use(response)
171
  if tool_result:
172
- history += f"\nAction: {tool_result}"
 
173
  else:
174
- history += f"\nThought: {response[:100]}"
175
 
176
- # Keep history short
177
- if len(history) > 800:
178
- history = history[-800:]
179
 
180
- return "No solution found"
 
181
 
182
  except Exception as e:
183
- return f"Error: {str(e)[:50]}"
 
184
 
185
- def _fast_generate(self, history: str) -> str:
186
  try:
187
- prompt = self.prompt_template.format(history=history)
 
 
 
188
 
189
- # Fast tokenization
190
  inputs = tokenizer(
191
  prompt,
192
  return_tensors="pt",
@@ -195,72 +216,108 @@ class FastGAIA_Agent:
195
  padding=False
196
  )
197
 
198
- # Fast generation
 
 
 
 
 
 
 
 
 
199
  with torch.no_grad():
200
  outputs = model.generate(
201
  inputs.input_ids,
202
- generation_config=GENERATION_CONFIG,
203
  attention_mask=inputs.attention_mask
204
  )
205
 
206
- # Fast decoding
207
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
208
- response = response.split("<|assistant|>")[-1].strip()
209
 
210
- # Immediate cleanup
211
  del inputs, outputs
212
  gc.collect()
213
 
214
  return response
215
 
216
  except Exception as e:
217
- return f"Gen error: {str(e)}"
218
 
219
- def _quick_tool_use(self, text: str) -> str:
 
220
  try:
221
- # Quick JSON extraction
222
- json_match = re.search(r'```json\s*({[^}]*})\s*```', text)
223
- if not json_match:
224
- return ""
225
-
226
- tool_data = json.loads(json_match.group(1))
227
- tool_name = tool_data.get("tool", "")
228
- args = tool_data.get("args", {})
 
 
 
 
 
 
 
229
 
230
- if tool_name in self.tools:
231
- result = self.tools[tool_name](**args)
232
- return f"Used {tool_name}: {str(result)[:150]}"
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
- except:
235
- pass
236
- return ""
 
237
 
238
- # --- Lightning Fast Runner ---
239
  def run_and_submit_all(profile: gr.OAuthProfile | None):
240
  if not profile:
241
- return "❌ Please login first", None
242
 
243
  username = profile.username
 
 
 
 
 
 
 
244
 
245
- # Quick setup
246
- agent = FastGAIA_Agent()
247
  api_url = DEFAULT_API_URL
248
  space_id = os.getenv("SPACE_ID", "unknown")
249
 
250
- print(f"πŸš€ ULTRA FAST mode - User: {username}")
251
-
252
- # Fetch questions quickly
253
  try:
254
- response = requests.get(f"{api_url}/questions", timeout=10)
 
 
255
  questions = response.json()
256
- print(f"πŸ“ Got {len(questions)} questions")
257
  except Exception as e:
258
- return f"❌ Failed to get questions: {e}", None
259
 
260
- # Process at lightning speed
261
  results = []
262
  answers = []
263
- start_time = time.time()
264
 
265
  for i, item in enumerate(questions):
266
  task_id = item.get("task_id")
@@ -269,78 +326,125 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
269
  if not task_id:
270
  continue
271
 
272
- print(f"⚑ [{i+1}/{len(questions)}] {task_id[:8]}...")
273
 
274
  try:
275
  answer = agent(question)
276
  answers.append({"task_id": task_id, "submitted_answer": answer})
 
 
 
 
 
277
  results.append({
278
- "ID": task_id[:8],
279
- "Question": question[:60] + "...",
280
- "Answer": answer[:80] + "..." if len(answer) > 80 else answer
 
281
  })
 
282
  except Exception as e:
283
- error_ans = f"ERROR: {str(e)[:30]}"
284
- answers.append({"task_id": task_id, "submitted_answer": error_ans})
285
  results.append({
286
- "ID": task_id[:8],
287
- "Question": question[:60] + "...",
288
- "Answer": error_ans
 
289
  })
290
 
291
- # Quick memory cleanup
292
- if i % 5 == 0:
293
  gc.collect()
294
 
295
- total_time = time.time() - start_time
296
- print(f"⏱️ Completed in {total_time:.1f}s ({total_time/len(questions):.1f}s per question)")
 
297
 
298
  # Submit results
299
  try:
 
300
  submission = {
301
  "username": username,
302
  "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
303
  "answers": answers
304
  }
305
 
306
- response = requests.post(f"{api_url}/submit", json=submission, timeout=30)
 
307
  result = response.json()
308
 
 
 
 
 
309
  status = (
310
- f"🎯 ULTRA FAST RESULTS\n"
311
  f"πŸ‘€ User: {result.get('username', username)}\n"
312
  f"πŸ“Š Score: {result.get('score', 'N/A')}% "
313
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
314
- f"⏱️ Time: {total_time:.1f}s ({total_time/len(questions):.1f}s/question)\n"
315
- f"πŸ’¬ {result.get('message', 'Completed!')}"
 
316
  )
317
 
318
  return status, pd.DataFrame(results)
319
 
320
  except Exception as e:
321
- error_status = f"❌ Submission failed: {str(e)}\n⏱️ Processing time: {total_time:.1f}s"
 
 
 
 
 
322
  return error_status, pd.DataFrame(results)
323
 
324
- # --- Ultra Simple UI ---
325
- with gr.Blocks(title="GAIA Agent - ULTRA FAST") as demo:
326
- gr.Markdown("# ⚑ GAIA Agent - ULTRA FAST MODE")
327
- gr.Markdown("**Speed settings:** 3 steps max β€’ 64 tokens β€’ 15s timeout β€’ Lightning tools")
 
 
 
 
 
 
 
 
328
 
329
- gr.LoginButton()
 
330
 
331
- run_btn = gr.Button("πŸš€ RUN ULTRA FAST", variant="primary", size="lg")
 
332
 
333
- status = gr.Textbox(label="πŸ“Š Results", lines=6, interactive=False)
334
- table = gr.DataFrame(label="πŸ“‹ Answers", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
- run_btn.click(run_and_submit_all, outputs=[status, table], show_progress=True)
 
 
 
 
337
 
338
  if __name__ == "__main__":
339
- print("⚑ ULTRA FAST GAIA Agent Starting...")
340
- print(f"βš™οΈ {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
341
 
342
  demo.launch(
343
- share=True, # Added share=True for public link
344
  server_name="0.0.0.0",
345
  server_port=7860,
346
  debug=False,
 
27
  load_dotenv()
28
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
29
 
30
+ # --- Balanced Constants ---
31
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
+ MAX_STEPS = 4 # Reasonable steps
33
+ MAX_TOKENS = 150 # Enough for reasoning
34
  MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
35
+ TIMEOUT_PER_QUESTION = 25 # 25 seconds - enough time
36
+ MAX_CONTEXT = 1500 # Reasonable context
37
 
38
  # --- Configure Environment ---
39
  os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
40
  os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
41
  os.environ["BITSANDBYTES_NOWELCOME"] = "1"
42
 
43
+ print("Loading model (BALANCED FAST mode)...")
44
  start_time = time.time()
45
 
 
46
  model = AutoModelForCausalLM.from_pretrained(
47
  MODEL_NAME,
48
  trust_remote_code=True,
 
55
  tokenizer = AutoTokenizer.from_pretrained(
56
  MODEL_NAME,
57
  use_fast=True,
58
+ trust_remote_code=True
 
59
  )
60
 
61
  if tokenizer.pad_token is None:
62
  tokenizer.pad_token = tokenizer.eos_token
63
 
 
 
 
 
 
 
 
 
 
 
 
64
  load_time = time.time() - start_time
65
  print(f"Model loaded in {load_time:.2f} seconds")
66
 
67
+ # --- Reliable Tools ---
68
  def web_search(query: str) -> str:
69
+ """Fast but reliable web search"""
70
  try:
71
  if SERPER_API_KEY:
72
+ params = {'q': query[:150], 'num': 2}
73
  headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
74
  response = requests.post(
75
  'https://google.serper.dev/search',
76
  headers=headers,
77
  json=params,
78
+ timeout=8
79
  )
80
  results = response.json()
81
  if 'organic' in results and results['organic']:
82
+ output = []
83
+ for r in results['organic'][:2]:
84
+ output.append(f"{r['title']}: {r['snippet']}")
85
+ return " | ".join(output)
86
+ return "No search results found"
87
  else:
88
  with DDGS() as ddgs:
89
+ results = []
90
+ for r in ddgs.text(query, max_results=2):
91
+ results.append(f"{r['title']}: {r['body'][:200]}")
92
+ return " | ".join(results) if results else "No search results"
93
+ except Exception as e:
94
+ return f"Search failed: {str(e)}"
95
 
96
  def calculator(expression: str) -> str:
97
+ """Reliable calculator"""
98
  try:
99
+ # Clean the expression but keep more characters
100
+ clean_expr = re.sub(r'[^0-9+\-*/().\s]', '', str(expression))
101
  if not clean_expr.strip():
102
+ return "Invalid mathematical expression"
103
+
104
+ # Use numexpr for safety
105
+ result = numexpr.evaluate(clean_expr)
106
  return str(float(result))
107
+ except Exception as e:
108
+ return f"Calculation error: {str(e)}"
109
 
110
  def read_pdf(file_path: str) -> str:
111
+ """PDF reader with better error handling"""
112
  try:
113
  text = extract_text(file_path)
114
+ if text:
115
+ return text[:800] # More text for context
116
+ return "No text could be extracted from PDF"
117
+ except Exception as e:
118
+ return f"PDF reading error: {str(e)}"
119
 
120
  def read_webpage(url: str) -> str:
121
+ """Reliable webpage reader"""
122
  try:
123
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
124
+ response = requests.get(url, timeout=8, headers=headers)
125
+ response.raise_for_status()
126
+
127
  soup = BeautifulSoup(response.text, 'html.parser')
128
+ for script in soup(["script", "style"]):
129
+ script.decompose()
130
+
131
  text = soup.get_text(separator=' ', strip=True)
132
+ return text[:800] if text else "No content found on webpage"
133
+ except Exception as e:
134
+ return f"Webpage error: {str(e)}"
135
 
136
  TOOLS = {
137
  "web_search": web_search,
 
140
  "read_webpage": read_webpage
141
  }
142
 
143
+ # --- Balanced GAIA Agent ---
144
+ class BalancedGAIA_Agent:
145
  def __init__(self):
146
  self.tools = TOOLS
147
+ self.system_prompt = (
148
+ "You are a GAIA problem solver. Available tools: web_search, calculator, read_pdf, read_webpage.\n"
149
+ "Think step by step and use tools when needed.\n\n"
150
+ "Tool usage format:\n"
151
+ "```json\n{\"tool\": \"tool_name\", \"args\": {\"parameter\": \"value\"}}\n```\n\n"
152
+ "Always end with: Final Answer: [your exact answer]\n\n"
153
+ "Example:\n"
154
+ "Question: What is 15 * 23?\n"
155
+ "I need to calculate 15 * 23.\n"
156
+ "```json\n{\"tool\": \"calculator\", \"args\": {\"expression\": \"15 * 23\"}}\n```\n"
157
+ "Final Answer: 345"
158
  )
159
 
160
  def __call__(self, question: str) -> str:
161
  start_time = time.time()
162
+ print(f"πŸ€” Solving: {question[:60]}...")
163
 
164
  try:
165
+ conversation = [f"Question: {question}"]
166
 
167
  for step in range(MAX_STEPS):
168
+ # Check timeout but be more generous
169
  if time.time() - start_time > TIMEOUT_PER_QUESTION:
170
+ print(f"⏰ Timeout after {TIMEOUT_PER_QUESTION}s")
171
+ return "TIMEOUT: Question took too long to solve"
172
 
173
+ # Generate response
174
+ response = self._generate_response(conversation)
175
+ print(f"Step {step+1}: {response[:80]}...")
176
 
177
+ # Check for final answer
178
  if "Final Answer:" in response:
179
+ answer = self._extract_final_answer(response)
180
+ elapsed = time.time() - start_time
181
+ print(f"βœ… Solved in {elapsed:.1f}s: {answer[:50]}...")
182
+ return answer
183
 
184
+ # Try to use tools
185
+ tool_result = self._execute_tools(response)
186
  if tool_result:
187
+ conversation.append(f"Tool used: {tool_result}")
188
+ print(f"πŸ”§ Tool result: {tool_result[:60]}...")
189
  else:
190
+ conversation.append(f"Reasoning: {response}")
191
 
192
+ # Keep conversation manageable
193
+ if len(" ".join(conversation)) > 1200:
194
+ conversation = conversation[-3:] # Keep last 3 entries
195
 
196
+ print("❌ No solution found within step limit")
197
+ return "Could not solve within step limit"
198
 
199
  except Exception as e:
200
+ print(f"πŸ’₯ Agent error: {str(e)}")
201
+ return f"Agent error: {str(e)}"
202
 
203
+ def _generate_response(self, conversation: List[str]) -> str:
204
  try:
205
+ # Build prompt
206
+ prompt = f"<|system|>\n{self.system_prompt}<|end|>\n"
207
+ prompt += f"<|user|>\n{chr(10).join(conversation)}<|end|>\n"
208
+ prompt += "<|assistant|>"
209
 
210
+ # Tokenize
211
  inputs = tokenizer(
212
  prompt,
213
  return_tensors="pt",
 
216
  padding=False
217
  )
218
 
219
+ # Generate
220
+ generation_config = GenerationConfig(
221
+ max_new_tokens=MAX_TOKENS,
222
+ temperature=0.2, # Lower temperature for more focused responses
223
+ do_sample=True,
224
+ pad_token_id=tokenizer.pad_token_id,
225
+ eos_token_id=tokenizer.eos_token_id,
226
+ use_cache=False
227
+ )
228
+
229
  with torch.no_grad():
230
  outputs = model.generate(
231
  inputs.input_ids,
232
+ generation_config=generation_config,
233
  attention_mask=inputs.attention_mask
234
  )
235
 
236
+ # Decode
237
+ full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
238
+ response = full_response.split("<|assistant|>")[-1].strip()
239
 
240
+ # Cleanup
241
  del inputs, outputs
242
  gc.collect()
243
 
244
  return response
245
 
246
  except Exception as e:
247
+ return f"Generation error: {str(e)}"
248
 
249
+ def _extract_final_answer(self, text: str) -> str:
250
+ """Extract the final answer more reliably"""
251
  try:
252
+ if "Final Answer:" in text:
253
+ answer_part = text.split("Final Answer:")[-1].strip()
254
+ # Take first line of the answer
255
+ answer = answer_part.split('\n')[0].strip()
256
+ return answer if answer else "No answer provided"
257
+ return "No final answer found"
258
+ except:
259
+ return "Answer extraction failed"
260
+
261
+ def _execute_tools(self, text: str) -> str:
262
+ """Execute tools found in the response"""
263
+ try:
264
+ # Look for JSON tool calls
265
+ json_pattern = r'```json\s*(\{[^}]*\})\s*```'
266
+ matches = re.findall(json_pattern, text, re.DOTALL)
267
 
268
+ for match in matches:
269
+ try:
270
+ tool_call = json.loads(match)
271
+ tool_name = tool_call.get("tool")
272
+ args = tool_call.get("args", {})
273
+
274
+ if tool_name in self.tools:
275
+ print(f"πŸ”§ Executing {tool_name} with {args}")
276
+ result = self.tools[tool_name](**args)
277
+ return f"{tool_name}: {str(result)[:400]}"
278
+
279
+ except json.JSONDecodeError:
280
+ continue
281
+ except Exception as e:
282
+ return f"Tool execution error: {str(e)}"
283
 
284
+ return None
285
+
286
+ except Exception as e:
287
+ return f"Tool parsing error: {str(e)}"
288
 
289
+ # --- Efficient Runner ---
290
  def run_and_submit_all(profile: gr.OAuthProfile | None):
291
  if not profile:
292
+ return "❌ Please login to Hugging Face first", None
293
 
294
  username = profile.username
295
+ print(f"πŸš€ Starting evaluation for user: {username}")
296
+
297
+ # Initialize agent
298
+ try:
299
+ agent = BalancedGAIA_Agent()
300
+ except Exception as e:
301
+ return f"❌ Failed to initialize agent: {e}", None
302
 
303
+ # Setup
 
304
  api_url = DEFAULT_API_URL
305
  space_id = os.getenv("SPACE_ID", "unknown")
306
 
307
+ # Fetch questions
 
 
308
  try:
309
+ print("πŸ“₯ Fetching questions...")
310
+ response = requests.get(f"{api_url}/questions", timeout=15)
311
+ response.raise_for_status()
312
  questions = response.json()
313
+ print(f"πŸ“ Retrieved {len(questions)} questions")
314
  except Exception as e:
315
+ return f"❌ Failed to fetch questions: {e}", None
316
 
317
+ # Process questions
318
  results = []
319
  answers = []
320
+ total_start = time.time()
321
 
322
  for i, item in enumerate(questions):
323
  task_id = item.get("task_id")
 
326
  if not task_id:
327
  continue
328
 
329
+ print(f"\nπŸ“‹ [{i+1}/{len(questions)}] Task: {task_id}")
330
 
331
  try:
332
  answer = agent(question)
333
  answers.append({"task_id": task_id, "submitted_answer": answer})
334
+
335
+ # Truncate for display
336
+ q_display = question[:80] + "..." if len(question) > 80 else question
337
+ a_display = answer[:100] + "..." if len(answer) > 100 else answer
338
+
339
  results.append({
340
+ "Task": task_id[:8] + "...",
341
+ "Question": q_display,
342
+ "Answer": a_display,
343
+ "Status": "βœ…" if "error" not in answer.lower() and "timeout" not in answer.lower() else "❌"
344
  })
345
+
346
  except Exception as e:
347
+ error_answer = f"PROCESSING_ERROR: {str(e)}"
348
+ answers.append({"task_id": task_id, "submitted_answer": error_answer})
349
  results.append({
350
+ "Task": task_id[:8] + "...",
351
+ "Question": question[:80] + "..." if len(question) > 80 else question,
352
+ "Answer": error_answer,
353
+ "Status": "πŸ’₯"
354
  })
355
 
356
+ # Memory cleanup
357
+ if i % 3 == 0:
358
  gc.collect()
359
 
360
+ total_time = time.time() - total_start
361
+ avg_time = total_time / len(questions)
362
+ print(f"\n⏱️ Total processing time: {total_time:.1f}s ({avg_time:.1f}s per question)")
363
 
364
  # Submit results
365
  try:
366
+ print("πŸ“€ Submitting results...")
367
  submission = {
368
  "username": username,
369
  "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
370
  "answers": answers
371
  }
372
 
373
+ response = requests.post(f"{api_url}/submit", json=submission, timeout=60)
374
+ response.raise_for_status()
375
  result = response.json()
376
 
377
+ # Calculate success rate
378
+ successful = sum(1 for r in results if r["Status"] == "βœ…")
379
+ success_rate = (successful / len(results)) * 100
380
+
381
  status = (
382
+ f"🎯 EVALUATION COMPLETED\n"
383
  f"πŸ‘€ User: {result.get('username', username)}\n"
384
  f"πŸ“Š Score: {result.get('score', 'N/A')}% "
385
+ f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
386
+ f"⚑ Processing: {total_time:.1f}s total, {avg_time:.1f}s/question\n"
387
+ f"βœ… Success Rate: {success_rate:.1f}% ({successful}/{len(results)} processed)\n"
388
+ f"πŸ’¬ Message: {result.get('message', 'Evaluation completed!')}"
389
  )
390
 
391
  return status, pd.DataFrame(results)
392
 
393
  except Exception as e:
394
+ error_status = (
395
+ f"❌ SUBMISSION FAILED\n"
396
+ f"Error: {str(e)}\n"
397
+ f"⏱️ Processing completed in {total_time:.1f}s\n"
398
+ f"βœ… Questions processed: {len(results)}"
399
+ )
400
  return error_status, pd.DataFrame(results)
401
 
402
+ # --- Clean UI ---
403
+ with gr.Blocks(title="GAIA Agent - Balanced Fast") as demo:
404
+ gr.Markdown("# ⚑ GAIA Agent - Balanced Fast Mode")
405
+ gr.Markdown(
406
+ """
407
+ **Optimized for reliability and speed:**
408
+ - 4 reasoning steps max
409
+ - 25 second timeout per question
410
+ - 150 token responses
411
+ - Enhanced error handling
412
+ """
413
+ )
414
 
415
+ with gr.Row():
416
+ gr.LoginButton()
417
 
418
+ with gr.Row():
419
+ run_btn = gr.Button("πŸš€ Run Balanced Evaluation", variant="primary", size="lg")
420
 
421
+ with gr.Row():
422
+ status = gr.Textbox(
423
+ label="πŸ“Š Evaluation Status & Results",
424
+ lines=8,
425
+ interactive=False,
426
+ placeholder="Ready to run evaluation. Please login first."
427
+ )
428
+
429
+ with gr.Row():
430
+ table = gr.DataFrame(
431
+ label="πŸ“‹ Question Results",
432
+ interactive=False,
433
+ wrap=True
434
+ )
435
 
436
+ run_btn.click(
437
+ fn=run_and_submit_all,
438
+ outputs=[status, table],
439
+ show_progress=True
440
+ )
441
 
442
  if __name__ == "__main__":
443
+ print("⚑ GAIA Agent - Balanced Fast Mode Starting...")
444
+ print(f"βš™οΈ Settings: {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
445
 
446
  demo.launch(
447
+ share=True,
448
  server_name="0.0.0.0",
449
  server_port=7860,
450
  debug=False,