LamiaYT committed on
Commit
cccb073
·
1 Parent(s): a7b11ed
Files changed (1)
  1. app.py +119 -193
app.py CHANGED
@@ -17,6 +17,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import torch
 import time
 import gc
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 # --- Load Environment Variables ---
 load_dotenv()
@@ -24,109 +26,96 @@ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
 
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MAX_STEPS = 6
-MAX_TOKENS = 256
+MAX_STEPS = 4 # Reduced from 6
+MAX_TOKENS = 128 # Reduced from 256
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
+TIMEOUT_PER_QUESTION = 30 # 30 seconds max per question
 
 # --- Configure Environment for Hugging Face Spaces ---
 os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["BITSANDBYTES_NOWELCOME"] = "1"
 
-print("Loading model (CPU-compatible)...")
+print("Loading model (CPU-optimized)...")
 start_time = time.time()
 
-# Load model with explicit configuration for better compatibility
+# Load model with aggressive optimization
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
-    torch_dtype=torch.float32, # Use float32 for CPU compatibility
-    device_map="cpu", # Explicitly set to CPU
-    low_cpu_mem_usage=True, # Optimize for low memory usage
-    use_cache=False # Disable cache to avoid DynamicCache issues
+    torch_dtype=torch.float32,
+    device_map="cpu",
+    low_cpu_mem_usage=True,
+    use_cache=False,
+    attn_implementation="eager" # Use eager attention for better CPU performance
 )
 
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_NAME,
-    use_fast=False,
+    use_fast=True, # Changed to True for faster tokenization
     trust_remote_code=True
 )
 
-# Ensure pad token is set
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
 load_time = time.time() - start_time
 print(f"Model loaded in {load_time:.2f} seconds")
 
-# --- Tools for GAIA Agent ---
+# --- Optimized Tools ---
 def web_search(query: str) -> str:
-    """Search the web using DuckDuckGo or Serper API"""
+    """Search the web with timeout and result limiting"""
     try:
         if SERPER_API_KEY:
-            # Use Serper API if key is available
-            params = {
-                'q': query,
-                'num': 3,
-                'hl': 'en',
-                'gl': 'us'
-            }
-            headers = {
-                'X-API-KEY': SERPER_API_KEY,
-                'Content-Type': 'application/json'
-            }
+            params = {'q': query, 'num': 2, 'hl': 'en', 'gl': 'us'}
+            headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
                 json=params,
-                timeout=10
+                timeout=5 # Reduced timeout
             )
             results = response.json()
             if 'organic' in results:
-                return json.dumps([r['title'] + ": " + r['snippet'] for r in results['organic'][:3]])
+                return json.dumps([f"{r['title']}: {r['snippet'][:100]}" for r in results['organic'][:2]])
             return "No results found"
         else:
-            # Fallback to DuckDuckGo
            with DDGS() as ddgs:
-                results = [r for r in ddgs.text(query, max_results=3)]
-                return json.dumps([r['title'] + ": " + r['body'] for r in results])
+                results = [r for r in ddgs.text(query, max_results=2)]
+                return json.dumps([f"{r['title']}: {r['body'][:100]}" for r in results])
     except Exception as e:
         return f"Search error: {str(e)}"
 
 def calculator(expression: str) -> str:
-    """Evaluate mathematical expressions safely"""
+    """Fast mathematical evaluation"""
     try:
-        # Clean the expression
         expression = re.sub(r'[^\d+\-*/().\s]', '', expression)
         result = numexpr.evaluate(expression)
-        return str(result)
+        return str(float(result))
     except Exception as e:
         return f"Calculation error: {str(e)}"
 
 def read_pdf(file_path: str) -> str:
-    """Extract text from PDF files"""
+    """Extract text from PDF with length limit"""
     try:
         text = extract_text(file_path)
-        return text[:2000] if text else "No text found in PDF"
+        return text[:1000] if text else "No text found in PDF" # Reduced limit
     except Exception as e:
         return f"PDF read error: {str(e)}"
 
 def read_webpage(url: str) -> str:
-    """Fetch and extract text from web pages"""
+    """Fast webpage reading with aggressive limits"""
     try:
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-        }
-        response = requests.get(url, timeout=10, headers=headers)
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
+        response = requests.get(url, timeout=5, headers=headers) # Reduced timeout
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Remove script and style elements
         for script in soup(["script", "style"]):
             script.decompose()
 
         text = soup.get_text(separator=' ', strip=True)
-        return text[:2000] if text else "No text found on webpage"
+        return text[:1000] if text else "No text found on webpage" # Reduced limit
     except Exception as e:
         return f"Webpage read error: {str(e)}"
 
@@ -137,115 +126,98 @@ TOOLS = {
     "read_webpage": read_webpage
 }
 
-# --- GAIA Agent Implementation ---
+# --- Optimized GAIA Agent ---
 class GAIA_Agent:
     def __init__(self):
         self.tools = TOOLS
-        self.history = []
         self.system_prompt = (
-            "You are an expert GAIA problem solver. Use these tools: {web_search, calculator, read_pdf, read_webpage}.\n"
-            "Guidelines:\n"
-            "1. Think step-by-step. Explain reasoning\n"
-            "2. Use tools for calculations, searches, or file operations\n"
-            "3. Tools must be called as: ```json\n{'tool': 'tool_name', 'args': {'arg1': value}}```\n"
-            "4. Final Answer must be exact and standalone\n\n"
-            "Example:\n"
-            "Question: \"What's the population density of France? (File: france_data.pdf)\"\n"
-            "Thought: Need population and area. Read PDF first.\n"
-            "Action: ```json\n{'tool': 'read_pdf', 'args': {'file_path': 'france_data.pdf'}}```\n"
-            "Observation: Population: 67.8M, Area: 643,801 km²\n"
-            "Thought: Now calculate density: 67,800,000 / 643,801\n"
-            "Action: ```json\n{'tool': 'calculator', 'args': {'expression': '67800000 / 643801'}}```\n"
-            "Observation: 105.32\n"
-            "Final Answer: 105.32 people/km²"
+            "You are a GAIA problem solver. Tools: {web_search, calculator, read_pdf, read_webpage}.\n"
+            "Be concise and direct. Use tools efficiently.\n"
+            "Tool format: ```json\n{'tool': 'tool_name', 'args': {'arg1': value}}```\n"
+            "End with: Final Answer: [exact answer]"
         )
 
     def __call__(self, question: str) -> str:
-        print(f"\nProcessing: {question[:80]}...")
-        self.history = [f"Question: {question}"]
+        start_time = time.time()
+        print(f"Processing: {question[:50]}...")
 
         try:
+            history = [f"Question: {question}"]
+
             for step in range(MAX_STEPS):
-                prompt = self._build_prompt()
+                # Check timeout
+                if time.time() - start_time > TIMEOUT_PER_QUESTION:
+                    return "TIMEOUT: Question took too long"
+
+                prompt = self._build_prompt(history)
                 response = self._call_model(prompt)
 
                 if "Final Answer" in response:
                     answer = response.split("Final Answer:")[-1].strip()
-                    print(f"Final Answer: {answer}")
+                    elapsed = time.time() - start_time
+                    print(f"Completed in {elapsed:.1f}s: {answer[:30]}...")
                     return answer
 
                 tool_call = self._parse_tool_call(response)
                 if tool_call:
                     tool_name, args = tool_call
                     observation = self._use_tool(tool_name, args)
-                    self.history.append(f"Observation: {observation}")
+                    history.append(f"Action: {tool_name}")
+                    history.append(f"Result: {observation}")
                 else:
-                    self.history.append(f"Thought: {response}")
-
-                # Clean up memory after each step
-                if step % 2 == 0:
-                    gc.collect()
+                    history.append(f"Thought: {response}")
+
+                # Aggressive memory cleanup
+                gc.collect()
 
-            return "Agent couldn't find solution within step limit"
+            return "Could not solve within step limit"
 
         except Exception as e:
-            print(f"Error in agent execution: {str(e)}")
-            return f"Agent error: {str(e)}"
+            print(f"Agent error: {str(e)}")
+            return f"Error: {str(e)}"
 
-    def _build_prompt(self) -> str:
+    def _build_prompt(self, history: List[str]) -> str:
         prompt = "<|system|>\n" + self.system_prompt + "<|end|>\n"
-        prompt += "<|user|>\n" + "\n".join(self.history) + "<|end|>\n"
+        prompt += "<|user|>\n" + "\n".join(history) + "<|end|>\n"
         prompt += "<|assistant|>"
         return prompt
 
     def _call_model(self, prompt: str) -> str:
-        start_time = time.time()
-
        try:
-            # Tokenize input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
-                return_attention_mask=True,
                truncation=True,
-                max_length=3072 # Leave room for generation
+                max_length=2048, # Reduced context
+                padding=False
            )
 
-            # Move to same device as model
-            inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
-            # Create generation config
            generation_config = GenerationConfig(
                max_new_tokens=MAX_TOKENS,
-                temperature=0.01,
+                temperature=0.1, # Less randomness for faster convergence
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
-                use_cache=False # Disable cache to avoid DynamicCache issues
+                use_cache=False
            )
 
-            # Generate response
            with torch.no_grad():
                outputs = model.generate(
-                    **inputs,
-                    generation_config=generation_config
+                    inputs.input_ids,
+                    generation_config=generation_config,
+                    attention_mask=inputs.attention_mask
                )
 
-            # Decode response
            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            response = full_response.split("<|assistant|>")[-1].strip()
 
-            gen_time = time.time() - start_time
-            print(f"Generated {len(response)} tokens in {gen_time:.2f}s: {response[:60]}...")
-
-            # Clean up
+            # Immediate cleanup
            del inputs, outputs
-            gc.collect()
+            torch.cuda.empty_cache() if torch.cuda.is_available() else None
 
            return response
 
        except Exception as e:
-            print(f"Model generation error: {str(e)}")
            return f"Generation error: {str(e)}"
 
    def _parse_tool_call(self, text: str) -> Optional[Tuple[str, Dict]]:
@@ -255,36 +227,29 @@ class GAIA_Agent:
                tool_call = json.loads(json_match.group(1))
                if "tool" in tool_call and "args" in tool_call:
                    return tool_call["tool"], tool_call["args"]
-        except Exception as e:
-            print(f"Tool parse error: {str(e)}")
+        except:
+            pass
        return None
 
    def _use_tool(self, tool_name: str, args: Dict) -> str:
        if tool_name not in self.tools:
-            return f"Error: Unknown tool {tool_name}"
+            return f"Unknown tool: {tool_name}"
 
-        print(f"Using tool: {tool_name}({args})")
        try:
-            start_time = time.time()
            result = self.tools[tool_name](**args)
-            exec_time = time.time() - start_time
-            print(f"Tool executed in {exec_time:.2f}s")
-            return str(result)[:500] # Truncate long outputs
+            return str(result)[:300] # Truncate results
        except Exception as e:
            return f"Tool error: {str(e)}"
 
-# --- Evaluation Runner ---
+# --- Optimized Evaluation Runner ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Fetches questions, runs agent, submits answers, and displays results"""
+    """Fast evaluation with parallel processing where possible"""
    space_id = os.getenv("SPACE_ID")
 
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
+    if not profile:
        return "Please Login to Hugging Face with the button.", None
 
+    username = profile.username
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
@@ -292,127 +257,103 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
    try:
        agent = GAIA_Agent()
    except Exception as e:
-        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
 
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code URL: {agent_code}")
 
    # Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
    try:
-        response = requests.get(questions_url, timeout=30)
+        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
+            return "No questions found.", None
+        print(f"Processing {len(questions_data)} questions...")
    except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
+        return f"Error fetching questions: {e}", None
 
-    # Run Agent
+    # Process questions with progress tracking
    results_log = []
    answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
+    total_start = time.time()
 
    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
 
        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
            continue
 
        try:
-            print(f"Processing question {i+1}/{len(questions_data)}")
+            print(f"[{i+1}/{len(questions_data)}] Processing {task_id}...")
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": submitted_answer
+                "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
+                "Answer": submitted_answer[:100] + "..." if len(submitted_answer) > 100 else submitted_answer
            })
 
-            # Clean up memory periodically
-            if i % 5 == 0:
+            # Memory cleanup every few questions
+            if i % 3 == 0:
                gc.collect()
 
        except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            error_answer = f"AGENT ERROR: {str(e)}"
+            error_answer = f"ERROR: {str(e)}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
            results_log.append({
                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": error_answer
+                "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
+                "Answer": error_answer
            })
 
+    total_time = time.time() - total_start
+    print(f"All questions processed in {total_time:.1f} seconds")
+
    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+        return "No answers generated.", pd.DataFrame(results_log)
 
-    # Prepare Submission
+    # Submit results
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
 
-    # Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
+        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
+
        final_status = (
-            f"Submission Successful!\n"
+            f"✅ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
+            f"Processing Time: {total_time:.1f}s\n"
+            f"Message: {result_data.get('message', 'No message')}"
        )
-        print("Submission successful.")
+
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
+
    except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
+        error_msg = f"❌ Submission Failed: {str(e)}"
        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
+        return error_msg, results_df
 
 # --- Gradio Interface ---
-with gr.Blocks(title="GAIA Agent Evaluation") as demo:
-    gr.Markdown("# GAIA Agent Evaluation Runner")
+with gr.Blocks(title="GAIA Agent - Fast Mode") as demo:
+    gr.Markdown("# 🚀 GAIA Agent Evaluation (Optimized)")
    gr.Markdown(
        """
-        **Instructions:**
-        1. Log in to your Hugging Face account using the button below
-        2. Click 'Run Evaluation & Submit All Answers' to start the evaluation
-        3. View results and score in the output sections
+        **Fast Mode Optimizations:**
+        - Reduced max steps: 4 per question
+        - Shorter token generation: 128 tokens max
+        - 30s timeout per question
+        - Aggressive memory management
 
-        **Agent Information:**
-        - Model: Phi-3-mini-4k-instruct (CPU optimized)
-        - Tools: Web Search, Calculator, PDF Reader, Webpage Reader
-        - Max Steps: 6 per question
-        - Memory: Optimized for 2vCPU/16GB environment
+        **Usage:** Login → Click Run → View Results
        """
    )
 
@@ -420,19 +361,19 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    gr.LoginButton()
 
    with gr.Row():
-        run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary", size="lg")
+        run_button = gr.Button("🏃‍♂️ Run Fast Evaluation", variant="primary", size="lg")
 
    with gr.Row():
        status_output = gr.Textbox(
-            label="Evaluation Status & Submission Result",
-            lines=5,
+            label="📊 Status & Results",
+            lines=6,
            interactive=False,
-            placeholder="Click the button above to start evaluation..."
+            placeholder="Ready to run evaluation..."
        )
 
    with gr.Row():
        results_table = gr.DataFrame(
-            label="Questions and Agent Answers",
+            label="📝 Questions & Answers",
            wrap=True,
            interactive=False
        )
@@ -444,27 +385,12 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    )
 
 if __name__ == "__main__":
-    print("\n" + "="*50)
-    print("GAIA Agent Evaluation System Starting")
-    print("="*50)
-
-    space_host = os.getenv("SPACE_HOST")
-    space_id = os.getenv("SPACE_ID")
-
-    if space_host:
-        print(f"✅ SPACE_HOST found: {space_host}")
-    else:
-        print("⚠️ SPACE_HOST not found")
-
-    if space_id:
-        print(f"✅ SPACE_ID found: {space_id}")
-    else:
-        print("⚠️ SPACE_ID not found")
+    print("🚀 GAIA Agent Fast Mode Starting...")
+    print(f"⚙️ Max Steps: {MAX_STEPS}, Max Tokens: {MAX_TOKENS}")
+    print(f"⏱️ Timeout per question: {TIMEOUT_PER_QUESTION}s")
 
-    print("="*50)
-    print("Launching Gradio Interface...")
    demo.launch(
-        debug=False, # Disable debug in production
+        debug=False,
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
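
The central pattern introduced by this commit is a per-question wall-clock budget checked at the top of each reasoning step, alongside the lower MAX_STEPS and MAX_TOKENS limits. A minimal standalone sketch of that pattern, assuming a hypothetical call_model stub in place of the real Phi-3 generation call and illustrative constants mirroring the commit:

import time

MAX_STEPS = 4               # mirrors the commit's reduced step limit
TIMEOUT_PER_QUESTION = 30   # seconds of wall-clock budget per question

def call_model(prompt: str) -> str:
    """Hypothetical stand-in for the real Phi-3 generation call."""
    time.sleep(1)  # simulate generation latency
    return "Final Answer: 42"

def solve(question: str) -> str:
    start = time.time()
    history = [f"Question: {question}"]
    for _ in range(MAX_STEPS):
        # Abort as soon as the per-question budget is exhausted,
        # so one slow question cannot stall the whole evaluation run.
        if time.time() - start > TIMEOUT_PER_QUESTION:
            return "TIMEOUT: Question took too long"
        response = call_model("\n".join(history))
        if "Final Answer" in response:
            return response.split("Final Answer:")[-1].strip()
        history.append(f"Thought: {response}")
    return "Could not solve within step limit"

print(solve("What is 6 * 7?"))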