LamiaYT committed on
Commit
d66e9b7
·
1 Parent(s): 19b7914

fixing ver3

Browse files
Files changed (1) hide show
  1. app.py +201 -231
app.py CHANGED
@@ -6,343 +6,313 @@ import re
6
  import numexpr
7
  import pandas as pd
8
  import math
9
- import pdfminer
10
- from duckduckgo_search import DDGS
11
  from pdfminer.high_level import extract_text
12
  from bs4 import BeautifulSoup
13
- import html2text
14
- from typing import Dict, Any, List, Tuple, Callable, Optional
15
  from dotenv import load_dotenv
16
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
17
  import torch
18
  import time
19
  import gc
20
- import warnings
21
-
22
- # Suppress warnings
23
- warnings.filterwarnings("ignore")
24
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
 
26
  # --- Load Environment Variables ---
27
  load_dotenv()
28
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
29
 
30
- # --- Constants (ULTRA FAST MODE) ---
31
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
- MAX_STEPS = 5 # Reduced to 3
33
- MAX_TOKENS = 100 # Very short responses
34
  MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
35
- TIMEOUT_PER_QUESTION = 20 # 15 seconds max
36
- MAX_CONTEXT = 1024 # Very short context
37
-
38
- # --- Configure Environment ---
39
- os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
40
- os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
41
- os.environ["BITSANDBYTES_NOWELCOME"] = "1"
42
 
43
- print("Loading model (ULTRA FAST mode)...")
 
44
  start_time = time.time()
45
 
46
- # Minimal model loading
47
  model = AutoModelForCausalLM.from_pretrained(
48
  MODEL_NAME,
49
  trust_remote_code=True,
50
  torch_dtype=torch.float32,
51
- device_map="cpu",
52
- low_cpu_mem_usage=True,
53
- use_cache=False
54
  )
55
 
56
  tokenizer = AutoTokenizer.from_pretrained(
57
- MODEL_NAME,
58
  use_fast=True,
59
- trust_remote_code=True,
60
- padding_side="left"
61
  )
62
 
63
  if tokenizer.pad_token is None:
64
  tokenizer.pad_token = tokenizer.eos_token
65
 
66
- # Pre-compile generation config
67
- GENERATION_CONFIG = GenerationConfig(
68
- max_new_tokens=MAX_TOKENS,
69
- temperature=0.3,
70
- do_sample=True,
71
- pad_token_id=tokenizer.pad_token_id,
72
- eos_token_id=tokenizer.eos_token_id,
73
- use_cache=False,
74
- repetition_penalty=1.1
75
- )
76
-
77
- load_time = time.time() - start_time
78
- print(f"Model loaded in {load_time:.2f} seconds")
79
 
80
- # --- Lightning Fast Tools ---
81
  def web_search(query: str) -> str:
82
- """Ultra-fast web search"""
83
  try:
84
  if SERPER_API_KEY:
85
- params = {'q': query[:100], 'num': 1} # Single result
86
- headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
87
  response = requests.post(
88
  'https://google.serper.dev/search',
89
  headers=headers,
90
  json=params,
91
- timeout=3
92
  )
93
  results = response.json()
94
- if 'organic' in results and results['organic']:
95
- return f"{results['organic'][0]['title']}: {results['organic'][0]['snippet'][:200]}"
96
- return "No results"
 
 
 
 
 
97
  else:
98
  with DDGS() as ddgs:
99
- for result in ddgs.text(query, max_results=1):
100
- return f"{result['title']}: {result['body'][:200]}"
101
- return "No results"
102
- except:
103
- return "Search failed"
104
 
105
  def calculator(expression: str) -> str:
106
- """Lightning calculator"""
107
  try:
108
- clean_expr = re.sub(r'[^\d+\-*/().\s]', '', str(expression))
109
- if not clean_expr.strip():
110
- return "Invalid expression"
111
- result = eval(clean_expr) # Simple eval for speed
 
 
 
 
112
  return str(float(result))
113
- except:
114
- return "Calc error"
115
 
116
  def read_pdf(file_path: str) -> str:
117
- """Fast PDF reader"""
118
  try:
119
  text = extract_text(file_path)
120
- return text[:500] if text else "No PDF text"
121
- except:
122
- return "PDF error"
 
 
 
 
 
123
 
124
  def read_webpage(url: str) -> str:
125
- """Fast webpage reader"""
126
  try:
127
- response = requests.get(url, timeout=3, headers={'User-Agent': 'Bot'})
 
 
 
128
  soup = BeautifulSoup(response.text, 'html.parser')
129
- text = soup.get_text(separator=' ', strip=True)
130
- return text[:500] if text else "No webpage text"
131
- except:
132
- return "Webpage error"
 
 
 
 
 
 
 
 
133
 
134
  TOOLS = {
135
  "web_search": web_search,
136
- "calculator": calculator,
137
  "read_pdf": read_pdf,
138
  "read_webpage": read_webpage
139
  }
140
 
141
- # --- Ultra Fast Agent ---
142
- class FastGAIA_Agent:
143
  def __init__(self):
144
  self.tools = TOOLS
145
- self.prompt_template = (
146
- "<|system|>You solve GAIA questions fast. Tools: web_search, calculator, read_pdf, read_webpage.\n"
147
- "Format: ```json\n{\"tool\": \"name\", \"args\": {\"key\": \"value\"}}```\n"
148
- "Always end with: Final Answer: [answer]<|end|>\n"
149
- "<|user|>{history}<|end|>\n<|assistant|>"
150
- )
 
 
 
 
 
 
 
 
 
 
151
 
152
  def __call__(self, question: str) -> str:
153
  start_time = time.time()
 
154
 
155
  try:
156
- history = f"Question: {question}"
157
-
158
  for step in range(MAX_STEPS):
159
  if time.time() - start_time > TIMEOUT_PER_QUESTION:
160
- return "TIMEOUT"
161
 
162
- response = self._fast_generate(history)
 
163
 
164
- # Quick final answer check
165
  if "Final Answer:" in response:
166
- answer = response.split("Final Answer:")[-1].strip().split('\n')[0]
167
- return answer[:200] # Limit answer length
168
 
169
- # Quick tool parsing
170
- tool_result = self._quick_tool_use(response)
171
- if tool_result:
172
- history += f"\nAction: {tool_result}"
 
 
173
  else:
174
- history += f"\nThought: {response[:100]}"
175
 
176
- # Keep history short
177
- if len(history) > 800:
178
- history = history[-800:]
179
-
180
- return "No solution found"
181
 
 
182
  except Exception as e:
183
- return f"Error: {str(e)[:50]}"
184
 
185
- def _fast_generate(self, history: str) -> str:
186
- try:
187
- prompt = self.prompt_template.format(history=history)
188
-
189
- # Fast tokenization
190
- inputs = tokenizer(
191
- prompt,
192
- return_tensors="pt",
193
- truncation=True,
194
- max_length=MAX_CONTEXT,
195
- padding=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  )
197
-
198
- # Fast generation
199
- with torch.no_grad():
200
- outputs = model.generate(
201
- inputs.input_ids,
202
- generation_config=GENERATION_CONFIG,
203
- attention_mask=inputs.attention_mask
204
- )
205
-
206
- # Fast decoding
207
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
208
- response = response.split("<|assistant|>")[-1].strip()
209
-
210
- # Immediate cleanup
211
- del inputs, outputs
212
- gc.collect()
213
-
214
- return response
215
-
216
- except Exception as e:
217
- return f"Gen error: {str(e)}"
218
 
219
- def _quick_tool_use(self, text: str) -> str:
220
  try:
221
- # Quick JSON extraction
222
- json_match = re.search(r'```json\s*({[^}]*})\s*```', text)
223
- if not json_match:
224
- return ""
225
-
226
- tool_data = json.loads(json_match.group(1))
227
- tool_name = tool_data.get("tool", "")
228
- args = tool_data.get("args", {})
229
-
230
- if tool_name in self.tools:
231
- result = self.tools[tool_name](**args)
232
- return f"Used {tool_name}: {str(result)[:150]}"
233
-
234
  except:
235
- pass
236
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
- # --- Lightning Fast Runner ---
239
  def run_and_submit_all(profile: gr.OAuthProfile | None):
240
  if not profile:
241
- return "Please login first", None
242
-
243
- username = profile.username
244
-
245
- # Quick setup
246
- agent = FastGAIA_Agent()
247
- api_url = DEFAULT_API_URL
248
- space_id = os.getenv("SPACE_ID", "unknown")
249
 
250
- print(f"🚀 ULTRA FAST mode - User: {username}")
 
 
251
 
252
- # Fetch questions quickly
253
  try:
254
- response = requests.get(f"{api_url}/questions", timeout=10)
255
- questions = response.json()
256
- print(f"📝 Got {len(questions)} questions")
257
  except Exception as e:
258
- return f"Failed to get questions: {e}", None
259
-
260
- # Process at lightning speed
261
  results = []
262
  answers = []
263
- start_time = time.time()
264
 
265
- for i, item in enumerate(questions):
266
  task_id = item.get("task_id")
267
- question = item.get("question", "")
268
 
269
- if not task_id:
270
  continue
271
 
272
- print(f" [{i+1}/{len(questions)}] {task_id[:8]}...")
273
-
274
- try:
275
- answer = agent(question)
276
- answers.append({"task_id": task_id, "submitted_answer": answer})
277
- results.append({
278
- "ID": task_id[:8],
279
- "Question": question[:60] + "...",
280
- "Answer": answer[:80] + "..." if len(answer) > 80 else answer
281
- })
282
- except Exception as e:
283
- error_ans = f"ERROR: {str(e)[:30]}"
284
- answers.append({"task_id": task_id, "submitted_answer": error_ans})
285
- results.append({
286
- "ID": task_id[:8],
287
- "Question": question[:60] + "...",
288
- "Answer": error_ans
289
- })
290
 
291
- # Quick memory cleanup
292
- if i % 5 == 0:
293
- gc.collect()
 
 
 
294
 
295
- total_time = time.time() - start_time
296
- print(f"⏱️ Completed in {total_time:.1f}s ({total_time/len(questions):.1f}s per question)")
 
 
 
297
 
298
- # Submit results
299
  try:
300
- submission = {
301
- "username": username,
302
- "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
303
- "answers": answers
304
- }
305
-
306
- response = requests.post(f"{api_url}/submit", json=submission, timeout=30)
307
  result = response.json()
308
-
309
- status = (
310
- f"🎯 ULTRA FAST RESULTS\n"
311
- f"👤 User: {result.get('username', username)}\n"
312
- f"📊 Score: {result.get('score', 'N/A')}% "
313
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
314
- f"⏱️ Time: {total_time:.1f}s ({total_time/len(questions):.1f}s/question)\n"
315
- f"💬 {result.get('message', 'Completed!')}"
316
- )
317
-
318
- return status, pd.DataFrame(results)
319
-
320
  except Exception as e:
321
- error_status = f"Submission failed: {str(e)}\n⏱️ Processing time: {total_time:.1f}s"
322
- return error_status, pd.DataFrame(results)
323
-
324
- # --- Ultra Simple UI ---
325
- with gr.Blocks(title="GAIA Agent - ULTRA FAST") as demo:
326
- gr.Markdown("# ⚡ GAIA Agent - ULTRA FAST MODE")
327
- gr.Markdown("**Speed settings:** 3 steps max • 64 tokens • 15s timeout • Lightning tools")
328
 
329
- gr.LoginButton()
 
 
 
 
 
 
 
 
330
 
331
- run_btn = gr.Button("🚀 RUN ULTRA FAST", variant="primary", size="lg")
 
 
332
 
333
- status = gr.Textbox(label="📊 Results", lines=6, interactive=False)
334
- table = gr.DataFrame(label="📋 Answers", interactive=False)
335
 
336
- run_btn.click(run_and_submit_all, outputs=[status, table], show_progress=True)
 
 
 
337
 
338
  if __name__ == "__main__":
339
- print(" ULTRA FAST GAIA Agent Starting...")
340
- print(f"⚙️ {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
341
-
342
- demo.launch(
343
- share=True, # Added share=True for public link
344
- server_name="0.0.0.0",
345
- server_port=7860,
346
- debug=False,
347
- show_error=True
348
- )
 
6
  import numexpr
7
  import pandas as pd
8
  import math
 
 
9
  from pdfminer.high_level import extract_text
10
  from bs4 import BeautifulSoup
11
+ from typing import Dict, Any, List, Tuple, Optional
 
12
  from dotenv import load_dotenv
13
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
14
  import torch
15
  import time
16
  import gc
 
 
 
 
 
17
 
18
# --- Load Environment Variables ---
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()
# Serper API key; web_search takes its DuckDuckGo branch when this is unset.
SERPER_API_KEY = os.getenv("SERPER_API_KEY")

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_STEPS = 6 # Upper bound on generate/tool iterations per question
MAX_TOKENS = 256 # Used as max_new_tokens for every generation call
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
TIMEOUT_PER_QUESTION = 45 # Seconds; checked at the start of each agent step
MAX_RESULT_LENGTH = 500 # Character cap applied to tool outputs

# --- Model Loading ---
# The model and tokenizer are loaded once at import time and shared as
# module-level globals by the agent.
print("Loading optimized model...")
start_time = time.time()

# NOTE(review): trust_remote_code=True lets the model repository run its own
# Python code at load time — confirm this is acceptable for MODEL_NAME.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True,
    trust_remote_code=True
)

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"Model loaded in {time.time() - start_time:.2f} seconds")
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ # --- Enhanced Tools ---
54
def web_search(query: str) -> str:
    """Search the web and return up to three ``title: snippet`` lines.

    Uses the Serper API when ``SERPER_API_KEY`` is set and DuckDuckGo
    otherwise. The function never raises; failures are reported in the
    returned string so the agent can feed them back to the model.

    Args:
        query: Free-text search query.

    Returns:
        Newline-joined results truncated to ``MAX_RESULT_LENGTH`` characters,
        ``"No relevant results found"`` when nothing usable came back, or a
        ``"Search error: ..."`` message.
    """
    try:
        if SERPER_API_KEY:
            params = {'q': query, 'num': 3, 'hl': 'en', 'gl': 'us'}
            headers = {'X-API-KEY': SERPER_API_KEY}
            response = requests.post(
                'https://google.serper.dev/search',
                headers=headers,
                json=params,
                timeout=10
            )
            # Surface auth/quota failures as errors instead of "no results".
            response.raise_for_status()
            organic = response.json().get('organic') or []
            lines = [
                f"{r['title']}: {r['snippet']}"
                for r in organic[:3]
                if 'title' in r and 'snippet' in r
            ]
        else:
            # Imported here because the fallback is the only user of DDGS and
            # the module-level import is not guaranteed to be present.
            from duckduckgo_search import DDGS

            with DDGS() as ddgs:
                hits = list(ddgs.text(query, max_results=3))
            lines = [f"{r.get('title', '')}: {r.get('body', '')}" for r in hits]

        if not lines:
            return "No relevant results found"
        return "\n".join(lines)[:MAX_RESULT_LENGTH]
    except Exception as e:
        return f"Search error: {str(e)}"
 
81
 
82
def calculator(expression: str) -> str:
    """Evaluate a plain arithmetic expression and return the result as text.

    Everything except digits, ``+ - * / ( ) . ^ % ,`` and whitespace is
    stripped before evaluation. ``%`` is read as "percent" (``/100``, so it
    is not a modulo operator), thousands separators are dropped, and ``^``
    is treated as exponentiation.

    Args:
        expression: The expression to evaluate; non-string values are
            converted with ``str()`` first.

    Returns:
        ``str(float(result))`` on success, ``"Invalid empty expression"``
        when nothing evaluable remains after cleaning, or a
        ``"Calculation error: ..."`` message. Never raises.
    """
    try:
        # str() guards against the model passing a bare number as the argument.
        cleaned = re.sub(r'[^\d+\-*/().^%,\s]', '', str(expression))

        # Handle percentages and commas; NumExpr reads '^' as bitwise XOR, so
        # translate it to the power operator the caller almost certainly meant.
        cleaned = (
            cleaned.replace('%', '/100')
            .replace(',', '')
            .replace('^', '**')
            .strip()
        )
        if not cleaned:
            return "Invalid empty expression"

        result = numexpr.evaluate(cleaned)
        return str(float(result))
    except Exception as e:
        return f"Calculation error: {str(e)}"
96
 
97
def read_pdf(file_path: str) -> str:
    """Return the text of a PDF with whitespace collapsed, capped in length.

    Args:
        file_path: Path handed to ``extract_text``.

    Returns:
        At most ``MAX_RESULT_LENGTH`` characters of single-spaced text,
        ``"No readable text found in PDF"`` when extraction yields nothing,
        or a ``"PDF read error: ..."`` message if anything raises.
    """
    try:
        raw_text = extract_text(file_path)
        if raw_text:
            # Squash every whitespace run (including newlines) to one space.
            condensed = re.sub(r'\s+', ' ', raw_text).strip()
            return condensed[:MAX_RESULT_LENGTH]
        return "No readable text found in PDF"
    except Exception as e:
        return f"PDF read error: {str(e)}"
109
 
110
def read_webpage(url: str) -> str:
    """Fetch a page and return its visible text, capped in length.

    Args:
        url: Address to request with a 10 second timeout.

    Returns:
        At most ``MAX_RESULT_LENGTH`` characters of text with script, style,
        nav and footer elements removed, ``"No main content found"`` when
        the page has no text, or a ``"Webpage read error: ..."`` message if
        the request or parsing raises.
    """
    try:
        page = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
        page.raise_for_status()

        soup = BeautifulSoup(page.text, 'html.parser')

        # Drop elements that carry no readable content.
        for tag in soup(['script', 'style', 'nav', 'footer']):
            tag.decompose()

        # One text fragment per line, with long blank runs collapsed.
        content = re.sub(
            r'\n{3,}', '\n\n', soup.get_text(separator='\n', strip=True)
        )

        if not content:
            return "No main content found"
        return content[:MAX_RESULT_LENGTH]
    except Exception as e:
        return f"Webpage read error: {str(e)}"
130
 
131
# Registry of callable tools, keyed by the name used to look them up at call time.
TOOLS = {
    "web_search": web_search,
    "calculator": calculator,
    "read_pdf": read_pdf,
    "read_webpage": read_webpage
}
137
 
138
+ # --- Improved GAIA Agent ---
139
class GAIA_Agent:
    """Step-limited tool-using agent built on the module-level model.

    Each call runs up to ``MAX_STEPS`` rounds of: build a chat prompt from
    the running history, generate a reply, then either return the text after
    ``"Final Answer:"``, execute a JSON tool call found in the reply, or
    record the reply as analysis and continue.
    """

    def __init__(self):
        self.tools = TOOLS
        self.system_prompt = """You are an advanced GAIA problem solver. Follow these steps:
1. Analyze the question carefully
2. Choose the most appropriate tool
3. Process the results
4. Provide a precise final answer

Available Tools:
- web_search: For general knowledge questions
- calculator: For math problems
- read_pdf: For PDF content extraction
- read_webpage: For webpage content extraction

Tool format: ```json
{"tool": "tool_name", "args": {"arg1": value}}```

Always end with: Final Answer: [your answer]"""

    def __call__(self, question: str) -> str:
        """Answer one question.

        Args:
            question: The task text.

        Returns:
            The final answer (at most 500 characters), or a message
            describing a timeout, step exhaustion or an internal error.
            Never raises.
        """
        start_time = time.time()
        history = [f"Question: {question}"]

        try:
            for _ in range(MAX_STEPS):
                if time.time() - start_time > TIMEOUT_PER_QUESTION:
                    return "Timeout: Processing took too long"

                prompt = self._build_prompt(history)
                response = self._call_model(prompt)

                if "Final Answer:" in response:
                    answer = response.split("Final Answer:")[-1].strip()
                    return answer[:500]  # Limit answer length

                tool_call = self._parse_tool_call(response)
                if tool_call:
                    tool_name, args = tool_call
                    observation = self._use_tool(tool_name, args)
                    history.append(f"Tool Used: {tool_name}")
                    # Truncate long results so the prompt stays small.
                    history.append(f"Tool Result: {observation[:300]}...")
                else:
                    history.append(f"Analysis: {response}")

                gc.collect()

            return "Maximum steps reached without final answer"
        except Exception as e:
            return f"Error: {str(e)}"

    def _build_prompt(self, history: List[str]) -> str:
        """Render the system prompt and history in the chat template."""
        return (
            f"<|system|>\n{self.system_prompt}<|end|>\n<|user|>\n"
            + "\n".join(history)
            + "<|end|>\n<|assistant|>"
        )

    def _call_model(self, prompt: str) -> str:
        """Generate a completion for ``prompt`` and return only the new text."""
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3072,
            padding=False
        )
        # The model is loaded with device_map="auto", so it may not live on
        # the CPU; the inputs have to follow it.
        inputs = inputs.to(model.device)

        generation_config = GenerationConfig(
            max_new_tokens=MAX_TOKENS,
            temperature=0.3,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id
        )

        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                generation_config=generation_config,
                attention_mask=inputs.attention_mask
            )

        # generate() returns prompt + completion. Decode only the completion:
        # the system prompt itself contains "Final Answer:" and a ```json
        # example, which would otherwise be mistaken for model output.
        prompt_length = inputs.input_ids.shape[-1]
        completion_ids = outputs[0][prompt_length:]
        return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

    def _parse_tool_call(self, text: str) -> Optional[Tuple[str, Dict]]:
        """Extract ``(tool_name, args)`` from a fenced JSON block.

        Returns ``None`` when there is no fenced block, the JSON is invalid,
        or the ``"tool"``/``"args"`` keys are missing.
        """
        try:
            json_match = re.search(r'```json\s*({.+?})\s*```', text, re.DOTALL)
            if not json_match:
                return None
            tool_call = json.loads(json_match.group(1))
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError a non-str text.
            return None

        if isinstance(tool_call, dict) and "tool" in tool_call and "args" in tool_call:
            return tool_call["tool"], tool_call["args"]
        return None

    @staticmethod
    def _normalize_webpage_args(args):
        """Coerce loosely-shaped read_webpage arguments into ``{"url": ...}``.

        Handles a doubly nested ``{"args": {"url": ...}}`` payload and a URL
        stored under some other key. Returns ``args`` unchanged when it
        already has ``"url"`` or no URL can be found.
        """
        if isinstance(args, dict):
            if "url" in args:
                return args
            nested = args.get("args")
            if isinstance(nested, dict) and "url" in nested:
                return nested
            candidates = list(args.values())
        else:
            candidates = [args]

        for value in candidates:
            # Quotes and angle brackets are excluded so a URL taken from the
            # repr of a container does not drag its delimiters along.
            found = re.search(r'https?://[^\s\'"<>]+', str(value))
            if found:
                return {"url": found.group()}
        return args

    def _use_tool(self, tool_name: str, args: Dict) -> str:
        """Run a registered tool and return its output as capped text.

        Returns ``"Unknown tool: <name>"`` for unregistered names and
        ``"Tool error: ..."`` when the call raises (including bad argument
        names or a non-mapping ``args``).
        """
        if tool_name not in self.tools:
            return f"Unknown tool: {tool_name}"

        try:
            if tool_name == "read_webpage":
                args = self._normalize_webpage_args(args)

            return str(self.tools[tool_name](**args))[:MAX_RESULT_LENGTH]
        except Exception as e:
            return f"Tool error: {str(e)}"
246
 
247
+ # --- Evaluation Runner ---
248
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is ``None`` when
        the run stops before any question is processed, otherwise a
        DataFrame with one row per answered question.
    """
    if not profile:
        return "Please login first", None

    agent = GAIA_Agent()
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        response = requests.get(questions_url, timeout=15)
        # Report HTTP failures here instead of iterating over an error body.
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Failed to get questions: {str(e)}", None

    if not isinstance(questions_data, list) or not questions_data:
        return "Failed to get questions: server returned no question list", None

    results = []
    answers = []
    total = len(questions_data)

    for position, item in enumerate(questions_data, start=1):
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")

        if not task_id or not question:
            continue

        print(f"Processing question {position}/{total}")
        answer = agent(question)

        answers.append({"task_id": task_id, "submitted_answer": answer})
        results.append({
            "Task ID": task_id,
            "Question": question[:100] + "..." if len(question) > 100 else question,
            "Answer": answer[:100] + "..." if len(answer) > 100 else answer
        })

    if not answers:
        # Nothing usable in the payload; an empty submission would be pointless.
        return "No answers were produced; nothing submitted", pd.DataFrame(results)

    submission = {
        "username": profile.username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
        "answers": answers
    }

    try:
        response = requests.post(submit_url, json=submission, timeout=30)
        # A rejected submission must not be reported as "Submitted!".
        response.raise_for_status()
        result = response.json()
        return f"Submitted! Score: {result.get('score', 'N/A')}", pd.DataFrame(results)
    except Exception as e:
        return f"Submission failed: {str(e)}", pd.DataFrame(results)
 
 
 
 
 
 
294
 
295
# --- Gradio Interface ---
# Layout: heading and summary text, a row holding the login and run buttons,
# then the status box and results table that run_and_submit_all fills in.
with gr.Blocks(title="Enhanced GAIA Agent") as demo:
    gr.Markdown("## 🚀 Enhanced GAIA Agent Evaluation")
    gr.Markdown("""
    Improved version with:
    - Better tool utilization
    - Increased step/token limits
    - Enhanced error handling
    """)

    with gr.Row():
        gr.LoginButton()
        run_btn = gr.Button("Run Evaluation", variant="primary")

    output_status = gr.Textbox(label="Status")
    results_table = gr.DataFrame(label="Results")

    # No explicit inputs are wired; presumably Gradio supplies the OAuth
    # profile based on the handler's type annotation — TODO confirm.
    run_btn.click(
        run_and_submit_all,
        outputs=[output_status, results_table]
    )

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)