LamiaYT committed on
Commit
56455d6
·
1 Parent(s): 529a4e1
Files changed (1) hide show
  1. app.py +339 -380
app.py CHANGED
@@ -13,7 +13,7 @@ import math
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
  class GAIASpecializedSearchEngine:
16
- """GAIA-specialized search engine with pattern recognition"""
17
 
18
  def __init__(self):
19
  self.session = requests.Session()
@@ -58,400 +58,352 @@ class GAIASpecializedSearchEngine:
58
  print(f"Search error: {e}")
59
  return {}
60
 
61
- def comprehensive_search(self, query: str) -> str:
62
- """Comprehensive search with multiple fallbacks"""
63
  print(f"๐Ÿ” Searching: {query[:100]}...")
64
-
65
- # Primary search
66
- data = self.search_with_serper(query, 15)
67
- if not data:
68
- return "Search failed"
69
-
70
- # Extract all available information
71
- all_content = []
72
-
73
- # Answer box (highest priority)
74
- if "answerBox" in data:
75
- answer_box = data["answerBox"]
76
- if "answer" in answer_box:
77
- return answer_box["answer"].strip()
78
- elif "snippet" in answer_box:
79
- return answer_box["snippet"].strip()
80
-
81
- # Knowledge graph
82
- if "knowledgeGraph" in data:
83
- kg = data["knowledgeGraph"]
84
- if "description" in kg:
85
- all_content.append(kg["description"])
86
- if "attributes" in kg:
87
- for attr_name, attr_value in kg["attributes"].items():
88
- all_content.append(f"{attr_name}: {attr_value}")
89
-
90
- # Organic results
91
- for result in data.get("organic", []):
92
- title = result.get("title", "")
93
- snippet = result.get("snippet", "")
94
- if title and snippet:
95
- all_content.append(f"{title}: {snippet}")
96
-
97
- # People also ask
98
- if "peopleAlsoAsk" in data:
99
- for paa in data["peopleAlsoAsk"][:3]:
100
- if "snippet" in paa:
101
- all_content.append(paa["snippet"])
102
-
103
- return "\n".join(all_content) if all_content else "No search results"
104
 
105
  class GAIAQuestionSolver:
106
- """Specialized solver for GAIA benchmark questions"""
107
 
108
  def __init__(self):
109
  self.search_engine = GAIASpecializedSearchEngine()
110
- self.name_patterns = [
111
- r'\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', # Full names
112
- r'\b[A-Z][a-z]+\b' # Single names
113
- ]
114
 
115
  def solve_question(self, question: str) -> str:
116
- """Main solving method with GAIA-specific patterns"""
117
  print(f"๐Ÿค” Analyzing: {question[:100]}...")
118
 
119
- # Handle reversed text questions
120
- if self.is_reversed_text_question(question):
121
  return self.solve_reversed_text(question)
122
 
123
- # Handle file reference questions (extract info from question context)
124
- if self.has_file_reference(question):
125
- return self.solve_file_reference_question(question)
 
 
 
 
126
 
127
- # Handle mathematical questions
128
- if self.is_mathematical_question(question):
129
- return self.solve_mathematical_question(question)
130
 
131
- # Handle multi-step actor/person questions
132
- if self.is_multi_step_person_question(question):
133
- return self.solve_multi_step_person_question(question)
134
 
135
- # Handle specific entity questions
136
- if self.is_specific_entity_question(question):
137
- return self.solve_specific_entity_question(question)
138
 
139
- # Handle general factual questions
140
- return self.solve_factual_question(question)
141
 
142
- def is_reversed_text_question(self, question: str) -> bool:
143
- """FIXED: More precise reversed text detection"""
144
- # Only trigger if we see clear reversed patterns
145
- reversed_words = []
146
- words = question.split()
147
 
148
- for word in words:
149
- # Check if word is likely reversed by seeing if reverse is a common English word
150
- reversed_word = word[::-1].lower()
151
- if reversed_word in ['left', 'right', 'up', 'down', 'yes', 'no', 'the', 'and', 'answer']:
152
- reversed_words.append(word)
 
153
 
154
- # Only consider it reversed if we have multiple clear indicators
155
- return len(reversed_words) >= 2
156
 
157
  def solve_reversed_text(self, question: str) -> str:
158
- """FIXED: Better reversed text solving"""
159
- words = question.split()
160
-
161
  for word in words:
162
- reversed_word = word[::-1].lower()
163
- if reversed_word == 'left':
164
- return 'right'
165
- elif reversed_word == 'right':
166
- return 'left'
167
- elif reversed_word == 'up':
168
- return 'down'
169
- elif reversed_word == 'down':
170
- return 'up'
171
-
172
- return "Unable to determine reversed answer"
173
-
174
- def has_file_reference(self, question: str) -> bool:
175
- """Check if question references files"""
176
- file_refs = [
177
- "attached", "excel file", "python code", "spreadsheet",
178
- "file contains", "in the file", "document", "pdf"
179
- ]
180
- return any(ref in question.lower() for ref in file_refs)
181
-
182
- def solve_file_reference_question(self, question: str) -> str:
183
- """Handle file reference questions by extracting context"""
184
-
185
- # Python code questions
186
- if "python code" in question.lower() and "output" in question.lower():
187
- # Try to find any code snippets in the question itself
188
- code_match = re.search(r'```python\n(.*?)\n```', question, re.DOTALL)
189
- if code_match:
190
- try:
191
- code = code_match.group(1)
192
- # Safe execution of simple math
193
- if re.match(r'^[\d\s\+\-\*\/\(\)\.]+$', code):
194
- return str(eval(code))
195
- except:
196
- pass
197
-
198
- # Search for similar questions
199
- search_query = question.replace("attached", "").replace("python code", "python program").strip()
200
- return self.extract_number_from_search(search_query)
201
-
202
- # Excel/spreadsheet questions
203
- elif any(term in question.lower() for term in ["excel", "spreadsheet", "sales"]):
204
- if "total" in question.lower() or "sum" in question.lower():
205
- return self.extract_number_from_search(question)
206
- elif "average" in question.lower():
207
- return self.extract_number_from_search(question)
208
-
209
- # Chemistry/academic questions with file references
210
- elif "exercises" in question.lower() or "chemistry" in question.lower():
211
- # Extract the specific search terms
212
- search_terms = []
213
- if "equine veterinarian" in question.lower():
214
- search_terms.append("equine veterinarian")
215
- if "chemistry" in question.lower():
216
- search_terms.append("chemistry")
217
-
218
- if search_terms:
219
- search_query = " ".join(search_terms) + " surname name"
220
- return self.extract_name_from_search(search_query, name_type="surname")
221
-
222
- # Botany professor question
223
- elif "botany" in question.lower() and "professor" in question.lower():
224
- return self.extract_name_from_search("botany professor grocery list", name_type="name")
225
-
226
- # General file reference - try to extract meaningful search terms
227
- clean_question = re.sub(r'\b(attached|file|document|excel|python code)\b', '', question, flags=re.IGNORECASE)
228
- return self.solve_factual_question(clean_question.strip())
229
-
230
- def is_mathematical_question(self, question: str) -> bool:
231
- """Detect math questions"""
232
- math_indicators = ['calculate', 'compute', 'how many', 'total', 'sum', 'average', 'at bats']
233
- return any(indicator in question.lower() for indicator in math_indicators)
234
-
235
- def solve_mathematical_question(self, question: str) -> str:
236
- """Solve mathematical questions"""
237
- # Sports statistics questions
238
- if "at bats" in question.lower() and "yankee" in question.lower():
239
- search_query = question.replace("How many", "").strip()
240
- return self.extract_number_from_search(search_query)
241
-
242
- # Direct calculation
243
- numbers = re.findall(r'\d+', question)
244
- if len(numbers) >= 2 and any(op in question for op in ['+', '-', '*', '/', 'plus', 'minus', 'times']):
245
  try:
246
- if '+' in question or 'plus' in question:
247
- return str(sum(int(n) for n in numbers))
248
- elif '*' in question or 'times' in question:
 
 
249
  result = 1
250
- for n in numbers:
251
- result *= int(n)
252
- return str(result)
 
 
 
 
 
 
 
 
 
253
  except:
254
  pass
255
 
256
- return self.extract_number_from_search(question)
257
 
258
- def is_multi_step_person_question(self, question: str) -> bool:
259
- """Detect multi-step questions about people"""
260
- patterns = [
261
- "actor who played",
262
- "person who",
263
- "who did the",
264
- "play in"
265
- ]
266
- return any(pattern in question.lower() for pattern in patterns)
267
 
268
- def solve_multi_step_person_question(self, question: str) -> str:
269
- """Solve complex person/actor questions"""
 
270
 
271
- # Handle Polish Raymond question
272
- if "polish-language" in question.lower() and "raymond" in question.lower():
273
- # Step 1: Find who played Ray in Polish version
274
- search1 = "Polish version Everybody Loves Raymond actor Ray"
275
- result1 = self.search_engine.comprehensive_search(search1)
276
-
277
- # Extract actor name from results
278
- actor_names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', result1)
279
- for name in actor_names:
280
- if name not in ["Everybody Loves", "Loves Raymond"]:
281
- # Step 2: Find what this actor played in other shows
282
- search2 = f"{name} actor roles television movies"
283
- result2 = self.search_engine.comprehensive_search(search2)
284
-
285
- # Look for character names
286
- character_names = re.findall(r'\b[A-Z][a-z]+\b', result2)
287
- for char in character_names:
288
- if char not in name.split() and len(char) > 2:
289
- return char
290
-
291
- # Fallback search
292
- return self.extract_name_from_search("Polish Everybody Loves Raymond Ray actor other roles")
293
 
294
- # General multi-step approach
295
- return self.solve_factual_question(question)
296
-
297
- def is_specific_entity_question(self, question: str) -> bool:
298
- """Detect questions about specific entities"""
299
- entity_patterns = [
300
- "country code", "olympics", "competition", "recipient",
301
- "specimens", "described by", "pitchers", "number"
302
- ]
303
- return any(pattern in question.lower() for pattern in entity_patterns)
304
-
305
- def solve_specific_entity_question(self, question: str) -> str:
306
- """Solve entity-specific questions"""
307
 
308
- # Olympic questions
309
- if "olympics" in question.lower() and "least" in question.lower():
310
- search_query = question.replace("What country", "country").replace("If there's a tie", "")
311
- result = self.search_engine.comprehensive_search(search_query)
312
-
313
- # Look for country names and numbers
314
- countries = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', result)
315
- numbers = re.findall(r'\b\d+\b', result)
316
-
317
- # Find countries with small numbers
318
- for country in countries:
319
- if country not in ["Summer Olympics", "Olympic Games"] and len(country) > 2:
320
- return country
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
- # Competition recipient questions
323
- elif "competition recipient" in question.lower() or "malko" in question.lower():
324
- return self.extract_name_from_search(question, name_type="first_name")
 
 
 
 
 
 
 
 
325
 
326
- # Pitcher number questions
327
- elif "pitchers" in question.lower() and "number" in question.lower():
328
- search_query = question.replace("Who are the", "").replace("Give th", "")
329
- return self.extract_name_from_search(search_query)
330
 
331
- # Vietnamese specimens question
332
- elif "vietnamese specimens" in question.lower():
333
- return self.extract_location_from_search(question)
334
 
335
- return self.solve_factual_question(question)
336
 
337
- def solve_factual_question(self, question: str) -> str:
338
- """FIXED: Better factual question handling"""
339
- search_result = self.search_engine.comprehensive_search(question)
340
-
341
- if not search_result or search_result == "Search failed":
342
- return "Information not found"
343
-
344
- q_lower = question.lower()
345
 
346
- # FIXED: More specific question type detection
347
- if 'first name' in q_lower:
348
- return self.extract_name_from_search_result(search_result, 'first_name')
349
- elif any(term in q_lower for term in ['surname', 'last name', 'family name']):
350
- return self.extract_name_from_search_result(search_result, 'surname')
351
- elif any(term in q_lower for term in ['who is', 'who was', 'name of']):
352
- return self.extract_name_from_search_result(search_result, 'full_name')
353
- elif any(term in q_lower for term in ['how many', 'number of', 'count']):
354
- return self.extract_number_from_search_result(search_result)
355
- elif 'country' in q_lower and 'least' in q_lower:
356
- # Extract country names specifically
357
- countries = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', search_result)
358
- # Filter for actual country names
359
- for country in countries:
360
- if len(country) > 2 and country not in ['Summer', 'Olympics', 'Games']:
361
- return country
362
- return "Country not found"
363
-
364
- # Default: return first meaningful sentence
365
- sentences = [s.strip() for s in search_result.split('.') if len(s.strip()) > 20]
366
- return sentences[0] if sentences else "Answer not found"
367
-
368
- def extract_name_from_search(self, query: str, name_type: str = "full_name") -> str:
369
- """Extract names from search results"""
370
- result = self.search_engine.comprehensive_search(query)
371
- return self.extract_name_from_search_result(result, name_type)
372
-
373
- def extract_name_from_search_result(self, result: str, name_type: str = "full_name") -> str:
374
- """FIXED: Better name extraction with context awareness"""
375
- if not result or result == "Search failed":
376
- return "Name not found"
377
-
378
- # Look for names in sentences, prioritize those with context
379
- sentences = result.split('.')
380
- potential_names = []
381
-
382
- for sentence in sentences[:10]: # Check first 10 sentences
383
- # Find names in this sentence
384
- names = re.findall(r'\b[A-Z][a-zA-Z\'-]+(?:\s[A-Z][a-zA-Z\'-]+){0,2}\b', sentence)
385
-
386
- # Filter out obvious non-names
387
- exclude_patterns = [
388
- r'\b(January|February|March|April|May|June|July|August|September|October|November|December)\b',
389
- r'\b(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b',
390
- r'\b(Google|Wikipedia|Search|Website|Article|Page|Results|University|Institute|College|Museum)\b',
391
- r'\b(The|And|Or|But|In|On|At|To|For|Of|With|By|This|That|These|Those)\b',
392
- r'^\d+$' # Pure numbers
393
- ]
394
-
395
- for name in names:
396
- if not any(re.search(pattern, name, re.IGNORECASE) for pattern in exclude_patterns):
397
- if len(name.split()) <= 3: # Reasonable name length
398
- potential_names.append((name, sentence))
399
 
400
- if not potential_names:
401
- return "Name not found"
 
 
402
 
403
- # Return the first valid name found
404
- best_name = potential_names[0][0]
 
 
405
 
406
- if name_type == "first_name":
407
- return best_name.split()[0]
408
- elif name_type == "surname" or name_type == "last_name":
409
- return best_name.split()[-1]
410
- else:
411
- return best_name
412
 
413
- def extract_number_from_search(self, query: str) -> str:
414
- """Extract numbers from search results"""
415
- result = self.search_engine.comprehensive_search(query)
416
- return self.extract_number_from_search_result(result)
417
 
418
- def extract_number_from_search_result(self, result: str) -> str:
419
- """FIXED: Better number extraction with context"""
420
- if not result or result == "Search failed":
 
 
 
 
 
 
421
  return "Number not found"
422
 
423
- # Look for numbers with context
424
- sentences = result.split('.')
425
-
426
- for sentence in sentences[:5]:
427
- # Look for numbers in meaningful contexts
428
- if any(keyword in sentence.lower() for keyword in ['total', 'sum', 'count', 'number', 'athletes', 'participants']):
429
- numbers = re.findall(r'\b\d+\b', sentence)
430
- if numbers:
431
- return numbers[0]
432
-
433
- # Fallback: any number in first few sentences
434
- numbers = re.findall(r'\b\d+\b', result)
435
- return numbers[0] if numbers else "Number not found"
436
-
437
- def extract_location_from_search(self, query: str) -> str:
438
- """Extract locations from search results"""
439
- result = self.search_engine.comprehensive_search(query)
440
- return self.extract_location_from_search_result(result)
441
-
442
- def extract_location_from_search_result(self, result: str) -> str:
443
- """Extract locations from search result text"""
444
- # Look for place names
445
- locations = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', result)
446
-
447
- # Filter for likely locations
448
- location_indicators = ['University', 'Institute', 'Museum', 'Laboratory', 'Center', 'College']
449
- for location in locations:
450
- if any(indicator in location for indicator in location_indicators):
451
- return location
452
-
453
- # Fallback to first capitalized phrase
454
- return locations[0] if locations else "Location not found"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
  def get_api_status():
457
  """Check API configuration status"""
@@ -461,7 +413,7 @@ def get_api_status():
461
  return "โŒ Serper API: Not configured - Set SERPER_API_KEY environment variable"
462
 
463
  def run_gaia_evaluation(profile: gr.OAuthProfile | None):
464
- """Run GAIA evaluation with specialized solver"""
465
  if not profile:
466
  return "Please log in to Hugging Face first.", None
467
 
@@ -475,7 +427,7 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
475
 
476
  try:
477
  solver = GAIAQuestionSolver()
478
- print("โœ… GAIA specialized solver initialized")
479
  except Exception as e:
480
  return f"โŒ Solver initialization failed: {e}", None
481
 
@@ -516,7 +468,7 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
516
  print(f"โœ… Answer: {answer}")
517
 
518
  # Rate limiting
519
- time.sleep(0.4)
520
 
521
  except Exception as e:
522
  error_msg = f"Processing error: {str(e)}"
@@ -546,7 +498,7 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
546
  correct_count = result_data.get('correct_count', '?')
547
  total_attempted = result_data.get('total_attempted', '?')
548
 
549
- results_summary = f"""๐ŸŽฏ GAIA BENCHMARK RESULTS
550
 
551
  ๐Ÿ“Š Final Score: {score}%
552
  โœ… Correct Answers: {correct_count}/{total_attempted}
@@ -554,24 +506,24 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
554
  ๐Ÿ”ง System Status:
555
  {api_status}
556
 
557
- ๐Ÿš€ Specialized Features Applied:
558
- โ€ข FIXED: Reversed text detection (requires multiple indicators)
559
- โ€ข FIXED: Context-aware name extraction
560
- โ€ข FIXED: Number extraction with semantic filtering
561
- โ€ข FIXED: Enhanced factual question classification
562
- โ€ข File reference context extraction
563
- โ€ข Multi-step actor/person reasoning
564
- โ€ข Mathematical calculation and sports statistics
565
 
566
- ๐Ÿ“ˆ Key Improvements:
567
- โ€ข More precise reversed text handling ("tfel" โ†’ "right")
568
- โ€ข Better name extraction with context filtering
569
- โ€ข Improved number detection in relevant contexts
570
- โ€ข Enhanced country extraction for Olympic questions
571
- โ€ข Reduced false positives in question classification
572
 
573
  ๐Ÿ’ก Performance Notes:
574
- This updated agent includes critical fixes for GAIA benchmark patterns and should show significant improvement over previous versions."""
575
 
576
  return results_summary, pd.DataFrame(detailed_logs)
577
 
@@ -579,17 +531,24 @@ This updated agent includes critical fixes for GAIA benchmark patterns and shoul
579
  return f"โŒ Submission failed: {str(e)}\n\nAnswers were processed but could not be submitted.", pd.DataFrame(detailed_logs)
580
 
581
  # Gradio Interface
582
- with gr.Blocks(title="GAIA Specialized Agent", theme=gr.themes.Soft()) as demo:
583
  gr.Markdown("""
584
- # ๐Ÿง  GAIA Benchmark Specialized Agent (Fixed Version)
585
-
586
- **๐ŸŽฏ Updated with Critical Fixes for GAIA Questions**
587
-
588
- This agent includes fixes for:
589
- - ๐Ÿ”„ More precise reversed text detection (requires multiple indicators)
590
- - ๐Ÿ” Context-aware name extraction
591
- - ๐Ÿ”ข Improved number extraction with semantic filtering
592
- - ๐ŸŽฏ Enhanced factual question classification
 
 
 
 
 
 
 
593
 
594
  **๐Ÿ”ง Setup Required:**
595
  - Set `SERPER_API_KEY` in your Hugging Face Space secrets
@@ -608,7 +567,7 @@ with gr.Blocks(title="GAIA Specialized Agent", theme=gr.themes.Soft()) as demo:
608
  )
609
 
610
  evaluate_button = gr.Button(
611
- "๐Ÿš€ Run GAIA Evaluation",
612
  variant="primary",
613
  size="lg"
614
  )
 
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
  class GAIASpecializedSearchEngine:
16
+ """GAIA-specialized search engine with improved result processing"""
17
 
18
  def __init__(self):
19
  self.session = requests.Session()
 
58
  print(f"Search error: {e}")
59
  return {}
60
 
61
def comprehensive_search(self, query: str, num_results: int = 15) -> Dict[str, Any]:
    """Run a search for *query* and return the raw result structure.

    The call is delegated to ``self.search_with_serper``; this method adds
    only a progress log line.

    Args:
        query: Search text, forwarded unchanged.
        num_results: Second positional argument handed to
            ``search_with_serper``. Defaults to 15, the value that used to
            be hard-coded here.

    Returns:
        Whatever ``search_with_serper`` returns (presumably an empty dict
        on failure - the helper's body is not visible from here; verify).
    """
    # Log only the first 100 characters so long questions stay readable.
    print(f"🔍 Searching: {query[:100]}...")
    return self.search_with_serper(query, num_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  class GAIAQuestionSolver:
67
+ """Improved solver for GAIA benchmark questions"""
68
 
69
def __init__(self) -> None:
    """Create the solver together with the search engine it queries."""
    engine = GAIASpecializedSearchEngine()
    self.search_engine = engine
 
 
 
 
71
 
72
def solve_question(self, question: str) -> str:
    """Route *question* to the first specialised solver whose detector fires.

    Detectors are consulted in a fixed priority order: reversed text,
    computation, person, location, numerical, date. The first detector that
    returns a truthy value selects the solver; when none fires the question
    goes to ``solve_general_question``.

    Args:
        question: The question text.

    Returns:
        The string produced by the selected solver.
    """
    # Log only the first 100 characters so long questions stay readable.
    print(f"🤔 Analyzing: {question[:100]}...")

    # Order matters: an earlier detector shadows every later one.
    routes = (
        (self.is_genuine_reversed_text_question, self.solve_reversed_text),
        (self.is_computational_question, self.solve_computational_question),
        (self.is_person_question, self.solve_person_question),
        (self.is_location_question, self.solve_location_question),
        (self.is_numerical_question, self.solve_numerical_question),
        (self.is_date_question, self.solve_date_question),
    )
    for detect, solve in routes:
        if detect(question):
            return solve(question)

    # Nothing matched: fall back to the general factual search.
    return self.solve_general_question(question)
102
 
103
def is_genuine_reversed_text_question(self, question: str) -> bool:
    """Return True when *question* contains a tell-tale word spelled backwards.

    Every run of four or more ASCII letters in the lower-cased question is
    reversed and compared against a small set of marker words. One hit is
    enough.

    Args:
        question: The question text.

    Returns:
        True if at least one token, read backwards, is a marker word.
    """
    # Built once per call; it is loop-invariant.
    markers = {'left', 'right', 'opposite', 'answer', 'word', 'text'}
    candidates = re.findall(r'\b[a-z]{4,}\b', question.lower())
    # any() stops at the first hit instead of collecting every match.
    return any(token[::-1] in markers for token in candidates)
 
117
 
118
def solve_reversed_text(self, question: str) -> str:
    """Decode a back-to-front question and answer its direction puzzle.

    The whole question is reversed, so word order is restored as well as
    spelling. Punctuation is then dropped, so a quoted ``"tfel"`` is seen
    as ``left``. Two cases are handled, in this order:

    1. "opposite of (the word) X": returns the opposite of X when X is
       left/right/up/down, otherwise X itself.
    2. A bare ``left`` or ``right`` anywhere: returns the other one.

    Args:
        question: Question text written backwards.

    Returns:
        The answer word, or "Could not determine reversed text answer".
    """
    opposites = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}
    # Words that may sit between "opposite" and its actual target.
    filler = {'of', 'the', 'word'}

    decoded = question[::-1].lower()
    # Keep letters only so quotes, commas and full stops cannot glue onto words.
    tokens = ''.join(ch if ch.isalpha() else ' ' for ch in decoded).split()

    if 'opposite' in tokens:
        following = tokens[tokens.index('opposite') + 1:]
        target = next((tok for tok in following if tok not in filler), None)
        if target is not None:
            return opposites.get(target, target)

    # Only left/right are answered on their own; "up" is too common a word
    # to treat as a puzzle when it is not the target of "opposite".
    for tok in tokens:
        if tok in ('left', 'right'):
            return opposites[tok]

    return "Could not determine reversed text answer"
137
+
138
def is_computational_question(self, question: str) -> bool:
    """Return True when *question* uses an arithmetic verb as a whole word.

    Matching is word-based so that "Summer", "address" or "computer" no
    longer count as "sum", "add" or "compute". Simple inflections (-s, -d,
    -ed, -ing) of each keyword are accepted.

    Args:
        question: The question text.

    Returns:
        True if a computation keyword is present.
    """
    pattern = (
        r'\b(?:calculate|compute|sum|total|multiply|divide|add|subtract)'
        r'(?:s|d|ed|ing)?\b'
    )
    return re.search(pattern, question.lower()) is not None
142
+
143
def solve_computational_question(self, question: str) -> str:
    """Evaluate simple arithmetic stated in *question*, else search for it.

    Numbers are pulled from the text and combined according to the first
    operation found, checked in the order add, multiply, subtract, divide.
    An operation is recognised by a whole-word keyword or by its symbol
    standing between two numbers. Subtraction and division use only the
    first two numbers. The method defers to ``search_and_extract_number``
    when fewer than two numbers are present, when no operation is
    recognised, or when the divisor is zero.

    Args:
        question: The question text.

    Returns:
        The result as a string, without a trailing ".0" for whole numbers,
        or the string returned by ``search_and_extract_number``.
    """
    text = question.lower()

    # A '-' is a sign only when it does not directly follow a digit, letter
    # or dot, so "10-4" yields 10 and 4 rather than 10 and -4.
    operands = [
        float(tok)
        for tok in re.findall(r'(?<![\w.])-?\d+(?:\.\d+)?', text)
    ]
    if len(operands) < 2:
        return self.search_and_extract_number(question)

    def mentions(words: str, symbol: str) -> bool:
        """True if a keyword appears, or *symbol* sits between two numbers."""
        worded = re.search(rf'\b(?:{words})\b', text)
        symbolic = re.search(rf'\d\s*{re.escape(symbol)}\s*-?\d', text)
        return bool(worded or symbolic)

    if mentions('sum|sums|add|adds|added|adding|total|plus', '+'):
        result = float(sum(operands))
    elif mentions('multiply|multiplies|multiplied|multiplying|times', '*'):
        result = 1.0
        for value in operands:
            result *= value
    elif mentions('subtract|subtracts|subtracted|subtracting|minus', '-'):
        result = operands[0] - operands[1]
    elif mentions('divide|divides|divided|dividing', '/'):
        if operands[1] == 0:
            # No meaningful quotient; let the search path try instead of
            # reporting a made-up 0.
            return self.search_and_extract_number(question)
        result = operands[0] / operands[1]
    else:
        return self.search_and_extract_number(question)

    # Trim binary floating-point noise such as 0.30000000000000004.
    result = round(result, 10)
    return str(int(result)) if result.is_integer() else str(result)
172
 
173
def is_person_question(self, question: str) -> bool:
    """Return True when *question* asks about a person.

    Keywords are matched as whole words, so "whole" and "tournament" no
    longer trigger via the substrings "who" and "name". The list keeps the
    original keywords plus the inflections the old substring test also
    accepted (whom/whose, plurals, "named").

    Args:
        question: The question text.

    Returns:
        True if a person-related keyword is present.
    """
    pattern = (
        r'\b(?:who|whom|whose|actors?|persons?|name[sd]?|characters?'
        r'|played|starred)\b'
    )
    return re.search(pattern, question.lower()) is not None
 
 
 
 
 
177
 
178
def solve_person_question(self, question: str) -> str:
    """Search for *question* and return the most plausible person name.

    Sources are tried in order:

    1. ``answerBox["answer"]``, if it passes ``looks_like_person_name``.
    2. ``knowledgeGraph["title"]``, under the same check.
    3. Titles and snippets of the first five ``organic`` results, handed
       to ``extract_person_from_text``.

    Non-string answers and titles are converted with ``str`` and a missing
    or ``None`` ``organic`` list is treated as empty, so an unusual
    response cannot raise here.

    Args:
        question: The question text; also passed on so the name can be
            formatted as the question requests.

    Returns:
        The formatted name, the result of ``extract_person_from_text``, or
        "Person information not found" when the search returns nothing.
    """
    data = self.search_engine.comprehensive_search(question)
    if not data:
        return "Person information not found"

    # 1. Direct answer.
    answer_box = data.get("answerBox")
    if isinstance(answer_box, dict) and answer_box.get("answer") is not None:
        candidate = str(answer_box["answer"]).strip()
        if self.looks_like_person_name(candidate):
            return self.format_person_answer(candidate, question)

    # 2. Knowledge-graph title.
    graph = data.get("knowledgeGraph")
    if isinstance(graph, dict) and graph.get("title") is not None:
        title = str(graph["title"])
        if self.looks_like_person_name(title):
            return self.format_person_answer(title, question)

    # 3. Free text from the top organic hits ("<title> <snippet> " each).
    hits = (data.get("organic") or [])[:5]
    combined = "".join(
        f"{hit.get('title', '')} {hit.get('snippet', '')} " for hit in hits
    )
    return self.extract_person_from_text(combined, question)
203
+
204
def looks_like_person_name(self, text: str) -> bool:
    """Heuristically decide whether *text* could be a person's name.

    Accepts one to four whitespace-separated words, at most 50 characters
    in total. Each word must start with an upper-case letter and otherwise
    consist of letters. Apostrophes, hyphens and full stops inside a word
    are tolerated, so "O'Brien", "Jean-Luc" and "J.K." pass.

    Args:
        text: Candidate string.

    Returns:
        True if the text has the shape of a name.
    """
    if not text or len(text) > 50:
        return False

    parts = text.split()
    if not 1 <= len(parts) <= 4:
        return False

    for part in parts:
        # Strip the punctuation that legitimately occurs inside names before
        # the letters-only check.
        letters = (
            part.replace("'", "").replace("\u2019", "")
            .replace("-", "").replace(".", "")
        )
        if not letters or not part[0].isupper() or not letters.isalpha():
            return False
    return True
214
+
215
def format_person_answer(self, name: str, question: str) -> str:
    """Return the part of *name* that *question* asks for.

    Args:
        name: A person's name, words separated by whitespace.
        question: The question text; checked case-insensitively for
            "first name" / "given name" and for "last name" / "surname" /
            "family name".

    Returns:
        The first word, the last word, or *name* unchanged when the
        question requests neither or *name* contains no words.
    """
    parts = name.split()
    if not parts:
        return name

    wanted = question.lower()
    if 'first name' in wanted or 'given name' in wanted:
        return parts[0]
    if any(term in wanted for term in ('last name', 'surname', 'family name')):
        return parts[-1]
    return name
226
+
227
def extract_person_from_text(self, text: str, question: str) -> str:
    """Return the first plausible person name found in *text*.

    Candidates are runs of two or three capitalised words. Before a
    candidate is accepted:

    * leading sentence words ("The", "In", "On", "At", "An") are stripped,
      so "In Marie Curie" becomes "Marie Curie";
    * it is discarded if fewer than two words remain, or if it contains one
      of the known place phrases. The old exact-match test never caught
      three-word captures such as "In New York".

    Args:
        text: Free text, e.g. concatenated search titles and snippets.
        question: Passed to ``format_person_answer`` to pick the requested
            part of the name.

    Returns:
        The formatted name, or "Person name not found".
    """
    candidates = re.findall(
        r'\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s[A-Z][a-z]+)?\b', text
    )

    exclude = {'The New', 'New York', 'Los Angeles', 'Las Vegas', 'United States'}
    leading_noise = {'The', 'In', 'On', 'At', 'An'}

    for candidate in candidates:
        parts = candidate.split()
        while parts and parts[0] in leading_noise:
            parts.pop(0)
        if len(parts) < 2:
            continue
        cleaned = " ".join(parts)
        if any(place in cleaned for place in exclude):
            continue
        return self.format_person_answer(cleaned, question)

    return "Person name not found"
240
 
241
def is_location_question(self, question: str) -> bool:
    """Return True when *question* asks about a place.

    Keywords are matched as whole words (plurals included), so "capacity",
    "statement" and "replaced" no longer trigger via the substrings "city",
    "state" and "place".

    Args:
        question: The question text.

    Returns:
        True if a location keyword is present.
    """
    pattern = (
        r'\b(?:where|country|countries|city|cities|states?|locations?'
        r'|places?|born in|from)\b'
    )
    return re.search(pattern, question.lower()) is not None
245
+
246
def solve_location_question(self, question: str) -> str:
    """Search for *question* and return a location.

    ``answerBox["answer"]`` is returned, stripped, when it passes
    ``looks_like_location``. Otherwise the snippets of the first three
    ``organic`` results are joined and handed to
    ``extract_location_from_text``. A non-string answer is converted with
    ``str`` and a missing or ``None`` ``organic`` list is treated as empty.

    Args:
        question: The question text.

    Returns:
        The location string, or "Location not found" when the search
        returns nothing.
    """
    data = self.search_engine.comprehensive_search(question)
    if not data:
        return "Location not found"

    answer_box = data.get("answerBox")
    if isinstance(answer_box, dict) and answer_box.get("answer") is not None:
        candidate = str(answer_box["answer"]).strip()
        if self.looks_like_location(candidate):
            return candidate

    # Each snippet is followed by one space, as the downstream regexes expect
    # word separation between snippets.
    hits = (data.get("organic") or [])[:3]
    combined = "".join(f"{hit.get('snippet', '')} " for hit in hits)
    return self.extract_location_from_text(combined)
265
+
266
def looks_like_location(self, text: str) -> bool:
    """Heuristically decide whether *text* could name a location.

    The text must be non-empty, at most 100 characters long and contain at
    least one letter, so whitespace-only and purely numeric answers are
    rejected. Beyond that it passes if it is short (four words or fewer) or
    mentions an institution or region word such as "University" or
    "County".

    Args:
        text: Candidate string.

    Returns:
        True if the text plausibly names a place.
    """
    if not text or len(text) > 100:
        return False
    if not any(ch.isalpha() for ch in text):
        return False

    location_indicators = ('University', 'College', 'City', 'County', 'State', 'Country')
    is_short = len(text.split()) <= 4
    return is_short or any(marker in text for marker in location_indicators)
273
+
274
def extract_location_from_text(self, text: str) -> str:
    """Pull a place name out of free text.

    Patterns are tried in order: "in X", "at X", "X University",
    "X College", where X is a run of capitalised words. For each pattern
    the first capture that does not start with a month or weekday name is
    returned, so "in January" is not mistaken for a place. If no pattern
    matches, the first run of capitalised words in the text is returned.

    Args:
        text: Free text, e.g. concatenated search snippets.

    Returns:
        The extracted phrase, or "Location not found".
    """
    location_patterns = [
        r'\bin ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
        r'\bat ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
        r'([A-Z][a-z]+(?: [A-Z][a-z]+)*) University',
        r'([A-Z][a-z]+(?: [A-Z][a-z]+)*) College',
    ]
    calendar_words = {
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December',
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
        'Saturday', 'Sunday',
    }

    for pattern in location_patterns:
        # finditer stops at the first usable match instead of collecting all.
        for match in re.finditer(pattern, text):
            place = match.group(1)
            if place.split()[0] not in calendar_words:
                return place

    # Fallback: first capitalised phrase anywhere in the text.
    fallback = re.search(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text)
    return fallback.group(0) if fallback else "Location not found"
 
 
 
 
 
295
 
296
def is_numerical_question(self, question: str) -> bool:
    """Return True when *question* asks for a quantity.

    Phrases are matched on word boundaries, so "accountant", "discounted"
    and "country" no longer trigger via the substring "count".

    Args:
        question: The question text.

    Returns:
        True if a quantity phrase is present.
    """
    pattern = (
        r'\b(?:how many|how much|number of'
        r'|count(?:s|ed|ing)?|totals?)\b'
    )
    return re.search(pattern, question.lower()) is not None
300
 
301
def solve_numerical_question(self, question: str) -> str:
    """Answer a quantity question by delegating to ``search_and_extract_number``."""
    answer = self.search_and_extract_number(question)
    return answer
304
+
305
def search_and_extract_number(self, question: str) -> str:
    """Search for *question* and return the most relevant number as text.

    Sources are tried in order:

    1. The first number in ``answerBox["answer"]``.
    2. Within the first ten sentences of the top five ``organic``
       snippets, the first number in a sentence that shares a content word
       with the question.
    3. The first number anywhere in those snippets.

    Sentences are split on terminal punctuation followed by whitespace, so
    a decimal such as "3.5" is not cut in half. Thousands separators are
    removed from the result.

    Args:
        question: The question text.

    Returns:
        The number as a string, or "Number not found".
    """
    data = self.search_engine.comprehensive_search(question)
    if not data:
        return "Number not found"

    number_re = re.compile(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b')

    def first_number(chunk: str):
        """Return the first number in *chunk* without commas, or None."""
        found = number_re.search(chunk)
        return found.group(0).replace(',', '') if found else None

    # 1. Direct answer (str() guards against a numeric JSON value).
    answer_box = data.get("answerBox")
    if isinstance(answer_box, dict) and answer_box.get("answer") is not None:
        direct = first_number(str(answer_box["answer"]).strip())
        if direct is not None:
            return direct

    hits = (data.get("organic") or [])[:5]
    snippets = "".join(f"{hit.get('snippet', '')} " for hit in hits)

    # 2. Prefer a number from a sentence that mentions the question's topic.
    # Short words and generic question words carry no topic information.
    generic = {
        'what', 'which', 'when', 'where', 'many', 'much', 'that', 'this',
        'with', 'from', 'have', 'does', 'were', 'been', 'there', 'their',
        'number', 'total', 'count',
    }
    keywords = [
        word for word in re.findall(r'[a-z0-9]+', question.lower())
        if len(word) > 3 and word not in generic
    ]
    for sentence in re.split(r'(?<=[.!?])\s+', snippets)[:10]:
        candidate = first_number(sentence)
        if candidate is None:
            continue
        lowered = sentence.lower()
        if any(word in lowered for word in keywords):
            return candidate

    # 3. Fallback: first number found anywhere in the snippets.
    return first_number(snippets) or "Number not found"
340
+
341
def is_date_question(self, question: str) -> bool:
    """Return True when the question asks about a date or point in time.

    The question is lower-cased and searched for one of the temporal words
    "when", "year(s)", "date(s)", "born", "died", "founded" or
    "established". Words are matched on word boundaries so that unrelated
    words which merely contain a keyword (e.g. "candidate", "update",
    "studied") do not classify the question as a date question.

    Args:
        question: The raw question text.

    Returns:
        True if a temporal keyword occurs as a whole word, else False.
    """
    temporal_pattern = r'\b(?:when|years?|dates?|born|died|founded|established)\b'
    return re.search(temporal_pattern, question.lower()) is not None
345
+
346
def solve_date_question(self, question: str) -> str:
    """Search for the question and return the first date or year found.

    The answer box is inspected first, then the concatenated snippets of
    the first three organic results. Within each text a full
    "Month D, YYYY" date wins over a bare four-digit year (19xx / 20xx).

    Args:
        question: The raw question text, passed verbatim to the search engine.

    Returns:
        The matched date or year string, or "Date not found".
    """
    full_date_pattern = r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+(?:19|20)\d{2}\b'
    year_pattern = r'\b(?:19|20)\d{2}\b'

    def first_temporal_match(text):
        # A full date takes priority over a bare year; None when neither occurs.
        full_dates = re.findall(full_date_pattern, text)
        if full_dates:
            return full_dates[0]
        bare_years = re.findall(year_pattern, text)
        if bare_years:
            return bare_years[0]
        return None

    results = self.search_engine.comprehensive_search(question)
    if not results:
        return "Date not found"

    # Answer box takes precedence over the organic snippets.
    if "answerBox" in results and "answer" in results["answerBox"]:
        boxed = first_temporal_match(results["answerBox"]["answer"].strip())
        if boxed is not None:
            return boxed

    # Each snippet is followed by a single space, mirroring plain concatenation.
    snippet_text = "".join(
        f"{hit.get('snippet', '')} " for hit in results.get("organic", [])[:3]
    )
    from_snippets = first_temporal_match(snippet_text)
    if from_snippets is not None:
        return from_snippets

    return "Date not found"
378
+
379
def solve_general_question(self, question: str) -> str:
    """Search for the question and return the best available text answer.

    Sources are tried in order: the answer box ("answer", then "snippet"),
    the knowledge graph description, and finally the first of the top three
    organic snippets whose stripped length exceeds ten characters.

    Args:
        question: The raw question text, passed verbatim to the search engine.

    Returns:
        The stripped answer text, "Information not found" when the search
        returned nothing, or "Answer not found in search results" when no
        source yielded usable text.
    """
    results = self.search_engine.comprehensive_search(question)
    if not results:
        return "Information not found"

    # Answer box is usually the most direct answer.
    if "answerBox" in results:
        box = results["answerBox"]
        for field in ("answer", "snippet"):
            if field in box:
                return box[field].strip()

    # Knowledge graph description comes next.
    if "knowledgeGraph" in results and "description" in results["knowledgeGraph"]:
        return results["knowledgeGraph"]["description"].strip()

    # Otherwise take the first sufficiently long snippet among the top hits.
    candidates = (hit.get("snippet", "") for hit in results.get("organic", [])[:3])
    best = next(
        (text.strip() for text in candidates if text and len(text.strip()) > 10),
        None,
    )
    if best is not None:
        return best

    return "Answer not found in search results"
407
 
408
  def get_api_status():
409
  """Check API configuration status"""
 
413
  return "โŒ Serper API: Not configured - Set SERPER_API_KEY environment variable"
414
 
415
  def run_gaia_evaluation(profile: gr.OAuthProfile | None):
416
+ """Run GAIA evaluation with improved solver"""
417
  if not profile:
418
  return "Please log in to Hugging Face first.", None
419
 
 
427
 
428
  try:
429
  solver = GAIAQuestionSolver()
430
+ print("โœ… GAIA improved solver initialized")
431
  except Exception as e:
432
  return f"โŒ Solver initialization failed: {e}", None
433
 
 
468
  print(f"โœ… Answer: {answer}")
469
 
470
  # Rate limiting
471
+ time.sleep(0.5)
472
 
473
  except Exception as e:
474
  error_msg = f"Processing error: {str(e)}"
 
498
  correct_count = result_data.get('correct_count', '?')
499
  total_attempted = result_data.get('total_attempted', '?')
500
 
501
+ results_summary = f"""๐ŸŽฏ GAIA BENCHMARK RESULTS (IMPROVED VERSION)
502
 
503
  ๐Ÿ“Š Final Score: {score}%
504
  โœ… Correct Answers: {correct_count}/{total_attempted}
 
506
  ๐Ÿ”ง System Status:
507
  {api_status}
508
 
509
+ ๐Ÿš€ Key Improvements Made:
510
+ โ€ข Fixed overly broad reversed text detection
511
+ โ€ข Improved search result processing with structured data
512
+ โ€ข Better answer box and knowledge graph utilization
513
+ โ€ข Enhanced person/actor name extraction
514
+ โ€ข Improved numerical and date extraction
515
+ โ€ข More precise question classification
516
+ โ€ข Eliminated generic "right" fallback answers
517
 
518
+ ๐Ÿ“ˆ Technical Fixes:
519
+ โ€ข Removed faulty 'fo' pattern that triggered false positives
520
+ โ€ข Added proper search result structure handling
521
+ โ€ข Implemented context-aware answer formatting
522
+ โ€ข Better handling of edge cases and errors
523
+ โ€ข Improved rate limiting and error recovery
524
 
525
  ๐Ÿ’ก Performance Notes:
526
+ This version should show significantly better accuracy by properly processing search results and avoiding the classification errors that caused nonsensical answers in the previous version."""
527
 
528
  return results_summary, pd.DataFrame(detailed_logs)
529
 
 
531
  return f"โŒ Submission failed: {str(e)}\n\nAnswers were processed but could not be submitted.", pd.DataFrame(detailed_logs)
532
 
533
  # Gradio Interface
534
+ with gr.Blocks(title="GAIA Improved Agent", theme=gr.themes.Soft()) as demo:
535
  gr.Markdown("""
536
+ # ๐Ÿง  GAIA Benchmark Agent (IMPROVED VERSION)
537
+
538
+ **๐Ÿ”ง Major Fixes Applied:**
539
+ - โœ… Fixed overly broad reversed text detection that caused false positives
540
+ - โœ… Improved search result processing to use structured data properly
541
+ - โœ… Enhanced question classification to avoid nonsensical answers
542
+ - โœ… Better extraction of names, numbers, dates, and locations
543
+ - โœ… Proper handling of answer boxes and knowledge graphs
544
+
545
+ **๐ŸŽฏ Specialized Question Handling:**
546
+ - ๐Ÿ”„ Genuine reversed text questions (with precise detection)
547
+ - ๐Ÿงฎ Computational questions with proper math operations
548
+ - ๐ŸŽญ Person/actor questions with improved name extraction
549
+ - ๐Ÿ“ Location questions with geographic context
550
+ - ๐Ÿ”ข Numerical questions with context-aware number extraction
551
+ - ๐Ÿ“… Date/time questions with proper temporal parsing
552
 
553
  **๐Ÿ”ง Setup Required:**
554
  - Set `SERPER_API_KEY` in your Hugging Face Space secrets
 
567
  )
568
 
569
  evaluate_button = gr.Button(
570
+ "๐Ÿš€ Run GAIA Evaluation (Improved)",
571
  variant="primary",
572
  size="lg"
573
  )