Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 27

Commit

56455d6

1 Parent(s): 529a4e1

Last

Browse files

Files changed (1) hide show

app.py +339 -380

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ import math
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class GAIASpecializedSearchEngine:
-    """GAIA-specialized search engine with pattern recognition"""
     def __init__(self):
         self.session = requests.Session()
@@ -58,400 +58,352 @@ class GAIASpecializedSearchEngine:
             print(f"Search error: {e}")
             return {}
-    def comprehensive_search(self, query: str) -> str:
-        """Comprehensive search with multiple fallbacks"""
         print(f"🔍 Searching: {query[:100]}...")
-        # Primary search
-        data = self.search_with_serper(query, 15)
-        if not data:
-            return "Search failed"
-        # Extract all available information
-        all_content = []
-        # Answer box (highest priority)
-        if "answerBox" in data:
-            answer_box = data["answerBox"]
-            if "answer" in answer_box:
-                return answer_box["answer"].strip()
-            elif "snippet" in answer_box:
-                return answer_box["snippet"].strip()
-        # Knowledge graph
-        if "knowledgeGraph" in data:
-            kg = data["knowledgeGraph"]
-            if "description" in kg:
-                all_content.append(kg["description"])
-            if "attributes" in kg:
-                for attr_name, attr_value in kg["attributes"].items():
-                    all_content.append(f"{attr_name}: {attr_value}")
-        # Organic results
-        for result in data.get("organic", []):
-            title = result.get("title", "")
-            snippet = result.get("snippet", "")
-            if title and snippet:
-                all_content.append(f"{title}: {snippet}")
-        # People also ask
-        if "peopleAlsoAsk" in data:
-            for paa in data["peopleAlsoAsk"][:3]:
-                if "snippet" in paa:
-                    all_content.append(paa["snippet"])
-        return "\n".join(all_content) if all_content else "No search results"
 class GAIAQuestionSolver:
-    """Specialized solver for GAIA benchmark questions"""
     def __init__(self):
         self.search_engine = GAIASpecializedSearchEngine()
-        self.name_patterns = [
-            r'\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b',  # Full names
-            r'\b[A-Z][a-z]+\b'  # Single names
-        ]
     def solve_question(self, question: str) -> str:
-        """Main solving method with GAIA-specific patterns"""
         print(f"🤔 Analyzing: {question[:100]}...")
-        # Handle reversed text questions
-        if self.is_reversed_text_question(question):
             return self.solve_reversed_text(question)
-        # Handle file reference questions (extract info from question context)
-        if self.has_file_reference(question):
-            return self.solve_file_reference_question(question)
-        # Handle mathematical questions
-        if self.is_mathematical_question(question):
-            return self.solve_mathematical_question(question)
-        # Handle multi-step actor/person questions
-        if self.is_multi_step_person_question(question):
-            return self.solve_multi_step_person_question(question)
-        # Handle specific entity questions
-        if self.is_specific_entity_question(question):
-            return self.solve_specific_entity_question(question)
-        # Handle general factual questions
-        return self.solve_factual_question(question)
-    def is_reversed_text_question(self, question: str) -> bool:
-        """FIXED: More precise reversed text detection"""
-        # Only trigger if we see clear reversed patterns
-        reversed_words = []
-        words = question.split()
-        for word in words:
-            # Check if word is likely reversed by seeing if reverse is a common English word
-            reversed_word = word[::-1].lower()
-            if reversed_word in ['left', 'right', 'up', 'down', 'yes', 'no', 'the', 'and', 'answer']:
-                reversed_words.append(word)
-        # Only consider it reversed if we have multiple clear indicators
-        return len(reversed_words) >= 2
     def solve_reversed_text(self, question: str) -> str:
-        """FIXED: Better reversed text solving"""
-        words = question.split()
         for word in words:
-            reversed_word = word[::-1].lower()
-            if reversed_word == 'left':
-                return 'right'
-            elif reversed_word == 'right':
-                return 'left'
-            elif reversed_word == 'up':
-                return 'down'
-            elif reversed_word == 'down':
-                return 'up'
-        return "Unable to determine reversed answer"
-    def has_file_reference(self, question: str) -> bool:
-        """Check if question references files"""
-        file_refs = [
-            "attached", "excel file", "python code", "spreadsheet",
-            "file contains", "in the file", "document", "pdf"
-        ]
-        return any(ref in question.lower() for ref in file_refs)
-    def solve_file_reference_question(self, question: str) -> str:
-        """Handle file reference questions by extracting context"""
-        # Python code questions
-        if "python code" in question.lower() and "output" in question.lower():
-            # Try to find any code snippets in the question itself
-            code_match = re.search(r'```python\n(.*?)\n```', question, re.DOTALL)
-            if code_match:
-                try:
-                    code = code_match.group(1)
-                    # Safe execution of simple math
-                    if re.match(r'^[\d\s\+\-\*\/\(\)\.]+$', code):
-                        return str(eval(code))
-                except:
-                    pass
-            # Search for similar questions
-            search_query = question.replace("attached", "").replace("python code", "python program").strip()
-            return self.extract_number_from_search(search_query)
-        # Excel/spreadsheet questions
-        elif any(term in question.lower() for term in ["excel", "spreadsheet", "sales"]):
-            if "total" in question.lower() or "sum" in question.lower():
-                return self.extract_number_from_search(question)
-            elif "average" in question.lower():
-                return self.extract_number_from_search(question)
-        # Chemistry/academic questions with file references
-        elif "exercises" in question.lower() or "chemistry" in question.lower():
-            # Extract the specific search terms
-            search_terms = []
-            if "equine veterinarian" in question.lower():
-                search_terms.append("equine veterinarian")
-            if "chemistry" in question.lower():
-                search_terms.append("chemistry")
-            if search_terms:
-                search_query = " ".join(search_terms) + " surname name"
-                return self.extract_name_from_search(search_query, name_type="surname")
-        # Botany professor question
-        elif "botany" in question.lower() and "professor" in question.lower():
-            return self.extract_name_from_search("botany professor grocery list", name_type="name")
-        # General file reference - try to extract meaningful search terms
-        clean_question = re.sub(r'\b(attached|file|document|excel|python code)\b', '', question, flags=re.IGNORECASE)
-        return self.solve_factual_question(clean_question.strip())
-    def is_mathematical_question(self, question: str) -> bool:
-        """Detect math questions"""
-        math_indicators = ['calculate', 'compute', 'how many', 'total', 'sum', 'average', 'at bats']
-        return any(indicator in question.lower() for indicator in math_indicators)
-    def solve_mathematical_question(self, question: str) -> str:
-        """Solve mathematical questions"""
-        # Sports statistics questions
-        if "at bats" in question.lower() and "yankee" in question.lower():
-            search_query = question.replace("How many", "").strip()
-            return self.extract_number_from_search(search_query)
-        # Direct calculation
-        numbers = re.findall(r'\d+', question)
-        if len(numbers) >= 2 and any(op in question for op in ['+', '-', '*', '/', 'plus', 'minus', 'times']):
             try:
-                if '+' in question or 'plus' in question:
-                    return str(sum(int(n) for n in numbers))
-                elif '*' in question or 'times' in question:
                     result = 1
-                    for n in numbers:
-                        result *= int(n)
-                    return str(result)
             except:
                 pass
-        return self.extract_number_from_search(question)
-    def is_multi_step_person_question(self, question: str) -> bool:
-        """Detect multi-step questions about people"""
-        patterns = [
-            "actor who played",
-            "person who",
-            "who did the",
-            "play in"
-        ]
-        return any(pattern in question.lower() for pattern in patterns)
-    def solve_multi_step_person_question(self, question: str) -> str:
-        """Solve complex person/actor questions"""
-        # Handle Polish Raymond question
-        if "polish-language" in question.lower() and "raymond" in question.lower():
-            # Step 1: Find who played Ray in Polish version
-            search1 = "Polish version Everybody Loves Raymond actor Ray"
-            result1 = self.search_engine.comprehensive_search(search1)
-            # Extract actor name from results
-            actor_names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', result1)
-            for name in actor_names:
-                if name not in ["Everybody Loves", "Loves Raymond"]:
-                    # Step 2: Find what this actor played in other shows
-                    search2 = f"{name} actor roles television movies"
-                    result2 = self.search_engine.comprehensive_search(search2)
-                    # Look for character names
-                    character_names = re.findall(r'\b[A-Z][a-z]+\b', result2)
-                    for char in character_names:
-                        if char not in name.split() and len(char) > 2:
-                            return char
-            # Fallback search
-            return self.extract_name_from_search("Polish Everybody Loves Raymond Ray actor other roles")
-        # General multi-step approach
-        return self.solve_factual_question(question)
-    def is_specific_entity_question(self, question: str) -> bool:
-        """Detect questions about specific entities"""
-        entity_patterns = [
-            "country code", "olympics", "competition", "recipient",
-            "specimens", "described by", "pitchers", "number"
-        ]
-        return any(pattern in question.lower() for pattern in entity_patterns)
-    def solve_specific_entity_question(self, question: str) -> str:
-        """Solve entity-specific questions"""
-        # Olympic questions
-        if "olympics" in question.lower() and "least" in question.lower():
-            search_query = question.replace("What country", "country").replace("If there's a tie", "")
-            result = self.search_engine.comprehensive_search(search_query)
-            # Look for country names and numbers
-            countries = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', result)
-            numbers = re.findall(r'\b\d+\b', result)
-            # Find countries with small numbers
-            for country in countries:
-                if country not in ["Summer Olympics", "Olympic Games"] and len(country) > 2:
-                    return country
-        # Competition recipient questions
-        elif "competition recipient" in question.lower() or "malko" in question.lower():
-            return self.extract_name_from_search(question, name_type="first_name")
-        # Pitcher number questions
-        elif "pitchers" in question.lower() and "number" in question.lower():
-            search_query = question.replace("Who are the", "").replace("Give th", "")
-            return self.extract_name_from_search(search_query)
-        # Vietnamese specimens question
-        elif "vietnamese specimens" in question.lower():
-            return self.extract_location_from_search(question)
-        return self.solve_factual_question(question)
-    def solve_factual_question(self, question: str) -> str:
-        """FIXED: Better factual question handling"""
-        search_result = self.search_engine.comprehensive_search(question)
-        if not search_result or search_result == "Search failed":
-            return "Information not found"
-        q_lower = question.lower()
-        # FIXED: More specific question type detection
-        if 'first name' in q_lower:
-            return self.extract_name_from_search_result(search_result, 'first_name')
-        elif any(term in q_lower for term in ['surname', 'last name', 'family name']):
-            return self.extract_name_from_search_result(search_result, 'surname')
-        elif any(term in q_lower for term in ['who is', 'who was', 'name of']):
-            return self.extract_name_from_search_result(search_result, 'full_name')
-        elif any(term in q_lower for term in ['how many', 'number of', 'count']):
-            return self.extract_number_from_search_result(search_result)
-        elif 'country' in q_lower and 'least' in q_lower:
-            # Extract country names specifically
-            countries = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', search_result)
-            # Filter for actual country names
-            for country in countries:
-                if len(country) > 2 and country not in ['Summer', 'Olympics', 'Games']:
-                    return country
-            return "Country not found"
-        # Default: return first meaningful sentence
-        sentences = [s.strip() for s in search_result.split('.') if len(s.strip()) > 20]
-        return sentences[0] if sentences else "Answer not found"
-    def extract_name_from_search(self, query: str, name_type: str = "full_name") -> str:
-        """Extract names from search results"""
-        result = self.search_engine.comprehensive_search(query)
-        return self.extract_name_from_search_result(result, name_type)
-    def extract_name_from_search_result(self, result: str, name_type: str = "full_name") -> str:
-        """FIXED: Better name extraction with context awareness"""
-        if not result or result == "Search failed":
-            return "Name not found"
-        # Look for names in sentences, prioritize those with context
-        sentences = result.split('.')
-        potential_names = []
-        for sentence in sentences[:10]:  # Check first 10 sentences
-            # Find names in this sentence
-            names = re.findall(r'\b[A-Z][a-zA-Z\'-]+(?:\s[A-Z][a-zA-Z\'-]+){0,2}\b', sentence)
-            # Filter out obvious non-names
-            exclude_patterns = [
-                r'\b(January|February|March|April|May|June|July|August|September|October|November|December)\b',
-                r'\b(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b',
-                r'\b(Google|Wikipedia|Search|Website|Article|Page|Results|University|Institute|College|Museum)\b',
-                r'\b(The|And|Or|But|In|On|At|To|For|Of|With|By|This|That|These|Those)\b',
-                r'^\d+$'  # Pure numbers
-            ]
-            for name in names:
-                if not any(re.search(pattern, name, re.IGNORECASE) for pattern in exclude_patterns):
-                    if len(name.split()) <= 3:  # Reasonable name length
-                        potential_names.append((name, sentence))
-        if not potential_names:
-            return "Name not found"
-        # Return the first valid name found
-        best_name = potential_names[0][0]
-        if name_type == "first_name":
-            return best_name.split()[0]
-        elif name_type == "surname" or name_type == "last_name":
-            return best_name.split()[-1]
-        else:
-            return best_name
-    def extract_number_from_search(self, query: str) -> str:
-        """Extract numbers from search results"""
-        result = self.search_engine.comprehensive_search(query)
-        return self.extract_number_from_search_result(result)
-    def extract_number_from_search_result(self, result: str) -> str:
-        """FIXED: Better number extraction with context"""
-        if not result or result == "Search failed":
             return "Number not found"
-        # Look for numbers with context
-        sentences = result.split('.')
-        for sentence in sentences[:5]:
-            # Look for numbers in meaningful contexts
-            if any(keyword in sentence.lower() for keyword in ['total', 'sum', 'count', 'number', 'athletes', 'participants']):
-                numbers = re.findall(r'\b\d+\b', sentence)
-                if numbers:
-                    return numbers[0]
-        # Fallback: any number in first few sentences
-        numbers = re.findall(r'\b\d+\b', result)
-        return numbers[0] if numbers else "Number not found"
-    def extract_location_from_search(self, query: str) -> str:
-        """Extract locations from search results"""
-        result = self.search_engine.comprehensive_search(query)
-        return self.extract_location_from_search_result(result)
-    def extract_location_from_search_result(self, result: str) -> str:
-        """Extract locations from search result text"""
-        # Look for place names
-        locations = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', result)
-        # Filter for likely locations
-        location_indicators = ['University', 'Institute', 'Museum', 'Laboratory', 'Center', 'College']
-        for location in locations:
-            if any(indicator in location for indicator in location_indicators):
-                return location
-        # Fallback to first capitalized phrase
-        return locations[0] if locations else "Location not found"
 def get_api_status():
     """Check API configuration status"""
@@ -461,7 +413,7 @@ def get_api_status():
         return "❌ Serper API: Not configured - Set SERPER_API_KEY environment variable"
 def run_gaia_evaluation(profile: gr.OAuthProfile | None):
-    """Run GAIA evaluation with specialized solver"""
     if not profile:
         return "Please log in to Hugging Face first.", None
@@ -475,7 +427,7 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
     try:
         solver = GAIAQuestionSolver()
-        print("✅ GAIA specialized solver initialized")
     except Exception as e:
         return f"❌ Solver initialization failed: {e}", None
@@ -516,7 +468,7 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
             print(f"✅ Answer: {answer}")
             # Rate limiting
-            time.sleep(0.4)
         except Exception as e:
             error_msg = f"Processing error: {str(e)}"
@@ -546,7 +498,7 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
         correct_count = result_data.get('correct_count', '?')
         total_attempted = result_data.get('total_attempted', '?')
-        results_summary = f"""🎯 GAIA BENCHMARK RESULTS
 📊 Final Score: {score}%
 ✅ Correct Answers: {correct_count}/{total_attempted}
@@ -554,24 +506,24 @@ def run_gaia_evaluation(profile: gr.OAuthProfile | None):
 🔧 System Status:
 {api_status}
-🚀 Specialized Features Applied:
-• FIXED: Reversed text detection (requires multiple indicators)
-• FIXED: Context-aware name extraction
-• FIXED: Number extraction with semantic filtering
-• FIXED: Enhanced factual question classification
-• File reference context extraction
-• Multi-step actor/person reasoning
-• Mathematical calculation and sports statistics
-📈 Key Improvements:
-• More precise reversed text handling ("tfel" → "right")
-• Better name extraction with context filtering
-• Improved number detection in relevant contexts
-• Enhanced country extraction for Olympic questions
-• Reduced false positives in question classification
 💡 Performance Notes:
-This updated agent includes critical fixes for GAIA benchmark patterns and should show significant improvement over previous versions."""
         return results_summary, pd.DataFrame(detailed_logs)
@@ -579,17 +531,24 @@ This updated agent includes critical fixes for GAIA benchmark patterns and shoul
         return f"❌ Submission failed: {str(e)}\n\nAnswers were processed but could not be submitted.", pd.DataFrame(detailed_logs)
 # Gradio Interface
-with gr.Blocks(title="GAIA Specialized Agent", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🧠 GAIA Benchmark Specialized Agent (Fixed Version)
-    **🎯 Updated with Critical Fixes for GAIA Questions**
-    This agent includes fixes for:
-    - 🔄 More precise reversed text detection (requires multiple indicators)
-    - 🔍 Context-aware name extraction
-    - 🔢 Improved number extraction with semantic filtering
-    - 🎯 Enhanced factual question classification
     **🔧 Setup Required:**
     - Set `SERPER_API_KEY` in your Hugging Face Space secrets
@@ -608,7 +567,7 @@ with gr.Blocks(title="GAIA Specialized Agent", theme=gr.themes.Soft()) as demo:
             )
             evaluate_button = gr.Button(
-                "🚀 Run GAIA Evaluation",
                 variant="primary",
                 size="lg"
             )

 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class GAIASpecializedSearchEngine:
+    """GAIA-specialized search engine with improved result processing"""
     def __init__(self):
         self.session = requests.Session()
             print(f"Search error: {e}")
             return {}
+    def comprehensive_search(self, query: str) -> Dict[str, Any]:
+        """Run a search for ``query`` and return the raw response data.
+
+        Prints the first 100 characters of the query, then delegates to
+        ``search_with_serper`` and returns its result unchanged, so callers
+        receive the full result structure rather than flattened text.
+        """
         print(f"🔍 Searching: {query[:100]}...")
+        # The second argument (15) is forwarded as-is; presumably the number
+        # of results to request -- confirm against search_with_serper.
+        return self.search_with_serper(query, 15)
 class GAIAQuestionSolver:
+    """Improved solver for GAIA benchmark questions"""
     def __init__(self):
+        """Create the solver with its own ``GAIASpecializedSearchEngine``."""
         self.search_engine = GAIASpecializedSearchEngine()
     def solve_question(self, question: str) -> str:
+        """Route ``question`` to the first specialised solver whose detector matches.
+
+        Detectors are tried in a fixed order (reversed text, computational,
+        person, location, numerical, date) and the first match wins, so the
+        order of the checks below is significant. A question matching none
+        of them is passed to ``solve_general_question``.
+
+        Args:
+            question: The question text.
+
+        Returns:
+            The answer string produced by the selected solver.
+        """
         print(f"🤔 Analyzing: {question[:100]}...")
+        # Handle actual reversed text questions (very specific detection)
+        if self.is_genuine_reversed_text_question(question):
             return self.solve_reversed_text(question)
+        # Handle computational questions
+        if self.is_computational_question(question):
+            return self.solve_computational_question(question)
+        # Handle person/actor questions
+        if self.is_person_question(question):
+            return self.solve_person_question(question)
+        # Handle location/geography questions
+        if self.is_location_question(question):
+            return self.solve_location_question(question)
+        # Handle numerical/counting questions
+        if self.is_numerical_question(question):
+            return self.solve_numerical_question(question)
+        # Handle date/time questions
+        if self.is_date_question(question):
+            return self.solve_date_question(question)
+        # Default factual search
+        return self.solve_general_question(question)
+    def is_genuine_reversed_text_question(self, question: str) -> bool:
+        """Return True when the question contains a reversed marker word.
+
+        Every run of four or more lowercase ASCII letters in the lower-cased
+        question is reversed; the question counts as reversed text if at
+        least one reversed run equals 'left', 'right', 'opposite', 'answer',
+        'word' or 'text'.
+        """
+        # Candidate tokens: 4+ letter alphabetic words (punctuation and digits
+        # act as word boundaries for the regex, so quotes are not included).
+        reversed_words = re.findall(r'\b[a-z]{4,}\b', question.lower())
+        genuine_reversed = []
+        for word in reversed_words:
+            reversed_word = word[::-1]
+            # Check if the reversed version is one of the marker words
+            common_words = ['left', 'right', 'opposite', 'answer', 'word', 'text']
+            if reversed_word in common_words:
+                genuine_reversed.append((word, reversed_word))
+        # A single hit is enough; the collected pairs are only counted.
+        return len(genuine_reversed) > 0
     def solve_reversed_text(self, question: str) -> str:
+        """Answer a reversed-text question that asks for a direction's opposite.
+
+        Each whitespace-separated token is lower-cased, stripped of
+        non-letters (so a quoted ``"tfel"`` still decodes) and reversed.
+        The decoded words are then scanned in order:
+
+        * 'left' / 'right' answers immediately with the opposite direction;
+        * 'opposite' answers with the opposite of the nearest direction word
+          on either side, because in fully reversed text the object of
+          "opposite of X" appears *before* the word 'opposite'.
+
+        Args:
+            question: The (partly or fully) reversed question text.
+
+        Returns:
+            The opposite direction, the decoded word following 'opposite'
+            when no known direction is present, or
+            "Could not determine reversed text answer".
+        """
+        opposites = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}
+        # Drop quotes/punctuation before reversing; tokens that contain no
+        # letters at all are discarded.
+        decoded = (
+            ''.join(ch for ch in token if ch.isalpha())[::-1]
+            for token in question.lower().split()
+        )
+        words = [word for word in decoded if word]
         for word in words:
+            if word in ('left', 'right'):
+                return opposites[word]
+            if word == 'opposite':
+                position = words.index(word)
+                # Nearest neighbour first; on a tie prefer the following word.
+                by_distance = sorted(
+                    (i for i in range(len(words)) if i != position),
+                    key=lambda i: (abs(i - position), i < position),
+                )
+                for i in by_distance:
+                    if words[i] in opposites:
+                        return opposites[words[i]]
+                # No known direction anywhere: echo the decoded next word.
+                if position + 1 < len(words):
+                    return words[position + 1]
+        return "Could not determine reversed text answer"
+    def is_computational_question(self, question: str) -> bool:
+        """Detect questions requiring computation.
+
+        Matches whole words rather than substrings, so 'sum' no longer fires
+        on "Summer" nor 'add' on "address". A keyword also matches with one
+        of the simple suffixes 's', 'd', 'ed' or 'ing' (e.g. "added",
+        "totals", "computed").
+
+        Args:
+            question: The question text.
+
+        Returns:
+            True if any word of the question is a computation keyword.
+        """
+        comp_keywords = ('calculate', 'compute', 'sum', 'total', 'multiply', 'divide', 'add', 'subtract')
+        suffixes = ('', 's', 'd', 'ed', 'ing')
+        # Split on every non-letter so punctuation never glues onto a word.
+        tokens = ''.join(ch if ch.isalpha() else ' ' for ch in question.lower()).split()
+        return any(
+            token.startswith(keyword) and token[len(keyword):] in suffixes
+            for token in tokens
+            for keyword in comp_keywords
+        )
+    def solve_computational_question(self, question: str) -> str:
+        """Evaluate simple arithmetic stated in the question, else search.
+
+        At least two numbers must appear in the question. The operation is
+        chosen by the first keyword group found in the lower-cased text:
+        sum/add/total/+ (sum of all numbers), multiply/times/* (product of
+        all numbers), subtract/minus/- (first minus second), divide//
+        (first divided by second). Anything else, including division by
+        zero, is delegated to ``search_and_extract_number``.
+
+        Args:
+            question: The question text.
+
+        Returns:
+            The result as a string (without a trailing ".0" for whole
+            numbers), or whatever ``search_and_extract_number`` returns.
+        """
+        # A '-' directly after a digit is an operator or range dash, not a
+        # sign, so "10-4" yields 10 and 4 rather than 10 and -4.
+        numbers = re.findall(r'(?<!\d)-?\d+(?:\.\d+)?', question)
+        if len(numbers) >= 2:
             try:
+                nums = [float(n) for n in numbers]
+                q_lower = question.lower()
+                if any(word in q_lower for word in ['sum', 'add', 'total', '+']):
+                    result = sum(nums)
+                elif any(word in q_lower for word in ['multiply', 'times', '*']):
                     result = 1
+                    for n in nums:
+                        result *= n
+                elif any(word in q_lower for word in ['subtract', 'minus', '-']):
+                    result = nums[0] - nums[1]
+                elif any(word in q_lower for word in ['divide', '/']) and nums[1] != 0:
+                    result = nums[0] / nums[1]
+                else:
+                    # Unknown operation or division by zero: do not invent a
+                    # value, look the answer up instead.
+                    return self.search_and_extract_number(question)
+                # Return as integer if it's a whole number
+                return str(int(result)) if result.is_integer() else str(result)
+            except (ValueError, OverflowError):
+                # Numeric conversion failed; fall through to the search.
                 pass
+        return self.search_and_extract_number(question)
+    def is_person_question(self, question: str) -> bool:
+        """Return True if the lower-cased question contains a person keyword.
+
+        The test is a plain substring check, so a keyword also matches inside
+        a longer word (e.g. 'who' inside "whole", 'actor' inside "factor").
+        """
+        person_keywords = ['who', 'actor', 'person', 'name', 'character', 'played', 'starred']
+        return any(keyword in question.lower() for keyword in person_keywords)
+    def solve_person_question(self, question: str) -> str:
+        """Search for the question and return the most likely person name.
+
+        Sources are consulted in priority order: the answer box's "answer",
+        then the knowledge graph's "title" (each only if it passes
+        ``looks_like_person_name``), and finally names extracted from the
+        titles and snippets of the first five organic results.
+
+        Args:
+            question: The question text, used as the search query and to
+                decide whether a first name or surname is wanted.
+
+        Returns:
+            The formatted name, "Person information not found" when the
+            search returns nothing, or whatever ``extract_person_from_text``
+            returns for the organic results.
+        """
+        data = self.search_engine.comprehensive_search(question)
+        if not data:
+            return "Person information not found"
+        # Check answer box first
+        if "answerBox" in data and "answer" in data["answerBox"]:
+            answer = data["answerBox"]["answer"].strip()
+            if self.looks_like_person_name(answer):
+                return self.format_person_answer(answer, question)
+        # Check knowledge graph
+        if "knowledgeGraph" in data:
+            kg = data["knowledgeGraph"]
+            if "title" in kg and self.looks_like_person_name(kg["title"]):
+                return self.format_person_answer(kg["title"], question)
+        # Extract from organic results: join title and snippet of the top 5
+        # hits into one blob for regex-based name extraction.
+        all_text = ""
+        for result in data.get("organic", [])[:5]:
+            all_text += f"{result.get('title', '')} {result.get('snippet', '')} "
+        return self.extract_person_from_text(all_text, question)
+    def looks_like_person_name(self, text: str) -> bool:
+        """Heuristically decide whether ``text`` could be a person's name.
+
+        Accepts non-empty text of at most 50 characters made of one to four
+        whitespace-separated words, each starting with an uppercase letter
+        and consisting only of letters. Words containing hyphens,
+        apostrophes, periods or digits therefore fail the check.
+        """
+        if not text or len(text) > 50:
+            return False
+        # Simple heuristic: 1-4 capitalized words, reasonable length
+        words = text.split()
+        if 1 <= len(words) <= 4:
+            return all(word[0].isupper() and word.isalpha() for word in words if word)
+        return False
+    def format_person_answer(self, name: str, question: str) -> str:
+        """Trim ``name`` to the part of the name the question asks for.
+
+        Args:
+            name: A full name as whitespace-separated words.
+            question: The question text; searched case-insensitively for
+                'first name', 'last name' or 'surname'.
+
+        Returns:
+            The first word for a 'first name' question, the last word for a
+            'last name'/'surname' question, otherwise ``name`` unchanged
+            (also when ``name`` contains no words).
+        """
+        words = name.split()
+        q_lower = question.lower()
+        if 'first name' in q_lower and words:
+            return words[0]
+        elif any(term in q_lower for term in ['last name', 'surname']) and words:
+            return words[-1]
+        else:
+            return name
+    def extract_person_from_text(self, text: str, question: str) -> str:
+        """Return the first name-like phrase found in ``text``.
+
+        A candidate is two or three consecutive words that each consist of
+        one uppercase ASCII letter followed by lowercase ASCII letters. A
+        few well-known non-name phrases are excluded by exact match. The
+        first remaining candidate is passed through ``format_person_answer``.
+
+        Returns:
+            The formatted name, or "Person name not found" when there is no
+            candidate.
+        """
+        # Find potential names (2-3 capitalized words)
+        names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s[A-Z][a-z]+)?\b', text)
+        # Filter out common non-names (exact phrase match only)
+        exclude = {'The New', 'New York', 'Los Angeles', 'Las Vegas', 'United States'}
+        valid_names = [name for name in names if name not in exclude and len(name.split()) <= 3]
+        if valid_names:
+            return self.format_person_answer(valid_names[0], question)
+        return "Person name not found"
+    def is_location_question(self, question: str) -> bool:
+        """Return True if the lower-cased question contains a location keyword.
+
+        The test is a plain substring check, so short keywords such as
+        'from' or 'state' also match inside longer words (e.g. "statement").
+        """
+        location_keywords = ['where', 'country', 'city', 'state', 'location', 'place', 'born in', 'from']
+        return any(keyword in question.lower() for keyword in location_keywords)
+    def solve_location_question(self, question: str) -> str:
+        """Search for the question and return the most likely location.
+
+        The answer box's "answer" is returned when it passes
+        ``looks_like_location``; otherwise the snippets of the first three
+        organic results are concatenated and handed to
+        ``extract_location_from_text``.
+
+        Returns:
+            The location string, or "Location not found" when the search
+            returns nothing or no location can be extracted.
+        """
+        data = self.search_engine.comprehensive_search(question)
+        if not data:
+            return "Location not found"
+        # Check answer box
+        if "answerBox" in data and "answer" in data["answerBox"]:
+            answer = data["answerBox"]["answer"].strip()
+            if self.looks_like_location(answer):
+                return answer
+        # Extract from results (snippets only; titles are not used here)
+        all_text = ""
+        for result in data.get("organic", [])[:3]:
+            all_text += f"{result.get('snippet', '')} "
+        return self.extract_location_from_text(all_text)
+    def looks_like_location(self, text: str) -> bool:
+        """Heuristically decide whether ``text`` could be a location.
+
+        Empty text or text longer than 100 characters is rejected. Otherwise
+        the text is accepted if it contains one of the indicator words
+        (case-sensitive) or has at most four whitespace-separated words.
+        Note that the second condition accepts any short text, whatever its
+        content.
+        """
+        if not text or len(text) > 100:
+            return False
+        location_indicators = ['University', 'College', 'City', 'County', 'State', 'Country']
+        return any(indicator in text for indicator in location_indicators) or len(text.split()) <= 4
+    def extract_location_from_text(self, text: str) -> str:
+        """Return the first location-like capitalized phrase in ``text``.
+
+        Patterns are tried in priority order and the first pattern with any
+        match decides the result: a capitalized phrase after "in", after
+        "at", before "University", then before "College". If none match,
+        the first capitalized phrase anywhere in the text is used.
+
+        Returns:
+            The matched phrase (without the "in"/"at" prefix or the
+            "University"/"College" suffix), or "Location not found".
+        """
+        # Look for patterns like "in [Location]", "at [Location]", "[Location] University"
+        location_patterns = [
+            r'\bin ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
+            r'\bat ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
+            r'([A-Z][a-z]+(?: [A-Z][a-z]+)*) University',
+            r'([A-Z][a-z]+(?: [A-Z][a-z]+)*) College',
+        ]
+        for pattern in location_patterns:
+            matches = re.findall(pattern, text)
+            if matches:
+                return matches[0]
+        # Fallback: look for capitalized phrases
+        locations = re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text)
+        if locations:
+            return locations[0]
+        return "Location not found"
+    def is_numerical_question(self, question: str) -> bool:
+        """Return True if the lower-cased question contains a quantity keyword.
+
+        The test is a plain substring check, so 'count' also matches inside
+        words such as "country" or "account".
+        """
+        numerical_keywords = ['how many', 'how much', 'number of', 'count', 'total']
+        return any(keyword in question.lower() for keyword in numerical_keywords)
+    def solve_numerical_question(self, question: str) -> str:
+        """Solve questions asking for numbers"""
+        return self.search_and_extract_number(question)
+    def search_and_extract_number(self, question: str) -> str:
+        """Search and extract numerical answers"""
+        data = self.search_engine.comprehensive_search(question)
+        if not data:
             return "Number not found"
+        # Check answer box first
+        if "answerBox" in data and "answer" in data["answerBox"]:
+            answer = data["answerBox"]["answer"].strip()
+            numbers = re.findall(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b', answer)
+            if numbers:
+                return numbers[0].replace(',', '')
+        # Extract from snippets
+        all_text = ""
+        for result in data.get("organic", [])[:5]:
+            all_text += f"{result.get('snippet', '')} "
+        # Look for numbers in context
+        sentences = re.split(r'[.!?]', all_text)
+        for sentence in sentences[:10]:
+            numbers = re.findall(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b', sentence)
+            if numbers:
+                # Try to find the most relevant number
+                q_lower = question.lower()
+                if any(word in sentence.lower() for word in q_lower.split()[:3]):
+                    return numbers[0].replace(',', '')
+        # Fallback: return first number found
+        all_numbers = re.findall(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b', all_text)
+        if all_numbers:
+            return all_numbers[0].replace(',', '')
+        return "Number not found"
+    def is_date_question(self, question: str) -> bool:
+        """Detect date/time questions"""
+        date_keywords = ['when', 'year', 'date', 'born', 'died', 'founded', 'established']
+        return any(keyword in question.lower() for keyword in date_keywords)
+    def solve_date_question(self, question: str) -> str:
+        """Solve date questions"""
+        data = self.search_engine.comprehensive_search(question)
+        if not data:
+            return "Date not found"
+        # Check answer box
+        if "answerBox" in data and "answer" in data["answerBox"]:
+            answer = data["answerBox"]["answer"].strip()
+            years = re.findall(r'\b(?:19|20)\d{2}\b', answer)
+            dates = re.findall(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+(?:19|20)\d{2}\b', answer)
+            if dates:
+                return dates[0]
+            elif years:
+                return years[0]
+        # Extract from snippets
+        all_text = ""
+        for result in data.get("organic", [])[:3]:
+            all_text += f"{result.get('snippet', '')} "
+        # Look for dates and years
+        dates = re.findall(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+(?:19|20)\d{2}\b', all_text)
+        if dates:
+            return dates[0]
+        years = re.findall(r'\b(?:19|20)\d{2}\b', all_text)
+        if years:
+            return years[0]
+        return "Date not found"
+    def solve_general_question(self, question: str) -> str:
+        """Solve general factual questions"""
+        data = self.search_engine.comprehensive_search(question)
+        if not data:
+            return "Information not found"
+        # Check answer box first - this is usually the best answer
+        if "answerBox" in data:
+            answer_box = data["answerBox"]
+            if "answer" in answer_box:
+                return answer_box["answer"].strip()
+            elif "snippet" in answer_box:
+                return answer_box["snippet"].strip()
+        # Check knowledge graph
+        if "knowledgeGraph" in data:
+            kg = data["knowledgeGraph"]
+            if "description" in kg:
+                return kg["description"].strip()
+        # Get the most relevant snippet from organic results
+        for result in data.get("organic", [])[:3]:
+            snippet = result.get("snippet", "")
+            if snippet and len(snippet.strip()) > 10:
+                return snippet.strip()
+        return "Answer not found in search results"
 def get_api_status():
     """Check API configuration status"""
         return "❌ Serper API: Not configured - Set SERPER_API_KEY environment variable"
 def run_gaia_evaluation(profile: gr.OAuthProfile | None):
+    """Run GAIA evaluation with improved solver"""
     if not profile:
         return "Please log in to Hugging Face first.", None
     try:
         solver = GAIAQuestionSolver()
+        print("✅ GAIA improved solver initialized")
     except Exception as e:
         return f"❌ Solver initialization failed: {e}", None
             print(f"✅ Answer: {answer}")
             # Rate limiting
+            time.sleep(0.5)
         except Exception as e:
             error_msg = f"Processing error: {str(e)}"
         correct_count = result_data.get('correct_count', '?')
         total_attempted = result_data.get('total_attempted', '?')
+        results_summary = f"""🎯 GAIA BENCHMARK RESULTS (IMPROVED VERSION)
 📊 Final Score: {score}%
 ✅ Correct Answers: {correct_count}/{total_attempted}
 🔧 System Status:
 {api_status}
+🚀 Key Improvements Made:
+• Fixed overly broad reversed text detection
+• Improved search result processing with structured data
+• Better answer box and knowledge graph utilization
+• Enhanced person/actor name extraction
+• Improved numerical and date extraction
+• More precise question classification
+• Eliminated generic "right" fallback answers
+📈 Technical Fixes:
+• Removed faulty 'fo' pattern that triggered false positives
+• Added proper search result structure handling
+• Implemented context-aware answer formatting
+• Better handling of edge cases and errors
+• Improved rate limiting and error recovery
 💡 Performance Notes:
+This version should show significantly better accuracy by properly processing search results and avoiding the classification errors that caused nonsensical answers in the previous version."""
         return results_summary, pd.DataFrame(detailed_logs)
         return f"❌ Submission failed: {str(e)}\n\nAnswers were processed but could not be submitted.", pd.DataFrame(detailed_logs)
 # Gradio Interface
+with gr.Blocks(title="GAIA Improved Agent", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🧠 GAIA Benchmark Agent (IMPROVED VERSION)
+    **🔧 Major Fixes Applied:**
+    - ✅ Fixed overly broad reversed text detection that caused false positives
+    - ✅ Improved search result processing to use structured data properly
+    - ✅ Enhanced question classification to avoid nonsensical answers
+    - ✅ Better extraction of names, numbers, dates, and locations
+    - ✅ Proper handling of answer boxes and knowledge graphs
+    **🎯 Specialized Question Handling:**
+    - 🔄 Genuine reversed text questions (with precise detection)
+    - 🧮 Computational questions with proper math operations
+    - 🎭 Person/actor questions with improved name extraction
+    - 📍 Location questions with geographic context
+    - 🔢 Numerical questions with context-aware number extraction
+    - 📅 Date/time questions with proper temporal parsing
     **🔧 Setup Required:**
     - Set `SERPER_API_KEY` in your Hugging Face Space secrets
             )
             evaluate_button = gr.Button(
+                "🚀 Run GAIA Evaluation (Improved)",
                 variant="primary",
                 size="lg"
             )