LamiaYT committed
Commit d591a7a · 1 Parent(s): 56455d6
Files changed (3)
  1. app.py +323 -559
  2. requirements.txt +10 -12
  3. run.py +8 -0
app.py CHANGED
@@ -1,594 +1,358 @@
  import os
  import gradio as gr
  import requests
- import pandas as pd
  import re
  import time
- import json
- from typing import Dict, Any, List, Optional, Tuple
- from io import StringIO
- import ast
  import math

  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

- class GAIASpecializedSearchEngine:
-     """GAIA-specialized search engine with improved result processing"""
-
      def __init__(self):
-         self.session = requests.Session()
-         self.session.headers.update({
-             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-         })
-         self.serper_api_key = os.getenv("SERPER_API_KEY")
-         self.search_cache = {}
-
-     def search_with_serper(self, query: str, num_results: int = 10) -> Dict[str, Any]:
-         """Enhanced Serper search with better parameters"""
-         if not self.serper_api_key:
-             return {}
-
-         cache_key = f"{query}_{num_results}"
-         if cache_key in self.search_cache:
-             return self.search_cache[cache_key]

-         try:
-             url = "https://google.serper.dev/search"
-             payload = {
-                 "q": query,
-                 "num": num_results,
-                 "gl": "us",
-                 "hl": "en"
-             }
-             headers = {
-                 "X-API-KEY": self.serper_api_key,
-                 "Content-Type": "application/json"
-             }

-             response = self.session.post(url, json=payload, headers=headers, timeout=25)
-             if response.status_code == 200:
-                 result = response.json()
-                 self.search_cache[cache_key] = result
-                 return result
-             else:
-                 print(f"Search API error: {response.status_code}")
-                 return {}

          except Exception as e:
-             print(f"Search error: {e}")
-             return {}
-
-     def comprehensive_search(self, query: str) -> Dict[str, Any]:
-         """Return full search data structure instead of just text"""
-         print(f"🔍 Searching: {query[:100]}...")
-         return self.search_with_serper(query, 15)

- class GAIAQuestionSolver:
-     """Improved solver for GAIA benchmark questions"""
-
-     def __init__(self):
-         self.search_engine = GAIASpecializedSearchEngine()
-
-     def solve_question(self, question: str) -> str:
-         """Main solving method with improved pattern detection"""
-         print(f"🤔 Analyzing: {question[:100]}...")
-
-         # Handle actual reversed text questions (very specific detection)
-         if self.is_genuine_reversed_text_question(question):
-             return self.solve_reversed_text(question)
-
-         # Handle computational questions
-         if self.is_computational_question(question):
-             return self.solve_computational_question(question)
-
-         # Handle person/actor questions
-         if self.is_person_question(question):
-             return self.solve_person_question(question)
-
-         # Handle location/geography questions
-         if self.is_location_question(question):
-             return self.solve_location_question(question)
-
-         # Handle numerical/counting questions
-         if self.is_numerical_question(question):
-             return self.solve_numerical_question(question)
-
-         # Handle date/time questions
-         if self.is_date_question(question):
-             return self.solve_date_question(question)
-
-         # Default factual search
-         return self.solve_general_question(question)
-
-     def is_genuine_reversed_text_question(self, question: str) -> bool:
-         """Very specific detection for actual reversed text questions"""
-         # Only trigger if we see obvious reversed words that don't make sense in English
-         reversed_words = re.findall(r'\b[a-z]{4,}\b', question.lower())
-         genuine_reversed = []
-
-         for word in reversed_words:
-             reversed_word = word[::-1]
-             # Check if the reversed version is a common English word
-             common_words = ['left', 'right', 'opposite', 'answer', 'word', 'text']
-             if reversed_word in common_words:
-                 genuine_reversed.append((word, reversed_word))
-
-         return len(genuine_reversed) > 0
-
-     def solve_reversed_text(self, question: str) -> str:
-         """Solve genuine reversed text questions"""
-         words = question.lower().split()
-         for word in words:
-             if len(word) >= 4:
-                 reversed_word = word[::-1]
-                 if reversed_word == 'left':
-                     return 'right'
-                 elif reversed_word == 'right':
-                     return 'left'
-                 elif reversed_word == 'opposite':
-                     # Find what the opposite of
-                     word_index = words.index(word)
-                     if word_index + 1 < len(words):
-                         next_word = words[word_index + 1][::-1]
-                         opposites = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}
-                         return opposites.get(next_word, next_word)
-
-         return "Could not determine reversed text answer"
-
-     def is_computational_question(self, question: str) -> bool:
-         """Detect questions requiring computation"""
-         comp_keywords = ['calculate', 'compute', 'sum', 'total', 'multiply', 'divide', 'add', 'subtract']
-         return any(keyword in question.lower() for keyword in comp_keywords)
-
-     def solve_computational_question(self, question: str) -> str:
-         """Solve computational questions"""
-         # Extract numbers from the question
-         numbers = re.findall(r'-?\d+\.?\d*', question)
-
-         if len(numbers) >= 2:
-             try:
-                 nums = [float(n) for n in numbers]
-
-                 if any(word in question.lower() for word in ['sum', 'add', 'total', '+']):
-                     result = sum(nums)
-                 elif any(word in question.lower() for word in ['multiply', 'times', '*']):
-                     result = 1
-                     for n in nums:
-                         result *= n
-                 elif any(word in question.lower() for word in ['subtract', 'minus', '-']):
-                     result = nums[0] - nums[1]
-                 elif any(word in question.lower() for word in ['divide', '/']):
-                     result = nums[0] / nums[1] if nums[1] != 0 else 0
-                 else:
-                     # Search for the computational context
-                     return self.search_and_extract_number(question)
-
-                 # Return as integer if it's a whole number
-                 return str(int(result)) if result.is_integer() else str(result)
-             except:
-                 pass
-
-         return self.search_and_extract_number(question)
-
-     def is_person_question(self, question: str) -> bool:
-         """Detect questions about people"""
-         person_keywords = ['who', 'actor', 'person', 'name', 'character', 'played', 'starred']
-         return any(keyword in question.lower() for keyword in person_keywords)
-
-     def solve_person_question(self, question: str) -> str:
-         """Solve questions about people with improved search"""
-         data = self.search_engine.comprehensive_search(question)
-
-         if not data:
-             return "Person information not found"
-
-         # Check answer box first
-         if "answerBox" in data and "answer" in data["answerBox"]:
-             answer = data["answerBox"]["answer"].strip()
-             if self.looks_like_person_name(answer):
-                 return self.format_person_answer(answer, question)
-
-         # Check knowledge graph
-         if "knowledgeGraph" in data:
-             kg = data["knowledgeGraph"]
-             if "title" in kg and self.looks_like_person_name(kg["title"]):
-                 return self.format_person_answer(kg["title"], question)
-
-         # Extract from organic results
-         all_text = ""
-         for result in data.get("organic", [])[:5]:
-             all_text += f"{result.get('title', '')} {result.get('snippet', '')} "
-
-         return self.extract_person_from_text(all_text, question)
-
-     def looks_like_person_name(self, text: str) -> bool:
-         """Check if text looks like a person's name"""
-         if not text or len(text) > 50:
-             return False
-
-         # Simple heuristic: 1-4 capitalized words, reasonable length
-         words = text.split()
-         if 1 <= len(words) <= 4:
-             return all(word[0].isupper() and word.isalpha() for word in words if word)
-         return False
-
-     def format_person_answer(self, name: str, question: str) -> str:
-         """Format person answer based on what the question asks for"""
-         words = name.split()
-         q_lower = question.lower()
-
-         if 'first name' in q_lower and words:
-             return words[0]
-         elif any(term in q_lower for term in ['last name', 'surname']) and words:
-             return words[-1]
-         else:
-             return name
-
-     def extract_person_from_text(self, text: str, question: str) -> str:
-         """Extract person names from text"""
-         # Find potential names (2-3 capitalized words)
-         names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s[A-Z][a-z]+)?\b', text)
-
-         # Filter out common non-names
-         exclude = {'The New', 'New York', 'Los Angeles', 'Las Vegas', 'United States'}
-         valid_names = [name for name in names if name not in exclude and len(name.split()) <= 3]
-
-         if valid_names:
-             return self.format_person_answer(valid_names[0], question)
-
-         return "Person name not found"
-
-     def is_location_question(self, question: str) -> bool:
-         """Detect location/geography questions"""
-         location_keywords = ['where', 'country', 'city', 'state', 'location', 'place', 'born in', 'from']
-         return any(keyword in question.lower() for keyword in location_keywords)
-
-     def solve_location_question(self, question: str) -> str:
-         """Solve location questions"""
-         data = self.search_engine.comprehensive_search(question)
-
-         if not data:
-             return "Location not found"
-
-         # Check answer box
-         if "answerBox" in data and "answer" in data["answerBox"]:
-             answer = data["answerBox"]["answer"].strip()
-             if self.looks_like_location(answer):
-                 return answer
-
-         # Extract from results
-         all_text = ""
-         for result in data.get("organic", [])[:3]:
-             all_text += f"{result.get('snippet', '')} "
-
-         return self.extract_location_from_text(all_text)
-
-     def looks_like_location(self, text: str) -> bool:
-         """Check if text looks like a location"""
-         if not text or len(text) > 100:
-             return False
-
-         location_indicators = ['University', 'College', 'City', 'County', 'State', 'Country']
-         return any(indicator in text for indicator in location_indicators) or len(text.split()) <= 4
-
-     def extract_location_from_text(self, text: str) -> str:
-         """Extract location from text"""
-         # Look for patterns like "in [Location]", "at [Location]", "[Location] University"
-         location_patterns = [
-             r'\bin ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
-             r'\bat ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
-             r'([A-Z][a-z]+(?: [A-Z][a-z]+)*) University',
-             r'([A-Z][a-z]+(?: [A-Z][a-z]+)*) College',
-         ]
-
-         for pattern in location_patterns:
-             matches = re.findall(pattern, text)
-             if matches:
-                 return matches[0]
-
-         # Fallback: look for capitalized phrases
-         locations = re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text)
-         if locations:
-             return locations[0]

-         return "Location not found"
-
-     def is_numerical_question(self, question: str) -> bool:
-         """Detect questions asking for numbers"""
-         numerical_keywords = ['how many', 'how much', 'number of', 'count', 'total']
-         return any(keyword in question.lower() for keyword in numerical_keywords)
-
-     def solve_numerical_question(self, question: str) -> str:
-         """Solve questions asking for numbers"""
-         return self.search_and_extract_number(question)
-
-     def search_and_extract_number(self, question: str) -> str:
-         """Search and extract numerical answers"""
-         data = self.search_engine.comprehensive_search(question)
-
-         if not data:
-             return "Number not found"
-
-         # Check answer box first
-         if "answerBox" in data and "answer" in data["answerBox"]:
-             answer = data["answerBox"]["answer"].strip()
-             numbers = re.findall(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b', answer)
-             if numbers:
-                 return numbers[0].replace(',', '')
-
-         # Extract from snippets
-         all_text = ""
-         for result in data.get("organic", [])[:5]:
-             all_text += f"{result.get('snippet', '')} "
-
-         # Look for numbers in context
-         sentences = re.split(r'[.!?]', all_text)
-         for sentence in sentences[:10]:
-             numbers = re.findall(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b', sentence)
-             if numbers:
-                 # Try to find the most relevant number
-                 q_lower = question.lower()
-                 if any(word in sentence.lower() for word in q_lower.split()[:3]):
-                     return numbers[0].replace(',', '')
-
-         # Fallback: return first number found
-         all_numbers = re.findall(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b', all_text)
-         if all_numbers:
-             return all_numbers[0].replace(',', '')
-
-         return "Number not found"
-
-     def is_date_question(self, question: str) -> bool:
-         """Detect date/time questions"""
-         date_keywords = ['when', 'year', 'date', 'born', 'died', 'founded', 'established']
-         return any(keyword in question.lower() for keyword in date_keywords)
-
-     def solve_date_question(self, question: str) -> str:
-         """Solve date questions"""
-         data = self.search_engine.comprehensive_search(question)
-
-         if not data:
-             return "Date not found"
-
-         # Check answer box
-         if "answerBox" in data and "answer" in data["answerBox"]:
-             answer = data["answerBox"]["answer"].strip()
-             years = re.findall(r'\b(?:19|20)\d{2}\b', answer)
-             dates = re.findall(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+(?:19|20)\d{2}\b', answer)
-             if dates:
-                 return dates[0]
-             elif years:
-                 return years[0]
-
-         # Extract from snippets
-         all_text = ""
-         for result in data.get("organic", [])[:3]:
-             all_text += f"{result.get('snippet', '')} "
-
-         # Look for dates and years
-         dates = re.findall(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+(?:19|20)\d{2}\b', all_text)
-         if dates:
-             return dates[0]
-
-         years = re.findall(r'\b(?:19|20)\d{2}\b', all_text)
-         if years:
-             return years[0]
-
-         return "Date not found"
-
-     def solve_general_question(self, question: str) -> str:
-         """Solve general factual questions"""
-         data = self.search_engine.comprehensive_search(question)
-
-         if not data:
-             return "Information not found"
-
-         # Check answer box first - this is usually the best answer
-         if "answerBox" in data:
-             answer_box = data["answerBox"]
-             if "answer" in answer_box:
-                 return answer_box["answer"].strip()
-             elif "snippet" in answer_box:
-                 return answer_box["snippet"].strip()
-
-         # Check knowledge graph
-         if "knowledgeGraph" in data:
-             kg = data["knowledgeGraph"]
-             if "description" in kg:
-                 return kg["description"].strip()
-
-         # Get the most relevant snippet from organic results
-         for result in data.get("organic", [])[:3]:
-             snippet = result.get("snippet", "")
-             if snippet and len(snippet.strip()) > 10:
-                 return snippet.strip()
-
-         return "Answer not found in search results"

- def get_api_status():
-     """Check API configuration status"""
-     if os.getenv("SERPER_API_KEY"):
-         return "✅ Serper API: Configured and Ready"
      else:
-         return "❌ Serper API: Not configured - Set SERPER_API_KEY environment variable"

- def run_gaia_evaluation(profile: gr.OAuthProfile | None):
-     """Run GAIA evaluation with improved solver"""
-     if not profile:
-         return "Please log in to Hugging Face first.", None
-
-     api_status = get_api_status()
-     if "❌" in api_status:
-         return f"⚠️ Configuration Error!\n\n{api_status}\n\nGet your free API key at: https://serper.dev", None
-
-     username = profile.username
-     questions_url = f"{DEFAULT_API_URL}/questions"
-     submit_url = f"{DEFAULT_API_URL}/submit"
-
      try:
-         solver = GAIAQuestionSolver()
-         print("✅ GAIA improved solver initialized")
      except Exception as e:
-         return f"❌ Solver initialization failed: {e}", None
-
      try:
-         print("📥 Fetching GAIA questions...")
-         response = requests.get(questions_url, timeout=30)
          response.raise_for_status()
-         questions = response.json()
-         print(f"✅ Retrieved {len(questions)} questions")
      except Exception as e:
-         return f"❌ Failed to fetch questions: {e}", None
-
-     answers = []
-     detailed_logs = []
-
-     for i, item in enumerate(questions):
          task_id = item.get("task_id")
-         question = item.get("question")
-
-         if not task_id or not question:
              continue
-
-         print(f"\n🔄 Processing {i+1}/{len(questions)}: {task_id}")
-
          try:
-             start_time = time.time()
-             answer = solver.solve_question(question)
-             processing_time = time.time() - start_time
-
-             answers.append({"task_id": task_id, "submitted_answer": answer})
-             detailed_logs.append({
-                 "Task ID": task_id,
-                 "Question Preview": question[:120] + "..." if len(question) > 120 else question,
-                 "Answer": answer[:80] + "..." if len(answer) > 80 else answer,
-                 "Processing Time": f"{processing_time:.2f}s"
-             })
-
-             print(f"✅ Answer: {answer}")
-
-             # Rate limiting
-             time.sleep(0.5)
-
          except Exception as e:
-             error_msg = f"Processing error: {str(e)}"
-             answers.append({"task_id": task_id, "submitted_answer": error_msg})
-             detailed_logs.append({
-                 "Task ID": task_id,
-                 "Question Preview": question[:120] + "..." if len(question) > 120 else question,
-                 "Answer": error_msg,
-                 "Processing Time": "Error"
-             })
-             print(f"❌ Error processing {task_id}: {e}")
-
-     # Submit answers
-     print(f"\n📤 Submitting {len(answers)} answers to GAIA benchmark...")
-     submission_payload = {
-         "username": username,
-         "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID', 'your-space')}/tree/main",
-         "answers": answers
-     }
-
      try:
-         submit_response = requests.post(submit_url, json=submission_payload, timeout=240)
-         submit_response.raise_for_status()
-         result_data = submit_response.json()
-
-         score = result_data.get('score', 'N/A')
-         correct_count = result_data.get('correct_count', '?')
-         total_attempted = result_data.get('total_attempted', '?')
-
-         results_summary = f"""🎯 GAIA BENCHMARK RESULTS (IMPROVED VERSION)
-
- 📊 Final Score: {score}%
- Correct Answers: {correct_count}/{total_attempted}
-
- 🔧 System Status:
- {api_status}
-
- 🚀 Key Improvements Made:
- Fixed overly broad reversed text detection
- Improved search result processing with structured data
- Better answer box and knowledge graph utilization
- Enhanced person/actor name extraction
- • Improved numerical and date extraction
- More precise question classification
- Eliminated generic "right" fallback answers
-
- 📈 Technical Fixes:
- • Removed faulty 'fo' pattern that triggered false positives
- Added proper search result structure handling
- Implemented context-aware answer formatting
- Better handling of edge cases and errors
- Improved rate limiting and error recovery
-
- 💡 Performance Notes:
- This version should show significantly better accuracy by properly processing search results and avoiding the classification errors that caused nonsensical answers in the previous version."""
-
-         return results_summary, pd.DataFrame(detailed_logs)
-
      except Exception as e:
-         return f"❌ Submission failed: {str(e)}\n\nAnswers were processed but could not be submitted.", pd.DataFrame(detailed_logs)

- # Gradio Interface
- with gr.Blocks(title="GAIA Improved Agent", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("""
- # 🧠 GAIA Benchmark Agent (IMPROVED VERSION)
-
- **🔧 Major Fixes Applied:**
- - ✅ Fixed overly broad reversed text detection that caused false positives
- - ✅ Improved search result processing to use structured data properly
- - ✅ Enhanced question classification to avoid nonsensical answers
- - ✅ Better extraction of names, numbers, dates, and locations
- - ✅ Proper handling of answer boxes and knowledge graphs
-
- **🎯 Specialized Question Handling:**
- - 🔄 Genuine reversed text questions (with precise detection)
- - 🧮 Computational questions with proper math operations
- - 🎭 Person/actor questions with improved name extraction
- - 📍 Location questions with geographic context
- - 🔢 Numerical questions with context-aware number extraction
- - 📅 Date/time questions with proper temporal parsing
-
- **🔧 Setup Required:**
- - Set `SERPER_API_KEY` in your Hugging Face Space secrets
- - Get free 2500 searches/month at [serper.dev](https://serper.dev)
- """)
-
      gr.LoginButton()
-
-     with gr.Row():
-         with gr.Column(scale=1):
-             status_display = gr.Textbox(
-                 label="🔧 API Status",
-                 value=get_api_status(),
-                 lines=3,
-                 interactive=False
-             )
-
-             evaluate_button = gr.Button(
-                 "🚀 Run GAIA Evaluation (Improved)",
-                 variant="primary",
-                 size="lg"
-             )
-
-     with gr.Row():
-         results_output = gr.Textbox(
-             label="📊 Evaluation Results",
-             lines=20,
-             interactive=False
-         )
-
-     with gr.Row():
-         logs_table = gr.DataFrame(
-             label="📋 Detailed Processing Logs",
-             wrap=True
-         )
-
-     evaluate_button.click(
-         fn=run_gaia_evaluation,
-         outputs=[results_output, logs_table]
      )

  if __name__ == "__main__":
-     demo.launch(share=True, debug=True)
  import os
  import gradio as gr
  import requests
+ import json
  import re
+ import numexpr
+ import pandas as pd
  import time
  import math
+ import pdfminer
+ from ctransformers import AutoModelForCausalLM
+ from duckduckgo_search import DDGS
+ from pdfminer.high_level import extract_text
+ from bs4 import BeautifulSoup
+ import html2text
+ from typing import Dict, Any, List, Tuple, Callable

+ # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+ MAX_STEPS = 6  # Limit reasoning steps for performance
+ MAX_TOKENS = 256  # Limit token generation
+ MODEL_NAME = "TheBloke/phi-3-mini-128k-instruct-GGUF"
+ MODEL_FILE = "phi-3-mini-128k-instruct.Q4_K_M.gguf"

+ # --- Load Quantized Model ---
+ print("Loading quantized model...")
+ start_time = time.time()
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     model_file=MODEL_FILE,
+     model_type="phi3",
+     gpu_layers=0,  # CPU only
+     context_length=4096
+ )
+ load_time = time.time() - start_time
+ print(f"Model loaded in {load_time:.2f} seconds")
+
+ # --- Tools for GAIA Agent ---
+ def web_search(query: str) -> str:
+     """Search the web using DuckDuckGo"""
+     try:
+         with DDGS() as ddgs:
+             results = [r for r in ddgs.text(query, max_results=3)]
+         return json.dumps(results)
+     except Exception as e:
+         return f"Search error: {str(e)}"
+
+ def calculator(expression: str) -> str:
+     """Evaluate mathematical expressions safely"""
+     try:
+         return str(numexpr.evaluate(expression))
+     except Exception as e:
+         return f"Calculation error: {str(e)}"
+
+ def read_pdf(file_path: str) -> str:
+     """Extract text from PDF files"""
+     try:
+         return extract_text(file_path)
+     except Exception as e:
+         return f"PDF read error: {str(e)}"
+
+ def read_webpage(url: str) -> str:
+     """Fetch and extract text from web pages"""
+     try:
+         response = requests.get(url, timeout=10)
+         soup = BeautifulSoup(response.text, 'html.parser')
+         return soup.get_text(separator=' ', strip=True)[:2000]  # Limit text
+     except Exception as e:
+         return f"Webpage read error: {str(e)}"
+
+ TOOLS = {
+     "web_search": web_search,
+     "calculator": calculator,
+     "read_pdf": read_pdf,
+     "read_webpage": read_webpage
+ }
+
+ # --- GAIA Agent Implementation ---
+ class GAIA_Agent:
      def __init__(self):
+         self.tools = TOOLS
+         self.history = []
+         self.system_prompt = (
+             "You are an expert GAIA problem solver. Use these tools: {web_search, calculator, read_pdf, read_webpage}.\n"
+             "Guidelines:\n"
+             "1. Think step-by-step. Explain reasoning\n"
+             "2. Use tools for calculations, searches, or file operations\n"
+             "3. Tools must be called as: ```json\n{'tool': 'tool_name', 'args': {'arg1': value}}```\n"
+             "4. Final Answer must be exact and standalone\n\n"
+             "Example:\n"
+             "Question: \"What's the population density of France? (File: france_data.pdf)\"\n"
+             "Thought: Need population and area. Read PDF first.\n"
+             "Action: ```json\n{'tool': 'read_pdf', 'args': {'file_path': 'france_data.pdf'}}```\n"
+             "Observation: Population: 67.8M, Area: 643,801 km²\n"
+             "Thought: Now calculate density: 67,800,000 / 643,801\n"
+             "Action: ```json\n{'tool': 'calculator', 'args': {'expression': '67800000 / 643801'}}```\n"
+             "Observation: 105.32\n"
+             "Final Answer: 105.32 people/km²"
+         )
+
+     def __call__(self, question: str) -> str:
+         print(f"\nProcessing: {question[:80]}...")
+         self.history = [f"Question: {question}"]

+         for step in range(MAX_STEPS):
+             prompt = self._build_prompt()
+             response = self._call_model(prompt)

+             if "Final Answer" in response:
+                 answer = response.split("Final Answer:")[-1].strip()
+                 print(f"Final Answer: {answer}")
+                 return answer

+             tool_call = self._parse_tool_call(response)
+             if tool_call:
+                 tool_name, args = tool_call
+                 observation = self._use_tool(tool_name, args)
+                 self.history.append(f"Observation: {observation}")
+             else:
+                 self.history.append(f"Thought: {response}")
+
+         return "Agent couldn't find solution within step limit"
+
+     def _build_prompt(self) -> str:
+         prompt = f"<|system|>\n{self.system_prompt}<|end|>\n"
+         prompt += "<|user|>\n" + "\n".join(self.history) + "<|end|>\n"
+         prompt += "<|assistant|>"
+         return prompt
+
+     def _call_model(self, prompt: str) -> str:
+         start_time = time.time()
+         response = model(
+             prompt,
+             max_new_tokens=MAX_TOKENS,
+             temperature=0.01,
+             stop=["<|end|>", "Observation:", "```"]
+         )
+         gen_time = time.time() - start_time
+         print(f"Generated {len(response)} tokens in {gen_time:.2f}s: {response[:60]}...")
+         return response
+
+     def _parse_tool_call(self, text: str) -> Tuple[str, Dict] or None:
+         try:
+             json_match = re.search(r'```json\s*({.*?})\s*```', text, re.DOTALL)
+             if json_match:
+                 tool_call = json.loads(json_match.group(1))
+                 return tool_call["tool"], tool_call["args"]
          except Exception as e:
+             print(f"Tool parse error: {str(e)}")
+         return None

+     def _use_tool(self, tool_name: str, args: Dict) -> str:
+         if tool_name not in self.tools:
+             return f"Error: Unknown tool {tool_name}"

+         print(f"Using tool: {tool_name}({args})")
+         try:
+             start_time = time.time()
+             result = self.tools[tool_name](**args)
+             exec_time = time.time() - start_time
+             print(f"Tool executed in {exec_time:.2f}s")
+             return str(result)[:500]  # Truncate long outputs
+         except Exception as e:
+             return f"Tool error: {str(e)}"
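Editor's note on the tool-call protocol above: the system prompt asks the model for single-quoted payloads like {'tool': 'tool_name', 'args': {...}}, while _parse_tool_call hands the captured text to json.loads, which accepts only double-quoted JSON. The following is a minimal sketch, not part of the commit, of how such a response could be parsed and dispatched; demo_tools and the sample response are hypothetical stand-ins, and the ast.literal_eval fallback is an assumption added to tolerate the single-quoted form.

import ast
import json
import re

# Hypothetical stand-in for the TOOLS dict defined in app.py.
demo_tools = {"calculator": lambda expression: str(sum(float(n) for n in expression.split("+")))}

# A response in the shape the system prompt requests (single-quoted keys).
response = "Thought: add the numbers\nAction: ```json\n{'tool': 'calculator', 'args': {'expression': '2+3'}}```"

match = re.search(r'```json\s*({.*?})\s*```', response, re.DOTALL)  # same regex as _parse_tool_call
if match:
    payload = match.group(1)
    try:
        call = json.loads(payload)        # succeeds only for double-quoted JSON
    except json.JSONDecodeError:
        call = ast.literal_eval(payload)  # assumed fallback for single-quoted dicts
    observation = demo_tools[call["tool"]](**call["args"])
    print(f"Observation: {observation}")  # Observation: 5.0

Note also that _call_model lists "```" among its stop strings, so the closing fence of an Action block may never be generated; if tool calls fail to parse in practice, that stop list is a reasonable first place to look.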
+ # --- Evaluation Runner ---
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     # ... [Keep the original run_and_submit_all function structure] ...
+     # Only change the agent initialization:
+     try:
+         agent = GAIA_Agent()  # Use our custom agent
+     except Exception as e:
+         print(f"Error instantiating agent: {e}")
+         return f"Error initializing agent: {e}", None
+     # ... [rest of the function remains unchanged] ...
+
+ # --- Gradio Interface ---
+ with gr.Blocks() as demo:
+     # ... [Keep the original Gradio interface] ...
+     # Only add resource monitoring:
+     gr.Markdown(f"**Resource Info:** Using {MODEL_FILE} | Max steps: {MAX_STEPS} | Max tokens: {MAX_TOKENS}")
+
+     # Add a clear button for history
+     clear_btn = gr.Button("Clear History")
+     clear_btn.click(lambda: [None, None], outputs=[status_output, results_table])
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the BasicAgent on them, submits all answers,
+     and displays the results.
+     """
+     # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
      else:
+         print("User not logged in.")
+         return "Please log in to Hugging Face with the button.", None

+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     submit_url = f"{api_url}/submit"
+
+     # 1. Instantiate Agent (modify this part to create your agent)
      try:
+         agent = BasicAgent()
      except Exception as e:
+         print(f"Error instantiating agent: {e}")
+         return f"Error initializing agent: {e}", None
+     # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     print(agent_code)
+
+     # 2. Fetch Questions
+     print(f"Fetching questions from: {questions_url}")
      try:
+         response = requests.get(questions_url, timeout=15)
          response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return f"Error fetching questions: {e}", None
+     except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
      except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return f"An unexpected error occurred fetching questions: {e}", None
+
+     # 3. Run your Agent
+     results_log = []
+     answers_payload = []
+     print(f"Running agent on {len(questions_data)} questions...")
+     for item in questions_data:
          task_id = item.get("task_id")
+         question_text = item.get("question")
+         if not task_id or question_text is None:
+             print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
+             submitted_answer = agent(question_text)
+             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
          except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+     if not answers_payload:
+         print("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+     # 4. Prepare Submission
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)
+
+     # 5. Submit
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
      except Exception as e:
+         status_message = f"An unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+
+
+ # --- Build Gradio Interface using Blocks ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+         ---
+         **Disclaimers:**
+         Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, for the delay on the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
+         """
+     )

      gr.LoginButton()
+
+     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     # Removed max_rows=10 from DataFrame constructor
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+     run_button.click(
+         fn=run_and_submit_all,
+         outputs=[status_output, results_table]
      )

  if __name__ == "__main__":
+     print("\n" + "-"*30 + " App Starting " + "-"*30)
+     # Check for SPACE_HOST and SPACE_ID at startup for information
+     space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+
+     if space_host_startup:
+         print(f"✅ SPACE_HOST found: {space_host_startup}")
+         print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
+     else:
+         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+     if space_id_startup:  # Print repo URLs if SPACE_ID is found
+         print(f"✅ SPACE_ID found: {space_id_startup}")
+         print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+     else:
+         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+     print("-"*(60 + len(" App Starting ")) + "\n")
+
+     print("Launching Gradio Interface for Basic Agent Evaluation...")
+     demo.launch(debug=True, share=False)
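The Disclaimers text above suggests caching the answers and submitting them in a separate action so the long agent run stays off the submission path. A minimal sketch of that idea follows; it is not part of the commit, and the cache file name and helper names are hypothetical.

import json

CACHE_PATH = "answers_cache.json"  # hypothetical location for the cached payload

def cache_answers(answers_payload: list) -> None:
    # Persist the agent's answers so a later action can submit them.
    with open(CACHE_PATH, "w", encoding="utf-8") as f:
        json.dump(answers_payload, f, indent=2)

def load_cached_answers() -> list:
    # Reload previously cached answers; an empty list means nothing to submit yet.
    try:
        with open(CACHE_PATH, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return []

A second Gradio button could then call a submit-only function that loads this file and posts it to the scoring endpoint, reusing the submission block from run_and_submit_all.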
requirements.txt CHANGED
@@ -1,13 +1,11 @@
- gradio>=4.0.0
- transformers>=4.35.0
- torch>=2.0.0
- pandas>=1.5.0
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- wikipedia>=1.4.0
- smolagents>=0.1.0
- accelerate>=0.20.0
- sentencepiece>=0.1.99
- openpyxl
  PyPDF2
- pillow
+ ctransformers==0.2.27
+ gradio==4.19.0
+ requests
+ pandas
+ python-dotenv
+ duckduckgo-search
+ numexpr
  PyPDF2
+ pdfminer.six
+ beautifulsoup4
+ html2text
run.py ADDED
@@ -0,0 +1,8 @@
+ from smolagents import DuckDuckGoSearchTool
+
+ # Initialize the DuckDuckGo search tool
+ search_tool = DuckDuckGoSearchTool()
+
+ # Example usage
+ results = search_tool("Who's the current President of France?")
+ print(results)