Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 26

Commit

03ca047

1 Parent(s): 086b425

Deploy GAIA agent

Browse files

Files changed (1) hide show

app.py +63 -223

app.py CHANGED Viewed

@@ -1,28 +1,21 @@
-# app.py
 import os
 import gradio as gr
 import requests
 import pandas as pd
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
-import json
 import re
 from typing import Dict, Any
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Enhanced Web Search Tool ---
 def enhanced_search(query: str) -> str:
-    """Enhanced search with multiple fallbacks"""
     try:
-        # Try DuckDuckGo first
         resp = requests.get(
             "https://html.duckduckgo.com/html/",
             params={"q": query},
             timeout=10,
-            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
         )
         resp.raise_for_status()
         from bs4 import BeautifulSoup
@@ -32,149 +25,93 @@ def enhanced_search(query: str) -> str:
             return "\n\n".join(f"Title: {a.get_text()}\nURL: {a.get('href', '')}" for a in items)
     except:
         pass
-    # Fallback to Wikipedia
     try:
         import wikipedia
         wikipedia.set_lang("en")
         results = wikipedia.search(query, results=2)
-        if results:
-            summaries = []
-            for title in results:
-                try:
-                    summary = wikipedia.summary(title, sentences=2)
-                    summaries.append(f"**{title}**: {summary}")
-                except:
-                    continue
-            if summaries:
-                return "\n\n".join(summaries)
     except:
         pass
     return f"Could not find reliable information for: {query}"
-# --- Mathematical Expression Evaluator ---
 def safe_eval(expression: str) -> str:
-    """Safely evaluate mathematical expressions"""
     try:
-        # Clean the expression
         expression = re.sub(r'[^0-9+\-*/().\s]', '', expression)
         if not expression.strip():
             return "Invalid expression"
-        # Simple safety check
         if any(word in expression.lower() for word in ['import', 'exec', 'eval', '__']):
             return "Unsafe expression"
         result = eval(expression)
         return str(result)
     except:
         return "Could not calculate"
-# --- Enhanced Language Model ---
 class EnhancedModel:
     def __init__(self):
-        print("Loading enhanced model...")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        # Try multiple models in order of preference
         models_to_try = [
             "microsoft/DialoGPT-medium",
             "distilgpt2",
             "gpt2"
         ]
         self.model = None
         self.tokenizer = None
         for model_name in models_to_try:
             try:
-                print(f"Attempting to load {model_name}...")
                 self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                 if self.tokenizer.pad_token is None:
                     self.tokenizer.pad_token = self.tokenizer.eos_token
                 self.model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                     device_map="auto" if self.device == "cuda" else None
                 )
                 if self.device == "cpu":
                     self.model = self.model.to(self.device)
-                print(f"Successfully loaded {model_name}")
                 break
-            except Exception as e:
-                print(f"Failed to load {model_name}: {e}")
                 continue
         if self.model is None:
             raise Exception("Could not load any model")
     def generate_answer(self, question: str, context: str = "") -> str:
-        """Generate answer with better prompting"""
         try:
-            # Create a more structured prompt
-            if context:
-                prompt = f"""Context: {context}
-Question: {question}
-Based on the context above, provide a clear and accurate answer:"""
-            else:
-                prompt = f"""Question: {question}
-Provide a clear, factual answer. If you're not certain, say so.
-Answer:"""
-            # Tokenize
-            inputs = self.tokenizer.encode(
-                prompt,
-                return_tensors="pt",
-                truncation=True,
-                max_length=400
             )
             if self.device == "cuda":
                 inputs = inputs.to(self.device)
-            # Generate
             with torch.no_grad():
                 outputs = self.model.generate(
                     inputs,
                     max_length=inputs.size(1) + 150,
-                    num_return_sequences=1,
                     temperature=0.7,
                     do_sample=True,
                     pad_token_id=self.tokenizer.eos_token_id,
                     eos_token_id=self.tokenizer.eos_token_id,
                     no_repeat_ngram_size=3
                 )
-            # Decode
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Extract answer part
-            if "Answer:" in response:
-                answer = response.split("Answer:")[-1].strip()
-            else:
-                answer = response[len(prompt):].strip()
-            return answer if answer else "I need more information to answer this question."
         except Exception as e:
             return f"Error generating answer: {e}"
-# --- Smart Agent ---
 class SmartAgent:
     def __init__(self):
-        print("Initializing Smart Agent...")
         self.model = EnhancedModel()
-        # Pattern matching for different question types
         self.patterns = {
             'math': [r'\d+[\+\-\*\/]\d+', r'calculate', r'compute', r'sum', r'total', r'equals'],
             'search': [r'who is', r'what is', r'when did', r'where is', r'how many', r'which'],
@@ -185,108 +122,59 @@ class SmartAgent:
         }
     def classify_question(self, question: str) -> str:
-        """Classify the type of question"""
-        question_lower = question.lower()
         for category, patterns in self.patterns.items():
             for pattern in patterns:
-                if re.search(pattern, question_lower):
                     return category
         return 'general'
     def handle_math_question(self, question: str) -> str:
-        """Handle mathematical questions"""
-        # Extract numbers and operators
-        math_expressions = re.findall(r'[\d\+\-\*\/\(\)\.\s]+', question)
-        for expr in math_expressions:
-            if any(op in expr for op in ['+', '-', '*', '/']):
                 result = safe_eval(expr.strip())
                 if result != "Could not calculate":
                     return f"The answer is: {result}"
-        return "Could not identify a mathematical expression to calculate."
     def handle_reversed_question(self, question: str) -> str:
-        """Handle reversed text questions"""
-        # If the question itself is reversed, reverse it
         if question.endswith('.'):
-            reversed_question = question[::-1]
-            # Look for "left" in the reversed question
-            if 'left' in reversed_question.lower():
                 return "right"
         return "Could not determine the reversed answer."
     def handle_search_question(self, question: str) -> str:
-        """Handle questions requiring search"""
-        search_result = enhanced_search(question)
-        # Use the model to process search results
-        if "Could not find" not in search_result:
-            answer = self.model.generate_answer(question, search_result)
-            return answer
-        return search_result
     def handle_media_question(self, question: str) -> str:
-        """Handle media-related questions"""
         if 'youtube.com' in question:
-            return "I cannot directly access YouTube videos. Please provide the video content or transcript."
-        elif '.mp3' in question or 'audio' in question.lower():
-            return "I cannot process audio files directly. Please provide a transcript or description."
-        else:
-            return "I cannot process media files in this environment."
     def handle_file_question(self, question: str) -> str:
-        """Handle file-related questions"""
-        return "I cannot access attached files in this environment. Please provide the file content directly."
     def handle_general_question(self, question: str) -> str:
-        """Handle general questions with the language model"""
-        # For complex questions, try to search for context first
-        if len(question.split()) > 10:
-            search_context = enhanced_search(question)
-            if "Could not find" not in search_context:
-                return self.model.generate_answer(question, search_context)
-        return self.model.generate_answer(question)
     def __call__(self, question: str) -> str:
-        """Main entry point for the agent"""
-        print(f"Processing: {question[:100]}...")
         try:
-            # Classify the question
-            question_type = self.classify_question(question)
-            print(f"Question type: {question_type}")
-            # Route to appropriate handler
-            if question_type == 'math':
-                return self.handle_math_question(question)
-            elif question_type == 'reversed':
-                return self.handle_reversed_question(question)
-            elif question_type == 'search' or question_type == 'wikipedia':
-                return self.handle_search_question(question)
-            elif question_type == 'media':
-                return self.handle_media_question(question)
-            elif question_type == 'file':
-                return self.handle_file_question(question)
-            else:
-                return self.handle_general_question(question)
         except Exception as e:
-            print(f"Error processing question: {e}")
-            return f"I encountered an error: {e}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
         return "Please log in to Hugging Face to submit answers.", None
     username = profile.username
     space_id = os.getenv("SPACE_ID", "")
     questions_url = f"{DEFAULT_API_URL}/questions"
     submit_url = f"{DEFAULT_API_URL}/submit"
@@ -295,8 +183,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e:
         return f"Agent initialization failed: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
         r = requests.get(questions_url, timeout=15)
         r.raise_for_status()
@@ -305,66 +191,41 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return f"Error fetching questions: {e}", None
     logs, answers = [], []
-    total_questions = len(questions)
     for i, item in enumerate(questions):
-        task_id = item.get("task_id")
-        question = item.get("question")
         if not task_id or question is None:
             continue
-        print(f"\n=== Question {i+1}/{total_questions} ===")
-        print(f"Task ID: {task_id}")
         try:
             ans = agent(question)
             answers.append({"task_id": task_id, "submitted_answer": ans})
-            # Create log entry
-            log_entry = {
-                "Task ID": task_id,
-                "Question": question[:150] + "..." if len(question) > 150 else question,
-                "Answer": ans[:300] + "..." if len(ans) > 300 else ans
-            }
-            logs.append(log_entry)
-            print(f"Answer: {ans[:100]}...")
-        except Exception as e:
-            error_msg = f"Error processing question: {e}"
-            answers.append({"task_id": task_id, "submitted_answer": error_msg})
             logs.append({
                 "Task ID": task_id,
-                "Question": question[:150] + "..." if len(question) > 150 else question,
-                "Answer": error_msg
             })
-            print(f"Error: {e}")
     if not answers:
-        return "Agent produced no answers.", pd.DataFrame(logs)
-    # Submit answers
-    payload = {"username": username, "agent_code": agent_code, "answers": answers}
     try:
-        print(f"\nSubmitting {len(answers)} answers...")
         resp = requests.post(submit_url, json=payload, timeout=120)
         resp.raise_for_status()
         data = resp.json()
         score = data.get('score', 'N/A')
         correct = data.get('correct_count', '?')
         total = data.get('total_attempted', '?')
-        status = (
-            f"🎯 Submission Results:\n"
-            f"Score: {score}% ({correct}/{total} correct)\n"
             f"Target: 30% for GAIA benchmark\n"
             f"Status: {'✅ TARGET REACHED!' if isinstance(score, (int, float)) and score >= 30 else '📈 Keep improving!'}\n"
-            f"\nMessage: {data.get('message', 'No additional message')}"
         )
-        return status, pd.DataFrame(logs)
     except Exception as e:
         return f"❌ Submission failed: {e}", pd.DataFrame(logs)
@@ -372,43 +233,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🤖 GAIA Benchmark Agent
-    **Goal**: Achieve 30% accuracy on GAIA benchmark questions
-    **Features**:
-    - 🧠 Enhanced language model reasoning
-    - 🔍 Web search capabilities
-    - 🧮 Mathematical calculations
-    - 📚 Wikipedia integration
-    - 🎯 Smart question classification
-    **Hardware**: Optimized for 2vCPU + 16GB RAM (no external APIs)
     """)
     gr.LoginButton()
     with gr.Row():
         run_button = gr.Button("🚀 Run GAIA Evaluation", variant="primary", size="lg")
     with gr.Column():
-        status_box = gr.Textbox(
-            label="📊 Evaluation Results",
-            lines=10,
-            interactive=False,
-            placeholder="Click 'Run GAIA Evaluation' to start..."
-        )
-        result_table = gr.DataFrame(
-            label="📋 Detailed Results",
-            wrap=True,
-            height=400
-        )
-    run_button.click(
-        run_and_submit_all,
-        outputs=[status_box, result_table]
-    )
 if __name__ == "__main__":
     print("🚀 Launching GAIA Agent...")
-    demo.launch(debug=True, share=False)

 import os
 import gradio as gr
 import requests
 import pandas as pd
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import re
 from typing import Dict, Any
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def enhanced_search(query: str) -> str:
     try:
         resp = requests.get(
             "https://html.duckduckgo.com/html/",
             params={"q": query},
             timeout=10,
+            headers={'User-Agent': 'Mozilla/5.0'}
         )
         resp.raise_for_status()
         from bs4 import BeautifulSoup
             return "\n\n".join(f"Title: {a.get_text()}\nURL: {a.get('href', '')}" for a in items)
     except:
         pass
     try:
         import wikipedia
         wikipedia.set_lang("en")
         results = wikipedia.search(query, results=2)
+        summaries = []
+        for title in results:
+            try:
+                summary = wikipedia.summary(title, sentences=2)
+                summaries.append(f"**{title}**: {summary}")
+            except:
+                continue
+        if summaries:
+            return "\n\n".join(summaries)
     except:
         pass
     return f"Could not find reliable information for: {query}"
 def safe_eval(expression: str) -> str:
     """Evaluate a plain arithmetic expression and return the result as text.

     Every character other than digits, ``+ - * /``, parentheses, ``.`` and
     whitespace is stripped first. The remainder is parsed with ``ast`` and
     evaluated by a small whitelist interpreter rather than ``eval``, so only
     numeric literals, unary ``+``/``-`` and the binary operators
     ``+ - * / // **`` are ever executed.

     Args:
         expression: Text containing an arithmetic expression.

     Returns:
         ``str(result)`` on success, ``"Invalid expression"`` when nothing is
         left after stripping, and ``"Could not calculate"`` for any syntax or
         evaluation error (including integer powers that would be too large).
     """
     import ast
     import operator

     # Upper bound on the size of an integer power; stops inputs such as
     # "9**9**9**9" from consuming unbounded CPU and memory.
     max_result_bits = 10_000
     binary_ops = {
         ast.Add: operator.add,
         ast.Sub: operator.sub,
         ast.Mult: operator.mul,
         ast.Div: operator.truediv,
         ast.FloorDiv: operator.floordiv,
     }
     unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

     def evaluate(node):
         # Only plain int/float literals are accepted (e.g. "..." parses to
         # the Ellipsis constant and must be rejected).
         if isinstance(node, ast.Constant) and type(node.value) in (int, float):
             return node.value
         if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
             return unary_ops[type(node.op)](evaluate(node.operand))
         if isinstance(node, ast.BinOp):
             left, right = evaluate(node.left), evaluate(node.right)
             if isinstance(node.op, ast.Pow):
                 if (isinstance(left, int) and isinstance(right, int)
                         and right > 0
                         and left.bit_length() * right > max_result_bits):
                     raise ValueError("result too large")
                 return left ** right
             if type(node.op) in binary_ops:
                 return binary_ops[type(node.op)](left, right)
         raise ValueError("unsupported expression")

     try:
         # No letters or underscores survive this substitution, so a keyword
         # blacklist ('import', 'exec', '__', ...) could never match afterwards.
         expression = re.sub(r'[^0-9+\-*/().\s]', '', expression)
         if not expression.strip():
             return "Invalid expression"
         tree = ast.parse(expression.strip(), mode="eval")
         return str(evaluate(tree.body))
     except Exception:
         # Best-effort helper: syntax errors, division by zero, overflow and
         # unsupported syntax all map to the same sentinel string.
         return "Could not calculate"
 class EnhancedModel:
     """Wrapper around a small causal language model used to phrase answers.

     ``__init__`` tries a fixed list of checkpoints in order and keeps the
     first one that loads; ``generate_answer`` builds a prompt ending in
     ``Answer:`` and returns the sampled continuation.
     """

     # Number of prompt tokens kept (counted from the END of the prompt).
     MAX_PROMPT_TOKENS = 400
     # Number of tokens sampled after the prompt.
     MAX_NEW_TOKENS = 150

     def __init__(self):
         """Load the first checkpoint that can be loaded.

         Raises:
             Exception: If no candidate checkpoint loads; the last loading
                 error is attached as ``__cause__``.
         """
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         models_to_try = [
             "microsoft/DialoGPT-medium",
             "distilgpt2",
             "gpt2"
         ]
         self.model = None
         self.tokenizer = None
         last_error = None
         for model_name in models_to_try:
             try:
                 tokenizer = AutoTokenizer.from_pretrained(model_name)
                 if tokenizer.pad_token is None:
                     tokenizer.pad_token = tokenizer.eos_token
                 model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                     device_map="auto" if self.device == "cuda" else None
                 )
                 if self.device == "cpu":
                     model = model.to(self.device)
             except Exception as exc:
                 # Remember why this candidate failed and try the next one;
                 # KeyboardInterrupt/SystemExit are deliberately not caught.
                 last_error = exc
                 continue
             # Publish tokenizer and model together so they always match.
             self.tokenizer, self.model = tokenizer, model
             break
         if self.model is None:
             raise Exception("Could not load any model") from last_error

     def generate_answer(self, question: str, context: str = "") -> str:
         """Generate an answer to ``question``, optionally grounded in ``context``.

         Args:
             question: The question text.
             context: Optional supporting text placed before the question.

         Returns:
             The generated continuation (text after the last ``Answer:``),
             stripped; or ``"Error generating answer: ..."`` if anything fails.
         """
         try:
             prompt = (
                 f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
                 if context else
                 f"Question: {question}\n\nAnswer:"
             )
             inputs = self.tokenizer.encode(prompt, return_tensors="pt")
             # Keep the tail of an over-long prompt: the question and the
             # "Answer:" cue sit at the end, so cutting from the right would
             # drop exactly the part the model needs.
             inputs = inputs[:, -self.MAX_PROMPT_TOKENS:]
             if self.device == "cuda":
                 inputs = inputs.to(self.device)
             with torch.no_grad():
                 outputs = self.model.generate(
                     inputs,
                     # Single unpadded sequence: every position is real input.
                     attention_mask=torch.ones_like(inputs),
                     max_new_tokens=self.MAX_NEW_TOKENS,
                     temperature=0.7,
                     do_sample=True,
                     pad_token_id=self.tokenizer.eos_token_id,
                     eos_token_id=self.tokenizer.eos_token_id,
                     no_repeat_ngram_size=3
                 )
             # Decode only the newly generated tokens; slicing the decoded text
             # by len(prompt) is wrong whenever the prompt was truncated.
             generated = outputs[0][inputs.size(1):]
             answer = self.tokenizer.decode(generated, skip_special_tokens=True)
             return answer.split("Answer:")[-1].strip()
         except Exception as e:
             return f"Error generating answer: {e}"
 class SmartAgent:
     def __init__(self):
         self.model = EnhancedModel()
         self.patterns = {
             'math': [r'\d+[\+\-\*\/]\d+', r'calculate', r'compute', r'sum', r'total', r'equals'],
             'search': [r'who is', r'what is', r'when did', r'where is', r'how many', r'which'],
         }
     def classify_question(self, question: str) -> str:
+        q = question.lower()
         for category, patterns in self.patterns.items():
             for pattern in patterns:
+                if re.search(pattern, q):
                     return category
         return 'general'
     def handle_math_question(self, question: str) -> str:
         """Evaluate the first arithmetic-looking fragment found in ``question``.

         Runs of digits, operators, parentheses, dots and whitespace are
         extracted; each run that contains an operator is passed to
         ``safe_eval`` and the first one that does not come back as
         ``"Could not calculate"`` is returned as the answer.
         """
         operator_chars = frozenset('+-*/')
         for found in re.finditer(r'[\d\+\-\*\/\(\)\.\s]+', question):
             candidate = found.group()
             if operator_chars.isdisjoint(candidate):
                 # No arithmetic operator in this run; nothing to compute.
                 continue
             outcome = safe_eval(candidate.strip())
             if outcome == "Could not calculate":
                 continue
             return f"The answer is: {outcome}"
         return "Could not identify a mathematical expression."
     def handle_reversed_question(self, question: str) -> str:
         """Answer the "opposite of left" puzzle when the text is written backwards.

         Args:
             question: The raw question, possibly written back to front.

         Returns:
             ``"right"`` when the reversed text mentions "left", otherwise
             ``"Could not determine the reversed answer."``.
         """
         text = question.strip()
         # A sentence written backwards carries its final period at the FRONT,
         # so a period at either end is accepted (the trailing-period case is
         # kept for backward compatibility).
         if text.startswith('.') or text.endswith('.'):
             if 'left' in text[::-1].lower():
                 return "right"
         return "Could not determine the reversed answer."
     def handle_search_question(self, question: str) -> str:
         """Search for ``question`` and let the model answer from the results.

         When the search text contains ``"Could not find"`` it is returned
         unchanged instead of being passed to the model.
         """
         search_text = enhanced_search(question)
         if "Could not find" in search_text:
             return search_text
         return self.model.generate_answer(question, search_text)
     def handle_media_question(self, question: str) -> str:
         """Return a fixed explanation that media cannot be processed.

         Questions mentioning ``youtube.com`` get a YouTube-specific message;
         everything else gets the generic one.
         """
         mentions_youtube = 'youtube.com' in question
         if not mentions_youtube:
             return "I cannot process media files in this environment."
         return "I cannot access YouTube directly. Provide transcript or description."
     def handle_file_question(self, question: str) -> str:
         """Return the fixed notice that attached files cannot be read.

         ``question`` is accepted for a uniform handler signature and is unused.
         """
         unsupported_notice = "File access not supported here. Please paste the contents."
         return unsupported_notice
     def handle_general_question(self, question: str) -> str:
+        context = enhanced_search(question) if len(question.split()) > 10 else ""
+        return self.model.generate_answer(question, context)
     def __call__(self, question: str) -> str:
         try:
+            qtype = self.classify_question(question)
+            handler = getattr(self, f"handle_{qtype}_question", self.handle_general_question)
+            return handler(question)
         except Exception as e:
+            return f"Error: {e}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
         return "Please log in to Hugging Face to submit answers.", None
     username = profile.username
     space_id = os.getenv("SPACE_ID", "")
     questions_url = f"{DEFAULT_API_URL}/questions"
     submit_url = f"{DEFAULT_API_URL}/submit"
     except Exception as e:
         return f"Agent initialization failed: {e}", None
     try:
         r = requests.get(questions_url, timeout=15)
         r.raise_for_status()
         return f"Error fetching questions: {e}", None
     logs, answers = [], []
     for i, item in enumerate(questions):
+        task_id, question = item.get("task_id"), item.get("question")
         if not task_id or question is None:
             continue
         try:
             ans = agent(question)
             answers.append({"task_id": task_id, "submitted_answer": ans})
             logs.append({
                 "Task ID": task_id,
+                "Question": question,
+                "Answer": ans
             })
+        except Exception as e:
+            msg = f"Error: {e}"
+            answers.append({"task_id": task_id, "submitted_answer": msg})
+            logs.append({"Task ID": task_id, "Question": question, "Answer": msg})
     if not answers:
+        return "No answers produced.", pd.DataFrame(logs)
+    payload = {"username": username, "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main", "answers": answers}
     try:
         resp = requests.post(submit_url, json=payload, timeout=120)
         resp.raise_for_status()
         data = resp.json()
         score = data.get('score', 'N/A')
         correct = data.get('correct_count', '?')
         total = data.get('total_attempted', '?')
+        return (
+            f"🎯 Submission Results:\nScore: {score}% ({correct}/{total})\n"
             f"Target: 30% for GAIA benchmark\n"
             f"Status: {'✅ TARGET REACHED!' if isinstance(score, (int, float)) and score >= 30 else '📈 Keep improving!'}\n"
+            f"\nMessage: {data.get('message', '')}",
+            pd.DataFrame(logs)
         )
     except Exception as e:
         return f"❌ Submission failed: {e}", pd.DataFrame(logs)
 # Build the Gradio UI. Components are created inside the Blocks/Row/Column
 # context managers, so the statement order below is also the layout order.
 with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🤖 GAIA Benchmark Agent
+    - Enhanced reasoning
+    - Search + math
+    - Goal: 30%+ score
     """)
     # run_and_submit_all refuses to run without a profile, so a login control
     # is offered first.
     gr.LoginButton()
     with gr.Row():
         run_button = gr.Button("🚀 Run GAIA Evaluation", variant="primary", size="lg")
     with gr.Column():
         # Receives the first element returned by run_and_submit_all (status text).
+        status_box = gr.Textbox(label="📊 Evaluation Results", lines=10, interactive=False)
         # Receives the second element (a DataFrame of per-question logs, or None).
+        result_table = gr.DataFrame(label="📋 Detailed Results", wrap=True)
     # No explicit inputs are wired; presumably Gradio supplies the
     # gr.OAuthProfile argument from the handler's type hint - TODO confirm.
+    run_button.click(run_and_submit_all, outputs=[status_box, result_table])
 # Start the app only when this file is executed as a script.
 if __name__ == "__main__":
     print("🚀 Launching GAIA Agent...")
+    demo.launch(debug=True, share=False)