LamiaYT committed on
Commit 57b9551 · 1 Parent(s): d66e9b7

fixing ver3

Files changed (1)
  1. app.py +43 -89
app.py CHANGED
@@ -5,10 +5,9 @@ import json
 import re
 import numexpr
 import pandas as pd
-import math
 from pdfminer.high_level import extract_text
 from bs4 import BeautifulSoup
-from typing import Dict, Any, List, Tuple, Optional
+from typing import List, Dict, Optional, Tuple
 from dotenv import load_dotenv
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import torch
@@ -21,14 +20,14 @@ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
 
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MAX_STEPS = 6 # Increased from 4
-MAX_TOKENS = 256 # Increased from 128
+MAX_STEPS = 6
+MAX_TOKENS = 256
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-TIMEOUT_PER_QUESTION = 45 # Increased from 30
-MAX_RESULT_LENGTH = 500 # For tool outputs
+TIMEOUT_PER_QUESTION = 45
+MAX_RESULT_LENGTH = 500
 
-# --- Model Loading ---
-print("Loading optimized model...")
+# --- Fixed Model Loading ---
+print("Loading model with fixed configuration...")
 start_time = time.time()
 
 model = AutoModelForCausalLM.from_pretrained(
@@ -50,12 +49,12 @@ if tokenizer.pad_token is None:
 
 print(f"Model loaded in {time.time() - start_time:.2f} seconds")
 
-# --- Enhanced Tools ---
+# --- Tools Implementation ---
 def web_search(query: str) -> str:
-    """Enhanced web search with better result parsing"""
+    """Enhanced web search with better error handling"""
     try:
         if SERPER_API_KEY:
-            params = {'q': query, 'num': 3, 'hl': 'en', 'gl': 'us'}
+            params = {'q': query, 'num': 3}
             headers = {'X-API-KEY': SERPER_API_KEY}
             response = requests.post(
                 'https://google.serper.dev/search',
@@ -64,97 +63,64 @@ def web_search(query: str) -> str:
                 timeout=10
             )
             results = response.json()
-
             if 'organic' in results:
-                output = []
-                for r in results['organic'][:3]:
-                    if 'title' in r and 'snippet' in r:
-                        output.append(f"{r['title']}: {r['snippet']}")
-                return "\n".join(output)[:MAX_RESULT_LENGTH]
-            return "No relevant results found"
+                return "\n".join([f"{r['title']}: {r['snippet']}" for r in results['organic'][:3]])[:MAX_RESULT_LENGTH]
+            return "No search results found"
         else:
-            with DDGS() as ddgs:
-                results = [r for r in ddgs.text(query, max_results=3)]
-                return "\n".join([f"{r['title']}: {r['body']}" for r in results])[:MAX_RESULT_LENGTH]
+            return "Search API key not configured"
     except Exception as e:
         return f"Search error: {str(e)}"
 
 def calculator(expression: str) -> str:
-    """More robust calculator with validation"""
+    """Safe mathematical evaluation"""
     try:
-        # Clean and validate expression
         expression = re.sub(r'[^\d+\-*/().^%,\s]', '', expression)
         if not expression:
             return "Invalid empty expression"
-
-        # Handle percentages and commas
-        expression = expression.replace('%', '/100').replace(',', '')
-        result = numexpr.evaluate(expression)
-        return str(float(result))
+        return str(numexpr.evaluate(expression))
     except Exception as e:
         return f"Calculation error: {str(e)}"
 
-def read_pdf(file_path: str) -> str:
-    """PDF reader with better text extraction"""
-    try:
-        text = extract_text(file_path)
-        if not text:
-            return "No readable text found in PDF"
-
-        # Clean and condense text
-        text = re.sub(r'\s+', ' ', text).strip()
-        return text[:MAX_RESULT_LENGTH]
-    except Exception as e:
-        return f"PDF read error: {str(e)}"
-
 def read_webpage(url: str) -> str:
-    """Improved webpage reader with better content extraction"""
+    """Robust webpage content extraction"""
     try:
         headers = {'User-Agent': 'Mozilla/5.0'}
         response = requests.get(url, timeout=10, headers=headers)
-        response.raise_for_status()
-
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Remove unwanted elements
         for element in soup(['script', 'style', 'nav', 'footer']):
             element.decompose()
 
-        # Get text with better formatting
         text = soup.get_text(separator='\n', strip=True)
-        text = re.sub(r'\n{3,}', '\n\n', text)
-
-        return text[:MAX_RESULT_LENGTH] if text else "No main content found"
+        return re.sub(r'\n{3,}', '\n\n', text)[:MAX_RESULT_LENGTH]
     except Exception as e:
-        return f"Webpage read error: {str(e)}"
+        return f"Webpage error: {str(e)}"
 
 TOOLS = {
     "web_search": web_search,
     "calculator": calculator,
-    "read_pdf": read_pdf,
     "read_webpage": read_webpage
 }
 
-# --- Improved GAIA Agent ---
+# --- Fixed GAIA Agent ---
 class GAIA_Agent:
     def __init__(self):
         self.tools = TOOLS
         self.system_prompt = """You are an advanced GAIA problem solver. Follow these steps:
-1. Analyze the question carefully
-2. Choose the most appropriate tool
-3. Process the results
-4. Provide a precise final answer
+1. Analyze the question
+2. Choose the best tool
+3. Process results
+4. Provide final answer
 
-Available Tools:
-- web_search: For general knowledge questions
-- calculator: For math problems
-- read_pdf: For PDF content extraction
-- read_webpage: For webpage content extraction
+Tools:
+- web_search: For general knowledge
+- calculator: For math
+- read_webpage: For web content
 
 Tool format: ```json
 {"tool": "tool_name", "args": {"arg1": value}}```
 
-Always end with: Final Answer: [your answer]"""
+Always end with: Final Answer: [answer]"""
 
     def __call__(self, question: str) -> str:
         start_time = time.time()
@@ -169,21 +135,20 @@ Always end with: Final Answer: [your answer]"""
                 response = self._call_model(prompt)
 
                 if "Final Answer:" in response:
-                    answer = response.split("Final Answer:")[-1].strip()
-                    return answer[:500] # Limit answer length
+                    return response.split("Final Answer:")[-1].strip()[:500]
 
                 tool_call = self._parse_tool_call(response)
                 if tool_call:
                     tool_name, args = tool_call
                     observation = self._use_tool(tool_name, args)
-                    history.append(f"Tool Used: {tool_name}")
-                    history.append(f"Tool Result: {observation[:300]}...") # Truncate long results
+                    history.append(f"Tool: {tool_name}")
+                    history.append(f"Result: {observation[:300]}...")
                 else:
-                    history.append(f"Analysis: {response}")
+                    history.append(f"Thought: {response}")
 
                 gc.collect()
 
-            return "Maximum steps reached without final answer"
+            return "Maximum steps reached"
         except Exception as e:
             return f"Error: {str(e)}"
 
@@ -199,21 +164,17 @@ Always end with: Final Answer: [your answer]"""
             padding=False
         )
 
-        generation_config = GenerationConfig(
+        # Fixed generation config without problematic parameters
+        outputs = model.generate(
+            inputs.input_ids,
             max_new_tokens=MAX_TOKENS,
             temperature=0.3,
             top_p=0.9,
             do_sample=True,
-            pad_token_id=tokenizer.pad_token_id
+            pad_token_id=tokenizer.pad_token_id,
+            attention_mask=inputs.attention_mask
         )
 
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs.input_ids,
-                generation_config=generation_config,
-                attention_mask=inputs.attention_mask
-            )
-
         return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
 
     def _parse_tool_call(self, text: str) -> Optional[Tuple[str, Dict]]:
@@ -232,11 +193,9 @@ Always end with: Final Answer: [your answer]"""
             return f"Unknown tool: {tool_name}"
 
         try:
-            # Special handling for URL-containing questions
+            # Handle URL extraction for webpage reading
            if tool_name == "read_webpage" and "url" not in args:
-                if "args" in args and isinstance(args["args"], dict) and "url" in args["args"]:
-                    args = args["args"]
-                elif "http" in str(args):
+                if "http" in str(args):
                     url = re.search(r'https?://[^\s]+', str(args)).group()
                     args = {"url": url}
 
@@ -293,14 +252,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return f"Submission failed: {str(e)}", pd.DataFrame(results)
 
 # --- Gradio Interface ---
-with gr.Blocks(title="Enhanced GAIA Agent") as demo:
-    gr.Markdown("## 🚀 Enhanced GAIA Agent Evaluation")
-    gr.Markdown("""
-    Improved version with:
-    - Better tool utilization
-    - Increased step/token limits
-    - Enhanced error handling
-    """)
+with gr.Blocks(title="Fixed GAIA Agent") as demo:
+    gr.Markdown("## 🛠️ Fixed GAIA Agent")
+    gr.Markdown("Resolved the 'DynamicCache' error with improved configuration")
 
     with gr.Row():
         gr.LoginButton()
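
Note (reader's aside, not part of the commit): the substantive change in `_call_model` above is that the separate `GenerationConfig` object and the `torch.no_grad()` wrapper are dropped, and the sampling arguments are passed directly to `model.generate`. Below is a minimal standalone sketch of that pattern, assuming the same Phi-3 checkpoint; the prompt string and loading options are illustrative, not taken from the diff.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# Depending on the installed transformers version, trust_remote_code=True may be needed here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # same pad-token fallback used in app.py

# Hypothetical prompt; the real app builds this from the question and tool history.
prompt = "<|user|>\nWhat is 17 * 24?<|end|>\n<|assistant|>"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=False)

# Sampling arguments go straight to generate() instead of through a GenerationConfig
# wrapped in torch.no_grad(), mirroring the fixed _call_model above.
outputs = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=256,
    temperature=0.3,
    top_p=0.9,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip())
```

Whether this actually clears the 'DynamicCache' error mentioned in the new Gradio description depends on the installed transformers version; the diff itself only changes how the generation arguments are supplied.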
 