Final_Assignment_Template

Runtime error

App Files Community

LamiaYT committed on Jun 28

Commit

8c139ea

1 Parent(s): 1d9a78b

Fixing

Browse files

Files changed (2) hide show

app.py +185 -68
requirements.txt +11 -24

app.py CHANGED Viewed

@@ -11,11 +11,13 @@ from duckduckgo_search import DDGS
 from pdfminer.high_level import extract_text
 from bs4 import BeautifulSoup
 import html2text
-from typing import Dict, Any, List, Tuple, Callable
 from dotenv import load_dotenv
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import time
 # --- Load Environment Variables ---
 load_dotenv()
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
@@ -31,24 +33,32 @@ os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["BITSANDBYTES_NOWELCOME"] = "1"
-MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
 print("Loading model (CPU-compatible)...")
 start_time = time.time()
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
-    torch_dtype=torch.float32  # Use float32 for CPU compatibility
 )
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
 load_time = time.time() - start_time
 print(f"Model loaded in {load_time:.2f} seconds")
 # --- Tools for GAIA Agent ---
 def web_search(query: str) -> str:
     """Search the web using DuckDuckGo or Serper API"""
@@ -68,7 +78,8 @@ def web_search(query: str) -> str:
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
-                json=params
             )
             results = response.json()
             if 'organic' in results:
@@ -85,23 +96,37 @@ def web_search(query: str) -> str:
 def calculator(expression: str) -> str:
     """Evaluate mathematical expressions safely"""
     try:
-        return str(numexpr.evaluate(expression))
     except Exception as e:
         return f"Calculation error: {str(e)}"
 def read_pdf(file_path: str) -> str:
     """Extract text from PDF files"""
     try:
-        return extract_text(file_path)[:2000]  # Limit to first 2000 characters
     except Exception as e:
         return f"PDF read error: {str(e)}"
 def read_webpage(url: str) -> str:
     """Fetch and extract text from web pages"""
     try:
-        response = requests.get(url, timeout=10)
         soup = BeautifulSoup(response.text, 'html.parser')
-        return soup.get_text(separator=' ', strip=True)[:2000]  # Limit text
     except Exception as e:
         return f"Webpage read error: {str(e)}"
@@ -139,24 +164,33 @@ class GAIA_Agent:
         print(f"\nProcessing: {question[:80]}...")
         self.history = [f"Question: {question}"]
-        for step in range(MAX_STEPS):
-            prompt = self._build_prompt()
-            response = self._call_model(prompt)
-            if "Final Answer" in response:
-                answer = response.split("Final Answer:")[-1].strip()
-                print(f"Final Answer: {answer}")
-                return answer
-            tool_call = self._parse_tool_call(response)
-            if tool_call:
-                tool_name, args = tool_call
-                observation = self._use_tool(tool_name, args)
-                self.history.append(f"Observation: {observation}")
-            else:
-                self.history.append(f"Thought: {response}")
-        return "Agent couldn't find solution within step limit"
     def _build_prompt(self) -> str:
         prompt = "<|system|>\n" + self.system_prompt + "<|end|>\n"
@@ -167,28 +201,60 @@ class GAIA_Agent:
     def _call_model(self, prompt: str) -> str:
         start_time = time.time()
-        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(model.device)
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=MAX_TOKENS,
-            temperature=0.01,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
-        )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response = response.split("<|assistant|>")[-1].strip()
-        gen_time = time.time() - start_time
-        print(f"Generated {len(response)} tokens in {gen_time:.2f}s: {response[:60]}...")
-        return response
-    def _parse_tool_call(self, text: str) -> Tuple[str, Dict] or None:
         try:
             json_match = re.search(r'```json\s*({.*?})\s*```', text, re.DOTALL)
             if json_match:
                 tool_call = json.loads(json_match.group(1))
-                return tool_call["tool"], tool_call["args"]
         except Exception as e:
             print(f"Tool parse error: {str(e)}")
         return None
@@ -230,12 +296,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
     # Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
@@ -253,19 +319,38 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
@@ -283,7 +368,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     # Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
@@ -314,42 +399,74 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return status_message, results_df
 # --- Gradio Interface ---
-with gr.Blocks() as demo:
     gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1. Log in to your Hugging Face account
-        2. Click 'Run Evaluation & Submit All Answers'
-        3. View results and score
-        **Agent Info:**
-        - Model: Phi-3-mini-4k-instruct (4-bit quantized)
         - Tools: Web Search, Calculator, PDF Reader, Webpage Reader
-        - Max Steps: 6
         """
     )
-    gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
-        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     space_host = os.getenv("SPACE_HOST")
     space_id = os.getenv("SPACE_ID")
     if space_host:
         print(f"✅ SPACE_HOST found: {space_host}")
     if space_id:
         print(f"✅ SPACE_ID found: {space_id}")
-    print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface...")
-    demo.launch(debug=True, share=False)

 from pdfminer.high_level import extract_text
 from bs4 import BeautifulSoup
 import html2text
+from typing import Dict, Any, List, Tuple, Callable, Optional
 from dotenv import load_dotenv
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import torch
 import time
+import gc
 # --- Load Environment Variables ---
 load_dotenv()
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["BITSANDBYTES_NOWELCOME"] = "1"
 print("Loading model (CPU-compatible)...")
 start_time = time.time()
+# Load model with explicit configuration for better compatibility
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
+    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
+    device_map="cpu",  # Explicitly set to CPU
+    low_cpu_mem_usage=True,  # Optimize for low memory usage
+    use_cache=False  # Disable cache to avoid DynamicCache issues
+)
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_NAME,
+    use_fast=False,
+    trust_remote_code=True
 )
+# Ensure pad token is set
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
 load_time = time.time() - start_time
 print(f"Model loaded in {load_time:.2f} seconds")
 # --- Tools for GAIA Agent ---
 def web_search(query: str) -> str:
     """Search the web using DuckDuckGo or Serper API"""
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
+                json=params,
+                timeout=10
             )
             results = response.json()
             if 'organic' in results:
 def calculator(expression: str) -> str:
     """Evaluate mathematical expressions safely"""
     try:
+        # Clean the expression
+        expression = re.sub(r'[^\d+\-*/().\s]', '', expression)
+        result = numexpr.evaluate(expression)
+        return str(result)
     except Exception as e:
         return f"Calculation error: {str(e)}"
 def read_pdf(file_path: str) -> str:
     """Extract text from PDF files"""
     try:
+        text = extract_text(file_path)
+        return text[:2000] if text else "No text found in PDF"
     except Exception as e:
         return f"PDF read error: {str(e)}"
 def read_webpage(url: str) -> str:
     """Fetch and extract text from web pages"""
     try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        }
+        response = requests.get(url, timeout=10, headers=headers)
+        response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
+        # Remove script and style elements
+        for script in soup(["script", "style"]):
+            script.decompose()
+        text = soup.get_text(separator=' ', strip=True)
+        return text[:2000] if text else "No text found on webpage"
     except Exception as e:
         return f"Webpage read error: {str(e)}"
         print(f"\nProcessing: {question[:80]}...")
         self.history = [f"Question: {question}"]
+        try:
+            for step in range(MAX_STEPS):
+                prompt = self._build_prompt()
+                response = self._call_model(prompt)
+                if "Final Answer" in response:
+                    answer = response.split("Final Answer:")[-1].strip()
+                    print(f"Final Answer: {answer}")
+                    return answer
+                tool_call = self._parse_tool_call(response)
+                if tool_call:
+                    tool_name, args = tool_call
+                    observation = self._use_tool(tool_name, args)
+                    self.history.append(f"Observation: {observation}")
+                else:
+                    self.history.append(f"Thought: {response}")
+                # Clean up memory after each step
+                if step % 2 == 0:
+                    gc.collect()
+            return "Agent couldn't find solution within step limit"
+        except Exception as e:
+            print(f"Error in agent execution: {str(e)}")
+            return f"Agent error: {str(e)}"
     def _build_prompt(self) -> str:
         prompt = "<|system|>\n" + self.system_prompt + "<|end|>\n"
     def _call_model(self, prompt: str) -> str:
         start_time = time.time()
+        try:
+            # Tokenize input
+            inputs = tokenizer(
+                prompt,
+                return_tensors="pt",
+                return_attention_mask=True,
+                truncation=True,
+                max_length=3072  # Leave room for generation
+            )
+            # Move to same device as model
+            inputs = {k: v.to(model.device) for k, v in inputs.items()}
+            # Create generation config
+            generation_config = GenerationConfig(
+                max_new_tokens=MAX_TOKENS,
+                temperature=0.01,
+                do_sample=True,
+                pad_token_id=tokenizer.pad_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                use_cache=False  # Disable cache to avoid DynamicCache issues
+            )
+            # Generate response
+            with torch.no_grad():
+                outputs = model.generate(
+                    **inputs,
+                    generation_config=generation_config
+                )
+            # Decode response
+            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = full_response.split("<|assistant|>")[-1].strip()
+            gen_time = time.time() - start_time
+            print(f"Generated {len(response)} tokens in {gen_time:.2f}s: {response[:60]}...")
+            # Clean up
+            del inputs, outputs
+            gc.collect()
+            return response
+        except Exception as e:
+            print(f"Model generation error: {str(e)}")
+            return f"Generation error: {str(e)}"
+    def _parse_tool_call(self, text: str) -> Optional[Tuple[str, Dict]]:
         try:
             json_match = re.search(r'```json\s*({.*?})\s*```', text, re.DOTALL)
             if json_match:
                 tool_call = json.loads(json_match.group(1))
+                if "tool" in tool_call and "args" in tool_call:
+                    return tool_call["tool"], tool_call["args"]
         except Exception as e:
             print(f"Tool parse error: {str(e)}")
         return None
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code URL: {agent_code}")
     # Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            print(f"Processing question {i+1}/{len(questions_data)}")
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer
+            })
+            # Clean up memory periodically
+            if i % 5 == 0:
+                gc.collect()
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
+            error_answer = f"AGENT ERROR: {str(e)}"
+            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": error_answer
+            })
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
     # Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
         return status_message, results_df
 # --- Gradio Interface ---
+with gr.Blocks(title="GAIA Agent Evaluation") as demo:
     gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1. Log in to your Hugging Face account using the button below
+        2. Click 'Run Evaluation & Submit All Answers' to start the evaluation
+        3. View results and score in the output sections
+        **Agent Information:**
+        - Model: Phi-3-mini-4k-instruct (CPU optimized)
         - Tools: Web Search, Calculator, PDF Reader, Webpage Reader
+        - Max Steps: 6 per question
+        - Memory: Optimized for 2vCPU/16GB environment
         """
     )
+    with gr.Row():
+        gr.LoginButton()
+    with gr.Row():
+        run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary", size="lg")
+    with gr.Row():
+        status_output = gr.Textbox(
+            label="Evaluation Status & Submission Result",
+            lines=5,
+            interactive=False,
+            placeholder="Click the button above to start evaluation..."
+        )
+    with gr.Row():
+        results_table = gr.DataFrame(
+            label="Questions and Agent Answers",
+            wrap=True,
+            interactive=False
+        )
     run_button.click(
         fn=run_and_submit_all,
+        outputs=[status_output, results_table],
+        show_progress=True
     )
 if __name__ == "__main__":
+    print("\n" + "="*50)
+    print("GAIA Agent Evaluation System Starting")
+    print("="*50)
     space_host = os.getenv("SPACE_HOST")
     space_id = os.getenv("SPACE_ID")
     if space_host:
         print(f"✅ SPACE_HOST found: {space_host}")
+    else:
+        print("⚠️  SPACE_HOST not found")
     if space_id:
         print(f"✅ SPACE_ID found: {space_id}")
+    else:
+        print("⚠️  SPACE_ID not found")
+    print("="*50)
     print("Launching Gradio Interface...")
+    demo.launch(
+        debug=False,  # Disable debug in production
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )

requirements.txt CHANGED Viewed

@@ -1,26 +1,13 @@
-# Core dependencies
 gradio>=4.0.0
-requests>=2.31.0
-pandas>=2.0.0
-# Local LLM support
-ctransformers>=0.2.27
-# Mathematical operations
-numpy>=1.24.0
-# Logging and utilities
-python-dotenv>=1.0.0
-# Additional utilities for text processing
-regex>=2023.10.3
-numexpr
-torch
-pdfminer.six
-transformers>=4.0.0
-duckduckgo-search>=0.8
-beautifulsoup4>=4.12.0
 html2text>=2020.1.16
-bitsandbytes
-accelerate
-sentencepiece

 gradio>=4.0.0
+torch>=2.0.0
+transformers>=4.35.0
+requests>=2.25.0
+pandas>=1.3.0
+numpy>=1.21.0
+duckduckgo-search>=3.8.0
+pdfminer.six>=20220524
+beautifulsoup4>=4.9.0
 html2text>=2020.1.16
+numexpr>=2.8.0
+python-dotenv>=0.19.0
+accelerate>=0.20.0