Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 28

Commit

34105a6

1 Parent(s): d3c0517

fixing ver3

Browse files

Files changed (1) hide show

app.py +240 -136

app.py CHANGED Viewed

@@ -27,23 +27,22 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 load_dotenv()
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
-# --- Constants (ULTRA FAST MODE) ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MAX_STEPS = 3  # Reduced to 3
-MAX_TOKENS = 64  # Very short responses
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-TIMEOUT_PER_QUESTION = 15  # 15 seconds max
-MAX_CONTEXT = 1024  # Very short context
 # --- Configure Environment ---
 os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["BITSANDBYTES_NOWELCOME"] = "1"
-print("Loading model (ULTRA FAST mode)...")
 start_time = time.time()
-# Minimal model loading
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
@@ -56,80 +55,83 @@ model = AutoModelForCausalLM.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_NAME,
     use_fast=True,
-    trust_remote_code=True,
-    padding_side="left"
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-# Pre-compile generation config
-GENERATION_CONFIG = GenerationConfig(
-    max_new_tokens=MAX_TOKENS,
-    temperature=0.3,
-    do_sample=True,
-    pad_token_id=tokenizer.pad_token_id,
-    eos_token_id=tokenizer.eos_token_id,
-    use_cache=False,
-    repetition_penalty=1.1
-)
 load_time = time.time() - start_time
 print(f"Model loaded in {load_time:.2f} seconds")
-# --- Lightning Fast Tools ---
 def web_search(query: str) -> str:
-    """Ultra-fast web search"""
     try:
         if SERPER_API_KEY:
-            params = {'q': query[:100], 'num': 1}  # Single result
             headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
                 json=params,
-                timeout=3
             )
             results = response.json()
             if 'organic' in results and results['organic']:
-                return f"{results['organic'][0]['title']}: {results['organic'][0]['snippet'][:200]}"
-            return "No results"
         else:
             with DDGS() as ddgs:
-                for result in ddgs.text(query, max_results=1):
-                    return f"{result['title']}: {result['body'][:200]}"
-            return "No results"
-    except:
-        return "Search failed"
 def calculator(expression: str) -> str:
-    """Lightning calculator"""
     try:
-        clean_expr = re.sub(r'[^\d+\-*/().\s]', '', str(expression))
         if not clean_expr.strip():
-            return "Invalid expression"
-        result = eval(clean_expr)  # Simple eval for speed
         return str(float(result))
-    except:
-        return "Calc error"
 def read_pdf(file_path: str) -> str:
-    """Fast PDF reader"""
     try:
         text = extract_text(file_path)
-        return text[:500] if text else "No PDF text"
-    except:
-        return "PDF error"
 def read_webpage(url: str) -> str:
-    """Fast webpage reader"""
     try:
-        response = requests.get(url, timeout=3, headers={'User-Agent': 'Bot'})
         soup = BeautifulSoup(response.text, 'html.parser')
         text = soup.get_text(separator=' ', strip=True)
-        return text[:500] if text else "No webpage text"
-    except:
-        return "Webpage error"
 TOOLS = {
     "web_search": web_search,
@@ -138,55 +140,74 @@ TOOLS = {
     "read_webpage": read_webpage
 }
-# --- Ultra Fast Agent ---
-class FastGAIA_Agent:
     def __init__(self):
         self.tools = TOOLS
-        self.prompt_template = (
-            "<|system|>You solve GAIA questions fast. Tools: web_search, calculator, read_pdf, read_webpage.\n"
-            "Format: ```json\n{\"tool\": \"name\", \"args\": {\"key\": \"value\"}}```\n"
-            "Always end with: Final Answer: [answer]<|end|>\n"
-            "<|user|>{history}<|end|>\n<|assistant|>"
         )
     def __call__(self, question: str) -> str:
         start_time = time.time()
         try:
-            history = f"Question: {question}"
             for step in range(MAX_STEPS):
                 if time.time() - start_time > TIMEOUT_PER_QUESTION:
-                    return "TIMEOUT"
-                response = self._fast_generate(history)
-                # Quick final answer check
                 if "Final Answer:" in response:
-                    answer = response.split("Final Answer:")[-1].strip().split('\n')[0]
-                    return answer[:200]  # Limit answer length
-                # Quick tool parsing
-                tool_result = self._quick_tool_use(response)
                 if tool_result:
-                    history += f"\nAction: {tool_result}"
                 else:
-                    history += f"\nThought: {response[:100]}"
-                # Keep history short
-                if len(history) > 800:
-                    history = history[-800:]
-            return "No solution found"
         except Exception as e:
-            return f"Error: {str(e)[:50]}"
-    def _fast_generate(self, history: str) -> str:
         try:
-            prompt = self.prompt_template.format(history=history)
-            # Fast tokenization
             inputs = tokenizer(
                 prompt,
                 return_tensors="pt",
@@ -195,72 +216,108 @@ class FastGAIA_Agent:
                 padding=False
             )
-            # Fast generation
             with torch.no_grad():
                 outputs = model.generate(
                     inputs.input_ids,
-                    generation_config=GENERATION_CONFIG,
                     attention_mask=inputs.attention_mask
                 )
-            # Fast decoding
-            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            response = response.split("<|assistant|>")[-1].strip()
-            # Immediate cleanup
             del inputs, outputs
             gc.collect()
             return response
         except Exception as e:
-            return f"Gen error: {str(e)}"
-    def _quick_tool_use(self, text: str) -> str:
         try:
-            # Quick JSON extraction
-            json_match = re.search(r'```json\s*({[^}]*})\s*```', text)
-            if not json_match:
-                return ""
-            tool_data = json.loads(json_match.group(1))
-            tool_name = tool_data.get("tool", "")
-            args = tool_data.get("args", {})
-            if tool_name in self.tools:
-                result = self.tools[tool_name](**args)
-                return f"Used {tool_name}: {str(result)[:150]}"
-        except:
-            pass
-        return ""
-# --- Lightning Fast Runner ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
-        return "❌ Please login first", None
     username = profile.username
-    # Quick setup
-    agent = FastGAIA_Agent()
     api_url = DEFAULT_API_URL
     space_id = os.getenv("SPACE_ID", "unknown")
-    print(f"🚀 ULTRA FAST mode - User: {username}")
-    # Fetch questions quickly
     try:
-        response = requests.get(f"{api_url}/questions", timeout=10)
         questions = response.json()
-        print(f"📝 Got {len(questions)} questions")
     except Exception as e:
-        return f"❌ Failed to get questions: {e}", None
-    # Process at lightning speed
     results = []
     answers = []
-    start_time = time.time()
     for i, item in enumerate(questions):
         task_id = item.get("task_id")
@@ -269,78 +326,125 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         if not task_id:
             continue
-        print(f"⚡ [{i+1}/{len(questions)}] {task_id[:8]}...")
         try:
             answer = agent(question)
             answers.append({"task_id": task_id, "submitted_answer": answer})
             results.append({
-                "ID": task_id[:8],
-                "Question": question[:60] + "...",
-                "Answer": answer[:80] + "..." if len(answer) > 80 else answer
             })
         except Exception as e:
-            error_ans = f"ERROR: {str(e)[:30]}"
-            answers.append({"task_id": task_id, "submitted_answer": error_ans})
             results.append({
-                "ID": task_id[:8],
-                "Question": question[:60] + "...",
-                "Answer": error_ans
             })
-        # Quick memory cleanup
-        if i % 5 == 0:
             gc.collect()
-    total_time = time.time() - start_time
-    print(f"⏱️  Completed in {total_time:.1f}s ({total_time/len(questions):.1f}s per question)")
     # Submit results
     try:
         submission = {
             "username": username,
             "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
             "answers": answers
         }
-        response = requests.post(f"{api_url}/submit", json=submission, timeout=30)
         result = response.json()
         status = (
-            f"🎯 ULTRA FAST RESULTS\n"
             f"👤 User: {result.get('username', username)}\n"
             f"📊 Score: {result.get('score', 'N/A')}% "
-            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
-            f"⏱️  Time: {total_time:.1f}s ({total_time/len(questions):.1f}s/question)\n"
-            f"💬 {result.get('message', 'Completed!')}"
         )
         return status, pd.DataFrame(results)
     except Exception as e:
-        error_status = f"❌ Submission failed: {str(e)}\n⏱️  Processing time: {total_time:.1f}s"
         return error_status, pd.DataFrame(results)
-# --- Ultra Simple UI ---
-with gr.Blocks(title="GAIA Agent - ULTRA FAST") as demo:
-    gr.Markdown("# ⚡ GAIA Agent - ULTRA FAST MODE")
-    gr.Markdown("**Speed settings:** 3 steps max • 64 tokens • 15s timeout • Lightning tools")
-    gr.LoginButton()
-    run_btn = gr.Button("🚀 RUN ULTRA FAST", variant="primary", size="lg")
-    status = gr.Textbox(label="📊 Results", lines=6, interactive=False)
-    table = gr.DataFrame(label="📋 Answers", interactive=False)
-    run_btn.click(run_and_submit_all, outputs=[status, table], show_progress=True)
 if __name__ == "__main__":
-    print("⚡ ULTRA FAST GAIA Agent Starting...")
-    print(f"⚙️  {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
     demo.launch(
-        share=True,  # Added share=True for public link
         server_name="0.0.0.0",
         server_port=7860,
         debug=False,

 load_dotenv()
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
+# --- Balanced Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MAX_STEPS = 4  # Reasonable steps
+MAX_TOKENS = 150  # Enough for reasoning
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
+TIMEOUT_PER_QUESTION = 25  # 25 seconds - enough time
+MAX_CONTEXT = 1500  # Reasonable context
 # --- Configure Environment ---
 os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["BITSANDBYTES_NOWELCOME"] = "1"
+print("Loading model (BALANCED FAST mode)...")
 start_time = time.time()
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_NAME,
     use_fast=True,
+    trust_remote_code=True
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 load_time = time.time() - start_time
 print(f"Model loaded in {load_time:.2f} seconds")
+# --- Reliable Tools ---
 def web_search(query: str) -> str:
+    """Fast but reliable web search"""
     try:
         if SERPER_API_KEY:
+            params = {'q': query[:150], 'num': 2}
             headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
                 json=params,
+                timeout=8
             )
             results = response.json()
             if 'organic' in results and results['organic']:
+                output = []
+                for r in results['organic'][:2]:
+                    output.append(f"{r['title']}: {r['snippet']}")
+                return " | ".join(output)
+            return "No search results found"
         else:
             with DDGS() as ddgs:
+                results = []
+                for r in ddgs.text(query, max_results=2):
+                    results.append(f"{r['title']}: {r['body'][:200]}")
+                return " | ".join(results) if results else "No search results"
+    except Exception as e:
+        return f"Search failed: {str(e)}"
 def calculator(expression: str) -> str:
+    """Reliable calculator"""
     try:
+        # Strip everything except ASCII digits, + - * / ( ) . and whitespace
+        clean_expr = re.sub(r'[^0-9+\-*/().\s]', '', str(expression))
         if not clean_expr.strip():
+            return "Invalid mathematical expression"
+        # Use numexpr for safety
+        result = numexpr.evaluate(clean_expr)
         return str(float(result))
+    except Exception as e:
+        return f"Calculation error: {str(e)}"
 def read_pdf(file_path: str) -> str:
+    """PDF reader with better error handling"""
     try:
         text = extract_text(file_path)
+        if text:
+            return text[:800]  # More text for context
+        return "No text could be extracted from PDF"
+    except Exception as e:
+        return f"PDF reading error: {str(e)}"
 def read_webpage(url: str) -> str:
+    """Reliable webpage reader"""
     try:
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
+        response = requests.get(url, timeout=8, headers=headers)
+        response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
+        for script in soup(["script", "style"]):
+            script.decompose()
         text = soup.get_text(separator=' ', strip=True)
+        return text[:800] if text else "No content found on webpage"
+    except Exception as e:
+        return f"Webpage error: {str(e)}"
 TOOLS = {
     "web_search": web_search,
     "read_webpage": read_webpage
 }
+# --- Balanced GAIA Agent ---
+class BalancedGAIA_Agent:
     def __init__(self):
         self.tools = TOOLS
+        self.system_prompt = (
+            "You are a GAIA problem solver. Available tools: web_search, calculator, read_pdf, read_webpage.\n"
+            "Think step by step and use tools when needed.\n\n"
+            "Tool usage format:\n"
+            "```json\n{\"tool\": \"tool_name\", \"args\": {\"parameter\": \"value\"}}\n```\n\n"
+            "Always end with: Final Answer: [your exact answer]\n\n"
+            "Example:\n"
+            "Question: What is 15 * 23?\n"
+            "I need to calculate 15 * 23.\n"
+            "```json\n{\"tool\": \"calculator\", \"args\": {\"expression\": \"15 * 23\"}}\n```\n"
+            "Final Answer: 345"
         )
     def __call__(self, question: str) -> str:
         start_time = time.time()
+        print(f"🤔 Solving: {question[:60]}...")
         try:
+            conversation = [f"Question: {question}"]
             for step in range(MAX_STEPS):
+                # Check timeout but be more generous
                 if time.time() - start_time > TIMEOUT_PER_QUESTION:
+                    print(f"⏰ Timeout after {TIMEOUT_PER_QUESTION}s")
+                    return "TIMEOUT: Question took too long to solve"
+                # Generate response
+                response = self._generate_response(conversation)
+                print(f"Step {step+1}: {response[:80]}...")
+                # Check for final answer
                 if "Final Answer:" in response:
+                    answer = self._extract_final_answer(response)
+                    elapsed = time.time() - start_time
+                    print(f"✅ Solved in {elapsed:.1f}s: {answer[:50]}...")
+                    return answer
+                # Try to use tools
+                tool_result = self._execute_tools(response)
                 if tool_result:
+                    conversation.append(f"Tool used: {tool_result}")
+                    print(f"🔧 Tool result: {tool_result[:60]}...")
                 else:
+                    conversation.append(f"Reasoning: {response}")
+                # Keep conversation manageable
+                if len(" ".join(conversation)) > 1200:
+                    conversation = conversation[-3:]  # Keep last 3 entries
+            print("❌ No solution found within step limit")
+            return "Could not solve within step limit"
         except Exception as e:
+            print(f"💥 Agent error: {str(e)}")
+            return f"Agent error: {str(e)}"
+    def _generate_response(self, conversation: List[str]) -> str:
         try:
+            # Build prompt
+            prompt = f"<|system|>\n{self.system_prompt}<|end|>\n"
+            prompt += f"<|user|>\n{chr(10).join(conversation)}<|end|>\n"
+            prompt += "<|assistant|>"
+            # Tokenize
             inputs = tokenizer(
                 prompt,
                 return_tensors="pt",
                 padding=False
             )
+            # Generate
+            generation_config = GenerationConfig(
+                max_new_tokens=MAX_TOKENS,
+                temperature=0.2,  # Lower temperature for more focused responses
+                do_sample=True,
+                pad_token_id=tokenizer.pad_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                use_cache=False
+            )
             with torch.no_grad():
                 outputs = model.generate(
                     inputs.input_ids,
+                    generation_config=generation_config,
                     attention_mask=inputs.attention_mask
                 )
+            # Decode
+            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = full_response.split("<|assistant|>")[-1].strip()
+            # Cleanup
             del inputs, outputs
             gc.collect()
             return response
         except Exception as e:
+            return f"Generation error: {str(e)}"
+    def _extract_final_answer(self, text: str) -> str:
+        """Extract the final answer more reliably"""
         try:
+            if "Final Answer:" in text:
+                answer_part = text.split("Final Answer:")[-1].strip()
+                # Take first line of the answer
+                answer = answer_part.split('\n')[0].strip()
+                return answer if answer else "No answer provided"
+            return "No final answer found"
+        except:
+            return "Answer extraction failed"
+    def _execute_tools(self, text: str) -> str:
+        """Execute tools found in the response"""
+        try:
+            # Look for JSON tool calls
+            json_pattern = r'```json\s*(\{[^}]*\})\s*```'
+            matches = re.findall(json_pattern, text, re.DOTALL)
+            for match in matches:
+                try:
+                    tool_call = json.loads(match)
+                    tool_name = tool_call.get("tool")
+                    args = tool_call.get("args", {})
+                    if tool_name in self.tools:
+                        print(f"🔧 Executing {tool_name} with {args}")
+                        result = self.tools[tool_name](**args)
+                        return f"{tool_name}: {str(result)[:400]}"
+                except json.JSONDecodeError:
+                    continue
+                except Exception as e:
+                    return f"Tool execution error: {str(e)}"
+            return None
+        except Exception as e:
+            return f"Tool parsing error: {str(e)}"
+# --- Efficient Runner ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
+        return "❌ Please login to Hugging Face first", None
     username = profile.username
+    print(f"🚀 Starting evaluation for user: {username}")
+    # Initialize agent
+    try:
+        agent = BalancedGAIA_Agent()
+    except Exception as e:
+        return f"❌ Failed to initialize agent: {e}", None
+    # Setup
     api_url = DEFAULT_API_URL
     space_id = os.getenv("SPACE_ID", "unknown")
+    # Fetch questions
     try:
+        print("📥 Fetching questions...")
+        response = requests.get(f"{api_url}/questions", timeout=15)
+        response.raise_for_status()
         questions = response.json()
+        print(f"📝 Retrieved {len(questions)} questions")
     except Exception as e:
+        return f"❌ Failed to fetch questions: {e}", None
+    # Process questions
     results = []
     answers = []
+    total_start = time.time()
     for i, item in enumerate(questions):
         task_id = item.get("task_id")
         if not task_id:
             continue
+        print(f"\n📋 [{i+1}/{len(questions)}] Task: {task_id}")
         try:
             answer = agent(question)
             answers.append({"task_id": task_id, "submitted_answer": answer})
+            # Truncate for display
+            q_display = question[:80] + "..." if len(question) > 80 else question
+            a_display = answer[:100] + "..." if len(answer) > 100 else answer
             results.append({
+                "Task": task_id[:8] + "...",
+                "Question": q_display,
+                "Answer": a_display,
+                "Status": "✅" if "error" not in answer.lower() and "timeout" not in answer.lower() else "❌"
             })
         except Exception as e:
+            error_answer = f"PROCESSING_ERROR: {str(e)}"
+            answers.append({"task_id": task_id, "submitted_answer": error_answer})
             results.append({
+                "Task": task_id[:8] + "...",
+                "Question": question[:80] + "..." if len(question) > 80 else question,
+                "Answer": error_answer,
+                "Status": "💥"
             })
+        # Memory cleanup
+        if i % 3 == 0:
             gc.collect()
+    total_time = time.time() - total_start
+    avg_time = total_time / len(questions)
+    print(f"\n⏱️ Total processing time: {total_time:.1f}s ({avg_time:.1f}s per question)")
     # Submit results
     try:
+        print("📤 Submitting results...")
         submission = {
             "username": username,
             "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
             "answers": answers
         }
+        response = requests.post(f"{api_url}/submit", json=submission, timeout=60)
+        response.raise_for_status()
         result = response.json()
+        # Calculate success rate
+        successful = sum(1 for r in results if r["Status"] == "✅")
+        success_rate = (successful / len(results)) * 100
         status = (
+            f"🎯 EVALUATION COMPLETED\n"
             f"👤 User: {result.get('username', username)}\n"
             f"📊 Score: {result.get('score', 'N/A')}% "
+            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
+            f"⚡ Processing: {total_time:.1f}s total, {avg_time:.1f}s/question\n"
+            f"✅ Success Rate: {success_rate:.1f}% ({successful}/{len(results)} processed)\n"
+            f"💬 Message: {result.get('message', 'Evaluation completed!')}"
         )
         return status, pd.DataFrame(results)
     except Exception as e:
+        error_status = (
+            f"❌ SUBMISSION FAILED\n"
+            f"Error: {str(e)}\n"
+            f"⏱️ Processing completed in {total_time:.1f}s\n"
+            f"✅ Questions processed: {len(results)}"
+        )
         return error_status, pd.DataFrame(results)
+# --- Clean UI ---
+with gr.Blocks(title="GAIA Agent - Balanced Fast") as demo:
+    gr.Markdown("# ⚡ GAIA Agent - Balanced Fast Mode")
+    gr.Markdown(
+        """
+        **Optimized for reliability and speed:**
+        - 4 reasoning steps max
+        - 25 second timeout per question
+        - 150 token responses
+        - Enhanced error handling
+        """
+    )
+    with gr.Row():
+        gr.LoginButton()
+    with gr.Row():
+        run_btn = gr.Button("🚀 Run Balanced Evaluation", variant="primary", size="lg")
+    with gr.Row():
+        status = gr.Textbox(
+            label="📊 Evaluation Status & Results",
+            lines=8,
+            interactive=False,
+            placeholder="Ready to run evaluation. Please login first."
+        )
+    with gr.Row():
+        table = gr.DataFrame(
+            label="📋 Question Results",
+            interactive=False,
+            wrap=True
+        )
+    run_btn.click(
+        fn=run_and_submit_all,
+        outputs=[status, table],
+        show_progress=True
+    )
 if __name__ == "__main__":
+    print("⚡ GAIA Agent - Balanced Fast Mode Starting...")
+    print(f"⚙️ Settings: {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
     demo.launch(
+        share=True,
         server_name="0.0.0.0",
         server_port=7860,
         debug=False,