Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 25

Commit

26e4907

1 Parent(s): c549c70

Initial commit with LlamaIndex-based agent

Browse files

Files changed (1) hide show

app.py +319 -343

app.py CHANGED Viewed

@@ -1,14 +1,17 @@
-# app.py
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.core.tools import FunctionTool
-from transformers import AutoTokenizer
 import os
 import gradio as gr
 import requests
 import pandas as pd
 import traceback
 import torch
 # Import real tool dependencies
 try:
@@ -27,262 +30,317 @@ except ImportError:
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Advanced Agent Definition ---
 class SmartAgent:
     def __init__(self):
-        print("Initializing Local LLM Agent...")
-        # Check available memory and CUDA
-        if torch.cuda.is_available():
-            print(f"CUDA available. GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
-        else:
-            print("CUDA not available, using CPU")
-        # Use a smaller, more efficient model for Hugging Face Spaces
         model_options = [
-            "microsoft/DialoGPT-medium",  # Much smaller, works well for chat
-            "google/flan-t5-base",        # Good for reasoning tasks
-            "microsoft/DialoGPT-small",   # Smallest fallback
-            "HuggingFaceH4/zephyr-7b-beta"  # Original (may fail in limited memory)
         ]
-        model_name = model_options[1]  # Start with flan-t5-base
-        print(f"Attempting to load model: {model_name}")
-        try:
-            # Initialize with memory-efficient settings
-            self.llm = HuggingFaceLLM(
-                model_name=model_name,
-                tokenizer_name=model_name,
-                context_window=1024,  # Increased for better reasoning
-                max_new_tokens=256,   # Increased for better responses
-                generate_kwargs={
-                    "temperature": 0.3,   # Lower temperature for more focused responses
-                    "do_sample": True,
-                    "top_p": 0.9,
-                    "repetition_penalty": 1.1
-                },
-                device_map="auto",
-                # Add memory optimization parameters
-                model_kwargs={
-                    "torch_dtype": torch.float16,  # Use half precision
-                    "low_cpu_mem_usage": True,
-                    "load_in_8bit": True,  # Enable 8-bit quantization if available
-                },
-                # Add system message for better instruction following
-                system_message="You are a helpful AI assistant that can search the web and perform calculations. Always provide detailed, accurate answers."
-            )
-            print(f"Successfully loaded model: {model_name}")
-        except Exception as e:
-            print(f"Failed to load {model_name}: {e}")
-            # Fallback to an even smaller model
-            try:
-                fallback_model = "microsoft/DialoGPT-small"
-                print(f"Falling back to: {fallback_model}")
-                self.llm = HuggingFaceLLM(
-                    model_name=fallback_model,
-                    tokenizer_name=fallback_model,
-                    context_window=256,
-                    max_new_tokens=64,
-                    generate_kwargs={"temperature": 0.7, "do_sample": True},
-                    device_map="cpu",  # Force CPU to avoid memory issues
-                    model_kwargs={"low_cpu_mem_usage": True}
-                )
-                print(f"Successfully loaded fallback model: {fallback_model}")
-            except Exception as e2:
-                print(f"Flan-T5 also failed: {e2}")
-                # Try an even more basic approach with a very small model
-                try:
-                    basic_model = "microsoft/DialoGPT-small"
-                    print(f"Final fallback to: {basic_model}")
-                    self.llm = HuggingFaceLLM(
-                        model_name=basic_model,
-                        tokenizer_name=basic_model,
-                        context_window=512,
-                        max_new_tokens=128,
-                        generate_kwargs={"temperature": 0.3, "do_sample": True},
-                        device_map="cpu",  # Force CPU to avoid memory issues
-                        model_kwargs={"low_cpu_mem_usage": True}
-                    )
-                    print(f"Successfully loaded final fallback: {basic_model}")
-                except Exception as e3:
-                    print(f"All model loading attempts failed: {e3}")
-                    raise Exception("Unable to load any language model")
-        # Define tools with real implementations
         self.tools = [
             FunctionTool.from_defaults(
-                fn=self.web_search,
                 name="web_search",
-                description="Searches the web for current information using DuckDuckGo when questions require up-to-date knowledge"
             ),
             FunctionTool.from_defaults(
-                fn=self.math_calculator,
                 name="math_calculator",
-                description="Performs mathematical calculations and symbolic math using SymPy when questions involve numbers or equations"
             )
         ]
-        # Create ReAct agent with tools
         try:
             self.agent = ReActAgent.from_tools(
                 tools=self.tools,
                 llm=self.llm,
                 verbose=True,
-                max_iterations=3  # Limit iterations to prevent infinite loops
             )
-            print("Local LLM Agent initialized successfully.")
         except Exception as e:
-            print(f"Error creating ReAct agent: {e}")
-            # Create a simple fallback agent
             self.agent = None
-            print("Using fallback direct tool calling approach")
-    def web_search(self, query: str) -> str:
-        """Real web search using DuckDuckGo"""
-        print(f"Web search triggered for: {query[:50]}...")
         if not DDGS:
-            return "Web search unavailable - duckduckgo_search not installed"
         try:
             with DDGS() as ddgs:
-                results = list(ddgs.text(query, max_results=5))  # Get more results
-                if results:
-                    formatted_results = []
-                    for i, r in enumerate(results, 1):
-                        title = r.get('title', 'No title')
-                        body = r.get('body', 'No description')[:300]  # More context
-                        url = r.get('href', '')
-                        formatted_results.append(f"{i}. **{title}**\n{body}...\nSource: {url}")
-                    return "\n\n".join(formatted_results)
-                else:
-                    return f"No search results found for '{query}'. Try rephrasing your search terms."
         except Exception as e:
-            print(f"Web search error: {e}")
-            return f"Error during web search for '{query}': {str(e)}"
-    def math_calculator(self, expression: str) -> str:
-        """Safe math evaluation using SymPy"""
-        print(f"Math calculation triggered for: {expression}")
-        if not sympify:
-            # Fallback to basic eval with safety checks
-            try:
-                # Only allow basic math operations
-                allowed_chars = set('0123456789+-*/().^ ')
-                if not all(c in allowed_chars for c in expression.replace(' ', '')):
-                    return "Error: Only basic math operations are allowed"
-                result = eval(expression.replace('^', '**'))
-                return str(result)
-            except Exception as e:
-                return f"Error: Could not evaluate the mathematical expression - {str(e)}"
-        try:
-            # Use SymPy for safe evaluation
-            result = sympify(expression).evalf()
-            return str(result)
-        except SympifyError as e:
-            return f"Error: Could not parse the mathematical expression - {str(e)}"
-        except Exception as e:
-            return f"Error: Calculation failed - {str(e)}"
-    def __call__(self, question: str) -> str:
-        print(f"Processing question (first 50 chars): {question[:50]}...")
-        # Enhanced reasoning approach
-        question_lower = question.lower()
-        # Check if we need to analyze files
-        if any(word in question_lower for word in ['file', 'excel', 'csv', 'spreadsheet', 'data', 'attached']):
-            return "I cannot access attached files in this environment. Please ensure the file is accessible via a direct URL or describe the data content directly in your question."
-        # Check if we need web search
-        needs_web_search = any(word in question_lower for word in [
-            'current', 'latest', 'recent', 'today', 'news', 'who is', 'what is',
-            'competition', 'winner', 'recipient', 'nationality', 'country',
-            'malko', 'century', 'award', 'born', 'died'
-        ])
-        # Check if we need math calculation
-        needs_calculation = any(word in question_lower for word in [
-            'calculate', 'compute', 'sum', 'total', 'average', 'percentage',
-            'equation', 'solve', 'math', 'number'
-        ]) or any(char in question for char in '+-*/=()0123456789')
         try:
-            if self.agent:
-                # Try using the ReAct agent first
-                response = self.agent.query(question)
-                response_str = str(response)
-                # Check if the response is too short or nonsensical
-                if len(response_str.strip()) < 3 or response_str.strip() in ['!', '?', 'what', 'I', 'The', 'A']:
-                    print("Agent gave a poor response, trying direct tool approach...")
-                    return self._direct_tool_approach(question, needs_web_search, needs_calculation)
-                return response_str
             else:
-                # Use direct tool approach
-                return self._direct_tool_approach(question, needs_web_search, needs_calculation)
         except Exception as e:
-            print(f"Agent error: {str(e)}")
-            print(f"Full traceback: {traceback.format_exc()}")
-            # Try direct tool approach as fallback
-            try:
-                return self._direct_tool_approach(question, needs_web_search, needs_calculation)
-            except:
-                return f"I apologize, but I'm having technical difficulties processing your question. The question appears to be: {question[:100]}..."
-    def _direct_tool_approach(self, question: str, needs_web_search: bool, needs_calculation: bool) -> str:
-        """Direct tool usage when agent fails"""
-        if needs_web_search:
-            # Extract key search terms
-            search_terms = []
-            important_words = question.split()
-            for word in important_words:
-                if len(word) > 3 and word.lower() not in ['what', 'when', 'where', 'who', 'how', 'the', 'and', 'or', 'but', 'from', 'with']:
-                    search_terms.append(word)
-            search_query = ' '.join(search_terms[:5])  # Limit to 5 key terms
-            print(f"Performing web search for: {search_query}")
-            search_result = self.web_search(search_query)
-            return f"Based on my web search for '{search_query}':\n\n{search_result}\n\nPlease review the search results above to find the specific information you're looking for."
-        if needs_calculation:
-            # Try to extract mathematical expressions
-            import re
-            # Look for mathematical expressions
-            math_patterns = re.findall(r'[\d+\-*/().\s]+', question)
-            for pattern in math_patterns:
-                if any(char in pattern for char in '+-*/') and any(char.isdigit() for char in pattern):
-                    result = self.math_calculator(pattern.strip())
-                    return f"Mathematical calculation result: {result}"
-        # Default response with better reasoning
-        return f"I understand you're asking about: {question[:150]}... However, I need more specific information or context to provide an accurate answer. Could you please rephrase your question or provide additional details?"
-# --- Memory cleanup function ---
-def cleanup_memory():
-    """Clean up GPU memory"""
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        print("GPU memory cleared")
 # --- Submission Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the agent on them, submits all answers,
-    and displays the results.
-    """
     space_id = os.getenv("SPACE_ID")
     if profile:
@@ -296,15 +354,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # Clean memory before starting
-    cleanup_memory()
-    # Instantiate Agent
     try:
         agent = SmartAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        print(f"Full traceback: {traceback.format_exc()}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
@@ -317,197 +371,119 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # Run Agent on all questions
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
     for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
-        print(f"Processing question {i}/{len(questions_data)}: {task_id}")
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
             })
-            print(f"✅ Completed question {i}: {task_id}")
-            # Clean memory after each question
-            if i % 5 == 0:  # Every 5 questions
-                cleanup_memory()
         except Exception as e:
-            print(f"❌ Error running agent on task {task_id}: {e}")
-            error_answer = f"AGENT ERROR: {str(e)}"
-            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
             results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": error_answer
             })
-    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # Prepare submission
     submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
         "answers": answers_payload
     }
-    status_update = f"Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # Submit answers
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"🎉 Submission Successful!\n\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("✅ Submission successful!")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"❌ Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "❌ Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"❌ Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
-        status_message = f"❌ An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
 # --- Gradio UI ---
 with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
-    gr.Markdown("# 🤖 Local LLM Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-        1. 🔐 Log in to your Hugging Face account using the button below
-        2. 🚀 Click 'Run Evaluation & Submit All Answers'
-        3. ⏳ Wait for the local LLM to process all questions (using memory-optimized smaller model)
-        4. 📊 View your results and submission status
-        **Features:**
-        - 🔍 Real web search using DuckDuckGo
-        - 🧮 Advanced math calculations with SymPy
-        - 🧠 Memory-optimized language model with fallback options
-        - 🛡️ Error handling and recovery mechanisms
-        """
-    )
     with gr.Row():
         gr.LoginButton()
-    with gr.Row():
-        run_button = gr.Button(
-            "🚀 Run Evaluation & Submit All Answers",
-            variant="primary",
-            size="lg"
-        )
-    status_output = gr.Textbox(
-        label="📋 Run Status / Submission Result",
-        lines=8,
-        interactive=False,
-        placeholder="Click the button above to start the evaluation..."
     )
     results_table = gr.DataFrame(
-        label="📊 Questions and Agent Answers",
-        wrap=True,
-        interactive=False
     )
-    # Wire up the button
-    run_button.click(
         fn=run_and_submit_all,
-        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
     print("\n" + "="*60)
-    print("🚀 Application Startup at", pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"))
     print("="*60)
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup:
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?).")
-    print("-" * 60)
-    print("🎯 Launching Gradio Interface for Local LLM Agent Evaluation...")
-    # Launch without share=True for Hugging Face Spaces
     demo.launch(
         server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
     )

 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.core.tools import FunctionTool
+from transformers import AutoTokenizer, pipeline
 import os
 import gradio as gr
 import requests
 import pandas as pd
 import traceback
 import torch
+import re
+import gc
+from typing import List, Dict
+from datetime import datetime
 # Import real tool dependencies
 try:
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MEMORY_LIMIT_GB = 16  # Your system's memory limit
 # --- Advanced Agent Definition ---
 class SmartAgent:
     def __init__(self):
+        """Load a local LLM, register the two tools, and build the ReAct agent.
+
+        Candidates in ``model_options`` are tried in order; the first one for
+        which ``_try_load_model`` returns True is kept. If agent construction
+        fails, ``self.agent`` is set to None rather than raising.
+
+        Raises:
+            RuntimeError: If no candidate model could be loaded.
+        """
+        print(f"Initializing Local LLM Agent (Memory Limit: {MEMORY_LIMIT_GB}GB)...")
+        self.model_loaded = False
+        # Model options sorted by capability (name, approx size in GB, quantization)
+        # NOTE(review): the flan-t5 entries are encoder-decoder checkpoints;
+        # confirm that HuggingFaceLLM can load them in the installed version.
         model_options = [
+            ("google/flan-t5-large", 3, "8-bit"),  # Best balance for 16GB
+            ("google/flan-t5-base", 1, "8-bit"),    # Smaller fallback
+            ("facebook/opt-1.3b", 2.5, "8-bit")     # Alternative option
         ]
+        # Try loading models until success; candidates whose approximate size
+        # exceeds MEMORY_LIMIT_GB are skipped without a load attempt.
+        for model_name, size_gb, quantization in model_options:
+            if size_gb <= MEMORY_LIMIT_GB and self._try_load_model(model_name, quantization):
+                self.model_loaded = True
+                break
+        if not self.model_loaded:
+            raise RuntimeError("Failed to load any suitable model within memory constraints")
+        # Initialize tools with enhanced implementations
+        # The tool names shown to the agent ("web_search", "math_calculator")
+        # differ from the names of the bound methods that implement them.
         self.tools = [
             FunctionTool.from_defaults(
+                fn=self.smart_web_search,
                 name="web_search",
+                description="Searches the web for current information. Use for questions about recent events, people, or facts not in the model's training data."
             ),
             FunctionTool.from_defaults(
+                fn=self.robust_math_calculator,
                 name="math_calculator",
+                description="Solves mathematical expressions and equations. Use for calculations, arithmetic, algebra, or numerical problems."
             )
         ]
+        # Initialize ReAct agent with memory optimization
+        # NOTE(review): confirm that ReActAgent.from_tools accepts the
+        # `react_context` keyword in the installed llama-index version; if it
+        # raises, the except branch below leaves self.agent as None.
         try:
             self.agent = ReActAgent.from_tools(
                 tools=self.tools,
                 llm=self.llm,
                 verbose=True,
+                max_iterations=4,
+                react_context="""Think step by step. Use tools when needed:
+                - For current/recent information: web_search
+                - For calculations: math_calculator
+                - Be concise but accurate"""
             )
+            print("ReAct Agent initialized successfully")
         except Exception as e:
+            print(f"ReAct Agent init failed: {e}")
             self.agent = None
+    def _try_load_model(self, model_name: str, quantization: str) -> bool:
+        """Try to load ``model_name`` and keep it as ``self.llm`` on success.
+
+        The model is built into a local variable and only bound to
+        ``self.llm`` after it has produced a test completion, so a failed
+        candidate is not kept alive while the next one is loaded.
+
+        Args:
+            model_name: Hugging Face model id, also used as the tokenizer name.
+            quantization: "8-bit" or "4-bit" to request bitsandbytes
+                quantization; any other value loads unquantized. The request
+                is only honoured when CUDA is available.
+
+        Returns:
+            True if the model loaded and answered the test prompt, else False.
+        """
+        candidate = None
+        try:
+            print(f"Loading {model_name} with {quantization} quantization...")
+            use_cuda = torch.cuda.is_available()
+            model_kwargs = {
+                # Half precision is only used on GPU; CPU loads stay in
+                # float32 to avoid operators that lack half-precision kernels.
+                "torch_dtype": torch.float16 if use_cuda else torch.float32,
+                "low_cpu_mem_usage": True,
+            }
+            quant_flag = {"8-bit": "load_in_8bit", "4-bit": "load_in_4bit"}.get(quantization)
+            if quant_flag and use_cuda:
+                model_kwargs[quant_flag] = True
+            elif quant_flag:
+                # bitsandbytes quantization needs a GPU; requesting it on a
+                # CPU-only host would make every candidate fail to load.
+                print(f"CUDA not available; loading {model_name} without {quantization} quantization")
+            candidate = HuggingFaceLLM(
+                model_name=model_name,
+                tokenizer_name=model_name,
+                context_window=2048,
+                max_new_tokens=256,
+                generate_kwargs={
+                    "temperature": 0.4,
+                    "do_sample": True,
+                    "top_p": 0.9,
+                    "repetition_penalty": 1.1
+                },
+                device_map="auto" if use_cuda else "cpu",
+                model_kwargs=model_kwargs
+            )
+            # Test the model before committing to it
+            test_response = candidate.complete("Test response:")
+            if not test_response:
+                raise ValueError("Model failed test response")
+            self.llm = candidate
+            print(f"Successfully loaded {model_name}")
+            return True
+        except Exception as e:
+            print(f"Failed to load {model_name}: {str(e)}")
+            # Drop the only reference so cleanup can actually reclaim memory.
+            candidate = None
+            self.cleanup_memory()
+            return False
+    def smart_web_search(self, query: str) -> str:
+        """Search DuckDuckGo and return up to three results as formatted text.
+
+        Args:
+            query: Search string passed to ``DDGS().text``.
+
+        Returns:
+            One text section per result (title, extracted info capped at 250
+            characters, source URL), or a short message when the search
+            dependency is missing, nothing is found, or the search raises.
+            Errors are reported in the returned string, never raised.
+        """
+        print(f"Searching: {query[:60]}...")
         if not DDGS:
+            return "Web search unavailable (duckduckgo_search not installed)"
         try:
             with DDGS() as ddgs:
+                # Fetch at most three text results for the query
+                results = list(ddgs.text(query, max_results=3))
+                if not results:
+                    return "No results found"
+                # Process results for key information
+                processed = []
+                for i, res in enumerate(results, 1):
+                    title = res.get('title', 'No title')
+                    body = res.get('body', 'No description')
+                    url = res.get('href', '')
+                    # Extract most relevant part for the query
+                    key_info = self._extract_relevant_info(query, body)
+                    processed.append(
+                        f"🔍 Result {i}:\n"
+                        f"Title: {title}\n"
+                        f"Info: {key_info[:250]}\n"
+                        f"Source: {url}\n"
+                    )
+                return "\n".join(processed)
         except Exception as e:
+            # Best effort: the caller receives the failure as text.
+            return f"Search error: {str(e)}"
+    def _extract_relevant_info(self, query: str, text: str) -> str:
+        """Return the part of ``text`` most relevant to ``query``.
+
+        Biography, death and award queries look for one matching sentence
+        ("born ... in ...", "died ...", "awarded ... in ..."). Any other
+        query keeps up to three sentences that share a word with the query.
+        When nothing matches, the first 250 characters of ``text`` are used.
+
+        Args:
+            query: The search query the snippet was fetched for.
+            text: Result snippet; ``None`` or empty yields an empty string.
+
+        Returns:
+            The extracted snippet (possibly empty).
+        """
+        if not text:
+            # Search results may carry a missing or null body.
+            return ""
+        query_lower = query.lower()
+        # (trigger words in the query, sentence pattern to look for in the text)
+        targeted_patterns = (
+            (('who is', 'biography', 'born'), r"(born [^.]+? in [^.]+?\.)"),
+            (('died', 'death'), r"(died [^.]+?\.)"),
+            (('award', 'prize', 'won'), r"(awarded [^.]+? in [^.]+?\.)"),
+        )
+        for triggers, pattern in targeted_patterns:
+            if any(w in query_lower for w in triggers):
+                match = re.search(pattern, text, re.I)
+                return match.group(1) if match else text[:250]
+        # Default: keep sentences that mention at least one query word.
+        query_words = query_lower.split()  # split once, not once per sentence
+        sentences = re.split(r'(?<=[.!?]) +', text)
+        important = [s for s in sentences if any(w in s.lower() for w in query_words)]
+        return " ".join(important[:3]) if important else text[:250]
+    def robust_math_calculator(self, expression: str) -> str:
+        """Evaluate an arithmetic expression, optionally embedded in text.
+
+        Operator words ("plus", "times", ...) are turned into symbols, the
+        longest run of arithmetic characters containing a digit is taken as
+        the expression, and it is evaluated with plain Python arithmetic.
+        SymPy is used as a fallback when it is installed.
+
+        Args:
+            expression: Expression or sentence containing one, e.g.
+                "what is 2 plus 2?" or "(1 + 2) * 3".
+
+        Returns:
+            "Result: <value>" on success, otherwise a message starting with
+            "Error:" or "Math error:". Never raises.
+        """
+        print(f"Calculating: {expression}")
+        # Clean and preprocess the expression
+        expr = expression.strip("'\"")
+        # Replace words with operators
+        replacements = {
+            'plus': '+', 'minus': '-', 'times': '*', 'divided by': '/',
+            '^': '**', 'percent': '/100', 'modulo': '%'
+        }
+        for word, op in replacements.items():
+            expr = expr.replace(word, op)
+        # Take the longest run of arithmetic characters that contains a digit.
+        # Matching only "number op number" would truncate "1 + 2 + 3" to
+        # "1 + 2" and drop the parentheses of "(1 + 2) * 3".
+        candidates = [
+            run.strip()
+            for run in re.findall(r"[\d+\-*/%().\s]+", expr)
+            if any(ch.isdigit() for ch in run)
+        ]
+        if candidates:
+            # Trim operators and sentence punctuation left dangling at the edges.
+            expr = max(candidates, key=len).lstrip("*/%) ").rstrip("+-*/%(. ")
+        if not expr.strip():
+            return "Error: Could not evaluate the expression"
+        # Safety check: digits, arithmetic operators, parentheses and spaces only
+        allowed_chars = set("0123456789+-*/().% ")
+        if not all(c in allowed_chars for c in expr):
+            return "Error: Invalid characters in expression"
         try:
+            # SECURITY: eval is reachable from LLM/tool input. The whitelist
+            # above excludes names and attribute access, and builtins are
+            # removed, but a huge power tower (9**9**9**9) can still burn CPU.
+            result = eval(expr, {"__builtins__": {}}, {})
+            return f"Result: {result}"
+        except Exception:
+            # Fallback to sympy if available
+            if sympify:
+                try:
+                    result = sympify(expr).evalf()
+                    return f"Result: {result}"
+                except SympifyError as e:
+                    return f"Math error: {str(e)}"
+                except Exception as e:
+                    return f"Math error: {str(e)}"
+            return "Error: Could not evaluate the expression"
+    def __call__(self, question: str) -> str:
+        """Answer ``question`` by routing it to a type-specific strategy.
+
+        The question is classified as "fact", "math" or anything else, and
+        handed to the matching ``_answer_*`` method. Any exception raised
+        along the way is printed and replaced by the fallback response.
+
+        Args:
+            question: The question text.
+
+        Returns:
+            The answer string produced by the chosen strategy or fallback.
+        """
+        print(f"\nQuestion: {question[:100]}...")
+        try:
+            # Step 1: Classify question type
+            q_type = self._classify_question(question)
+            # Step 2: Use appropriate strategy
+            if q_type == "fact":
+                return self._answer_fact_question(question)
+            elif q_type == "math":
+                return self._answer_math_question(question)
             else:
+                return self._answer_general_question(question)
         except Exception as e:
+            # Last line of defence: callers always receive a string.
+            print(f"Error processing question: {str(e)}")
+            return self._fallback_response(question)
+    def _classify_question(self, question: str) -> str:
+        """Classify ``question`` as "math", "fact" or "general".
+
+        Math is checked first, so a question matching both keyword lists is
+        reported as "math".
+
+        Args:
+            question: The question text.
+
+        Returns:
+            One of "math", "fact", "general".
+        """
+        q_lower = question.lower()
+        # Math questions
+        math_keywords = ['calculate', 'compute', 'sum', 'total', 'average',
+                        'percentage', 'equation', 'solve', 'math', 'number']
+        # '+', '*' and '=' are rare in prose and count anywhere. '-' and '/'
+        # only count between digits; otherwise hyphenated words
+        # ("well-known") and URLs would be misrouted to the calculator.
+        has_operator = re.search(r"[+*=]|\d\s*[-/]\s*\d", q_lower) is not None
+        if has_operator or any(kw in q_lower for kw in math_keywords):
+            return "math"
+        # Fact-based questions
+        fact_keywords = ['current', 'latest', 'recent', 'today', 'news',
+                        'who is', 'what is', 'when did', 'where is',
+                        'competition', 'winner', 'recipient', 'nationality',
+                        'country', 'malko', 'century', 'award', 'born', 'died']
+        if any(kw in q_lower for kw in fact_keywords):
+            return "fact"
+        return "general"
+    def _answer_fact_question(self, question: str) -> str:
+        """Answer a fact question from web search results.
+
+        Capitalised word runs in the question are used as the search query
+        (up to three; the first 50 characters of the question if none). When
+        a model is loaded it is asked to answer from the results; otherwise,
+        or if that call fails, the raw results are returned.
+
+        Args:
+            question: The question text.
+
+        Returns:
+            The model's answer, or the labelled raw search results.
+        """
+        # A sentence-initial "Who"/"What" is capitalised but is not an
+        # entity; keeping it would only dilute the search query.
+        question_words = {"Who", "What", "When", "Where", "Which", "Why", "How"}
+        entities = [
+            ent
+            for ent in re.findall(r"([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)", question)
+            if ent not in question_words
+        ]
+        search_query = " ".join(entities[:3]) or question[:50]
+        # Get search results
+        search_results = self.smart_web_search(search_query)
+        raw_results = f"Search results for '{search_query}':\n{search_results}"
+        if not self.model_loaded:
+            return raw_results
+        # Process with LLM
+        prompt = f"""Question: {question}
+            Search Results:
+            {search_results}
+            Based ONLY on these results, provide a concise answer.
+            If the answer isn't there, say so."""
+        try:
+            response = self.llm.complete(prompt)
+            return str(response).strip()
+        except Exception as e:
+            # Best effort: report the failure and hand back the raw results.
+            print(f"LLM summarisation failed: {e}")
+            return raw_results
+    def _answer_math_question(self, question: str) -> str:
+        """Answer a math question with the calculator, else the agent.
+
+        The question is scanned for runs of digits, operators and brackets.
+        A run is only treated as an expression when it contains an operator
+        between two operands; matching any run at all would accept the first
+        space or full stop and send nearly every question to the calculator.
+
+        Args:
+            question: Raw question text.
+
+        Returns:
+            The calculator's result for the first expression found, otherwise
+            the agent's answer, otherwise the fallback response.
+        """
+        # Try to extract math expression
+        for candidate in re.findall(r"[\d\s+\-*/().^]+", question):
+            # Drop surrounding whitespace and a sentence-ending full stop
+            expression = candidate.strip().rstrip(".").rstrip()
+            if re.search(r"[\d)]\s*[+\-*/^][\s(+\-]*\.?\d", expression):
+                return self.robust_math_calculator(expression)
+        # If no clear expression, use agent reasoning
+        if self.agent:
+            try:
+                return str(self.agent.query(question))
+            except Exception as e:
+                # Catch Exception rather than everything so Ctrl-C / SystemExit still propagate
+                print(f"Agent failed on math question: {e}")
+        return self._fallback_response(question)
+    def _answer_general_question(self, question: str) -> str:
+        """Answer a general question with the agent, or the bare LLM.
+
+        When an agent exists it is the only thing tried: an agent failure
+        goes straight to the fallback response. The plain LLM completion is
+        used only when there is no agent.
+
+        Args:
+            question: Raw question text.
+
+        Returns:
+            The agent's or LLM's answer as a string, or the fallback response.
+        """
+        if self.agent:
+            try:
+                return str(self.agent.query(question))
+            except Exception as e:
+                # Catch Exception rather than everything so Ctrl-C / SystemExit still propagate
+                print(f"Agent failed on general question: {e}")
+                return self._fallback_response(question)
+        # Fallback to simple LLM response
+        try:
+            return str(self.llm.complete(question))
+        except Exception as e:
+            # Also covers a missing/unloaded self.llm (AttributeError)
+            print(f"LLM completion failed on general question: {e}")
+            return self._fallback_response(question)
+    def _fallback_response(self, question: str) -> str:
+        """Build the canned reply used when no answer could be produced.
+
+        Only the first 150 characters of the question are echoed back.
+        """
+        message = f"I couldn't generate a complete answer for: {question[:150]}... Please try rephrasing or ask about something more specific."
+        return message
+    def cleanup_memory(self):
+        """Release Python garbage and, when CUDA is present, cached GPU memory.
+
+        The garbage collector runs first so that tensors kept alive only by
+        reference cycles are freed before the CUDA cache is emptied; emptying
+        the cache first would leave their blocks held.
+        """
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 # --- Submission Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Run the agent over the fetched questions and submit the answers.
+
+    Builds a SmartAgent, calls it once per question, posts the collected
+    answers to the scoring endpoint and reports the outcome.
+
+    Args:
+        profile: OAuth profile of the logged-in user, or None.
+            NOTE(review): presumably the source of ``username``; the code
+            that handles it is not visible here -- confirm.
+
+    Returns:
+        A ``(status_message, results)`` tuple. ``results`` is None when the
+        agent cannot be created or the questions cannot be fetched, and a
+        DataFrame of the per-question log once submission was attempted,
+        whether it succeeded or failed.
+    """
     space_id = os.getenv("SPACE_ID")
     if profile:
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # Initialize agent with memory management
     try:
         agent = SmartAgent()
     except Exception as e:
+        print(f"Agent initialization failed: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            return "No questions received from server.", None
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
+        return f"Error fetching questions: {e}", None
+    # Process Questions
+    # results_log becomes the returned table; answers_payload is what gets submitted
     results_log = []
     answers_payload = []
     for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
+        question = item.get("question")
+        # Skip entries missing either the id or the question text
+        if not task_id or not question:
             continue
+        print(f"Processing question {i}/{len(questions_data)} (ID: {task_id})")
         try:
+            answer = agent(question)
+            answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": answer[:2000]  # Limit answer length
+            })
             results_log.append({
+                "Task ID": task_id,
+                "Question": question[:100] + "..." if len(question) > 100 else question,
+                "Answer": answer[:200] + "..." if len(answer) > 200 else answer
             })
+            # Clean memory every 5 questions
+            if i % 5 == 0:
+                agent.cleanup_memory()
         except Exception as e:
+            # Record the failure as this task's answer so the remaining questions still run
+            print(f"Error on question {task_id}: {e}")
+            answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": f"Error processing question: {str(e)}"
+            })
             results_log.append({
+                "Task ID": task_id,
+                "Question": question[:100] + "..." if len(question) > 100 else question,
+                "Answer": f"Error: {str(e)}"
             })
+    # Submit Answers
     submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
         "answers": answers_payload
     }
+    print(f"Submitting {len(answers_payload)} answers...")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
+        result = response.json()
+        status = (
+            f"✅ Submission Successful!\n\n"
+            f"User: {result.get('username')}\n"
+            f"Score: {result.get('score', 'N/A')}% "
+            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
+            f"Message: {result.get('message', '')}"
         )
+        return status, pd.DataFrame(results_log)
     except Exception as e:
+        # The per-question log is still returned so the table can be shown
+        error_msg = f"❌ Submission Failed: {str(e)}"
+        print(error_msg)
+        return error_msg, pd.DataFrame(results_log)
 # --- Gradio UI ---
 with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
+    # Page heading and one-line description
+    gr.Markdown("""
+    # Local LLM Agent Evaluation
+    **Run your local agent against the course evaluation questions**
+    """)
     with gr.Row():
         gr.LoginButton()
+    run_btn = gr.Button(
+        "🚀 Run Evaluation & Submit Answers",
+        variant="primary"
+    )
+    # Read-only outputs: the status message and the per-question results table
+    status_out = gr.Textbox(
+        label="📋 Status",
+        interactive=False
     )
     results_table = gr.DataFrame(
+        label="📊 Results",
+        interactive=False,
+        wrap=True
     )
+    # NOTE(review): no `inputs` are wired; presumably Gradio supplies the OAuth
+    # profile from the handler's type hint -- confirm.
+    run_btn.click(
         fn=run_and_submit_all,
+        outputs=[status_out, results_table]
     )
 if __name__ == "__main__":
+    # Startup banner framed by two 60-character rules
+    rule = "="*60
+    print("\n" + rule)
+    print(f"🚀 Starting Agent Evaluation - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
+    print(f"Memory Limit: {MEMORY_LIMIT_GB}GB")
+    print(rule)
+    # Listen on all interfaces, port 7860
+    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
+    demo.launch(**launch_options)