Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT committed on Jun 25

Commit

51e7f46

1 Parent(s): a42d6f7

Initial commit with LlamaIndex-based agent

Browse files

Files changed (3) hide show

app.py +113 -19
requirements.txt +1 -0
txt.txt +383 -0

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import gradio as gr
 import requests
 import pandas as pd
 import traceback
 # Import real tool dependencies
 try:
@@ -32,15 +33,63 @@ class SmartAgent:
     def __init__(self):
         print("Initializing Local LLM Agent...")
-        # Initialize Zephyr-7B model
-        self.llm = HuggingFaceLLM(
-            model_name="HuggingFaceH4/zephyr-7b-beta",
-            tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
-            context_window=2048,
-            max_new_tokens=256,
-            generate_kwargs={"temperature": 0.7, "do_sample": True},
-            device_map="auto"
-        )
         # Define tools with real implementations
         self.tools = [
@@ -57,12 +106,19 @@ class SmartAgent:
         ]
         # Create ReAct agent with tools
-        self.agent = ReActAgent.from_tools(
-            tools=self.tools,
-            llm=self.llm,
-            verbose=True
-        )
-        print("Local LLM Agent initialized successfully.")
     def web_search(self, query: str) -> str:
         """Real web search using DuckDuckGo"""
@@ -116,14 +172,43 @@ class SmartAgent:
     def __call__(self, question: str) -> str:
         print(f"Processing question (first 50 chars): {question[:50]}...")
         try:
-            response = self.agent.query(question)
-            return str(response)
         except Exception as e:
             print(f"Agent error: {str(e)}")
             print(f"Full traceback: {traceback.format_exc()}")
             return f"Error processing question: {str(e)}"
 # --- Submission Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
@@ -143,6 +228,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     # Instantiate Agent
     try:
         agent = SmartAgent()
@@ -198,6 +286,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                 "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
             })
             print(f"✅ Completed question {i}: {task_id}")
         except Exception as e:
             print(f"❌ Error running agent on task {task_id}: {e}")
             error_answer = f"AGENT ERROR: {str(e)}"
@@ -279,13 +372,14 @@ with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
         **Instructions:**
         1. 🔐 Log in to your Hugging Face account using the button below
         2. 🚀 Click 'Run Evaluation & Submit All Answers'
-        3. ⏳ Wait for the local LLM (Zephyr-7B) to process all questions
         4. 📊 View your results and submission status
         **Features:**
         - 🔍 Real web search using DuckDuckGo
         - 🧮 Advanced math calculations with SymPy
-        - 🧠 Powered by HuggingFace Zephyr-7B model
         """
     )

 import requests
 import pandas as pd
 import traceback
+import torch
 # Import real tool dependencies
 try:
     def __init__(self):
         print("Initializing Local LLM Agent...")
+        # Check available memory and CUDA
+        if torch.cuda.is_available():
+            print(f"CUDA available. GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
+        else:
+            print("CUDA not available, using CPU")
+        # Use a smaller, more efficient model for Hugging Face Spaces
+        model_options = [
+            "microsoft/DialoGPT-medium",  # Much smaller, works well for chat
+            "google/flan-t5-base",        # Good for reasoning tasks
+            "HuggingFaceH4/zephyr-7b-beta"  # Original (may fail in limited memory)
+        ]
+        model_name = model_options[1]  # Start with flan-t5-base
+        print(f"Attempting to load model: {model_name}")
+        try:
+            # Initialize with memory-efficient settings
+            self.llm = HuggingFaceLLM(
+                model_name=model_name,
+                tokenizer_name=model_name,
+                context_window=512,  # Reduced context window
+                max_new_tokens=128,  # Reduced max tokens
+                generate_kwargs={
+                    "temperature": 0.7,
+                    "do_sample": True,
+                    "pad_token_id": 0  # Prevent padding issues
+                },
+                device_map="auto",
+                # Add memory optimization parameters
+                model_kwargs={
+                    "torch_dtype": torch.float16,  # Use half precision
+                    "low_cpu_mem_usage": True,
+                    "load_in_8bit": True,  # Enable 8-bit quantization if available
+                }
+            )
+            print(f"Successfully loaded model: {model_name}")
+        except Exception as e:
+            print(f"Failed to load {model_name}: {e}")
+            # Fallback to an even smaller model
+            try:
+                fallback_model = "microsoft/DialoGPT-small"
+                print(f"Falling back to: {fallback_model}")
+                self.llm = HuggingFaceLLM(
+                    model_name=fallback_model,
+                    tokenizer_name=fallback_model,
+                    context_window=256,
+                    max_new_tokens=64,
+                    generate_kwargs={"temperature": 0.7, "do_sample": True},
+                    device_map="cpu",  # Force CPU to avoid memory issues
+                    model_kwargs={"low_cpu_mem_usage": True}
+                )
+                print(f"Successfully loaded fallback model: {fallback_model}")
+            except Exception as e2:
+                print(f"All model loading attempts failed: {e2}")
+                raise Exception("Unable to load any language model")
         # Define tools with real implementations
         self.tools = [
         ]
         # Create ReAct agent with tools
+        try:
+            self.agent = ReActAgent.from_tools(
+                tools=self.tools,
+                llm=self.llm,
+                verbose=True,
+                max_iterations=3  # Limit iterations to prevent infinite loops
+            )
+            print("Local LLM Agent initialized successfully.")
+        except Exception as e:
+            print(f"Error creating ReAct agent: {e}")
+            # Create a simple fallback agent
+            self.agent = None
+            print("Using fallback direct tool calling approach")
     def web_search(self, query: str) -> str:
         """Real web search using DuckDuckGo"""
     def __call__(self, question: str) -> str:
+        """Answer ``question`` and return the answer as a string.
+
+        Uses the ReAct agent when ``self.agent`` is set; otherwise routes the
+        question by keyword to the math or web-search tool. Any exception is
+        caught and returned as an "Error processing question: ..." string.
+        """
         print(f"Processing question (first 50 chars): {question[:50]}...")
         try:
+            # self.agent is falsy when agent construction failed in __init__.
+            if self.agent:
+                response = self.agent.query(question)
+                return str(response)
+            else:
+                # Fallback: Direct tool usage based on question content
+                question_lower = question.lower()
+                # NOTE(review): these are substring tests, so a question that
+                # merely contains '-', '/' or '=' (hyphenated words, URLs)
+                # also enters the math branch.
+                if any(word in question_lower for word in ['calculate', 'math', 'equation', '+', '-', '*', '/', '=']):
+                    # Try math calculator
+                    # Keep only whitespace-separated tokens that contain a
+                    # digit, an arithmetic operator or a parenthesis.
+                    math_terms = []
+                    for word in question.split():
+                        if any(char in word for char in '0123456789+-*/()'):
+                            math_terms.append(word)
+                    # With no such token, fall through to the search check.
+                    if math_terms:
+                        expression = ' '.join(math_terms)
+                        return self.math_calculator(expression)
+                if any(word in question_lower for word in ['search', 'find', 'what is', 'current', 'latest', 'news']):
+                    # Try web search
+                    return self.web_search(question)
+                # Default response
+                return f"I understand you're asking: {question[:100]}... However, I'm having trouble processing this with the current model configuration. Please try rephrasing your question or breaking it into smaller parts."
         except Exception as e:
             print(f"Agent error: {str(e)}")
             print(f"Full traceback: {traceback.format_exc()}")
             return f"Error processing question: {str(e)}"
+# --- Memory cleanup function ---
+def cleanup_memory():
+    """Clean up GPU memory"""
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        print("GPU memory cleared")
 # --- Submission Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # Clean memory before starting
+    cleanup_memory()
     # Instantiate Agent
     try:
         agent = SmartAgent()
                 "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
             })
             print(f"✅ Completed question {i}: {task_id}")
+            # Clean memory after each question
+            if i % 5 == 0:  # Every 5 questions
+                cleanup_memory()
         except Exception as e:
             print(f"❌ Error running agent on task {task_id}: {e}")
             error_answer = f"AGENT ERROR: {str(e)}"
         **Instructions:**
         1. 🔐 Log in to your Hugging Face account using the button below
         2. 🚀 Click 'Run Evaluation & Submit All Answers'
+        3. ⏳ Wait for the local LLM to process all questions (using memory-optimized smaller model)
         4. 📊 View your results and submission status
         **Features:**
         - 🔍 Real web search using DuckDuckGo
         - 🧮 Advanced math calculations with SymPy
+        - 🧠 Memory-optimized language model with fallback options
+        - 🛡️ Error handling and recovery mechanisms
         """
     )

requirements.txt CHANGED Viewed

@@ -3,6 +3,7 @@ llama-index-llms-huggingface
 transformers>=4.30.0
 torch>=2.0.0
 accelerate
 gradio>=4.0.0
 requests
 pandas

 transformers>=4.30.0
 torch>=2.0.0
 accelerate
+bitsandbytes  # For 8-bit quantization
 gradio>=4.0.0
 requests
 pandas

txt.txt ADDED Viewed

	@@ -0,0 +1,383 @@

+llama-index-core
+llama-index-llms-huggingface
+transformers>=4.30.0
+torch>=2.0.0
+accelerate
+gradio>=4.0.0
+requests
+pandas
+python-dotenv
+duckduckgo-search
+sympy
+sentencepiece
+protobuf
+app.py
+# app.py
+from llama_index.llms.huggingface import HuggingFaceLLM
+from llama_index.core.agent import ReActAgent
+from llama_index.core.tools import FunctionTool
+from transformers import AutoTokenizer
+import os
+import gradio as gr
+import requests
+import pandas as pd
+import traceback
+# Import real tool dependencies
+try:
+    from duckduckgo_search import DDGS
+except ImportError:
+    print("Warning: duckduckgo_search not installed. Web search will be limited.")
+    DDGS = None
+try:
+    from sympy import sympify
+    from sympy.core.sympify import SympifyError
+except ImportError:
+    print("Warning: sympy not installed. Math calculator will be limited.")
+    sympify = None
+    SympifyError = Exception
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Advanced Agent Definition ---
+class SmartAgent:
+    def __init__(self):
+        print("Initializing Local LLM Agent...")
+        # Initialize Zephyr-7B model
+        self.llm = HuggingFaceLLM(
+            model_name="HuggingFaceH4/zephyr-7b-beta",
+            tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
+            context_window=2048,
+            max_new_tokens=256,
+            generate_kwargs={"temperature": 0.7, "do_sample": True},
+            device_map="auto"
+        )
+        # Define tools with real implementations
+        self.tools = [
+            FunctionTool.from_defaults(
+                fn=self.web_search,
+                name="web_search",
+                description="Searches the web for current information using DuckDuckGo when questions require up-to-date knowledge"
+            ),
+            FunctionTool.from_defaults(
+                fn=self.math_calculator,
+                name="math_calculator",
+                description="Performs mathematical calculations and symbolic math using SymPy when questions involve numbers or equations"
+            )
+        ]
+        # Create ReAct agent with tools
+        self.agent = ReActAgent.from_tools(
+            tools=self.tools,
+            llm=self.llm,
+            verbose=True
+        )
+        print("Local LLM Agent initialized successfully.")
+    def web_search(self, query: str) -> str:
+        """Real web search using DuckDuckGo"""
+        print(f"Web search triggered for: {query[:50]}...")
+        if not DDGS:
+            return "Web search unavailable - duckduckgo_search not installed"
+        try:
+            with DDGS() as ddgs:
+                results = list(ddgs.text(query, max_results=3))
+                if results:
+                    formatted_results = []
+                    for i, r in enumerate(results, 1):
+                        title = r.get('title', 'No title')
+                        body = r.get('body', 'No description')[:200]
+                        url = r.get('href', '')
+                        formatted_results.append(f"{i}. {title}\n{body}...\nSource: {url}")
+                    return "\n\n".join(formatted_results)
+                else:
+                    return "No search results found for the query."
+        except Exception as e:
+            print(f"Web search error: {e}")
+            return f"Error during web search: {str(e)}"
+    def math_calculator(self, expression: str) -> str:
+        """Safe math evaluation using SymPy"""
+        print(f"Math calculation triggered for: {expression}")
+        if not sympify:
+            # Fallback to basic eval with safety checks
+            try:
+                # Only allow basic math operations
+                allowed_chars = set('0123456789+-*/().^ ')
+                if not all(c in allowed_chars for c in expression.replace(' ', '')):
+                    return "Error: Only basic math operations are allowed"
+                result = eval(expression.replace('^', '**'))
+                return str(result)
+            except Exception as e:
+                return f"Error: Could not evaluate the mathematical expression - {str(e)}"
+        try:
+            # Use SymPy for safe evaluation
+            result = sympify(expression).evalf()
+            return str(result)
+        except SympifyError as e:
+            return f"Error: Could not parse the mathematical expression - {str(e)}"
+        except Exception as e:
+            return f"Error: Calculation failed - {str(e)}"
+    def __call__(self, question: str) -> str:
+        print(f"Processing question (first 50 chars): {question[:50]}...")
+        try:
+            response = self.agent.query(question)
+            return str(response)
+        except Exception as e:
+            print(f"Agent error: {str(e)}")
+            print(f"Full traceback: {traceback.format_exc()}")
+            return f"Error processing question: {str(e)}"
+# --- Submission Logic ---
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the agent on them, submits all answers,
+    and displays the results.
+    """
+    space_id = os.getenv("SPACE_ID")
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # Instantiate Agent
+    try:
+        agent = SmartAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        print(f"Full traceback: {traceback.format_exc()}")
+        return f"Error initializing agent: {e}", None
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code URL: {agent_code}")
+    # Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        return f"Error decoding server response for questions: {e}", None
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # Run Agent on all questions
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data, 1):
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        print(f"Processing question {i}/{len(questions_data)}: {task_id}")
+        try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
+            })
+            print(f"✅ Completed question {i}: {task_id}")
+        except Exception as e:
+            print(f"❌ Error running agent on task {task_id}: {e}")
+            error_answer = f"AGENT ERROR: {str(e)}"
+            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": error_answer
+            })
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # Prepare submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
+    status_update = f"Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+    # Submit answers
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"🎉 Submission Successful!\n\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("✅ Submission successful!")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"❌ Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "❌ Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"❌ Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except Exception as e:
+        status_message = f"❌ An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+# --- Gradio UI ---
+# Build the Gradio page: heading, instructions, login button, run button,
+# then a status box and a results table that the run button fills in.
+with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
+    gr.Markdown("# 🤖 Local LLM Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1. 🔐 Log in to your Hugging Face account using the button below
+        2. 🚀 Click 'Run Evaluation & Submit All Answers'
+        3. ⏳ Wait for the local LLM (Zephyr-7B) to process all questions
+        4. 📊 View your results and submission status
+        **Features:**
+        - 🔍 Real web search using DuckDuckGo
+        - 🧮 Advanced math calculations with SymPy
+        - 🧠 Powered by HuggingFace Zephyr-7B model
+        """
+    )
+    with gr.Row():
+        gr.LoginButton()
+    with gr.Row():
+        run_button = gr.Button(
+            "🚀 Run Evaluation & Submit All Answers",
+            variant="primary",
+            size="lg"
+        )
+    # Read-only text box that receives the status string.
+    status_output = gr.Textbox(
+        label="📋 Run Status / Submission Result",
+        lines=8,
+        interactive=False,
+        placeholder="Click the button above to start the evaluation..."
+    )
+    # Read-only table that receives the per-question results.
+    results_table = gr.DataFrame(
+        label="📊 Questions and Agent Answers",
+        wrap=True,
+        interactive=False
+    )
+    # Wire up the button: the two values returned by run_and_submit_all go to
+    # the status box and the results table, in that order.
+    # NOTE(review): no inputs are listed; the `profile` argument is presumably
+    # injected by Gradio from the login state - confirm against Gradio's
+    # OAuth documentation.
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
+if __name__ == "__main__":
+    print("\n" + "="*60)
+    print("🚀 Application Startup at", pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"))
+    print("="*60)
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}")
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?).")
+    print("-" * 60)
+    print("🎯 Launching Gradio Interface for Local LLM Agent Evaluation...")
+    # Launch without share=True for Hugging Face Spaces
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )
+readme
+---
+title: Template Final Assignment
+emoji: 🕵🏻‍♂️
+colorFrom: indigo
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.25.2
+app_file: app.py
+pinned: false
+hf_oauth: true
+# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
+hf_oauth_expiration_minutes: 480
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference