Ais committed
Update app/main.py

app/main.py CHANGED (+358 -209)
Before (old version, condensed):

@@ -44,164 +44,309 @@ model.eval()

 print("✅ Qwen2-0.5B model ready with optimized settings!")

-def …
-    """
-    Shorter, clearer instructions that small models can follow better.
-    """
-    if is_force_mode:
-        return """You are Apollo AI. …
-        - Provide full working code
-        …"""
-    else:
-        return """You are Apollo AI …
-        - Make …
-        User: "print hello world" …
-        You: "What function displays text in Python?" …
-        """
-
-    """
-    This helps the 0.5B model give consistent direct answers.
-    """
-    # … (several truncated keyword checks)
-
-    # Calculator
-    if 'calculator' in user_lower and ('create' in user_lower or 'make' in user_lower or 'build' in user_lower):
-        return '''Here's a simple calculator:
-        ```python
-        a = float(input("First number: "))
-        b = float(input("Second number: "))
-        op = input("Operator (+,-,*,/): ")
-        if op == '+': print(a + b)
-        elif op == '-': print(a - b)
-        elif op == '*': print(a * b)
-        elif op == '/': print(a / b)
-        ```'''
-
-def …
-    """
-    This helps the 0.5B model give consistent guided learning.
-    """
-    user_lower = user_message.lower()
-    # … (several truncated keyword checks)
-
-    if …
-        return '''…
-        1. Get two numbers from user - what function gets input?
-        2. Get operation (+,-,*,/) - how to choose?
-        3. Calculate result - what structure handles choices?
-        4. Show result - what displays output?
-        …'''
-
-def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, is_force_mode: bool) -> str:
-    """
-    Simpler extraction since 0.5B models produce cleaner output.
-    """
     …
     if is_force_mode:
-        predefined = …
-        if predefined:
-            print("✅ Using …")
             return predefined
     else:
-        predefined = …
-        if predefined:
-            print("✅ Using …")
             return predefined
@@ -209,7 +354,7 @@ def extract_clean_answer(…)
     if "<|im_end|>" in assistant_content:
         assistant_content = assistant_content.split("<|im_end|>")[0]

-    # …
     clean_text = assistant_content.strip()

     # Remove template tokens
@@ -225,89 +370,100 @@ def extract_clean_answer(…)
     clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
     clean_text = clean_text.strip()

-    # …
     if not clean_text or len(clean_text) < 10:
         if is_force_mode:
-            return …
         else:
-            return …
-
-    # Step 5: Length control for 0.5B
-    if len(clean_text) > 500:  # Keep responses shorter for 0.5B
-        sentences = clean_text.split('. ')
-        if len(sentences) > 3:
-            clean_text = '. '.join(sentences[:3]) + '.'

     print(f"🧹 Final cleaned answer length: {len(clean_text)}")
     return clean_text

 def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
     """
-    …
     """
     try:
-        # … (old message-preparation code, truncated)
-
-        # Add …
-        system_prompt = …
-            "role": "system",
-            "content": system_prompt
-        })
-
-        # Add …
-            break
-
-        print(f"🔍 Processing {len(…

         # Apply chat template
         try:
             formatted_prompt = tokenizer.apply_chat_template(
-                …
                 tokenize=False,
                 add_generation_prompt=True
             )
         except Exception as e:
             print(f"⚠️ Chat template failed, using simple format: {e}")
-            …

-        # Tokenize
         inputs = tokenizer(
             formatted_prompt,
             return_tensors="pt",
             truncation=True,
-            max_length=…
         )

-        # …
         generation_params = {
             "input_ids": inputs.input_ids,
             "attention_mask": inputs.attention_mask,
@@ -317,60 +473,53 @@ def generate_response(…)
         }

         if is_force_mode:
-            # Force mode: Very conservative for 0.5B
             generation_params.update({
-                "max_new_tokens": min(max_tokens, …),
-                "temperature": 0.…,
-                "top_p": 0.…,
-                "top_k": …,
                 "repetition_penalty": 1.05,
             })
         else:
-            # Mentor mode: Still conservative but allows more creativity
             generation_params.update({
-                "max_new_tokens": min(max_tokens, …),
-                "temperature": 0.…,
-                "top_p": 0.…,
-                "top_k": …,
                 "repetition_penalty": 1.02,
             })

-        # Generate
         with torch.no_grad():
             outputs = model.generate(**generation_params)

-        # Decode response
         full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

-        # …
-        for msg in reversed(clean_messages):
-            if msg.get("role") == "user":
-                user_message = msg.get("content", "")
-                break
-
-        # Clean and return
-        clean_answer = extract_clean_answer(full_response, formatted_prompt, user_message, is_force_mode)

         return clean_answer

     except Exception as e:
         print(f"❌ Generation error with Qwen2-0.5B: {e}")
-        …

 # === Routes ===
 @app.get("/")
 def root():
     return {
-        "message": "🤖 Apollo AI Backend v2.1 - Qwen2-0.5B …",
         "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
         "status": "ready",
-        "optimizations": ["…],
-        "features": ["mentor_mode", "force_mode", "…],
         "modes": {
-            "mentor": "Guides learning with …",
-            "force": "Provides direct answers …"
         }
     }
@@ -380,7 +529,7 @@ def health():
         "status": "healthy",
         "model_loaded": True,
         "model_size": "0.5B",
-        "optimizations": "…"
     }

 @app.post("/v1/chat/completions")
@@ -404,10 +553,9 @@ async def chat_completions(request: Request):
     try:
         body = await request.json()
         messages = body.get("messages", [])
-        max_tokens = min(body.get("max_tokens", 200), …)
-        temperature = max(0.1, min(body.get("temperature", 0.5), 0.8))

-        # Get mode information
         is_force_mode = body.get("force_mode", False)

         if not messages or not isinstance(messages, list):
@@ -428,8 +576,8 @@ async def chat_completions(request: Request):
         )

     try:
-        print(f"📥 Processing request for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
-        print(f"📊 …")

         response_content = generate_response(
             messages=messages,
@@ -438,12 +586,11 @@ async def chat_completions(request: Request):
             temperature=temperature
         )

-        # Return OpenAI-compatible response
         return {
             "id": f"chatcmpl-apollo-qwen05b-{hash(str(messages)) % 10000}",
             "object": "chat.completion",
             "created": int(torch.tensor(0).item()),
-            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-…",
             "choices": [
                 {
                     "index": 0,
@@ -460,7 +607,7 @@ async def chat_completions(request: Request):
                 "total_tokens": len(str(messages)) + len(response_content)
             },
             "apollo_mode": "force" if is_force_mode else "mentor",
-            "model_optimizations": "…"
         }

     except Exception as e:
@@ -470,42 +617,44 @@ async def chat_completions(request: Request):
             content={"error": f"Internal server error: {str(e)}"}
         )

-# === Test endpoint optimized for 0.5B ===
 @app.post("/test")
 async def test_generation(request: Request):
-    """…
     try:
         body = await request.json()
         prompt = body.get("prompt", "How do I print hello world in Python?")
-        max_tokens = min(body.get("max_tokens", 200), …)
         test_both_modes = body.get("test_both_modes", True)

         results = {}

         # Test mentor mode
-        mentor_response = generate_response(messages_mentor, is_force_mode=False, max_tokens=max_tokens, temperature=0.3)
         results["mentor_mode"] = {
             "response": mentor_response,
             "length": len(mentor_response),
-            "mode": "mentor"
         }

         if test_both_modes:
             # Test force mode
-            force_response = generate_response(messages_force, is_force_mode=True, max_tokens=max_tokens, temperature=0.1)
             results["force_mode"] = {
                 "response": force_response,
                 "length": len(force_response),
-                "mode": "force"
             }

         return {
             "prompt": prompt,
             "results": results,
             "model": "Qwen2-0.5B-Instruct",
-            "optimizations": "…",
             "status": "success"
         }
@@ -517,8 +666,8 @@ async def test_generation(request: Request):

 if __name__ == "__main__":
     import uvicorn
-    print("🚀 Starting Apollo AI Backend v2.1 - Qwen2-0.5B …")
     print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
-    print("⚡ Optimizations: …")
-    print("🎯 Modes: Mentor (…)")
     uvicorn.run(app, host="0.0.0.0", port=7860)
After (updated app/main.py, lines 44-673):

print("✅ Qwen2-0.5B model ready with optimized settings!")

def get_enhanced_system_prompt(is_force_mode: bool) -> str:
    """
    Enhanced system prompts that clearly define behavior for Qwen2-0.5B.
    """
    if is_force_mode:
        return """You are Apollo AI in DIRECT ANSWER mode. You must give complete, working solutions immediately.

STRICT RULES:
- Provide full working code when asked
- Give direct explanations (max 2-3 sentences)
- NEVER ask questions back to the user
- Always give complete solutions
- Be concise but thorough

EXAMPLES:
User: "How do I print hello world in Python?"
You: "Use `print('Hello World')`. This function outputs text to the console."

User: "Create a calculator in Python"
You: "Here's a simple calculator:
```python
a = float(input('First number: '))
b = float(input('Second number: '))
op = input('Operator (+,-,*,/): ')
if op == '+': print(a + b)
elif op == '-': print(a - b)
elif op == '*': print(a * b)
elif op == '/': print(a / b)
```
This performs basic math operations on two numbers."

REMEMBER: Give direct answers, not questions. Provide working code."""

    else:
        return """You are Apollo AI in MENTOR mode. You must guide learning through questions and hints only.

STRICT RULES:
- ASK guiding questions instead of giving direct answers
- NEVER provide complete working code
- Give hints and partial examples only
- Make the user think and discover the solution
- Build on their previous attempts

EXAMPLES:
User: "How do I print hello world in Python?"
You: "What function do you think displays text in Python? Think about showing output to the user. What would such a function be called?"

User: "Create a calculator in Python"
You: "Great project! Let's break it down step by step:
1. What information would a calculator need from the user?
2. How would you get input from someone using your program?
3. What operations should it support?
Start with step 1 - what function gets user input in Python?"

User: "I tried input() but it's not working"
You: "Good start with input()! What type of data does input() return? If you need to do math, what might you need to convert it to? Try looking up type conversion functions."

REMEMBER: Guide with questions, never give direct answers or complete code."""
def analyze_conversation_context(messages: list) -> dict:
    """
    Analyze conversation history to understand context and user progress.
    """
    context = {
        "user_messages": [],
        "assistant_messages": [],
        "topics": [],
        "user_attempted_code": False,
        "user_stuck": False,
        "repeated_questions": 0
    }

    # Extract recent messages
    for msg in messages[-6:]:  # Last 6 messages
        if msg.get("role") == "user":
            content = msg.get("content", "").lower()
            context["user_messages"].append(msg.get("content", ""))

            # Check if user attempted code
            if any(keyword in content for keyword in ["tried", "attempted", "doesn't work", "error", "not working"]):
                context["user_attempted_code"] = True

            # Detect topic
            if "calculator" in content:
                context["topics"].append("calculator")
            elif "print" in content and "hello" in content:
                context["topics"].append("hello_world")
            elif "function" in content:
                context["topics"].append("functions")
            elif "list" in content:
                context["topics"].append("lists")
            elif "variable" in content:
                context["topics"].append("variables")

        elif msg.get("role") == "assistant":
            context["assistant_messages"].append(msg.get("content", ""))

    # Check if user seems stuck (repeated similar questions)
    if len(context["user_messages"]) >= 2:
        last_two = context["user_messages"][-2:]
        if any(word in last_two[0].lower() and word in last_two[1].lower()
               for word in ["how", "what", "help", "create", "make"]):
            context["repeated_questions"] += 1

    return context
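For reference, a minimal smoke test of `analyze_conversation_context`; the sample conversation, and the idea of calling the helper directly, are illustrative assumptions rather than part of app/main.py:

```python
# Hypothetical usage sketch (not part of app/main.py).
sample = [
    {"role": "user", "content": "How do I make a calculator in Python?"},
    {"role": "assistant", "content": "What function gets user input?"},
    {"role": "user", "content": "I tried input() but it's not working"},
]
ctx = analyze_conversation_context(sample)
print(ctx["topics"])               # ['calculator'] (keyword match on the first user message)
print(ctx["user_attempted_code"])  # True ("tried" / "not working" matched)
```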
def generate_mentor_response(user_message: str, context: dict) -> str:
    """
    Generate mentor responses that ask guiding questions based on context.
    """
    user_lower = user_message.lower()
    topics = context.get("topics", [])
    user_attempted = context.get("user_attempted_code", False)

    # Hello World - Progressive questioning
    if "print" in user_lower and ("hello" in user_lower or "world" in user_lower):
        if user_attempted:
            return "Good effort! What happened when you tried? Did you use parentheses and quotes? Try: function_name('your text here')"
        return "What function do you think displays text in Python? Think about showing output to the user. What would such a function be called?"

    # Calculator - Step by step guidance
    if "calculator" in user_lower:
        if "hello_world" in topics or len(context["user_messages"]) > 1:
            return """Great! Since you understand output, let's build a calculator step by step:

1. How do you get numbers from the user? (Think about input)
2. What operations should it support? (+, -, *, /)
3. How do you make decisions in code? (Think about choosing operations)

Start with step 1 - what function gets user input? What type of data does it return?"""
        return """Excellent project choice! Let's think through this:

What are the main steps a calculator needs?
1. Get first number from user
2. Get operation (+, -, *, /)
3. Get second number from user
4. Calculate result
5. Show result

Which step should we tackle first? What function gets input from users?"""

    # Variables
    if "variable" in user_lower:
        if user_attempted:
            return "What symbol did you use to assign the value? In Python, we use = to store data. Try: name = value"
        return "How do you think Python remembers information? What symbol might connect a name to a value? Think: name __ value"

    # Functions
    if "function" in user_lower and ("create" in user_lower or "define" in user_lower):
        if "variables" in topics:
            return """Good! You know variables. Functions are similar but hold code instead of data.

What keyword do you think starts a function definition? Here's the pattern:
```
______ function_name():
    # code goes here
```
What goes in the blank? How would you call it afterward?"""
        return "What keyword do you think defines a function in Python? Functions are reusable blocks of code. Think about the word 'define'..."

    # Lists
    if "list" in user_lower and "python" in user_lower:
        return "What symbols do you think hold multiple items together? Think about containers. Try creating: container_symbol item1, item2, item3 container_symbol"

    # Input function help
    if "input" in user_lower and ("not working" in user_lower or "error" in user_lower):
        return "Good start with input()! What type of data does input() return - text or numbers? If you need to do math, what function converts text to numbers? Try looking up 'int()' or 'float()'."

    # Math operations
    if any(op in user_lower for op in ["+", "-", "*", "/", "add", "subtract", "multiply", "divide"]):
        return "Great! You're thinking about operations. How do you make choices in code? If user picks '+', do addition. If '-', do subtraction. What code structure makes decisions based on conditions?"

    # Default mentor response with context
    if user_attempted:
        return "I see you're experimenting - that's great! What specific part isn't working? What error do you see? Let's debug it step by step."

    return "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?"
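A quick illustration of the progressive guidance (a hypothetical call, not in the file): once "hello_world" is already among the context topics, a calculator question gets the step-by-step breakdown instead of the first-contact reply:

```python
# Hypothetical usage sketch (not part of app/main.py).
ctx = {"topics": ["hello_world"], "user_attempted_code": False,
       "user_messages": ["print hello world", "make a calculator"]}
reply = generate_mentor_response("Can you make a calculator?", ctx)
print(reply.splitlines()[0])
# -> "Great! Since you understand output, let's build a calculator step by step:"
```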
def generate_force_response(user_message: str, context: dict) -> str:
    """
    Generate direct answers for force mode.
    """
    user_lower = user_message.lower()

    # Hello World
    if "print" in user_lower and ("hello" in user_lower or "world" in user_lower):
        return "Use `print('Hello World')`. This function outputs text to the console."

    # Calculator - Complete working solution
    if "calculator" in user_lower:
        return '''Here's a complete calculator:

```python
# Get input from user
num1 = float(input("Enter first number: "))
operator = input("Enter operator (+, -, *, /): ")
num2 = float(input("Enter second number: "))

# Calculate based on operator
if operator == '+':
    result = num1 + num2
elif operator == '-':
    result = num1 - num2
elif operator == '*':
    result = num1 * num2
elif operator == '/':
    if num2 != 0:
        result = num1 / num2
    else:
        result = "Error: Division by zero"
else:
    result = "Error: Invalid operator"

# Display result
print(f"Result: {result}")
```

This calculator gets two numbers and an operator, performs the calculation, and displays the result.'''

    # Variables
    if "variable" in user_lower:
        return 'Create variables using the assignment operator: `name = value`. Examples: `x = 5`, `text = "hello"`, `pi = 3.14`. Variables store data for later use.'

    # Functions
    if "function" in user_lower and ("create" in user_lower or "define" in user_lower):
        return '''Define functions with the `def` keyword:

```python
def my_function():
    return "Hello"

def add_numbers(a, b):
    return a + b

# Call functions
result = my_function()          # Returns "Hello"
sum_result = add_numbers(5, 3)  # Returns 8
```

Functions are reusable code blocks that can take parameters and return values.'''

    # Lists
    if "list" in user_lower and "python" in user_lower:
        return 'Create lists with square brackets: `my_list = [1, 2, 3, "hello"]`. Access items with index: `my_list[0]` gets first item. Add items: `my_list.append(4)`.'

    # Input function
    if "input" in user_lower:
        return 'Use `input("Your prompt: ")` to get user input. It returns a string. For numbers, convert with `int(input())` or `float(input())`. Example: `age = int(input("Enter age: "))`'

    # Loops
    if "loop" in user_lower:
        return '''Two main types of loops:

```python
# For loop (known iterations)
for i in range(5):
    print(i)  # Prints 0 to 4

# While loop (condition-based)
count = 0
while count < 5:
    print(count)
    count += 1
```

Use for loops when you know how many times to repeat, while loops for conditions.'''

    # Default force response
    return "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish."
def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, context: dict, is_force_mode: bool) -> str:
    """
    Enhanced cleaning for Qwen2-0.5B responses with context awareness.
    """
    if not full_response or len(full_response.strip()) < 5:
        return "I apologize, but I couldn't generate a response. Please try again."

    print(f"🔍 Raw response length: {len(full_response)}")
    print(f"🔍 Mode: {'FORCE' if is_force_mode else 'MENTOR'}")
    print(f"🔍 Context topics: {context.get('topics', [])}")

    # Use context-aware predefined responses first
    if is_force_mode:
        predefined = generate_force_response(user_message, context)
        if predefined != "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish.":
            print("✅ Using context-aware force response")
            return predefined
    else:
        predefined = generate_mentor_response(user_message, context)
        if predefined != "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?":
            print("✅ Using context-aware mentor response")
            return predefined

    # If no predefined response, clean the model output
    generated_text = full_response
    if formatted_prompt in full_response:
        parts = full_response.split(formatted_prompt)
        if len(parts) > 1:
            generated_text = parts[-1]

    # Extract assistant content
    assistant_content = generated_text
    if "<|im_start|>assistant" in generated_text:
        assistant_parts = generated_text.split("<|im_start|>assistant")
        if len(assistant_parts) > 1:
            assistant_content = assistant_parts[-1]
    if "<|im_end|>" in assistant_content:
        assistant_content = assistant_content.split("<|im_end|>")[0]

    # Clean the response
    clean_text = assistant_content.strip()

    # Remove template tokens
    …
    clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
    clean_text = clean_text.strip()

    # Validate response matches mode
    if not is_force_mode and clean_text:
        # In mentor mode, response should ask questions or provide hints.
        # Compare lowercased so "What" / "Try" at sentence starts still match.
        if not any(marker in clean_text.lower() for marker in ['?', 'think', 'try', 'what', 'how', 'consider', 'break it down']):
            # Model didn't follow mentor instructions, use fallback
            return generate_mentor_response(user_message, context)

    # Length control
    if len(clean_text) > 600:
        sentences = clean_text.split('. ')
        if len(sentences) > 4:
            clean_text = '. '.join(sentences[:4]) + '.'

    # Fallback
    if not clean_text or len(clean_text) < 10:
        if is_force_mode:
            return generate_force_response(user_message, context)
        else:
            return generate_mentor_response(user_message, context)

    print(f"🧹 Final cleaned answer length: {len(clean_text)}")
    return clean_text
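Note the design choice: the two default strings double as sentinels. Both `extract_clean_answer` and `generate_response` treat an exact match of the default as "no predefined answer" and fall through to model generation. A hypothetical check of that path:

```python
# Hypothetical usage sketch (not part of app/main.py).
ctx = analyze_conversation_context([{"role": "user", "content": "Explain recursion"}])
print(generate_mentor_response("Explain recursion", ctx))
# -> "Interesting question! Let's break it down - ..." (the sentinel default,
#    so the caller proceeds to actual Qwen2-0.5B generation)
```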
def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
    """
    Enhanced generation with proper conversation history and context awareness.
    """
    try:
        # Analyze conversation context
        context = analyze_conversation_context(messages)
        print(f"📊 Conversation context: {context}")

        # Get the last user message
        last_user_msg = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                last_user_msg = msg.get("content", "")
                break

        if not last_user_msg:
            return "I didn't receive a message. Please ask me something!"

        # Try context-aware predefined responses first
        context_response = generate_force_response(last_user_msg, context) if is_force_mode else generate_mentor_response(last_user_msg, context)

        # Check if we got a meaningful predefined response
        if is_force_mode:
            if context_response != "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish.":
                return context_response
        else:
            if context_response != "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?":
                return context_response

        # Fallback to model generation with conversation history
        conversation_messages = []

        # Add enhanced system prompt
        system_prompt = get_enhanced_system_prompt(is_force_mode)
        conversation_messages.append({"role": "system", "content": system_prompt})

        # Add conversation history (last 6 messages: 3 user + 3 assistant)
        recent_messages = messages[-6:] if len(messages) > 6 else messages
        for msg in recent_messages:
            if msg.get("role") in ["user", "assistant"] and msg.get("content"):
                conversation_messages.append({
                    "role": msg["role"],
                    "content": msg["content"]
                })

        print(f"🔍 Processing {len(conversation_messages)} messages for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")

        # Apply chat template
        try:
            formatted_prompt = tokenizer.apply_chat_template(
                conversation_messages,
                tokenize=False,
                add_generation_prompt=True
            )
        except Exception as e:
            print(f"⚠️ Chat template failed, using simple format: {e}")
            formatted_prompt = f"System: {conversation_messages[0]['content']}\n"
            for msg in conversation_messages[1:]:
                formatted_prompt += f"{msg['role'].title()}: {msg['content']}\n"
            formatted_prompt += "Assistant:"

        # Tokenize
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1000
        )

        # Generation parameters
        generation_params = {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
            …
        }

        if is_force_mode:
            generation_params.update({
                "max_new_tokens": min(max_tokens, 200),
                "temperature": 0.2,
                "top_p": 0.8,
                "top_k": 25,
                "repetition_penalty": 1.05,
            })
        else:
            generation_params.update({
                "max_new_tokens": min(max_tokens, 180),
                "temperature": 0.4,
                "top_p": 0.85,
                "top_k": 35,
                "repetition_penalty": 1.02,
            })

        # Generate
        with torch.no_grad():
            outputs = model.generate(**generation_params)

        full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Clean and return with context
        clean_answer = extract_clean_answer(full_response, formatted_prompt, last_user_msg, context, is_force_mode)

        return clean_answer

    except Exception as e:
        print(f"❌ Generation error with Qwen2-0.5B: {e}")
        # Return context-appropriate fallback
        if is_force_mode:
            return "I encountered an error. Please try rephrasing your request more specifically."
        else:
            return "I had trouble processing that. What specific aspect would you like to explore? Can you break down your question?"
# === Routes ===
@app.get("/")
def root():
    return {
        "message": "🤖 Apollo AI Backend v2.1 - Qwen2-0.5B Context-Aware",
        "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
        "status": "ready",
        "optimizations": ["context_aware", "conversation_history", "progressive_guidance"],
        "features": ["mentor_mode", "force_mode", "context_analysis"],
        "modes": {
            "mentor": "Guides learning with contextual questions",
            "force": "Provides direct answers based on conversation"
        }
    }

… (def health():)
        "status": "healthy",
        "model_loaded": True,
        "model_size": "0.5B",
        "optimizations": "context_aware_responses"
    }

@app.post("/v1/chat/completions")
… (async def chat_completions(request: Request):)
    try:
        body = await request.json()
        messages = body.get("messages", [])
        max_tokens = min(body.get("max_tokens", 200), 400)
        temperature = max(0.1, min(body.get("temperature", 0.5), 0.8))

        is_force_mode = body.get("force_mode", False)

        if not messages or not isinstance(messages, list):
            …
        )

    try:
        print(f"📥 Processing context-aware request for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
        print(f"📊 Conversation length: {len(messages)} messages")

        response_content = generate_response(
            messages=messages,
            …
            temperature=temperature
        )

        return {
            "id": f"chatcmpl-apollo-qwen05b-{hash(str(messages)) % 10000}",
            "object": "chat.completion",
            "created": int(torch.tensor(0).item()),  # placeholder timestamp (always 0)
            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-contextaware",
            "choices": [
                {
                    "index": 0,
                    …
                "total_tokens": len(str(messages)) + len(response_content)
            },
            "apollo_mode": "force" if is_force_mode else "mentor",
            "model_optimizations": "context_aware_conversation"
        }

    except Exception as e:
        …
            content={"error": f"Internal server error: {str(e)}"}
        )
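For completeness, a minimal client call against the endpoint above. The URL assumes the uvicorn config at the bottom of the file, and the `choices[0]["message"]["content"]` shape assumes the OpenAI-compatible layout the route advertises; `requests` is not used by the app itself:

```python
import requests  # hypothetical client sketch, not part of app/main.py

resp = requests.post(
    "http://localhost:7860/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Create a calculator in Python"}],
        "force_mode": True,  # direct answers; omit (or False) for mentor mode
        "max_tokens": 200,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```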
@app.post("/test")
async def test_generation(request: Request):
    """Enhanced test endpoint with conversation context"""
    try:
        body = await request.json()
        prompt = body.get("prompt", "How do I print hello world in Python?")
        max_tokens = min(body.get("max_tokens", 200), 400)
        test_both_modes = body.get("test_both_modes", True)

        # Simulate conversation context
        messages = [{"role": "user", "content": prompt}]

        results = {}

        # Test mentor mode
        mentor_response = generate_response(messages, is_force_mode=False, max_tokens=max_tokens, temperature=0.4)
        results["mentor_mode"] = {
            "response": mentor_response,
            "length": len(mentor_response),
            "mode": "mentor",
            "asks_questions": "?" in mentor_response
        }

        if test_both_modes:
            # Test force mode
            force_response = generate_response(messages, is_force_mode=True, max_tokens=max_tokens, temperature=0.2)
            results["force_mode"] = {
                "response": force_response,
                "length": len(force_response),
                "mode": "force",
                "provides_code": "```" in force_response or "`" in force_response
            }

        return {
            "prompt": prompt,
            "results": results,
            "model": "Qwen2-0.5B-Instruct",
            "optimizations": "context_aware_conversation",
            "status": "success"
        }
…

if __name__ == "__main__":
    import uvicorn
    print("🚀 Starting Apollo AI Backend v2.1 - Context-Aware Qwen2-0.5B...")
    print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
    print("⚡ Optimizations: Context-aware responses, conversation history, progressive guidance")
    print("🎯 Modes: Mentor (guided questions) vs Force (direct answers)")
    uvicorn.run(app, host="0.0.0.0", port=7860)
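And a matching sketch for the /test endpoint, under the same host and port assumptions:

```python
import requests  # hypothetical client sketch, not part of app/main.py

r = requests.post(
    "http://localhost:7860/test",
    json={"prompt": "How do I create a function in Python?", "test_both_modes": True},
)
for mode, info in r.json()["results"].items():
    print(f"{mode}: {info['length']} chars")  # mentor_mode and force_mode summaries
```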