Ais committed on
Commit 84677b5 · verified · 1 Parent(s): b397650

Update app/main.py

Files changed (1)
  1. app/main.py +358 -209
app/main.py CHANGED
@@ -44,164 +44,309 @@ model.eval()
 
 print("✅ Qwen2-0.5B model ready with optimized settings!")
 
-def get_simple_system_prompt(is_force_mode: bool) -> str:
     """
-    SIMPLIFIED system prompts optimized for Qwen2-0.5B's 500M parameters.
-    Shorter, clearer instructions that small models can follow better.
     """
     if is_force_mode:
-        return """You are Apollo AI. Give direct, complete answers.
-
-Rules:
-- Provide full working code
-- Be concise, max 3 sentences explanation
-- Never ask questions back
-- Give complete solutions immediately
-
-Example:
-User: "print hello world python"
-You: "Use print('Hello World'). This outputs text to console."
-"""
     else:
-        return """You are Apollo AI tutor. Guide learning with questions.
-
-Rules:
-- Ask guiding questions instead of giving answers
-- Never give complete working code
-- Use hints and partial examples only
-- Make students think and discover
 
-Example:
-User: "print hello world python"
-You: "What function displays text in Python? Try looking up output functions."
-"""
 
-def create_simple_force_responses(user_message: str) -> str:
     """
-    Pre-defined responses for common questions in force mode.
-    This helps the 0.5B model give consistent direct answers.
     """
-    user_lower = user_message.lower()
 
-    # Python print
-    if 'print' in user_lower and ('hello' in user_lower or 'world' in user_lower):
-        return 'Use `print("Hello World")`. This function outputs text to the console.'
 
-    # Basic math
-    if '2+2' in user_lower or '2 + 2' in user_lower:
-        return '2 + 2 = 4. Addition combines two numbers to get their sum.'
 
-    # Python variable
-    if 'variable' in user_lower and ('python' in user_lower or 'create' in user_lower):
-        return 'Use `name = "value"`. Variables store data: `x = 5` or `text = "hello"`.'
 
-    # Python list
-    if 'list' in user_lower and 'python' in user_lower and 'create' in user_lower:
-        return 'Use square brackets: `my_list = [1, 2, 3]`. Lists store multiple items.'
 
-    # Python function
-    if 'function' in user_lower and 'python' in user_lower and ('create' in user_lower or 'define' in user_lower):
-        return '''Use def keyword:
-```python
-def my_function():
-    return "Hello"
 ```
-Functions are reusable code blocks.'''
-
-    # Calculator
-    if 'calculator' in user_lower and ('create' in user_lower or 'make' in user_lower or 'build' in user_lower):
-        return '''Here's a simple calculator:
-```python
-a = float(input("First number: "))
-b = float(input("Second number: "))
-op = input("Operator (+,-,*,/): ")
-if op == '+': print(a + b)
-elif op == '-': print(a - b)
-elif op == '*': print(a * b)
-elif op == '/': print(a / b)
 ```
-This performs basic math operations.'''
 
-    return None
 
-def create_simple_mentor_responses(user_message: str) -> str:
     """
-    Pre-defined mentor responses for common questions.
-    This helps the 0.5B model give consistent guided learning.
     """
     user_lower = user_message.lower()
 
-    # Python print
-    if 'print' in user_lower and ('hello' in user_lower or 'world' in user_lower):
-        return 'What function do you think displays text in Python? Think about showing output. What would it be called?'
 
-    # Basic math
-    if '2+2' in user_lower or '2 + 2' in user_lower:
-        return 'What do you think 2 + 2 equals? Try calculating it step by step.'
-
-    # Python variable
-    if 'variable' in user_lower and ('python' in user_lower or 'create' in user_lower):
-        return 'How do you think Python stores data? What symbol might assign a value to a name? Try: name = value'
 
-    # Python list
-    if 'list' in user_lower and 'python' in user_lower and 'create' in user_lower:
-        return 'What brackets do you think hold multiple items? Try making a list with [item1, item2]. What goes inside?'
 
-    # Python function
-    if 'function' in user_lower and 'python' in user_lower and ('create' in user_lower or 'define' in user_lower):
-        return '''What keyword defines a function in Python? Try this structure:
 ```python
-___ function_name():
-    # your code here
 ```
-What goes in the blank? How would you call it?'''
 
-    # Calculator
-    if 'calculator' in user_lower and ('create' in user_lower or 'make' in user_lower or 'build' in user_lower):
-        return '''What steps would a calculator need?
-1. Get two numbers from user - what function gets input?
-2. Get operation (+,-,*,/) - how to choose?
-3. Calculate result - what structure handles choices?
-4. Show result - what displays output?
 
-Try building step 1 first. What function gets user input?'''
 
-    return None
 
-def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, is_force_mode: bool) -> str:
     """
-    Optimized cleaning for Qwen2-0.5B responses.
-    Simpler extraction since 0.5B models produce cleaner output.
     """
     if not full_response or len(full_response.strip()) < 5:
         return "I apologize, but I couldn't generate a response. Please try again."
 
     print(f"🔍 Raw response length: {len(full_response)}")
     print(f"🔍 Mode: {'FORCE' if is_force_mode else 'MENTOR'}")
 
-    # Check for pre-defined responses first
     if is_force_mode:
-        predefined = create_simple_force_responses(user_message)
-        if predefined:
-            print("✅ Using predefined force response")
             return predefined
     else:
-        predefined = create_simple_mentor_responses(user_message)
-        if predefined:
-            print("✅ Using predefined mentor response")
             return predefined
 
-    # Step 1: Remove the input prompt
     generated_text = full_response
     if formatted_prompt in full_response:
         parts = full_response.split(formatted_prompt)
         if len(parts) > 1:
             generated_text = parts[-1]
 
-    # Step 2: Extract assistant content - simplified for 0.5B
     assistant_content = generated_text
 
-    # Look for assistant markers
     if "<|im_start|>assistant" in generated_text:
         assistant_parts = generated_text.split("<|im_start|>assistant")
         if len(assistant_parts) > 1:
@@ -209,7 +354,7 @@ def extract_clean_answer(full_response: str, formatted_prompt: str, user_message
     if "<|im_end|>" in assistant_content:
         assistant_content = assistant_content.split("<|im_end|>")[0]
 
-    # Step 3: Basic cleaning - gentler for 0.5B
     clean_text = assistant_content.strip()
 
     # Remove template tokens
@@ -225,89 +370,100 @@ def extract_clean_answer(full_response: str, formatted_prompt: str, user_message
     clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
     clean_text = clean_text.strip()
 
-    # Step 4: Fallback handling for 0.5B
     if not clean_text or len(clean_text) < 10:
         if is_force_mode:
-            return "Could you please be more specific about what you need?"
         else:
-            return "What specific aspect would you like to explore? What's your approach?"
-
-    # Step 5: Length control for 0.5B
-    if len(clean_text) > 500:  # Keep responses shorter for 0.5B
-        sentences = clean_text.split('. ')
-        if len(sentences) > 3:
-            clean_text = '. '.join(sentences[:3]) + '.'
 
     print(f"🧹 Final cleaned answer length: {len(clean_text)}")
-
     return clean_text
 
 def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
     """
-    Optimized generation for Qwen2-0.5B with shorter contexts and conservative settings.
     """
     try:
-        # Check for simple predefined responses first
-        if messages and len(messages) > 0:
-            last_user_msg = ""
-            for msg in reversed(messages):
-                if msg.get("role") == "user":
-                    last_user_msg = msg.get("content", "")
-                    break
-
-            if last_user_msg:
-                if is_force_mode:
-                    predefined = create_simple_force_responses(last_user_msg)
-                    if predefined:
-                        return predefined
-                else:
-                    predefined = create_simple_mentor_responses(last_user_msg)
-                    if predefined:
-                        return predefined
 
-        # Build simple conversation for 0.5B model
-        clean_messages = []
 
-        # Add simple system prompt
-        system_prompt = get_simple_system_prompt(is_force_mode)
-        clean_messages.append({
-            "role": "system",
-            "content": system_prompt
-        })
 
-        # Add only the last user message to keep context short for 0.5B
-        if messages and len(messages) > 0:
-            for msg in reversed(messages):
-                if msg.get("role") == "user":
-                    clean_messages.append({
-                        "role": "user",
-                        "content": msg.get("content", "")
-                    })
-                    break
 
-        print(f"🔍 Processing {len(clean_messages)} messages for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
 
         # Apply chat template
         try:
             formatted_prompt = tokenizer.apply_chat_template(
-                clean_messages,
                 tokenize=False,
                 add_generation_prompt=True
             )
         except Exception as e:
             print(f"⚠️ Chat template failed, using simple format: {e}")
-            # Fallback to simple format
-            formatted_prompt = f"System: {clean_messages[0]['content']}\nUser: {clean_messages[1]['content']}\nAssistant:"
 
-        # Tokenize with conservative limits for 0.5B
         inputs = tokenizer(
             formatted_prompt,
             return_tensors="pt",
             truncation=True,
-            max_length=800  # Shorter context for 0.5B
         )
 
-        # Conservative generation settings for 0.5B model
         generation_params = {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
@@ -317,60 +473,53 @@ def generate_response(messages: list, is_force_mode: bool = False, max_tokens: i
        }
 
        if is_force_mode:
-            # Force mode: Very conservative for 0.5B
            generation_params.update({
-                "max_new_tokens": min(max_tokens, 150),  # Very short
-                "temperature": 0.1,  # Very focused
-                "top_p": 0.7,
-                "top_k": 20,
                "repetition_penalty": 1.05,
            })
        else:
-            # Mentor mode: Still conservative but allows more creativity
            generation_params.update({
-                "max_new_tokens": min(max_tokens, 200),
-                "temperature": 0.3,  # Lower than original
-                "top_p": 0.8,
-                "top_k": 30,
                "repetition_penalty": 1.02,
            })
 
-        # Generate with timeout for 0.5B
        with torch.no_grad():
            outputs = model.generate(**generation_params)
 
-        # Decode response
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
 
-        # Extract user message for context
-        user_message = ""
-        for msg in reversed(clean_messages):
-            if msg.get("role") == "user":
-                user_message = msg.get("content", "")
-                break
-
-        # Clean and return
-        clean_answer = extract_clean_answer(full_response, formatted_prompt, user_message, is_force_mode)
 
        return clean_answer
 
    except Exception as e:
        print(f"❌ Generation error with Qwen2-0.5B: {e}")
-        mode_text = "direct answer" if is_force_mode else "guided learning"
-        return f"I encountered an error generating a {mode_text}. Please try a simpler question."
 
 # === Routes ===
 @app.get("/")
 def root():
     return {
-        "message": "🤖 Apollo AI Backend v2.1 - Qwen2-0.5B Optimized",
         "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
         "status": "ready",
-        "optimizations": ["short_contexts", "conservative_generation", "predefined_responses"],
-        "features": ["mentor_mode", "force_mode", "0.5B_optimized"],
         "modes": {
-            "mentor": "Guides learning with simple questions",
-            "force": "Provides direct answers quickly"
        }
    }
@@ -380,7 +529,7 @@ def health():
        "status": "healthy",
        "model_loaded": True,
        "model_size": "0.5B",
-        "optimizations": "qwen2_0.5B_specific"
    }
 
 @app.post("/v1/chat/completions")
@@ -404,10 +553,9 @@ async def chat_completions(request: Request):
    try:
        body = await request.json()
        messages = body.get("messages", [])
-        max_tokens = min(body.get("max_tokens", 200), 300)  # Cap at 300 for 0.5B
-        temperature = max(0.1, min(body.get("temperature", 0.5), 0.8))  # Conservative range
 
-        # Get mode information
        is_force_mode = body.get("force_mode", False)
 
        if not messages or not isinstance(messages, list):
@@ -428,8 +576,8 @@ async def chat_completions(request: Request):
        )
 
    try:
-        print(f"📥 Processing request for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
-        print(f"📊 Settings: max_tokens={max_tokens}, temperature={temperature}")
 
        response_content = generate_response(
            messages=messages,
@@ -438,12 +586,11 @@ async def chat_completions(request: Request):
            temperature=temperature
        )
 
-        # Return OpenAI-compatible response
        return {
            "id": f"chatcmpl-apollo-qwen05b-{hash(str(messages)) % 10000}",
            "object": "chat.completion",
            "created": int(torch.tensor(0).item()),
-            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-mode",
            "choices": [
                {
                    "index": 0,
@@ -460,7 +607,7 @@ async def chat_completions(request: Request):
                "total_tokens": len(str(messages)) + len(response_content)
            },
            "apollo_mode": "force" if is_force_mode else "mentor",
-            "model_optimizations": "qwen2_0.5B_specific"
        }
 
    except Exception as e:
@@ -470,42 +617,44 @@ async def chat_completions(request: Request):
            content={"error": f"Internal server error: {str(e)}"}
        )
 
-# === Test endpoint optimized for 0.5B ===
 @app.post("/test")
 async def test_generation(request: Request):
-    """Test endpoint for debugging both modes with 0.5B optimizations"""
    try:
        body = await request.json()
        prompt = body.get("prompt", "How do I print hello world in Python?")
-        max_tokens = min(body.get("max_tokens", 200), 300)
        test_both_modes = body.get("test_both_modes", True)
 
        results = {}
 
        # Test mentor mode
-        messages_mentor = [{"role": "user", "content": prompt}]
-        mentor_response = generate_response(messages_mentor, is_force_mode=False, max_tokens=max_tokens, temperature=0.3)
        results["mentor_mode"] = {
            "response": mentor_response,
            "length": len(mentor_response),
-            "mode": "mentor"
        }
 
        if test_both_modes:
            # Test force mode
-            messages_force = [{"role": "user", "content": prompt}]
-            force_response = generate_response(messages_force, is_force_mode=True, max_tokens=max_tokens, temperature=0.1)
            results["force_mode"] = {
                "response": force_response,
                "length": len(force_response),
-                "mode": "force"
            }
 
        return {
            "prompt": prompt,
            "results": results,
            "model": "Qwen2-0.5B-Instruct",
-            "optimizations": "0.5B_specific",
            "status": "success"
        }
@@ -517,8 +666,8 @@ async def test_generation(request: Request):
 
 if __name__ == "__main__":
    import uvicorn
-    print("🚀 Starting Apollo AI Backend v2.1 - Qwen2-0.5B Optimized...")
    print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
-    print("⚡ Optimizations: Short contexts, conservative generation, predefined responses")
-    print("🎯 Modes: Mentor (simple questions) vs Force (direct answers)")
    uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 print("✅ Qwen2-0.5B model ready with optimized settings!")
 
+def get_enhanced_system_prompt(is_force_mode: bool) -> str:
     """
+    Enhanced system prompts that clearly define behavior for Qwen2-0.5B.
     """
     if is_force_mode:
+        return """You are Apollo AI in DIRECT ANSWER mode. You must give complete, working solutions immediately.
+
+STRICT RULES:
+- Provide full working code when asked
+- Give direct explanations (max 2-3 sentences)
+- NEVER ask questions back to the user
+- Always give complete solutions
+- Be concise but thorough
+
+EXAMPLES:
+User: "How do I print hello world in Python?"
+You: "Use `print('Hello World')`. This function outputs text to the console."
+
+User: "Create a calculator in Python"
+You: "Here's a simple calculator:
+```python
+a = float(input('First number: '))
+b = float(input('Second number: '))
+op = input('Operator (+,-,*,/): ')
+if op == '+': print(a + b)
+elif op == '-': print(a - b)
+elif op == '*': print(a * b)
+elif op == '/': print(a / b)
+```
+This performs basic math operations on two numbers."
+
+REMEMBER: Give direct answers, not questions. Provide working code."""
+
     else:
+        return """You are Apollo AI in MENTOR mode. You must guide learning through questions and hints only.
 
+STRICT RULES:
+- ASK guiding questions instead of giving direct answers
+- NEVER provide complete working code
+- Give hints and partial examples only
+- Make the user think and discover the solution
+- Build on their previous attempts
 
+EXAMPLES:
+User: "How do I print hello world in Python?"
+You: "What function do you think displays text in Python? Think about showing output to the user. What would such a function be called?"
 
+User: "Create a calculator in Python"
+You: "Great project! Let's break it down step by step:
+1. What information would a calculator need from the user?
+2. How would you get input from someone using your program?
+3. What operations should it support?
+Start with step 1 - what function gets user input in Python?"
+
+User: "I tried input() but it's not working"
+You: "Good start with input()! What type of data does input() return? If you need to do math, what might you need to convert it to? Try looking up type conversion functions."
+
+REMEMBER: Guide with questions, never give direct answers or complete code."""
+
+def analyze_conversation_context(messages: list) -> dict:
     """
+    Analyze conversation history to understand context and user progress.
     """
+    context = {
+        "user_messages": [],
+        "assistant_messages": [],
+        "topics": [],
+        "user_attempted_code": False,
+        "user_stuck": False,
+        "repeated_questions": 0
+    }
 
+    # Extract recent messages
+    for msg in messages[-6:]:  # Last 6 messages
+        if msg.get("role") == "user":
+            content = msg.get("content", "").lower()
+            context["user_messages"].append(msg.get("content", ""))
+
+            # Check if user attempted code
+            if any(keyword in content for keyword in ["tried", "attempted", "doesn't work", "error", "not working"]):
+                context["user_attempted_code"] = True
+
+            # Detect topic
+            if "calculator" in content:
+                context["topics"].append("calculator")
+            elif "print" in content and "hello" in content:
+                context["topics"].append("hello_world")
+            elif "function" in content:
+                context["topics"].append("functions")
+            elif "list" in content:
+                context["topics"].append("lists")
+            elif "variable" in content:
+                context["topics"].append("variables")
+
+        elif msg.get("role") == "assistant":
+            context["assistant_messages"].append(msg.get("content", ""))
+
+    # Check if user seems stuck (repeated similar questions)
+    if len(context["user_messages"]) >= 2:
+        last_two = context["user_messages"][-2:]
+        if any(word in last_two[0].lower() and word in last_two[1].lower()
+               for word in ["how", "what", "help", "create", "make"]):
+            context["repeated_questions"] += 1
+
+    return context
+
+def generate_mentor_response(user_message: str, context: dict) -> str:
+    """
+    Generate mentor responses that ask guiding questions based on context.
+    """
+    user_lower = user_message.lower()
+    topics = context.get("topics", [])
+    user_attempted = context.get("user_attempted_code", False)
 
+    # Hello World - Progressive questioning
+    if "print" in user_lower and ("hello" in user_lower or "world" in user_lower):
+        if user_attempted:
+            return "Good effort! What happened when you tried? Did you use parentheses and quotes? Try: function_name('your text here')"
+        return "What function do you think displays text in Python? Think about showing output to the user. What would such a function be called?"
 
+    # Calculator - Step by step guidance
+    if "calculator" in user_lower:
+        if "hello_world" in topics or len(context["user_messages"]) > 1:
+            return """Great! Since you understand output, let's build a calculator step by step:
+
+1. How do you get numbers from the user? (Think about input)
+2. What operations should it support? (+, -, *, /)
+3. How do you make decisions in code? (Think about choosing operations)
+
+Start with step 1 - what function gets user input? What type of data does it return?"""
+        return """Excellent project choice! Let's think through this:
+
+What are the main steps a calculator needs?
+1. Get first number from user
+2. Get operation (+, -, *, /)
+3. Get second number from user
+4. Calculate result
+5. Show result
+
+Which step should we tackle first? What function gets input from users?"""
 
+    # Variables
+    if "variable" in user_lower:
+        if user_attempted:
+            return "What symbol did you use to assign the value? In Python, we use = to store data. Try: name = value"
+        return "How do you think Python remembers information? What symbol might connect a name to a value? Think: name __ value"
 
+    # Functions
+    if "function" in user_lower and ("create" in user_lower or "define" in user_lower):
+        if "variables" in topics:
+            return """Good! You know variables. Functions are similar but hold code instead of data.
+
+What keyword do you think starts a function definition? Here's the pattern:
 ```
+______ function_name():
+    # code goes here
 ```
+What goes in the blank? How would you call it afterward?"""
+        return "What keyword do you think defines a function in Python? Functions are reusable blocks of code. Think about the word 'define'..."
+
+    # Lists
+    if "list" in user_lower and "python" in user_lower:
+        return "What symbols do you think hold multiple items together? Think about containers. Try creating: container_symbol item1, item2, item3 container_symbol"
 
+    # Input function help
+    if "input" in user_lower and ("not working" in user_lower or "error" in user_lower):
+        return "Good start with input()! What type of data does input() return - text or numbers? If you need to do math, what function converts text to numbers? Try looking up 'int()' or 'float()'."
+
+    # Math operations
+    if any(op in user_lower for op in ["+", "-", "*", "/", "add", "subtract", "multiply", "divide"]):
+        return "Great! You're thinking about operations. How do you make choices in code? If user picks '+', do addition. If '-', do subtraction. What code structure makes decisions based on conditions?"
+
+    # Default mentor response with context
+    if user_attempted:
+        return "I see you're experimenting - that's great! What specific part isn't working? What error do you see? Let's debug it step by step."
+
+    return "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?"
 
+def generate_force_response(user_message: str, context: dict) -> str:
     """
+    Generate direct answers for force mode.
     """
     user_lower = user_message.lower()
 
+    # Hello World
+    if "print" in user_lower and ("hello" in user_lower or "world" in user_lower):
+        return "Use `print('Hello World')`. This function outputs text to the console."
 
+    # Calculator - Complete working solution
+    if "calculator" in user_lower:
+        return '''Here's a complete calculator:
+
+```python
+# Get input from user
+num1 = float(input("Enter first number: "))
+operator = input("Enter operator (+, -, *, /): ")
+num2 = float(input("Enter second number: "))
+
+# Calculate based on operator
+if operator == '+':
+    result = num1 + num2
+elif operator == '-':
+    result = num1 - num2
+elif operator == '*':
+    result = num1 * num2
+elif operator == '/':
+    if num2 != 0:
+        result = num1 / num2
+    else:
+        result = "Error: Division by zero"
+else:
+    result = "Error: Invalid operator"
+
+# Display result
+print(f"Result: {result}")
+```
+
+This calculator gets two numbers and an operator, performs the calculation, and displays the result.'''
 
+    # Variables
+    if "variable" in user_lower:
+        return 'Create variables using the assignment operator: `name = value`. Examples: `x = 5`, `text = "hello"`, `pi = 3.14`. Variables store data for later use.'
 
+    # Functions
+    if "function" in user_lower and ("create" in user_lower or "define" in user_lower):
+        return '''Define functions with the `def` keyword:
+
 ```python
+def my_function():
+    return "Hello"
+
+def add_numbers(a, b):
+    return a + b
+
+# Call functions
+result = my_function()  # Returns "Hello"
+sum_result = add_numbers(5, 3)  # Returns 8
 ```
+
+Functions are reusable code blocks that can take parameters and return values.'''
+
+    # Lists
+    if "list" in user_lower and "python" in user_lower:
+        return 'Create lists with square brackets: `my_list = [1, 2, 3, "hello"]`. Access items with index: `my_list[0]` gets first item. Add items: `my_list.append(4)`.'
+
+    # Input function
+    if "input" in user_lower:
+        return 'Use `input("Your prompt: ")` to get user input. It returns a string. For numbers, convert with `int(input())` or `float(input())`. Example: `age = int(input("Enter age: "))`'
 
+    # Loops
+    if "loop" in user_lower:
+        return '''Two main types of loops:
 
+```python
+# For loop (known iterations)
+for i in range(5):
+    print(i)  # Prints 0 to 4
+
+# While loop (condition-based)
+count = 0
+while count < 5:
+    print(count)
+    count += 1
+```
+
+Use for loops when you know how many times to repeat, while loops for conditions.'''
 
+    # Default force response
+    return "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish."
 
+def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, context: dict, is_force_mode: bool) -> str:
     """
+    Enhanced cleaning for Qwen2-0.5B responses with context awareness.
     """
     if not full_response or len(full_response.strip()) < 5:
         return "I apologize, but I couldn't generate a response. Please try again."
 
     print(f"🔍 Raw response length: {len(full_response)}")
     print(f"🔍 Mode: {'FORCE' if is_force_mode else 'MENTOR'}")
+    print(f"🔍 Context topics: {context.get('topics', [])}")
 
+    # Use context-aware predefined responses first
     if is_force_mode:
+        predefined = generate_force_response(user_message, context)
+        if predefined != "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish.":
+            print("✅ Using context-aware force response")
             return predefined
     else:
+        predefined = generate_mentor_response(user_message, context)
+        if predefined != "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?":
+            print("✅ Using context-aware mentor response")
             return predefined
 
+    # If no predefined response, clean the model output
     generated_text = full_response
     if formatted_prompt in full_response:
         parts = full_response.split(formatted_prompt)
         if len(parts) > 1:
             generated_text = parts[-1]
 
+    # Extract assistant content
     assistant_content = generated_text
 
     if "<|im_start|>assistant" in generated_text:
         assistant_parts = generated_text.split("<|im_start|>assistant")
         if len(assistant_parts) > 1:
 
     if "<|im_end|>" in assistant_content:
         assistant_content = assistant_content.split("<|im_end|>")[0]
 
+    # Clean the response
     clean_text = assistant_content.strip()
 
     # Remove template tokens
 
     clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
     clean_text = clean_text.strip()
 
+    # Validate response matches mode
+    if not is_force_mode and clean_text:
+        # In mentor mode, response should ask questions or provide hints
+        if not any(marker in clean_text for marker in ['?', 'think', 'try', 'what', 'how', 'consider', 'break it down']):
+            # Model didn't follow mentor instructions, use fallback
+            return generate_mentor_response(user_message, context)
+
+    # Length control
+    if len(clean_text) > 600:
+        sentences = clean_text.split('. ')
+        if len(sentences) > 4:
+            clean_text = '. '.join(sentences[:4]) + '.'
+
+    # Fallback
     if not clean_text or len(clean_text) < 10:
         if is_force_mode:
+            return generate_force_response(user_message, context)
         else:
+            return generate_mentor_response(user_message, context)
 
     print(f"🧹 Final cleaned answer length: {len(clean_text)}")
     return clean_text
 
 def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
     """
+    Enhanced generation with proper conversation history and context awareness.
     """
     try:
+        # Analyze conversation context
+        context = analyze_conversation_context(messages)
+        print(f"📊 Conversation context: {context}")
+
+        # Get the last user message
+        last_user_msg = ""
+        for msg in reversed(messages):
+            if msg.get("role") == "user":
+                last_user_msg = msg.get("content", "")
+                break
+
+        if not last_user_msg:
+            return "I didn't receive a message. Please ask me something!"
+
+        # Try context-aware predefined responses first
+        context_response = generate_force_response(last_user_msg, context) if is_force_mode else generate_mentor_response(last_user_msg, context)
+
+        # Check if we got a meaningful predefined response
+        if is_force_mode:
+            if context_response != "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish.":
+                return context_response
+        else:
+            if context_response != "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?":
+                return context_response
 
+        # Fallback to model generation with conversation history
+        conversation_messages = []
 
+        # Add enhanced system prompt
+        system_prompt = get_enhanced_system_prompt(is_force_mode)
+        conversation_messages.append({"role": "system", "content": system_prompt})
 
+        # Add conversation history (last 6 messages: 3 user + 3 assistant)
+        recent_messages = messages[-6:] if len(messages) > 6 else messages
+        for msg in recent_messages:
+            if msg.get("role") in ["user", "assistant"] and msg.get("content"):
+                conversation_messages.append({
+                    "role": msg["role"],
+                    "content": msg["content"]
+                })
 
+        print(f"🔍 Processing {len(conversation_messages)} messages for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
 
        # Apply chat template
        try:
            formatted_prompt = tokenizer.apply_chat_template(
+                conversation_messages,
                tokenize=False,
                add_generation_prompt=True
            )
        except Exception as e:
            print(f"⚠️ Chat template failed, using simple format: {e}")
+            formatted_prompt = f"System: {conversation_messages[0]['content']}\n"
+            for msg in conversation_messages[1:]:
+                formatted_prompt += f"{msg['role'].title()}: {msg['content']}\n"
+            formatted_prompt += "Assistant:"
 
+        # Tokenize
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
+            max_length=1000
        )
 
+        # Generation parameters
        generation_params = {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
        }
 
        if is_force_mode:
            generation_params.update({
+                "max_new_tokens": min(max_tokens, 200),
+                "temperature": 0.2,
+                "top_p": 0.8,
+                "top_k": 25,
                "repetition_penalty": 1.05,
            })
        else:
            generation_params.update({
+                "max_new_tokens": min(max_tokens, 180),
+                "temperature": 0.4,
+                "top_p": 0.85,
+                "top_k": 35,
                "repetition_penalty": 1.02,
            })
 
+        # Generate
        with torch.no_grad():
            outputs = model.generate(**generation_params)
 
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
 
+        # Clean and return with context
+        clean_answer = extract_clean_answer(full_response, formatted_prompt, last_user_msg, context, is_force_mode)
 
        return clean_answer
 
    except Exception as e:
        print(f"❌ Generation error with Qwen2-0.5B: {e}")
+        # Return context-appropriate fallback
+        if is_force_mode:
+            return "I encountered an error. Please try rephrasing your request more specifically."
+        else:
+            return "I had trouble processing that. What specific aspect would you like to explore? Can you break down your question?"
 
511
  # === Routes ===
512
  @app.get("/")
513
  def root():
514
  return {
515
+ "message": "🤖 Apollo AI Backend v2.1 - Qwen2-0.5B Context-Aware",
516
  "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
517
  "status": "ready",
518
+ "optimizations": ["context_aware", "conversation_history", "progressive_guidance"],
519
+ "features": ["mentor_mode", "force_mode", "context_analysis"],
520
  "modes": {
521
+ "mentor": "Guides learning with contextual questions",
522
+ "force": "Provides direct answers based on conversation"
523
  }
524
  }
525
 
 
529
  "status": "healthy",
530
  "model_loaded": True,
531
  "model_size": "0.5B",
532
+ "optimizations": "context_aware_responses"
533
  }
534
 
535
  @app.post("/v1/chat/completions")
 
553
  try:
554
  body = await request.json()
555
  messages = body.get("messages", [])
556
+ max_tokens = min(body.get("max_tokens", 200), 400)
557
+ temperature = max(0.1, min(body.get("temperature", 0.5), 0.8))
558
 
 
559
  is_force_mode = body.get("force_mode", False)
560
 
561
  if not messages or not isinstance(messages, list):
 
576
  )
577
 
578
  try:
579
+ print(f"📥 Processing context-aware request for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
580
+ print(f"📊 Conversation length: {len(messages)} messages")
581
 
582
  response_content = generate_response(
583
  messages=messages,
 
586
  temperature=temperature
587
  )
588
 
 
589
  return {
590
  "id": f"chatcmpl-apollo-qwen05b-{hash(str(messages)) % 10000}",
591
  "object": "chat.completion",
592
  "created": int(torch.tensor(0).item()),
593
+ "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-contextaware",
594
  "choices": [
595
  {
596
  "index": 0,
 
607
  "total_tokens": len(str(messages)) + len(response_content)
608
  },
609
  "apollo_mode": "force" if is_force_mode else "mentor",
610
+ "model_optimizations": "context_aware_conversation"
611
  }
612
 
613
  except Exception as e:
 
617
  content={"error": f"Internal server error: {str(e)}"}
618
  )
619
 
 
620
  @app.post("/test")
621
  async def test_generation(request: Request):
622
+ """Enhanced test endpoint with conversation context"""
623
  try:
624
  body = await request.json()
625
  prompt = body.get("prompt", "How do I print hello world in Python?")
626
+ max_tokens = min(body.get("max_tokens", 200), 400)
627
  test_both_modes = body.get("test_both_modes", True)
628
 
629
+ # Simulate conversation context
630
+ messages = [{"role": "user", "content": prompt}]
631
+
632
  results = {}
633
 
634
  # Test mentor mode
635
+ mentor_response = generate_response(messages, is_force_mode=False, max_tokens=max_tokens, temperature=0.4)
 
636
  results["mentor_mode"] = {
637
  "response": mentor_response,
638
  "length": len(mentor_response),
639
+ "mode": "mentor",
640
+ "asks_questions": "?" in mentor_response
641
  }
642
 
643
  if test_both_modes:
644
  # Test force mode
645
+ force_response = generate_response(messages, is_force_mode=True, max_tokens=max_tokens, temperature=0.2)
 
646
  results["force_mode"] = {
647
  "response": force_response,
648
  "length": len(force_response),
649
+ "mode": "force",
650
+ "provides_code": "```" in force_response or "`" in force_response
651
  }
652
 
653
  return {
654
  "prompt": prompt,
655
  "results": results,
656
  "model": "Qwen2-0.5B-Instruct",
657
+ "optimizations": "context_aware_conversation",
658
  "status": "success"
659
  }
660
 
 
666
 
667
  if __name__ == "__main__":
668
  import uvicorn
669
+ print("🚀 Starting Apollo AI Backend v2.1 - Context-Aware Qwen2-0.5B...")
670
  print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
671
+ print("⚡ Optimizations: Context-aware responses, conversation history, progressive guidance")
672
+ print("🎯 Modes: Mentor (guided questions) vs Force (direct answers)")
673
  uvicorn.run(app, host="0.0.0.0", port=7860)
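
For reference, a minimal client sketch against the endpoints this commit touches. It assumes the server is running locally on port 7860 (per the `uvicorn.run` call above) and the third-party `requests` package; the `choices[0]["message"]["content"]` path assumes the standard OpenAI-style layout that the route's "OpenAI-compatible response" comment implies.

```python
# Minimal sketch of a client call. Assumptions: local server on port 7860
# (per uvicorn.run above), the `requests` package, and an OpenAI-style
# choices[0]["message"]["content"] layout in the completion object.
import requests

payload = {
    "messages": [{"role": "user", "content": "Create a calculator in Python"}],
    "max_tokens": 200,
    "temperature": 0.5,
    "force_mode": False,  # False = mentor mode (guiding questions); True = direct answers
}

resp = requests.post("http://localhost:7860/v1/chat/completions", json=payload)
data = resp.json()
print(data["apollo_mode"])                       # "mentor" or "force"
print(data["choices"][0]["message"]["content"])  # the generated reply

# The /test route exercises both modes in one call and reports the
# asks_questions / provides_code checks added in this commit.
both = requests.post(
    "http://localhost:7860/test",
    json={"prompt": "Create a calculator in Python"},
).json()
print(both["results"]["mentor_mode"]["asks_questions"])
print(both["results"]["force_mode"]["provides_code"])
```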