Ais committed
Update app/main.py

app/main.py CHANGED (+358 -209)
Before (old version, condensed):

@@ -44,164 +44,309 @@ model.eval()

 print("✅ Qwen2-0.5B model ready with optimized settings!")

-def …
-    """
-    Shorter, clearer instructions that small models can follow better.
-    """
-    if is_force_mode:
-        return """You are Apollo AI. …
-        - Provide full working code
-        …"""
-    else:
-        return """You are Apollo AI …
-        - Make …
-        User: "print hello world" …
-        You: "What function displays text in Python?" …
-        """
-
-    """
-    This helps the 0.5B model give consistent direct answers.
-    """
-    # … (several truncated keyword checks)
-
-    # Calculator
-    if 'calculator' in user_lower and ('create' in user_lower or 'make' in user_lower or 'build' in user_lower):
-        return '''Here's a simple calculator:
-        ```python
-        a = float(input("First number: "))
-        b = float(input("Second number: "))
-        op = input("Operator (+,-,*,/): ")
-        if op == '+': print(a + b)
-        elif op == '-': print(a - b)
-        elif op == '*': print(a * b)
-        elif op == '/': print(a / b)
-        ```'''
-
-def …
-    """
-    This helps the 0.5B model give consistent guided learning.
-    """
-    user_lower = user_message.lower()
-    # … (several truncated keyword checks)
-
-    if …
-        return '''…
-        1. Get two numbers from user - what function gets input?
-        2. Get operation (+,-,*,/) - how to choose?
-        3. Calculate result - what structure handles choices?
-        4. Show result - what displays output?
-        …'''
-
-def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, is_force_mode: bool) -> str:
-    """
-    Simpler extraction since 0.5B models produce cleaner output.
-    """
     …
     if is_force_mode:
-        predefined = …
-        if predefined:
-            print("✅ Using …")
             return predefined
     else:
-        predefined = …
-        if predefined:
-            print("✅ Using …")
             return predefined
@@ -209,7 +354,7 @@ def extract_clean_answer(…)
     if "<|im_end|>" in assistant_content:
         assistant_content = assistant_content.split("<|im_end|>")[0]

-    # …
     clean_text = assistant_content.strip()

     # Remove template tokens
@@ -225,89 +370,100 @@ def extract_clean_answer(…)
     clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
     clean_text = clean_text.strip()

-    # …
     if not clean_text or len(clean_text) < 10:
         if is_force_mode:
-            return …
         else:
-            return …
-
-    # Step 5: Length control for 0.5B
-    if len(clean_text) > 500:  # Keep responses shorter for 0.5B
-        sentences = clean_text.split('. ')
-        if len(sentences) > 3:
-            clean_text = '. '.join(sentences[:3]) + '.'

     print(f"🧹 Final cleaned answer length: {len(clean_text)}")
     return clean_text

 def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
     """
-    …
     """
     try:
-        # … (old message-preparation code, truncated)
-
-        # Add …
-        system_prompt = …
-            "role": "system",
-            "content": system_prompt
-        })
-
-        # Add …
-            break
-
-        print(f"🔍 Processing {len(…

         # Apply chat template
         try:
             formatted_prompt = tokenizer.apply_chat_template(
-                …
                 tokenize=False,
                 add_generation_prompt=True
             )
         except Exception as e:
             print(f"⚠️ Chat template failed, using simple format: {e}")
-            …

-        # Tokenize
         inputs = tokenizer(
             formatted_prompt,
             return_tensors="pt",
             truncation=True,
-            max_length=…
         )

-        # …
         generation_params = {
             "input_ids": inputs.input_ids,
             "attention_mask": inputs.attention_mask,
@@ -317,60 +473,53 @@ def generate_response(…)
         }

         if is_force_mode:
-            # Force mode: Very conservative for 0.5B
             generation_params.update({
-                "max_new_tokens": min(max_tokens, …),
-                "temperature": 0.…,
-                "top_p": 0.…,
-                "top_k": …,
                 "repetition_penalty": 1.05,
             })
         else:
-            # Mentor mode: Still conservative but allows more creativity
             generation_params.update({
-                "max_new_tokens": min(max_tokens, …),
-                "temperature": 0.…,
-                "top_p": 0.…,
-                "top_k": …,
                 "repetition_penalty": 1.02,
             })

-        # Generate
         with torch.no_grad():
             outputs = model.generate(**generation_params)

-        # Decode response
         full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

-        # …
-        for msg in reversed(clean_messages):
-            if msg.get("role") == "user":
-                user_message = msg.get("content", "")
-                break
-
-        # Clean and return
-        clean_answer = extract_clean_answer(full_response, formatted_prompt, user_message, is_force_mode)

         return clean_answer

     except Exception as e:
         print(f"❌ Generation error with Qwen2-0.5B: {e}")
-        …

 # === Routes ===
 @app.get("/")
 def root():
     return {
-        "message": "🤖 Apollo AI Backend v2.1 - Qwen2-0.5B …",
         "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
         "status": "ready",
-        "optimizations": ["…],
-        "features": ["mentor_mode", "force_mode", "…],
         "modes": {
-            "mentor": "Guides learning with …",
-            "force": "Provides direct answers …"
         }
     }
@@ -380,7 +529,7 @@ def health():
         "status": "healthy",
         "model_loaded": True,
         "model_size": "0.5B",
-        "optimizations": "…"
     }

 @app.post("/v1/chat/completions")
@@ -404,10 +553,9 @@ async def chat_completions(request: Request):
     try:
         body = await request.json()
         messages = body.get("messages", [])
-        max_tokens = min(body.get("max_tokens", 200), …)
-        temperature = max(0.1, min(body.get("temperature", 0.5), 0.8))

-        # Get mode information
         is_force_mode = body.get("force_mode", False)

         if not messages or not isinstance(messages, list):
@@ -428,8 +576,8 @@ async def chat_completions(request: Request):
         )

     try:
-        print(f"📥 Processing request for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
-        print(f"📊 …")

         response_content = generate_response(
             messages=messages,
@@ -438,12 +586,11 @@ async def chat_completions(request: Request):
             temperature=temperature
         )

-        # Return OpenAI-compatible response
         return {
             "id": f"chatcmpl-apollo-qwen05b-{hash(str(messages)) % 10000}",
             "object": "chat.completion",
             "created": int(torch.tensor(0).item()),
-            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-…",
             "choices": [
                 {
                     "index": 0,
@@ -460,7 +607,7 @@ async def chat_completions(request: Request):
                 "total_tokens": len(str(messages)) + len(response_content)
             },
             "apollo_mode": "force" if is_force_mode else "mentor",
-            "model_optimizations": "…"
         }

     except Exception as e:
@@ -470,42 +617,44 @@ async def chat_completions(request: Request):
             content={"error": f"Internal server error: {str(e)}"}
         )

-# === Test endpoint optimized for 0.5B ===
 @app.post("/test")
 async def test_generation(request: Request):
-    """…
     try:
         body = await request.json()
         prompt = body.get("prompt", "How do I print hello world in Python?")
-        max_tokens = min(body.get("max_tokens", 200), …)
         test_both_modes = body.get("test_both_modes", True)

         results = {}

         # Test mentor mode
-        mentor_response = generate_response(messages_mentor, is_force_mode=False, max_tokens=max_tokens, temperature=0.3)
         results["mentor_mode"] = {
             "response": mentor_response,
             "length": len(mentor_response),
-            "mode": "mentor"
         }

         if test_both_modes:
             # Test force mode
-            force_response = generate_response(messages_force, is_force_mode=True, max_tokens=max_tokens, temperature=0.1)
             results["force_mode"] = {
                 "response": force_response,
                 "length": len(force_response),
-                "mode": "force"
             }

         return {
             "prompt": prompt,
             "results": results,
             "model": "Qwen2-0.5B-Instruct",
-            "optimizations": "…",
             "status": "success"
         }
@@ -517,8 +666,8 @@ async def test_generation(request: Request):

 if __name__ == "__main__":
     import uvicorn
-    print("🚀 Starting Apollo AI Backend v2.1 - Qwen2-0.5B …")
     print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
-    print("⚡ Optimizations: …")
-    print("🎯 Modes: Mentor (…)")
     uvicorn.run(app, host="0.0.0.0", port=7860)
After (updated app/main.py, lines 44-673):

print("✅ Qwen2-0.5B model ready with optimized settings!")

def get_enhanced_system_prompt(is_force_mode: bool) -> str:
    """
    Enhanced system prompts that clearly define behavior for Qwen2-0.5B.
    """
    if is_force_mode:
        return """You are Apollo AI in DIRECT ANSWER mode. You must give complete, working solutions immediately.

STRICT RULES:
- Provide full working code when asked
- Give direct explanations (max 2-3 sentences)
- NEVER ask questions back to the user
- Always give complete solutions
- Be concise but thorough

EXAMPLES:
User: "How do I print hello world in Python?"
You: "Use `print('Hello World')`. This function outputs text to the console."

User: "Create a calculator in Python"
You: "Here's a simple calculator:
```python
a = float(input('First number: '))
b = float(input('Second number: '))
op = input('Operator (+,-,*,/): ')
if op == '+': print(a + b)
elif op == '-': print(a - b)
elif op == '*': print(a * b)
elif op == '/': print(a / b)
```
This performs basic math operations on two numbers."

REMEMBER: Give direct answers, not questions. Provide working code."""

    else:
        return """You are Apollo AI in MENTOR mode. You must guide learning through questions and hints only.

STRICT RULES:
- ASK guiding questions instead of giving direct answers
- NEVER provide complete working code
- Give hints and partial examples only
- Make the user think and discover the solution
- Build on their previous attempts

EXAMPLES:
User: "How do I print hello world in Python?"
You: "What function do you think displays text in Python? Think about showing output to the user. What would such a function be called?"

User: "Create a calculator in Python"
You: "Great project! Let's break it down step by step:
1. What information would a calculator need from the user?
2. How would you get input from someone using your program?
3. What operations should it support?
Start with step 1 - what function gets user input in Python?"

User: "I tried input() but it's not working"
You: "Good start with input()! What type of data does input() return? If you need to do math, what might you need to convert it to? Try looking up type conversion functions."

REMEMBER: Guide with questions, never give direct answers or complete code."""
def analyze_conversation_context(messages: list) -> dict:
    """
    Analyze conversation history to understand context and user progress.
    """
    context = {
        "user_messages": [],
        "assistant_messages": [],
        "topics": [],
        "user_attempted_code": False,
        "user_stuck": False,
        "repeated_questions": 0
    }

    # Extract recent messages
    for msg in messages[-6:]:  # Last 6 messages
        if msg.get("role") == "user":
            content = msg.get("content", "").lower()
            context["user_messages"].append(msg.get("content", ""))

            # Check if user attempted code
            if any(keyword in content for keyword in ["tried", "attempted", "doesn't work", "error", "not working"]):
                context["user_attempted_code"] = True

            # Detect topic
            if "calculator" in content:
                context["topics"].append("calculator")
            elif "print" in content and "hello" in content:
                context["topics"].append("hello_world")
            elif "function" in content:
                context["topics"].append("functions")
            elif "list" in content:
                context["topics"].append("lists")
            elif "variable" in content:
                context["topics"].append("variables")

        elif msg.get("role") == "assistant":
            context["assistant_messages"].append(msg.get("content", ""))

    # Check if user seems stuck (repeated similar questions)
    if len(context["user_messages"]) >= 2:
        last_two = context["user_messages"][-2:]
        if any(word in last_two[0].lower() and word in last_two[1].lower()
               for word in ["how", "what", "help", "create", "make"]):
            context["repeated_questions"] += 1

    return context
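For reference, a minimal smoke test of `analyze_conversation_context`; the sample conversation, and the idea of calling the helper directly, are illustrative assumptions rather than part of app/main.py:

```python
# Hypothetical usage sketch (not part of app/main.py).
sample = [
    {"role": "user", "content": "How do I make a calculator in Python?"},
    {"role": "assistant", "content": "What function gets user input?"},
    {"role": "user", "content": "I tried input() but it's not working"},
]
ctx = analyze_conversation_context(sample)
print(ctx["topics"])               # ['calculator'] (keyword match on the first user message)
print(ctx["user_attempted_code"])  # True ("tried" / "not working" matched)
```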
def generate_mentor_response(user_message: str, context: dict) -> str:
    """
    Generate mentor responses that ask guiding questions based on context.
    """
    user_lower = user_message.lower()
    topics = context.get("topics", [])
    user_attempted = context.get("user_attempted_code", False)

    # Hello World - Progressive questioning
    if "print" in user_lower and ("hello" in user_lower or "world" in user_lower):
        if user_attempted:
            return "Good effort! What happened when you tried? Did you use parentheses and quotes? Try: function_name('your text here')"
        return "What function do you think displays text in Python? Think about showing output to the user. What would such a function be called?"

    # Calculator - Step by step guidance
    if "calculator" in user_lower:
        if "hello_world" in topics or len(context["user_messages"]) > 1:
            return """Great! Since you understand output, let's build a calculator step by step:

1. How do you get numbers from the user? (Think about input)
2. What operations should it support? (+, -, *, /)
3. How do you make decisions in code? (Think about choosing operations)

Start with step 1 - what function gets user input? What type of data does it return?"""
        return """Excellent project choice! Let's think through this:

What are the main steps a calculator needs?
1. Get first number from user
2. Get operation (+, -, *, /)
3. Get second number from user
4. Calculate result
5. Show result

Which step should we tackle first? What function gets input from users?"""

    # Variables
    if "variable" in user_lower:
        if user_attempted:
            return "What symbol did you use to assign the value? In Python, we use = to store data. Try: name = value"
        return "How do you think Python remembers information? What symbol might connect a name to a value? Think: name __ value"

    # Functions
    if "function" in user_lower and ("create" in user_lower or "define" in user_lower):
        if "variables" in topics:
            return """Good! You know variables. Functions are similar but hold code instead of data.

What keyword do you think starts a function definition? Here's the pattern:
```
______ function_name():
    # code goes here
```
What goes in the blank? How would you call it afterward?"""
        return "What keyword do you think defines a function in Python? Functions are reusable blocks of code. Think about the word 'define'..."

    # Lists
    if "list" in user_lower and "python" in user_lower:
        return "What symbols do you think hold multiple items together? Think about containers. Try creating: container_symbol item1, item2, item3 container_symbol"

    # Input function help
    if "input" in user_lower and ("not working" in user_lower or "error" in user_lower):
        return "Good start with input()! What type of data does input() return - text or numbers? If you need to do math, what function converts text to numbers? Try looking up 'int()' or 'float()'."

    # Math operations
    if any(op in user_lower for op in ["+", "-", "*", "/", "add", "subtract", "multiply", "divide"]):
        return "Great! You're thinking about operations. How do you make choices in code? If user picks '+', do addition. If '-', do subtraction. What code structure makes decisions based on conditions?"

    # Default mentor response with context
    if user_attempted:
        return "I see you're experimenting - that's great! What specific part isn't working? What error do you see? Let's debug it step by step."

    return "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?"
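A quick illustration of the progressive guidance (a hypothetical call, not in the file): once "hello_world" is already among the context topics, a calculator question gets the step-by-step breakdown instead of the first-contact reply:

```python
# Hypothetical usage sketch (not part of app/main.py).
ctx = {"topics": ["hello_world"], "user_attempted_code": False,
       "user_messages": ["print hello world", "make a calculator"]}
reply = generate_mentor_response("Can you make a calculator?", ctx)
print(reply.splitlines()[0])
# -> "Great! Since you understand output, let's build a calculator step by step:"
```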
def generate_force_response(user_message: str, context: dict) -> str:
    """
    Generate direct answers for force mode.
    """
    user_lower = user_message.lower()

    # Hello World
    if "print" in user_lower and ("hello" in user_lower or "world" in user_lower):
        return "Use `print('Hello World')`. This function outputs text to the console."

    # Calculator - Complete working solution
    if "calculator" in user_lower:
        return '''Here's a complete calculator:

```python
# Get input from user
num1 = float(input("Enter first number: "))
operator = input("Enter operator (+, -, *, /): ")
num2 = float(input("Enter second number: "))

# Calculate based on operator
if operator == '+':
    result = num1 + num2
elif operator == '-':
    result = num1 - num2
elif operator == '*':
    result = num1 * num2
elif operator == '/':
    if num2 != 0:
        result = num1 / num2
    else:
        result = "Error: Division by zero"
else:
    result = "Error: Invalid operator"

# Display result
print(f"Result: {result}")
```

This calculator gets two numbers and an operator, performs the calculation, and displays the result.'''

    # Variables
    if "variable" in user_lower:
        return 'Create variables using the assignment operator: `name = value`. Examples: `x = 5`, `text = "hello"`, `pi = 3.14`. Variables store data for later use.'

    # Functions
    if "function" in user_lower and ("create" in user_lower or "define" in user_lower):
        return '''Define functions with the `def` keyword:

```python
def my_function():
    return "Hello"

def add_numbers(a, b):
    return a + b

# Call functions
result = my_function()          # Returns "Hello"
sum_result = add_numbers(5, 3)  # Returns 8
```

Functions are reusable code blocks that can take parameters and return values.'''

    # Lists
    if "list" in user_lower and "python" in user_lower:
        return 'Create lists with square brackets: `my_list = [1, 2, 3, "hello"]`. Access items with index: `my_list[0]` gets first item. Add items: `my_list.append(4)`.'

    # Input function
    if "input" in user_lower:
        return 'Use `input("Your prompt: ")` to get user input. It returns a string. For numbers, convert with `int(input())` or `float(input())`. Example: `age = int(input("Enter age: "))`'

    # Loops
    if "loop" in user_lower:
        return '''Two main types of loops:

```python
# For loop (known iterations)
for i in range(5):
    print(i)  # Prints 0 to 4

# While loop (condition-based)
count = 0
while count < 5:
    print(count)
    count += 1
```

Use for loops when you know how many times to repeat, while loops for conditions.'''

    # Default force response
    return "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish."
def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, context: dict, is_force_mode: bool) -> str:
    """
    Enhanced cleaning for Qwen2-0.5B responses with context awareness.
    """
    if not full_response or len(full_response.strip()) < 5:
        return "I apologize, but I couldn't generate a response. Please try again."

    print(f"🔍 Raw response length: {len(full_response)}")
    print(f"🔍 Mode: {'FORCE' if is_force_mode else 'MENTOR'}")
    print(f"🔍 Context topics: {context.get('topics', [])}")

    # Use context-aware predefined responses first
    if is_force_mode:
        predefined = generate_force_response(user_message, context)
        if predefined != "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish.":
            print("✅ Using context-aware force response")
            return predefined
    else:
        predefined = generate_mentor_response(user_message, context)
        if predefined != "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?":
            print("✅ Using context-aware mentor response")
            return predefined

    # If no predefined response, clean the model output
    generated_text = full_response
    if formatted_prompt in full_response:
        parts = full_response.split(formatted_prompt)
        if len(parts) > 1:
            generated_text = parts[-1]

    # Extract assistant content
    assistant_content = generated_text
    if "<|im_start|>assistant" in generated_text:
        assistant_parts = generated_text.split("<|im_start|>assistant")
        if len(assistant_parts) > 1:
            assistant_content = assistant_parts[-1]
    if "<|im_end|>" in assistant_content:
        assistant_content = assistant_content.split("<|im_end|>")[0]

    # Clean the response
    clean_text = assistant_content.strip()

    # Remove template tokens
    …
    clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
    clean_text = clean_text.strip()

    # Validate response matches mode
    if not is_force_mode and clean_text:
        # In mentor mode, response should ask questions or provide hints.
        # Compare lowercased so "What" / "Try" at sentence starts still match.
        if not any(marker in clean_text.lower() for marker in ['?', 'think', 'try', 'what', 'how', 'consider', 'break it down']):
            # Model didn't follow mentor instructions, use fallback
            return generate_mentor_response(user_message, context)

    # Length control
    if len(clean_text) > 600:
        sentences = clean_text.split('. ')
        if len(sentences) > 4:
            clean_text = '. '.join(sentences[:4]) + '.'

    # Fallback
    if not clean_text or len(clean_text) < 10:
        if is_force_mode:
            return generate_force_response(user_message, context)
        else:
            return generate_mentor_response(user_message, context)

    print(f"🧹 Final cleaned answer length: {len(clean_text)}")
    return clean_text
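Note the design choice: the two default strings double as sentinels. Both `extract_clean_answer` and `generate_response` treat an exact match of the default as "no predefined answer" and fall through to model generation. A hypothetical check of that path:

```python
# Hypothetical usage sketch (not part of app/main.py).
ctx = analyze_conversation_context([{"role": "user", "content": "Explain recursion"}])
print(generate_mentor_response("Explain recursion", ctx))
# -> "Interesting question! Let's break it down - ..." (the sentinel default,
#    so the caller proceeds to actual Qwen2-0.5B generation)
```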
def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
    """
    Enhanced generation with proper conversation history and context awareness.
    """
    try:
        # Analyze conversation context
        context = analyze_conversation_context(messages)
        print(f"📊 Conversation context: {context}")

        # Get the last user message
        last_user_msg = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                last_user_msg = msg.get("content", "")
                break

        if not last_user_msg:
            return "I didn't receive a message. Please ask me something!"

        # Try context-aware predefined responses first
        context_response = generate_force_response(last_user_msg, context) if is_force_mode else generate_mentor_response(last_user_msg, context)

        # Check if we got a meaningful predefined response
        if is_force_mode:
            if context_response != "I need more specific information to provide a direct answer. Please clarify what exactly you want to accomplish.":
                return context_response
        else:
            if context_response != "Interesting question! Let's break it down - what's your goal? What have you tried so far? What specific step are you stuck on?":
                return context_response

        # Fallback to model generation with conversation history
        conversation_messages = []

        # Add enhanced system prompt
        system_prompt = get_enhanced_system_prompt(is_force_mode)
        conversation_messages.append({"role": "system", "content": system_prompt})

        # Add conversation history (last 6 messages: 3 user + 3 assistant)
        recent_messages = messages[-6:] if len(messages) > 6 else messages
        for msg in recent_messages:
            if msg.get("role") in ["user", "assistant"] and msg.get("content"):
                conversation_messages.append({
                    "role": msg["role"],
                    "content": msg["content"]
                })

        print(f"🔍 Processing {len(conversation_messages)} messages for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")

        # Apply chat template
        try:
            formatted_prompt = tokenizer.apply_chat_template(
                conversation_messages,
                tokenize=False,
                add_generation_prompt=True
            )
        except Exception as e:
            print(f"⚠️ Chat template failed, using simple format: {e}")
            formatted_prompt = f"System: {conversation_messages[0]['content']}\n"
            for msg in conversation_messages[1:]:
                formatted_prompt += f"{msg['role'].title()}: {msg['content']}\n"
            formatted_prompt += "Assistant:"

        # Tokenize
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1000
        )

        # Generation parameters
        generation_params = {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
            …
        }

        if is_force_mode:
            generation_params.update({
                "max_new_tokens": min(max_tokens, 200),
                "temperature": 0.2,
                "top_p": 0.8,
                "top_k": 25,
                "repetition_penalty": 1.05,
            })
        else:
            generation_params.update({
                "max_new_tokens": min(max_tokens, 180),
                "temperature": 0.4,
                "top_p": 0.85,
                "top_k": 35,
                "repetition_penalty": 1.02,
            })

        # Generate
        with torch.no_grad():
            outputs = model.generate(**generation_params)

        full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Clean and return with context
        clean_answer = extract_clean_answer(full_response, formatted_prompt, last_user_msg, context, is_force_mode)

        return clean_answer

    except Exception as e:
        print(f"❌ Generation error with Qwen2-0.5B: {e}")
        # Return context-appropriate fallback
        if is_force_mode:
            return "I encountered an error. Please try rephrasing your request more specifically."
        else:
            return "I had trouble processing that. What specific aspect would you like to explore? Can you break down your question?"
# === Routes ===
@app.get("/")
def root():
    return {
        "message": "🤖 Apollo AI Backend v2.1 - Qwen2-0.5B Context-Aware",
        "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
        "status": "ready",
        "optimizations": ["context_aware", "conversation_history", "progressive_guidance"],
        "features": ["mentor_mode", "force_mode", "context_analysis"],
        "modes": {
            "mentor": "Guides learning with contextual questions",
            "force": "Provides direct answers based on conversation"
        }
    }

… (def health():)
        "status": "healthy",
        "model_loaded": True,
        "model_size": "0.5B",
        "optimizations": "context_aware_responses"
    }

@app.post("/v1/chat/completions")
… (async def chat_completions(request: Request):)
    try:
        body = await request.json()
        messages = body.get("messages", [])
        max_tokens = min(body.get("max_tokens", 200), 400)
        temperature = max(0.1, min(body.get("temperature", 0.5), 0.8))

        is_force_mode = body.get("force_mode", False)

        if not messages or not isinstance(messages, list):
            …
        )

    try:
        print(f"📥 Processing context-aware request for Qwen2-0.5B in {'FORCE' if is_force_mode else 'MENTOR'} mode")
        print(f"📊 Conversation length: {len(messages)} messages")

        response_content = generate_response(
            messages=messages,
            …
            temperature=temperature
        )

        return {
            "id": f"chatcmpl-apollo-qwen05b-{hash(str(messages)) % 10000}",
            "object": "chat.completion",
            "created": int(torch.tensor(0).item()),  # placeholder timestamp (always 0)
            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-contextaware",
            "choices": [
                {
                    "index": 0,
                    …
                "total_tokens": len(str(messages)) + len(response_content)
            },
            "apollo_mode": "force" if is_force_mode else "mentor",
            "model_optimizations": "context_aware_conversation"
        }

    except Exception as e:
        …
            content={"error": f"Internal server error: {str(e)}"}
        )
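For completeness, a minimal client call against the endpoint above. The URL assumes the uvicorn config at the bottom of the file, and the `choices[0]["message"]["content"]` shape assumes the OpenAI-compatible layout the route advertises; `requests` is not used by the app itself:

```python
import requests  # hypothetical client sketch, not part of app/main.py

resp = requests.post(
    "http://localhost:7860/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Create a calculator in Python"}],
        "force_mode": True,  # direct answers; omit (or False) for mentor mode
        "max_tokens": 200,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```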
@app.post("/test")
async def test_generation(request: Request):
    """Enhanced test endpoint with conversation context"""
    try:
        body = await request.json()
        prompt = body.get("prompt", "How do I print hello world in Python?")
        max_tokens = min(body.get("max_tokens", 200), 400)
        test_both_modes = body.get("test_both_modes", True)

        # Simulate conversation context
        messages = [{"role": "user", "content": prompt}]

        results = {}

        # Test mentor mode
        mentor_response = generate_response(messages, is_force_mode=False, max_tokens=max_tokens, temperature=0.4)
        results["mentor_mode"] = {
            "response": mentor_response,
            "length": len(mentor_response),
            "mode": "mentor",
            "asks_questions": "?" in mentor_response
        }

        if test_both_modes:
            # Test force mode
            force_response = generate_response(messages, is_force_mode=True, max_tokens=max_tokens, temperature=0.2)
            results["force_mode"] = {
                "response": force_response,
                "length": len(force_response),
                "mode": "force",
                "provides_code": "```" in force_response or "`" in force_response
            }

        return {
            "prompt": prompt,
            "results": results,
            "model": "Qwen2-0.5B-Instruct",
            "optimizations": "context_aware_conversation",
            "status": "success"
        }
…

if __name__ == "__main__":
    import uvicorn
    print("🚀 Starting Apollo AI Backend v2.1 - Context-Aware Qwen2-0.5B...")
    print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
    print("⚡ Optimizations: Context-aware responses, conversation history, progressive guidance")
    print("🎯 Modes: Mentor (guided questions) vs Force (direct answers)")
    uvicorn.run(app, host="0.0.0.0", port=7860)
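And a matching sketch for the /test endpoint, under the same host and port assumptions:

```python
import requests  # hypothetical client sketch, not part of app/main.py

r = requests.post(
    "http://localhost:7860/test",
    json={"prompt": "How do I create a function in Python?", "test_both_modes": True},
)
for mode, info in r.json()["results"].items():
    print(f"{mode}: {info['length']} chars")  # mentor_mode and force_mode summaries
```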