Ais commited on
Commit
afb15e3
·
verified ·
1 Parent(s): c5cd891

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +127 -37
app/main.py CHANGED
@@ -7,7 +7,7 @@ from peft import PeftModel
7
  from starlette.middleware.cors import CORSMiddleware
8
 
9
  # === Setup FastAPI ===
10
- app = FastAPI(title="Apollo AI Backend - Qwen2-0.5B", version="3.0.0")
11
 
12
  # === CORS ===
13
  app.add_middleware(
@@ -45,17 +45,34 @@ print("✅ Qwen2-0.5B model ready!")
45
 
46
  def create_conversation_prompt(messages: list, is_force_mode: bool) -> str:
47
  """
48
- Create a simple conversation prompt with appropriate system instruction
49
  """
50
  if is_force_mode:
51
- system_prompt = "DIRECT ANSWER MODE: Give immediate, complete solutions. Provide working code and clear explanations. Don't ask questions - just solve the problem directly."
 
 
 
 
 
 
 
 
 
 
52
  else:
53
- system_prompt = "TEACHER MODE: You are a teacher. NEVER give direct answers or solutions. ALWAYS respond with guiding questions. Ask things like 'What do you think this does?' or 'How would you approach this?' Help them discover answers themselves through questions."
 
 
 
 
 
 
 
54
 
55
- # Build conversation
56
  conversation = f"System: {system_prompt}\n\n"
57
 
58
- # Add last 6 messages (3 pairs) for context
59
  recent_messages = messages[-6:] if len(messages) > 6 else messages
60
 
61
  for msg in recent_messages:
@@ -69,27 +86,79 @@ def create_conversation_prompt(messages: list, is_force_mode: bool) -> str:
69
  conversation += "Assistant:"
70
  return conversation
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
73
  """
74
- Generate response using the actual AI model
75
  """
76
  try:
77
- # Create conversation prompt
78
  prompt = create_conversation_prompt(messages, is_force_mode)
79
 
80
- print(f"🎯 Generating {'FORCE' if is_force_mode else 'MENTOR'} response")
81
  print(f"🔍 DEBUG: force_mode = {is_force_mode}")
82
- print(f"📝 System prompt: {prompt.split('Student:')[0][:100]}...")
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  # Tokenize input
85
  inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
86
 
87
- # Generate response
88
  with torch.no_grad():
89
  outputs = model.generate(
90
  inputs.input_ids,
91
- max_new_tokens=max_tokens,
92
- temperature=temperature,
93
  do_sample=True,
94
  pad_token_id=tokenizer.eos_token_id,
95
  eos_token_id=tokenizer.eos_token_id,
@@ -103,42 +172,50 @@ def generate_response(messages: list, is_force_mode: bool = False, max_tokens: i
103
  # Extract only the new generated part
104
  response = full_response[len(prompt):].strip()
105
 
106
- # Clean up response
107
- response = response.replace("Student:", "").replace("Assistant:", "").strip()
 
 
 
 
 
 
108
 
109
- # Remove any system mentions
110
- if response.startswith("System:"):
111
- response = response.split("\n", 1)[-1].strip()
112
 
113
- print(f"✅ Generated response length: {len(response)}")
 
114
 
115
- if not response or len(response) < 10:
116
- # Stronger fallback responses
117
  if is_force_mode:
118
- return "Here's the direct answer: I need you to provide a more specific question so I can give you the exact solution you need."
119
  else:
120
- return "What do you think this code is trying to do? Can you trace through it step by step and tell me what you notice?"
121
 
122
- return response
123
 
124
  except Exception as e:
125
  print(f"❌ Generation error: {e}")
 
126
  if is_force_mode:
127
- return "I encountered an error. Please try rephrasing your question."
128
  else:
129
- return "I had trouble processing that. Can you tell me what you're trying to understand?"
130
 
131
  # === Routes ===
132
  @app.get("/")
133
  def root():
134
  return {
135
- "message": "🤖 Apollo AI Backend v3.0 - Qwen2-0.5B",
136
  "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
137
  "status": "ready",
138
  "modes": {
139
- "mentor": "Guides learning with questions",
140
- "force": "Provides direct answers"
141
- }
 
142
  }
143
 
144
  @app.get("/health")
@@ -146,7 +223,8 @@ def health():
146
  return {
147
  "status": "healthy",
148
  "model_loaded": True,
149
- "model_size": "0.5B"
 
150
  }
151
 
152
  @app.post("/v1/chat/completions")
@@ -173,8 +251,12 @@ async def chat_completions(request: Request):
173
  max_tokens = min(body.get("max_tokens", 200), 400)
174
  temperature = max(0.1, min(body.get("temperature", 0.7), 1.0))
175
 
 
176
  is_force_mode = body.get("force_mode", False)
177
 
 
 
 
178
  if not messages or not isinstance(messages, list):
179
  raise ValueError("Messages field is required and must be a list")
180
 
@@ -193,9 +275,11 @@ async def chat_completions(request: Request):
193
  )
194
 
195
  try:
196
- print(f"📥 Processing request in {'FORCE' if is_force_mode else 'MENTOR'} mode")
197
  print(f"📊 Total messages: {len(messages)}")
 
198
 
 
199
  response_content = generate_response(
200
  messages=messages,
201
  is_force_mode=is_force_mode,
@@ -203,11 +287,14 @@ async def chat_completions(request: Request):
203
  temperature=temperature
204
  )
205
 
 
 
 
206
  return {
207
  "id": f"chatcmpl-apollo-{hash(str(messages)) % 10000}",
208
  "object": "chat.completion",
209
  "created": int(torch.tensor(0).item()),
210
- "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}",
211
  "choices": [
212
  {
213
  "index": 0,
@@ -223,7 +310,9 @@ async def chat_completions(request: Request):
223
  "completion_tokens": len(response_content),
224
  "total_tokens": len(str(messages)) + len(response_content)
225
  },
226
- "apollo_mode": "force" if is_force_mode else "mentor"
 
 
227
  }
228
 
229
  except Exception as e:
@@ -235,8 +324,9 @@ async def chat_completions(request: Request):
235
 
236
  if __name__ == "__main__":
237
  import uvicorn
238
- print("🚀 Starting Apollo AI Backend v3.0 - Simple & Clean...")
239
  print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
240
- print("🎯 Mentor Mode: Asks guiding questions")
241
- print("⚡ Force Mode: Gives direct answers")
 
242
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
7
  from starlette.middleware.cors import CORSMiddleware
8
 
9
  # === Setup FastAPI ===
10
+ app = FastAPI(title="Apollo AI Backend - Qwen2-0.5B", version="3.1.0-FIXED")
11
 
12
  # === CORS ===
13
  app.add_middleware(
 
45
 
46
  def create_conversation_prompt(messages: list, is_force_mode: bool) -> str:
47
  """
48
+ Create a conversation prompt with STRONG mode enforcement
49
  """
50
  if is_force_mode:
51
+ system_prompt = """FORCE MODE - DIRECT ANSWERS ONLY:
52
+ You MUST give direct, complete, factual answers. Do NOT ask questions. Provide exact solutions, working code, and clear explanations.
53
+
54
+ EXAMPLE FORCE RESPONSE:
55
+ Q: What does len() do in Python?
56
+ A: len() returns the number of items in an object. Examples:
57
+ - len([1,2,3]) returns 3
58
+ - len("hello") returns 5
59
+ - len({1,2,3}) returns 3
60
+
61
+ Always be direct and informative. Never ask "What do you think?" or similar questions."""
62
  else:
63
+ system_prompt = """MENTOR MODE - GUIDED LEARNING ONLY:
64
+ You are a programming teacher. You MUST guide students to discover answers themselves. NEVER give direct answers or complete solutions. ALWAYS respond with guiding questions and hints.
65
+
66
+ EXAMPLE MENTOR RESPONSE:
67
+ Q: What does len() do in Python?
68
+ A: Great question! What do you think might happen if you run len([1,2,3]) in Python? Can you guess what number it would return? Try it and see! What pattern do you notice?
69
+
70
+ Always ask questions to guide learning. Never give direct answers."""
71
 
72
+ # Build conversation with recent context
73
  conversation = f"System: {system_prompt}\n\n"
74
 
75
+ # Add last 6 messages (3 pairs) for context but prioritize mode compliance
76
  recent_messages = messages[-6:] if len(messages) > 6 else messages
77
 
78
  for msg in recent_messages:
 
86
  conversation += "Assistant:"
87
  return conversation
88
 
89
+ def validate_response_mode(response: str, is_force_mode: bool) -> str:
90
+ """
91
+ CRITICAL: Enforce mode compliance in responses
92
+ """
93
+ response = response.strip()
94
+
95
+ if is_force_mode:
96
+ # Force mode: Must be direct, no questions
97
+ has_questioning = any(phrase in response.lower() for phrase in [
98
+ "what do you think", "can you tell me", "what would happen",
99
+ "try it", "guess", "what pattern", "how do you", "what's your"
100
+ ])
101
+
102
+ if has_questioning or response.count("?") > 1:
103
+ # Convert to direct answer
104
+ print("🔧 Converting to direct answer for force mode")
105
+ direct_parts = []
106
+ for sentence in response.split("."):
107
+ if "?" not in sentence and len(sentence.strip()) > 10:
108
+ direct_parts.append(sentence.strip())
109
+
110
+ if direct_parts:
111
+ return ". ".join(direct_parts[:2]) + "."
112
+ else:
113
+ return "Here's the direct answer: " + response.split("?")[0].strip() + "."
114
+
115
+ else:
116
+ # Mentor mode: Must have questions and guidance
117
+ has_questions = "?" in response
118
+ has_guidance = any(phrase in response.lower() for phrase in [
119
+ "what do you think", "can you", "try", "what would", "how do you", "what pattern"
120
+ ])
121
+
122
+ if not has_questions and not has_guidance:
123
+ # Convert to guiding questions
124
+ print("🔧 Adding guiding questions for mentor mode")
125
+ return f"Interesting! {response} What do you think about this? Can you tell me what part makes most sense to you?"
126
+
127
+ return response
128
+
129
  def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
130
  """
131
+ Generate response using the AI model with STRONG mode enforcement
132
  """
133
  try:
134
+ # Create conversation prompt with strong mode directives
135
  prompt = create_conversation_prompt(messages, is_force_mode)
136
 
137
+ print(f"🎯 Generating {'FORCE' if is_force_mode else 'MENTOR'} response with FIXED logic")
138
  print(f"🔍 DEBUG: force_mode = {is_force_mode}")
139
+ print(f"📝 System prompt preview: {prompt.split('Student:')[0][:150]}...")
140
+
141
+ # Adjust generation parameters based on mode
142
+ if is_force_mode:
143
+ # Force mode: Lower temperature for more focused, direct responses
144
+ generation_temp = 0.2
145
+ generation_tokens = min(max_tokens, 250)
146
+ else:
147
+ # Mentor mode: Slightly higher temperature for more varied questioning
148
+ generation_temp = 0.4
149
+ generation_tokens = min(max_tokens, 200)
150
+
151
+ print(f"🎛️ Using temperature: {generation_temp}, max_tokens: {generation_tokens}")
152
 
153
  # Tokenize input
154
  inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
155
 
156
+ # Generate response with mode-specific parameters
157
  with torch.no_grad():
158
  outputs = model.generate(
159
  inputs.input_ids,
160
+ max_new_tokens=generation_tokens,
161
+ temperature=generation_temp,
162
  do_sample=True,
163
  pad_token_id=tokenizer.eos_token_id,
164
  eos_token_id=tokenizer.eos_token_id,
 
172
  # Extract only the new generated part
173
  response = full_response[len(prompt):].strip()
174
 
175
+ # Clean up response - remove role markers
176
+ response = response.replace("Student:", "").replace("Assistant:", "").replace("System:", "").strip()
177
+
178
+ # Remove any remaining conversation artifacts
179
+ if "\n" in response:
180
+ response = response.split("\n")[0].strip()
181
+
182
+ print(f"✅ Raw generated response: {response[:100]}...")
183
 
184
+ # CRITICAL: Validate and enforce mode compliance
185
+ validated_response = validate_response_mode(response, is_force_mode)
 
186
 
187
+ print(f"✅ Final validated response length: {len(validated_response)}")
188
+ print(f"📝 Mode compliance: {'FORCE' if is_force_mode else 'MENTOR'}")
189
 
190
+ if not validated_response or len(validated_response) < 10:
191
+ # Strong fallback responses based on mode
192
  if is_force_mode:
193
+ return "len() returns the number of items in a sequence. For example: len([1,2,3]) returns 3, len('hello') returns 5."
194
  else:
195
+ return "What do you think len() might do? Try running len([1,2,3]) and see what happens! What number do you get?"
196
 
197
+ return validated_response
198
 
199
  except Exception as e:
200
  print(f"❌ Generation error: {e}")
201
+ # Mode-specific error fallbacks
202
  if is_force_mode:
203
+ return "I need you to provide a more specific question so I can give you the exact answer you need."
204
  else:
205
+ return "That's an interesting question! What do you think might be the answer? Can you break it down step by step?"
206
 
207
  # === Routes ===
208
  @app.get("/")
209
  def root():
210
  return {
211
+ "message": "🤖 Apollo AI Backend v3.1-FIXED - Qwen2-0.5B",
212
  "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
213
  "status": "ready",
214
  "modes": {
215
+ "mentor": "Guides learning with questions - FIXED",
216
+ "force": "Provides direct answers - FIXED"
217
+ },
218
+ "fixes": "Strong mode enforcement, response validation"
219
  }
220
 
221
  @app.get("/health")
 
223
  return {
224
  "status": "healthy",
225
  "model_loaded": True,
226
+ "model_size": "0.5B",
227
+ "version": "3.1-FIXED"
228
  }
229
 
230
  @app.post("/v1/chat/completions")
 
251
  max_tokens = min(body.get("max_tokens", 200), 400)
252
  temperature = max(0.1, min(body.get("temperature", 0.7), 1.0))
253
 
254
+ # CRITICAL: Get force mode flag
255
  is_force_mode = body.get("force_mode", False)
256
 
257
+ print(f"🚨 RECEIVED REQUEST - force_mode from body: {is_force_mode}")
258
+ print(f"🚨 Type of force_mode: {type(is_force_mode)}")
259
+
260
  if not messages or not isinstance(messages, list):
261
  raise ValueError("Messages field is required and must be a list")
262
 
 
275
  )
276
 
277
  try:
278
+ print(f"📥 Processing request in {'FORCE' if is_force_mode else 'MENTOR'} mode - FIXED")
279
  print(f"📊 Total messages: {len(messages)}")
280
+ print(f"🎯 CRITICAL - Mode flag received: {is_force_mode}")
281
 
282
+ # Generate response with FIXED mode handling
283
  response_content = generate_response(
284
  messages=messages,
285
  is_force_mode=is_force_mode,
 
287
  temperature=temperature
288
  )
289
 
290
+ print(f"✅ Generated response in {'FORCE' if is_force_mode else 'MENTOR'} mode")
291
+ print(f"📝 Response preview: {response_content[:100]}...")
292
+
293
  return {
294
  "id": f"chatcmpl-apollo-{hash(str(messages)) % 10000}",
295
  "object": "chat.completion",
296
  "created": int(torch.tensor(0).item()),
297
+ "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-fixed",
298
  "choices": [
299
  {
300
  "index": 0,
 
310
  "completion_tokens": len(response_content),
311
  "total_tokens": len(str(messages)) + len(response_content)
312
  },
313
+ "apollo_mode": "force" if is_force_mode else "mentor",
314
+ "mode_validation": "FIXED - Strong enforcement",
315
+ "model_optimizations": "qwen2_0.5B_fixed"
316
  }
317
 
318
  except Exception as e:
 
324
 
325
  if __name__ == "__main__":
326
  import uvicorn
327
+ print("🚀 Starting Apollo AI Backend v3.1-FIXED - Strong Mode Enforcement...")
328
  print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
329
+ print("🎯 Mentor Mode: FIXED - Always asks guiding questions")
330
+ print("⚡ Force Mode: FIXED - Always gives direct answers")
331
+ print("🔧 New: Response validation and mode enforcement")
332
  uvicorn.run(app, host="0.0.0.0", port=7860)