Ais committed
Commit 6b66b4f · verified · 1 Parent(s): 973ee50

Update app/main.py

Simplify app/main.py: replace create_conversation_prompt/generate_response with the leaner build_simple_prompt/generate_clean_response, shorten both mode system prompts, cap max_tokens at 300, slim the route handlers and response payload, and bump the version to 5.0.0.

Files changed (1)
  1. app/main.py +92 -162
app/main.py CHANGED
@@ -7,7 +7,7 @@ from peft import PeftModel
 from starlette.middleware.cors import CORSMiddleware
 
 # === Setup FastAPI ===
-app = FastAPI(title="Apollo AI Backend - Qwen2-0.5B", version="4.0.0-TRULY-FIXED")
+app = FastAPI(title="Apollo AI Backend - Fixed", version="5.0.0")
 
 # === CORS ===
 app.add_middleware(
@@ -24,12 +24,12 @@ BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
 ADAPTER_PATH = "adapter"
 
 # === Load Model ===
-print("🔧 Loading tokenizer for Qwen2-0.5B...")
+print("🔧 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
-print("🧠 Loading Qwen2-0.5B base model...")
+print("🧠 Loading base model...")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     trust_remote_code=True,
@@ -37,240 +37,170 @@ base_model = AutoModelForCausalLM.from_pretrained(
     device_map="cpu"
 )
 
-print("🔗 Applying LoRA adapter to Qwen2-0.5B...")
+print("🔗 Loading adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
 model.eval()
-
-print("✅ Qwen2-0.5B model ready!")
-
-def create_conversation_prompt(messages: list, is_force_mode: bool) -> str:
-    """Create a conversation prompt with clear mode instructions"""
+print("✅ Model ready!")
+
+def build_simple_prompt(messages: list, force_mode: bool = False) -> str:
+    """Create a clean, simple prompt"""
 
-    if is_force_mode:
-        system_prompt = """You are a helpful programming assistant. Give direct, complete answers with examples. Do not ask questions back to the user. Provide clear explanations and working code when relevant.
-When asked about Python functions, provide:
-1. What the function does
-2. Clear examples with output
-3. Common use cases
-Be direct and informative."""
+    # Simple system prompts
+    if force_mode:
+        system = "You are a helpful coding assistant. Give clear, direct answers with examples when asked."
     else:
-        system_prompt = """You are a programming teacher focused on helping students learn through discovery. Guide students with questions and hints rather than giving direct answers.
-When asked about concepts:
-1. Ask what they think might happen
-2. Encourage them to try things out
-3. Guide them to discover patterns
-4. Ask follow-up questions to deepen understanding
-Help them learn by thinking, not by giving answers directly."""
+        system = "You are a coding teacher. Help students learn by asking guiding questions instead of giving direct answers."
 
     # Build conversation
-    conversation = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+    prompt = f"<|im_start|>system\n{system}<|im_end|>\n"
 
-    # Add conversation history (last 4 messages for context)
-    recent_messages = messages[-4:] if len(messages) > 4 else messages
+    # Add only the last few messages for context
+    recent_messages = messages[-3:] if len(messages) > 3 else messages
 
     for msg in recent_messages:
-        role = msg.get("role", "")
+        role = msg.get("role", "user")
         content = msg.get("content", "")
-        conversation += f"<|im_start|>{role}\n{content}<|im_end|>\n"
+        prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
 
-    conversation += "<|im_start|>assistant\n"
-    return conversation
+    prompt += "<|im_start|>assistant\n"
+    return prompt
 
-def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 200, temperature: float = 0.7) -> str:
-    """Generate response using the AI model"""
+def generate_clean_response(messages: list, force_mode: bool = False, max_tokens: int = 200) -> str:
+    """Generate a clean response"""
     try:
-        # Create conversation prompt
-        prompt = create_conversation_prompt(messages, is_force_mode)
-
-        print(f"🎯 Generating {'FORCE (Direct)' if is_force_mode else 'MENTOR (Questions)'} response")
-        print(f"🔍 Mode flag: {is_force_mode}")
-
-        # Adjust parameters based on mode
-        if is_force_mode:
-            generation_temp = 0.3  # More focused for direct answers
-            generation_tokens = min(max_tokens, 300)
-        else:
-            generation_temp = 0.5  # More creative for questions
-            generation_tokens = min(max_tokens, 250)
-
-        # Tokenize input
-        inputs = tokenizer(prompt, return_tensors="pt", max_length=1500, truncation=True)
+        # Build prompt
+        prompt = build_simple_prompt(messages, force_mode)
+
+        print(f"🎯 Mode: {'FORCE' if force_mode else 'MENTOR'}")
+        print(f"📝 Prompt length: {len(prompt)} chars")
+
+        # Tokenize
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            max_length=1000,
+            truncation=True
+        )
 
-        # Generate response
+        # Generate
         with torch.no_grad():
             outputs = model.generate(
                 inputs.input_ids,
-                max_new_tokens=generation_tokens,
-                temperature=generation_temp,
+                max_new_tokens=max_tokens,
+                temperature=0.4 if force_mode else 0.6,
                 do_sample=True,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
                 top_p=0.9,
-                repetition_penalty=1.1,
-                no_repeat_ngram_size=3
+                repetition_penalty=1.1
             )
 
-        # Decode response
-        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Decode
+        full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        # Extract only the new generated part
-        response = full_response[len(prompt):].strip()
+        # Extract only the assistant's response
+        response = full_output[len(prompt):].strip()
 
-        # Clean up response
+        # Clean up
         response = response.replace("<|im_end|>", "").strip()
 
-        # Remove conversation artifacts
+        # Remove any leftover formatting
         lines = response.split('\n')
        clean_lines = []
         for line in lines:
             line = line.strip()
-            if not line.startswith(('<|im_start|>', '<|im_end|>', 'system:', 'user:', 'assistant:')):
+            if line and not line.startswith(('<|im_start|>', '<|im_end|>')):
                 clean_lines.append(line)
 
-        response = '\n'.join(clean_lines).strip()
+        final_response = '\n'.join(clean_lines).strip()
 
-        # Take first paragraph if too long
-        if len(response) > max_tokens * 4:
-            paragraphs = response.split('\n\n')
-            response = paragraphs[0] if paragraphs else response[:max_tokens * 4]
-
-        print(f"✅ Generated response: {response[:100]}...")
-
-        # Simple validation - no template injection
-        if not response or len(response) < 10:
-            if is_force_mode:
-                return "I need more specific information to provide a direct answer. Could you clarify your question?"
+        # Validate response
+        if len(final_response) < 5:
+            if force_mode:
+                return "I need more details to give you a specific answer."
             else:
-                return "That's a great question to explore! What do you think might be the answer? Try experimenting and see what you discover!"
+                return "What do you think the answer might be? Try exploring it step by step."
+
+        # Truncate if too long
+        if len(final_response) > max_tokens * 5:
+            sentences = final_response.split('. ')
+            truncated = '. '.join(sentences[:3]) + '.' if len(sentences) > 3 else final_response
+            final_response = truncated
 
-        return response
+        print(f"✅ Response: {final_response[:100]}...")
+        return final_response
 
     except Exception as e:
-        print(f"❌ Generation error: {e}")
-        if is_force_mode:
-            return "I encountered an error generating a direct response. Please try rephrasing your question."
-        else:
-            return "Interesting challenge! What approach do you think might work here? Let's explore this together."
+        print(f"❌ Error: {e}")
+        return "I encountered an issue. Could you try rephrasing your question?"
 
 # === Routes ===
 @app.get("/")
 def root():
     return {
-        "message": "🤖 Apollo AI Backend v4.0-TRULY-FIXED - Qwen2-0.5B",
-        "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
+        "message": "🤖 Apollo AI Backend - Fixed",
         "status": "ready",
-        "modes": {
-            "mentor": "Guides learning with questions - REALLY FIXED",
-            "force": "Provides direct answers - REALLY FIXED"
-        },
-        "fixes": "Removed all template responses, pure AI generation"
+        "version": "5.0.0"
     }
 
 @app.get("/health")
 def health():
-    return {
-        "status": "healthy",
-        "model_loaded": True,
-        "model_size": "0.5B",
-        "version": "4.0-TRULY-FIXED"
-    }
+    return {"status": "healthy", "model_loaded": True}
 
 @app.post("/v1/chat/completions")
 async def chat_completions(request: Request):
-    # Validate API key
+    # Auth check
     auth_header = request.headers.get("Authorization", "")
     if not auth_header.startswith("Bearer "):
-        return JSONResponse(
-            status_code=401,
-            content={"error": "Missing or invalid Authorization header"}
-        )
+        return JSONResponse(status_code=401, content={"error": "Missing Authorization"})
 
     token = auth_header.replace("Bearer ", "").strip()
     if token != API_KEY:
-        return JSONResponse(
-            status_code=401,
-            content={"error": "Invalid API key"}
-        )
+        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
 
-    # Parse request body
+    # Parse request
     try:
         body = await request.json()
         messages = body.get("messages", [])
-        max_tokens = min(body.get("max_tokens", 200), 400)
-        temperature = max(0.1, min(body.get("temperature", 0.7), 1.0))
-
-        # Get force mode flag
-        is_force_mode = body.get("force_mode", False)
+        max_tokens = min(body.get("max_tokens", 200), 300)
+        force_mode = body.get("force_mode", False)
 
-        print(f"🚨 REQUEST RECEIVED - force_mode: {is_force_mode}")
-        print(f"📝 Last user message: {messages[-1].get('content', '') if messages else 'None'}")
+        print(f"🔥 Request: force_mode={force_mode}, messages={len(messages)}")
 
-        if not messages or not isinstance(messages, list):
-            raise ValueError("Messages field is required and must be a list")
+        if not messages:
+            raise ValueError("Messages required")
 
     except Exception as e:
-        return JSONResponse(
-            status_code=400,
-            content={"error": f"Invalid request body: {str(e)}"}
-        )
-
-    # Validate messages
-    for i, msg in enumerate(messages):
-        if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
-            return JSONResponse(
-                status_code=400,
-                content={"error": f"Invalid message format at index {i}"}
-            )
+        return JSONResponse(status_code=400, content={"error": str(e)})
 
     try:
-        print(f"📥 Processing in {'FORCE (Direct Answer)' if is_force_mode else 'MENTOR (Guiding Questions)'} mode")
-
-        # Generate response - NO POST-PROCESSING
-        response_content = generate_response(
+        # Generate response
+        response_content = generate_clean_response(
             messages=messages,
-            is_force_mode=is_force_mode,
-            max_tokens=max_tokens,
-            temperature=temperature
+            force_mode=force_mode,
+            max_tokens=max_tokens
         )
 
-        print(f"✅ Pure AI response generated: {response_content[:150]}...")
-
         return {
-            "id": f"chatcmpl-apollo-{hash(str(messages)) % 10000}",
-            "object": "chat.completion",
-            "created": int(torch.tensor(0).item()),
-            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-truly-fixed",
-            "choices": [
-                {
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": response_content
-                    },
-                    "finish_reason": "stop"
-                }
-            ],
-            "usage": {
-                "prompt_tokens": len(str(messages)),
-                "completion_tokens": len(response_content),
-                "total_tokens": len(str(messages)) + len(response_content)
-            },
-            "apollo_mode": "force_direct" if is_force_mode else "mentor_questions",
-            "pure_ai_response": True
+            "id": f"chatcmpl-{hash(str(messages)) % 10000}",
+            "object": "chat.completion",
+            "model": f"qwen2-{'force' if force_mode else 'mentor'}",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": response_content
+                },
+                "finish_reason": "stop"
+            }],
+            "apollo_mode": "force" if force_mode else "mentor"
         }
 
     except Exception as e:
-        print(f"❌ Chat completion error: {e}")
-        return JSONResponse(
-            status_code=500,
-            content={"error": f"Internal server error: {str(e)}"}
-        )
+        print(f"❌ Chat error: {e}")
+        return JSONResponse(status_code=500, content={"error": str(e)})
 
 if __name__ == "__main__":
     import uvicorn
-    print("🚀 Starting Apollo AI Backend v4.0-TRULY-FIXED")
-    print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
-    print("🎯 Mentor Mode: Pure AI questions and guidance")
-    print("⚡ Force Mode: Pure AI direct answers")
-    print("🚫 NO MORE TEMPLATES - Pure AI responses only")
+    print("🚀 Starting Apollo AI Backend v5.0 - FIXED")
     uvicorn.run(app, host="0.0.0.0", port=7860)
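
For a quick sanity check of the two modes after this change, the endpoint can be exercised directly. A minimal client sketch (the host and port follow the uvicorn.run call above; the key value is a placeholder and must match the server-side API_KEY, which is defined outside this diff):

    import requests

    URL = "http://localhost:7860/v1/chat/completions"  # server binds to 0.0.0.0:7860
    API_KEY = "my-secret-key"  # assumption: must equal the server's API_KEY

    payload = {
        "messages": [{"role": "user", "content": "What does enumerate() do in Python?"}],
        "max_tokens": 200,   # the handler caps this at 300
        "force_mode": True,  # True = direct answers, False (default) = guiding questions
    }

    resp = requests.post(URL, json=payload, headers={"Authorization": f"Bearer {API_KEY}"})
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])

The response envelope also carries "apollo_mode" ("force" or "mentor"), so a client can confirm which system prompt was actually applied.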