Ais committed
Update app/main.py

app/main.py CHANGED (+92 -162)
@@ -7,7 +7,7 @@ from peft import PeftModel
from starlette.middleware.cors import CORSMiddleware

# === Setup FastAPI ===
-app = FastAPI(title="Apollo AI Backend -

# === CORS ===
app.add_middleware(

@@ -24,12 +24,12 @@ BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
ADAPTER_PATH = "adapter"

# === Load Model ===
-print("🔧 Loading tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

-print("🧠 Loading
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,

@@ -37,240 +37,170 @@ base_model = AutoModelForCausalLM.from_pretrained(
    device_map="cpu"
)

-print("🔗
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()

-
-
-def create_conversation_prompt(messages: list, is_force_mode: bool) -> str:
-    """Create a conversation prompt with clear mode instructions"""

-
-
-
-    1. What the function does
-    2. Clear examples with output
-    3. Common use cases
-    Be direct and informative."""
    else:
-
-    When asked about concepts:
-    1. Ask what they think might happen
-    2. Encourage them to try things out
-    3. Guide them to discover patterns
-    4. Ask follow-up questions to deepen understanding
-    Help them learn by thinking, not by giving answers directly."""

    # Build conversation
-

-    # Add
-    recent_messages = messages[-

    for msg in recent_messages:
-        role = msg.get("role", "")
        content = msg.get("content", "")
-

-
-    return

-def
-    """Generate
    try:
-        #
-        prompt =
-
-        print(f"🎯
-        print(f"
-
-        #
-
-
-
-
-
-
-
-        # Tokenize input
-        inputs = tokenizer(prompt, return_tensors="pt", max_length=1500, truncation=True)

-        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
-                max_new_tokens=
-                temperature=
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                top_p=0.9,
-                repetition_penalty=1.1
-                no_repeat_ngram_size=3
            )

-        # Decode
-

-        # Extract only the
-        response =

-        # Clean up
        response = response.replace("<|im_end|>", "").strip()

-        # Remove
        lines = response.split('\n')
        clean_lines = []
        for line in lines:
            line = line.strip()
-            if not line.startswith(('<|im_start|>', '<|im_end|>'
                clean_lines.append(line)

-

-        #
-        if len(
-
-
-
-        print(f"✅ Generated response: {response[:100]}...")
-
-        # Simple validation - no template injection
-        if not response or len(response) < 10:
-            if is_force_mode:
-                return "I need more specific information to provide a direct answer. Could you clarify your question?"
            else:
-                return "

-

    except Exception as e:
-        print(f"❌
-
-        return "I encountered an error generating a direct response. Please try rephrasing your question."
-        else:
-            return "Interesting challenge! What approach do you think might work here? Let's explore this together."

# === Routes ===
@app.get("/")
def root():
    return {
-        "message": "🤖 Apollo AI Backend
-        "model": "Qwen/Qwen2-0.5B-Instruct with LoRA",
        "status": "ready",
-        "
-            "mentor": "Guides learning with questions - REALLY FIXED",
-            "force": "Provides direct answers - REALLY FIXED"
-        },
-        "fixes": "Removed all template responses, pure AI generation"
    }

@app.get("/health")
def health():
-    return {
-        "status": "healthy",
-        "model_loaded": True,
-        "model_size": "0.5B",
-        "version": "4.0-TRULY-FIXED"
-    }

@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
-    #
    auth_header = request.headers.get("Authorization", "")
    if not auth_header.startswith("Bearer "):
-        return JSONResponse(
-            status_code=401,
-            content={"error": "Missing or invalid Authorization header"}
-        )

    token = auth_header.replace("Bearer ", "").strip()
    if token != API_KEY:
-        return JSONResponse(
-            status_code=401,
-            content={"error": "Invalid API key"}
-        )

-    # Parse request
    try:
        body = await request.json()
        messages = body.get("messages", [])
-        max_tokens = min(body.get("max_tokens", 200),
-
-
-        # Get force mode flag
-        is_force_mode = body.get("force_mode", False)

-        print(f"
-        print(f"📝 Last user message: {messages[-1].get('content', '') if messages else 'None'}")

-        if not messages
-            raise ValueError("Messages

    except Exception as e:
-        return JSONResponse(
-            status_code=400,
-            content={"error": f"Invalid request body: {str(e)}"}
-        )
-
-    # Validate messages
-    for i, msg in enumerate(messages):
-        if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
-            return JSONResponse(
-                status_code=400,
-                content={"error": f"Invalid message format at index {i}"}
-            )

    try:
-
-
-        # Generate response - NO POST-PROCESSING
-        response_content = generate_response(
            messages=messages,
-
-            max_tokens=max_tokens
-            temperature=temperature
        )

-        print(f"✅ Pure AI response generated: {response_content[:150]}...")
-
        return {
-            "id": f"chatcmpl-
-            "object": "chat.completion",
-            "
-            "
-
-            {
-                "
-                "
-
-
-
-
-            }
-            ],
-            "usage": {
-                "prompt_tokens": len(str(messages)),
-                "completion_tokens": len(response_content),
-                "total_tokens": len(str(messages)) + len(response_content)
-            },
-            "apollo_mode": "force_direct" if is_force_mode else "mentor_questions",
-            "pure_ai_response": True
        }

    except Exception as e:
-        print(f"❌ Chat
-        return JSONResponse(
-            status_code=500,
-            content={"error": f"Internal server error: {str(e)}"}
-        )

if __name__ == "__main__":
    import uvicorn
-    print("🚀 Starting Apollo AI Backend
-    print("🧠 Model: Qwen/Qwen2-0.5B-Instruct (500M parameters)")
-    print("🎯 Mentor Mode: Pure AI questions and guidance")
-    print("⚡ Force Mode: Pure AI direct answers")
-    print("🚫 NO MORE TEMPLATES - Pure AI responses only")
    uvicorn.run(app, host="0.0.0.0", port=7860)
from starlette.middleware.cors import CORSMiddleware

# === Setup FastAPI ===
+app = FastAPI(title="Apollo AI Backend - Fixed", version="5.0.0")

# === CORS ===
app.add_middleware(

ADAPTER_PATH = "adapter"

# === Load Model ===
+print("🔧 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

+print("🧠 Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,

    device_map="cpu"
)

+print("🔗 Loading adapter...")
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()
+print("✅ Model ready!")
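The hunks begin at line 7, so the import block and constants at the top of app/main.py are not part of this diff. A minimal sketch of what that head plausibly contains, inferred only from the names used below (the source of API_KEY is not visible and is an assumption):

# Plausible reconstruction of the unshown file head (lines 1-6); not part of the diff.
import os
import torch
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel  # confirmed by the first hunk header

API_KEY = os.getenv("API_KEY", "change-me")  # assumption: key read from the environment
BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"      # shown as context in the second hunk header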
+def build_simple_prompt(messages: list, force_mode: bool = False) -> str:
+    """Create a clean, simple prompt"""

+    # Simple system prompts
+    if force_mode:
+        system = "You are a helpful coding assistant. Give clear, direct answers with examples when asked."
    else:
+        system = "You are a coding teacher. Help students learn by asking guiding questions instead of giving direct answers."

    # Build conversation
+    prompt = f"<|im_start|>system\n{system}<|im_end|>\n"

+    # Add only the last few messages for context
+    recent_messages = messages[-3:] if len(messages) > 3 else messages

    for msg in recent_messages:
+        role = msg.get("role", "user")
        content = msg.get("content", "")
+        prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"

+    prompt += "<|im_start|>assistant\n"
+    return prompt
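For illustration, the helper assembles a ChatML-style string; with a hypothetical single-message history it would produce:

# Hypothetical input, for illustration only
demo_messages = [{"role": "user", "content": "What does len() do in Python?"}]
print(build_simple_prompt(demo_messages, force_mode=True))
# Output:
# <|im_start|>system
# You are a helpful coding assistant. Give clear, direct answers with examples when asked.<|im_end|>
# <|im_start|>user
# What does len() do in Python?<|im_end|>
# <|im_start|>assistant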
+def generate_clean_response(messages: list, force_mode: bool = False, max_tokens: int = 200) -> str:
+    """Generate a clean response"""
    try:
+        # Build prompt
+        prompt = build_simple_prompt(messages, force_mode)
+
+        print(f"🎯 Mode: {'FORCE' if force_mode else 'MENTOR'}")
+        print(f"📝 Prompt length: {len(prompt)} chars")
+
+        # Tokenize
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            max_length=1000,
+            truncation=True
+        )

+        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
+                max_new_tokens=max_tokens,
+                temperature=0.4 if force_mode else 0.6,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                top_p=0.9,
+                repetition_penalty=1.1
            )

+        # Decode
+        full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

+        # Extract only the assistant's response
+        response = full_output[len(prompt):].strip()

+        # Clean up
        response = response.replace("<|im_end|>", "").strip()

+        # Remove any leftover formatting
        lines = response.split('\n')
        clean_lines = []
        for line in lines:
            line = line.strip()
+            if line and not line.startswith(('<|im_start|>', '<|im_end|>')):
                clean_lines.append(line)

+        final_response = '\n'.join(clean_lines).strip()

+        # Validate response
+        if len(final_response) < 5:
+            if force_mode:
+                return "I need more details to give you a specific answer."
            else:
+                return "What do you think the answer might be? Try exploring it step by step."
+
+        # Truncate if too long
+        if len(final_response) > max_tokens * 5:
+            sentences = final_response.split('. ')
+            truncated = '. '.join(sentences[:3]) + '.' if len(sentences) > 3 else final_response
+            final_response = truncated

+        print(f"✅ Response: {final_response[:100]}...")
+        return final_response

    except Exception as e:
+        print(f"❌ Error: {e}")
+        return "I encountered an issue. Could you try rephrasing your question?"
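A minimal sketch of exercising the helper locally, assuming the model and adapter above have loaded; the question text is made up for illustration:

# Hypothetical local smoke test; not part of the diff.
demo = [{"role": "user", "content": "How do Python list comprehensions work?"}]
print("MENTOR:", generate_clean_response(demo, force_mode=False, max_tokens=120))
print("FORCE: ", generate_clean_response(demo, force_mode=True, max_tokens=120))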
# === Routes ===
@app.get("/")
def root():
    return {
+        "message": "🤖 Apollo AI Backend - Fixed",
        "status": "ready",
+        "version": "5.0.0"
    }

@app.get("/health")
def health():
+    return {"status": "healthy", "model_loaded": True}
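Assuming the server is running locally on the port used in the __main__ block, the two read-only routes can be spot-checked like this (the requests package is a client-side assumption, not a dependency of this file):

# Hypothetical checks against a locally running instance.
import requests

print(requests.get("http://localhost:7860/").json())        # {"message": ..., "status": "ready", "version": "5.0.0"}
print(requests.get("http://localhost:7860/health").json())  # {"status": "healthy", "model_loaded": True}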
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
+    # Auth check
    auth_header = request.headers.get("Authorization", "")
    if not auth_header.startswith("Bearer "):
+        return JSONResponse(status_code=401, content={"error": "Missing Authorization"})

    token = auth_header.replace("Bearer ", "").strip()
    if token != API_KEY:
+        return JSONResponse(status_code=401, content={"error": "Invalid API key"})

+    # Parse request
    try:
        body = await request.json()
        messages = body.get("messages", [])
+        max_tokens = min(body.get("max_tokens", 200), 300)
+        force_mode = body.get("force_mode", False)

+        print(f"🔥 Request: force_mode={force_mode}, messages={len(messages)}")

+        if not messages:
+            raise ValueError("Messages required")

    except Exception as e:
+        return JSONResponse(status_code=400, content={"error": str(e)})
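The request contract visible in this handler: a Bearer token plus a JSON body with messages (required), max_tokens (clamped to 300), and force_mode. A hypothetical payload:

# Hypothetical request payload; the bearer token must match the server's API_KEY.
headers = {"Authorization": "Bearer <API_KEY>"}
payload = {
    "messages": [{"role": "user", "content": "Explain Python decorators"}],  # required
    "max_tokens": 250,   # values above 300 are clamped to 300 by the handler
    "force_mode": True,  # omit or set False for mentor mode
}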
    try:
+        # Generate response
+        response_content = generate_clean_response(
            messages=messages,
+            force_mode=force_mode,
+            max_tokens=max_tokens
        )

        return {
+            "id": f"chatcmpl-{hash(str(messages)) % 10000}",
+            "object": "chat.completion",
+            "model": f"qwen2-{'force' if force_mode else 'mentor'}",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": response_content
+                },
+                "finish_reason": "stop"
+            }],
+            "apollo_mode": "force" if force_mode else "mentor"
        }

    except Exception as e:
+        print(f"❌ Chat error: {e}")
+        return JSONResponse(status_code=500, content={"error": str(e)})
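Putting the endpoint together, a hedged end-to-end example of calling it and reading the OpenAI-style fields built above (again assuming a local run and the requests package on the client side):

# Hypothetical client call; not part of the diff.
import requests

resp = requests.post(
    "http://localhost:7860/v1/chat/completions",
    headers={"Authorization": "Bearer <API_KEY>"},
    json={
        "messages": [{"role": "user", "content": "Explain Python decorators"}],
        "max_tokens": 250,
        "force_mode": False,
    },
)
data = resp.json()
print(data["apollo_mode"])                       # "mentor" or "force"
print(data["choices"][0]["message"]["content"])  # the generated reply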
if __name__ == "__main__":
    import uvicorn
+    print("🚀 Starting Apollo AI Backend v5.0 - FIXED")
    uvicorn.run(app, host="0.0.0.0", port=7860)
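For completeness, the __main__ guard means the module can also be launched directly rather than through a separate ASGI runner:

# Equivalent to what the __main__ block does when the file is executed directly:
#   python app/main.py   # serves on 0.0.0.0:7860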