Ais
committed on
Update app/main.py
app/main.py (+184 -62)
CHANGED
@@ -8,7 +8,7 @@ from starlette.middleware.cors import CORSMiddleware
 import re

 # === Setup FastAPI ===
-app = FastAPI(title="Apollo AI Backend", version="…

 # === CORS ===
 app.add_middleware(

@@ -42,15 +42,48 @@ model.eval()

 print("✅ Model ready!")

-def …
     """
-    …
     """
     if not full_response or len(full_response.strip()) < 5:
         return "I apologize, but I couldn't generate a response. Please try again."

     print(f"🔍 Raw response length: {len(full_response)}")
-    print(f"🔍 …

     # Step 1: Remove the input prompt to get only generated content
     generated_text = full_response

@@ -59,10 +92,10 @@ def extract_clean_answer(full_response: str, formatted_prompt: str, user_message
     if len(parts) > 1:
         generated_text = parts[-1]

-    # Step 2: Extract assistant content
     assistant_content = generated_text

-    # Look for assistant tags
     if "<|im_start|>assistant" in generated_text:
         assistant_parts = generated_text.split("<|im_start|>assistant")
         if len(assistant_parts) > 1:

@@ -71,53 +104,95 @@ def extract_clean_answer(full_response: str, formatted_prompt: str, user_message
             if "<|im_end|>" in assistant_content:
                 assistant_content = assistant_content.split("<|im_end|>")[0]

-    # …
     clean_text = assistant_content.strip()

-    # Remove template tokens
     clean_text = re.sub(r'<\|im_start\|>', '', clean_text)
     clean_text = re.sub(r'<\|im_end\|>', '', clean_text)

-    # Remove role prefixes
     clean_text = re.sub(r'^(system|user|assistant):\s*', '', clean_text, flags=re.MULTILINE)

-    # …

-    # …
     clean_text = clean_text.strip()

-    # …
     if not clean_text or len(clean_text) < 10:
-        # …
-        if user_message and any(…
-        …
-        …

     print(f"🧹 Final cleaned answer length: {len(clean_text)}")
-    print(f"🧹 …
     return clean_text

-def generate_response(messages: list, max_tokens: int = 400, temperature: float = 0.7) -> str:
     """
-    …
     """
     try:
-        # Create clean conversation
         clean_messages = []

-        # Add …
         clean_messages.append({
             "role": "system",
-            "content": …
         })

-        # Add recent conversation context (last 2-3 messages)
         recent_messages = messages[-3:] if len(messages) > 3 else messages
         for msg in recent_messages:
             if msg.get("role") in ["user", "assistant"]:
                 clean_messages.append(msg)

-        print(f"🔍 Processing {len(clean_messages)} messages")

         # Build conversation using tokenizer's chat template
         formatted_prompt = tokenizer.apply_chat_template(

@@ -129,22 +204,37 @@ def generate_response(messages: list, max_tokens: int = 400, temperature: float
         # Tokenize with proper length limits
         inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=1500)

-        # …
         with torch.no_grad():
-            outputs = model.generate(
-                inputs.input_ids,
-                attention_mask=inputs.attention_mask,
-                max_new_tokens=min(max_tokens, 500),  # INCREASED from 150 to 500
-                temperature=max(0.3, min(temperature, 0.9)),
-                top_p=0.9,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-                repetition_penalty=1.05,  # Reduced to allow natural repetition
-                length_penalty=1.0,  # Neutral length penalty
-                early_stopping=False,  # Don't stop early
-                no_repeat_ngram_size=2,  # Reduced to allow more natural flow
-            )

         # Decode the full response
         full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

@@ -156,28 +246,33 @@ def generate_response(messages: list, max_tokens: int = 400, temperature: float
                 user_message = msg.get("content", "")
                 break

-        # Clean and extract the answer
-        clean_answer = extract_clean_answer(full_response, formatted_prompt, user_message)

         return clean_answer

     except Exception as e:
         print(f"❌ Generation error: {e}")
-        …

 # === Routes ===
 @app.get("/")
 def root():
     return {
-        "message": "🤖 Apollo AI Backend …
         "model": "Qwen2-0.5B-Instruct with LoRA",
         "status": "ready",
-        "…
     }

 @app.get("/health")
 def health():
-    return {"status": "healthy", "model_loaded": True}

 @app.post("/v1/chat/completions")
 async def chat_completions(request: Request):

@@ -200,9 +295,12 @@ async def chat_completions(request: Request):
     try:
         body = await request.json()
         messages = body.get("messages", [])
-        max_tokens = body.get("max_tokens", 400)
         temperature = body.get("temperature", 0.7)

         if not messages or not isinstance(messages, list):
             raise ValueError("Messages field is required and must be a list")

@@ -221,11 +319,14 @@ async def chat_completions(request: Request):
         )

     try:
-        # Generate response with …
-        print(f"📥 Processing {len(messages)} messages …
         response_content = generate_response(
             messages=messages,
-            …
             temperature=max(0.1, min(temperature, 1.0))
         )

@@ -234,7 +335,7 @@ async def chat_completions(request: Request):
             "id": f"chatcmpl-apollo-{hash(str(messages)) % 10000}",
             "object": "chat.completion",
             "created": int(torch.tensor(0).item()),
-            "model": "qwen2-0.5b-…
             "choices": [
                 {
                     "index": 0,

@@ -249,7 +350,8 @@ async def chat_completions(request: Request):
                 "prompt_tokens": len(str(messages)),
                 "completion_tokens": len(response_content),
                 "total_tokens": len(str(messages)) + len(response_content)
-            }
         }

     except Exception as e:

@@ -259,27 +361,46 @@ async def chat_completions(request: Request):
             content={"error": f"Internal server error: {str(e)}"}
         )

-# === Test endpoint for debugging ===
 @app.post("/test")
 async def test_generation(request: Request):
-    """Test endpoint for debugging …
     try:
         body = await request.json()
-        prompt = body.get("prompt", "…
         max_tokens = body.get("max_tokens", 300)

-        …
-        …
             {"role": "user", "content": prompt}
         ]

-        …

         return {
             "prompt": prompt,
-            "…
-            "…
-            "…
         }

     except Exception as e:

@@ -290,6 +411,7 @@ async def test_generation(request: Request):

 if __name__ == "__main__":
     import uvicorn
-    print("🚀 Starting Apollo AI Backend …
-    print("📊 …
     uvicorn.run(app, host="0.0.0.0", port=7860)

@@ -8,7 +8,7 @@ from starlette.middleware.cors import CORSMiddleware
 import re

 # === Setup FastAPI ===
+app = FastAPI(title="Apollo AI Backend", version="2.0.0")

 # === CORS ===
 app.add_middleware(

@@ -42,15 +42,48 @@ model.eval()

 print("✅ Model ready!")

+def get_system_prompt(is_force_mode: bool) -> str:
     """
+    Returns mode-specific system prompts for proper AI behavior.
+    """
+    if is_force_mode:
+        return """You are Apollo AI in DIRECT ANSWER mode. Provide:
+        - Clear, concise, direct answers
+        - Complete working code when requested
+        - Brief explanations (2-3 sentences max)
+        - Immediate solutions without teaching moments
+        - No lengthy tutorials or step-by-step guides
+        - Get straight to the point
+
+        Example:
+        User: "How do I print hello world in Python?"
+        You: "Use `print("Hello World")`. This function outputs text to the console."
+        """
+    else:
+        return """You are Apollo AI in MENTOR mode. Your role is to guide learning:
+        - Ask leading questions instead of giving direct answers
+        - Provide hints and concepts, never complete solutions
+        - Encourage thinking: "What do you think would happen if...?"
+        - Give partial code with blanks: "Try filling in the _____ part"
+        - Guide discovery: "Have you considered looking into...?"
+        - Make students work for understanding
+        - Never give full working code - always leave something for them to figure out
+
+        Example:
+        User: "How do I print hello world in Python?"
+        You: "Great question! What function do you think might be used to display text on screen? Think about what action you want to perform. Try looking up Python's built-in functions for output."
+        """
+
+def extract_clean_answer(full_response: str, formatted_prompt: str, user_message: str, is_force_mode: bool) -> str:
+    """
+    SINGLE POWERFUL CLEANING FUNCTION - The only place where response cleaning happens.
+    All frontend cleaning is removed, this is the source of truth.
     """
     if not full_response or len(full_response.strip()) < 5:
         return "I apologize, but I couldn't generate a response. Please try again."

     print(f"🔍 Raw response length: {len(full_response)}")
+    print(f"🔍 Mode: {'FORCE' if is_force_mode else 'MENTOR'}")

     # Step 1: Remove the input prompt to get only generated content
     generated_text = full_response

@@ -59,10 +92,10 @@ def extract_clean_answer(full_response: str, formatted_prompt: str, user_message
     if len(parts) > 1:
         generated_text = parts[-1]

+    # Step 2: Extract assistant content using multiple strategies
     assistant_content = generated_text

+    # Strategy A: Look for assistant tags
     if "<|im_start|>assistant" in generated_text:
         assistant_parts = generated_text.split("<|im_start|>assistant")
         if len(assistant_parts) > 1:

@@ -71,53 +104,95 @@ def extract_clean_answer(full_response: str, formatted_prompt: str, user_message
             if "<|im_end|>" in assistant_content:
                 assistant_content = assistant_content.split("<|im_end|>")[0]

+    # Strategy B: Look for role-based prefixes
+    elif "assistant:" in generated_text.lower():
+        parts = generated_text.lower().split("assistant:")
+        if len(parts) > 1:
+            # Get the content after the last "assistant:" occurrence
+            assistant_content = generated_text[generated_text.lower().rfind("assistant:") + 10:]
+
+    # Step 3: POWERFUL CLEANING - Remove all template artifacts
     clean_text = assistant_content.strip()

+    # Remove all chat template tokens
     clean_text = re.sub(r'<\|im_start\|>', '', clean_text)
     clean_text = re.sub(r'<\|im_end\|>', '', clean_text)
+    clean_text = re.sub(r'<\|endoftext\|>', '', clean_text)

+    # Remove role prefixes from anywhere in text
     clean_text = re.sub(r'^(system|user|assistant):\s*', '', clean_text, flags=re.MULTILINE)
+    clean_text = re.sub(r'\n(system|user|assistant):\s*', '\n', clean_text, flags=re.MULTILINE)

+    # Remove common system prompt artifacts
+    clean_text = re.sub(r'You are Apollo AI.*?mode[^\n]*\n?', '', clean_text, flags=re.IGNORECASE)
+    clean_text = re.sub(r'Guidelines?:.*?\n', '', clean_text, flags=re.IGNORECASE)
+    clean_text = re.sub(r'Example:.*?\n', '', clean_text, flags=re.IGNORECASE)

+    # Clean up excessive whitespace but preserve formatting
+    clean_text = re.sub(r'\n{4,}', '\n\n\n', clean_text)
     clean_text = clean_text.strip()

+    # Step 4: Handle edge cases and fallbacks
     if not clean_text or len(clean_text) < 10:
+        # Special handling for simple math questions
+        if user_message and any(term in user_message.lower() for term in ['2+2', '2 + 2', 'calculate', 'what is']):
+            if '2+2' in user_message.lower() or '2 + 2' in user_message.lower():
+                return "4" if is_force_mode else "What do you think 2 + 2 equals? Try calculating it step by step."
+
+        # Generic fallback based on mode
+        if is_force_mode:
+            return "I understand your question. Could you please be more specific about what you need?"
+        else:
+            return "That's an interesting question! What approach do you think we should take to solve this? What's your initial thought?"
+
+    # Step 5: Mode-specific post-processing
+    if is_force_mode:
+        # For force mode, ensure response is concise
+        if len(clean_text) > 800:  # If too long, truncate but keep it coherent
+            sentences = clean_text.split('. ')
+            if len(sentences) > 3:
+                clean_text = '. '.join(sentences[:3]) + '.'
+    else:
+        # For mentor mode, ensure it's not giving away complete solutions
+        # Check if response contains complete code without guidance
+        code_block_pattern = r'```[\w]*\n(.*?)\n```'
+        code_blocks = re.findall(code_block_pattern, clean_text, re.DOTALL)
+
+        for code in code_blocks:
+            # If code looks complete and there's no guidance, add mentor touch
+            if len(code.strip()) > 50 and 'try' not in clean_text.lower() and '?' not in clean_text:
+                clean_text += "\n\nTry implementing this step by step. What do you think each part does?"

     print(f"🧹 Final cleaned answer length: {len(clean_text)}")
+    print(f"🧹 Preview: {clean_text[:150]}...")
+
     return clean_text

+def generate_response(messages: list, is_force_mode: bool = False, max_tokens: int = 400, temperature: float = 0.7) -> str:
     """
+    Generate response with mode-specific system prompts and proper settings.
     """
     try:
+        # Create clean conversation with mode-specific system prompt
         clean_messages = []

+        # Add mode-specific system message
+        system_prompt = get_system_prompt(is_force_mode)
         clean_messages.append({
             "role": "system",
+            "content": system_prompt
         })

+        # Add recent conversation context (last 2-3 messages, but filter appropriately)
         recent_messages = messages[-3:] if len(messages) > 3 else messages
         for msg in recent_messages:
             if msg.get("role") in ["user", "assistant"]:
+                # Skip system messages from frontend to avoid conflicts
+                if msg.get("role") == "system":
+                    continue
                 clean_messages.append(msg)

+        print(f"🔍 Processing {len(clean_messages)} messages in {'FORCE' if is_force_mode else 'MENTOR'} mode")

         # Build conversation using tokenizer's chat template
         formatted_prompt = tokenizer.apply_chat_template(

@@ -129,22 +204,37 @@ def generate_response(messages: list, max_tokens: int = 400, temperature: float
         # Tokenize with proper length limits
         inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=1500)

+        # Mode-specific generation settings
+        generation_params = {
+            "input_ids": inputs.input_ids,
+            "attention_mask": inputs.attention_mask,
+            "pad_token_id": tokenizer.eos_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+            "do_sample": True,
+        }
+
+        if is_force_mode:
+            # Force mode: Direct, concise answers
+            generation_params.update({
+                "max_new_tokens": min(max_tokens, 300),  # Shorter responses
+                "temperature": 0.3,  # More focused
+                "top_p": 0.8,
+                "repetition_penalty": 1.1,
+                "length_penalty": 0.8,  # Encourage shorter responses
+            })
+        else:
+            # Mentor mode: More thoughtful, questioning responses
+            generation_params.update({
+                "max_new_tokens": min(max_tokens, 500),  # Allow longer explanations
+                "temperature": 0.7,  # More creative for questions
+                "top_p": 0.9,
+                "repetition_penalty": 1.05,
+                "length_penalty": 1.0,  # Neutral length
+            })
+
+        # Generate response
         with torch.no_grad():
+            outputs = model.generate(**generation_params)

         # Decode the full response
         full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

@@ -156,28 +246,33 @@ def generate_response(messages: list, max_tokens: int = 400, temperature: float
                 user_message = msg.get("content", "")
                 break

+        # Clean and extract the answer using our SINGLE POWERFUL cleaning function
+        clean_answer = extract_clean_answer(full_response, formatted_prompt, user_message, is_force_mode)

         return clean_answer

     except Exception as e:
         print(f"❌ Generation error: {e}")
+        mode_text = "direct answer" if is_force_mode else "guided learning approach"
+        return f"I encountered an error while generating a {mode_text}. Please try rephrasing your question."

 # === Routes ===
 @app.get("/")
 def root():
     return {
+        "message": "🤖 Apollo AI Backend v2.0 - Mode-Specific AI",
         "model": "Qwen2-0.5B-Instruct with LoRA",
         "status": "ready",
+        "features": ["mentor_mode", "force_mode", "single_powerful_cleaning"],
+        "modes": {
+            "mentor": "Guides learning with questions and hints",
+            "force": "Provides direct answers and solutions"
+        }
     }

 @app.get("/health")
 def health():
+    return {"status": "healthy", "model_loaded": True, "cleaning": "single_backend_only"}

 @app.post("/v1/chat/completions")
 async def chat_completions(request: Request):

@@ -200,9 +295,12 @@ async def chat_completions(request: Request):
     try:
         body = await request.json()
         messages = body.get("messages", [])
+        max_tokens = body.get("max_tokens", 400)
         temperature = body.get("temperature", 0.7)

+        # NEW: Get mode information from request
+        is_force_mode = body.get("force_mode", False)  # Default to mentor mode
+
         if not messages or not isinstance(messages, list):
             raise ValueError("Messages field is required and must be a list")

@@ -221,11 +319,14 @@ async def chat_completions(request: Request):
         )

     try:
+        # Generate response with mode-specific behavior
+        print(f"📥 Processing {len(messages)} messages in {'FORCE' if is_force_mode else 'MENTOR'} mode")
+        print(f"📊 Settings: max_tokens={max_tokens}, temperature={temperature}")
+
         response_content = generate_response(
             messages=messages,
+            is_force_mode=is_force_mode,
+            max_tokens=min(max_tokens, 600),
             temperature=max(0.1, min(temperature, 1.0))
         )

@@ -234,7 +335,7 @@ async def chat_completions(request: Request):
             "id": f"chatcmpl-apollo-{hash(str(messages)) % 10000}",
             "object": "chat.completion",
             "created": int(torch.tensor(0).item()),
+            "model": f"qwen2-0.5b-{'force' if is_force_mode else 'mentor'}-mode",
             "choices": [
                 {
                     "index": 0,

@@ -249,7 +350,8 @@ async def chat_completions(request: Request):
                 "prompt_tokens": len(str(messages)),
                 "completion_tokens": len(response_content),
                 "total_tokens": len(str(messages)) + len(response_content)
+            },
+            "apollo_mode": "force" if is_force_mode else "mentor"
         }

     except Exception as e:

@@ -259,27 +361,46 @@ async def chat_completions(request: Request):
             content={"error": f"Internal server error: {str(e)}"}
         )

+# === Test endpoint for debugging modes ===
 @app.post("/test")
 async def test_generation(request: Request):
+    """Test endpoint for debugging both modes"""
     try:
         body = await request.json()
+        prompt = body.get("prompt", "How do I print hello world in Python?")
         max_tokens = body.get("max_tokens", 300)
+        test_both_modes = body.get("test_both_modes", True)
+
+        results = {}

+        # Test mentor mode
+        messages_mentor = [
             {"role": "user", "content": prompt}
         ]
+        mentor_response = generate_response(messages_mentor, is_force_mode=False, max_tokens=max_tokens, temperature=0.7)
+        results["mentor_mode"] = {
+            "response": mentor_response,
+            "length": len(mentor_response),
+            "mode": "mentor"
+        }

+        if test_both_modes:
+            # Test force mode
+            messages_force = [
+                {"role": "user", "content": prompt}
+            ]
+            force_response = generate_response(messages_force, is_force_mode=True, max_tokens=max_tokens, temperature=0.3)
+            results["force_mode"] = {
+                "response": force_response,
+                "length": len(force_response),
+                "mode": "force"
+            }

         return {
             "prompt": prompt,
+            "results": results,
+            "status": "success",
+            "cleaning": "single_backend_only"
         }

     except Exception as e:

@@ -290,6 +411,7 @@ async def test_generation(request: Request):

 if __name__ == "__main__":
     import uvicorn
+    print("🚀 Starting Apollo AI Backend v2.0...")
+    print("📊 Features: Mode-specific prompts, Single powerful cleaning")
+    print("🎯 Modes: Mentor (guides learning) vs Force (direct answers)")
     uvicorn.run(app, host="0.0.0.0", port=7860)
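
For context, here is a minimal client sketch showing how the new mode switch could be exercised from the outside. It is not part of the commit: the endpoint paths, the request fields (messages, max_tokens, temperature, force_mode, prompt, test_both_modes) and the top-level apollo_mode field come from the diff above, while the base URL, the requests dependency, the ask() helper name, and the OpenAI-style choices[0]["message"]["content"] access are assumptions.

# Illustrative client sketch (not part of the commit). Assumes the backend from this
# diff is running locally on port 7860; the "choices" entries are assumed to follow
# the OpenAI chat-completion shape with a "message" object (only "index" is visible
# in the diff above).
import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment


def ask(prompt: str, force_mode: bool) -> dict:
    """Call /v1/chat/completions with the force_mode flag introduced in this commit."""
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 400,
        "temperature": 0.7,
        "force_mode": force_mode,  # False = mentor mode (default), True = direct answers
    }
    resp = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=120)
    resp.raise_for_status()
    return resp.json()


if __name__ == "__main__":
    data = ask("How do I print hello world in Python?", force_mode=True)
    print("mode:", data.get("apollo_mode"))           # "force" or "mentor", added in this commit
    print(data["choices"][0]["message"]["content"])   # assumed OpenAI-style message shape

    # The /test endpoint runs one prompt through both modes for side-by-side comparison.
    test = requests.post(
        f"{BASE_URL}/test",
        json={"prompt": "How do I reverse a list?", "max_tokens": 300, "test_both_modes": True},
        timeout=300,
    ).json()
    print(test["results"]["mentor_mode"]["response"])
    print(test["results"].get("force_mode", {}).get("response"))

Since the commit makes the backend the single place where responses are cleaned ("single_backend_only"), a client along these lines can display the returned content directly instead of re-stripping chat-template tokens on the frontend.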