Ais committed
Commit 18f4dad · verified · 1 Parent(s): 988fa7f

Update app/main.py

Files changed (1)
  1. app/main.py +26 -11
app/main.py CHANGED
@@ -19,7 +19,7 @@ app.add_middleware(
 )
 
 # === Load API Key from Hugging Face Secrets ===
-API_KEY = os.getenv("API_KEY", "undefined")  # Add API_KEY in your HF Space Secrets
+API_KEY = os.getenv("API_KEY", "undefined")
 
 # === Model Settings ===
 BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
@@ -65,17 +65,19 @@ async def chat(request: Request):
         if not messages or not isinstance(messages, list):
             raise ValueError("Invalid or missing 'messages' field.")
 
-        # ✅ FIXED: Process full conversation history, not just last message
         temperature = body.get("temperature", 0.7)
         max_tokens = body.get("max_tokens", 512)
 
     except Exception as e:
         return JSONResponse(status_code=400, content={"error": f"Bad request: {str(e)}"})
 
-    # ✅ FIXED: Build full conversation prompt with history
+    # ✅ FIXED: Only use last 4 messages to prevent stacking
+    recent_messages = messages[-4:] if len(messages) > 4 else messages
+
+    # ✅ Build clean conversation prompt
     formatted_prompt = ""
 
-    for message in messages:
+    for message in recent_messages:
         role = message.get("role", "")
         content = message.get("content", "")
 
@@ -89,9 +91,8 @@ async def chat(request: Request):
     # Add the assistant start token for generation
    formatted_prompt += "<|im_start|>assistant\n"
 
-    print(f"🤖 Processing conversation with {len(messages)} messages")
-    print(f"📝 Full prompt preview: {formatted_prompt[:200]}...")
-
+    print(f"🤖 Processing {len(recent_messages)} recent messages")
+
     inputs = tokenizer(formatted_prompt, return_tensors="pt").to("cpu")
 
     # ✅ Generate Response
@@ -102,19 +103,33 @@ async def chat(request: Request):
         temperature=temperature,
         top_p=0.9,
         do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
+        eos_token_id=tokenizer.eos_token_id
     )
 
     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # ✅ FIXED: Clean extraction of only the new assistant response
+    # ✅ FIXED: Extract ONLY the new assistant response
     final_answer = decoded.split("<|im_start|>assistant\n")[-1].strip()
 
-    # Remove any potential end tokens or artifacts
+    # Remove any end tokens or artifacts
     if "<|im_end|>" in final_answer:
         final_answer = final_answer.split("<|im_end|>")[0].strip()
 
-    print(f"✅ Generated response: {final_answer[:100]}...")
+    # Remove any repeated system prompts or guidelines that leaked through
+    if "Guidelines:" in final_answer:
+        final_answer = final_answer.split("Guidelines:")[0].strip()
+
+    if "Response format:" in final_answer:
+        final_answer = final_answer.split("Response format:")[0].strip()
+
+    # Remove VS Code context if it leaked through
+    if "[VS Code Context:" in final_answer:
+        lines = final_answer.split('\n')
+        cleaned_lines = [line for line in lines if not line.strip().startswith('[VS Code Context:')]
+        final_answer = '\n'.join(cleaned_lines).strip()
+
+    print(f"✅ Clean response: {final_answer[:100]}...")
 
     # ✅ OpenAI-style Response
     return {
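Note: the windowing and prompt-format logic this commit touches can be previewed in isolation. The sketch below is a standalone reconstruction, not the app's module; the per-message template inside the loop lives in an unchanged hunk, so the f-string here is an assumption consistent with the ChatML-style <|im_start|>assistant tail the diff does show.

# Standalone sketch: message windowing + ChatML prompt construction.
# ASSUMPTION: the per-message f-string template; only the assistant
# tail is visible in the diff above.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is FastAPI?"},
    {"role": "assistant", "content": "A Python web framework."},
    {"role": "user", "content": "Show a minimal app."},
    {"role": "user", "content": "And how do I run it?"},
]

# Keep only the last 4 turns, as in the commit, so the prompt stops
# growing with every exchange.
recent_messages = messages[-4:] if len(messages) > 4 else messages

formatted_prompt = ""
for message in recent_messages:
    role = message.get("role", "")
    content = message.get("content", "")
    formatted_prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"

# Add the assistant start token for generation.
formatted_prompt += "<|im_start|>assistant\n"

print(formatted_prompt)  # preview exactly what the model will see

One side effect worth noting: a fixed messages[-4:] window eventually drops the system message; pinning messages[0] when its role is "system" would keep the instructions in every prompt.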
 
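The four marker-specific cleanup blocks at the end of the last hunk could also be folded into one helper. A minimal equivalent sketch (same behavior as the chained if-blocks, not the committed code):

def clean_response(text: str) -> str:
    # Cut everything after a leaked prompt marker, as the commit does.
    for marker in ("Guidelines:", "Response format:"):
        if marker in text:
            text = text.split(marker)[0].strip()
    # Drop lines carrying leaked editor context.
    kept = [
        line for line in text.split("\n")
        if not line.strip().startswith("[VS Code Context:")
    ]
    return "\n".join(kept).strip()

Keeping the marker list in one place makes the next leak pattern a one-line change.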
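For orientation, a hypothetical client call: the handler reads messages, temperature, and max_tokens from the JSON body, but the route path, host, and how API_KEY is enforced are outside these hunks, so those parts below are assumptions.

import requests

resp = requests.post(
    "https://<your-space>.hf.space/chat",           # ASSUMED route path
    headers={"Authorization": "Bearer <API_KEY>"},  # ASSUMED auth scheme
    json={
        "messages": [{"role": "user", "content": "Hello!"}],
        "temperature": 0.7,  # optional; handler defaults to 0.7
        "max_tokens": 512,   # optional; handler defaults to 512
    },
    timeout=120,
)
print(resp.json())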