fix: prevent self-talking issue by using tokenizer chat_template formatting
- Updated prompt construction to use `tokenizer.apply_chat_template()` for chat-tuned models
- Ensured backward compatibility with non-chat models by falling back to manual prompt formatting
- Resolves an issue where models would echo both user and assistant messages due to incorrect flat-text prompts
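For context, a minimal sketch of what `apply_chat_template()` produces for a chat-tuned model; the checkpoint name below is illustrative, not necessarily one this Space loads:

```python
# Illustrative checkpoint: any chat-tuned model with a chat_template works.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi!"},
]
# add_generation_prompt=True ends the string exactly where the assistant's
# turn begins, so the model has no reason to continue with a fake "User:"
# turn (the self-talk behavior this commit fixes).
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
```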
`app.py` (CHANGED):
```diff
@@ -95,22 +95,21 @@ def retrieve_context(query, max_results=6, max_chars=600):
     except Exception:
         return []
 
-[old lines 98-111: the previous flat-text prompt builder; its body was not captured in this view]
-    prompt
-    return prompt
+def format_conversation(history, system_prompt, tokenizer):
+    if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
+        messages = [{"role": "system", "content": system_prompt.strip()}] + history
+        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    else:
+        # Fallback for base LMs without chat template
+        prompt = system_prompt.strip() + "\n"
+        for msg in history:
+            if msg['role'] == 'user':
+                prompt += "User: " + msg['content'].strip() + "\n"
+            elif msg['role'] == 'assistant':
+                prompt += "Assistant: " + msg['content'].strip() + "\n"
+        if not prompt.strip().endswith("Assistant:"):
+            prompt += "Assistant: "
+        return prompt
 
 @spaces.GPU(duration=60)
 def chat_response(user_msg, chat_history, system_prompt,
```
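With `format_conversation` defined as above, the fallback branch can be sanity-checked without loading a model; `DummyTokenizer` here is a hypothetical stand-in for a base-LM tokenizer that has no chat template:

```python
# Hypothetical stand-in: chat_template=None routes into the manual fallback.
class DummyTokenizer:
    chat_template = None

history = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there."},
    {"role": "user", "content": "What can you do?"},
]
print(format_conversation(history, "You are a concise assistant.", DummyTokenizer()))
# You are a concise assistant.
# User: Hello
# Assistant: Hi there.
# User: What can you do?
# Assistant:
```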
```diff
@@ -166,9 +165,8 @@ def chat_response(user_msg, chat_history, system_prompt,
     else:
         enriched = system_prompt
 
-    prompt = format_conversation(history, enriched)
-
     pipe = load_pipeline(model_name)
+    prompt = format_conversation(history, enriched, pipe.tokenizer)
     streamer = TextIteratorStreamer(pipe.tokenizer,
                                     skip_prompt=True,
                                     skip_special_tokens=True)
```

Moving the `format_conversation` call after `load_pipeline` makes the loaded tokenizer available to it, so the prompt can be built with the model's own chat template.
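The `TextIteratorStreamer` set up above is normally consumed from a background thread; a minimal sketch of that pattern, assuming `chat_response` is a generator that the UI iterates, with an illustrative `max_new_tokens` value:

```python
# Sketch of the usual streaming pattern; generation kwargs are illustrative.
from threading import Thread

thread = Thread(target=pipe, args=(prompt,),
                kwargs=dict(max_new_tokens=512, streamer=streamer))
thread.start()

partial = ""
for chunk in streamer:   # yields decoded text as it is generated;
    partial += chunk     # skip_prompt=True keeps the echoed prompt out
    yield partial        # stream the growing reply back to the UI
thread.join()
```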