Spaces: Running on Zero
apply history flattening before it goes into the prompt
app.py CHANGED
@@ -109,6 +109,30 @@ def retrieve_context(query, max_results=6, max_chars_per_result=600):
     except Exception:
         return ""
 
+# ----------------------------------------------------------------------------
+# NEW HELPER FUNCTION: Format Conversation History into a Clean Prompt
+# ----------------------------------------------------------------------------
+def format_conversation(conversation, system_prompt):
+    """
+    Converts a list of conversation messages (each a dict with 'role' and 'content')
+    and a system prompt into a single plain text string.
+    This prevents raw role labels from being passed to the model.
+    """
+    # Start with the system prompt.
+    prompt = system_prompt.strip() + "\n"
+    # Loop through conversation and format user and assistant messages.
+    for msg in conversation:
+        if msg["role"] == "user":
+            prompt += "User: " + msg["content"].strip() + "\n"
+        elif msg["role"] == "assistant":
+            prompt += "Assistant: " + msg["content"].strip() + "\n"
+        elif msg["role"] == "system":
+            prompt += msg["content"].strip() + "\n"
+    # Append the assistant cue to indicate the start of the reply.
+    if not prompt.strip().endswith("Assistant:"):
+        prompt += "Assistant: "
+    return prompt
+
 # ------------------------------
 # Chat Response Generation with ZeroGPU using Pipeline
 # ------------------------------
@@ -120,7 +144,8 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
 
     - Appends the user's message to the conversation history.
     - Optionally retrieves web search context and inserts it as an additional system message.
-    -
+    - Converts the conversation into a formatted prompt to avoid leaking role labels.
+    - Uses the cached pipeline to generate a response.
    - Returns the updated conversation history and a debug message.
     """
     cancel_event.clear()
@@ -131,7 +156,6 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
 
     # Retrieve web search context if enabled.
     debug_message = ""
-    retrieved_context = ""
     if enable_search:
         debug_message = "Initiating web search..."
         yield conversation, debug_message
@@ -155,23 +179,26 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
     conversation.append({"role": "assistant", "content": ""})
 
     try:
+        # Format the entire conversation into a single prompt (this fixes both issues).
+        prompt_text = format_conversation(conversation, system_prompt)
+
         # Load the pipeline (cached) for the selected model.
         pipe = load_pipeline(model_name)
 
-        #
+        # Generate a response using the formatted prompt.
         response = pipe(
-
+            prompt_text,
             max_new_tokens=max_tokens,
             temperature=temperature,
             top_k=top_k,
             top_p=top_p,
             repetition_penalty=repeat_penalty,
         )
-
-
-
-
-
+
+        # Extract the generated text.
+        generated = response[0]["generated_text"]
+        # Remove the prompt portion so we only keep the new assistant reply.
+        assistant_text = generated[len(prompt_text):].strip()
 
         # Update the conversation history.
         conversation[-1]["content"] = assistant_text
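
For illustration, a minimal sketch of the flattening flow this commit introduces. The history and model below are hypothetical stand-ins (the Space loads its own models through load_pipeline), and the sketch assumes the transformers text-generation pipeline's default behavior of echoing the prompt as a prefix of generated_text, which is why the diff slices off len(prompt_text) before storing the reply:

# Hypothetical usage sketch of format_conversation + prompt-prefix stripping.
from transformers import pipeline

history = [
    {"role": "user", "content": "What is ZeroGPU?"},
    {"role": "assistant", "content": "A shared GPU pool for Spaces."},
    {"role": "user", "content": "How do I enable it?"},
]

prompt_text = format_conversation(history, "You are a helpful assistant.")
# prompt_text is now plain text with no raw role dicts:
#   You are a helpful assistant.
#   User: What is ZeroGPU?
#   Assistant: A shared GPU pool for Spaces.
#   User: How do I enable it?
#   Assistant:

pipe = pipeline("text-generation", model="gpt2")  # hypothetical model choice
out = pipe(prompt_text, max_new_tokens=64)
# The pipeline includes the prompt in its output by default,
# so keep only the newly generated reply.
reply = out[0]["generated_text"][len(prompt_text):].strip()

An alternative to the slicing would be passing return_full_text=False to the pipeline call, which makes generated_text contain only the newly generated tokens.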