Spaces: Running on Zero
fix role disorder error in history
app.py CHANGED
@@ -216,14 +216,16 @@ if user_input:
     if st.session_state.pending_response:
         st.warning("Please wait for the assistant to finish responding.")
     else:
-        #
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        # Display the raw user input immediately in the chat view.
         with st.chat_message("user"):
             st.markdown(user_input)
-
+
+        # Append the plain user message to chat history for display purposes.
+        # (We will later override the last user message in the API call with the augmented version.)
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
         st.session_state.pending_response = True

-        #
+        # Retrieve extra context from web search if enabled
         if enable_search:
             retrieved_context = retrieve_context(user_input, max_results=2, max_chars_per_result=150)
         else:
@@ -231,20 +233,26 @@ if user_input:
         st.sidebar.markdown("### Retrieved Context" if enable_search else "Web Search Disabled")
         st.sidebar.text(retrieved_context or "No context found.")

-        # Build an augmented
-        if retrieved_context:
-
-            "
-            f"
+        # Build an augmented user query by merging the system prompt (and search context when available)
+        if enable_search and retrieved_context:
+            augmented_user_input = (
+                f"{system_prompt_base.strip()}\n\n"
+                f"Use the following recent web search context to help answer the query:\n\n"
+                f"{retrieved_context}\n\n"
+                f"User Query: {user_input}"
             )
         else:
-
-
-
-        # Limit conversation history to the last 2 turns
+            augmented_user_input = f"{system_prompt_base.strip()}\n\nUser Query: {user_input}"
+
+        # Limit conversation history to the last MAX_TURNS turns (user/assistant pairs)
         MAX_TURNS = 2
         trimmed_history = st.session_state.chat_history[-(MAX_TURNS * 2):]
-
+
+        # Replace the last user message (which is plain) with the augmented version for model input.
+        if trimmed_history and trimmed_history[-1]["role"] == "user":
+            messages = trimmed_history[:-1] + [{"role": "user", "content": augmented_user_input}]
+        else:
+            messages = trimmed_history + [{"role": "user", "content": augmented_user_input}]

         # Generate response with the LLM in a streaming fashion
         with st.chat_message("assistant"):
@@ -259,7 +267,6 @@ if user_input:
             repeat_penalty=repeat_penalty,
             stream=True,
         )
-
        for chunk in stream:
            if "choices" in chunk:
                delta = chunk["choices"][0]["delta"].get("content", "")
@@ -268,7 +275,8 @@ if user_input:
                    visible_response = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
                    visible_response = re.sub(r"<think>.*$", "", visible_response, flags=re.DOTALL)
                    visible_placeholder.markdown(visible_response)
-
+
+        # Append the assistant's response to conversation history.
         st.session_state.chat_history.append({"role": "assistant", "content": full_response})
         st.session_state.pending_response = False
-    gc.collect() #
+    gc.collect()  # Free memory
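The substance of the fix is in the second hunk: chat history now stores only plain, alternating user/assistant turns, and the system prompt plus any search context are merged into the request only at call time, by swapping the augmented text in for the last user message. A minimal standalone sketch of that assembly step (the helper name build_messages and the sample history are illustrative, not part of app.py):

def build_messages(chat_history, augmented_user_input, max_turns=2):
    # Keep only the last max_turns user/assistant pairs for the model call.
    trimmed = chat_history[-(max_turns * 2):]
    # The last history entry is the plain user message stored for display;
    # replace it with the augmented version so roles still strictly
    # alternate user/assistant, with no extra system or context entries.
    if trimmed and trimmed[-1]["role"] == "user":
        return trimmed[:-1] + [{"role": "user", "content": augmented_user_input}]
    return trimmed + [{"role": "user", "content": augmented_user_input}]

history = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello!"},
    {"role": "user", "content": "what's new?"},
]
messages = build_messages(history, "<system prompt>\n\nUser Query: what's new?")
assert [m["role"] for m in messages] == ["user", "assistant", "user"]

This is what the commit title's "role disorder" refers to: the message list handed to the model now always ends in exactly one user turn, which chat templates requiring strict role alternation will accept.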
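For reference, the two re.sub calls in the streaming loop (context lines, unchanged by this commit) hide chain-of-thought output as tokens arrive: the first drops completed <think>...</think> blocks, the second drops a still-open <think> tail. A quick self-contained check (the sample strings are made up):

import re

def visible(text):
    # Drop completed <think>...</think> blocks, then any unterminated tail.
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return re.sub(r"<think>.*$", "", text, flags=re.DOTALL)

assert visible("<think>plan</think>Hello") == "Hello"
assert visible("Hi <think>still streaming") == "Hi "  # tag not closed yet mid-stream

Note that full_response, including any <think> content, is what gets appended to chat history; only the text shown on screen is stripped.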