Commit 06a162a by Luigi (parent: 4e60755)

fix role disorder error in history

Files changed (1): app.py (+25 -17)
app.py CHANGED
@@ -216,14 +216,16 @@ if user_input:
     if st.session_state.pending_response:
         st.warning("Please wait for the assistant to finish responding.")
     else:
-        # Append the user query to chat history
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        # Display the raw user input immediately in the chat view.
         with st.chat_message("user"):
             st.markdown(user_input)
-
+
+        # Append the plain user message to chat history for display purposes.
+        # (We will later override the last user message in the API call with the augmented version.)
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
         st.session_state.pending_response = True

-        # Only retrieve search context if search feature is enabled
+        # Retrieve extra context from web search if enabled
         if enable_search:
            retrieved_context = retrieve_context(user_input, max_results=2, max_chars_per_result=150)
         else:
@@ -231,20 +233,26 @@ if user_input:
         st.sidebar.markdown("### Retrieved Context" if enable_search else "Web Search Disabled")
         st.sidebar.text(retrieved_context or "No context found.")

-        # Build an augmented system prompt that includes the retrieved context if available
-        if retrieved_context:
-            augmented_prompt = (
-                "Use the following recent web search context to help answer the query:\n\n"
-                f"{retrieved_context}\n\nUser Query: {user_input}"
+        # Build an augmented user query by merging the system prompt (and search context when available)
+        if enable_search and retrieved_context:
+            augmented_user_input = (
+                f"{system_prompt_base.strip()}\n\n"
+                f"Use the following recent web search context to help answer the query:\n\n"
+                f"{retrieved_context}\n\n"
+                f"User Query: {user_input}"
            )
         else:
-            augmented_prompt = f"User Query: {user_input}"
-        full_system_prompt = system_prompt_base.strip() + "\n\n" + augmented_prompt
-
-        # Limit conversation history to the last 2 turns
+            augmented_user_input = f"{system_prompt_base.strip()}\n\nUser Query: {user_input}"
+
+        # Limit conversation history to the last MAX_TURNS turns (user/assistant pairs)
         MAX_TURNS = 2
         trimmed_history = st.session_state.chat_history[-(MAX_TURNS * 2):]
-        messages = [{"role": "system", "content": full_system_prompt}] + trimmed_history
+
+        # Replace the last user message (which is plain) with the augmented version for model input.
+        if trimmed_history and trimmed_history[-1]["role"] == "user":
+            messages = trimmed_history[:-1] + [{"role": "user", "content": augmented_user_input}]
+        else:
+            messages = trimmed_history + [{"role": "user", "content": augmented_user_input}]

         # Generate response with the LLM in a streaming fashion
         with st.chat_message("assistant"):
@@ -259,7 +267,6 @@ if user_input:
                 repeat_penalty=repeat_penalty,
                 stream=True,
             )
-
             for chunk in stream:
                 if "choices" in chunk:
                     delta = chunk["choices"][0]["delta"].get("content", "")
@@ -268,7 +275,8 @@ if user_input:
                     visible_response = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
                     visible_response = re.sub(r"<think>.*$", "", visible_response, flags=re.DOTALL)
                     visible_placeholder.markdown(visible_response)
-
+
+        # Append the assistant's response to conversation history.
         st.session_state.chat_history.append({"role": "assistant", "content": full_response})
         st.session_state.pending_response = False
-    gc.collect()  # Trigger garbage collection to free memory
+    gc.collect()  # Free memory
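
The heart of the fix is the message-assembly step: chat history keeps the plain user message for display, while the request sent to the model swaps that last user turn for the augmented one, so the sequence always alternates and ends with exactly one user message. Below is a minimal standalone sketch of that pattern so the invariant can be checked outside Streamlit; build_messages and the demo history are illustrative names, not part of app.py.

def build_messages(chat_history, augmented_user_input, max_turns=2):
    """Trim history to the last max_turns user/assistant pairs and ensure the
    sequence ends with exactly one user message carrying the augmented prompt."""
    trimmed = chat_history[-(max_turns * 2):]
    # The plain user message is already in history for display; swap it for the
    # augmented version instead of appending a second consecutive user turn.
    if trimmed and trimmed[-1]["role"] == "user":
        return trimmed[:-1] + [{"role": "user", "content": augmented_user_input}]
    return trimmed + [{"role": "user", "content": augmented_user_input}]

# Quick check: roles still alternate and the list ends on a single user turn.
history = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello!"},
    {"role": "user", "content": "any news?"},
]
messages = build_messages(history, "You are a helpful assistant.\n\nUser Query: any news?")
assert [m["role"] for m in messages] == ["user", "assistant", "user"]

Folding system_prompt_base into the user turn rather than sending a separate system message appears to be part of the same fix: chat templates that are strict about role ordering never see a system role at all, at the cost of re-sending the instructions inside the final user turn of every request.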