Luigi committed on
Commit 14564aa · 1 Parent(s): 5db22d5

bugfix for think tag handling

Files changed (1): app.py (+3 -7)
app.py CHANGED
@@ -116,7 +116,6 @@ def validate_or_download_model():
     cleanup_old_models()
     download_model()
 
-    # First load attempt
     result = try_load_model(model_path)
     if isinstance(result, str):
         st.warning(f"Initial load failed: {result}\nAttempting re-download...")
@@ -153,7 +152,6 @@ st.caption(f"Powered by `llama.cpp` | Model: {selected_model['filename']}")
 user_input = st.chat_input("Ask something...")
 
 if user_input:
-    # Prevent appending user message if assistant hasn't replied yet
     if len(st.session_state.chat_history) % 2 == 1:
         st.warning("Please wait for the assistant to respond before sending another message.")
     else:
@@ -162,15 +160,12 @@ if user_input:
         with st.chat_message("user"):
             st.markdown(user_input)
 
-        # Trim conversation history to max 8 turns (user+assistant)
         MAX_TURNS = 8
         trimmed_history = st.session_state.chat_history[-MAX_TURNS * 2:]
         messages = [{"role": "system", "content": system_prompt}] + trimmed_history
 
         with st.chat_message("assistant"):
             full_response = ""
-            response_area = st.empty()
-
             stream = llm.create_chat_completion(
                 messages=messages,
                 max_tokens=max_tokens,
@@ -185,8 +180,9 @@ if user_input:
                 if "choices" in chunk:
                     delta = chunk["choices"][0]["delta"].get("content", "")
                     full_response += delta
-                    visible = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
-                    response_area.markdown(visible)
+
+            visible_response = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
+            st.markdown(visible_response)
 
         st.session_state.chat_history.append({"role": "assistant", "content": full_response})
 
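For reference, a minimal standalone sketch of the tag-stripping step the added lines perform: the accumulated response is cleaned once after streaming finishes and rendered with st.markdown, instead of being re-stripped and re-rendered on every chunk through response_area. The strip_think helper name and the sample string below are illustrative only and do not appear in app.py.

import re

# Hypothetical helper (not in app.py) using the same regex as the commit:
# re.DOTALL lets '.' match newlines, so multi-line <think>...</think> blocks
# are removed, and the non-greedy .*? stops at the first closing tag.
# A block whose </think> has not arrived yet is left in place.
def strip_think(text: str) -> str:
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)

if __name__ == "__main__":
    sample = "<think>model reasoning\nacross lines</think>Final answer."
    print(strip_think(sample))  # prints: Final answer.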