Luigi committed on
Commit
ef361b0
·
1 Parent(s): 5f6306a

apply history flattening before it goes to the prompt

Browse files
Files changed (1) hide show
  1. app.py +36 -9
app.py CHANGED
@@ -109,6 +109,30 @@ def retrieve_context(query, max_results=6, max_chars_per_result=600):
109
  except Exception:
110
  return ""
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  # ------------------------------
113
  # Chat Response Generation with ZeroGPU using Pipeline
114
  # ------------------------------
@@ -120,7 +144,8 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
120
 
121
  - Appends the user's message to the conversation history.
122
  - Optionally retrieves web search context and inserts it as an additional system message.
123
- - Uses a cached pipeline (loaded via load_pipeline) to generate a response.
 
124
  - Returns the updated conversation history and a debug message.
125
  """
126
  cancel_event.clear()
@@ -131,7 +156,6 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
131
 
132
  # Retrieve web search context if enabled.
133
  debug_message = ""
134
- retrieved_context = ""
135
  if enable_search:
136
  debug_message = "Initiating web search..."
137
  yield conversation, debug_message
@@ -155,23 +179,26 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
155
  conversation.append({"role": "assistant", "content": ""})
156
 
157
  try:
 
 
 
158
  # Load the pipeline (cached) for the selected model.
159
  pipe = load_pipeline(model_name)
160
 
161
- # Use the pipeline directly with conversation history.
162
  response = pipe(
163
- conversation,
164
  max_new_tokens=max_tokens,
165
  temperature=temperature,
166
  top_k=top_k,
167
  top_p=top_p,
168
  repetition_penalty=repeat_penalty,
169
  )
170
- # Extract the assistant's reply.
171
- try:
172
- assistant_text = response[0]["generated_text"][-1]["content"]
173
- except (KeyError, IndexError, TypeError):
174
- assistant_text = response[0]["generated_text"]
175
 
176
  # Update the conversation history.
177
  conversation[-1]["content"] = assistant_text
 
109
  except Exception:
110
  return ""
111
 
112
+ # ----------------------------------------------------------------------------
113
+ # NEW HELPER FUNCTION: Format Conversation History into a Clean Prompt
114
+ # ----------------------------------------------------------------------------
115
+ def format_conversation(conversation, system_prompt):
116
+ """
117
+ Converts a list of conversation messages (each a dict with 'role' and 'content')
118
+ and a system prompt into a single plain text string.
119
+ This prevents raw role labels from being passed to the model.
120
+ """
121
+ # Start with the system prompt.
122
+ prompt = system_prompt.strip() + "\n"
123
+ # Loop through conversation and format user and assistant messages.
124
+ for msg in conversation:
125
+ if msg["role"] == "user":
126
+ prompt += "User: " + msg["content"].strip() + "\n"
127
+ elif msg["role"] == "assistant":
128
+ prompt += "Assistant: " + msg["content"].strip() + "\n"
129
+ elif msg["role"] == "system":
130
+ prompt += msg["content"].strip() + "\n"
131
+ # Append the assistant cue to indicate the start of the reply.
132
+ if not prompt.strip().endswith("Assistant:"):
133
+ prompt += "Assistant: "
134
+ return prompt
135
+
136
  # ------------------------------
137
  # Chat Response Generation with ZeroGPU using Pipeline
138
  # ------------------------------
 
144
 
145
  - Appends the user's message to the conversation history.
146
  - Optionally retrieves web search context and inserts it as an additional system message.
147
+ - Converts the conversation into a formatted prompt to avoid leaking role labels.
148
+ - Uses the cached pipeline to generate a response.
149
  - Returns the updated conversation history and a debug message.
150
  """
151
  cancel_event.clear()
 
156
 
157
  # Retrieve web search context if enabled.
158
  debug_message = ""
 
159
  if enable_search:
160
  debug_message = "Initiating web search..."
161
  yield conversation, debug_message
 
179
  conversation.append({"role": "assistant", "content": ""})
180
 
181
  try:
182
+ # Format the entire conversation into a single prompt (this fixes both issues).
183
+ prompt_text = format_conversation(conversation, system_prompt)
184
+
185
  # Load the pipeline (cached) for the selected model.
186
  pipe = load_pipeline(model_name)
187
 
188
+ # Generate a response using the formatted prompt.
189
  response = pipe(
190
+ prompt_text,
191
  max_new_tokens=max_tokens,
192
  temperature=temperature,
193
  top_k=top_k,
194
  top_p=top_p,
195
  repetition_penalty=repeat_penalty,
196
  )
197
+
198
+ # Extract the generated text.
199
+ generated = response[0]["generated_text"]
200
+ # Remove the prompt portion so we only keep the new assistant reply.
201
+ assistant_text = generated[len(prompt_text):].strip()
202
 
203
  # Update the conversation history.
204
  conversation[-1]["content"] = assistant_text