Update app.py
app.py CHANGED
```diff
@@ -9,7 +9,7 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # Define a maximum context length (tokens). Check your model's documentation!
 MAX_CONTEXT_LENGTH = 4096  # Example: Adjust this based on your model!
 
-nvc_prompt_template = """
+nvc_prompt_template = """<|system|>
 You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
 
 1. **Goal of the Conversation**
```
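The one-line change above swaps the bare triple-quote opener for Zephyr's `<|system|>` header; together with the `</s>`, `<|user|>`, and `<|assistant|>` markers added further down, it moves the prompt from Llama-2-style `<<SYS>>`/`[INST]` markup to the chat layout zephyr-7b-beta was trained on. A minimal sketch of an assembled prompt under that layout (the conversation text is invented for illustration):

```python
# Illustrative only: a fully assembled Zephyr-style prompt. Each turn opens
# with a role header and closes with the </s> end-of-turn token; the template
# conventionally ends with an open <|assistant|> header to cue the reply.
example_prompt = (
    "<|system|>\nYou are Roos, an NVC chatbot.</s>\n"
    "<|user|>\nMy colleague never listens to me.</s>\n"
    "<|assistant|>\n"
)
```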
```diff
@@ -88,17 +88,8 @@ You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help us
 
 13. **Ending the Conversation**
    - If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
-     “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help
-
-**Please respond with:**
-1. Your internal reasoning wrapped in <think> tags
-2. Your NVC-formatted response after </think>
-<</SYS>>
-
-**User Input:**
-{user_input}
-
-[/INST]"""
+     “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>
+"""
 
 
 def count_tokens(text: str) -> int:
```
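The code below this hunk calls `count_tokens` and `truncate_history`, but the diff never shows their bodies. A hypothetical sketch of what they might look like, assuming a crude whitespace-split token estimate (the real file may well use an actual tokenizer):

```python
# Hypothetical helpers; implementations are not shown in the diff.
def count_tokens(text: str) -> int:
    """Crude token estimate: one token per whitespace-separated word."""
    return len(text.split())


def truncate_history(history, system_message: str, max_length: int):
    """Drop the oldest (user, assistant) pairs until the remainder fits the budget."""
    budget = max_length - count_tokens(system_message)
    kept, used = [], 0
    for user_msg, assistant_msg in reversed(history):
        cost = count_tokens(user_msg or "") + count_tokens(assistant_msg or "")
        if used + cost > budget:
            break
        kept.append((user_msg, assistant_msg))
        used += cost
    return list(reversed(kept))
```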
```diff
@@ -142,29 +133,27 @@ def respond(
     temperature,
     top_p,
 ):
-    """Responds to a user message, maintaining conversation history."""
+    """Responds to a user message, maintaining conversation history, using special tokens."""
 
-
-    formatted_system_message = nvc_prompt_template.format(user_input="")  # User input is inserted later
+    formatted_system_message = nvc_prompt_template
 
     truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100)  # Reserve space for the new message and some generation
 
-    messages = [{"role": "system", "content": formatted_system_message}]
+    full_prompt = formatted_system_message  # Start with the system message
+
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            full_prompt += f"<|user|>\n{user_msg}</s>\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+            full_prompt += f"<|assistant|>\n{assistant_msg}</s>\n"
 
-
-    formatted_user_message = nvc_prompt_template.format(user_input=message)
-    messages.append({"role": "user", "content": formatted_user_message})
+    full_prompt += f"<|user|>\n{message}</s>\n"  # Add the current user message
 
 
     response = ""
     try:
         for chunk in client.chat_completion(
-            messages,
+            full_prompt,  # Send the full prompt string instead of messages list
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
```
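Worth keeping in mind when reading the `client.chat_completion(full_prompt, ...)` change: in `huggingface_hub`, `chat_completion` is the call that accepts a list of role/content dicts (the old `messages` shape) and applies the model's chat template server-side, while a hand-templated string like `full_prompt` is the input `text_generation` expects. A sketch of both call styles (the toy inputs are illustrative):

```python
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# chat_completion: list of role/content dicts; templating happens server-side.
for chunk in client.chat_completion(
    [{"role": "user", "content": "Hello"}],
    max_tokens=64,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")

# text_generation: a raw, already-templated string such as full_prompt.
for token in client.text_generation(
    "<|system|>\nYou are Roos.</s>\n<|user|>\nHello</s>\n<|assistant|>\n",
    max_new_tokens=64,
    stream=True,
):
    print(token, end="")
```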
```diff
@@ -181,7 +170,7 @@ def respond(
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value=nvc_prompt_template, label="System message"),
+        gr.Textbox(value=nvc_prompt_template, label="System message", visible=False),  # Set the NVC prompt as default and hide the system message box
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
```
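On the `ChatInterface` wiring: Gradio passes each entry of `additional_inputs`, in order, as extra positional arguments after `(message, history)`, which is how the now-hidden Textbox keeps feeding the prompt into `respond`. A sketch of the matching signature (the `system_message` name is an assumption; the diff only shows the tail of the parameter list):

```python
# additional_inputs map positionally: Textbox -> system_message,
# then the three sliders -> max_tokens, temperature, top_p.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    ...
```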