Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
| 9 |
# Define a maximum context length (tokens). Check your model's documentation!
|
| 10 |
MAX_CONTEXT_LENGTH = 4096 # Example: Adjust this based on your model!
|
| 11 |
|
| 12 |
-
nvc_prompt_template = """
|
| 13 |
You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
|
| 14 |
|
| 15 |
1. **Goal of the Conversation**
|
|
@@ -88,17 +88,8 @@ You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help us
|
|
| 88 |
|
| 89 |
13. **Ending the Conversation**
|
| 90 |
- If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
|
| 91 |
-
- “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help
|
| 92 |
-
|
| 93 |
-
**Please respond with:**
|
| 94 |
-
1. Your internal reasoning wrapped in <think> tags
|
| 95 |
-
2. Your NVC-formatted response after </think>
|
| 96 |
-
<</SYS>>
|
| 97 |
-
|
| 98 |
-
**User Input:**
|
| 99 |
-
{user_input}
|
| 100 |
-
|
| 101 |
-
[/INST]"""
|
| 102 |
|
| 103 |
|
| 104 |
def count_tokens(text: str) -> int:
|
|
@@ -142,29 +133,27 @@ def respond(
|
|
| 142 |
temperature,
|
| 143 |
top_p,
|
| 144 |
):
|
| 145 |
-
"""Responds to a user message, maintaining conversation history."""
|
| 146 |
|
| 147 |
-
|
| 148 |
-
formatted_system_message = nvc_prompt_template.format(user_input="") # User input is inserted later
|
| 149 |
|
| 150 |
truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100) # History budget: context window minus max_tokens (reserved for generation) minus a 100-token margin for the new message
|
| 151 |
|
| 152 |
-
|
|
|
|
| 153 |
for user_msg, assistant_msg in truncated_history:
|
| 154 |
if user_msg:
|
| 155 |
-
|
| 156 |
if assistant_msg:
|
| 157 |
-
|
| 158 |
|
| 159 |
-
|
| 160 |
-
formatted_user_message = nvc_prompt_template.format(user_input=message)
|
| 161 |
-
messages.append({"role": "user", "content": formatted_user_message})
|
| 162 |
|
| 163 |
|
| 164 |
response = ""
|
| 165 |
try:
|
| 166 |
for chunk in client.chat_completion(
|
| 167 |
-
messages
|
| 168 |
max_tokens=max_tokens,
|
| 169 |
stream=True,
|
| 170 |
temperature=temperature,
|
|
@@ -181,7 +170,7 @@ def respond(
|
|
| 181 |
demo = gr.ChatInterface(
|
| 182 |
respond,
|
| 183 |
additional_inputs=[
|
| 184 |
-
gr.Textbox(value=nvc_prompt_template
|
| 185 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 186 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 187 |
gr.Slider(
|
|
|
|
| 9 |
# Define a maximum context length (tokens). Check your model's documentation!
|
| 10 |
MAX_CONTEXT_LENGTH = 4096 # Example: Adjust this based on your model!
|
| 11 |
|
| 12 |
+
nvc_prompt_template = """<|system|>
|
| 13 |
You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
|
| 14 |
|
| 15 |
1. **Goal of the Conversation**
|
|
|
|
| 88 |
|
| 89 |
13. **Ending the Conversation**
|
| 90 |
- If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
|
| 91 |
+
- “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>
|
| 92 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
|
| 95 |
def count_tokens(text: str) -> int:
|
|
|
|
| 133 |
temperature,
|
| 134 |
top_p,
|
| 135 |
):
|
| 136 |
+
"""Responds to a user message, rebuilding the conversation history as a single prompt with <|user|>/<|assistant|> turn markers and </s> terminators."""
|
| 137 |
|
| 138 |
+
formatted_system_message = nvc_prompt_template
|
|
|
|
| 139 |
|
| 140 |
truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100) # History budget: context window minus max_tokens (reserved for generation) minus a 100-token margin for the new message
|
| 141 |
|
| 142 |
+
full_prompt = formatted_system_message # Start with the system message
|
| 143 |
+
|
| 144 |
for user_msg, assistant_msg in truncated_history:
|
| 145 |
if user_msg:
|
| 146 |
+
full_prompt += f"<|user|>\n{user_msg}</s>\n"
|
| 147 |
if assistant_msg:
|
| 148 |
+
full_prompt += f"<|assistant|>\n{assistant_msg}</s>\n"
|
| 149 |
|
| 150 |
+
full_prompt += f"<|user|>\n{message}</s>\n" # Add the current user message
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
response = ""
|
| 154 |
try:
|
| 155 |
for chunk in client.chat_completion(
|
| 156 |
+
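full_prompt, # NOTE(review): passes the full prompt string instead of a messages list; chat_completion is documented to take a list of role/content dicts, while a pre-formatted prompt string is what text_generation expects - confirm this call works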
|
| 157 |
max_tokens=max_tokens,
|
| 158 |
stream=True,
|
| 159 |
temperature=temperature,
|
|
|
|
| 170 |
demo = gr.ChatInterface(
|
| 171 |
respond,
|
| 172 |
additional_inputs=[
|
| 173 |
+
gr.Textbox(value=nvc_prompt_template, label="System message", visible=False), # Set the NVC prompt as default and hide the system message box
|
| 174 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 175 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 176 |
gr.Slider(
|