Update app.py
app.py CHANGED
```diff
@@ -9,7 +9,7 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # Define a maximum context length (tokens). Check your model's documentation!
 MAX_CONTEXT_LENGTH = 4096  # Example: Adjust this based on your model!
 
-nvc_prompt_template = """
+nvc_prompt_template = """<|system|>
 You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
 
 1. **Goal of the Conversation**
```
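The one-line change above swaps the bare triple-quote opener for Zephyr's `<|system|>` header; together with the `</s>`, `<|user|>`, and `<|assistant|>` markers added further down, it moves the prompt from Llama-2-style `<<SYS>>`/`[INST]` markup to the chat layout zephyr-7b-beta was trained on. A minimal sketch of an assembled prompt under that layout (the conversation text is invented for illustration):

```python
# Illustrative only: a fully assembled Zephyr-style prompt. Each turn opens
# with a role header and closes with the </s> end-of-turn token; the template
# conventionally ends with an open <|assistant|> header to cue the reply.
example_prompt = (
    "<|system|>\nYou are Roos, an NVC chatbot.</s>\n"
    "<|user|>\nMy colleague never listens to me.</s>\n"
    "<|assistant|>\n"
)
```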
```diff
@@ -88,17 +88,8 @@ You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help us
 
 13. **Ending the Conversation**
    - If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
-     “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help
-
-**Please respond with:**
-1. Your internal reasoning wrapped in <think> tags
-2. Your NVC-formatted response after </think>
-<</SYS>>
-
-**User Input:**
-{user_input}
-
-[/INST]"""
+     “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>
+"""
 
 
 def count_tokens(text: str) -> int:
```
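The code below this hunk calls `count_tokens` and `truncate_history`, but the diff never shows their bodies. A hypothetical sketch of what they might look like, assuming a crude whitespace-split token estimate (the real file may well use an actual tokenizer):

```python
# Hypothetical helpers; implementations are not shown in the diff.
def count_tokens(text: str) -> int:
    """Crude token estimate: one token per whitespace-separated word."""
    return len(text.split())


def truncate_history(history, system_message: str, max_length: int):
    """Drop the oldest (user, assistant) pairs until the remainder fits the budget."""
    budget = max_length - count_tokens(system_message)
    kept, used = [], 0
    for user_msg, assistant_msg in reversed(history):
        cost = count_tokens(user_msg or "") + count_tokens(assistant_msg or "")
        if used + cost > budget:
            break
        kept.append((user_msg, assistant_msg))
        used += cost
    return list(reversed(kept))
```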
```diff
@@ -142,29 +133,27 @@ def respond(
     temperature,
     top_p,
 ):
-    """Responds to a user message, maintaining conversation history."""
+    """Responds to a user message, maintaining conversation history, using special tokens."""
 
-
-    formatted_system_message = nvc_prompt_template.format(user_input="")  # User input is inserted later
+    formatted_system_message = nvc_prompt_template
 
     truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100)  # Reserve space for the new message and some generation
 
-    messages = [{"role": "system", "content": formatted_system_message}]
+    full_prompt = formatted_system_message  # Start with the system message
+
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            full_prompt += f"<|user|>\n{user_msg}</s>\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+            full_prompt += f"<|assistant|>\n{assistant_msg}</s>\n"
 
-
-    formatted_user_message = nvc_prompt_template.format(user_input=message)
-    messages.append({"role": "user", "content": formatted_user_message})
+    full_prompt += f"<|user|>\n{message}</s>\n"  # Add the current user message
 
 
     response = ""
     try:
         for chunk in client.chat_completion(
-            messages,
+            full_prompt,  # Send the full prompt string instead of messages list
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
```
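Worth keeping in mind when reading the `client.chat_completion(full_prompt, ...)` change: in `huggingface_hub`, `chat_completion` is the call that accepts a list of role/content dicts (the old `messages` shape) and applies the model's chat template server-side, while a hand-templated string like `full_prompt` is the input `text_generation` expects. A sketch of both call styles (the toy inputs are illustrative):

```python
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# chat_completion: list of role/content dicts; templating happens server-side.
for chunk in client.chat_completion(
    [{"role": "user", "content": "Hello"}],
    max_tokens=64,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")

# text_generation: a raw, already-templated string such as full_prompt.
for token in client.text_generation(
    "<|system|>\nYou are Roos.</s>\n<|user|>\nHello</s>\n<|assistant|>\n",
    max_new_tokens=64,
    stream=True,
):
    print(token, end="")
```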
```diff
@@ -181,7 +170,7 @@ def respond(
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value=nvc_prompt_template, label="System message"),
+        gr.Textbox(value=nvc_prompt_template, label="System message", visible=False),  # Set the NVC prompt as default and hide the system message box
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
```
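On the `ChatInterface` wiring: Gradio passes each entry of `additional_inputs`, in order, as extra positional arguments after `(message, history)`, which is how the now-hidden Textbox keeps feeding the prompt into `respond`. A sketch of the matching signature (the `system_message` name is an assumption; the diff only shows the tail of the parameter list):

```python
# additional_inputs map positionally: Textbox -> system_message,
# then the three sliders -> max_tokens, temperature, top_p.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    ...
```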