Update app.py
app.py CHANGED
```diff
@@ -9,7 +9,7 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # Define a maximum context length (tokens).  Check your model's documentation!
 MAX_CONTEXT_LENGTH = 4096  # Example: Adjust this based on your model!
 
-nvc_prompt_template = """
+nvc_prompt_template = """<|system|>
 You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
 
 1. **Goal of the Conversation**
@@ -88,17 +88,8 @@ You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help us
 
 13. **Ending the Conversation**
    - If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
-      - “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”
-
-**Please respond with:**
-1. Your internal reasoning wrapped in <think> tags
-2. Your NVC-formatted response after </think>
-<</SYS>>
-
-**User Input:**
-{user_input}
-
-[/INST]"""
+      - “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>
+"""
 
 
 def count_tokens(text: str) -> int:
@@ -142,29 +133,27 @@ def respond(
     temperature,
     top_p,
 ):
-    """Responds to a user message, maintaining conversation history."""
+    """Responds to a user message, maintaining conversation history, using special tokens."""
 
-
-    formatted_system_message = nvc_prompt_template.format(user_input="") # User input is inserted later
+    formatted_system_message = nvc_prompt_template
 
     truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100) # Reserve space for the new message and some generation
 
-    messages = [{"role": "system", "content": formatted_system_message}]
+    full_prompt = formatted_system_message  # Start with the system message
+
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            full_prompt += f"<|user|>\n{user_msg}</s>\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+            full_prompt += f"<|assistant|>\n{assistant_msg}</s>\n"
 
-
-    formatted_user_message = nvc_prompt_template.format(user_input=message)
-    messages.append({"role": "user", "content": formatted_user_message})
+    full_prompt += f"<|user|>\n{message}</s>\n" # Add the current user message
 
 
    response = ""
    try:
        for chunk in client.chat_completion(
-            messages,
+            full_prompt, # Send the full prompt string instead of messages list
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
@@ -181,7 +170,7 @@ def respond(
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value=nvc_prompt_template, label="System message"),
+        gr.Textbox(value=nvc_prompt_template, label="System message", visible=False), # Set the NVC prompt as default and hide the system message box
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
```
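This change replaces the Llama-2-style template markers (`<</SYS>>`, `[/INST]`, a `{user_input}` slot formatted on every turn) with Zephyr's chat format: the system prompt is emitted once, and each turn is appended with `<|user|>` / `<|assistant|>` headers terminated by `</s>`. For a short exchange, the assembled `full_prompt` would look roughly like the sketch below; the conversation text is illustrative, not from the app.

```python
# Illustrative only: the shape of full_prompt after one prior exchange,
# following the Zephyr chat format built by the loop in the diff above.
full_prompt = (
    "<|system|>\n"
    "You are Roos, an NVC (Nonviolent Communication) Chatbot. ...</s>\n"  # full system prompt elided
    "<|user|>\nMy colleague never listens to me.</s>\n"                   # earlier user turn
    "<|assistant|>\nAre you feeling frustrated because being heard matters to you?</s>\n"
    "<|user|>\nYes, exactly.</s>\n"                                       # current message
)
```

One thing the shown hunks never do is append a final `<|assistant|>\n` after the current user message; Zephyr's format uses that header to cue generation, so if the full file does not add it elsewhere, the model may not start its reply cleanly.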
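A caveat on `client.chat_completion(full_prompt, ...)`: in `huggingface_hub`, `InferenceClient.chat_completion` takes `messages` as a list of `{"role": ..., "content": ...}` dicts, not a pre-templated string. If the raw-string approach is kept, the usual route is `InferenceClient.text_generation`, roughly as sketched below; `stream_reply` is a hypothetical helper name, an assumption about intent rather than the file's actual code, and note that the token-budget parameter there is `max_new_tokens`.

```python
def stream_reply(client, full_prompt, max_tokens, temperature, top_p):
    # Sketch: streaming a pre-templated prompt via text_generation instead of
    # chat_completion (which expects a list of message dicts, not a string).
    response = ""
    for chunk in client.text_generation(
        full_prompt,
        max_new_tokens=max_tokens,   # text_generation's name for the token budget
        stream=True,                 # yields plain-text fragments as they arrive
        temperature=temperature,
        top_p=top_p,
        stop_sequences=["</s>"],     # stop at Zephyr's end-of-turn token
    ):
        response += chunk
        yield response               # Gradio consumes the growing string
```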
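The context lines also reference two helpers whose bodies are outside these hunks: `count_tokens(text: str) -> int` and `truncate_history(history, system_message, max_length)`, called with a budget of `MAX_CONTEXT_LENGTH - max_tokens - 100` to reserve room for the new message and generation. For orientation, here is a minimal sketch of what such helpers typically do; the four-characters-per-token estimate and the drop-oldest-pairs policy are assumptions, not the file's implementation.

```python
# Sketch of the truncation helpers referenced (but not shown) in the diff.
def count_tokens(text: str) -> int:
    # Crude approximation: ~4 characters per token; swap in a real tokenizer
    # (e.g. the model's own) for accurate budgeting.
    return len(text) // 4

def truncate_history(history, system_message: str, max_length: int):
    """Keep the most recent (user, assistant) pairs that fit the token budget."""
    budget = max_length - count_tokens(system_message)
    kept = []
    for user_msg, assistant_msg in reversed(history):  # newest first
        pair_cost = count_tokens(user_msg or "") + count_tokens(assistant_msg or "")
        if pair_cost > budget:
            break  # everything older than this would also overflow
        kept.append((user_msg, assistant_msg))
        budget -= pair_cost
    return list(reversed(kept))  # restore chronological order
```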