File size: 5,074 Bytes
e00ad77
 
fded6f6
e00ad77
fded6f6
e8d4ae4
e00ad77
58bcb23
fded6f6
 
58bcb23
fded6f6
 
e8d4ae4
 
fded6f6
e8d4ae4
fded6f6
e8d4ae4
fded6f6
e8d4ae4
fded6f6
e8d4ae4
fded6f6
e8d4ae4
fded6f6
 
 
e8d4ae4
fded6f6
e8d4ae4
fded6f6
 
 
e8d4ae4
fded6f6
 
 
e8d4ae4
fded6f6
 
58bcb23
 
2d10bdd
58bcb23
 
 
fded6f6
cdfa6da
 
 
58bcb23
e8d4ae4
cdfa6da
 
 
 
58bcb23
cdfa6da
fded6f6
cdfa6da
 
fded6f6
58bcb23
 
 
fded6f6
 
 
 
58bcb23
fded6f6
 
 
58bcb23
fded6f6
 
cdfa6da
 
fded6f6
cdfa6da
fded6f6
 
e00ad77
cdfa6da
fa909a7
fded6f6
 
 
 
 
 
 
 
 
 
fa909a7
fded6f6
 
f2b4cb5
e8d4ae4
fa909a7
9b81770
fa909a7
e8d4ae4
 
 
 
fded6f6
e8d4ae4
2d10bdd
 
fded6f6
fa909a7
 
c1faa76
 
fded6f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# One model id shared by the local tokenizer and the remote inference client
_MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# The tokenizer is only used to count tokens; the client performs the generation
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
client = InferenceClient(_MODEL_ID)

# Total token budget that the prompt and the generated reply are sized against
# (adjust if needed)
MAX_CONTEXT_LENGTH = 4096

# Default system prompt for the "Roos" NVC assistant. It is the initial value
# of the editable "System message" textbox in the Gradio interface, so users
# can override it per session.
# NOTE(review): the text starts with "<|system|>" and ends with "</s>", and
# respond() sends it unchanged as the content of the system-role message.
# If the inference endpoint applies its own chat template, these markers
# would reach the model as literal text -- confirm whether that is intended.
default_nvc_prompt_template = r"""<|system|>
You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
1. **Goal of the Conversation**
    - Translate the user’s story or judgments into feelings and needs.
    - Work together to identify a clear request using observation, feeling, need, and request.
2. **Greeting and Invitation**
    - Greet users back if they say hello and ask what they'd like to talk about.
3. **Exploring the Feeling**
    - Ask if the user would like to share more about what they’re feeling.
4. **Identifying the Feeling**
    - Offer one feeling and one need per guess (e.g., “Do you feel anger because you want to be appreciated?”).
5. **Clarifying the Need**
    - If the need isn’t clear, ask for clarification.
6. **Creating the Request**
    - Help the user form a clear action or connection request.
7. **Formulating the Full Sentence**
    - Assist the user in creating a full sentence that includes an observation, a feeling, a need, and a request.
8. **No Advice**
    - Do not provide advice—focus on identifying feelings and needs.
9. **Response Length**
    - Limit responses to a maximum of 100 words.
10. **Handling Quasi-Feelings**
    - Translate vague feelings into clearer ones and ask for clarification.
11. **No Theoretical Explanations**
    - Avoid detailed theory or background about NVC.
12. **Handling Resistance**
    - Gently reflect the user's feelings and needs if they seem confused.
13. **Ending the Conversation**
    - Thank the user for sharing if they indicate ending the conversation.
</s>"""

def count_tokens(text: str) -> int:
    """Return the number of token ids the module-level tokenizer yields for ``text``."""
    token_ids = tokenizer.encode(text)
    return len(token_ids)

def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Keep the most recent conversation turns that fit within a token budget.

    The system message is charged against the budget first. Turns are then
    considered from newest to oldest and kept while they still fit; the first
    turn that does not fit ends the scan, so the result is always a contiguous
    suffix of ``history``.

    Args:
        history: ``(user_message, assistant_message)`` pairs, oldest first.
            An empty or ``None`` side of a pair counts as zero tokens.
        system_message: System prompt whose tokens are reserved up front.
        max_length: Total token budget for the system message plus history.

    Returns:
        The retained turns in their original (oldest-first) order. The list is
        empty when not even the newest turn fits.
    """
    remaining = max_length - count_tokens(system_message)
    kept_newest_first = []

    for user_msg, assistant_msg in reversed(history):
        user_tokens = count_tokens(user_msg) if user_msg else 0
        assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
        turn_tokens = user_tokens + assistant_tokens

        if turn_tokens > remaining:
            # Stop at the first turn that does not fit so no gap is left in
            # the middle of the conversation.
            break

        # Append and reverse once at the end: inserting at index 0 on every
        # iteration would shift the whole list each time.
        kept_newest_first.append((user_msg, assistant_msg))
        remaining -= turn_tokens

    kept_newest_first.reverse()
    return kept_newest_first

def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Stream a reply to ``message`` given the conversation so far.

    This is a generator: every yielded value is the full reply accumulated so
    far, so the caller can display the text as it grows.

    Sending "clear memory" (any letter case) yields a short confirmation
    instead of calling the model. On later calls, every turn up to and
    including the most recent "clear memory" turn is left out of the prompt.
    The caller still owns the ``history`` list itself; this function only
    decides what is sent to the model.

    Args:
        message: The new user message.
        history: Earlier ``(user_message, assistant_message)`` pairs, oldest
            first.
        system_message: Text sent as the system-role message.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature forwarded to the client.
        top_p: Nucleus-sampling threshold forwarded to the client.

    Yields:
        The reply text accumulated so far, or a fixed apology string if the
        request fails.
    """
    if message.lower() == "clear memory":
        # A `return value` inside a generator is discarded and yields nothing,
        # so the confirmation has to be yielded explicitly.
        yield "Memory cleared."
        return

    relevant_history = _turns_after_last_clear(history)

    formatted_system_message = system_message
    # Reserve room for the reply, for the new user message itself, and a
    # small safety margin.
    history_budget = MAX_CONTEXT_LENGTH - max_tokens - 100 - count_tokens(message)
    truncated_history = truncate_history(relevant_history, formatted_system_message, history_budget)

    # Build the conversation messages without extra formatting tokens
    messages = [{"role": "system", "content": formatted_system_message}]
    for user_msg, assistant_msg in truncated_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                # A chunk may carry no text (content is None or empty).
                # Concatenating None would raise and replace a good answer
                # with the error message below.
                continue
            response += token
            yield response
    except Exception as e:
        print(f"An error occurred: {e}")
        yield "I'm sorry, I encountered an error. Please try again."


def _turns_after_last_clear(history):
    """Return the turns that follow the most recent "clear memory" command."""
    last_clear_index = max(
        (
            index
            for index, (user_msg, _assistant_msg) in enumerate(history)
            if isinstance(user_msg, str) and user_msg.lower() == "clear memory"
        ),
        default=-1,
    )
    return history[last_clear_index + 1:]

# --- Gradio Interface ---
# Extra controls, listed in the same order as respond()'s parameters after
# (message, history): system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    value=default_nvc_prompt_template,
    label="System message",
    visible=True,
    lines=10,
)
_max_new_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
_temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
_top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_new_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Launch the interface only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    # NOTE(review): share=True is forwarded to Gradio's launch(); presumably it
    # requests a publicly reachable link -- confirm this is wanted in production.
    demo.launch(share=True)