import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# Initialize the tokenizer (used only for token counting) and the Inference API client
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Maximum context length in tokens (adjust to match the deployed model if needed)
MAX_CONTEXT_LENGTH = 4096
default_nvc_prompt_template = r"""<|system|>
You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
1. **Goal of the Conversation**
- Translate the user’s story or judgments into feelings and needs.
- Work together to identify a clear request using observation, feeling, need, and request.
2. **Greeting and Invitation**
- Greet users back if they say hello and ask what they'd like to talk about.
3. **Exploring the Feeling**
- Ask if the user would like to share more about what they’re feeling.
4. **Identifying the Feeling**
- Offer one feeling and one need per guess (e.g., “Do you feel angry because you want to be appreciated?”).
5. **Clarifying the Need**
- If the need isn’t clear, ask for clarification.
6. **Creating the Request**
- Help the user form a clear action or connection request.
7. **Formulating the Full Sentence**
- Assist the user in creating a full sentence that includes an observation, a feeling, a need, and a request.
8. **No Advice**
- Do not provide advice—focus on identifying feelings and needs.
9. **Response Length**
- Limit responses to a maximum of 100 words.
10. **Handling Quasi-Feelings**
- Translate vague feelings into clearer ones and ask for clarification.
11. **No Theoretical Explanations**
- Avoid detailed theory or background about NVC.
12. **Handling Resistance**
- If the user seems resistant or confused, gently reflect their feelings and needs.
13. **Ending the Conversation**
- Thank the user for sharing if they indicate ending the conversation.
</s>"""


def count_tokens(text: str) -> int:
    """Counts the number of tokens in a given string."""
    # Note: encode() may add special tokens (e.g., BOS), so counts are slightly
    # conservative; that small overestimate is fine for budgeting purposes.
    return len(tokenizer.encode(text))


def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Truncates conversation history to fit within the token limit."""
    truncated_history = []
    system_message_tokens = count_tokens(system_message)
    current_length = system_message_tokens

    # Iterate backwards through the history (newest to oldest) so the most
    # recent turns are the ones kept when the budget runs out
    for user_msg, assistant_msg in reversed(history):
        user_tokens = count_tokens(user_msg) if user_msg else 0
        assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
        turn_tokens = user_tokens + assistant_tokens
        if current_length + turn_tokens <= max_length:
            truncated_history.insert(0, (user_msg, assistant_msg))
            current_length += turn_tokens
        else:
            break
    return truncated_history
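
# Illustrative sketch of the truncation behavior (hypothetical token counts,
# not executed by the app): if the budget left after the system prompt is
# 20 tokens and each turn below costs about 12, only the newest turn survives:
#
#   history = [
#       ("I'm so frustrated with my coworker", "Do you feel angry...?"),        # dropped
#       ("Yes, I think I need respect", "Would you like to form a request?"),   # kept
#   ]
#   truncate_history(history, system_message="...", max_length=20)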


def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Responds to a user message, using conversation history and a system prompt."""
    # gr.ChatInterface owns the displayed history, so it cannot be reset from
    # inside this callback; acknowledge the command and answer fresh instead
    if message.strip().lower() == "clear memory":
        yield "Okay, starting fresh. What would you like to talk about?"
        return

    # Reserve space for the model's reply plus a small safety margin
    truncated_history = truncate_history(history, system_message, MAX_CONTEXT_LENGTH - max_tokens - 100)

    # Build the conversation as plain role/content messages; chat_completion
    # applies the chat template, so no extra formatting tokens are needed
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in truncated_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
response = ""
try:
for chunk in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
token = chunk.choices[0].delta.content
response += token
yield response
except Exception as e:
print(f"An error occurred: {e}")
yield "I'm sorry, I encountered an error. Please try again."


# --- Gradio Interface ---
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value=default_nvc_prompt_template,
            label="System message",
            visible=True,
            lines=10,
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch(share=True)
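
# To run locally (a sketch, assuming the dependencies are installed and a
# Hugging Face token is configured if the Inference API requires one;
# adjust "app.py" to this file's actual name):
#   pip install gradio huggingface_hub transformers
#   python app.py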