File size: 1,754 Bytes
3d8b295
 
814e23a
3d8b295
 
 
814e23a
3d8b295
 
814e23a
3d8b295
814e23a
3d8b295
 
814e23a
3d8b295
69460b6
f75ac1d
69460b6
 
f75ac1d
69460b6
 
3d8b295
 
814e23a
3d8b295
9512d4d
814e23a
 
3770856
 
814e23a
9512d4d
3396f08
3d8b295
69460b6
 
814e23a
 
 
 
 
 
3d8b295
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from transformers import AutoTokenizer
import json

# Shared tokenizer, loaded once at import time. apply_chat_template() below
# overwrites its `chat_template` attribute on every call, and the code in this
# file only ever asks it to render text (tokenize=False).
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

# Sample conversations, stored as JSON *text* (not parsed objects) so they can
# be used directly as the initial contents of editable text areas.
demo_conversation1 = "\n".join(
    (
        "[",
        '    {"role": "user", "content": "Hi there!"},',
        '    {"role": "assistant", "content": "Hello, human!"}',
        "]",
    )
)

demo_conversation2 = "\n".join(
    (
        "[",
        '    {"role": "system", "content": "You are a helpful chatbot."},',
        '    {"role": "user", "content": "Hi there!"}',
        "]",
    )
)

# Default chat template: every message is wrapped in <|im_start|>/<|im_end|>
# markers, and an opening assistant marker is appended when
# `add_generation_prompt` is set. Raw strings keep each backslash-n as two
# literal characters so it reaches the template engine as an escape sequence
# inside the template's own string literals.
default_template = "\n".join(
    (
        "{% for message in messages %}",
        r'{{ "<|im_start|>" + message["role"] + "\n" + message["content"] + "<|im_end|>\n" }}',
        "{% endfor %}",
        "{% if add_generation_prompt %}",
        r'{{ "<|im_start|>assistant\n" }}',
        "{% endif %}",
    )
)

# Both demo conversations, in display order.
conversations = [demo_conversation1, demo_conversation2]

def apply_chat_template(template, test_conversation1, test_conversation2):
    """Render both test conversations with the given chat template.

    The template is installed on the module-level ``tokenizer`` (overwriting
    whatever template it had before), then each conversation is rendered
    twice: once without and once with the generation prompt.

    Args:
        template: Chat template source text to assign to
            ``tokenizer.chat_template``.
        test_conversation1: JSON text describing the first conversation.
        test_conversation2: JSON text describing the second conversation.

    Returns:
        A 2-tuple of report strings, one per conversation and in input order.
        Each report contains the raw conversation text followed by both
        rendered outputs.

    Raises:
        json.JSONDecodeError: If either conversation is not valid JSON.
    """
    tokenizer.chat_template = template
    outputs = []
    # Iterate over the two inputs as a tuple; numbering starts at 1 so the
    # report headings line up with the "Conversation 1"/"Conversation 2"
    # input labels in the UI.
    for i, conversation_str in enumerate((test_conversation1, test_conversation2), start=1):
        # Parse the text of *this* conversation into message dicts.
        conversation = json.loads(conversation_str)
        without_gen = tokenizer.apply_chat_template(conversation, tokenize=False)
        with_gen = tokenizer.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=True
        )
        out = (
            f"Conversation {i}:\n\n{conversation_str}\n\n"
            f"Output without generation prompt:\n\n{without_gen}\n\n"
            f"Output with generation prompt:\n\n{with_gen}\n\n"
        )
        outputs.append(out)
    return tuple(outputs)

# Input widgets: one editor for the template, then one per demo conversation.
template_box = gr.TextArea(
    value=default_template,
    lines=10,
    max_lines=30,
    label="Chat Template",
)
conversation_boxes = [
    gr.TextArea(value=str(initial_text), lines=5, label=f"Conversation {number}")
    for number, initial_text in enumerate((demo_conversation1, demo_conversation2), start=1)
]

# Two text outputs, matching the 2-tuple returned by apply_chat_template.
iface = gr.Interface(
    fn=apply_chat_template,
    inputs=[template_box, *conversation_boxes],
    outputs=["text", "text"],
)
iface.launch()