import gradio as gr
from transformers import AutoTokenizer
import json

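# Any chat-model tokenizer works here: it is only used for its apply_chat_template
# method, and its chat_template attribute is overwritten on every run.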
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

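# Default conversation shown in the UI; the text box is parsed with json.loads,
# so it must stay valid JSON.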
demo_conversation = """[
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hi there!"},
    {"role": "assistant", "content": "Hello, human!"},
    {"role": "user", "content": "Can I ask a question?"}
]"""

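# Default template (ChatML-style). Indentation and newlines here are cosmetic and
# are stripped out when "Cleanup template whitespace" is ticked.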
default_template = """{% for message in messages %}
    {{ "<|im_start|>" + message["role"] + "\\n" + message["content"] + "<|im_end|>\\n" }}
{% endfor %}
{% if add_generation_prompt %}
    {{ "<|im_start|>assistant\\n" }}
{% endif %}"""

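# Markdown blurb rendered at the top of the page.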
description_text = """# Chat Template Creator

### This Space is a helper app for writing [Chat Templates](https://huggingface.co/docs/transformers/main/en/chat_templating).

### When you're happy with the outputs from your template, you can use the code block at the end to add it to a PR!"""

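# Apply the user's Jinja template to the test conversation and also build a code
# snippet the user can run to attach the template to a checkpoint via a PR.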
def apply_chat_template(template, test_conversation, add_generation_prompt, cleanup_whitespace):
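    # Jinja keeps literal whitespace, so stray indentation/newlines in the template
    # editor would show up in the output; optionally strip them from every line.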
    if cleanup_whitespace:
        template = "".join([line.strip() for line in template.split('\n')])
    tokenizer.chat_template = template
    conversation = json.loads(test_conversation)
    # Snippet the user can paste to open a PR adding this template to a checkpoint.
    pr_snippet = "\n".join([
        'CHECKPOINT = "big-ai-company/cool-new-model"',
        "tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)",
        # repr() keeps the snippet valid Python even when the template contains
        # quotes or newlines.
        f"tokenizer.chat_template = {template!r}",
        "tokenizer.push_to_hub(CHECKPOINT, create_pr=True)",
    ])
    formatted = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=add_generation_prompt)
    return formatted, pr_snippet

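# Layout: preset-template buttons in the first column, the editable template and
# test conversation in the second, rendered output and PR snippet in the third.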
with gr.Blocks() as demo:

    gr.Markdown(description_text)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                gr.Markdown("### Pick an existing template to start:")
            with gr.Row():
                gr.Button("ChatML")
                gr.Button("Zephyr")
            with gr.Row():
                gr.Button("LLaMA")
                gr.Button("Alpaca")
            with gr.Row():
                gr.Button("Vicuna")
                gr.Button("Something else")
        with gr.Column():
            template_in = gr.TextArea(value=default_template, lines=10, max_lines=30, label="Chat Template")
            conversation_in = gr.TextArea(value=demo_conversation, lines=6, label="Conversation")
            generation_prompt_check = gr.Checkbox(value=False, label="Add generation prompt")
            cleanup_whitespace_check = gr.Checkbox(value=True, label="Cleanup template whitespace")
            submit = gr.Button("Apply template", variant="primary")
        with gr.Column():
            formatted_out = gr.TextArea(label="Formatted conversation")
            code_snippet_out = gr.TextArea(label="Code snippet to create PR", lines=3, show_label=True, show_copy_button=True)
        submit.click(
            fn=apply_chat_template,
            inputs=[template_in, conversation_in, generation_prompt_check, cleanup_whitespace_check],
            outputs=[formatted_out, code_snippet_out],
        )

demo.launch()

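# Alternative gr.Interface version of the same app, left commented out: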
#iface = gr.Interface(
#    description=description_text,
#    fn=apply_chat_template,
#    inputs=[
#        gr.TextArea(value=default_template, lines=10, max_lines=30, label="Chat Template"),
#        gr.TextArea(value=demo_conversation, lines=6, label="Conversation"),
#        gr.Checkbox(value=False, label="Add generation prompt"),
#        gr.Checkbox(value=True, label="Cleanup template whitespace"),
#    ],
#    outputs=[
#        gr.TextArea(label="Formatted conversation"),
#        gr.TextArea(label="Code snippet to create PR", lines=3, show_label=True, show_copy_button=True)
#    ]
#)
#iface.launch()