import { describe, expect, it } from "vitest";
import { Template } from "../src/index";
import { downloadFile } from "@huggingface/hub";
// Three-turn conversation fixture (user → assistant → user) shared by most
// of the template test cases below.
const EXAMPLE_CHAT = [
{ role: "user", content: "Hello, how are you?" },
{ role: "assistant", content: "I'm doing great. How can I help you today?" },
{ role: "user", content: "I'd like to show off how chat templating works!" },
];
// EXAMPLE_CHAT with a system message prepended as the first turn.
// NOTE(review): "SYTEM" is a typo for "SYSTEM"; kept as-is here because the
// identifier is referenced by the fixture objects further down — rename all
// usages together in a follow-up.
const EXAMPLE_CHAT_WITH_SYTEM = [
{
role: "system",
content: "You are a friendly chatbot who always responds in the style of a pirate",
},
...EXAMPLE_CHAT,
];
/**
* Defined in https://github.com/huggingface/transformers
* Keys correspond to `model_type` in the transformers repo.
*/
// Each entry: { chat_template: Jinja source, data: render context, target:
// exact expected rendering }. Template strings and targets are byte-exact
// fixtures — do not reformat them.
const TEST_DEFAULT_TEMPLATES = Object.freeze({
// Generic ChatML-style template (<|im_start|>/<|im_end|> markers).
_base: {
chat_template: `{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}`,
data: {
messages: EXAMPLE_CHAT,
add_generation_prompt: false, // so `target` has no trailing assistant header
},
target: `<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
},
blenderbot: {
// facebook/blenderbot-400M-distill
chat_template: `{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}`,
data: {
messages: EXAMPLE_CHAT,
eos_token: "</s>",
},
target: ` Hello, how are you? I'm doing great. How can I help you today? I'd like to show off how chat templating works!</s>`,
},
blenderbot_small: {
// facebook/blenderbot_small-90M (same template as blenderbot)
chat_template: `{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}`,
data: {
messages: EXAMPLE_CHAT,
eos_token: "</s>",
},
target: ` Hello, how are you? I'm doing great. How can I help you today? I'd like to show off how chat templating works!</s>`,
},
bloom: {
// bigscience/bloom — plain concatenation with EOS separators
chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
data: {
messages: EXAMPLE_CHAT,
eos_token: "</s>",
},
target: `Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>`,
},
gpt_neox: {
// EleutherAI/gpt-neox-20b
chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
data: {
messages: EXAMPLE_CHAT,
eos_token: "<|endoftext|>",
},
target: `Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>`,
},
gpt2: {
// gpt2
chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
data: {
messages: EXAMPLE_CHAT,
eos_token: "<|endoftext|>",
},
target: `Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>`,
},
llama: {
// hf-internal-testing/llama-tokenizer — exercises set/elif/raise_exception,
// strict user/assistant alternation, and folding the system message into
// the first user turn.
chat_template: `{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`,
data: {
messages: EXAMPLE_CHAT_WITH_SYTEM,
bos_token: "<s>",
eos_token: "</s>",
USE_DEFAULT_PROMPT: true,
},
target: `<s>[INST] <<SYS>>\nYou are a friendly chatbot who always responds in the style of a pirate\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]`,
},
whisper: {
// openai/whisper-large-v3
chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
data: {
messages: EXAMPLE_CHAT,
eos_token: "<|endoftext|>",
},
target: `Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>`,
},
});
/**
* Custom templates that are not defined in the transformers repo.
* Keys are repo ids on the Hugging Face Hub (https://hf.co/models)
*/
// Same shape as TEST_DEFAULT_TEMPLATES ({ chat_template, data, target }).
// Template strings and targets are byte-exact fixtures — do not reformat.
const TEST_CUSTOM_TEMPLATES = Object.freeze({
// Same zephyr template rendered twice: once without and once with the
// trailing '<|assistant|>' generation prompt.
"HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=false)": {
chat_template: `{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}`,
data: {
messages: EXAMPLE_CHAT_WITH_SYTEM,
eos_token: "</s>",
add_generation_prompt: false,
},
target: `<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s>\n<|user|>\nHello, how are you?</s>\n<|assistant|>\nI'm doing great. How can I help you today?</s>\n<|user|>\nI'd like to show off how chat templating works!</s>\n`,
},
"HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=true)": {
chat_template: `{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}`,
data: {
messages: [
{ role: "system", content: "You are a friendly chatbot who always responds in the style of a pirate" },
{ role: "user", content: "How many helicopters can a human eat in one sitting?" },
],
eos_token: "</s>",
add_generation_prompt: true,
},
target: `<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s>\n<|user|>\nHow many helicopters can a human eat in one sitting?</s>\n<|assistant|>\n`,
},
// Also used by the "parse from the Hub" test below, which fetches this
// repo's tokenizer_config.json and reuses `data`/`target` from this entry.
"mistralai/Mistral-7B-Instruct-v0.1": {
chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
data: {
messages: EXAMPLE_CHAT,
bos_token: "<s>",
eos_token: "</s>",
},
target: `<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]`,
},
});
describe("End-to-end tests", () => {
  // Suite factories must be synchronous — the original `async` on the
  // `describe` callbacks was never awaited and is dropped. Likewise the
  // render `it` callbacks contain no `await`, so they are plain functions.
  describe("Default templates", () => {
    for (const [model_type, test_data] of Object.entries(TEST_DEFAULT_TEMPLATES)) {
      it(model_type, () => {
        const template = new Template(test_data.chat_template);
        const result = template.render(test_data.data);
        expect(result).toEqual(test_data.target);
      });
    }
  });

  describe("Custom templates", () => {
    for (const [model_type, test_data] of Object.entries(TEST_CUSTOM_TEMPLATES)) {
      it(model_type, () => {
        const template = new Template(test_data.chat_template);
        const result = template.render(test_data.data);
        expect(result).toEqual(test_data.target);
      });
    }
  });

  // Fetches the real tokenizer_config.json from the Hub and checks the
  // downloaded chat_template renders identically to the local fixture.
  it("should parse a chat template from the Hugging Face Hub", async () => {
    const repo = "mistralai/Mistral-7B-Instruct-v0.1";
    // NOTE(review): downloadFile resolves to null when the file is missing;
    // fail with a clear message instead of an opaque TypeError on `.json()`.
    const file = await downloadFile({
      repo,
      path: "tokenizer_config.json",
    });
    if (file === null) {
      throw new Error(`tokenizer_config.json not found in repo "${repo}"`);
    }
    const tokenizerConfig = await file.json();
    const template = new Template(tokenizerConfig.chat_template);
    const result = template.render(TEST_CUSTOM_TEMPLATES[repo].data);
    expect(result).toEqual(TEST_CUSTOM_TEMPLATES[repo].target);
  });
});