Spaces:

huggingfacejs
/

inference-widgets

Running on CPU Upgrade

App Files Files Community

inference-widgets / packages /jinja /test /e2e.test.js

machineuser

Sync widgets demo

94753b6 over 1 year ago

9.14 kB

	import { describe, expect, it } from "vitest";

	import { Template } from "../src/index";

	import { downloadFile } from "@huggingface/hub";

	/**
	 * Three-turn conversation (user, assistant, user) used as the `messages`
	 * input of several template fixtures in this file.
	 *
	 * The array and each message object are frozen: the same instances are
	 * shared by reference between many test cases, so an accidental mutation
	 * during one render must throw instead of silently changing the input of
	 * every later test.
	 */
	const EXAMPLE_CHAT = Object.freeze(
		[
			{ role: "user", content: "Hello, how are you?" },
			{ role: "assistant", content: "I'm doing great. How can I help you today?" },
			{ role: "user", content: "I'd like to show off how chat templating works!" },
		].map((message) => Object.freeze(message))
	);

	/**
	 * EXAMPLE_CHAT preceded by a single system message.
	 *
	 * The array and the system message are frozen because this fixture is
	 * shared by reference between several test cases; mutating it should fail
	 * loudly rather than leak into other tests.
	 *
	 * The spelling of the identifier ("SYTEM") is kept as-is because other
	 * fixtures in this file reference the constant by this exact name.
	 */
	const EXAMPLE_CHAT_WITH_SYTEM = Object.freeze([
		Object.freeze({
			role: "system",
			content: "You are a friendly chatbot who always responds in the style of a pirate",
		}),
		...EXAMPLE_CHAT,
	]);

	/**
	 * Fixtures for chat templates defined in https://github.com/huggingface/transformers
	 * Keys correspond to `model_type` in the transformers repo.
	 * NOTE(review): `_base` does not look like a `model_type`; presumably it is the generic fallback template — confirm.
	 *
	 * Every entry has the same three fields:
	 *  - `chat_template`: Jinja source passed to `new Template(...)`,
	 *  - `data`: context object passed to `template.render(...)`,
	 *  - `target`: exact string the rendered output is compared against.
	 *
	 * `Object.freeze` is shallow here: it locks the top-level key set only, not the nested entry objects.
	 */
	const TEST_DEFAULT_TEMPLATES = Object.freeze({
	// Wraps every message as <|im_start|>{role}\n{content}<|im_end|>\n. The '\n' escapes are resolved by the
	// JS template literal, so the Jinja string literals contain real newline characters.
	// add_generation_prompt is false, so `target` has no trailing "<|im_start|>assistant\n" header.
	_base: {
	chat_template: `{% for message in messages %}{{'<\|im_start\|>' + message['role'] + '\n' + message['content'] + '<\|im_end\|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<\|im_start\|>assistant\n' }}{% endif %}`,
	data: {
	messages: EXAMPLE_CHAT,
	add_generation_prompt: false,
	},
	target: `<\|im_start\|>user\nHello, how are you?<\|im_end\|>\n<\|im_start\|>assistant\nI'm doing great. How can I help you today?<\|im_end\|>\n<\|im_start\|>user\nI'd like to show off how chat templating works!<\|im_end\|>\n`,
	},
	blenderbot: {
	// facebook/blenderbot-400M-distill
	// User messages get a leading space, consecutive messages are separated by a space (none after the
	// last one, via `loop.last`), and eos_token is appended once after the loop.
	chat_template: `{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}`,
	data: {
	messages: EXAMPLE_CHAT,
	eos_token: "</s>",
	},
	target: ` Hello, how are you? I'm doing great. How can I help you today? I'd like to show off how chat templating works!</s>`,
	},
	blenderbot_small: {
	// facebook/blenderbot_small-90M
	// Same template, data and target as the `blenderbot` entry.
	chat_template: `{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}`,
	data: {
	messages: EXAMPLE_CHAT,
	eos_token: "</s>",
	},
	target: ` Hello, how are you? I'm doing great. How can I help you today? I'd like to show off how chat templating works!</s>`,
	},
	bloom: {
	// bigscience/bloom
	// Emits each message's content followed by eos_token; the role is not used.
	// Accesses the field with dot notation (`message.content`) rather than `message['content']`.
	chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT,
	eos_token: "</s>",
	},
	target: `Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>`,
	},
	gpt_neox: {
	// EleutherAI/gpt-neox-20b
	// Same template as `bloom`; only the eos_token value differs.
	chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT,
	eos_token: "<\|endoftext\|>",
	},
	target: `Hello, how are you?<\|endoftext\|>I'm doing great. How can I help you today?<\|endoftext\|>I'd like to show off how chat templating works!<\|endoftext\|>`,
	},
	gpt2: {
	// gpt2
	// Same template, data and target as the `gpt_neox` entry.
	chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT,
	eos_token: "<\|endoftext\|>",
	},
	target: `Hello, how are you?<\|endoftext\|>I'm doing great. How can I help you today?<\|endoftext\|>I'd like to show off how chat templating works!<\|endoftext\|>`,
	},
	llama: {
	// hf-internal-testing/llama-tokenizer
	// The first message of EXAMPLE_CHAT_WITH_SYTEM has role "system", so the first `if` branch is taken: the
	// system text is folded into the first user turn inside <<SYS>> ... <</SYS>>. The USE_DEFAULT_PROMPT /
	// 'DEFAULT_SYSTEM_MESSAGE' branch is therefore never reached, even though USE_DEFAULT_PROMPT is true in `data`.
	// `\\n` in this JS template literal yields a backslash followed by "n" inside the Jinja string literals,
	// while `target` expects real newlines at those positions, so the template engine has to resolve that escape.
	// Exercises slicing (`messages[1:]`), `loop.index0`, `.strip()` and `raise_exception` in one template.
	chat_template: `{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT_WITH_SYTEM,
	bos_token: "<s>",
	eos_token: "</s>",
	USE_DEFAULT_PROMPT: true,
	},
	target: `<s>[INST] <<SYS>>\nYou are a friendly chatbot who always responds in the style of a pirate\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]`,
	},
	whisper: {
	// openai/whisper-large-v3
	// Same template, data and target as the `gpt_neox` entry.
	chat_template: `{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT,
	eos_token: "<\|endoftext\|>",
	},
	target: `Hello, how are you?<\|endoftext\|>I'm doing great. How can I help you today?<\|endoftext\|>I'd like to show off how chat templating works!<\|endoftext\|>`,
	},
	});

	/**
	 * Custom templates that are not defined in the transformers repo.
	 * Keys are repo ids on the Hugging Face Hub (https://hf.co/models), optionally followed by a
	 * parenthesised label when the same repo is tested in several configurations.
	 *
	 * Entries use the fields `chat_template` (Jinja source), `data` (render context) and
	 * `target` (expected rendered string).
	 *
	 * The "mistralai/Mistral-7B-Instruct-v0.1" key must stay exactly equal to the repo id: the Hub
	 * download test in this file looks the entry up with the repo id it downloads from.
	 */
	const TEST_CUSTOM_TEMPLATES = Object.freeze({
	// The '\n' escapes are resolved by the JS template literal, so this template contains real newlines
	// between its tags. `target` keeps one newline after each rendered `{{ ... }}` expression and none of
	// the newlines that follow `{% ... %}` tags.
	// NOTE(review): presumably Template strips whitespace around block tags (trim_blocks/lstrip_blocks-like) — confirm.
	// add_generation_prompt is false, so no trailing "<|assistant|>" header is expected.
	"HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=false)": {
	chat_template: `{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<\|user\|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<\|system\|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<\|assistant\|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<\|assistant\|>' }}\n{% endif %}\n{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT_WITH_SYTEM,
	eos_token: "</s>",
	add_generation_prompt: false,
	},
	target: `<\|system\|>\nYou are a friendly chatbot who always responds in the style of a pirate</s>\n<\|user\|>\nHello, how are you?</s>\n<\|assistant\|>\nI'm doing great. How can I help you today?</s>\n<\|user\|>\nI'd like to show off how chat templating works!</s>\n`,
	},
	// Same template as the add_generation_prompt=false entry, rendered with its own two-message conversation.
	// add_generation_prompt is true, so `target` ends with the "<|assistant|>" header followed by a newline.
	"HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=true)": {
	chat_template: `{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<\|user\|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<\|system\|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<\|assistant\|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<\|assistant\|>' }}\n{% endif %}\n{% endfor %}`,
	data: {
	messages: [
	{ role: "system", content: "You are a friendly chatbot who always responds in the style of a pirate" },
	{ role: "user", content: "How many helicopters can a human eat in one sitting?" },
	],
	eos_token: "</s>",
	add_generation_prompt: true,
	},
	target: `<\|system\|>\nYou are a friendly chatbot who always responds in the style of a pirate</s>\n<\|user\|>\nHow many helicopters can a human eat in one sitting?</s>\n<\|assistant\|>\n`,
	},
	// The template calls raise_exception unless roles alternate user/assistant starting with user, and for any
	// role other than user/assistant; EXAMPLE_CHAT (user, assistant, user) satisfies both checks.
	// bos_token is emitted once before the loop; eos_token plus a space follows every assistant message.
	"mistralai/Mistral-7B-Instruct-v0.1": {
	chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
	data: {
	messages: EXAMPLE_CHAT,
	bos_token: "<s>",
	eos_token: "</s>",
	},
	target: `<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]`,
	},
	});

	/**
	 * Registers a `describe` suite with one test per fixture entry.
	 *
	 * Each generated test compiles the entry's `chat_template`, renders it with
	 * the entry's `data` and expects the output to equal the entry's `target`.
	 *
	 * @param {string} suiteName Title of the generated `describe` suite.
	 * @param {Record<string, { chat_template: string, data: object, target: string }>} fixtures
	 *   Fixture table; each key becomes the title of one test.
	 */
	function describeTemplateFixtures(suiteName, fixtures) {
		// Suite and test callbacks are synchronous: parsing and rendering do not await anything.
		describe(suiteName, () => {
			for (const [fixtureName, fixture] of Object.entries(fixtures)) {
				it(fixtureName, () => {
					const template = new Template(fixture.chat_template);
					const rendered = template.render(fixture.data);
					expect(rendered).toEqual(fixture.target);
				});
			}
		});
	}

	describe("End-to-end tests", () => {
		describeTemplateFixtures("Default templates", TEST_DEFAULT_TEMPLATES);
		describeTemplateFixtures("Custom templates", TEST_CUSTOM_TEMPLATES);

		// Requires network access: the template is read from the repo's tokenizer_config.json on the Hub
		// and rendered with the data/target of the fixture stored under the same repo id.
		it("should parse a chat template from the Hugging Face Hub", async () => {
			const repo = "mistralai/Mistral-7B-Instruct-v0.1";
			const path = "tokenizer_config.json";

			const response = await downloadFile({ repo, path });
			// downloadFile can resolve to null (documented for a missing file); fail with an explicit
			// message instead of an opaque "cannot read properties of null" TypeError.
			if (!response) {
				throw new Error(`Could not download ${path} from ${repo}`);
			}
			const tokenizerConfig = await response.json();

			const template = new Template(tokenizerConfig.chat_template);
			const result = template.render(TEST_CUSTOM_TEMPLATES[repo].data);
			expect(result).toEqual(TEST_CUSTOM_TEMPLATES[repo].target);
		});
	});