File size: 1,925 Bytes
60216ec
 
0fa6cab
8c7e6f1
563dba8
8c7e6f1
f977d49
8c7e6f1
 
563dba8
8c7e6f1
f977d49
573aa88
2ac97e2
573aa88
 
8c7e6f1
573aa88
60216ec
 
573aa88
60216ec
5f94ff7
 
 
 
 
60216ec
5f94ff7
 
 
 
 
 
f977d49
 
8c7e6f1
 
563dba8
8c7e6f1
f977d49
573aa88
 
38434c2
573aa88
60216ec
 
573aa88
 
f977d49
573aa88
 
 
60216ec
f977d49
8c7e6f1
f977d49
 
 
60216ec
8c7e6f1
dd66861
563dba8
dd66861
60216ec
dd66861
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import { type ChatCompletionInputMessage } from "@huggingface/tasks";
import type { Conversation, ModelEntryWithTokenizer } from "$lib/types";
import { HfInference } from "@huggingface/inference";


/**
 * Builds an HfInference client authenticated with the given access token.
 *
 * @param token - Hugging Face access token used to authorize API calls.
 * @returns A ready-to-use HfInference instance.
 */
export function createHfInference(token: string): HfInference {
	const client = new HfInference(token);
	return client;
}


/**
 * Streams a chat completion for the conversation, invoking `onChunk` after
 * every received delta with the full response text accumulated so far.
 *
 * The system message is prepended only when the model's chat template supports
 * a system role and the message has non-empty content.
 *
 * @param hf - Authenticated HfInference client.
 * @param conversation - Conversation holding the model, config, and messages.
 * @param onChunk - Callback receiving the accumulated response text.
 * @param abortController - Its signal cancels the in-flight stream.
 * @param systemMessage - Optional system prompt to prepend.
 */
export async function handleStreamingResponse(
	hf: HfInference,
	conversation: Conversation,
	onChunk: (content: string) => void,
	abortController: AbortController,
	systemMessage?: ChatCompletionInputMessage
): Promise<void> {
	const messages = [...conversation.messages];
	if (isSystemPromptSupported(conversation.model) && systemMessage?.content?.length) {
		messages.unshift(systemMessage);
	}

	const stream = hf.chatCompletionStream(
		{
			model: conversation.model.id,
			messages,
			temperature: conversation.config.temperature,
			max_tokens: conversation.config.maxTokens,
		},
		{ signal: abortController.signal }
	);

	let accumulated = "";
	for await (const chunk of stream) {
		// Skip keep-alive / empty-delta chunks; only forward real content.
		const delta = chunk.choices?.[0]?.delta?.content;
		if (delta) {
			accumulated += delta;
			onChunk(accumulated);
		}
	}
}


/**
 * Requests a complete (non-streaming) chat completion for the conversation.
 *
 * The system message is prepended only when the model's chat template supports
 * a system role and the message has non-empty content.
 *
 * @param hf - Authenticated HfInference client.
 * @param conversation - Conversation holding the model, config, and messages.
 * @param systemMessage - Optional system prompt to prepend.
 * @returns The first choice's message from the completion response.
 * @throws Error when the API returns no choices.
 */
export async function handleNonStreamingResponse(
	hf: HfInference,
	conversation: Conversation,
	systemMessage?: ChatCompletionInputMessage
): Promise<ChatCompletionInputMessage> {
	const messages = [
		...(isSystemPromptSupported(conversation.model) && systemMessage?.content?.length ? [systemMessage] : []),
		...conversation.messages,
	];

	const response = await hf.chatCompletion({
		// Pass the model id string — not the whole model entry object —
		// matching handleStreamingResponse and the chatCompletion API contract.
		model: conversation.model.id,
		messages,
		temperature: conversation.config.temperature,
		max_tokens: conversation.config.maxTokens,
	});

	if (response.choices && response.choices.length > 0) {
		return response.choices[0].message;
	}
	throw new Error("No response from the model");
}


/**
 * Determines whether a model's chat template declares a "system" role.
 *
 * @param model - Model entry including its tokenizer configuration.
 * @returns true when the chat template mentions "system"; false when the
 *   tokenizer config or template is absent or does not mention it.
 */
export function isSystemPromptSupported(model: ModelEntryWithTokenizer): boolean {
	// `?? false` collapses the optional chain's `undefined` into a plain
	// boolean so callers never see `boolean | undefined`.
	return model.tokenizerConfig?.chat_template?.includes("system") ?? false;
}