Spaces:

huggingface
/

inference-playground

Running on CPU Upgrade

File size: 1,688 Bytes

5213b80
8c7e6f1
dd66861
8c7e6f1
 
f977d49
8c7e6f1
 
 
f977d49
 
38434c2
f977d49
 
2ac97e2
 
8c7e6f1
f977d49
2ac97e2
5213b80
 
 
 
 
35f4e45
5213b80
 
 
2ac97e2
 
 
 
 
 
 
 
 
 
f977d49
 
8c7e6f1
 
 
f977d49
 
38434c2
f977d49
35f4e45
38434c2
f977d49
 
 
 
35f4e45
f977d49
8c7e6f1
f977d49
 
 
 
8c7e6f1
dd66861
 
e9a4671
dd66861

import { type ChatCompletionInputMessage } from '@huggingface/tasks';
import { HfInference } from '@huggingface/inference';
import type { ModelEntryWithTokenizer } from '$lib/types';

/**
 * Builds an inference client bound to the given access token.
 *
 * @param token - Token handed to the `HfInference` constructor.
 * @returns The newly constructed client.
 */
export function createHfInference(token: string): HfInference {
	const client = new HfInference(token);
	return client;
}

/**
 * Streams a chat completion and reports the growing reply to the caller.
 *
 * Each chunk whose first choice carries non-empty delta content is appended to a
 * running buffer, and `onChunk` is then invoked with the full text accumulated so
 * far (not only the newest fragment).
 *
 * @param hf - Client used to issue the streaming request.
 * @param model - Forwarded to the request as `model`.
 * @param messages - Forwarded to the request as `messages`.
 * @param temperature - Forwarded to the request as `temperature`.
 * @param maxTokens - Forwarded to the request as `max_tokens`.
 * @param onChunk - Called with the accumulated text after every content-bearing chunk.
 * @param abortController - Its `signal` is passed in the request options.
 * @returns Resolves once the stream ends, or once an error named `AbortError` is caught.
 * @throws Rethrows, unchanged, anything thrown that is not named `AbortError`.
 */
export async function handleStreamingResponse(
	hf: HfInference,
	model: string,
	messages: ChatCompletionInputMessage[],
	temperature: number,
	maxTokens: number,
	onChunk: (content: string) => void,
	abortController: AbortController
): Promise<void> {
	let out = '';
	try {
		const stream = hf.chatCompletionStream(
			{ model, messages, temperature, max_tokens: maxTokens },
			{ signal: abortController.signal }
		);
		for await (const chunk of stream) {
			// Chunks without choices, without a delta, or with empty content are skipped.
			const delta = chunk.choices?.[0]?.delta?.content;
			if (delta) {
				out += delta;
				onChunk(out);
			}
		}
	} catch (error: unknown) {
		// The caught value may be anything (including null/undefined), so read `name`
		// defensively instead of assuming an Error instance.
		const errorName = (error as { name?: unknown } | null | undefined)?.name;
		if (errorName === 'AbortError') {
			console.log('Stream aborted');
		} else {
			throw error;
		}
	}
}

/**
 * Requests a single (non-streamed) chat completion and returns the first choice's message.
 *
 * @param hf - Client used to issue the request.
 * @param model - Forwarded to the request as `model`.
 * @param messages - Forwarded to the request as `messages`.
 * @param temperature - Forwarded to the request as `temperature`.
 * @param maxTokens - Forwarded to the request as `max_tokens`.
 * @returns The `message` of the first entry in `response.choices`.
 * @throws Error with the text "No response from the model" when the response has no
 *   choices or the first choice carries no message.
 */
export async function handleNonStreamingResponse(
	hf: HfInference,
	model: string,
	messages: ChatCompletionInputMessage[],
	temperature: number,
	maxTokens: number
): Promise<ChatCompletionInputMessage> {
	const response = await hf.chatCompletion({
		model,
		messages,
		temperature,
		max_tokens: maxTokens
	});

	// Guard the whole path so a missing message is reported as an error instead of
	// leaking `undefined` through the declared return type.
	const message = response.choices?.[0]?.message;
	if (!message) {
		throw new Error('No response from the model');
	}
	return message;
}

/**
 * Heuristic check for system-prompt support: looks for the substring `system`
 * in the model's tokenizer chat template.
 *
 * @param model - Model entry whose `tokenizerConfig.chat_template` is inspected.
 * @returns `true`/`false` depending on whether the template contains `system`;
 *   `undefined` when the tokenizer config or its chat template is absent.
 */
export function isSystemPromptSupported(model: ModelEntryWithTokenizer) {
	const template = model.tokenizerConfig?.chat_template;
	return template?.includes('system');
}