	update
src/lib/components/Playground/Playground.svelte
CHANGED
@@ -1,7 +1,9 @@
 <script lang="ts">
 	import { HfInference } from '@huggingface/inference';
+	import type { ChatCompletionStreamOutput, ChatCompletionOutput } from '@huggingface/inference';
 
 	import PlaygroundCode from './PlaygroundCode.svelte';
+	import { createHfInference, prepareRequestMessages, handleStreamingResponse, handleNonStreamingResponse } from './playgroundUtils';
 	import PlaygroundMessage from '$lib/components/Playground/PlaygroundMessage.svelte';
 	import PlaygroundOptions from '$lib/components/Playground/PlaygroundOptions.svelte';
 	import PlaygroundTokenModal from './PlaygroundTokenModal.svelte';
@@ -34,9 +36,18 @@
 
 	const startMessages: Message[] = [{ role: 'user', content: '' }];
 
+	const conversations: Conversation[] = [
+		{
+			id: String(Math.random()),
+			model: '01-ai/Yi-1.5-34B-Chat',
+			config: { temperature: 0.5, maxTokens: 2048, streaming: true, jsonMode: false },
+			messages: startMessages
+		}
+	];
+
 	let systemMessage: Message = { role: 'system', content: '' };
 	let messages = startMessages;
-	let currentModel = …
+	let currentModel = conversations[0].model;
 	let temperature = 0.5;
 	let maxTokens = 2048;
 	let streaming = true;
@@ -76,52 +87,42 @@
 		const startTime = performance.now();
 
 		try {
-			const hf = …
-
-			const requestMessages: Message[] = [
-				...(systemMessage.content.length ? [systemMessage] : []),
-				...messages
-			];
+			const hf = createHfInference(hfToken);
+			const requestMessages = prepareRequestMessages(systemMessage, messages);
 
 			if (streaming) {
 				streamingMessage = { role: 'assistant', content: '' };
 				messages = [...messages, streamingMessage];
-				let out = '';
 
-				…
-						if (streamingMessage …
-							streamingMessage.content = out;
+				await handleStreamingResponse(
+					hf,
+					currentModel,
+					requestMessages,
+					temperature,
+					maxTokens,
+					jsonMode,
+					(content) => {
+						if (streamingMessage) {
+							streamingMessage.content = content;
 							messages = [...messages];
 							scrollToBottom();
 						}
 					}
+				);
 			} else {
-				const …
-					const newMessage: Message = response.choices[0].message;
-					messages = [...messages, newMessage];
-					scrollToBottom();
-				}
+				const newMessage = await handleNonStreamingResponse(
+					hf,
+					currentModel,
+					requestMessages,
+					temperature,
+					maxTokens,
+					jsonMode
+				);
+				messages = [...messages, newMessage];
+				scrollToBottom();
 			}
 		} catch (error) {
-			alert('error: ' + error.message);
+			alert('error: ' + (error as Error).message);
 		} finally {
 			const endTime = performance.now();
 			latency = Math.round(endTime - startTime);
@@ -168,7 +169,7 @@
 			id=""
 			placeholder="Enter a custom prompt"
 			bind:value={systemMessage.content}
-			class="absolute inset-x-0 bottom-0 h-full resize-none bg-transparent p-2 px-5 pr-4 pt-…
+			class="absolute inset-x-0 bottom-0 h-full resize-none bg-transparent p-2 px-5 pr-4 pt-16 text-sm outline-none"
 		></textarea>
 	</div>
 	<div class="relative divide-y divide-gray-200 dark:divide-gray-800">
@@ -263,7 +264,7 @@
 						d="m31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7zM1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7zm11.42 9.484L17.64 6l1.932.517L14.352 26z"
 					/></svg
 				>
-				{!viewCode ? '…
+				{!viewCode ? 'Get Code' : 'Hide Code'}</button
 			>
 			<button
 				on:click={() => {
@@ -300,7 +301,7 @@
 	<div class="flex flex-col gap-6 overflow-y-hidden p-5">
 		<PlaygroundModelSelector {compatibleModels} bind:currentModel />
 		<PlaygroundOptions bind:temperature bind:maxTokens bind:jsonMode bind:streaming />
-		<div
+		<!-- <div
 			class="mt-auto flex max-w-xs flex-col items-start gap-2.5 rounded-lg border bg-white p-4 text-gray-500 shadow dark:border-gray-800 dark:bg-gray-800/50 dark:text-gray-400"
 			role="alert"
 		>
@@ -311,7 +312,7 @@
 				class="inline-flex rounded-lg bg-black px-2.5 py-1.5 text-center text-xs font-medium text-white hover:bg-blue-700 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:hover:bg-black dark:focus:ring-blue-800"
 				>Get PRO ($9/month)</a
 			>
-		</div>
+		</div> -->
 		<!-- <div
 		class="flex max-w-xs flex-col items-start gap-2.5 rounded-lg border bg-white p-4 text-gray-500 shadow dark:bg-gray-800 dark:text-gray-400"
 		role="alert"
@@ -324,7 +325,7 @@
 			>Deploy dedicated</a
 		>
 	</div> -->
-		<div>
+		<div class="mt-auto">
 			<div class="mb-3 flex items-center justify-between gap-2">
 				<label for="default-range" class="block text-sm font-medium text-gray-900 dark:text-white"
 					>API Quota</label
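
Note: the Conversation type used by the new conversations array is not defined in this diff. Inferring purely from the initializer above, its shape would presumably be something like the sketch below; the field types are assumptions, not the committed definition.

// Hypothetical shape inferred from the initializer in this diff; the real
// Conversation type lives elsewhere in the repo and may differ.
import type { Message } from './playgroundUtils';

interface Conversation {
	id: string;
	model: string;
	config: {
		temperature: number;
		maxTokens: number;
		streaming: boolean;
		jsonMode: boolean;
	};
	messages: Message[];
}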
    	
src/lib/components/Playground/playgroundUtils.ts
ADDED
@@ -0,0 +1,64 @@
+import { HfInference } from '@huggingface/inference';
+import type { ChatCompletionStreamOutput, ChatCompletionOutput } from '@huggingface/inference';
+
+export interface Message {
+    role: string;
+    content: string;
+}
+
+export function createHfInference(token: string): HfInference {
+    return new HfInference(token);
+}
+
+export function prepareRequestMessages(systemMessage: Message, messages: Message[]): Message[] {
+    return [
+        ...(systemMessage.content.length ? [systemMessage] : []),
+        ...messages
+    ];
+}
+
+export async function handleStreamingResponse(
+    hf: HfInference,
+    model: string,
+    messages: Message[],
+    temperature: number,
+    maxTokens: number,
+    jsonMode: boolean,
+    onChunk: (content: string) => void
+): Promise<void> {
+    let out = '';
+    for await (const chunk of hf.chatCompletionStream({
+        model: model,
+        messages: messages,
+        temperature: temperature,
+        max_tokens: maxTokens,
+        json_mode: jsonMode
+    })) {
+        if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
+            out += chunk.choices[0].delta.content;
+            onChunk(out);
+        }
+    }
+}
+
+export async function handleNonStreamingResponse(
+    hf: HfInference,
+    model: string,
+    messages: Message[],
+    temperature: number,
+    maxTokens: number,
+    jsonMode: boolean
+): Promise<Message> {
+    const response = await hf.chatCompletion({
+        model: model,
+        messages: messages,
+        temperature: temperature,
+        max_tokens: maxTokens,
+        json_mode: jsonMode
+    });
+
+    if (response.choices && response.choices.length > 0) {
+        return response.choices[0].message;
+    }
+    throw new Error('No response from the model');
+}
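
For reference, a minimal standalone sketch of how the new playgroundUtils helpers compose outside the Svelte component. The HF_TOKEN environment variable and the hard-coded model id are illustrative assumptions; in the component these come from hfToken and currentModel.

import {
	createHfInference,
	prepareRequestMessages,
	handleStreamingResponse,
	handleNonStreamingResponse
} from './playgroundUtils';
import type { Message } from './playgroundUtils';

async function main() {
	// Assumption for this sketch: the token is supplied via the environment.
	const hf = createHfInference(process.env.HF_TOKEN ?? '');

	const systemMessage: Message = { role: 'system', content: 'Be concise.' };
	const messages: Message[] = [{ role: 'user', content: 'Hello!' }];
	// System message is prepended only when it has content.
	const requestMessages = prepareRequestMessages(systemMessage, messages);

	// Streaming path: onChunk receives the accumulated text after each delta.
	await handleStreamingResponse(
		hf,
		'01-ai/Yi-1.5-34B-Chat',
		requestMessages,
		0.5,   // temperature
		2048,  // maxTokens
		false, // jsonMode
		(content) => console.log(content)
	);

	// Non-streaming path: resolves with the assistant message, or throws
	// 'No response from the model' when choices is empty.
	const reply = await handleNonStreamingResponse(
		hf, '01-ai/Yi-1.5-34B-Chat', requestMessages, 0.5, 2048, false
	);
	console.log(reply.content);
}

main().catch(console.error);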

