	update
src/lib/components/Playground/Playground.svelte
CHANGED
@@ -1,7 +1,9 @@
 <script lang="ts">
 	import { HfInference } from '@huggingface/inference';
+	import type { ChatCompletionStreamOutput, ChatCompletionOutput } from '@huggingface/inference';
 
 	import PlaygroundCode from './PlaygroundCode.svelte';
+	import { createHfInference, prepareRequestMessages, handleStreamingResponse, handleNonStreamingResponse } from './playgroundUtils';
 	import PlaygroundMessage from '$lib/components/Playground/PlaygroundMessage.svelte';
 	import PlaygroundOptions from '$lib/components/Playground/PlaygroundOptions.svelte';
 	import PlaygroundTokenModal from './PlaygroundTokenModal.svelte';
@@ -34,9 +36,18 @@
 
 	const startMessages: Message[] = [{ role: 'user', content: '' }];
 
+	const conversations: Conversation[] = [
+		{
+			id: String(Math.random()),
+			model: '01-ai/Yi-1.5-34B-Chat',
+			config: { temperature: 0.5, maxTokens: 2048, streaming: true, jsonMode: false },
+			messages: startMessages
+		}
+	];
+
 	let systemMessage: Message = { role: 'system', content: '' };
 	let messages = startMessages;
-	let currentModel = …
+	let currentModel = conversations[0].model;
 	let temperature = 0.5;
 	let maxTokens = 2048;
 	let streaming = true;
@@ -76,52 +87,42 @@
 		const startTime = performance.now();
 
 		try {
-			const hf = …
-
-			const requestMessages: Message[] = [
-				...(systemMessage.content.length ? [systemMessage] : []),
-				...messages
-			];
+			const hf = createHfInference(hfToken);
+			const requestMessages = prepareRequestMessages(systemMessage, messages);
 
 			if (streaming) {
 				streamingMessage = { role: 'assistant', content: '' };
 				messages = [...messages, streamingMessage];
-				let out = '';
 
-				…
-						if (streamingMessage …
-							streamingMessage.content = out;
+				await handleStreamingResponse(
+					hf,
+					currentModel,
+					requestMessages,
+					temperature,
+					maxTokens,
+					jsonMode,
+					(content) => {
+						if (streamingMessage) {
+							streamingMessage.content = content;
 							messages = [...messages];
 							scrollToBottom();
 						}
 					}
+				);
 			} else {
-				const …
-					const newMessage: Message = response.choices[0].message;
-					messages = [...messages, newMessage];
-					scrollToBottom();
-				}
+				const newMessage = await handleNonStreamingResponse(
+					hf,
+					currentModel,
+					requestMessages,
+					temperature,
+					maxTokens,
+					jsonMode
+				);
+				messages = [...messages, newMessage];
+				scrollToBottom();
 			}
 		} catch (error) {
-			alert('error: ' + error.message);
+			alert('error: ' + (error as Error).message);
 		} finally {
 			const endTime = performance.now();
 			latency = Math.round(endTime - startTime);
@@ -168,7 +169,7 @@
 			id=""
 			placeholder="Enter a custom prompt"
 			bind:value={systemMessage.content}
-			class="absolute inset-x-0 bottom-0 h-full resize-none bg-transparent p-2 px-5 pr-4 pt-…
+			class="absolute inset-x-0 bottom-0 h-full resize-none bg-transparent p-2 px-5 pr-4 pt-16 text-sm outline-none"
 		></textarea>
 	</div>
 	<div class="relative divide-y divide-gray-200 dark:divide-gray-800">
@@ -263,7 +264,7 @@
 						d="m31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7zM1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7zm11.42 9.484L17.64 6l1.932.517L14.352 26z"
 					/></svg
 				>
-				{!viewCode ? '…
+				{!viewCode ? 'Get Code' : 'Hide Code'}</button
 			>
 			<button
 				on:click={() => {
@@ -300,7 +301,7 @@
 	<div class="flex flex-col gap-6 overflow-y-hidden p-5">
 		<PlaygroundModelSelector {compatibleModels} bind:currentModel />
 		<PlaygroundOptions bind:temperature bind:maxTokens bind:jsonMode bind:streaming />
-		<div
+		<!-- <div
 			class="mt-auto flex max-w-xs flex-col items-start gap-2.5 rounded-lg border bg-white p-4 text-gray-500 shadow dark:border-gray-800 dark:bg-gray-800/50 dark:text-gray-400"
 			role="alert"
 		>
@@ -311,7 +312,7 @@
 				class="inline-flex rounded-lg bg-black px-2.5 py-1.5 text-center text-xs font-medium text-white hover:bg-blue-700 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:hover:bg-black dark:focus:ring-blue-800"
 				>Get PRO ($9/month)</a
 			>
-		</div>
+		</div> -->
 		<!-- <div
 		class="flex max-w-xs flex-col items-start gap-2.5 rounded-lg border bg-white p-4 text-gray-500 shadow dark:bg-gray-800 dark:text-gray-400"
 		role="alert"
@@ -324,7 +325,7 @@
 			>Deploy dedicated</a
 		>
 	</div> -->
-		<div>
+		<div class="mt-auto">
 			<div class="mb-3 flex items-center justify-between gap-2">
 				<label for="default-range" class="block text-sm font-medium text-gray-900 dark:text-white"
 					>API Quota</label
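
Note: the Conversation type used by the new conversations array is not defined in this diff. Inferring purely from the initializer above, its shape would presumably be something like the sketch below; the field types are assumptions, not the committed definition.

// Hypothetical shape inferred from the initializer in this diff; the real
// Conversation type lives elsewhere in the repo and may differ.
import type { Message } from './playgroundUtils';

interface Conversation {
	id: string;
	model: string;
	config: {
		temperature: number;
		maxTokens: number;
		streaming: boolean;
		jsonMode: boolean;
	};
	messages: Message[];
}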
    	
src/lib/components/Playground/playgroundUtils.ts
ADDED
@@ -0,0 +1,64 @@
+import { HfInference } from '@huggingface/inference';
+import type { ChatCompletionStreamOutput, ChatCompletionOutput } from '@huggingface/inference';
+
+export interface Message {
+    role: string;
+    content: string;
+}
+
+export function createHfInference(token: string): HfInference {
+    return new HfInference(token);
+}
+
+export function prepareRequestMessages(systemMessage: Message, messages: Message[]): Message[] {
+    return [
+        ...(systemMessage.content.length ? [systemMessage] : []),
+        ...messages
+    ];
+}
+
+export async function handleStreamingResponse(
+    hf: HfInference,
+    model: string,
+    messages: Message[],
+    temperature: number,
+    maxTokens: number,
+    jsonMode: boolean,
+    onChunk: (content: string) => void
+): Promise<void> {
+    let out = '';
+    for await (const chunk of hf.chatCompletionStream({
+        model: model,
+        messages: messages,
+        temperature: temperature,
+        max_tokens: maxTokens,
+        json_mode: jsonMode
+    })) {
+        if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
+            out += chunk.choices[0].delta.content;
+            onChunk(out);
+        }
+    }
+}
+
+export async function handleNonStreamingResponse(
+    hf: HfInference,
+    model: string,
+    messages: Message[],
+    temperature: number,
+    maxTokens: number,
+    jsonMode: boolean
+): Promise<Message> {
+    const response = await hf.chatCompletion({
+        model: model,
+        messages: messages,
+        temperature: temperature,
+        max_tokens: maxTokens,
+        json_mode: jsonMode
+    });
+
+    if (response.choices && response.choices.length > 0) {
+        return response.choices[0].message;
+    }
+    throw new Error('No response from the model');
+}
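
For reference, a minimal standalone sketch of how the new playgroundUtils helpers compose outside the Svelte component. The HF_TOKEN environment variable and the hard-coded model id are illustrative assumptions; in the component these come from hfToken and currentModel.

import {
	createHfInference,
	prepareRequestMessages,
	handleStreamingResponse,
	handleNonStreamingResponse
} from './playgroundUtils';
import type { Message } from './playgroundUtils';

async function main() {
	// Assumption for this sketch: the token is supplied via the environment.
	const hf = createHfInference(process.env.HF_TOKEN ?? '');

	const systemMessage: Message = { role: 'system', content: 'Be concise.' };
	const messages: Message[] = [{ role: 'user', content: 'Hello!' }];
	// System message is prepended only when it has content.
	const requestMessages = prepareRequestMessages(systemMessage, messages);

	// Streaming path: onChunk receives the accumulated text after each delta.
	await handleStreamingResponse(
		hf,
		'01-ai/Yi-1.5-34B-Chat',
		requestMessages,
		0.5,   // temperature
		2048,  // maxTokens
		false, // jsonMode
		(content) => console.log(content)
	);

	// Non-streaming path: resolves with the assistant message, or throws
	// 'No response from the model' when choices is empty.
	const reply = await handleNonStreamingResponse(
		hf, '01-ai/Yi-1.5-34B-Chat', requestMessages, 0.5, 2048, false
	);
	console.log(reply.content);
}

main().catch(console.error);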

