Spaces:
Sleeping
Sleeping
<script lang="ts">
	import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
	import Badge from "@/lib/components/ui/badge/badge.svelte";
	import * as webllm from "@mlc-ai/web-llm";
	import { onMount, tick } from 'svelte';
	// NOTE(review): `tick` is imported but never used in this component — confirm and drop.

	// Must match one of the `model_id`s registered in loadWebLLM's appConfig.
	let selectedModel = "smollm-360M-instruct-add-basics-q0f32-MLC";
	let engine: webllm.MLCEngineInterface;           // set once loadWebLLM succeeds
	let isLoading = false;                           // true while the engine is downloading/initializing
	let loadingStatus = '';                          // progress text from the init callback
	let inputText = '';                              // bound to the Textarea below
	let outputText = '';                             // last completion text shown in the <pre>
	let error = '';                                  // user-visible error message
	let completionSpeed: number | null = null;       // wall-clock ms of the last completion
	let tokensPerSecond: number | null = null;       // completion_tokens / elapsed seconds
	let isGenerating = false;                        // guards against overlapping completion calls
	let pendingRequest: string | null = null;        // most recent input queued while busy
	// Example prompts offered while the input is empty.
	const promptExamples = [
		"Tell me a story about a cat.",
		"Write a poem about the ocean.",
		"What is refraction?",
		"Explain thermal conductivity",
		"What is Newton's first law of motion?",
	]
| async function setPrompt(prompt: string) { | |
| inputText = prompt; | |
| generateCompletion(prompt); | |
| } | |
	/**
	 * Download and initialize the WebLLM engine for `selectedModel`.
	 * Progress text is surfaced through `loadingStatus`; failures land in
	 * `error`; `isLoading` brackets the whole attempt.
	 */
	async function loadWebLLM() {
		isLoading = true;
		error = '';
		const initProgressCallback = (report: webllm.InitProgressReport) => {
			loadingStatus = report.text;
		};
		// Custom model registry (these models are not in WebLLM's built-in list).
		const appConfig: webllm.AppConfig = {
			model_list: [{
				// NOTE(review): the weights URL and wasm below are q0f16 builds,
				// but this model_id (and `selectedModel`) say q0f32. The id is
				// only a lookup label so loading still works, yet the naming is
				// misleading — confirm and rename both together.
				model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`,
				model_id: 'smollm-360M-instruct-add-basics-q0f32-MLC',
				model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
				overrides: { context_window_size: 2048 },
			},
			{
				// Second registered model; not selected by default.
				model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
				model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
				model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
				overrides: { context_window_size: 2048 },
			}
			],
		};
		try {
			engine = await webllm.CreateMLCEngine(selectedModel, {
				appConfig,
				initProgressCallback,
				logLevel: "INFO",
			});
		} catch (err) {
			error = `Failed to load the model: ${(err as Error).message}`;
		} finally {
			isLoading = false;
		}
	}
	/**
	 * Run a chat completion for `content` and update the output/stat state.
	 * If the engine is not ready, or a generation is already in flight, the
	 * request is stashed in `pendingRequest` and replayed from `finally`.
	 */
	async function generateCompletion(content: string) {
		if (!engine || isGenerating) {
			/**
			 * This is used to store the most recent request from user
			 * while the current request is being processed.
			 */
			pendingRequest = content.trim();
			// NOTE(review): when `engine` is still undefined (model not loaded),
			// nothing drains this queue after init completes — it only replays
			// once another generation finishes. Confirm whether loadWebLLM
			// should flush it.
			return;
		}
		if (!content.trim()) return;
		isGenerating = true;
		const startTime = performance.now();
		try {
			console.log("Generating completion:", content);
			const response = await engine.chat.completions.create({
				messages: [
					{role: "user", content: content}
				],
				// Deliberately tiny cap — this demo advertises "15 Max Tokens".
				max_tokens: 15,
			});
			outputText = response.choices[0].message.content || "";
			// indicate that the response was cut short
			if (response.choices[0].finish_reason === "length") {
				outputText += "...";
			}
			const endTime = performance.now();
			const elapsedTimeInSeconds = (endTime - startTime) / 1000;
			completionSpeed = Math.round(endTime - startTime);
			const generatedTokens = response.usage?.completion_tokens || 0;
			tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);
			error = '';
		} catch (err) {
			error = `Error: ${(err as Error).message}`;
		} finally {
			isGenerating = false;
			// process pending request if exists
			// (skipped when it is identical to the request that just ran,
			//  so repeated keystrokes of the same text do not loop)
			if (pendingRequest && pendingRequest !== content) {
				const nextRequest = pendingRequest;
				pendingRequest = null;
				await generateCompletion(nextRequest);
			}
		}
	}
	// Start downloading/initializing the model as soon as the component mounts.
	onMount(loadWebLLM);
</script>
<!-- Demo UI: title, prompt input, status/error, perf badges, example prompts, output. -->
<div class="flex my-20 flex-col items-center gap-4 max-w-xl mx-auto">
	<h1 class="text-center font-mono font-bold text-4xl">SmolLM 🤗</h1>
	<p class="text-center font-mono text-sm mb-4">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-blue-500">MLC</a> WebLLM <a class="underline text-blue-500" href="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-add-basics" target="_blank">SmolLM-360M-Instruct-Add-Basics</a> <span class="text-xs italic">(15 Max Tokens)</span></p>
	<!-- A completion is triggered on every input event (no debounce);
	     generateCompletion's busy flag + pendingRequest queue absorbs the churn. -->
	<Textarea
		bind:value={inputText}
		on:input={() => generateCompletion(inputText)}
		disabled={isLoading}
		class="w-full text-lg"
		placeholder="Say something..."
	/>
	<p class="text-center text-xs italic">This is a smol model, go easy on it.</p>
	<!-- Status area: loading progress, then error, then perf badges. -->
	{#if isLoading}
		<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
	{:else if error}
		<p class="text-sm text-red-600">{error}</p>
	{:else}
		<div class="flex gap-2">
			{#if completionSpeed !== null}
				<Badge>{completionSpeed}ms</Badge>
			{/if}
			{#if tokensPerSecond !== null}
				<Badge>{tokensPerSecond} tok/s</Badge>
			{/if}
		</div>
	{/if}
	<!-- Example prompts, shown only while the input is empty. -->
	<div class="flex flex-col items-center mb-4">
		{#if inputText === ''}
			<p class="text-sm mb-2">Try these examples:</p>
			<div class="flex flex-wrap justify-center gap-2">
				{#each promptExamples as prompt}
					<button on:click={() => setPrompt(prompt)}>
						<Badge
							variant="outline"
							class="cursor-pointer bg-orange-100 hover:bg-orange-200"
						>
							{prompt}
						</Badge>
					</button>
				{/each}
			</div>
		{/if}
	</div>
	<!-- <pre> preserves whitespace in the model output; keep {outputText} inline. -->
	<pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>