<!-- `accessors` exposes the component's props as getters/setters on the instance. -->
<svelte:options accessors={true} />

<script lang="ts">
import { onMount } from "svelte";
import type { Gradio } from "@gradio/utils";
import TextBox from "./shared/Textbox.svelte";
import { Block } from "@gradio/atoms";
import { StatusTracker } from "@gradio/statustracker";
import type { LoadingStatus } from "@gradio/statustracker";
import { AutoTokenizer, env } from "@xenova/transformers";

// Turn off transformers.js local-model lookup for this component.
// NOTE(review): presumably so tokenizers are always fetched from the remote hub — confirm.
env.allowLocalModels = false;

// --- PROPS ---

// Gradio event bridge. `change` is dispatched after tokenization and on TextBox
// changes; `clear_status` is dispatched from the StatusTracker in the markup.
export let gradio: Gradio<{ change: never; clear_status: LoadingStatus }>;

// Component value: the raw text plus the tokens / token ids derived from it.
export let value: {
  text: string;
  tokens: string[];
  token_ids: number[];
} = { text: "", tokens: [], token_ids: [] };

// When true the inner TextBox is not rendered.
export let hide_input = false;
// Model identifier handed to AutoTokenizer.from_pretrained.
export let model: string;
// 'text' renders coloured tokens, 'token_ids' renders the id list, and
// 'hidden' removes both the header controls and the visualization panel.
export let display_mode: "text" | "token_ids" | "hidden";

// Forwarded to <Block>. `elem_id` is also used to build the checkbox id.
export let elem_id: string;
export let elem_classes: string[];
export let visible: boolean;
export let scale: number | null;
export let min_width: number | undefined = undefined;
// Forwarded to both <Block> (as `padding`) and <TextBox>.
export let container: boolean;

// Forwarded unchanged to the inner <TextBox>.
export let label: string;
export let info: string | undefined = undefined;
export let lines: number;
export let placeholder: string;
export let show_label: boolean;
export let max_lines: number | undefined = undefined;
export let type: "text" | "password" | "email";
export let submit_btn: string | boolean | null;
export let stop_btn: string | boolean | null;
export let show_copy_button: boolean;
export let rtl: boolean;
export let text_align: "left" | "right" | undefined = undefined;
export let autofocus: boolean;
export let autoscroll: boolean;
export let max_length: number | undefined = undefined;

// Rendered through <StatusTracker> when defined.
export let loading_status: LoadingStatus | undefined = undefined;
// The inner <TextBox> is disabled when this is false.
export let interactive: boolean;

// --- INTERNAL STATE ---

// Tokenizer returned by AutoTokenizer.from_pretrained, or null while none is loaded.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the file does not import a tokenizer type
let tokenizer: any = null;
// Message shown in the visualization panel when there are no tokens to display.
let status = "Initializing...";
// Bound to the "Preview tokens" checkbox.
let showVisualization = true;
// Background colours cycled through by token index.
const colors = ["#d8b4fe", "#bbf7d0", "#fde047", "#fca5a5", "#93c5fd"];
// Model name most recently passed to loadTokenizer.
let currentModel = "";
// Text most recently handed to the tokenizer; null means "nothing tokenized yet".
let lastTokenizedText: string | null = null;

/**
 * Tokenizes `text_to_process` with the currently loaded tokenizer, stores the
 * result in `value` and dispatches a Gradio `change` event.
 *
 * Does nothing when no tokenizer is loaded or when the text equals the last
 * text that was processed. Failures are reported through `status` instead of
 * being thrown.
 *
 * @param text_to_process Text to encode.
 */
async function run_tokenization(text_to_process: string): Promise<void> {
  // Assigning `value` below re-triggers the reactive statement that calls this
  // function; the same-text check is what stops that loop.
  if (!tokenizer || text_to_process === lastTokenizedText) {
    return;
  }
  // Recorded before encoding, so a text that fails is not retried on every
  // reactive pass either.
  lastTokenizedText = text_to_process;

  try {
    const ids: number[] = tokenizer.encode(text_to_process);
    // Decode each id on its own to obtain one display string per token.
    const tokens: string[] = ids.map((id) => tokenizer.decode([id]));

    // Update the single source of truth.
    value = { text: text_to_process, tokens, token_ids: ids };
    gradio.dispatch("change");
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances are known to carry `.message`.
    const reason = e instanceof Error ? e.message : String(e);
    status = `Tokenization error: ${reason}`;
  }
}

// Token identifying the tokenizer download currently in flight (null when idle).
// Compared by identity so that only the most recent request may publish a result.
let pendingLoad: { readonly model: string } | null = null;

/**
 * Loads the tokenizer for `model_name` and re-tokenizes the current text with it.
 *
 * Returns immediately when that model is already loaded or is already being
 * downloaded. If a newer request starts while this one is awaiting, this
 * request's result (or error) is discarded so a slow download can never
 * overwrite the tokenizer of a model selected later. Failures are reported
 * through `status` instead of being thrown.
 *
 * @param model_name Identifier passed to `AutoTokenizer.from_pretrained`.
 */
async function loadTokenizer(model_name: string): Promise<void> {
  if (pendingLoad?.model === model_name) return;
  if (currentModel === model_name && tokenizer) return;

  const request = { model: model_name };
  pendingLoad = request;
  status = `Loading tokenizer: ${model_name}...`;
  currentModel = model_name;
  tokenizer = null;

  try {
    const loaded = await AutoTokenizer.from_pretrained(model_name);
    // Superseded by a later request while awaiting: drop this result.
    if (pendingLoad !== request) return;

    tokenizer = loaded;
    status = `Tokenizer "${model_name}" loaded.`;

    // Reset the tracker so the same text is tokenized again with the new model.
    lastTokenizedText = null;
    if (value && value.text !== undefined) {
      await run_tokenization(value.text);
    }
  } catch (e: unknown) {
    // A stale request must not overwrite the status of the current one.
    if (pendingLoad !== request) return;
    const reason = e instanceof Error ? e.message : String(e);
    status = `Error loading model: ${reason}`;
  } finally {
    if (pendingLoad === request) pendingLoad = null;
  }
}

// --- SVELTE LIFECYCLE AND REACTIVITY ---

// Fallback initial load. Reactive statements run once during component
// initialisation, before onMount, so the `model` statement below has normally
// already requested the tokenizer (and set `currentModel`). Using the same
// guard here avoids requesting the same model a second time.
onMount(() => {
  if (model && model !== currentModel) {
    loadTokenizer(model);
  }
});

// Re-tokenize whenever `value` changes, including edits made through the
// TextBox binding. run_tokenization itself skips text it has already processed.
$: if (value && value.text !== undefined) {
  run_tokenization(value.text);
}

// (Re)load the tokenizer when `model` differs from the model last requested.
$: if (model && model !== currentModel) {
  loadTokenizer(model);
}

</script>

<Block {visible} {elem_id} {elem_classes} {scale} {min_width} allow_overflow={false} padding={container}>
  {#if loading_status}
    <StatusTracker {...loading_status} on:clear_status={() => gradio.dispatch("clear_status", loading_status)} />
  {/if}

  <!-- Header: preview toggle plus token/character counters (omitted in 'hidden' mode) -->
  <div class="component-header">
    {#if display_mode !== 'hidden'}
      <div class="visualization-toggle">
        <input type="checkbox" id="show-viz-{elem_id}" bind:checked={showVisualization} />
        <label for="show-viz-{elem_id}">Preview tokens</label>
      </div>
      <div class="counters">
        <span>Tokens: {value?.tokens?.length || 0}</span>
        <span>Characters: {value?.text?.length || 0}</span>
      </div>
    {/if}
  </div>

  <!-- Text input; skipped entirely when `hide_input` is set -->
  {#if !hide_input}
    <TextBox
      bind:value={value.text}
      {label}
      {info}
      {lines}
      {placeholder}
      {show_label}
      {max_lines}
      {type}
      {container}
      {submit_btn}
      {stop_btn}
      {show_copy_button}
      {rtl}
      {text_align}
      {autofocus}
      {autoscroll}
      {max_length}
      disabled={!interactive}
      on:change={() => gradio.dispatch("change")}
    />
  {/if}

  <!-- The visualization panel; falls back to the status message while there is nothing to show -->
  {#if showVisualization && display_mode !== 'hidden'}
    <div class="token-visualization-container">
      {#if display_mode === 'text'}
        <div class="token-display">
          {#if value?.tokens?.length > 0}
            {#each value.tokens as token, i}
              <span class="token" style="background-color: {colors[i % colors.length]};">
                {token.replace(/ /g, '\u00A0')}
              </span>
            {/each}
          {:else}
            <span class="status">{status}</span>
          {/if}
        </div>
      {:else if display_mode === 'token_ids'}
        <div class="token-display token-ids">
          {#if value?.token_ids?.length > 0}
            [{value.token_ids.join(", ")}]
          {:else}
            <span class="status">{status}</span>
          {/if}
        </div>
      {/if}
    </div>
  {/if}
</Block>

<style>
  /* Header row: toggle on the left, counters on the right */
  .component-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: var(--spacing-sm);
    min-height: 20px;
  }

  .visualization-toggle {
    display: flex;
    align-items: center;
    gap: 6px;
    font-size: var(--text-sm);
    color: var(--body-text-color);
  }

  .visualization-toggle label {
    cursor: pointer;
    user-select: none;
  }

  .visualization-toggle input[type="checkbox"] {
    /* Reset browser default styles */
    -webkit-appearance: none;
    -moz-appearance: none;
    appearance: none;

    /* Define our own box */
    width: 16px;
    height: 16px;
    border: 1px solid var(--body-text-color-subdued); /* A visible border in both themes */
    border-radius: var(--radius-sm);
    background-color: var(--background-fill-primary);
    cursor: pointer;
    position: relative;
    display: inline-block;
    vertical-align: middle;
  }

  .visualization-toggle input[type="checkbox"]:checked::before {
    content: '✔'; /* You can also use an SVG here */
    position: absolute;
    font-size: 12px;
    font-weight: bold;
    top: 0px;
    left: 2px;
    /* Use a high-contrast color that works on both themes */
    color: var(--primary-500);
  }

  .visualization-toggle input[type="checkbox"]:focus {
    outline: 2px solid var(--primary-200); /* Add focus ring for accessibility */
  }

  .counters {
    display: flex;
    gap: var(--spacing-lg);
    font-size: var(--text-sm);
    /* Use a darker gray that is readable in light mode but still subdued */
    color: var(--neutral-500);
    font-family: var(--font-mono);
  }

  .token-visualization-container {
    margin-top: var(--spacing-lg);
  }

  .token-display {
    /* Theme text colour: the status message and the id list sit directly on
       the themed panel background, so they must follow the theme. */
    color: var(--body-text-color);
    padding: var(--spacing-md);
    border: 1px solid var(--border-color-primary);
    background-color: var(--background-fill-secondary);
    border-radius: var(--radius-lg);
    min-height: 70px;
    line-height: 1.8;
    white-space: pre-wrap;
    overflow-y: auto;
    font-family: var(--font-mono);
    font-size: var(--text-md);
  }

  .token {
    display: inline-block;
    padding: var(--spacing-xs) var(--spacing-sm);
    border-radius: var(--radius-lg);
    margin: 2px;
    /* Force dark text on the pastel token backgrounds, in both themes */
    color: #212529 !important;
  }

  .token-ids {
    word-break: break-all;
  }
</style>

<!-- Use code with caution. -->