ysdede's picture
- Fixes a runtime error caused by the `enableGraphCapture` flag in recent ONNX Runtime Web builds. See parakeet.js library repo for details.
09b5094
raw
history blame
10.9 kB
import React, { useState, useRef, useEffect } from 'react';
import { ParakeetModel, getParakeetModel } from 'parakeet.js';
import './App.css';
/**
 * Demo UI for parakeet.js: lets the user pick a backend and quantization,
 * load the model, and transcribe audio files selected through a file input.
 * All state lives in this single component.
 */
export default function App() {
// Model repository id handed to getParakeetModel() and shown in the UI.
const repoId = 'istupakov/parakeet-tdt-0.6b-v2-onnx';
// Execution backend chosen in the "Backend" select; passed to ParakeetModel.fromUrls().
const [backend, setBackend] = useState('webgpu-hybrid');
// Quantization choices passed to getParakeetModel(); both are reset by the
// backend effect below whenever `backend` changes.
const [encoderQuant, setEncoderQuant] = useState('fp32');
const [decoderQuant, setDecoderQuant] = useState('int8');
// Preprocessor variant passed to getParakeetModel().
const [preprocessor, setPreprocessor] = useState('nemo128');
// Human-readable status line. Its exact value also gates the controls:
// the Load button and the file input compare against specific status strings.
const [status, setStatus] = useState('Idle');
// Last download-progress message produced by loadModel's progress callback.
const [progress, setProgress] = useState('');
// Text and percentage for the progress area; `null` percentage means "no bar".
const [progressText, setProgressText] = useState('');
const [progressPct, setProgressPct] = useState(null);
// Text of the most recent transcription (shown in the read-only textarea).
const [text, setText] = useState('');
// `metrics` object of the most recent transcription result.
const [latestMetrics, setLatestMetrics] = useState(null);
// Transcription history entries, newest first (new entries are prepended).
const [transcriptions, setTranscriptions] = useState([]);
// True while transcribeFile is running; disables the file input.
const [isTranscribing, setIsTranscribing] = useState(false);
// Bound to the "Verbose Log" checkbox.
const [verboseLog, setVerboseLog] = useState(false);
// Value of the "Stride" select; passed as `frameStride` to transcribe().
const [frameStride, setFrameStride] = useState(1);
// When true, transcribeFile logs the full result object to the console.
const [dumpDetail, setDumpDetail] = useState(false);
// Upper bound for the "Threads" input; falls back to 8 when the browser
// does not report navigator.hardwareConcurrency.
const maxCores = navigator.hardwareConcurrency || 8;
// Value of the "Threads" input; defaults to all-but-two cores, never below 1.
const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
// Holds the loaded model instance; a ref, so assigning it does not re-render.
const modelRef = useRef(null);
// The <input type="file"> element, so its value can be reset after each run.
const fileInputRef = useRef(null);
// Whenever the backend selection changes, reset the quantization presets:
// fp32 encoder for WebGPU-based backends, int8 otherwise. The decoder preset
// is int8 in both cases.
useEffect(() => {
  const usesWebGpu = backend.startsWith('webgpu');
  setEncoderQuant(usesWebGpu ? 'fp32' : 'int8');
  setDecoderQuant('int8');
}, [backend]);
/**
 * Downloads the model files for `repoId`, creates a ParakeetModel from them,
 * stores it in `modelRef`, and reports each stage through the status and
 * progress state.
 *
 * Failures are logged and surfaced as a `Failed: …` status instead of being
 * thrown, so the Load button becomes usable again for a retry.
 *
 * NOTE(review): `verbose` and `cpuThreads` follow the option names in the
 * parakeet.js README — confirm against the installed library version.
 *
 * @returns {Promise<void>}
 */
async function loadModel() {
  const timerLabel = 'LoadModel';

  setStatus('Loading model…');
  setProgress('');
  setProgressText('');
  setProgressPct(0);
  console.time(timerLabel);
  try {
    // Mirror download progress into the state that drives the progress bar.
    // A missing or zero `total` is reported as 0% instead of "NaN%".
    const progressCallback = ({ file, loaded, total }) => {
      const pct = total > 0 ? Math.min(100, Math.round((loaded / total) * 100)) : 0;
      const label = `${file}: ${pct}%`;
      setProgress(label);
      setProgressText(label);
      setProgressPct(pct);
    };

    // 1. Download all model files from HuggingFace Hub
    const modelUrls = await getParakeetModel(repoId, {
      encoderQuant,
      decoderQuant,
      preprocessor,
      progress: progressCallback,
    });

    // 2. Session creation has no measurable progress: text only, no bar.
    setStatus('Creating sessions…');
    setProgressText('Compiling model (this may take ~10 s)…');
    setProgressPct(null);

    const sessionOptions = {
      ...modelUrls.urls,
      backend,
      verbose: verboseLog,
    };
    // The Threads input is only offered for the wasm backend, so only forward
    // it there, and only when it holds a usable value (a cleared input is 0).
    if (backend === 'wasm' && Number.isInteger(cpuThreads) && cpuThreads >= 1) {
      sessionOptions.cpuThreads = cpuThreads;
    }
    modelRef.current = await ParakeetModel.fromUrls(sessionOptions);

    // 3. Done. (No warm-up run is performed here, so no separate stage is shown.)
    setStatus('Model ready ✔');
    setProgressText('');
    setProgressPct(null);
  } catch (e) {
    console.error(e);
    setStatus(`Failed: ${e?.message ?? String(e)}`);
    // Drop any half-finished progress display so a stale bar does not linger.
    setProgress('');
    setProgressText('');
    setProgressPct(null);
  } finally {
    // Always end the timer; otherwise a retry warns that the label already exists.
    console.timeEnd(timerLabel);
  }
}
/**
 * `onChange` handler for the audio file input. Decodes the selected file
 * through an AudioContext created at 16 kHz, passes the first channel to the
 * loaded model, and records the result as the latest transcription and as a
 * new history entry.
 *
 * Errors are logged, reflected in the status line, and shown via `alert`;
 * they are not rethrown.
 *
 * @param {Event} e - change event whose `target.files[0]` is the audio file.
 * @returns {Promise<void>}
 */
async function transcribeFile(e) {
  if (!modelRef.current) return alert('Load model first');
  const file = e.target.files?.[0];
  if (!file) return;

  // Single source of truth for the rate used to decode, transcribe, and
  // compute the duration.
  const sampleRate = 16000;
  const timerLabel = `Transcribe-${file.name}`;
  let audioCtx = null;

  setIsTranscribing(true);
  setStatus(`Transcribing "${file.name}"…`);
  try {
    const buf = await file.arrayBuffer();
    audioCtx = new AudioContext({ sampleRate });
    const decoded = await audioCtx.decodeAudioData(buf);
    // Only the first channel is used; any further channels are ignored.
    const pcm = decoded.getChannelData(0);

    let res;
    console.time(timerLabel);
    try {
      res = await modelRef.current.transcribe(pcm, sampleRate, {
        returnTimestamps: true,
        returnConfidences: true,
        frameStride,
      });
    } finally {
      // End the timer even on failure so the label can be reused for a retry.
      console.timeEnd(timerLabel);
    }

    if (dumpDetail) {
      console.log('[Parakeet] Detailed transcription output', res);
    }
    setLatestMetrics(res.metrics);

    // Add to transcriptions list (newest first)
    const newTranscription = {
      id: Date.now(),
      filename: file.name,
      text: res.utterance_text,
      timestamp: new Date().toLocaleTimeString(),
      duration: pcm.length / sampleRate, // duration in seconds
      wordCount: res.words?.length || 0,
      confidence: res.confidence_scores?.token_avg ?? res.confidence_scores?.word_avg ?? null,
      metrics: res.metrics,
    };
    setTranscriptions((prev) => [newTranscription, ...prev]);
    setText(res.utterance_text); // Show latest transcription
    setStatus('Model ready ✔'); // Ready for next file
  } catch (error) {
    console.error('Transcription failed:', error);
    setStatus('Transcription failed');
    alert(`Failed to transcribe "${file.name}": ${error.message}`);
  } finally {
    setIsTranscribing(false);
    // Clear the file input so the same file can be selected again
    if (fileInputRef.current) {
      fileInputRef.current.value = '';
    }
    // Release the per-file AudioContext; browsers cap how many may be open.
    // Best-effort: a failure to close must not mask the transcription outcome.
    if (audioCtx) {
      try {
        await audioCtx.close();
      } catch (closeError) {
        console.warn('Failed to close AudioContext:', closeError);
      }
    }
  }
}
/**
 * Handler for the "Clear History" button: empties the history list and the
 * latest-transcription display, including its performance metrics, so no
 * numbers remain on screen for a transcription that is no longer shown.
 */
function clearTranscriptions() {
  setTranscriptions([]);
  setText('');
  setLatestMetrics(null);
}
return (
<div className="app">
<h2>🦜 Parakeet.js - HF Spaces Demo</h2>
<p>NVIDIA Parakeet speech recognition for the browser using WebGPU/WASM</p>
<div className="controls">
<p>
<strong>Model:</strong> {repoId}
</p>
</div>
<div className="controls">
<label>
Backend:
<select value={backend} onChange={e=>setBackend(e.target.value)}>
<option value="webgpu-hybrid">WebGPU</option>
<option value="wasm">WASM (CPU)</option>
</select>
</label>
{' '}
<label>
Encoder Quant:
<select value={encoderQuant} onChange={e=>setEncoderQuant(e.target.value)}>
<option value="int8">int8 (faster)</option>
<option value="fp32">fp32 (higher quality)</option>
</select>
</label>
{' '}
<label>
Decoder Quant:
<select value={decoderQuant} onChange={e=>setDecoderQuant(e.target.value)}>
<option value="int8">int8 (faster)</option>
<option value="fp32">fp32 (higher quality)</option>
</select>
</label>
{' '}
<label>
Preprocessor:
<select value={preprocessor} onChange={e=>setPreprocessor(e.target.value)}>
<option value="nemo128">nemo128 (default)</option>
</select>
</label>
{' '}
<label>
Stride:
<select value={frameStride} onChange={e=>setFrameStride(Number(e.target.value))}>
<option value={1}>1</option>
<option value={2}>2</option>
<option value={4}>4</option>
</select>
</label>
{' '}
<label>
<input type="checkbox" checked={verboseLog} onChange={e => setVerboseLog(e.target.checked)} />
Verbose Log
</label>
{' '}
<label style={{fontSize:'0.9em'}}>
<input type="checkbox" checked={dumpDetail} onChange={e=>setDumpDetail(e.target.checked)} />
Dump result to console
</label>
{(backend === 'wasm') && (
<label style={{fontSize:'0.9em'}}>
Threads:
<input type="number" min="1" max={maxCores} value={cpuThreads} onChange={e=>setCpuThreads(Number(e.target.value))} style={{width:'4rem'}} />
</label>
)}
<button
onClick={loadModel}
disabled={!status.toLowerCase().includes('fail') && status !== 'Idle'}
className="primary"
>
{status === 'Model ready ✔' ? 'Model Loaded' : 'Load Model'}
</button>
</div>
{typeof SharedArrayBuffer === 'undefined' && backend === 'wasm' && (
<div style={{
marginBottom: '1rem',
padding: '0.5rem',
backgroundColor: '#fff3cd',
border: '1px solid #ffeaa7',
borderRadius: '4px',
fontSize: '0.9em'
}}>
⚠️ <strong>Performance Note:</strong> SharedArrayBuffer is not available.
WASM will run single-threaded. For better performance, use WebGPU.
</div>
)}
<div className="controls">
<input
ref={fileInputRef}
type="file"
accept="audio/*"
onChange={transcribeFile}
disabled={status !== 'Model ready ✔' || isTranscribing}
/>
{transcriptions.length > 0 && (
<button
onClick={clearTranscriptions}
style={{ marginLeft: '1rem', padding: '0.25rem 0.5rem' }}
>
Clear History
</button>
)}
</div>
<p>Status: {status}</p>
{progressPct!==null && (
<div className="progress-wrapper">
<div className="progress-bar"><div style={{ width: `${progressPct}%` }} /></div>
<p className="progress-text">{progressText}</p>
</div>
)}
{/* Latest transcription */}
<div className="controls">
<h3>Latest Transcription:</h3>
<textarea
value={text}
readOnly
className="textarea"
placeholder="Transcribed text will appear here..."
/>
</div>
{/* Latest transcription performace info */}
{latestMetrics && (
<div className="performance">
<strong>RTF:</strong> {latestMetrics.rtf?.toFixed(2)}x &nbsp;|&nbsp; Total: {latestMetrics.total_ms} ms<br/>
Preprocess {latestMetrics.preprocess_ms} ms · Encode {latestMetrics.encode_ms} ms · Decode {latestMetrics.decode_ms} ms · Tokenize {latestMetrics.tokenize_ms} ms
</div>
)}
{/* Transcription history */}
{transcriptions.length > 0 && (
<div className="history">
<h3>Transcription History ({transcriptions.length} files):</h3>
<div style={{ maxHeight: '400px', overflowY: 'auto', border: '1px solid #ddd', borderRadius: '4px' }}>
{transcriptions.map((trans) => (
<div className="history-item" key={trans.id}>
<div className="history-meta"><strong>{trans.filename}</strong><span>{trans.timestamp}</span></div>
<div className="history-stats">Duration: {trans.duration.toFixed(1)}s | Words: {trans.wordCount}{trans.confidence && ` | Confidence: ${trans.confidence.toFixed(2)}`}{trans.metrics && ` | RTF: ${trans.metrics.rtf?.toFixed(2)}x`}</div>
<div className="history-text">{trans.text}</div>
</div>
))}
</div>
</div>
)}
<div style={{ marginTop: '2rem', padding: '1rem', backgroundColor: '#f8f9fa', borderRadius: '4px', fontSize: '0.9em' }}>
<h4>🔗 Links:</h4>
<p>
<a href="https://github.com/ysdede/parakeet.js" target="_blank" rel="noopener noreferrer">
GitHub Repository
</a>
{' | '}
<a href="https://www.npmjs.com/package/parakeet.js" target="_blank" rel="noopener noreferrer">
npm Package
</a>
</p>
</div>
</div>
);
}