import { useEffect, useState, useRef, useCallback } from "react";
import Progress from "./components/Progress";
import MediaInput from "./components/MediaInput";
import Transcript from "./components/Transcript";
import LanguageSelector from "./components/LanguageSelector";

/**
 * Feature-detect WebGPU support.
 *
 * `navigator.gpu` can exist while no adapter is actually available
 * (e.g. blocklisted hardware), so we also probe `requestAdapter()`.
 *
 * @returns {Promise<boolean>} true iff a WebGPU adapter can be acquired.
 */
async function hasWebGPU() {
  if (!navigator.gpu) {
    return false;
  }
  try {
    const adapter = await navigator.gpu.requestAdapter();
    return !!adapter;
  } catch (e) {
    // requestAdapter may throw on some platforms; treat as "no WebGPU".
    return false;
  }
}

/**
 * Root component: loads a speech-recognition model inside a Web Worker and
 * renders download progress, media input, and the resulting transcript.
 */
function App() {
  // Create a reference to the worker object.
  const worker = useRef(null);

  // Model loading and progress.
  // status: null = model not yet loaded, then "loading" / "ready" / "running"
  // (driven by worker messages below and by handleClick).
  const [status, setStatus] = useState(null);
  const [loadingMessage, setLoadingMessage] = useState("");
  const [progressItems, setProgressItems] = useState([]);

  // Media selection / transcription inputs.
  const mediaInputRef = useRef(null);
  const [audio, setAudio] = useState(null);
  const [language, setLanguage] = useState("en");

  // Transcription output: result payload, generation time (ms), and playback
  // position (presumably consumed by the JSX below — see NOTE at return).
  const [result, setResult] = useState(null);
  const [time, setTime] = useState(null);
  const [currentTime, setCurrentTime] = useState(0);

  const [device, setDevice] = useState("webgpu"); // Try to use WebGPU first
  // Initial synchronous guess from `"gpu" in navigator`; corrected by the
  // async effect below once the adapter probe completes.
  const [modelSize, setModelSize] = useState("gpu" in navigator ? 196 : 77); // WebGPU=196MB, WebAssembly=77MB

  // Resolve real WebGPU availability and pick the matching backend/model size.
  useEffect(() => {
    hasWebGPU().then((result) => {
      setModelSize(result ? 196 : 77);
      setDevice(result ? "webgpu" : "wasm");
    });
  }, []);

  // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
  useEffect(() => {
    // Create the worker if it does not yet exist.
    worker.current ??= new Worker(new URL("./worker.js", import.meta.url), {
      type: "module",
    });

    // Create a callback function for messages from the worker thread.
    const onMessageReceived = (e) => {
      switch (e.data.status) {
        case "loading":
          // Model file start load: add a new progress item to the list.
          setStatus("loading");
          setLoadingMessage(e.data.data);
          break;

        case "initiate":
          setProgressItems((prev) => [...prev, e.data]);
          break;

        case "progress":
          // Model file progress: update one of the progress items.
          setProgressItems((prev) =>
            prev.map((item) => {
              if (item.file === e.data.file) {
                return { ...item, ...e.data };
              }
              return item;
            }),
          );
          break;

        case "done":
          // Model file loaded: remove the progress item from the list.
          setProgressItems((prev) =>
            prev.filter((item) => item.file !== e.data.file),
          );
          break;

        case "ready":
          // Pipeline ready: the worker is ready to accept messages.
          setStatus("ready");
          break;

        case "complete":
          // Transcription finished: store the result and generation time.
          setResult(e.data.result);
          setTime(e.data.time);
          setStatus("ready");
          break;
      }
    };

    // Attach the callback function as an event listener.
    worker.current.addEventListener("message", onMessageReceived);

    // Define a cleanup function for when the component is unmounted.
    return () => {
      worker.current.removeEventListener("message", onMessageReceived);
    };
  }, []);

  // First click (status === null) asks the worker to load the model on the
  // chosen device; later clicks run transcription on the selected audio.
  const handleClick = useCallback(() => {
    setResult(null);
    setTime(null);
    if (status === null) {
      setStatus("loading");
      worker.current.postMessage({ type: "load", data: { device } });
    } else {
      setStatus("running");
      worker.current.postMessage({
        type: "run",
        data: { audio, language },
      });
    }
  }, [status, audio, language, device]);

  // NOTE(review): the JSX below appears to have lost its element tags in this
  // copy of the file — only text children and a few expressions remain, and
  // the component's closing brace / export are not visible. Recover the
  // original markup from version control; left byte-identical here.
  return (
{loadingMessage}
{progressItems.map(({ file, progress, total }, i) => ( ))}
You are about to download{" "}
whisper-base (timestamped)
, a 73 million parameter speech recognition model with the
ability to generate word-level timestamps across 100 different
languages. Once loaded, the model ({modelSize} MB) will be
cached and reused when you revisit the page.
Everything runs locally in your browser using{" "}
🤗 Transformers.js
{" "}
and ONNX Runtime Web, meaning no API calls are made to a server
for inference. You can even disconnect from the internet after
the model has loaded!
Generation time:{" "} {time.toFixed(2)}ms
> )}