import { useEffect, useState, useRef } from "react";

import { AudioVisualizer } from "./components/AudioVisualizer";
import Progress from "./components/Progress";
import { LanguageSelector } from "./components/LanguageSelector";
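
// Feature-detect WebGPU: `navigator.gpu` is only defined in browsers that support it.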
const IS_WEBGPU_AVAILABLE = !!navigator.gpu;

const WHISPER_SAMPLING_RATE = 16_000; // Whisper models expect 16 kHz mono audio
const MAX_AUDIO_LENGTH = 30; // seconds
const MAX_SAMPLES = WHISPER_SAMPLING_RATE * MAX_AUDIO_LENGTH;

function App() {
  // Create a reference to the worker object.
  const worker = useRef(null);
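
  // Reference to the MediaRecorder instance that captures microphone audio.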
  const recorderRef = useRef(null);

  // Model loading and progress
  const [status, setStatus] = useState(null);
  const [loadingMessage, setLoadingMessage] = useState("");
  const [progressItems, setProgressItems] = useState([]);

  // Inputs and outputs
  const [text, setText] = useState("");
  const [tps, setTps] = useState(null);
  const [language, setLanguage] = useState("en");

  // Processing
  const [recording, setRecording] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [chunks, setChunks] = useState([]);
  const [stream, setStream] = useState(null);
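
  // AudioContext used to decode the recorded audio at the Whisper sampling rate.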
  const audioContextRef = useRef(null);

  // We use the `useEffect` hook to set up the worker as soon as the `App` component is mounted.
  useEffect(() => {
    if (!worker.current) {
      // Create the worker if it does not yet exist.
      worker.current = new Worker(new URL("./worker.js", import.meta.url), {
        type: "module",
      });
    }
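
    // Message protocol between this component and worker.js, as inferred from
    // the handlers below (worker.js itself is not shown in this file):
    //   main -> worker: { type: "load" } or { type: "generate", data: { audio, language } }
    //   worker -> main: { status: "loading" | "initiate" | "progress" | "done"
    //                     | "ready" | "start" | "update" | "complete", ... }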

    // Create a callback function for messages from the worker thread.
    const onMessageReceived = (e) => {
      switch (e.data.status) {
        case "loading":
          // Model loading has started: update the status and display message.
          setStatus("loading");
          setLoadingMessage(e.data.data);
          break;

        case "initiate":
          // A model file has started downloading: add a new progress item to the list.
          setProgressItems((prev) => [...prev, e.data]);
          break;
case "progress": | |
// Model file progress: update one of the progress items. | |
setProgressItems((prev) => | |
prev.map((item) => { | |
if (item.file === e.data.file) { | |
return { ...item, ...e.data }; | |
} | |
return item; | |
}), | |
); | |
break; | |
case "done": | |
// Model file loaded: remove the progress item from the list. | |
setProgressItems((prev) => | |
prev.filter((item) => item.file !== e.data.file), | |
); | |
break; | |
case "ready": | |
// Pipeline ready: the worker is ready to accept messages. | |
setStatus("ready"); | |
          recorderRef.current?.start();
          break;

        case "start":
          {
            // Start generation
            setIsProcessing(true);

            // Request new data from the recorder
            recorderRef.current?.requestData();
          }
          break;
case "update": | |
{ | |
// Generation update: update the output text. | |
const { tps } = e.data; | |
setTps(tps); | |
} | |
break; | |
case "complete": | |
// Generation complete: re-enable the "Generate" button | |
setIsProcessing(false); | |
setText(e.data.output); | |
break; | |
      }
    };

    // Attach the callback function as an event listener.
    worker.current.addEventListener("message", onMessageReceived);

    // Define a cleanup function for when the component is unmounted.
    return () => {
      worker.current.removeEventListener("message", onMessageReceived);
    };
  }, []);
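
  // Set up the microphone stream and MediaRecorder once, when the component mounts.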
  useEffect(() => {
    if (recorderRef.current) return; // Already set

    if (navigator.mediaDevices?.getUserMedia) {
      navigator.mediaDevices
        .getUserMedia({ audio: true })
        .then((stream) => {
          setStream(stream);

          recorderRef.current = new MediaRecorder(stream);
          audioContextRef.current = new AudioContext({
            sampleRate: WHISPER_SAMPLING_RATE,
          });

          recorderRef.current.onstart = () => {
            setRecording(true);
            setChunks([]);
          };
          recorderRef.current.ondataavailable = (e) => {
            if (e.data.size > 0) {
              setChunks((prev) => [...prev, e.data]);
            } else {
              // Empty chunk received, so we request new data after a short timeout
              setTimeout(() => {
                recorderRef.current.requestData();
              }, 25);
            }
          };

          recorderRef.current.onstop = () => {
            setRecording(false);
          };
        })
        .catch((err) => console.error("The following error occurred: ", err));
    } else {
console.error("getUserMedia not supported on your browser!"); | |
} | |

    return () => {
      recorderRef.current?.stop();
      recorderRef.current = null;
    };
  }, []);
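
  // Whenever new audio chunks arrive and the worker is idle, decode the
  // recording and send the last MAX_AUDIO_LENGTH seconds to the worker.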
  useEffect(() => {
    if (!recorderRef.current) return;
    if (!recording) return;
    if (isProcessing) return;
    if (status !== "ready") return;

    if (chunks.length > 0) {
      // Generate from data
      const blob = new Blob(chunks, { type: recorderRef.current.mimeType });

      const fileReader = new FileReader();

      fileReader.onloadend = async () => {
        const arrayBuffer = fileReader.result;
        const decoded =
          await audioContextRef.current.decodeAudioData(arrayBuffer);
        let audio = decoded.getChannelData(0);
        if (audio.length > MAX_SAMPLES) {
          // Get last MAX_SAMPLES
          audio = audio.slice(-MAX_SAMPLES);
        }

        worker.current.postMessage({
          type: "generate",
          data: { audio, language },
        });
      };
      fileReader.readAsArrayBuffer(blob);
    } else {
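      // No audio chunks yet: ask the recorder to flush its buffer.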
      recorderRef.current?.requestData();
    }
  }, [status, recording, isProcessing, chunks, language]);

  return IS_WEBGPU_AVAILABLE ? (
    <div className="flex flex-col h-screen mx-auto justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
      <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
        <div className="flex flex-col items-center mb-1 max-w-[400px] text-center">
          <img
            src="logo.png"
            alt="Whisper WebGPU logo"
            width="50%"
            height="auto"
            className="block"
          />
          <h1 className="text-4xl font-bold mb-1">Whisper WebGPU</h1>
          <h2 className="text-xl font-semibold">
            Real-time in-browser speech recognition
          </h2>
        </div>

        <div className="flex flex-col items-center px-4">
          {status === null && (
            <>
              <p className="max-w-[480px] mb-4">
                <br />
                You are about to load{" "}
                <a
                  href="https://huggingface.co/onnx-community/whisper-base"
                  target="_blank"
                  rel="noreferrer"
                  className="font-medium underline"
                >
                  whisper-base
                </a>
                , a 73 million parameter speech recognition model that is
                optimized for inference on the web. Once downloaded, the model
                (~200 MB) will be cached and reused when you revisit the page.
                <br />
                <br />
                Everything runs directly in your browser using{" "}
                <a
                  href="https://huggingface.co/docs/transformers.js"
                  target="_blank"
                  rel="noreferrer"
                  className="underline"
                >
                  🤗 Transformers.js
                </a>{" "}
                and ONNX Runtime Web, meaning no data is sent to a server. You
                can even disconnect from the internet after the model has
                loaded!
              </p>

              <button
                className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
                onClick={() => {
                  worker.current.postMessage({ type: "load" });
                  setStatus("loading");
                }}
                disabled={status !== null}
              >
                Load model
              </button>
            </>
          )}

          <div className="w-[500px] p-2">
            <AudioVisualizer className="w-full rounded-lg" stream={stream} />
            {status === "ready" && (
              <div className="relative">
                <p className="w-full h-[80px] overflow-y-auto overflow-wrap-anywhere border rounded-lg p-2">
                  {text}
                </p>
                {tps && (
                  <span className="absolute bottom-0 right-0 px-1">
                    {tps.toFixed(2)} tok/s
                  </span>
                )}
              </div>
            )}
          </div>

          {status === "ready" && (
            <div className="relative w-full flex justify-center">
              <LanguageSelector
                language={language}
                setLanguage={(e) => {
                  recorderRef.current?.stop();
                  setLanguage(e);
                  recorderRef.current?.start();
                }}
              />
              <button
                className="border rounded-lg px-2 absolute right-2"
                onClick={() => {
                  recorderRef.current?.stop();
                  recorderRef.current?.start();
                }}
              >
                Reset
              </button>
            </div>
          )}

          {status === "loading" && (
            <div className="w-full max-w-[500px] text-left mx-auto p-4">
              <p className="text-center">{loadingMessage}</p>
              {progressItems.map(({ file, progress, total }, i) => (
                <Progress
                  key={i}
                  text={file}
                  percentage={progress}
                  total={total}
                />
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  ) : (
    <div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">
      WebGPU is not supported
      <br />
      by this browser :(
    </div>
  );
}

export default App;