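// App.jsx: Whisper WebGPU, real-time in-browser speech recognition.
// Microphone audio is captured with a MediaRecorder, decoded to 16 kHz mono
// PCM via the Web Audio API, and streamed to a Web Worker running whisper-base
// with 🤗 Transformers.js and ONNX Runtime Web. No audio leaves the browser.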
import { useEffect, useState, useRef } from "react";
import { AudioVisualizer } from "./components/AudioVisualizer";
import Progress from "./components/Progress";
import { LanguageSelector } from "./components/LanguageSelector";
const IS_WEBGPU_AVAILABLE = !!navigator.gpu;
const WHISPER_SAMPLING_RATE = 16_000;
const MAX_AUDIO_LENGTH = 30; // seconds
const MAX_SAMPLES = WHISPER_SAMPLING_RATE * MAX_AUDIO_LENGTH;
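// Whisper expects 16 kHz mono input and has a 30-second context window, so the
// rolling buffer holds at most 16,000 samples/s × 30 s = 480,000 Float32
// samples (~1.9 MB); older audio is trimmed before transcription.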
function App() {
// Create a reference to the worker object.
const worker = useRef(null);
const recorderRef = useRef(null);
// Model loading and progress
const [status, setStatus] = useState(null);
const [loadingMessage, setLoadingMessage] = useState("");
const [progressItems, setProgressItems] = useState([]);
// Inputs and outputs
const [text, setText] = useState("");
const [tps, setTps] = useState(null);
const [language, setLanguage] = useState("en");
// Processing
const [recording, setRecording] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
const [chunks, setChunks] = useState([]);
const [stream, setStream] = useState(null);
const audioContextRef = useRef(null);
  // We use the `useEffect` hook to set up the worker as soon as the `App` component is mounted.
useEffect(() => {
if (!worker.current) {
// Create the worker if it does not yet exist.
worker.current = new Worker(new URL("./worker.js", import.meta.url), {
type: "module",
});
}
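    // `new URL("./worker.js", import.meta.url)` lets the bundler resolve the
    // worker script relative to this module, and `type: "module"` enables ESM
    // imports inside it. worker.js is assumed to host the Transformers.js
    // pipeline; its contents are not shown in this file.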
// Create a callback function for messages from the worker thread.
const onMessageReceived = (e) => {
switch (e.data.status) {
case "loading":
// Model file start load: add a new progress item to the list.
setStatus("loading");
setLoadingMessage(e.data.data);
break;
case "initiate":
setProgressItems((prev) => [...prev, e.data]);
break;
case "progress":
// Model file progress: update one of the progress items.
setProgressItems((prev) =>
prev.map((item) => {
if (item.file === e.data.file) {
return { ...item, ...e.data };
}
return item;
}),
);
break;
case "done":
// Model file loaded: remove the progress item from the list.
setProgressItems((prev) =>
prev.filter((item) => item.file !== e.data.file),
);
break;
case "ready":
// Pipeline ready: the worker is ready to accept messages.
setStatus("ready");
recorderRef.current?.start();
break;
case "start":
{
// Start generation
setIsProcessing(true);
// Request new data from the recorder
recorderRef.current?.requestData();
}
break;
case "update":
{
// Generation update: update the output text.
const { tps } = e.data;
setTps(tps);
}
break;
case "complete":
          // Generation complete: store the output and unblock processing of the next chunk.
setIsProcessing(false);
setText(e.data.output);
break;
}
};
// Attach the callback function as an event listener.
worker.current.addEventListener("message", onMessageReceived);
// Define a cleanup function for when the component is unmounted.
return () => {
worker.current.removeEventListener("message", onMessageReceived);
};
}, []);
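  // Message protocol, as inferred from the handler above (not a formal spec):
  //   load:     "loading" → ("initiate" → "progress"* → "done") per file → "ready"
  //   generate: "start" → "update"* (carrying tokens/sec) → "complete" (carrying output)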
useEffect(() => {
if (recorderRef.current) return; // Already set
    // Optional chaining guards against insecure contexts, where
    // `navigator.mediaDevices` itself is undefined.
    if (navigator.mediaDevices?.getUserMedia) {
navigator.mediaDevices
.getUserMedia({ audio: true })
.then((stream) => {
setStream(stream);
recorderRef.current = new MediaRecorder(stream);
audioContextRef.current = new AudioContext({
sampleRate: WHISPER_SAMPLING_RATE,
});
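          // Because the AudioContext above is pinned to 16 kHz, decodeAudioData
          // (used in the transcription effect below) resamples the microphone's
          // native rate (typically 44.1 or 48 kHz) down to what Whisper expects.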
recorderRef.current.onstart = () => {
setRecording(true);
setChunks([]);
};
recorderRef.current.ondataavailable = (e) => {
if (e.data.size > 0) {
setChunks((prev) => [...prev, e.data]);
} else {
// Empty chunk received, so we request new data after a short timeout
setTimeout(() => {
recorderRef.current.requestData();
}, 25);
}
};
recorderRef.current.onstop = () => {
setRecording(false);
};
})
.catch((err) => console.error("The following error occurred: ", err));
} else {
console.error("getUserMedia not supported on your browser!");
}
    return () => {
      // Only stop a recorder that actually started; calling stop() on an
      // inactive MediaRecorder throws an InvalidStateError.
      if (recorderRef.current && recorderRef.current.state !== "inactive") {
        recorderRef.current.stop();
      }
      recorderRef.current = null;
    };
}, []);
useEffect(() => {
if (!recorderRef.current) return;
if (!recording) return;
if (isProcessing) return;
if (status !== "ready") return;
if (chunks.length > 0) {
// Generate from data
const blob = new Blob(chunks, { type: recorderRef.current.mimeType });
const fileReader = new FileReader();
fileReader.onloadend = async () => {
const arrayBuffer = fileReader.result;
const decoded =
await audioContextRef.current.decodeAudioData(arrayBuffer);
let audio = decoded.getChannelData(0);
if (audio.length > MAX_SAMPLES) {
// Get last MAX_SAMPLES
audio = audio.slice(-MAX_SAMPLES);
}
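        // Keeping only the trailing window makes this a sliding-window
        // transcriber: each pass re-transcribes the most recent ≤30 s of
        // audio, so older speech scrolls out of the transcript.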
worker.current.postMessage({
type: "generate",
data: { audio, language },
});
};
fileReader.readAsArrayBuffer(blob);
} else {
recorderRef.current?.requestData();
}
}, [status, recording, isProcessing, chunks, language]);
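  // The effect above closes the streaming loop: posting audio triggers the
  // worker's "start" message, which requests fresh data from the recorder;
  // the resulting dataavailable event grows `chunks`, and once "complete"
  // clears `isProcessing`, the effect re-runs on the newer audio.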
return IS_WEBGPU_AVAILABLE ? (
<div className="flex flex-col h-screen mx-auto justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
{
<div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
<div className="flex flex-col items-center mb-1 max-w-[400px] text-center">
            <img
              src="logo.png"
              alt="Whisper WebGPU logo"
              width="50%"
              height="auto"
              className="block"
            />
<h1 className="text-4xl font-bold mb-1">Whisper WebGPU</h1>
<h2 className="text-xl font-semibold">
Real-time in-browser speech recognition
</h2>
</div>
<div className="flex flex-col items-center px-4">
{status === null && (
<>
<p className="max-w-[480px] mb-4">
<br />
You are about to load{" "}
<a
href="https://huggingface.co/onnx-community/whisper-base"
target="_blank"
rel="noreferrer"
className="font-medium underline"
>
whisper-base
</a>
, a 73 million parameter speech recognition model that is
optimized for inference on the web. Once downloaded, the model
(~200 MB) will be cached and reused when you revisit the
page.
<br />
<br />
Everything runs directly in your browser using{" "}
<a
href="https://huggingface.co/docs/transformers.js"
target="_blank"
rel="noreferrer"
className="underline"
>
🤗 Transformers.js
</a>{" "}
and ONNX Runtime Web, meaning no data is sent to a server. You
can even disconnect from the internet after the model has
loaded!
</p>
<button
className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
onClick={() => {
worker.current.postMessage({ type: "load" });
setStatus("loading");
}}
disabled={status !== null}
>
Load model
</button>
</>
)}
<div className="w-[500px] p-2">
<AudioVisualizer className="w-full rounded-lg" stream={stream} />
{status === "ready" && (
<div className="relative">
<p className="w-full h-[80px] overflow-y-auto overflow-wrap-anywhere border rounded-lg p-2">
{text}
</p>
{tps && (
<span className="absolute bottom-0 right-0 px-1">
{tps.toFixed(2)} tok/s
</span>
)}
</div>
)}
</div>
{status === "ready" && (
<div className="relative w-full flex justify-center">
<LanguageSelector
language={language}
setLanguage={(e) => {
recorderRef.current?.stop();
setLanguage(e);
recorderRef.current?.start();
}}
/>
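            {/* Stopping and restarting the recorder clears the accumulated
                chunks (the onstart handler resets them), so both the language
                switch above and the Reset button below start a fresh transcript. */}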
<button
className="border rounded-lg px-2 absolute right-2"
onClick={() => {
recorderRef.current?.stop();
recorderRef.current?.start();
}}
>
Reset
</button>
</div>
)}
{status === "loading" && (
<div className="w-full max-w-[500px] text-left mx-auto p-4">
<p className="text-center">{loadingMessage}</p>
              {progressItems.map(({ file, progress, total }) => (
                <Progress
                  key={file}
                  text={file}
                  percentage={progress}
                  total={total}
                />
              ))}
</div>
)}
</div>
</div>
}
</div>
) : (
<div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">
WebGPU is not supported
<br />
by this browser :(
</div>
);
}
export default App;
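
// For reference, a minimal entry point for mounting this component: a sketch
// assuming a standard Vite + React setup (the Space's actual main.jsx may differ):
//
//   import React from "react";
//   import { createRoot } from "react-dom/client";
//   import App from "./App";
//
//   createRoot(document.getElementById("root")).render(
//     <React.StrictMode>
//       <App />
//     </React.StrictMode>,
//   );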