import { useEffect, useState, useRef } from "react";

import { AudioVisualizer } from "./components/AudioVisualizer";
import Progress from "./components/Progress";
import { LanguageSelector } from "./components/LanguageSelector";
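
// Feature-detect WebGPU: `navigator.gpu` is only defined in browsers that support it.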
const IS_WEBGPU_AVAILABLE = !!navigator.gpu;

const WHISPER_SAMPLING_RATE = 16_000; // Whisper models expect 16 kHz mono audio
const MAX_AUDIO_LENGTH = 30; // seconds
const MAX_SAMPLES = WHISPER_SAMPLING_RATE * MAX_AUDIO_LENGTH;

function App() {
  // Create a reference to the worker object.
  const worker = useRef(null);
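
  // Reference to the MediaRecorder instance that captures microphone audio.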
  const recorderRef = useRef(null);

  // Model loading and progress
  const [status, setStatus] = useState(null);
  const [loadingMessage, setLoadingMessage] = useState("");
  const [progressItems, setProgressItems] = useState([]);

  // Inputs and outputs
  const [text, setText] = useState("");
  const [tps, setTps] = useState(null);
  const [language, setLanguage] = useState("en");

  // Processing
  const [recording, setRecording] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [chunks, setChunks] = useState([]);
  const [stream, setStream] = useState(null);
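
  // AudioContext used to decode the recorded audio at the Whisper sampling rate.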
  const audioContextRef = useRef(null);

  // We use the `useEffect` hook to set up the worker as soon as the `App` component is mounted.
  useEffect(() => {
    if (!worker.current) {
      // Create the worker if it does not yet exist.
      worker.current = new Worker(new URL("./worker.js", import.meta.url), {
        type: "module",
      });
    }
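
    // Message protocol between this component and worker.js, as inferred from
    // the handlers below (worker.js itself is not shown in this file):
    //   main -> worker: { type: "load" } or { type: "generate", data: { audio, language } }
    //   worker -> main: { status: "loading" | "initiate" | "progress" | "done"
    //                     | "ready" | "start" | "update" | "complete", ... }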

    // Create a callback function for messages from the worker thread.
    const onMessageReceived = (e) => {
      switch (e.data.status) {
        case "loading":
          // Model loading has started: update the status and display message.
          setStatus("loading");
          setLoadingMessage(e.data.data);
          break;

        case "initiate":
          // A model file has started downloading: add a new progress item to the list.
          setProgressItems((prev) => [...prev, e.data]);
          break;
case "progress": | |
// Model file progress: update one of the progress items. | |
setProgressItems((prev) => | |
prev.map((item) => { | |
if (item.file === e.data.file) { | |
return { ...item, ...e.data }; | |
} | |
return item; | |
}), | |
); | |
break; | |
case "done": | |
// Model file loaded: remove the progress item from the list. | |
setProgressItems((prev) => | |
prev.filter((item) => item.file !== e.data.file), | |
); | |
break; | |
case "ready": | |
// Pipeline ready: the worker is ready to accept messages. | |
setStatus("ready"); | |
          recorderRef.current?.start();
          break;

        case "start":
          {
            // Start generation
            setIsProcessing(true);

            // Request new data from the recorder
            recorderRef.current?.requestData();
          }
          break;
case "update": | |
{ | |
// Generation update: update the output text. | |
const { tps } = e.data; | |
setTps(tps); | |
} | |
break; | |
case "complete": | |
// Generation complete: re-enable the "Generate" button | |
setIsProcessing(false); | |
setText(e.data.output); | |
break; | |
      }
    };

    // Attach the callback function as an event listener.
    worker.current.addEventListener("message", onMessageReceived);

    // Define a cleanup function for when the component is unmounted.
    return () => {
      worker.current.removeEventListener("message", onMessageReceived);
    };
  }, []);
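
  // Set up the microphone stream and MediaRecorder once, when the component mounts.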
  useEffect(() => {
    if (recorderRef.current) return; // Already set

    if (navigator.mediaDevices?.getUserMedia) {
      navigator.mediaDevices
        .getUserMedia({ audio: true })
        .then((stream) => {
          setStream(stream);

          recorderRef.current = new MediaRecorder(stream);
          audioContextRef.current = new AudioContext({
            sampleRate: WHISPER_SAMPLING_RATE,
          });

          recorderRef.current.onstart = () => {
            setRecording(true);
            setChunks([]);
          };
          recorderRef.current.ondataavailable = (e) => {
            if (e.data.size > 0) {
              setChunks((prev) => [...prev, e.data]);
            } else {
              // Empty chunk received, so we request new data after a short timeout
              setTimeout(() => {
                recorderRef.current.requestData();
              }, 25);
            }
          };

          recorderRef.current.onstop = () => {
            setRecording(false);
          };
        })
        .catch((err) => console.error("The following error occurred: ", err));
    } else {
console.error("getUserMedia not supported on your browser!"); | |
} | |

    return () => {
      recorderRef.current?.stop();
      recorderRef.current = null;
    };
  }, []);
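
  // Whenever new audio chunks arrive and the worker is idle, decode the
  // recording and send the last MAX_AUDIO_LENGTH seconds to the worker.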
  useEffect(() => {
    if (!recorderRef.current) return;
    if (!recording) return;
    if (isProcessing) return;
    if (status !== "ready") return;

    if (chunks.length > 0) {
      // Generate from data
      const blob = new Blob(chunks, { type: recorderRef.current.mimeType });

      const fileReader = new FileReader();

      fileReader.onloadend = async () => {
        const arrayBuffer = fileReader.result;
        const decoded =
          await audioContextRef.current.decodeAudioData(arrayBuffer);
        let audio = decoded.getChannelData(0);
        if (audio.length > MAX_SAMPLES) {
          // Get last MAX_SAMPLES
          audio = audio.slice(-MAX_SAMPLES);
        }

        worker.current.postMessage({
          type: "generate",
          data: { audio, language },
        });
      };
      fileReader.readAsArrayBuffer(blob);
    } else {
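      // No audio chunks yet: ask the recorder to flush its buffer.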
      recorderRef.current?.requestData();
    }
  }, [status, recording, isProcessing, chunks, language]);

  return IS_WEBGPU_AVAILABLE ? (
    <div className="flex flex-col h-screen mx-auto justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
      <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
        <div className="flex flex-col items-center mb-1 max-w-[400px] text-center">
          <img
            src="logo.png"
            alt="Whisper WebGPU logo"
            width="50%"
            height="auto"
            className="block"
          />
          <h1 className="text-4xl font-bold mb-1">Whisper WebGPU</h1>
          <h2 className="text-xl font-semibold">
            Real-time in-browser speech recognition
          </h2>
        </div>

        <div className="flex flex-col items-center px-4">
          {status === null && (
            <>
              <p className="max-w-[480px] mb-4">
                <br />
                You are about to load{" "}
                <a
                  href="https://huggingface.co/onnx-community/whisper-base"
                  target="_blank"
                  rel="noreferrer"
                  className="font-medium underline"
                >
                  whisper-base
                </a>
                , a 73 million parameter speech recognition model that is
                optimized for inference on the web. Once downloaded, the model
                (~200 MB) will be cached and reused when you revisit the page.
                <br />
                <br />
                Everything runs directly in your browser using{" "}
                <a
                  href="https://huggingface.co/docs/transformers.js"
                  target="_blank"
                  rel="noreferrer"
                  className="underline"
                >
                  🤗 Transformers.js
                </a>{" "}
                and ONNX Runtime Web, meaning no data is sent to a server. You
                can even disconnect from the internet after the model has
                loaded!
              </p>

              <button
                className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
                onClick={() => {
                  worker.current.postMessage({ type: "load" });
                  setStatus("loading");
                }}
                disabled={status !== null}
              >
                Load model
              </button>
            </>
          )}

          <div className="w-[500px] p-2">
            <AudioVisualizer className="w-full rounded-lg" stream={stream} />
            {status === "ready" && (
              <div className="relative">
                <p className="w-full h-[80px] overflow-y-auto overflow-wrap-anywhere border rounded-lg p-2">
                  {text}
                </p>
                {tps && (
                  <span className="absolute bottom-0 right-0 px-1">
                    {tps.toFixed(2)} tok/s
                  </span>
                )}
              </div>
            )}
          </div>

          {status === "ready" && (
            <div className="relative w-full flex justify-center">
              <LanguageSelector
                language={language}
                setLanguage={(e) => {
                  recorderRef.current?.stop();
                  setLanguage(e);
                  recorderRef.current?.start();
                }}
              />
              <button
                className="border rounded-lg px-2 absolute right-2"
                onClick={() => {
                  recorderRef.current?.stop();
                  recorderRef.current?.start();
                }}
              >
                Reset
              </button>
            </div>
          )}

          {status === "loading" && (
            <div className="w-full max-w-[500px] text-left mx-auto p-4">
              <p className="text-center">{loadingMessage}</p>
              {progressItems.map(({ file, progress, total }, i) => (
                <Progress
                  key={i}
                  text={file}
                  percentage={progress}
                  total={total}
                />
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  ) : (
    <div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">
      WebGPU is not supported
      <br />
      by this browser :(
    </div>
  );
}

export default App;