import React, { useRef, useState } from "react";
import { Mic, MicOff, Loader2 } from "lucide-react";
import { Button } from "@/components/ui/button";
import { useToast } from "@/hooks/use-toast";
import { useAppDispatch, useAppSelector } from "@/redux/hooks";
import { addMessage, setIsListening } from "@/redux/slices/chatSlice";
import {
  setConnected,
  setThinking,
  setTranscribing,
} from "@/redux/slices/sessionSlice";

interface MessageMetadata {
  session_id: string;
  sequence_id: string;
  transcript: string;
}

const VoiceChat = () => {
  // WebSocket and audio state
  const [socketConnected, setSocketConnected] = useState(false);
  const [audioStream, setAudioStream] = useState<MediaStream | null>(null);
  const [audioContext, setAudioContext] = useState<AudioContext | null>(null);
  const [audioContextState, setAudioContextState] = useState(false);
  const [elapsedTime, setElapsedTime] = useState(0);
  const [latency, setLatency] = useState(0);
  const [generating, setGenerating] = useState(false);
  const [isPlaying, setIsPlaying] = useState(false);

  const { sessionId, transcribing, thinking, connected } = useAppSelector(
    (state) => state.session
  );

  // App state management with Redux
  const { toast } = useToast();
  const dispatch = useAppDispatch();
  const { activeChat } = useAppSelector((state) => state.chats);
  const isListening = useAppSelector((state) => state.chat.isListening);
  const { temperature, activeVoice, maxTokens, threshold, silenceDuration } =
    useAppSelector((state) => state.settings);

  // Refs for audio processing
  const socketRef = useRef<WebSocket | null>(null);
  const sourceRef = useRef<AudioBufferSourceNode | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const playingRef = useRef(false);
  const lastShiftedRef = useRef<string | null>(null);
  const bufferQueueRef = useRef<string[]>([]);
  const isGrpcRef = useRef<boolean | null>(null);

  // Format a millisecond duration as HH:MM:SS.
  const formatTime = (milliseconds: number): string => {
    const totalSeconds = Math.floor(milliseconds / 1000);
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds % 3600) / 60);
    const seconds = totalSeconds % 60;
    return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(
      2,
      "0"
    )}:${String(seconds).padStart(2, "0")}`;
  };

  // Convert Float32 samples from the Web Audio API into 16-bit PCM (L16).
  const floatTo16BitPCM = (input: Float32Array): ArrayBuffer => {
    const output = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
      const sample = Math.max(-1, Math.min(1, input[i]));
      output[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
    }
    return output.buffer;
  };
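  // Play the next queued audio chunk. Each "media" payload is a base64 string
  // whose decoded bytes start with a JSON metadata header ({ session_id,
  // sequence_id, transcript }) terminated by a NUL byte, followed by the audio
  // data. A sequence_id of "-2" appears to be a sentinel that only starts the
  // latency timer; real chunks are decoded, played, and acknowledged back to
  // the server with a "status" message when playback ends.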
  const handlePlay = async (timestamp: number): Promise<void> => {
    try {
      if (!isGrpcRef.current) {
        return;
      }
      playingRef.current = true;
      setIsPlaying(true);

      const base64Data = bufferQueueRef.current.shift();
      if (!base64Data) {
        playingRef.current = false;
        setIsPlaying(false);
        return;
      }
      lastShiftedRef.current = base64Data;

      const bytes = new Uint8Array(
        atob(base64Data)
          .split("")
          .map((char) => char.charCodeAt(0))
      );
      let arrayBuffer = bytes.buffer;

      // Split the NUL-terminated JSON metadata header from the audio bytes.
      const metadataEndIndex = bytes.indexOf(0);
      const metadataStr = new TextDecoder().decode(
        bytes.slice(0, metadataEndIndex)
      );
      const metadata = JSON.parse(metadataStr) as MessageMetadata;
      const { session_id, sequence_id, transcript } = metadata;

      if (sequence_id !== "-2") {
        // First real audio chunk: stop the latency timer and clear "thinking".
        if (socketRef.current && (socketRef.current as any).interval) {
          clearInterval((socketRef.current as any).interval);
          (socketRef.current as any).interval = null;
          setLatency((prev) => prev + 150);
          dispatch(setThinking(false));
        }
        arrayBuffer = arrayBuffer.slice(metadataEndIndex + 1);

        try {
          if (audioContextRef.current?.state === "suspended") {
            await audioContextRef.current.resume();
          }
          if (!audioContextRef.current) {
            return;
          }
          const audioBuffer = await audioContextRef.current.decodeAudioData(
            arrayBuffer
          );
          if (sourceRef.current) {
            sourceRef.current.disconnect();
          }
          const source = audioContextRef.current.createBufferSource();
          source.buffer = audioBuffer;
          source.connect(audioContextRef.current.destination);
          source.start(0);
          sourceRef.current = source;
          (sourceRef.current as any).session_id = session_id;
          (sourceRef.current as any).sequence_id = sequence_id;
          (sourceRef.current as any).transcript = transcript;

          sourceRef.current.onended = () => {
            lastShiftedRef.current = null;
            // Acknowledge the finished chunk so the server can send the next one.
            if (
              socketRef.current?.readyState === WebSocket.OPEN &&
              (sourceRef.current as any)?.sequence_id
            ) {
              socketRef.current.send(
                JSON.stringify({
                  type: "status",
                  msg: {
                    session_id: (sourceRef.current as any)?.session_id,
                    sequence_id: (sourceRef.current as any)?.sequence_id,
                    transcript: (sourceRef.current as any)?.transcript,
                  },
                })
              );
            }
            if (bufferQueueRef.current.length > 0) {
              playingRef.current = true;
              setIsPlaying(true);
              const currentTimestamp = Date.now();
              handlePlay(currentTimestamp);
            } else {
              playingRef.current = false;
              setIsPlaying(false);
            }
          };
        } catch (error) {
          console.error("Error decoding audio data:", error);
        }
      } else {
        // Sentinel chunk: start measuring latency until real audio arrives.
        const startTime = Date.now();
        const interval = setInterval(() => {
          setLatency(Date.now() - startTime);
        }, 10);
        (socketRef.current as any).interval = interval;
        if (bufferQueueRef.current.length > 0) {
          playingRef.current = true;
          setIsPlaying(true);
          const currentTimestamp = Date.now();
          handlePlay(currentTimestamp);
        } else {
          playingRef.current = false;
          setIsPlaying(false);
        }
      }
    } catch (error) {
      console.error("Error in handlePlay: ", error);
    }
  };
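  // Open the realtime voice WebSocket (ws:// for localhost, wss:// otherwise)
  // and handle the server's typed messages: "initial" (handshake), "ready"
  // (backend pipeline is up, start streaming mic audio), "media" (audio
  // chunks), "thinking"/"transcribing"/"stop_transcribing"/"connected" (status
  // flags), "pause"/"continue"/"clear" (barge-in playback control), "chat"
  // (finalised messages for the transcript), and "end" (server-side teardown).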
  const connectToRealtimeTTS = async (): Promise<void> => {
    return new Promise<void>((resolve, reject) => {
      try {
        const newAudioContext = new (window.AudioContext ||
          (window as any).webkitAudioContext)();
        audioContextRef.current = newAudioContext;
        setAudioContextState(true);

        const websocketURL = import.meta.env.VITE_WEBSOCKET_URL.includes(
          "localhost"
        )
          ? `ws://${import.meta.env.VITE_WEBSOCKET_URL}/v2v`
          : `wss://${import.meta.env.VITE_WEBSOCKET_URL}/v2v`;
        const ws = new WebSocket(websocketURL);

        ws.onopen = () => {
          // Initial connection established
        };

        ws.onmessage = async (event) => {
          try {
            const data = JSON.parse(event.data);
            const { type, msg } = data;
            switch (type) {
              case "initial":
                socketRef.current = ws;
                setSocketConnected(true);
                resolve();
                break;
              case "media": {
                const timestamp = Date.now();
                bufferQueueRef.current.push(msg);
                if (!playingRef.current && bufferQueueRef.current.length > 0) {
                  handlePlay(timestamp);
                }
                break;
              }
              case "info":
                toast({
                  variant: "destructive",
                  title: "Error",
                  description: msg,
                });
                break;
              case "thinking":
                dispatch(setThinking(true));
                break;
              case "transcribing":
                dispatch(setTranscribing(true));
                break;
              case "stop_transcribing":
                dispatch(setTranscribing(false));
                break;
              case "connected":
                dispatch(setConnected(true));
                break;
              case "ready":
                isGrpcRef.current = true;
                startAudioStream();
                break;
              case "pause":
                // Barge-in: stop current playback immediately.
                if (sourceRef.current) {
                  sourceRef.current.onended = null;
                  sourceRef.current.stop();
                  sourceRef.current.disconnect();
                  sourceRef.current = null;
                }
                playingRef.current = false;
                setGenerating(true);
                setIsPlaying(false);
                break;
              case "continue": {
                // Resume playback, replaying the chunk that was interrupted.
                if (lastShiftedRef.current) {
                  bufferQueueRef.current.unshift(lastShiftedRef.current);
                  lastShiftedRef.current = null;
                }
                setGenerating(false);
                const currentTimestamp = Date.now();
                handlePlay(currentTimestamp);
                if ((socketRef.current as any).interval) {
                  clearInterval((socketRef.current as any).interval);
                  (socketRef.current as any).interval = null;
                }
                break;
              }
              case "clear":
                bufferQueueRef.current = [];
                playingRef.current = false;
                setGenerating(false);
                setIsPlaying(false);
                if (sourceRef.current) {
                  sourceRef.current.onended = null;
                  sourceRef.current.stop();
                  sourceRef.current.disconnect();
                  sourceRef.current = null;
                }
                break;
              case "end":
                try {
                  if (audioContext) {
                    audioContext
                      .close()
                      .then(() => {
                        setAudioContextState(false);
                        if (isListening) {
                          toast({
                            variant: "destructive",
                            title: "Connection closed",
                            description: "Please restart the conversation.",
                          });
                        }
                      })
                      .catch(() => {
                        if (isListening) {
                          toast({
                            variant: "destructive",
                            title: "Error",
                            description: "Please restart the conversation.",
                          });
                        }
                      });
                  }
                  stopAudioStream();
                  dispatch(setIsListening(false));
                } catch (error) {
                  console.error("Error in closing audioContext:", error);
                }
                break;
              case "chat":
                if (msg && activeChat) {
                  if (msg.role && msg.content) {
                    const messageType =
                      msg.role.toLowerCase() === "user" ? "user" : "assistant";
                    dispatch(
                      addMessage({
                        role: messageType,
                        content: msg.content,
                      })
                    );
                  }
                  // dispatch(updateChatTimestamp(activeChat));
                }
                break;
              case "chathistory":
                console.info("Chathistory");
                break;
              default:
                break;
            }
          } catch (error) {
            console.error("Error in websocket message handling:", error);
          }
        };

        ws.onclose = async () => {
          try {
            if (audioStream) {
              audioStream.getTracks().forEach((track) => track.stop());
              setAudioStream(null);
            }
            setElapsedTime(0);
          } catch (err) {
            console.error("Error in closing audio stream:", err);
          }
        };

        ws.onerror = (err) => {
          console.error("WebSocket Error:", err);
          reject(err);
        };
      } catch (err) {
        console.error("Error in making WebSocket connection:", err);
        reject(err);
      }
    });
  };
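  // Capture microphone audio at 16 kHz through a ScriptProcessorNode, convert
  // each 512-sample buffer to 16-bit PCM, and stream the raw bytes over the
  // WebSocket while the backend reports itself ready (isGrpcRef).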
  const startAudioStream = async (): Promise<void> => {
    try {
      const startTime = Date.now();
      setLatency(0);
      const interval = setInterval(() => {
        setElapsedTime(Date.now() - startTime);
      }, 1000);
      dispatch(setThinking(false));

      if (!socketRef.current) {
        toast({
          variant: "destructive",
          title: "Connection Error",
          description: "Please try again. Socket not connected.",
        });
        return;
      }

      audioContextRef.current = new (window.AudioContext ||
        (window as any).webkitAudioContext)();

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      setAudioStream(stream);

      const newAudioContext = new AudioContext({
        sampleRate: 16000,
      });
      setAudioContext(newAudioContext);

      const audioInput = newAudioContext.createMediaStreamSource(stream);
      const bufferSize = 512;
      const scriptProcessorNode = newAudioContext.createScriptProcessor(
        bufferSize,
        1,
        1
      );

      scriptProcessorNode.onaudioprocess = async (e) => {
        const inputData = e.inputBuffer.getChannelData(0);
        // const l16Data = inputData;
        const l16Data = floatTo16BitPCM(inputData);
        try {
          if (!isGrpcRef.current) {
            // Backend is no longer ready: tear the capture pipeline down.
            try {
              if (audioContextState && newAudioContext) {
                newAudioContext
                  .close()
                  .then(() => {
                    setAudioContextState(false);
                    toast({
                      variant: "destructive",
                      title: "Connection Error",
                      description: "Please restart the conversation.",
                    });
                  })
                  .catch(() => {
                    toast({
                      variant: "destructive",
                      title: "Connection Error",
                      description: "Please restart the conversation.",
                    });
                  });
                await stopAudioStream();
                dispatch(setIsListening(false));
              }
            } catch (error) {
              console.error("Error in closing audioContext:", error);
            }
          }
          if (
            isGrpcRef.current &&
            socketRef.current &&
            socketRef.current.readyState === WebSocket.OPEN
          ) {
            socketRef.current.send(l16Data);
          }
        } catch (err) {
          console.error("Error in sending buffer:", err);
        }
      };

      audioInput.connect(scriptProcessorNode);
      scriptProcessorNode.connect(newAudioContext.destination);
    } catch (error) {
      console.error("Error accessing microphone:", error);
      toast({
        variant: "destructive",
        title: "Microphone Error",
        description:
          "Could not access your microphone. Please check your permissions.",
      });
    }
  };

  const stopAudioStream = async (): Promise<void> => {
    setGenerating(false);
    setLatency(0);
    dispatch(setThinking(false));
    dispatch(setConnected(false));
    dispatch(setIsListening(false));
    setElapsedTime(0);
    bufferQueueRef.current = [];

    if (socketRef.current && socketRef.current.readyState === WebSocket.OPEN) {
      if ((socketRef.current as any).interval) {
        clearInterval((socketRef.current as any).interval);
        (socketRef.current as any).interval = null;
      }
      if (audioStream) {
        try {
          audioStream.getTracks().forEach((track) => track.stop());
          setAudioStream(null);
        } catch (err) {
          console.error("Error stopping audio stream:", err);
        }
      }
      try {
        isGrpcRef.current = false;
        socketRef.current.send(JSON.stringify({ type: "stop", msg: "stop" }));
        socketRef.current.close();
        socketRef.current = null;
      } catch (err) {
        console.error("Error closing WebSocket:", err);
      }
      try {
        if (audioContext) {
          await audioContext.close();
          setAudioContext(null);
        }
      } catch (err) {
        console.error("Error closing AudioContext:", err);
      }
    }
  };
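  // Mic button handler: on start, connect the WebSocket and send a "start"
  // message carrying the current voice settings; on stop, close the socket and
  // tear down the audio pipeline.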
  const handleVoiceChatToggle = async (): Promise<void> => {
    if (!activeChat) {
      toast({
        variant: "destructive",
        title: "Error",
        description: "Please select a chat or create one",
      });
      return;
    }

    if (!isListening) {
      setLatency(0);
      dispatch(setIsListening(true));
      try {
        await connectToRealtimeTTS();
        if (socketRef.current) {
          socketRef.current.send(
            JSON.stringify({
              type: "start",
              msg: JSON.stringify({
                temperature: temperature,
                silenceDuration: silenceDuration,
                activeVoice: activeVoice,
                threshold: threshold,
                sessionId: activeChat,
                maxTokens: maxTokens,
              }),
            })
          );
        }
      } catch (error) {
        console.error("Failed to start voice chat:", error);
        dispatch(setIsListening(false));
        toast({
          variant: "destructive",
          title: "Connection Error",
          description: "Failed to start voice chat. Please try again.",
        });
      }
    } else {
      socketRef.current?.close();
      await stopAudioStream();
      socketRef.current = null;
    }
  };

  return (
    <div className="flex flex-col items-center gap-2">
      {/* The original wrapper markup and class names were lost; the layout
          below is a minimal reconstruction that preserves the rendered text,
          conditions, and handler wiring. */}
      {/* {isListening ? `Active: ${formatTime(elapsedTime)}` : "Ready"} */}
      {isListening && <span>Latency: {latency}ms</span>}
      {/* {isListening && } */}
      {transcribing && (
        <span className="flex items-center gap-1">
          <Loader2 className="h-4 w-4 animate-spin" />
          Transcribing
        </span>
      )}
      {thinking && (
        <span className="flex items-center gap-1">
          <Loader2 className="h-4 w-4 animate-spin" />
          Thinking
        </span>
      )}
      <Button onClick={handleVoiceChatToggle}>
        {isListening ? <MicOff /> : <Mic />}
      </Button>
      <p>
        {isListening ? "Click to stop listening" : "Click to start voice chat"}
      </p>
    </div>
  );
};

export default VoiceChat;