<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>Gemini Live Chat - Voice and Text Interaction</title>
  <style>
    body {
      max-width: 800px;
      margin: 2em auto;
      padding: 0 1em;
      font-family: system-ui, -apple-system, sans-serif;
    }
    #visualizer {
      width: 100%;
      height: 80px;
      background: #f0f0f0;
      border-radius: 4px;
      margin: 0;
    }
    #log {
      background: #f0f0f0;
      padding: 1em;
      border-radius: 4px;
      font-family: monospace;
      max-height: 400px;
      overflow-y: auto;
    }
    .controls {
      margin: 1em 0;
      padding: 1em;
      background: #f8f8f8;
      border-radius: 4px;
    }
    .function-card {
      padding: 0.8em;
      background: white;
      border-radius: 4px;
      box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
    }
    .function-card strong {
      color: #1976d2;
    }
    .function-card ul {
      color: #555;
    }
    button {
      border: none;
      padding: 0.5em 1em;
      border-radius: 3px;
      cursor: pointer;
      transition: opacity 0.2s;
    }
    button:hover {
      opacity: 0.9;
    }
    #connectButton {
      background: #2196f3;
      color: white;
    }
    .voice-start {
      background: #4caf50;
      color: white;
    }
    .voice-stop {
      background: #f44336;
      color: white;
    }
  </style>
</head>
<body>
  <h1>Gemini Live Chat</h1>
  <p>Interactive voice and text chat powered by Gemini, with support for server-side function calling, code execution, and Google Search.</p>
  <p style="font-size: 0.9em; color: #666;">For client-side function calling, see: <a href="https://huggingface.co/spaces/Nirav121/gemini-live-ffc" target="_blank">Gemini Live Client Function Call</a></p>
<div class="controls" style="background: #e3f2fd;"> | |
<h3 style="margin-top: 0;">Available Functions:</h3> | |
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1em;"> | |
<div class="function-card"> | |
<strong>💡 Light Control</strong> | |
<ul style="margin: 0.5em 0; padding-left: 1.5em;"> | |
<li>Turn lights on</li> | |
<li>Turn lights off</li> | |
</ul> | |
</div> | |
<div class="function-card"> | |
<strong>🔍 Search</strong> | |
<ul style="margin: 0.5em 0; padding-left: 1.5em;"> | |
<li>Google search</li> | |
</ul> | |
</div> | |
<div class="function-card"> | |
<strong>💻 Code</strong> | |
<ul style="margin: 0.5em 0; padding-left: 1.5em;"> | |
<li>Execute code</li> | |
<li>Run commands</li> | |
</ul> | |
</div> | |
</div> | |
<p style="margin: 0.5em 0 0 0; font-size: 0.9em; color: #666;"> | |
Try saying: "Turn on the lights" or "Search for weather in London" or ask any question! | |
</p> | |
</div> | |
<div class="controls"> | |
<div style="display: flex; align-items: center; justify-content: space-between; gap: 1em;"> | |
<div style="display: flex; align-items: center; gap: 1em;"> | |
<div> | |
<span style="font-weight: 500; color: #666;">Server:</span> | |
<span id="connectionStatus" style="display: inline-block; padding: 0.3em 0.6em; margin-left: 0.5em; border-radius: 3px; background: #f44336; color: white;">Not connected</span> | |
</div> | |
<div id="micStatus" style="display: none;"> | |
<span style="font-weight: 500; color: #666;">Voice:</span> | |
<span style="display: inline-block; padding: 0.3em 0.6em; margin-left: 0.5em; border-radius: 3px; background: #4caf50; color: white;">Recording</span> | |
</div> | |
</div> | |
<div style="display: flex; gap: 0.5em;"> | |
<button id="connectButton" onclick="toggleConnection()"> | |
<span style="margin-right: 0.3em;">🔌</span> Connect to Server | |
</button> | |
<button id="voiceStartButton" class="voice-start" onclick="startCapture()" style="display: none;"> | |
<span style="margin-right: 0.3em;">🎤</span> Start Voice Chat | |
</button> | |
<button id="voiceStopButton" class="voice-stop" onclick="stopCapture()" style="display: none;"> | |
<span style="margin-right: 0.3em;">⏹️</span> Stop Voice Chat | |
</button> | |
</div> | |
</div> | |
<div style="margin-top: 1em; display: flex; gap: 0.5em;"> | |
<input type="text" id="textMessage" placeholder="Type your message here" style="flex: 1; padding: 0.5em; border: 1px solid #ddd; border-radius: 3px;" | |
onkeydown="if(event.key === 'Enter') { event.preventDefault(); sendText(); }" /> | |
<button onclick="sendText()" style="white-space: nowrap;"> | |
<span style="margin-right: 0.3em;">📤</span> Send | |
</button> | |
</div> | |
</div> | |
<div class="controls"> | |
<canvas id="visualizer"></canvas> | |
</div> | |
<div style="margin: 1em 0;"> | |
<strong>Log Settings:</strong><br> | |
<label><input type="checkbox" id="logWebSocket"> WebSocket Events</label> | |
<label style="margin-left: 1em"><input type="checkbox" id="logAudio"> Audio Events</label> | |
<label style="margin-left: 1em"><input type="checkbox" id="logText"> Text Events</label> | |
<label style="margin-left: 1em"><input type="checkbox" id="logError" checked> Error Events</label> | |
</div> | |
<pre id="log"></pre> | |
<script> | |
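    // Wire protocol, as implemented below: both directions exchange JSON text frames.
    //   Client -> server: { type: "audio", payload: <base64 16-bit PCM, 16 kHz mono>, seq, config }
    //                     { type: "text", content: <string> }
    //   Server -> client: { type: "audio", payload: <base64 16-bit PCM, 24 kHz mono> }
    //                     { type: "text", content: <string> }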
    let socket;
    let playbackCtx = null;
    let nextPlaybackTime = 0;
    let audioCtx;
    let scriptNode;
    let micStream;
    let isCapturing = false;
    let audioSeq = 0;
    let scheduledSources = []; // Track scheduled audio sources
    let analyser;
    let visualizerCanvas;
    let visualizerCtx;
    let animationFrame;

    function updateConnectionStatus(connected) {
      const statusEl = document.getElementById('connectionStatus');
      const connectButton = document.getElementById('connectButton');
      const voiceStartButton = document.getElementById('voiceStartButton');
      if (connected) {
        statusEl.textContent = 'Connected';
        statusEl.style.background = '#4caf50';
        connectButton.textContent = '🔌 Disconnect Server';
        voiceStartButton.style.display = '';
      } else {
        statusEl.textContent = 'Not connected';
        statusEl.style.background = '#f44336';
        connectButton.textContent = '🔌 Connect to Server';
        voiceStartButton.style.display = 'none';
        // Also stop recording if we're disconnected
        if (isCapturing) {
          stopCapture();
        }
      }
    }

    function updateMicStatus(recording) {
      const micStatus = document.getElementById('micStatus');
      const voiceStartButton = document.getElementById('voiceStartButton');
      const voiceStopButton = document.getElementById('voiceStopButton');
      if (recording) {
        micStatus.style.display = '';
        voiceStartButton.style.display = 'none';
        voiceStopButton.style.display = '';
      } else {
        micStatus.style.display = 'none';
        voiceStartButton.style.display = '';
        voiceStopButton.style.display = 'none';
      }
    }

    function toggleConnection() {
      if (socket && socket.readyState === WebSocket.OPEN) {
        socket.close();
      } else {
        connectWebSocket();
      }
    }

    function logMessage(category, ...args) {
      const pre = document.getElementById("log");
      const logCategory = document.getElementById(`log${category.charAt(0).toUpperCase() + category.slice(1)}`);
      const shouldLog = logCategory ? logCategory.checked : false;
      if (shouldLog) {
        const timestamp = new Date().toLocaleTimeString();
        pre.textContent += `[${timestamp}] [${category}] ` + args.join(" ") + "\n";
        console.log(`[${category}]`, ...args);
      }
    }
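
    // Barge-in support: whenever the user interrupts (starts speaking, or sends
    // a text message), any model speech still queued for playback is flushed so
    // the reply does not keep talking over the new input.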
    function clearScheduledAudio() {
      // Stop and disconnect all scheduled audio sources
      while (scheduledSources.length > 0) {
        const source = scheduledSources.pop();
        try {
          source.stop();
          source.disconnect();
        } catch (err) {
          // Ignore errors if source already finished playing
        }
      }
      // Reset next playback time
      if (playbackCtx) {
        nextPlaybackTime = playbackCtx.currentTime;
      }
      logMessage("Audio", "Cleared all scheduled audio");
    }

    function setupVisualizer() {
      visualizerCanvas = document.getElementById('visualizer');
      visualizerCtx = visualizerCanvas.getContext('2d');
      // Make canvas resolution match display size
      const rect = visualizerCanvas.getBoundingClientRect();
      visualizerCanvas.width = rect.width;
      visualizerCanvas.height = rect.height;
      if (!analyser && playbackCtx) {
        analyser = playbackCtx.createAnalyser();
        analyser.fftSize = 256; // Reduced for wider bars
        analyser.minDecibels = -90;
        analyser.maxDecibels = -10;
        analyser.smoothingTimeConstant = 0.85;
        // Route the analyser to the speakers once; playback sources connect to it later
        analyser.connect(playbackCtx.destination);
      }
    }
    function drawVisualizer() {
      if (!analyser) return;
      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      analyser.getByteFrequencyData(dataArray);
      visualizerCtx.fillStyle = '#f0f0f0';
      visualizerCtx.fillRect(0, 0, visualizerCanvas.width, visualizerCanvas.height);
      const barWidth = (visualizerCanvas.width / bufferLength) * 2.5;
      const centerY = visualizerCanvas.height / 2;
      let x = 0;
      for (let i = 0; i < bufferLength; i++) {
        const barHeight = (dataArray[i] / 255) * (visualizerCanvas.height / 2); // Half height for centering
        // Create gradient for top half (going up)
        const gradientTop = visualizerCtx.createLinearGradient(0, centerY, 0, centerY - barHeight);
        gradientTop.addColorStop(0, '#4caf50'); // Green at center
        gradientTop.addColorStop(1, '#81c784'); // Lighter green at top
        // Create gradient for bottom half (going down)
        const gradientBottom = visualizerCtx.createLinearGradient(0, centerY, 0, centerY + barHeight);
        gradientBottom.addColorStop(0, '#4caf50'); // Green at center
        gradientBottom.addColorStop(1, '#81c784'); // Lighter green at bottom
        // Draw top half of the bar
        visualizerCtx.fillStyle = gradientTop;
        visualizerCtx.fillRect(x, centerY - barHeight, barWidth, barHeight);
        // Draw bottom half of the bar
        visualizerCtx.fillStyle = gradientBottom;
        visualizerCtx.fillRect(x, centerY, barWidth, barHeight);
        x += barWidth + 1; // Add 1 pixel gap between bars
      }
      animationFrame = requestAnimationFrame(drawVisualizer);
    }

    function stopVisualizer() {
      if (animationFrame) {
        cancelAnimationFrame(animationFrame);
        animationFrame = null;
      }
      if (visualizerCtx) {
        visualizerCtx.fillStyle = '#f0f0f0';
        visualizerCtx.fillRect(0, 0, visualizerCanvas.width, visualizerCanvas.height);
      }
    }
    function connectWebSocket() {
      logMessage("WebSocket", "Connecting...");
      updateConnectionStatus(false);
      // Use current origin and replace http(s) with ws(s)
      const wsUrl = `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;
      socket = new WebSocket(wsUrl);
      socket.onopen = () => {
        logMessage("WebSocket", "Opened connection");
        updateConnectionStatus(true);
        if (!playbackCtx) {
          playbackCtx = new (window.AudioContext || window.webkitAudioContext)();
          setupVisualizer();
        }
        nextPlaybackTime = playbackCtx.currentTime;
      };
      socket.onerror = (err) => {
        logMessage("Error", "WebSocket error:", err);
        updateConnectionStatus(false);
      };
      socket.onclose = () => {
        logMessage("WebSocket", "Connection closed");
        updateConnectionStatus(false);
        if (isCapturing) {
          stopCapture();
        }
      };
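      // Gapless playback: each incoming chunk is scheduled to start exactly when
      // the previous one ends (nextPlaybackTime); if playback has fallen behind
      // real time, the chunk starts immediately instead.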
      socket.onmessage = (event) => {
        try {
          const data = JSON.parse(event.data);
          if (data.type === "audio" && data.payload) {
            const arrayBuffer = base64ToArrayBuffer(data.payload);
            const int16View = new Int16Array(arrayBuffer);
            const float32Buffer = new Float32Array(int16View.length);
            for (let i = 0; i < int16View.length; i++) {
              float32Buffer[i] = int16View[i] / 32768;
            }
            const sampleRate = 24000; // RECEIVED_SAMPLE_RATE from app.py
            const audioBuffer = playbackCtx.createBuffer(1, float32Buffer.length, sampleRate);
            audioBuffer.copyToChannel(float32Buffer, 0);
            const scheduledTime = Math.max(playbackCtx.currentTime, nextPlaybackTime);
            const source = playbackCtx.createBufferSource();
            source.buffer = audioBuffer;
            // Connect through the analyser (already routed to the speakers) for visualization
            if (analyser) {
              source.connect(analyser);
              if (!animationFrame) {
                drawVisualizer();
              }
            } else {
              source.connect(playbackCtx.destination);
            }
            source.start(scheduledTime);
            // Add source to tracked sources
            scheduledSources.push(source);
            // Remove source from tracking once it finishes
            source.onended = () => {
              const index = scheduledSources.indexOf(source);
              if (index > -1) {
                scheduledSources.splice(index, 1);
              }
              // Stop visualizer if no more audio
              if (scheduledSources.length === 0) {
                stopVisualizer();
              }
            };
            nextPlaybackTime = scheduledTime + audioBuffer.duration;
            logMessage("Audio", "Scheduled playback. Start time:", scheduledTime, "Duration:", audioBuffer.duration);
          } else if (data.type === "text" && data.content) {
            logMessage("Text", "Received:", data.content);
          } else {
            logMessage("WebSocket", "Received message:", event.data);
          }
        } catch (err) {
          logMessage("Error", "Failed to process message:", err);
        }
      };
    }
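
    // Capture pipeline: getUserMedia -> MediaStreamSource -> ScriptProcessorNode,
    // whose callback resamples each buffer to 16 kHz, converts it to 16-bit PCM,
    // base64-encodes it, and sends it as a JSON frame. Note ScriptProcessorNode
    // is deprecated in favor of AudioWorklet; it is kept here for simplicity.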
    async function startCapture() {
      if (!socket || socket.readyState !== WebSocket.OPEN) {
        logMessage("WebSocket", "Not connected. Click 'Connect to Server' first.");
        return;
      }
      if (isCapturing) {
        logMessage("Audio", "Already capturing!");
        return;
      }
      isCapturing = true;
      updateMicStatus(true);
      logMessage("Audio", "Starting microphone capture...");
      try {
        micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
        logMessage("Audio", "Got microphone access");
        audioCtx = new (window.AudioContext || window.webkitAudioContext)();
        logMessage("Audio", "Created AudioContext with sample rate:", audioCtx.sampleRate);
        // Create a media source from the mic stream
        const source = audioCtx.createMediaStreamSource(micStream);
        logMessage("Audio", "Created MediaStreamSource");
        // Create a ScriptProcessorNode
        const bufferSize = 4096; // Adjustable; larger buffers mean fewer callbacks but more latency
        const inputChannels = 1;
        const outputChannels = 1;
        scriptNode = audioCtx.createScriptProcessor(bufferSize, inputChannels, outputChannels);
        logMessage("Audio", "Created ScriptProcessorNode with buffer size:", bufferSize);
        scriptNode.onaudioprocess = (audioEvent) => {
          if (!isCapturing) return;
          // Get raw samples and resample to 16 kHz
          const inputBuffer = audioEvent.inputBuffer.getChannelData(0);
          // Check if there's actual audio input (not just silence)
          const hasAudio = inputBuffer.some(sample => Math.abs(sample) > 0.01); // Threshold for noise
          if (hasAudio) {
            clearScheduledAudio(); // Only clear when we detect actual audio input
          }
          const resampled = resampleAudio(inputBuffer, audioCtx.sampleRate, 16000);
          // Convert resampled audio to 16-bit PCM
          const pcm16 = floatTo16BitPCM(resampled);
          // Encode as base64 and send over WebSocket. Build the binary string in
          // a loop: spreading the whole array into String.fromCharCode can
          // overflow the engine's argument limit on large buffers.
          const bytes = new Uint8Array(pcm16.buffer);
          let binary = "";
          for (let i = 0; i < bytes.length; i++) {
            binary += String.fromCharCode(bytes[i]);
          }
          const b64 = btoa(binary);
          const audioMsg = {
            type: "audio",
            payload: b64,
            seq: audioSeq++,
            config: {
              sampleRate: 16000,
              bitDepth: 16,
              channels: 1
            }
          };
          logMessage("Audio", "Processing chunk. Seq:", audioMsg.seq);
          try {
            if (socket.readyState === WebSocket.OPEN) {
              socket.send(JSON.stringify(audioMsg));
            } else {
              logMessage("WebSocket", "Not open, stopping capture");
              stopCapture();
            }
          } catch (err) {
            logMessage("Error", "Failed to send audio:", err);
            stopCapture();
          }
        };
        // Connect the pipeline: mic -> script -> audioCtx.destination.
        // Some browsers (notably Chrome) only fire onaudioprocess while the node
        // is connected to the destination; the output buffer is never written,
        // so this plays silence rather than echoing the mic.
        source.connect(scriptNode);
        scriptNode.connect(audioCtx.destination);
        logMessage("Audio", "Connected audio pipeline");
        logMessage("Audio", "Recording...");
      } catch (err) {
        logMessage("Error", "Failed to get microphone access:", err);
        isCapturing = false;
        updateMicStatus(false);
      }
    }
    function stopCapture() {
      if (!isCapturing) return;
      isCapturing = false;
      updateMicStatus(false);
      logMessage("Audio", "Stopped microphone capture");
      if (scriptNode) {
        scriptNode.disconnect();
        scriptNode.onaudioprocess = null;
        scriptNode = null;
      }
      if (micStream) {
        // Stop all tracks
        micStream.getTracks().forEach(track => track.stop());
        micStream = null;
      }
      if (audioCtx) {
        audioCtx.close();
        audioCtx = null;
      }
    }
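
    // 16-bit PCM has an asymmetric range ([-32768, 32767]), hence the separate
    // scale factors for negative and positive samples below.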
    function floatTo16BitPCM(floatSamples) {
      // Convert an array of floats in [-1, 1] to an Int16Array
      const out = new Int16Array(floatSamples.length);
      for (let i = 0; i < floatSamples.length; i++) {
        let s = Math.max(-1, Math.min(1, floatSamples[i]));
        // Scale to the int16 range
        s = s < 0 ? s * 0x8000 : s * 0x7FFF;
        out[i] = s;
      }
      return out;
    }
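
    // Nearest-neighbor resampling: cheap and adequate for speech input here.
    // A higher-quality pipeline would low-pass filter before downsampling, or
    // request the capture rate up front via the AudioContext sampleRate option.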
    function resampleAudio(inputBuffer, fromRate, toRate) {
      const ratio = toRate / fromRate;
      const newLength = Math.round(inputBuffer.length * ratio);
      const resampled = new Float32Array(newLength);
      for (let i = 0; i < newLength; i++) {
        const index = Math.round(i / ratio);
        resampled[i] = inputBuffer[Math.min(index, inputBuffer.length - 1)];
      }
      return resampled;
    }
    function sendText() {
      const textInput = document.getElementById("textMessage");
      const text = textInput.value.trim();
      if (text && socket && socket.readyState === WebSocket.OPEN) {
        // Clear any scheduled audio before sending text
        clearScheduledAudio();
        socket.send(JSON.stringify({ type: "text", content: text }));
        logMessage("Text", "Sent:", text);
        textInput.value = "";
      } else {
        logMessage("WebSocket", "Not connected or text is empty");
      }
    }
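
    // Decode a base64 payload into raw bytes: atob yields a binary string, so
    // each char code is one byte of the original PCM data.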
    function base64ToArrayBuffer(b64) {
      const binaryString = window.atob(b64);
      const len = binaryString.length;
      const bytes = new Uint8Array(len);
      for (let i = 0; i < len; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      return bytes.buffer;
    }
  </script>
</body>
</html> | |