<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Gemini Live Chat - Voice and Text Interaction</title>
<style>
body {
max-width: 800px;
margin: 2em auto;
padding: 0 1em;
font-family: system-ui, -apple-system, sans-serif;
}
#visualizer {
width: 100%;
height: 80px;
background: #f0f0f0;
border-radius: 4px;
margin: 0;
}
#log {
background: #f0f0f0;
padding: 1em;
border-radius: 4px;
font-family: monospace;
max-height: 400px;
overflow-y: auto;
}
.controls {
margin: 1em 0;
padding: 1em;
background: #f8f8f8;
border-radius: 4px;
}
.function-card {
padding: 0.8em;
background: white;
border-radius: 4px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.function-card strong {
color: #1976d2;
}
.function-card ul {
color: #555;
}
button {
border: none;
padding: 0.5em 1em;
border-radius: 3px;
cursor: pointer;
transition: opacity 0.2s;
}
button:hover {
opacity: 0.9;
}
#connectButton {
background: #2196f3;
color: white;
}
.voice-start {
background: #4caf50;
color: white;
}
.voice-stop {
background: #f44336;
color: white;
}
</style>
</head>
<body>
<h1>Gemini Live Chat</h1>
<p>Interactive voice and text chat powered by Gemini AI, with support for server-side function calling, code execution, and Google Search.</p>
<p style="font-size: 0.9em; color: #666;">For client-side function calling, visit: <a href="https://huggingface.co/spaces/Nirav121/gemini-live-ffc" target="_blank">Gemini Live Client Function Call</a></p>
<div class="controls" style="background: #e3f2fd;">
<h3 style="margin-top: 0;">Available Functions:</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1em;">
<div class="function-card">
<strong>💡 Light Control</strong>
<ul style="margin: 0.5em 0; padding-left: 1.5em;">
<li>Turn lights on</li>
<li>Turn lights off</li>
</ul>
</div>
<div class="function-card">
<strong>🔍 Search</strong>
<ul style="margin: 0.5em 0; padding-left: 1.5em;">
<li>Google search</li>
</ul>
</div>
<div class="function-card">
<strong>💻 Code</strong>
<ul style="margin: 0.5em 0; padding-left: 1.5em;">
<li>Execute code</li>
<li>Run commands</li>
</ul>
</div>
</div>
<p style="margin: 0.5em 0 0 0; font-size: 0.9em; color: #666;">
Try saying "Turn on the lights" or "Search for the weather in London", or ask any question!
</p>
</div>
<div class="controls">
<div style="display: flex; align-items: center; justify-content: space-between; gap: 1em;">
<div style="display: flex; align-items: center; gap: 1em;">
<div>
<span style="font-weight: 500; color: #666;">Server:</span>
<span id="connectionStatus" style="display: inline-block; padding: 0.3em 0.6em; margin-left: 0.5em; border-radius: 3px; background: #f44336; color: white;">Not connected</span>
</div>
<div id="micStatus" style="display: none;">
<span style="font-weight: 500; color: #666;">Voice:</span>
<span style="display: inline-block; padding: 0.3em 0.6em; margin-left: 0.5em; border-radius: 3px; background: #4caf50; color: white;">Recording</span>
</div>
</div>
<div style="display: flex; gap: 0.5em;">
<button id="connectButton" onclick="toggleConnection()">
<span style="margin-right: 0.3em;">🔌</span> Connect to Server
</button>
<button id="voiceStartButton" class="voice-start" onclick="startCapture()" style="display: none;">
<span style="margin-right: 0.3em;">🎤</span> Start Voice Chat
</button>
<button id="voiceStopButton" class="voice-stop" onclick="stopCapture()" style="display: none;">
<span style="margin-right: 0.3em;">⏹️</span> Stop Voice Chat
</button>
</div>
</div>
<div style="margin-top: 1em; display: flex; gap: 0.5em;">
<input type="text" id="textMessage" placeholder="Type your message here" style="flex: 1; padding: 0.5em; border: 1px solid #ddd; border-radius: 3px;"
onkeydown="if(event.key === 'Enter') { event.preventDefault(); sendText(); }" />
<button onclick="sendText()" style="white-space: nowrap;">
<span style="margin-right: 0.3em;">📤</span> Send
</button>
</div>
</div>
<div class="controls">
<canvas id="visualizer"></canvas>
</div>
<div style="margin: 1em 0;">
<strong>Log Settings:</strong><br>
<label><input type="checkbox" id="logWebSocket"> WebSocket Events</label>
<label style="margin-left: 1em"><input type="checkbox" id="logAudio"> Audio Events</label>
<label style="margin-left: 1em"><input type="checkbox" id="logText"> Text Events</label>
<label style="margin-left: 1em"><input type="checkbox" id="logError" checked> Error Events</label>
</div>
<pre id="log"></pre>
<script>
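// Wire protocol (as inferred from the handlers below): every frame is JSON
// with a "type" field. The client sends
//   { type: "audio", payload: <base64 16-bit mono PCM @ 16 kHz>, seq, config }
//   { type: "text",  content: "..." }
// and the server replies with the same shapes, its audio being 24 kHz
// 16-bit PCM (RECEIVED_SAMPLE_RATE in app.py).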
let socket;
let playbackCtx = null;
let nextPlaybackTime = 0;
let audioCtx;
let scriptNode;
let micStream;
let isCapturing = false;
let audioSeq = 0;
let scheduledSources = []; // Track scheduled audio sources
let analyser;
let visualizerCanvas;
let visualizerCtx;
let animationFrame;
function updateConnectionStatus(connected) {
const statusEl = document.getElementById('connectionStatus');
const connectButton = document.getElementById('connectButton');
const voiceStartButton = document.getElementById('voiceStartButton');
if (connected) {
statusEl.textContent = 'Connected';
statusEl.style.background = '#4caf50';
connectButton.textContent = '🔌 Disconnect from Server';
voiceStartButton.style.display = '';
} else {
statusEl.textContent = 'Not connected';
statusEl.style.background = '#f44336';
connectButton.textContent = '🔌 Connect to Server';
voiceStartButton.style.display = 'none';
// Also stop recording if we're disconnected
if (isCapturing) {
stopCapture();
}
}
}
function updateMicStatus(recording) {
const micStatus = document.getElementById('micStatus');
const voiceStartButton = document.getElementById('voiceStartButton');
const voiceStopButton = document.getElementById('voiceStopButton');
if (recording) {
micStatus.style.display = '';
voiceStartButton.style.display = 'none';
voiceStopButton.style.display = '';
} else {
micStatus.style.display = 'none';
voiceStartButton.style.display = '';
voiceStopButton.style.display = 'none';
}
}
function toggleConnection() {
if (socket && socket.readyState === WebSocket.OPEN) {
socket.close();
} else {
connectWebSocket();
}
}
function logMessage(category, ...args) {
const pre = document.getElementById("log");
const logCategory = document.getElementById(`log${category.charAt(0).toUpperCase() + category.slice(1)}`);
const shouldLog = logCategory ? logCategory.checked : false;
if (shouldLog) {
const timestamp = new Date().toLocaleTimeString();
pre.textContent += `[${timestamp}] [${category}] ` + args.join(" ") + "\n";
pre.scrollTop = pre.scrollHeight; // Keep the newest entries in view
console.log(`[${category}]`, ...args);
}
}
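// Barge-in support: when fresh user input arrives (speech above the noise
// threshold, or a typed message), stop any audio the server already queued
// so the model's reply doesn't talk over it. Called from onaudioprocess
// and sendText below.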
function clearScheduledAudio() {
// Stop and disconnect all scheduled audio sources
while (scheduledSources.length > 0) {
const source = scheduledSources.pop();
try {
source.stop();
source.disconnect();
} catch (err) {
// Ignore errors if source already finished playing
}
}
// Reset next playback time
if (playbackCtx) {
nextPlaybackTime = playbackCtx.currentTime;
}
logMessage("Audio", "Cleared all scheduled audio");
}
function setupVisualizer() {
visualizerCanvas = document.getElementById('visualizer');
visualizerCtx = visualizerCanvas.getContext('2d');
// Make canvas resolution match display size
const rect = visualizerCanvas.getBoundingClientRect();
visualizerCanvas.width = rect.width;
visualizerCanvas.height = rect.height;
if (!analyser && playbackCtx) {
analyser = playbackCtx.createAnalyser();
analyser.fftSize = 256; // Reduced for wider bars
analyser.minDecibels = -90;
analyser.maxDecibels = -10;
analyser.smoothingTimeConstant = 0.85;
analyser.connect(playbackCtx.destination); // Route analysed audio on to the speakers once
}
}
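// Renders mirrored frequency bars around the canvas midline, one bar per
// FFT bin, and re-schedules itself via requestAnimationFrame.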
function drawVisualizer() {
if (!analyser) return;
const bufferLength = analyser.frequencyBinCount;
const dataArray = new Uint8Array(bufferLength);
analyser.getByteFrequencyData(dataArray);
visualizerCtx.fillStyle = '#f0f0f0';
visualizerCtx.fillRect(0, 0, visualizerCanvas.width, visualizerCanvas.height);
const barWidth = (visualizerCanvas.width / bufferLength) * 2.5;
const centerY = visualizerCanvas.height / 2;
let x = 0;
for (let i = 0; i < bufferLength; i++) {
const barHeight = (dataArray[i] / 255) * (visualizerCanvas.height / 2); // Half height for centering
// Create gradient for top half (going up)
const gradientTop = visualizerCtx.createLinearGradient(0, centerY, 0, centerY - barHeight);
gradientTop.addColorStop(0, '#4caf50'); // Green at center
gradientTop.addColorStop(1, '#81c784'); // Lighter green at top
// Create gradient for bottom half (going down)
const gradientBottom = visualizerCtx.createLinearGradient(0, centerY, 0, centerY + barHeight);
gradientBottom.addColorStop(0, '#4caf50'); // Green at center
gradientBottom.addColorStop(1, '#81c784'); // Lighter green at bottom
// Draw top half of the bar
visualizerCtx.fillStyle = gradientTop;
visualizerCtx.fillRect(x, centerY - barHeight, barWidth, barHeight);
// Draw bottom half of the bar
visualizerCtx.fillStyle = gradientBottom;
visualizerCtx.fillRect(x, centerY, barWidth, barHeight);
x += barWidth + 1; // Add 1 pixel gap between bars
}
animationFrame = requestAnimationFrame(drawVisualizer);
}
function stopVisualizer() {
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
if (visualizerCtx) {
visualizerCtx.fillStyle = '#f0f0f0';
visualizerCtx.fillRect(0, 0, visualizerCanvas.width, visualizerCanvas.height);
}
}
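// Note: the playback AudioContext is created lazily in onopen, which runs
// after the user's Connect click; browsers keep an AudioContext suspended
// unless it is created (or resumed) from a user gesture.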
function connectWebSocket() {
logMessage("WebSocket", "Connecting...");
updateConnectionStatus(false);
// Use current origin and replace http(s) with ws(s)
const wsUrl = `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;
socket = new WebSocket(wsUrl);
socket.onopen = () => {
logMessage("WebSocket", "Opened connection");
updateConnectionStatus(true);
if (!playbackCtx) {
playbackCtx = new (window.AudioContext || window.webkitAudioContext)();
setupVisualizer();
}
nextPlaybackTime = playbackCtx.currentTime;
};
socket.onerror = (err) => {
logMessage("Error", "WebSocket error:", err);
updateConnectionStatus(false);
};
socket.onclose = () => {
logMessage("WebSocket", "Connection closed");
updateConnectionStatus(false);
if (isCapturing) {
stopCapture();
}
};
socket.onmessage = (event) => {
try {
const data = JSON.parse(event.data);
if (data.type === "audio" && data.payload) {
const arrayBuffer = base64ToArrayBuffer(data.payload);
const int16View = new Int16Array(arrayBuffer);
const float32Buffer = new Float32Array(int16View.length);
for (let i = 0; i < int16View.length; i++) {
float32Buffer[i] = int16View[i] / 32768;
}
const sampleRate = 24000; // RECEIVED_SAMPLE_RATE from app.py
const audioBuffer = playbackCtx.createBuffer(1, float32Buffer.length, sampleRate);
audioBuffer.copyToChannel(float32Buffer, 0);
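// Queue chunks back to back: each one starts at max(currentTime,
// nextPlaybackTime), so consecutive chunks play gaplessly and in order.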
const scheduledTime = Math.max(playbackCtx.currentTime, nextPlaybackTime);
const source = playbackCtx.createBufferSource();
source.buffer = audioBuffer;
// Route playback through the analyser so the visualizer can sample it
if (analyser) {
source.connect(analyser);
if (!animationFrame) {
drawVisualizer();
}
} else {
source.connect(playbackCtx.destination);
}
source.start(scheduledTime);
// Add source to tracked sources
scheduledSources.push(source);
// Remove source from tracking once it finishes
source.onended = () => {
const index = scheduledSources.indexOf(source);
if (index > -1) {
scheduledSources.splice(index, 1);
}
// Stop visualizer if no more audio
if (scheduledSources.length === 0) {
stopVisualizer();
}
};
nextPlaybackTime = scheduledTime + audioBuffer.duration;
logMessage("Audio", "Scheduled playback. Start time:", scheduledTime, "Duration:", audioBuffer.duration);
} else if (data.type === "text" && data.content) {
logMessage("Text", "Received:", data.content);
} else {
logMessage("WebSocket", "Received message:", event.data);
}
} catch (err) {
logMessage("Error", "Failed to process message:", err);
}
};
}
async function startCapture() {
if (!socket || socket.readyState !== WebSocket.OPEN) {
logMessage("WebSocket", "Not connected. Click 'Connect to Server' first.");
return;
}
if (isCapturing) {
logMessage("Audio", "Already capturing!");
return;
}
isCapturing = true;
updateMicStatus(true);
logMessage("Audio", "Starting microphone capture...");
try {
micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
logMessage("Audio", "Got microphone access");
audioCtx = new (window.AudioContext || window.webkitAudioContext)();
logMessage("Audio", "Created AudioContext with sample rate:", audioCtx.sampleRate);
// Create a media source from the mic stream
const source = audioCtx.createMediaStreamSource(micStream);
logMessage("Audio", "Created MediaStreamSource");
// Create a ScriptProcessorNode (deprecated in favor of AudioWorklet, but
// still the simplest widely supported way to tap raw mic samples)
const bufferSize = 4096; // Must be a power of two between 256 and 16384
const inputChannels = 1;
const outputChannels = 1;
scriptNode = audioCtx.createScriptProcessor(bufferSize, inputChannels, outputChannels);
logMessage("Audio", "Created ScriptProcessorNode with buffer size:", bufferSize);
scriptNode.onaudioprocess = (audioEvent) => {
if (!isCapturing) return;
// Get raw samples and resample to 16kHz
const inputBuffer = audioEvent.inputBuffer.getChannelData(0);
// Check if there's actual audio input (not just silence)
const hasAudio = inputBuffer.some(sample => Math.abs(sample) > 0.01); // Threshold for noise
if (hasAudio) {
clearScheduledAudio(); // Only clear when we detect actual audio input
}
const resampled = resampleAudio(inputBuffer, audioCtx.sampleRate, 16000);
// Convert resampled audio to 16-bit PCM
const pcm16 = floatTo16BitPCM(resampled);
// Encode as base64 and send over WebSocket
const bytes = new Uint8Array(pcm16.buffer);
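// Spreading into String.fromCharCode is fine for chunks this small (a few
// KB); much larger buffers would need chunked conversion to avoid engine
// argument-count limits.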
const b64 = btoa(String.fromCharCode(...bytes));
const audioMsg = {
type: "audio",
payload: b64,
seq: audioSeq++,
config: {
sampleRate: 16000,
bitDepth: 16,
channels: 1
}
};
logMessage("Audio", "Processing chunk. Seq:", audioMsg.seq);
try {
if (socket.readyState === WebSocket.OPEN) {
socket.send(JSON.stringify(audioMsg));
} else {
logMessage("WebSocket", "Not open, stopping capture");
stopCapture();
}
} catch (err) {
logMessage("Error", "Failed to send audio:", err);
stopCapture();
}
};
// Connect the pipeline: mic -> script -> muted gain -> destination.
// Chrome only fires onaudioprocess when the node is connected through to
// the destination, so route via a zero-gain node to stay silent locally.
source.connect(scriptNode);
const silentGain = audioCtx.createGain();
silentGain.gain.value = 0;
scriptNode.connect(silentGain);
silentGain.connect(audioCtx.destination);
logMessage("Audio", "Connected audio pipeline");
logMessage("Audio", "Recording...");
} catch (err) {
logMessage("Error", "Failed to get microphone access:", err);
isCapturing = false;
updateMicStatus(false); // Revert the UI since capture never started
}
}
function stopCapture() {
if (!isCapturing) return;
isCapturing = false;
updateMicStatus(false);
logMessage("Audio", "Stopped microphone capture");
if (scriptNode) {
scriptNode.disconnect();
scriptNode.onaudioprocess = null;
scriptNode = null;
}
if (micStream) {
// Stop all tracks
micStream.getTracks().forEach(track => track.stop());
micStream = null;
}
if (audioCtx) {
audioCtx.close();
audioCtx = null;
}
}
function floatTo16BitPCM(floatSamples) {
// Convert an array of floats in [-1, 1] to an Int16Array
const out = new Int16Array(floatSamples.length);
for (let i = 0; i < floatSamples.length; i++) {
let s = Math.max(-1, Math.min(1, floatSamples[i]));
// scale range
s = s < 0 ? s * 0x8000 : s * 0x7FFF;
out[i] = s;
}
return out;
}
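// Naive nearest-neighbor downsampler: adequate for speech capture, but it
// applies no anti-aliasing filter; linear interpolation or an AudioWorklet-
// based resampler would be higher fidelity.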
function resampleAudio(inputBuffer, fromRate, toRate) {
const ratio = toRate / fromRate;
const newLength = Math.round(inputBuffer.length * ratio);
const resampled = new Float32Array(newLength);
for(let i = 0; i < newLength; i++) {
const index = Math.round(i / ratio);
resampled[i] = inputBuffer[Math.min(index, inputBuffer.length-1)];
}
return resampled;
}
function sendText() {
const textInput = document.getElementById("textMessage");
const text = textInput.value.trim();
if (text && socket && socket.readyState === WebSocket.OPEN) {
// Clear any scheduled audio before sending text
clearScheduledAudio();
socket.send(JSON.stringify({ type: "text", content: text }));
logMessage("Text", "Sent:", text);
textInput.value = "";
} else {
logMessage("WebSocket", "Not connected or text is empty");
}
}
function base64ToArrayBuffer(b64) {
const binaryString = window.atob(b64);
const len = binaryString.length;
const bytes = new Uint8Array(len);
for (let i = 0; i < len; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
return bytes.buffer;
}
</script>
</body>
</html>