gemini-live / index copy.html
Nirav Madhani
First commit
cc7c705
raw
history blame
6.27 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Raw PCM Capture Demo</title>
</head>
<body>
<h1>Capture Raw PCM via ScriptProcessorNode</h1>
<!-- Connection and capture controls; the onclick handlers are defined in the inline script below. -->
<p>
<button onclick="connectWebSocket()">Connect WebSocket</button>
<button onclick="startCapture()">Start Raw PCM</button>
<button onclick="stopCapture()">Stop Raw PCM</button>
</p>
<!-- Free-form text messages sent over the same WebSocket connection. -->
<p>
<input type="text" id="textMessage" placeholder="Type your message here" />
<button onclick="sendText()">Send Text</button>
</p>
<!-- On-page log mirror; logMessage() appends one line per call here. -->
<pre id="log" style="background:#f0f0f0;padding:1em;"></pre>
<script>
// Shared state for the WebSocket connection and the two audio pipelines
// (one AudioContext for playback of server audio, one for mic capture).
let socket; // WebSocket to the backend (ws://localhost:8000/ws)
let playbackCtx = null; // AudioContext used to play audio chunks received from the server
let nextPlaybackTime = 0; // playbackCtx timestamp where the next received chunk should start (gapless queueing)
let audioCtx; // AudioContext used for microphone capture
let scriptNode; // ScriptProcessorNode delivering raw mic PCM frames
let micStream; // MediaStream returned by getUserMedia
let isCapturing = false; // true while mic capture is running
function logMessage(...args) {
  // Mirror every message to both the on-page <pre id="log"> element
  // and the browser console.
  const line = args.join(" ");
  document.getElementById("log").textContent += line + "\n";
  console.log(...args);
}
function connectWebSocket() {
  logMessage("[WebSocket] Connecting...");
  // Adjust port/host if your FastAPI server is elsewhere.
  socket = new WebSocket("ws://localhost:8000/ws");

  socket.onopen = () => {
    logMessage("[WebSocket] Opened connection");
    // Create the playback context lazily; the Connect click provides the
    // user gesture browsers require before audio output is allowed.
    playbackCtx ??= new (window.AudioContext || window.webkitAudioContext)();
    nextPlaybackTime = playbackCtx.currentTime;
  };

  socket.onerror = (err) => logMessage("[WebSocket] Error:", err);
  socket.onclose = () => logMessage("[WebSocket] Closed");

  socket.onmessage = (event) => {
    try {
      const msg = JSON.parse(event.data);
      if (msg.type === "audio" && msg.payload) {
        // Decode base64 -> 16-bit PCM -> float samples in [-1, 1).
        const samples = new Int16Array(base64ToArrayBuffer(msg.payload));
        const floats = Float32Array.from(samples, (s) => s / 32768);
        const sampleRate = 24000; // RECEIVED_SAMPLE_RATE from app.py
        const audioBuffer = playbackCtx.createBuffer(1, floats.length, sampleRate);
        audioBuffer.copyToChannel(floats, 0);
        // Queue this chunk immediately after the previously scheduled one
        // so consecutive chunks play back without gaps.
        const scheduledTime = Math.max(playbackCtx.currentTime, nextPlaybackTime);
        const src = playbackCtx.createBufferSource();
        src.buffer = audioBuffer;
        src.connect(playbackCtx.destination);
        src.start(scheduledTime);
        nextPlaybackTime = scheduledTime + audioBuffer.duration;
        logMessage("[Audio] Scheduled playback. Start time:", scheduledTime, "Duration:", audioBuffer.duration);
      } else if (msg.type === "text" && msg.content) {
        logMessage("[Text] Received:", msg.content);
      } else {
        logMessage("[WebSocket] Received:", event.data);
      }
    } catch (err) {
      logMessage("[WebSocket] Error processing message:", err);
    }
  };
}
async function startCapture() {
  // Start streaming raw 16-bit PCM from the microphone over the WebSocket.
  // Requires an open socket (see connectWebSocket); no-op if already running.
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    logMessage("WebSocket not connected. Click 'Connect WebSocket' first.");
    return;
  }
  if (isCapturing) {
    logMessage("Already capturing!");
    return;
  }
  isCapturing = true;
  logMessage("Starting microphone capture as raw PCM...");
  try {
    micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // NOTE(review): this context uses the device's default sample rate;
    // the server presumably expects a specific rate — confirm against app.py.
    audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    // Create a media source from the mic stream.
    const source = audioCtx.createMediaStreamSource(micStream);
    // Create a ScriptProcessorNode (deprecated but widely supported).
    const bufferSize = 4096; // You can adjust this
    const inputChannels = 1;
    const outputChannels = 1;
    scriptNode = audioCtx.createScriptProcessor(bufferSize, inputChannels, outputChannels);
    scriptNode.onaudioprocess = (audioEvent) => {
      if (!isCapturing) return;
      // Get raw floating-point samples [ -1.0 .. +1.0 ]
      const inputBuffer = audioEvent.inputBuffer.getChannelData(0);
      // Convert float samples to 16-bit signed
      const pcm16 = floatTo16BitPCM(inputBuffer);
      const bytes = new Uint8Array(pcm16.buffer);
      // Build the binary string in chunks: spreading all bytes into a single
      // String.fromCharCode(...) call can exceed the engine's argument limit
      // and throw a RangeError for large buffers.
      let binary = "";
      const CHUNK = 0x8000;
      for (let i = 0; i < bytes.length; i += CHUNK) {
        binary += String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK));
      }
      // Encode as base64 and send over WebSocket
      socket.send(JSON.stringify({
        type: "audio",
        payload: btoa(binary)
      }));
    };
    // Connect the pipeline: mic -> script -> (optional) audioCtx.destination
    source.connect(scriptNode);
    scriptNode.connect(audioCtx.destination);
    logMessage("Recording...");
  } catch (err) {
    // Reset the flag so a failed permission prompt doesn't permanently
    // block future start attempts with "Already capturing!".
    isCapturing = false;
    logMessage("Error getting user mic:", err);
  }
}
function stopCapture() {
  // Ignore redundant stop requests.
  if (!isCapturing) return;
  isCapturing = false;
  logMessage("Stopped microphone capture.");
  // Tear down the capture pipeline: release the mic tracks, detach the
  // processor node, and close the capture AudioContext.
  if (micStream) {
    for (const track of micStream.getTracks()) {
      track.stop();
    }
    micStream = null;
  }
  if (scriptNode) {
    scriptNode.disconnect();
    scriptNode.onaudioprocess = null;
    scriptNode = null;
  }
  if (audioCtx) {
    audioCtx.close();
    audioCtx = null;
  }
}
function floatTo16BitPCM(floatSamples) {
  // Clamp each float sample to [-1, 1] and scale it into the signed
  // 16-bit range: negatives map toward -32768, positives toward 32767.
  const pcm = new Int16Array(floatSamples.length);
  for (let i = 0; i < floatSamples.length; i++) {
    const clamped = Math.min(1, Math.max(-1, floatSamples[i]));
    pcm[i] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  }
  return pcm;
}
function sendText() {
  // Send the contents of the text box as a JSON "text" message.
  const textInput = document.getElementById("textMessage");
  const text = textInput.value.trim();
  if (!text || !socket || socket.readyState !== WebSocket.OPEN) {
    logMessage("WebSocket not connected or text is empty.");
    return;
  }
  socket.send(JSON.stringify({ type: "text", content: text }));
  logMessage("[Text] Sent:", text);
  textInput.value = "";
}
function base64ToArrayBuffer(b64) {
  // Decode a base64 string into raw bytes and return the backing buffer.
  // atob yields a "binary string" (one char per byte, codes 0-255), so
  // charCodeAt recovers each byte exactly.
  const binaryString = window.atob(b64);
  const bytes = Uint8Array.from(binaryString, (ch) => ch.charCodeAt(0));
  return bytes.buffer;
}
</script>
</body>
</html>