<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>Raw PCM Capture Demo</title>
</head>
<body>
  <h1>Capture Raw PCM via ScriptProcessorNode</h1>
  <p>
    <button onclick="connectWebSocket()">Connect WebSocket</button>
    <button onclick="startCapture()">Start Raw PCM</button>
    <button onclick="stopCapture()">Stop Raw PCM</button>
  </p>
  <p>
    <input type="text" id="textMessage" placeholder="Type your message here" />
    <button onclick="sendText()">Send Text</button>
  </p>
  <pre id="log" style="background:#f0f0f0;padding:1em;"></pre>
  <script>
let socket; | |
let playbackCtx = null; | |
let nextPlaybackTime = 0; | |
let audioCtx; | |
let scriptNode; | |
let micStream; | |
let isCapturing = false; | |
function logMessage(...args) { | |
const pre = document.getElementById("log"); | |
pre.textContent += args.join(" ") + "\n"; | |
console.log(...args); | |
} | |
function connectWebSocket() { | |
logMessage("[WebSocket] Connecting..."); | |
// Adjust port/host if your FastAPI server is elsewhere | |
socket = new WebSocket("ws://localhost:8000/ws"); | |
socket.onopen = () => { | |
logMessage("[WebSocket] Opened connection"); | |
if (!playbackCtx) { | |
playbackCtx = new (window.AudioContext || window.webkitAudioContext)(); | |
} | |
nextPlaybackTime = playbackCtx.currentTime; | |
}; | |
socket.onerror = (err) => { | |
logMessage("[WebSocket] Error:", err); | |
}; | |
socket.onclose = () => { | |
logMessage("[WebSocket] Closed"); | |
}; | |
socket.onmessage = (event) => { | |
try { | |
const data = JSON.parse(event.data); | |
if (data.type === "audio" && data.payload) { | |
const arrayBuffer = base64ToArrayBuffer(data.payload); | |
const int16View = new Int16Array(arrayBuffer); | |
const float32Buffer = new Float32Array(int16View.length); | |
for (let i = 0; i < int16View.length; i++) { | |
float32Buffer[i] = int16View[i] / 32768; | |
} | |
const sampleRate = 24000; // RECEIVED_SAMPLE_RATE from app.py | |
const audioBuffer = playbackCtx.createBuffer(1, float32Buffer.length, sampleRate); | |
audioBuffer.copyToChannel(float32Buffer, 0); | |
let scheduledTime = playbackCtx.currentTime > nextPlaybackTime ? playbackCtx.currentTime : nextPlaybackTime; | |
const source = playbackCtx.createBufferSource(); | |
source.buffer = audioBuffer; | |
source.connect(playbackCtx.destination); | |
source.start(scheduledTime); | |
nextPlaybackTime = scheduledTime + audioBuffer.duration; | |
logMessage("[Audio] Scheduled playback. Start time:", scheduledTime, "Duration:", audioBuffer.duration); | |
} else if (data.type === "text" && data.content) { | |
logMessage("[Text] Received:", data.content); | |
} else { | |
logMessage("[WebSocket] Received:", event.data); | |
} | |
} catch (err) { | |
logMessage("[WebSocket] Error processing message:", err); | |
} | |
}; | |
} | |
async function startCapture() { | |
if (!socket || socket.readyState !== WebSocket.OPEN) { | |
logMessage("WebSocket not connected. Click 'Connect WebSocket' first."); | |
return; | |
} | |
if (isCapturing) { | |
logMessage("Already capturing!"); | |
return; | |
} | |
isCapturing = true; | |
logMessage("Starting microphone capture as raw PCM..."); | |
try { | |
micStream = await navigator.mediaDevices.getUserMedia({ audio: true }); | |
audioCtx = new (window.AudioContext || window.webkitAudioContext)(); | |
// Create a media source from the mic stream | |
const source = audioCtx.createMediaStreamSource(micStream); | |
// Create a ScriptProcessorNode | |
const bufferSize = 4096; // You can adjust this | |
const inputChannels = 1; | |
const outputChannels = 1; | |
scriptNode = audioCtx.createScriptProcessor(bufferSize, inputChannels, outputChannels); | |
scriptNode.onaudioprocess = (audioEvent) => { | |
if (!isCapturing) return; | |
// Get raw floating-point samples [ -1.0 .. +1.0 ] | |
const inputBuffer = audioEvent.inputBuffer.getChannelData(0); | |
// Convert float samples to 16-bit signed | |
const pcm16 = floatTo16BitPCM(inputBuffer); | |
// Encode as base64 and send over WebSocket | |
const bytes = new Uint8Array(pcm16.buffer); | |
const b64 = btoa(String.fromCharCode(...bytes)); | |
socket.send(JSON.stringify({ | |
type: "audio", | |
payload: b64 | |
})); | |
}; | |
// Connect the pipeline: mic -> script -> (optional) audioCtx.destination | |
source.connect(scriptNode); | |
scriptNode.connect(audioCtx.destination); | |
logMessage("Recording..."); | |
} catch (err) { | |
logMessage("Error getting user mic:", err); | |
} | |
} | |
function stopCapture() { | |
if (!isCapturing) return; | |
isCapturing = false; | |
logMessage("Stopped microphone capture."); | |
if (scriptNode) { | |
scriptNode.disconnect(); | |
scriptNode.onaudioprocess = null; | |
scriptNode = null; | |
} | |
if (micStream) { | |
// Stop all tracks | |
micStream.getTracks().forEach(track => track.stop()); | |
micStream = null; | |
} | |
if (audioCtx) { | |
audioCtx.close(); | |
audioCtx = null; | |
} | |
} | |
function floatTo16BitPCM(floatSamples) { | |
// Convert an array of floats [-1, 1] to a Int16Array | |
const out = new Int16Array(floatSamples.length); | |
for (let i = 0; i < floatSamples.length; i++) { | |
let s = Math.max(-1, Math.min(1, floatSamples[i])); | |
// scale range | |
s = s < 0 ? s * 0x8000 : s * 0x7FFF; | |
out[i] = s; | |
} | |
return out; | |
} | |
function sendText() { | |
const textInput = document.getElementById("textMessage"); | |
const text = textInput.value.trim(); | |
if (text && socket && socket.readyState === WebSocket.OPEN) { | |
socket.send(JSON.stringify({ type: "text", content: text })); | |
logMessage("[Text] Sent:", text); | |
textInput.value = ""; | |
} else { | |
logMessage("WebSocket not connected or text is empty."); | |
} | |
} | |
function base64ToArrayBuffer(b64) { | |
const binaryString = window.atob(b64); | |
const len = binaryString.length; | |
const bytes = new Uint8Array(len); | |
for (let i = 0; i < len; i++) { | |
bytes[i] = binaryString.charCodeAt(i); | |
} | |
return bytes.buffer; | |
} | |
  </script>
</body>
</html>