const { createClient, LiveTranscriptionEvents } = require("@deepgram/sdk"); const EventEmitter = require("events"); const crypto = require("crypto"); class TranscriptionClient extends EventEmitter { constructor() { super(); this.deepgramStream = null; this.deepgramSessionId = null; this.currentTranscript = ""; this.currentDiarization = {}; this.releaseTimeout = null; this.killTimeout = null; this.releaseThresholdMS = 4000; this.killThresholdMS = 1000 * 60 * 2; this.diarize = false; this.speakerLabels = {}; } startTranscriptionStream(language) { console.log("started deepgram"); const localSessionId = crypto.randomUUID(); this.deepgramSessionId = localSessionId; const deepgram = createClient(process.env.DEEPGRAM_API_KEY); this.deepgramStream = deepgram.listen.live({ model: "nova-2", punctuate: true, language, interim_results: true, diarize: this.diarize, smart_format: true, endpointing: "2", }); this.deepgramStream.on(LiveTranscriptionEvents.Error, (err) => { console.log("Deepgram error: ", err); }); this.deepgramStream.on(LiveTranscriptionEvents.Warning, (err) => { console.log("Deepgram error: ", err); }); this.deepgramStream.on(LiveTranscriptionEvents.Open, () => { this.resetKillTimeout(); this.deepgramStream.on( LiveTranscriptionEvents.Transcript, async (data) => { try { const response = data.channel.alternatives[0]; const text = response?.transcript || ""; if (text.length > 1) { clearTimeout(this.releaseTimeout); this.releaseTimeout = setTimeout(() => { this.releaseTranslations(true); }, this.releaseThresholdMS); this.resetKillTimeout(); } // important not to translate interim results if (response.transcript && data.is_final) { // console.log(response.transcript); const words = data.channel?.alternatives[0]?.words || []; words.forEach(({ punctuated_word, speaker, start, end }) => { if (!this.currentDiarization[speaker]) this.currentDiarization[speaker] = ""; this.currentDiarization[speaker] += " " + punctuated_word; }); this.emit("transcript", text) this.currentTranscript += " " + text; this.releaseTranslations(); // this.fullTranscript += " " + this.currentTranscript; } } catch (err) { console.log( "TranscribeTranslate.LiveTranscriptionEvents.Transcript:", err ); } } ); }); return this.deepgramSessionId; } resetKillTimeout = () => { clearTimeout(this.killTimeout); this.killTimeout = setTimeout( () => this.endTranscriptionStream(), this.killThresholdMS ); }; releaseTranslations = async (triggeredByPause = false) => { try { let segment = ""; let speaker = null; if (this.diarize) { const processedSpeakers = Object.entries(this.currentDiarization).map( ([speaker, transcript]) => ({ ...this.checkShouldSegment(transcript, triggeredByPause ? 5 : 50), speaker, }) ); const chosen = processedSpeakers.find((s) => s.canRelease); if (!chosen) return; this.currentDiarization = { [chosen.speaker]: chosen.secondPart }; segment = chosen.firstPart; speaker = this.getSpeakerLabel(chosen.speaker); } else { const { canRelease, firstPart, secondPart } = this.checkShouldSegment( this.currentTranscript, triggeredByPause ? 5 : 50 ); if (!canRelease) return; this.currentTranscript = secondPart; segment = firstPart; } // translate segment this.emit("translation", segment) this.lastEmittedSpeaker = speaker; } catch (err) { console.log("TranscribeTranslate.releaseTranslations:", err); } }; endTranscriptionStream() { try { clearTimeout(this.releaseTimeout); clearTimeout(this.killTimeout); if (!this.deepgramStream) return; this.deepgramStream.finish(); this.deepgramStream = null; this.currentTranscript = ""; } catch (err) { console.log("Failed to end deepgram stream", err); } } checkShouldSegment = (str, minCharLimit = 25) => { let firstPart = ""; let secondPart = ""; const punct = new Set([".", "!", "?", "。", "۔"]); for (let i = 0; i < str.length; i += 1) { const char = str[i]; if (i > minCharLimit) { if (punct.has(char)) { firstPart = str.slice(0, i + 1); secondPart = str.slice(i + 1); } } } return { canRelease: !!firstPart.length, firstPart, secondPart }; }; send(payload) { try { if (!this.deepgramStream) return; if (this.deepgramStream.getReadyState() === 1) { this.deepgramStream.send(payload); } } catch (err) { console.log("Failed to start deepgram stream", err); } } } module.exports = TranscriptionClient;