// streaming-translation/node-server/transcription-client.js
// rcastriotta | publish | 1a3fc6f
const { createClient, LiveTranscriptionEvents } = require("@deepgram/sdk");
const EventEmitter = require("events");
const crypto = require("crypto");
/**
 * Streams audio to a Deepgram live-transcription connection and re-emits
 * the recognised text as events.
 *
 * Events:
 *  - "transcript"  (text)    — emitted for every final (non-interim) result.
 *  - "translation" (segment) — emitted when buffered text can be cut at a
 *    sentence-ending punctuation mark; the segment is the text up to and
 *    including that mark.
 *
 * Configuration is done through public fields (`diarize`, `speakerLabels`,
 * `releaseThresholdMS`, `killThresholdMS`) set before starting a stream.
 */
class TranscriptionClient extends EventEmitter {
  /** Characters treated as the end of a sentence when segmenting. */
  static sentenceEnders = new Set([".", "!", "?", "。", "۔"]);

  constructor() {
    super();
    this.deepgramStream = null;
    this.deepgramSessionId = null;
    // Pending (not yet released) text when diarization is off.
    this.currentTranscript = "";
    // Pending text per speaker id when diarization is on.
    this.currentDiarization = {};
    this.releaseTimeout = null;
    this.killTimeout = null;
    // Silence (ms) after which buffered text is released with a lower
    // minimum length.
    this.releaseThresholdMS = 4000;
    // Silence (ms) after which the stream is closed.
    this.killThresholdMS = 1000 * 60 * 2;
    this.diarize = false;
    // Optional map of speaker id -> display label.
    this.speakerLabels = {};
    this.lastEmittedSpeaker = null;
  }

  /**
   * Opens a new Deepgram live stream, closing any stream that is still open.
   *
   * @param {string} language - Language code forwarded to Deepgram.
   * @returns {string} A freshly generated id identifying this session.
   */
  startTranscriptionStream(language) {
    // Starting twice used to orphan the previous connection.
    if (this.deepgramStream) this.endTranscriptionStream();

    console.log("started deepgram");
    const localSessionId = crypto.randomUUID();
    this.deepgramSessionId = localSessionId;

    const deepgram = createClient(process.env.DEEPGRAM_API_KEY);
    const stream = deepgram.listen.live({
      model: "nova-2",
      punctuate: true,
      language,
      interim_results: true,
      diarize: this.diarize,
      smart_format: true,
      endpointing: "2",
    });
    this.deepgramStream = stream;

    stream.on(LiveTranscriptionEvents.Error, (err) => {
      console.log("Deepgram error: ", err);
    });
    stream.on(LiveTranscriptionEvents.Warning, (warning) => {
      console.log("Deepgram warning: ", warning);
    });
    stream.on(LiveTranscriptionEvents.Open, () => {
      // The stream may have been ended or replaced before the socket opened;
      // in that case there is nothing left to keep alive.
      if (this.deepgramStream !== stream) return;
      this.resetKillTimeout();
    });
    // Handlers are bound to the local `stream`, never to `this.deepgramStream`,
    // which can already be null or point at a newer stream when events fire.
    stream.on(LiveTranscriptionEvents.Transcript, (data) => {
      // Ignore late results from a session that has since been superseded.
      if (this.deepgramSessionId !== localSessionId) return;
      this.handleTranscript(data);
    });

    return localSessionId;
  }

  /**
   * Processes one Deepgram transcript payload: refreshes the silence timers,
   * buffers final text, emits "transcript" and attempts a release.
   *
   * @param {object} data - Payload of a Transcript event; expected to carry
   *   `is_final` and `channel.alternatives[0].{transcript, words}`.
   */
  handleTranscript = (data) => {
    try {
      const response = data?.channel?.alternatives?.[0];
      const text = response?.transcript || "";

      if (text.length > 1) {
        // Speech was heard: push back both the pause-release and kill timers.
        clearTimeout(this.releaseTimeout);
        this.releaseTimeout = setTimeout(() => {
          this.releaseTranslations(true);
        }, this.releaseThresholdMS);
        this.resetKillTimeout();
      }

      // important not to translate interim results
      if (!text || !data.is_final) return;

      // Each buffer is only drained in its own mode, so only fill the one
      // that is in use; otherwise the other grows for the stream's lifetime.
      if (this.diarize) {
        for (const { punctuated_word, word, speaker } of response.words || []) {
          const pending = this.currentDiarization[speaker] ?? "";
          this.currentDiarization[speaker] = `${pending} ${punctuated_word ?? word}`;
        }
      }
      this.emit("transcript", text);
      if (!this.diarize) {
        this.currentTranscript += " " + text;
      }
      this.releaseTranslations();
    } catch (err) {
      console.log(
        "TranscribeTranslate.LiveTranscriptionEvents.Transcript:",
        err
      );
    }
  };

  /** Restarts the inactivity timer that closes the stream when it expires. */
  resetKillTimeout = () => {
    clearTimeout(this.killTimeout);
    this.killTimeout = setTimeout(
      () => this.endTranscriptionStream(),
      this.killThresholdMS
    );
  };

  /**
   * Emits a "translation" event with the buffered text up to the last
   * sentence boundary, if one exists beyond the minimum length, and keeps
   * the remainder buffered.
   *
   * @param {boolean} [triggeredByPause=false] - When true (called after a
   *   pause in speech) the minimum segment length drops from 50 to 5.
   * @returns {Promise<void>}
   */
  releaseTranslations = async (triggeredByPause = false) => {
    try {
      const minCharLimit = triggeredByPause ? 5 : 50;
      let segment = "";
      let speaker = null;

      if (this.diarize) {
        let chosen = null;
        for (const [id, transcript] of Object.entries(this.currentDiarization)) {
          const split = this.checkShouldSegment(transcript, minCharLimit);
          if (split.canRelease) {
            chosen = { ...split, speaker: id };
            break;
          }
        }
        if (!chosen) return;
        // Only replace the released speaker's buffer; other speakers' pending
        // text must survive until their own sentence completes.
        this.currentDiarization[chosen.speaker] = chosen.secondPart;
        segment = chosen.firstPart;
        speaker = this.getSpeakerLabel(chosen.speaker);
      } else {
        const { canRelease, firstPart, secondPart } = this.checkShouldSegment(
          this.currentTranscript,
          minCharLimit
        );
        if (!canRelease) return;
        this.currentTranscript = secondPart;
        segment = firstPart;
      }

      // translate segment
      this.emit("translation", segment);
      this.lastEmittedSpeaker = speaker;
    } catch (err) {
      console.log("TranscribeTranslate.releaseTranslations:", err);
    }
  };

  /**
   * Resolves the display label for a speaker id.
   *
   * @param {string|number} speaker - Speaker id as used in `currentDiarization`.
   * @returns {string} The entry from `speakerLabels`, or the id itself as a
   *   string when no label is configured.
   */
  getSpeakerLabel(speaker) {
    return this.speakerLabels[speaker] ?? String(speaker);
  }

  /**
   * Cancels pending timers and, if a stream is open, closes it and clears
   * the buffered text. Safe to call when no stream is open.
   */
  endTranscriptionStream() {
    clearTimeout(this.releaseTimeout);
    clearTimeout(this.killTimeout);

    const stream = this.deepgramStream;
    if (!stream) return;

    // Reset state before closing so the client is clean even if finish() throws.
    this.deepgramStream = null;
    this.currentTranscript = "";
    this.currentDiarization = {};
    try {
      stream.finish();
    } catch (err) {
      console.log("Failed to end deepgram stream", err);
    }
  }

  /**
   * Splits `str` after the LAST sentence-ending character whose index is
   * strictly greater than `minCharLimit`.
   *
   * @param {string} str - Buffered text to inspect.
   * @param {number} [minCharLimit=25] - Index a punctuation mark must exceed.
   * @returns {{canRelease: boolean, firstPart: string, secondPart: string}}
   *   `firstPart` ends with the punctuation mark; both parts are empty and
   *   `canRelease` is false when no qualifying mark exists.
   */
  checkShouldSegment = (str, minCharLimit = 25) => {
    const text = str ?? "";
    // Scan from the end: the first hit is the last qualifying boundary.
    for (let i = text.length - 1; i > minCharLimit; i -= 1) {
      if (TranscriptionClient.sentenceEnders.has(text[i])) {
        return {
          canRelease: true,
          firstPart: text.slice(0, i + 1),
          secondPart: text.slice(i + 1),
        };
      }
    }
    return { canRelease: false, firstPart: "", secondPart: "" };
  };

  /**
   * Forwards an audio payload to the open stream; silently drops it when
   * there is no stream or the connection is not ready.
   *
   * @param {*} payload - Audio chunk passed through to the stream's `send`.
   */
  send(payload) {
    try {
      const stream = this.deepgramStream;
      // Ready state 1 is presumably the WebSocket OPEN state — confirm
      // against the SDK.
      if (!stream || stream.getReadyState() !== 1) return;
      stream.send(payload);
    } catch (err) {
      console.log("Failed to send to deepgram stream", err);
    }
  }
}
// CommonJS export: the class itself is the module's export value.
module.exports = TranscriptionClient;