|
import { Buffer } from 'node:buffer'; |
|
import express from 'express'; |
|
import wavefile from 'wavefile'; |
|
import { getPipeline } from '../transformers.js'; |
|
|
|
/** Express router on which this module registers its `/recognize` and `/synthesize` endpoints. */
export const router = express.Router();
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Decodes a WAV data URI into a single channel of samples.
 *
 * The audio is converted to 32-bit float samples and resampled to 16000 Hz.
 * When more than one channel is present, the first two channels are merged
 * in place into the first channel, which is then returned.
 *
 * @param {string} audio WAV file encoded as a data URI
 * @returns {ArrayLike<number>} Samples of the first (possibly merged) channel,
 *     in whatever container `WaveFile.getSamples()` produced
 * @throws {TypeError} If `audio` is not a non-empty string
 */
function getWaveFile(audio) {
    // Fail early with a clear message instead of an opaque error from the decoder.
    if (typeof audio !== 'string' || audio.length === 0) {
        throw new TypeError('Expected audio to be a non-empty data URI string');
    }

    const wav = new wavefile.WaveFile();
    wav.fromDataURI(audio);
    wav.toBitDepth('32f');
    wav.toSampleRate(16000);

    const samples = wav.getSamples();

    // getSamples() may hand back the samples directly instead of a list of channels.
    if (!Array.isArray(samples)) {
        return samples;
    }

    const [first, second] = samples;
    if (samples.length > 1) {
        const SCALING_FACTOR = Math.sqrt(2);

        // Merge the second channel into the first one in place to avoid allocating a new array.
        for (let i = 0; i < first.length; ++i) {
            first[i] = SCALING_FACTOR * (first[i] + second[i]) / 2;
        }
    }

    return first;
}
|
|
|
/**
 * POST /recognize — transcribes audio with an automatic-speech-recognition pipeline.
 *
 * Request body fields:
 *   - `model`: model identifier forwarded to `getPipeline`
 *   - `audio`: WAV file encoded as a data URI
 *   - `lang`: optional language passed to the pipeline (falls back to `null`)
 *
 * Responds with JSON `{ text }` on success, 400 when `audio` is missing or not a string,
 * and 500 when anything else fails.
 */
router.post('/recognize', async (req, res) => {
    try {
        const TASK = 'automatic-speech-recognition';
        const { model, audio, lang } = req.body;

        // Reject bad requests before loading the model pipeline.
        if (typeof audio !== 'string' || audio.length === 0) {
            console.warn('Speech recognition request did not contain audio data');
            return res.sendStatus(400);
        }

        const pipe = await getPipeline(TASK, model);
        const wav = getWaveFile(audio);

        const start = performance.now();
        const result = await pipe(wav, { language: lang || null, task: 'transcribe' });
        const end = performance.now();

        console.info(`Execution duration: ${(end - start) / 1000} seconds`);
        console.info('Transcribed audio:', result.text);

        return res.json({ text: result.text });
    } catch (error) {
        console.error(error);
        return res.sendStatus(500);
    }
});
|
|
|
/**
 * POST /synthesize — generates speech with a text-to-speech pipeline and returns it as a WAV file.
 *
 * Request body fields:
 *   - `text`: text to synthesize
 *   - `model`: model identifier forwarded to `getPipeline`
 *   - `speaker`: optional speaker embeddings as base64 (raw, or as a `data:` URI),
 *     reinterpreted as 32-bit floats
 *
 * Responds with `audio/wav` data on success, 400 when `text` or `speaker` is malformed,
 * and 500 when anything else fails.
 */
router.post('/synthesize', async (req, res) => {
    try {
        const TASK = 'text-to-speech';
        const { text, model, speaker } = req.body;

        // Reject bad requests before loading the model pipeline.
        if (typeof text !== 'string' || text.length === 0) {
            console.warn('Speech synthesis request did not contain text');
            return res.sendStatus(400);
        }

        let speaker_embeddings = null;
        if (speaker) {
            if (typeof speaker !== 'string') {
                console.warn('Speech synthesis speaker embeddings must be a base64 string');
                return res.sendStatus(400);
            }

            const base64 = speaker.startsWith('data:') ? speaker.split(',')[1] : speaker;
            // Copy the decoded bytes into a standalone Uint8Array so that its underlying
            // ArrayBuffer contains exactly these bytes, starting at offset 0.
            const bytes = new Uint8Array(Buffer.from(base64, 'base64'));

            // A Float32Array view requires a byte length that is a multiple of 4.
            if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
                console.warn('Speech synthesis speaker embeddings have an invalid length');
                return res.sendStatus(400);
            }

            speaker_embeddings = new Float32Array(bytes.buffer);
        }

        const pipe = await getPipeline(TASK, model);

        const start = performance.now();
        const result = await pipe(text, { speaker_embeddings });
        const end = performance.now();
        console.debug(`Execution duration: ${(end - start) / 1000} seconds`);

        // Wrap the generated samples in a single-channel 32-bit float WAV container.
        const wav = new wavefile.WaveFile();
        wav.fromScratch(1, result.sampling_rate, '32f', result.audio);
        const buffer = wav.toBuffer();

        res.set('Content-Type', 'audio/wav');
        return res.send(Buffer.from(buffer));
    } catch (error) {
        console.error(error);
        return res.sendStatus(500);
    }
});
|
|