Spaces:
Running
Running
/**
 * Recursively releases the WASM heap memory referenced by a config descriptor.
 *
 * A descriptor is an object produced by one of the init* helpers in this file:
 * it always has a `ptr`, may have a `buffer`, and may hold nested descriptors
 * under a fixed set of well-known keys.
 *
 * @param config {Object} Descriptor whose allocations should be released.
 * @param Module {Object} Emscripten module providing `_free`.
 */
function freeConfig(config, Module) {
  // Keys under which nested descriptors may be stored, visited in this order.
  const nestedKeys = [
    'config',
    'transducer',
    'paraformer',
    'ctc',
    'feat',
    'model',
    'nemoCtc',
    'whisper',
    'tdnn',
    'senseVoice',
    'lm',
    'ctcFstDecoder',
  ];

  // The string buffer (if any) goes first, then the children, then the
  // descriptor's own struct memory.
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  for (const key of nestedKeys) {
    if (key in config) {
      freeConfig(config[key], Module);
    }
  }

  Module._free(config.ptr);
}
// The caller owns the returned `buffer` and `ptr` and must free both.
/**
 * Writes the online transducer model paths into the WASM heap.
 *
 * The three strings (encoder, decoder, joiner) are packed back to back,
 * NUL-terminated, into one buffer; a second allocation holds three 4-byte
 * pointers to them, in that order. Missing fields are written as ''.
 *
 * @param config {Object} May contain `encoder`, `decoder`, `joiner` strings.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  const paths = [
    config.encoder || '',
    config.decoder || '',
    config.joiner || '',
  ];
  // +1 per string for the terminating NUL.
  const sizes = paths.map((path) => Module.lengthBytesUTF8(path) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = paths.length * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  let cursor = buffer;
  paths.forEach((path, i) => {
    Module.stringToUTF8(path, cursor, sizes[i]);
    Module.setValue(ptr + 4 * i, cursor, 'i8*');
    cursor += sizes[i];
  });

  return {buffer, ptr, len};
}
/**
 * Writes the online paraformer model paths into the WASM heap.
 *
 * The encoder and decoder strings are stored NUL-terminated, back to back, in
 * one buffer; a second allocation holds two 4-byte pointers to them. Missing
 * fields are written as ''. The caller must free `buffer` and `ptr`.
 *
 * @param config {Object} May contain `encoder` and `decoder` strings.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  const encoder = config.encoder || '';
  const decoder = config.decoder || '';

  // +1 for each terminating NUL.
  const encoderBytes = Module.lengthBytesUTF8(encoder) + 1;
  const decoderBytes = Module.lengthBytesUTF8(decoder) + 1;

  const buffer = Module._malloc(encoderBytes + decoderBytes);
  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderBytes;

  Module.stringToUTF8(encoder, encoderAddr, encoderBytes);
  Module.stringToUTF8(decoder, decoderAddr, decoderBytes);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Writes the online zipformer2 CTC model path into the WASM heap.
 *
 * Allocates a buffer holding the NUL-terminated `model` string ('' when
 * missing) and a 4-byte struct holding a pointer to it. The caller must free
 * `buffer` and `ptr`.
 *
 * @param config {Object} May contain a `model` string.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  const modelPath = config.model || '';
  const byteCount = Module.lengthBytesUTF8(modelPath) + 1;  // + NUL

  const buffer = Module._malloc(byteCount);
  const len = 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, byteCount);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Builds the online model config struct in the WASM heap.
 *
 * Layout, in write order: the transducer, paraformer and zipformer2-CTC
 * sub-structs copied inline, followed by seven 4-byte fields: tokens (char*),
 * numThreads (i32), provider (char*), debug (i32), modelType (char*),
 * modelingUnit (char*), bpeVocab (char*).
 * NOTE(review): presumably mirrors the matching C API struct - confirm against
 * the C header when changing the field order.
 *
 * Side effect: missing `transducer`, `paraformer` and `zipformer2Ctc` entries
 * are added to `config` with empty-string defaults.
 *
 * @param config {Object} Online model configuration (mutated, see above).
 * @param Module {Object} Emscripten module (needs `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, transducer, paraformer, ctc}`; pass
 *     it to freeConfig() to release everything.
 */
function initSherpaOnnxOnlineModelConfig(config, Module) {
  if (!('transducer' in config)) {
    config.transducer = {encoder: '', decoder: '', joiner: ''};
  }
  if (!('paraformer' in config)) {
    config.paraformer = {encoder: '', decoder: ''};
  }
  if (!('zipformer2Ctc' in config)) {
    config.zipformer2Ctc = {model: ''};
  }

  const transducer =
      initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
  const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
      config.zipformer2Ctc, Module);

  const embedded = [transducer, paraformer, ctc];
  const embeddedLen = embedded.reduce((sum, part) => sum + part.len, 0);
  const len = embeddedLen + 7 * 4;
  const ptr = Module._malloc(len);

  // `cursor` walks the struct from its first byte to its last.
  let cursor = ptr;
  for (const part of embedded) {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  }

  // All strings share one allocation, each NUL-terminated.
  const texts = [
    config.tokens || '',
    config.provider || 'cpu',
    config.modelType || '',
    config.modelingUnit || '',
    config.bpeVocab || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addrs = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addrs.push(writeAt);
    writeAt += sizes[i];
  });
  const [tokensAddr, providerAddr, modelTypeAddr, modelingUnitAddr,
         bpeVocabAddr] = addrs;

  const fields = [
    [tokensAddr, 'i8*'],
    [config.numThreads || 1, 'i32'],
    [providerAddr, 'i8*'],
    [config.debug || 0, 'i32'],
    [modelTypeAddr, 'i8*'],
    [modelingUnitAddr, 'i8*'],
    [bpeVocabAddr, 'i8*'],
  ];
  for (const [value, type] of fields) {
    Module.setValue(cursor, value, type);
    cursor += 4;
  }

  return {buffer, ptr, len, transducer, paraformer, ctc};
}
/**
 * Writes the feature-extraction settings into the WASM heap as two
 * consecutive 32-bit integers: sample rate, then feature dimension.
 *
 * Falsy values fall back to 16000 and 80. The caller must free `ptr`.
 *
 * @param config {Object} May contain `sampleRate` and `featureDim`.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{ptr, len}` where `len` is the struct size in bytes.
 */
function initSherpaOnnxFeatureConfig(config, Module) {
  const fields = [
    config.sampleRate || 16000,
    config.featureDim || 80,
  ];

  const len = fields.length * 4;  // 2 int32 values
  const ptr = Module._malloc(len);

  fields.forEach((value, i) => {
    Module.setValue(ptr + 4 * i, value, 'i32');
  });

  return {ptr, len};
}
/**
 * Writes the CTC FST decoder settings into the WASM heap.
 *
 * The struct holds a 4-byte pointer to the NUL-terminated `graph` string
 * followed by `maxActive` as a 32-bit integer. The caller must free `buffer`
 * and `ptr`.
 *
 * @param config {Object} May contain `graph` (string) and `maxActive`.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{ptr, len, buffer}` where `len` is the struct size.
 */
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
  const len = 2 * 4;  // 1 pointer + 1 int32
  const ptr = Module._malloc(len);

  // Use the same fallback for measuring and for writing: handing an undefined
  // `graph` to stringToUTF8 would throw even though the buffer was sized
  // for ''.
  const graph = config.graph || '';
  const graphLen = Module.lengthBytesUTF8(graph) + 1;  // + NUL
  const buffer = Module._malloc(graphLen);
  Module.stringToUTF8(graph, buffer, graphLen);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.maxActive || 3000, 'i32');

  return {ptr, len, buffer};
}
/**
 * Builds the online recognizer config struct in the WASM heap.
 *
 * Layout, in write order: feature config, model config, then decodingMethod
 * (char*), maxActivePaths (i32), enableEndpoint (i32), the three endpoint
 * rule thresholds (float), hotwordsFile (char*), hotwordsScore (float), the
 * CTC FST decoder config, ruleFsts (char*), ruleFars (char*), blankPenalty
 * (float). Every scalar field occupies 4 bytes.
 * NOTE(review): presumably mirrors the matching C API struct - confirm against
 * the C header when changing the field order.
 *
 * Side effect: missing `featConfig` and `ctcFstDecoderConfig` entries are
 * added to `config` with defaults. `config.modelConfig` must be an object.
 *
 * @param config {Object} Recognizer configuration (mutated, see above).
 * @param Module {Object} Emscripten module (needs `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, feat, model, ctcFstDecoder}`; pass it
 *     to freeConfig() to release everything.
 */
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {sampleRate: 16000, featureDim: 80};
  }
  if (!('ctcFstDecoderConfig' in config)) {
    config.ctcFstDecoderConfig = {graph: '', maxActive: 3000};
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
  const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
      config.ctcFstDecoderConfig, Module);

  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 3 * 4;
  const ptr = Module._malloc(len);

  // `cursor` is the address of the next unwritten byte of the struct.
  let cursor = ptr;
  const embed = (part) => {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  };
  const put = (value, type) => {
    Module.setValue(cursor, value, type);
    cursor += 4;
  };

  embed(feat);
  embed(model);

  // All strings share one allocation, each NUL-terminated.
  const texts = [
    config.decodingMethod || 'greedy_search',
    config.hotwordsFile || '',
    config.ruleFsts || '',
    config.ruleFars || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addrs = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addrs.push(writeAt);
    writeAt += sizes[i];
  });
  const [decodingMethodAddr, hotwordsFileAddr, ruleFstsAddr, ruleFarsAddr] =
      addrs;

  put(decodingMethodAddr, 'i8*');
  put(config.maxActivePaths || 4, 'i32');
  put(config.enableEndpoint || 0, 'i32');
  put(config.rule1MinTrailingSilence || 2.4, 'float');
  put(config.rule2MinTrailingSilence || 1.2, 'float');
  put(config.rule3MinUtteranceLength || 20, 'float');
  put(hotwordsFileAddr, 'i8*');
  put(config.hotwordsScore || 1.5, 'float');
  embed(ctcFstDecoder);
  put(ruleFstsAddr, 'i8*');
  put(ruleFarsAddr, 'i8*');
  put(config.blankPenalty || 0, 'float');

  return {buffer, ptr, len, feat, model, ctcFstDecoder};
}
/**
 * Creates an OnlineRecognizer.
 *
 * When `myConfig` is truthy it is used as the recognizer configuration as-is.
 * Otherwise a built-in default is used: a transducer model loaded from
 * ./encoder.onnx, ./decoder.onnx and ./joiner.onnx with ./tokens.txt, decoded
 * with modified_beam_search.
 *
 * @param Module {Object} Emscripten module.
 * @param myConfig {Object} Optional recognizer configuration.
 * @returns {OnlineRecognizer}
 */
function createOnlineRecognizer(Module, myConfig) {
  if (myConfig) {
    return new OnlineRecognizer(myConfig, Module);
  }

  const transducer = {encoder: '', decoder: '', joiner: ''};
  const paraformer = {encoder: '', decoder: ''};
  const zipformer2Ctc = {model: ''};

  // Which model family the default config uses. Edit this constant to try
  // another one: 0 = transducer, 1 = paraformer, 2 = zipformer2 CTC.
  const modelKind = 0;
  switch (modelKind) {
    case 0:
      transducer.encoder = './encoder.onnx';
      transducer.decoder = './decoder.onnx';
      transducer.joiner = './joiner.onnx';
      break;
    case 1:
      paraformer.encoder = './encoder.onnx';
      paraformer.decoder = './decoder.onnx';
      break;
    case 2:
      zipformer2Ctc.model = './encoder.onnx';
      break;
  }

  const defaultConfig = {
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      transducer,
      paraformer,
      zipformer2Ctc,
      tokens: './tokens.txt',
      numThreads: 1,
      provider: 'cpu',
      debug: 1,
      modelType: '',
      modelingUnit: 'cjkchar',
      bpeVocab: '',
    },
    decodingMethod: 'modified_beam_search',
    maxActivePaths: 4,
    enableEndpoint: 0,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    hotwordsFile: '',
    hotwordsScore: 1.5,
    ctcFstDecoderConfig: {
      graph: '',
      maxActive: 3000,
    },
    ruleFsts: '',
    ruleFars: '',
  };

  return new OnlineRecognizer(defaultConfig, Module);
}
/**
 * Writes the offline transducer model paths into the WASM heap.
 *
 * The encoder, decoder and joiner strings are packed NUL-terminated into one
 * buffer; a second allocation holds three 4-byte pointers to them, in that
 * order. Missing fields are written as ''. The caller must free `buffer` and
 * `ptr`.
 *
 * @param config {Object} May contain `encoder`, `decoder`, `joiner` strings.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const fieldNames = ['encoder', 'decoder', 'joiner'];

  // First pass: measure every string (+1 for its NUL terminator).
  const entries = fieldNames.map((name) => {
    const text = config[name] || '';
    return {text, size: Module.lengthBytesUTF8(text) + 1};
  });
  let totalBytes = 0;
  for (const entry of entries) {
    totalBytes += entry.size;
  }

  const buffer = Module._malloc(totalBytes);
  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  // Second pass: copy each string and record its address in the struct.
  let stringAddr = buffer;
  let slotAddr = ptr;
  for (const {text, size} of entries) {
    Module.stringToUTF8(text, stringAddr, size);
    Module.setValue(slotAddr, stringAddr, 'i8*');
    stringAddr += size;
    slotAddr += 4;
  }

  return {buffer, ptr, len};
}
/**
 * Writes the offline paraformer model path into the WASM heap.
 *
 * Allocates a buffer for the NUL-terminated `model` string ('' when missing)
 * and a 4-byte struct holding a pointer to it. The caller must free `buffer`
 * and `ptr`.
 *
 * @param config {Object} May contain a `model` string.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  const modelPath = config.model || '';
  const stringBytes = Module.lengthBytesUTF8(modelPath) + 1;  // + NUL

  const buffer = Module._malloc(stringBytes);
  const len = 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, stringBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Writes the offline NeMo EncDec CTC model path into the WASM heap.
 *
 * Allocates a buffer for the NUL-terminated `model` string ('' when missing)
 * and a 4-byte struct holding a pointer to it. The caller must free `buffer`
 * and `ptr`.
 *
 * @param config {Object} May contain a `model` string.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  const path = config.model || '';
  const pathBytes = Module.lengthBytesUTF8(path) + 1;  // + NUL

  const buffer = Module._malloc(pathBytes);
  const len = 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(path, buffer, pathBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Writes the offline Whisper model settings into the WASM heap.
 *
 * The struct holds four 4-byte pointers (encoder, decoder, language, task)
 * followed by `tailPaddings` as a 32-bit integer. The strings are packed
 * NUL-terminated into one buffer; missing strings are written as ''. A falsy
 * `tailPaddings` is written as 2000. The caller must free `buffer` and `ptr`.
 *
 * @param config {Object} May contain `encoder`, `decoder`, `language`, `task`
 *     strings and a `tailPaddings` number.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const texts = [
    config.encoder || '',
    config.decoder || '',
    config.language || '',
    config.task || '',
  ];
  // +1 per string for the terminating NUL.
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 5 * 4;  // 4 pointers + 1 int32
  const ptr = Module._malloc(len);

  let stringAddr = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, stringAddr, sizes[i]);
    Module.setValue(ptr + 4 * i, stringAddr, 'i8*');
    stringAddr += sizes[i];
  });

  Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');

  return {buffer, ptr, len};
}
/**
 * Writes the offline TDNN model path into the WASM heap.
 *
 * Allocates a buffer for the NUL-terminated `model` string ('' when missing)
 * and a 4-byte struct holding a pointer to it. The caller must free `buffer`
 * and `ptr`.
 *
 * @param config {Object} May contain a `model` string.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  const model = config.model || '';
  const modelBytes = Module.lengthBytesUTF8(model) + 1;  // + NUL

  const buffer = Module._malloc(modelBytes);
  const len = 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Writes the offline SenseVoice model settings into the WASM heap.
 *
 * The struct holds two 4-byte pointers (model, language) followed by
 * `useInverseTextNormalization` as a 32-bit integer (0 when falsy). Both
 * strings are packed NUL-terminated into one buffer; missing strings are
 * written as ''. The caller must free `buffer` and `ptr`.
 *
 * @param config {Object} May contain `model`, `language` and
 *     `useInverseTextNormalization`.
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
  const model = config.model || '';
  const language = config.language || '';

  // +1 for each terminating NUL.
  const modelBytes = Module.lengthBytesUTF8(model) + 1;
  const languageBytes = Module.lengthBytesUTF8(language) + 1;

  const buffer = Module._malloc(modelBytes + languageBytes);
  const len = 3 * 4;  // 2 pointers + 1 int32
  const ptr = Module._malloc(len);

  const modelAddr = buffer;
  const languageAddr = buffer + modelBytes;

  Module.stringToUTF8(model, modelAddr, modelBytes);
  Module.stringToUTF8(language, languageAddr, languageBytes);

  Module.setValue(ptr, modelAddr, 'i8*');
  Module.setValue(ptr + 4, languageAddr, 'i8*');
  Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32');

  return {buffer, ptr, len};
}
/**
 * Writes the offline language-model settings into the WASM heap.
 *
 * The struct holds a 4-byte pointer to the NUL-terminated `model` string
 * ('' when missing) followed by `scale` as a float (1 when falsy). The caller
 * must free `buffer` and `ptr`.
 *
 * @param config {Object} May contain `model` (string) and `scale` (number).
 * @param Module {Object} Emscripten module.
 * @returns {Object} `{buffer, ptr, len}` where `len` is the struct size.
 */
function initSherpaOnnxOfflineLMConfig(config, Module) {
  const modelPath = config.model || '';
  const modelBytes = Module.lengthBytesUTF8(modelPath) + 1;  // + NUL

  const buffer = Module._malloc(modelBytes);
  const len = 2 * 4;  // 1 pointer + 1 float
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, modelBytes);

  const scale = config.scale || 1;
  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, scale, 'float');

  return {buffer, ptr, len};
}
/**
 * Builds the offline model config struct in the WASM heap.
 *
 * Layout, in write order: the transducer, paraformer, NeMo CTC, Whisper and
 * TDNN sub-structs copied inline, then eight 4-byte fields: tokens (char*),
 * numThreads (i32), debug (i32), provider (char*), modelType (char*),
 * modelingUnit (char*), bpeVocab (char*), teleSpeechCtc (char*), and finally
 * the SenseVoice sub-struct.
 * NOTE(review): presumably mirrors the matching C API struct - confirm against
 * the C header when changing the field order.
 *
 * Side effect: missing `transducer`, `paraformer`, `nemoCtc`, `whisper`,
 * `tdnn` and `senseVoice` entries are added to `config` with defaults.
 *
 * @param config {Object} Offline model configuration (mutated, see above).
 * @param Module {Object} Emscripten module (needs `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, transducer, paraformer, nemoCtc,
 *     whisper, tdnn, senseVoice}`; pass it to freeConfig() to release
 *     everything.
 */
function initSherpaOnnxOfflineModelConfig(config, Module) {
  if (!('transducer' in config)) {
    config.transducer = {encoder: '', decoder: '', joiner: ''};
  }
  if (!('paraformer' in config)) {
    config.paraformer = {model: ''};
  }
  if (!('nemoCtc' in config)) {
    config.nemoCtc = {model: ''};
  }
  if (!('whisper' in config)) {
    config.whisper = {
      encoder: '',
      decoder: '',
      language: '',
      task: '',
      tailPaddings: -1,
    };
  }
  if (!('tdnn' in config)) {
    config.tdnn = {model: ''};
  }
  if (!('senseVoice' in config)) {
    config.senseVoice = {
      model: '',
      language: '',
      useInverseTextNormalization: 0,
    };
  }

  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
  const senseVoice =
      initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);

  const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
      tdnn.len + 8 * 4 + senseVoice.len;
  const ptr = Module._malloc(len);

  // `cursor` is the address of the next unwritten byte of the struct.
  let cursor = ptr;
  const embed = (part) => {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  };
  const put = (value, type) => {
    Module.setValue(cursor, value, type);
    cursor += 4;
  };

  embed(transducer);
  embed(paraformer);
  embed(nemoCtc);
  embed(whisper);
  embed(tdnn);

  // All strings share one allocation, each NUL-terminated. Every string,
  // including `tokens`, uses the same fallback for measuring and writing:
  // passing an undefined `tokens` to stringToUTF8 would throw.
  const texts = [
    config.tokens || '',
    config.provider || 'cpu',
    config.modelType || '',
    config.modelingUnit || '',
    config.bpeVocab || '',
    config.teleSpeechCtc || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addrs = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addrs.push(writeAt);
    writeAt += sizes[i];
  });
  const [tokensAddr, providerAddr, modelTypeAddr, modelingUnitAddr,
         bpeVocabAddr, teleSpeechCtcAddr] = addrs;

  put(tokensAddr, 'i8*');
  put(config.numThreads || 1, 'i32');
  put(config.debug || 0, 'i32');
  put(providerAddr, 'i8*');
  put(modelTypeAddr, 'i8*');
  put(modelingUnitAddr, 'i8*');
  put(bpeVocabAddr, 'i8*');
  put(teleSpeechCtcAddr, 'i8*');
  embed(senseVoice);

  return {
    buffer,
    ptr,
    len,
    transducer,
    paraformer,
    nemoCtc,
    whisper,
    tdnn,
    senseVoice,
  };
}
/**
 * Builds the offline recognizer config struct in the WASM heap.
 *
 * Layout, in write order: feature config, model config, LM config, then
 * seven 4-byte fields: decodingMethod (char*), maxActivePaths (i32),
 * hotwordsFile (char*), hotwordsScore (float), ruleFsts (char*), ruleFars
 * (char*), blankPenalty (float).
 * NOTE(review): presumably mirrors the matching C API struct - confirm against
 * the C header when changing the field order.
 *
 * Side effect: missing `featConfig` and `lmConfig` entries are added to
 * `config` with defaults. `config.modelConfig` must be an object.
 *
 * @param config {Object} Recognizer configuration (mutated, see above).
 * @param Module {Object} Emscripten module (needs `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, feat, model, lm}`; pass it to
 *     freeConfig() to release everything.
 */
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {sampleRate: 16000, featureDim: 80};
  }
  if (!('lmConfig' in config)) {
    config.lmConfig = {model: '', scale: 1.0};
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  const len = feat.len + model.len + lm.len + 7 * 4;
  const ptr = Module._malloc(len);

  // The three sub-structs are copied inline at the start of the struct.
  let cursor = ptr;
  for (const part of [feat, model, lm]) {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  }

  // All strings share one allocation, each NUL-terminated.
  const texts = [
    config.decodingMethod || 'greedy_search',
    config.hotwordsFile || '',
    config.ruleFsts || '',
    config.ruleFars || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addrs = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addrs.push(writeAt);
    writeAt += sizes[i];
  });
  const [decodingMethodAddr, hotwordsFileAddr, ruleFstsAddr, ruleFarsAddr] =
      addrs;

  const fields = [
    [decodingMethodAddr, 'i8*'],
    [config.maxActivePaths || 4, 'i32'],
    [hotwordsFileAddr, 'i8*'],
    [config.hotwordsScore || 1.5, 'float'],
    [ruleFstsAddr, 'i8*'],
    [ruleFarsAddr, 'i8*'],
    [config.blankPenalty || 0, 'float'],
  ];
  for (const [value, type] of fields) {
    Module.setValue(cursor, value, type);
    cursor += 4;
  }

  return {buffer, ptr, len, feat, model, lm};
}
/**
 * Thin wrapper around a native offline stream handle.
 */
class OfflineStream {
  /**
   * @param handle {Number} Native stream handle.
   * @param Module {Object} Emscripten module.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.Module = Module;
  }

  /**
   * Destroys the native stream. Safe to call more than once.
   */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineStream(this.handle);
      this.handle = null;
    }
  }

  /**
   * Copies the samples into the WASM heap and passes them to the native
   * stream.
   *
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    try {
      // HEAPF32 is indexed in floats, so convert the byte address.
      this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
      this.Module._SherpaOnnxAcceptWaveformOffline(
          this.handle, sampleRate, pointer, samples.length);
    } finally {
      // Release the temporary copy even if the copy or the native call throws.
      this.Module._free(pointer);
    }
  }
}
/**
 * Wrapper around a native offline (non-streaming) recognizer.
 */
class OfflineRecognizer {
  /**
   * @param configObj {Object} Recognizer configuration. It is kept on
   *     `this.config`; the config initializer fills in missing defaults on it.
   * @param Module {Object} Emscripten module.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    let handle;
    try {
      handle = Module._SherpaOnnxCreateOfflineRecognizer(config.ptr);
    } finally {
      // The heap copy of the config is only needed during creation; release
      // it even if creation throws.
      freeConfig(config, Module);
    }

    this.handle = handle;
    this.Module = Module;
  }

  /**
   * Destroys the native recognizer. Safe to call more than once, matching
   * the stream classes' free().
   */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OfflineStream} A new stream bound to this recognizer.
   */
  createStream() {
    const handle = this.Module._SherpaOnnxCreateOfflineStream(this.handle);
    return new OfflineStream(handle, this.Module);
  }

  /**
   * Runs recognition on the given stream.
   *
   * @param stream {OfflineStream}
   */
  decode(stream) {
    this.Module._SherpaOnnxDecodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * @param stream {OfflineStream}
   * @returns {Object} The recognition result parsed from the native JSON.
   */
  getResult(stream) {
    const r =
        this.Module._SherpaOnnxGetOfflineStreamResultAsJson(stream.handle);
    try {
      return JSON.parse(this.Module.UTF8ToString(r));
    } finally {
      // Release the native string even if the JSON is malformed.
      this.Module._SherpaOnnxDestroyOfflineStreamResultJson(r);
    }
  }
}
/**
 * Wrapper around a native online (streaming) stream handle. It keeps one
 * sample buffer in the WASM heap and reuses it between calls, growing it only
 * when a larger chunk arrives.
 */
class OnlineStream {
  /**
   * @param handle {Number} Native stream handle.
   * @param Module {Object} Emscripten module.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.pointer = null;  // heap address of the reusable sample buffer
    this.n = 0;           // capacity of that buffer, in samples
    this.Module = Module;
  }

  /**
   * Destroys the native stream and releases the sample buffer. Does nothing
   * once the handle has been cleared.
   */
  free() {
    if (!this.handle) {
      return;
    }

    this.Module._SherpaOnnxDestroyOnlineStream(this.handle);
    this.handle = null;

    this.Module._free(this.pointer);
    this.pointer = null;
    this.n = 0;
  }

  /**
   * Copies the samples into the heap buffer and passes them to the native
   * stream.
   *
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const sampleCount = samples.length;
    const bytesPerSample = samples.BYTES_PER_ELEMENT;

    // Grow the buffer only when this chunk does not fit.
    if (sampleCount > this.n) {
      this.Module._free(this.pointer);
      this.pointer = this.Module._malloc(sampleCount * bytesPerSample);
      this.n = sampleCount;
    }

    // HEAPF32 is indexed in floats, so convert the byte address.
    this.Module.HEAPF32.set(samples, this.pointer / bytesPerSample);
    this.Module._SherpaOnnxOnlineStreamAcceptWaveform(
        this.handle, sampleRate, this.pointer, sampleCount);
  }

  /**
   * Tells the native stream that no more audio will be supplied.
   */
  inputFinished() {
    this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle);
  }
}
/**
 * Wrapper around a native online (streaming) recognizer.
 */
class OnlineRecognizer {
  /**
   * @param configObj {Object} Recognizer configuration. It is kept on
   *     `this.config`; the config initializer fills in missing defaults on it.
   * @param Module {Object} Emscripten module.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module);
    let handle;
    try {
      handle = Module._SherpaOnnxCreateOnlineRecognizer(config.ptr);
    } finally {
      // The heap copy of the config is only needed during creation; release
      // it even if creation throws.
      freeConfig(config, Module);
    }

    this.handle = handle;
    this.Module = Module;
  }

  /**
   * Destroys the native recognizer. Safe to call more than once, matching
   * the stream classes' free().
   */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOnlineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OnlineStream} A new stream bound to this recognizer.
   */
  createStream() {
    const handle = this.Module._SherpaOnnxCreateOnlineStream(this.handle);
    return new OnlineStream(handle, this.Module);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {Boolean} True when the native readiness check returns 1.
   */
  isReady(stream) {
    return this.Module._SherpaOnnxIsOnlineStreamReady(
               this.handle, stream.handle) === 1;
  }

  /**
   * Runs one decoding call on the stream.
   *
   * @param stream {OnlineStream}
   */
  decode(stream) {
    this.Module._SherpaOnnxDecodeOnlineStream(this.handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {Boolean} True when the native endpoint check returns 1.
   */
  isEndpoint(stream) {
    return this.Module._SherpaOnnxOnlineStreamIsEndpoint(
               this.handle, stream.handle) === 1;
  }

  /**
   * Resets the stream through the native reset call.
   *
   * @param stream {OnlineStream}
   */
  reset(stream) {
    this.Module._SherpaOnnxOnlineStreamReset(this.handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {Object} The current result parsed from the native JSON.
   */
  getResult(stream) {
    const r = this.Module._SherpaOnnxGetOnlineStreamResultAsJson(
        this.handle, stream.handle);
    try {
      return JSON.parse(this.Module.UTF8ToString(r));
    } finally {
      // Release the native string even if the JSON is malformed.
      this.Module._SherpaOnnxDestroyOnlineStreamResultJson(r);
    }
  }
}
| if (typeof process == 'object' && typeof process.versions == 'object' && | |
| typeof process.versions.node == 'string') { | |
| module.exports = { | |
| createOnlineRecognizer, | |
| OfflineRecognizer, | |
| }; | |
| } | |