File size: 6,357 Bytes
0fcbf28 3eeee98 1d88bdc 3eeee98 5d2461b 3eeee98 aefaec7 3eeee98 aefaec7 3eeee98 9dc01a4 3eeee98 9dc01a4 3eeee98 9dc01a4 3eeee98 6be5772 3eeee98 6be5772 5d2461b 3eeee98 5d2461b 3eeee98 5d2461b aefaec7 278802e 5d2461b 3eeee98 cb8e9a3 3eeee98 5d2461b 1d88bdc 3eeee98 0fcbf28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
<!doctype html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width" />
<link rel="stylesheet" href="style.css" />
<meta charset="UTF-8">
<title>Matcha-TTS ONNX En001-English</title>
</head>
<body>
<h1>Matcha-TTS ONNX En001-English (Faster GPU Version)</h1>
<div>This example uses the quantized model (lower quality and slower) because of the GitHub Pages 100MB limit.</div>
<p><a href="https://huggingface.co/Akjava/matcha_tts_common_voice_01_en_001">common_voice_01_en_001</a> - my trained model. You can create one too!</p>
<br>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
<script type="module">
import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
import { webWavPlay } from "./js-esm/web_wav_play.js";
import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
import { loadCmudict } from "./js-esm/cmudict_loader.js";
import { env,textToArpa} from "./js-esm/text_to_arpa.js";
// Settings on the `env` object exported by ./js-esm/text_to_arpa.js:
// local models are enabled and looked up under ./models/, and the ONNX
// backend log level is set to "error".
env.allowLocalModels = true;
env.localModelPath = "./models/";
env.backends.onnx.logLevel = "error";

// Module-level state used by the functions in this script.
let matcha_tts_raw; // MatchaTTSRaw instance; created on the first main() call
let cmudict = {}; // object passed to loadCmudict() and textToArpa()
let speaking = false; // set to true while main() is running
let total_infer_time = 0; // sum of measured times in ms, accumulated by main()
let count_infer = 0; // number of main() runs that produced a non-null result
/**
 * Click handler for the "Text To Speak" button.
 *
 * On the first successful call it creates the MatchaTTSRaw instance, loads the
 * ONNX model and the CMU dictionary. Every call then reads the text from
 * #textInput, passes it through textToArpa() and arpa_to_ipa() (all whitespace
 * is stripped from the result), runs inference with the #temperature and
 * #speed slider values and speaker id 0, and plays a non-null result with
 * webWavPlay(). Runs that produce audio are added to the running average shown
 * by update_infer_bench1().
 *
 * Calls made while an earlier call is still in progress are ignored.
 *
 * @returns {Promise<void>} Settles when synthesis has finished (or rejects with
 *   the error thrown by loading / text conversion / inference).
 */
async function main() {
  // Re-entrancy guard: a click during a running synthesis must do nothing.
  if (speaking) {
    console.log("speaking return");
    return;
  }
  speaking = true;
  console.log("main called");

  try {
    if (!matcha_tts_raw) {
      const tts = new MatchaTTSRaw();
      console.time("load model");
      try {
        await tts.load_model(
          './models/matcha-tts/en001_ep6399_univ_simplify_q8.onnx',
          { executionProviders: ['webgpu', 'wasm'] },
        );
      } finally {
        console.timeEnd("load model");
      }
      await loadCmudict(cmudict, './dictionaries/cmudict-0.7b');
      // Publish the instance only after everything loaded, so that a failed
      // load is retried on the next click instead of being skipped.
      matcha_tts_raw = tts;
    } else {
      console.log("session exist skip load model");
    }

    // The measured time covers text conversion as well as inference.
    const startTime = performance.now();
    const text = document.getElementById('textInput').value;

    console.log("### textToArpa call");
    const arpa_text = await textToArpa(cmudict, text);
    console.log("### arpa returned");
    const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");

    const spks = 0;
    // <input type="range"> exposes its value as a string; hand numbers to infer().
    const speed = Number(document.getElementById('speed').value);
    const temperature = Number(document.getElementById('temperature').value);

    console.time("infer");
    let result;
    try {
      result = await matcha_tts_raw.infer(ipa_text, temperature, speed, spks);
    } finally {
      // Always close the timer, including for null results and failures.
      console.timeEnd("infer");
    }

    if (result != null) {
      total_infer_time += performance.now() - startTime;
      count_infer += 1;
      update_infer_bench1();
      webWavPlay(result);
    }
  } finally {
    // Release the guard even when a step above threw.
    speaking = false;
  }
}
/**
 * Writes the benchmark summary (number of measured runs and their mean
 * duration in milliseconds, two decimals) into the #result element.
 * Reads the module-level counters total_infer_time and count_infer.
 */
function update_infer_bench1() {
  const meanMs = (total_infer_time / count_infer).toFixed(2);
  const summary = `Infer Count ${count_infer} avg infer-time ${meanMs} ms`;
  const target = document.getElementById('result');
  target.innerText = summary;
}
/**
 * Mirrors the value of the #spks control into #spks_label, left-padded with
 * zeros to three characters (e.g. "7" -> "007").
 *
 * This page defines neither element and nothing in this script calls the
 * function, so it returns without doing anything when either element is
 * missing instead of throwing on a null lookup.
 */
function update_range() {
  const slider = document.getElementById('spks');
  const label = document.getElementById('spks_label');
  if (slider === null || label === null) {
    return;
  }
  label.textContent = String(slider.value).padStart(3, '0');
}
/**
 * Copies the current #temperature slider value, unformatted, into the
 * #tempature_label element next to it.
 */
function update_range2() {
  const { value: temperature } = document.getElementById('temperature');
  const label = document.getElementById('tempature_label');
  label.textContent = temperature;
}
/**
 * Copies the current #speed slider value, unformatted, into the
 * #speed_label element next to it.
 */
function update_range3() {
  const { value: speed } = document.getElementById('speed');
  const label = document.getElementById('speed_label');
  label.textContent = speed;
}
/**
 * Wires up the page once it has loaded: the button starts synthesis and each
 * slider keeps its numeric label in sync.
 *
 * The sliders use the `input` event rather than `change`, because `change` on
 * a range input fires only when the user commits the value, which would leave
 * the label stale while the thumb is being dragged.
 */
window.onload = function () {
  document.getElementById('myButton').onclick = main;
  document.getElementById('temperature').oninput = update_range2;
  document.getElementById('speed').oninput = update_range3;
};
</script>
<div id="result"></div>
<br><br>
<input type="text" id="textInput" value ="Hello Huggingface." placeholder="Enter some text here...">
<button id="myButton">Text To Speak</button><br>
<label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
<input type="range" id="temperature" min="0" max="1.0" value="0.5" step="0.1"/>
<label for ="temperature" id="tempature_label">0.5</label><br>
<label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
<input type="range" id="speed" min="0.1" max="2.0" value="1.0" step="0.1"/>
<label for ="speed" id="speed_label">1.0</label>
<br>
<br>
<div id="footer">
<b>Credits</b><br>
<a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> |
<a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px" target="link">CSTR VCTK Corpus</a> |
<a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px" target="link">CMUDict</a> |
<a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px" target="link">Transformers.js</a> |
<a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px" target="link">mini-bart-g2p</a> |
<a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px" target="link">ONNXRuntime-Web</a> |
<a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px" target="link">English-To-IPA-Collections</a> |
<a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px" target="link">Matcha-TTS Paper</a>
</div>
</body>
</html>
|