<html>
	<head>
		<script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
		<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
	</head>
	<body>
		<gradio-lite>
			<gradio-requirements>
				transformers_js_py
			</gradio-requirements>
			<gradio-file name="app.py" entrypoint>
from transformers_js_py import import_transformers_js
import gradio as gr
import numpy as np

transformers_js = await import_transformers_js("3.0.0")
pipeline = transformers_js.pipeline
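
# Load the text-to-speech pipeline; the model is fetched and run in the browser by Transformers.js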
synthesizer = await pipeline(
    'text-to-speech',
    'Xenova/speecht5_tts',
    { "quantized": False }
)

speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'

async def synthesize(text):
    out = await synthesizer(text, { "speaker_embeddings": speaker_embeddings })
    audio_data_memory_view = out["audio"]
    sampling_rate = out["sampling_rate"]
    # Convert the float32 waveform into a 16-bit integer array for Gradio's (sampling_rate, data) audio format
    audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)
    audio_data_16bit = (audio_data * 32767).astype(np.int16)
    return sampling_rate, audio_data_16bit
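
# Expose the async function as a simple textbox-to-audio Interface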
demo = gr.Interface(synthesize, "textbox", "audio")

demo.launch()
			</gradio-file>
		</gradio-lite>
	</body>
</html>