SeyedAli's picture
Update app.py
16803e6
raw
history blame
712 Bytes
import tempfile ,os
import gradio as gr
from transformers import VitsModel, AutoTokenizer,pipeline
import torch
import numpy as np
import scipy
_TTS_MODEL_ID = "SeyedAli/Persian-Speech-synthesis"

# Lazily-built text-to-speech pipeline, shared by every call to TTS().
_tts_pipeline = None


def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, building it on first use."""
    global _tts_pipeline
    if _tts_pipeline is None:
        # Loading the checkpoint is expensive, so do it once instead of once
        # per request.
        model = VitsModel.from_pretrained(_TTS_MODEL_ID)
        tokenizer = AutoTokenizer.from_pretrained(_TTS_MODEL_ID)
        _tts_pipeline = pipeline(
            "text-to-speech", model=model, tokenizer=tokenizer
        )
    return _tts_pipeline


def TTS(text):
    """Synthesize speech for *text* and return the path of the WAV file.

    The audio is written to a new temporary ``.wav`` file created with
    ``delete=False``, so the file outlives this call and is not removed here.

    Args:
        text: The text to synthesize.

    Returns:
        The filesystem path (``str``) of the written ``.wav`` file.
    """
    # `import scipy` alone does not guarantee the `scipy.io.wavfile`
    # submodule is loaded, so import it explicitly.
    from scipy.io import wavfile

    # Run synthesis once and reuse the result for both rate and samples.
    result = _get_tts_pipeline()(text)
    audio = np.asarray(result["audio"])
    # NOTE(review): some pipeline versions appear to return audio with a
    # leading batch axis of size 1; wavfile.write would treat that axis as
    # the sample axis, so drop it. Confirm against the installed version.
    if audio.ndim == 2 and audio.shape[0] == 1:
        audio = audio[0]

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wavfile.write(fp, rate=result["sampling_rate"], data=audio)
    return fp.name
# Build the web UI: a single text input, with the value returned by TTS shown
# as text.
# NOTE(review): TTS returns the path of a .wav file, so outputs="text" shows
# the path rather than playing the audio -- confirm this is intended.
iface = gr.Interface(
    fn=TTS,
    inputs="text",
    outputs="text",
)

# Start the Gradio server with link sharing disabled.
iface.launch(share=False)