# Page header captured from the hosting site's file viewer (not part of app.py):
#   SeyedAli's picture
#   Update app.py
#   e82e1d5
#   raw | history | blame
#   700 Bytes
import tempfile ,os
import gradio as gr
from transformers import VitsModel, AutoTokenizer,pipeline
import torch
import numpy as np
import torchaudio
_TTS_MODEL_ID = "SeyedAli/Persian-Speech-synthesis"

# Lazily-built pipeline shared by all requests; loading the checkpoint is far
# too expensive to repeat on every call.
_tts_pipeline = None


def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, building it on first use."""
    global _tts_pipeline
    if _tts_pipeline is None:
        model = VitsModel.from_pretrained(_TTS_MODEL_ID)
        tokenizer = AutoTokenizer.from_pretrained(_TTS_MODEL_ID)
        _tts_pipeline = pipeline(
            "text-to-speech", model=model, tokenizer=tokenizer
        )
    return _tts_pipeline


def TTS(text):
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        text: The text to synthesize, as received from the Gradio text input.

    Returns:
        Path (str) of a newly created ``.wav`` temporary file. The file is
        created with ``delete=False`` so it outlives this call and can be
        served by the audio output component; it is not removed here.
    """
    # Run inference exactly once and reuse both fields of the result.
    result = _get_tts_pipeline()(text)

    # torchaudio.save expects a 2-D tensor; the pipeline output is presumably a
    # NumPy array (per the transformers docs), so convert and, if it came back
    # flat, add a leading channel axis. as_tensor is a no-op for tensors.
    waveform = torch.as_tensor(result["audio"])
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)

    # Only reserve a unique name here and close the handle before writing:
    # saving by path lets torchaudio infer the format from the ".wav" suffix
    # and avoids reopening a file that is still held open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name
    torchaudio.save(wav_path, waveform, result["sampling_rate"])
    return wav_path
# Minimal Gradio UI: one text box in, one audio player out, backed by TTS.
iface = gr.Interface(
    fn=TTS,
    inputs="text",
    outputs="audio",
)

# Start the web server; share=False means no public share link is requested.
iface.launch(share=False)