tts / app.py
Naksh786's picture
Update app.py
03088e5 verified
raw
history blame
1.09 kB
import gradio as gr
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
import torch
import torchaudio
import tempfile
# Load the text processor and the acoustic model once at import time so that
# every request reuses the same instances.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")

# Location of the speaker embedding that conditions the synthesized voice.
# The Hub's "/resolve/" route serves the raw file; the "/blob/" route used
# previously returns the HTML file-viewer page instead.
# NOTE(review): the repository path is kept exactly as originally written --
# confirm that this file actually exists in the repo before deploying.
SPEAKER_EMBEDDING_URL = (
    "https://huggingface.co/microsoft/speecht5_tts/resolve/main/"
    "speaker_embeddings/english/vctk_speaker_0.pt"
)


def _load_speaker_embedding(url):
    """Download a serialized speaker embedding and return it as a 2-D tensor.

    A ``.pt`` file is a serialized tensor, not audio, so it is read with
    ``torch.load`` rather than ``torchaudio.load`` (which decodes audio and
    returns ``(waveform, sample_rate)``).

    Args:
        url: HTTP(S) URL of a file written with ``torch.save``.

    Returns:
        A float32 tensor reshaped to ``(1, dim)``, i.e. with the leading
        batch dimension that ``generate_speech`` expects.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = f"{tmp_dir}/speaker_embedding.pt"
        torch.hub.download_url_to_file(url, local_path, progress=False)
        # weights_only=True restricts unpickling to tensors/primitive
        # containers, so a tampered remote file cannot run arbitrary code.
        embedding = torch.load(local_path, map_location="cpu", weights_only=True)
    return torch.as_tensor(embedding, dtype=torch.float32).reshape(1, -1)


# Load a voice embedding (necessary for the SpeechT5 model)
speaker_embedding = _load_speaker_embedding(SPEAKER_EMBEDDING_URL)
# Lazily created HiFi-GAN vocoder, shared by all requests.
_vocoder = None


def _get_vocoder():
    """Return the shared SpeechT5 HiFi-GAN vocoder, loading it on first use."""
    global _vocoder
    if _vocoder is None:
        # Function-scope import: brings the class into scope without touching
        # the file-level import block.
        from transformers import SpeechT5HifiGan

        _vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return _vocoder


def text_to_speech(text):
    """Synthesize ``text`` and return the path of the resulting WAV file.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Path to a temporary ``.wav`` file written at 16 kHz. The file is
        created with ``delete=False`` so it outlives this call and can be
        served by the caller.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    if text is None or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # The processor selects its tokenizer only when the input is passed by
    # the ``text=`` keyword.
    inputs = processor(text=text, return_tensors="pt")

    # Without a vocoder, generate_speech returns a mel spectrogram rather
    # than an audio waveform.
    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"], speaker_embedding, vocoder=_get_vocoder()
        )

    # torchaudio.save expects a 2-D (channels, time) tensor; the vocoder
    # output is a 1-D mono waveform.
    waveform = speech.detach().cpu().reshape(1, -1)

    # Reserve a file name, then close the handle before writing: saving to a
    # path that is still held open fails on some platforms (e.g. Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        wav_path = f.name
    torchaudio.save(wav_path, waveform, 16000)
    return wav_path
# Gradio UI: a single text box feeding text_to_speech, with the returned
# file path rendered by an audio player.
_INTERFACE_OPTIONS = {
    "fn": text_to_speech,
    "inputs": "text",
    "outputs": "audio",
    "title": "Text to Speech",
    "description": "Convert text to speech using the microsoft/speecht5_tts model",
}

interface = gr.Interface(**_INTERFACE_OPTIONS)

# Start the web server for the app.
interface.launch()