File size: 1,088 Bytes
df165b9 03088e5 df165b9 03088e5 df165b9 03088e5 df165b9 03088e5 df165b9 03088e5 df165b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import tempfile
import warnings

import gradio as gr
import torch
import torchaudio
from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
# Load model and processor
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
# Without a vocoder, generate_speech() returns a mel spectrogram rather than
# audio samples; the HiFi-GAN vocoder converts it into a waveform.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Sample rate (Hz) written into the generated WAV files.
SAMPLE_RATE = 16000

# "/resolve/" serves the raw file; the "/blob/" form of this URL returns the
# HTML viewer page instead.
# NOTE(review): confirm this file actually exists in the model repository.
SPEAKER_EMBEDDING_URL = (
    "https://huggingface.co/microsoft/speecht5_tts/resolve/main/"
    "speaker_embeddings/english/vctk_speaker_0.pt"
)


def _load_speaker_embedding(url, expected_dim):
    """Return a ``(1, expected_dim)`` float tensor holding the speaker x-vector.

    The file at *url* is fetched (and cached) through ``torch.hub`` and
    deserialized with ``torch.load``. If the download fails, a warning is
    emitted and an all-zero embedding is returned so the app can still start.

    Raises:
        ValueError: if the downloaded embedding does not have
            ``expected_dim`` values.
    """
    try:
        # SECURITY: torch.load unpickles the payload; only point this at a
        # trusted source.
        loaded = torch.hub.load_state_dict_from_url(
            url, map_location="cpu", progress=False
        )
    except OSError as err:  # urllib's URLError/HTTPError derive from OSError
        warnings.warn(
            f"Could not download speaker embedding from {url} ({err}); "
            "falling back to an all-zero embedding.",
            RuntimeWarning,
        )
        return torch.zeros((1, expected_dim))

    embedding = torch.as_tensor(loaded, dtype=torch.float32).reshape(1, -1)
    if embedding.shape[-1] != expected_dim:
        raise ValueError(
            f"Speaker embedding has {embedding.shape[-1]} values, "
            f"expected {expected_dim}"
        )
    return embedding


# Load a voice embedding (necessary for the SpeechT5 model)
speaker_embedding = _load_speaker_embedding(
    SPEAKER_EMBEDDING_URL, model.config.speaker_embedding_dim
)


def text_to_speech(text):
    """Synthesize *text* and return the path of a temporary WAV file.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Path to a ``.wav`` file containing the generated speech, or ``None``
        when *text* is empty or only whitespace.
    """
    if not text or not text.strip():
        # Nothing to synthesize.
        return None

    inputs = processor(text=text, return_tensors="pt")
    waveform = model.generate_speech(
        inputs["input_ids"], speaker_embedding, vocoder=vocoder
    )

    # Reserve a temp path and close the handle before writing, so the save
    # does not race with an open file handle (required on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_path = wav_file.name

    # torchaudio.save expects (channels, frames); the vocoder output is 1-D.
    torchaudio.save(wav_path, waveform.unsqueeze(0).cpu(), SAMPLE_RATE)
    return wav_path
# Gradio UI: one text box whose content is passed to text_to_speech, with the
# returned file shown in an audio player.
_interface_options = {
    "fn": text_to_speech,
    "inputs": "text",
    "outputs": "audio",
    "title": "Text to Speech",
    "description": "Convert text to speech using the microsoft/speecht5_tts model",
}
interface = gr.Interface(**_interface_options)

# Start serving the app.
interface.launch()
|