|
import gradio as gr |
|
from transformers import AutoModelForTextToSpeech, AutoTokenizer |
|
import torch |
|
|
|
|
|
# Both the model weights and the tokenizer are loaded from the same checkpoint
# id, once, at import time, and shared by every request.
# NOTE(review): confirm that the installed transformers release exports
# `AutoModelForTextToSpeech` and that this checkpoint id resolves on the Hub.
_CHECKPOINT = "parler-tts/parler_tts"

model = AutoModelForTextToSpeech.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
|
|
|
|
|
def text_to_speech(text: str):
    """Synthesize speech for *text* with the module-level model.

    Args:
        text: The string that is tokenized and forwarded to
            ``model.generate``.

    Returns:
        A ``(sample_rate, audio)`` tuple: the sample rate in Hz as an ``int``
        and the first generated sequence converted to a NumPy array. This
        function is wired to a Gradio ``"audio"`` output, which takes that
        tuple form.
    """
    encoded = tokenizer(text, return_tensors="pt")

    # NOTE(review): the tokenizer output is forwarded unchanged; Parler-TTS
    # style checkpoints presumably expect a separate voice description and
    # prompt -- confirm against the model's documented generate() inputs.
    generated = model.generate(**encoded)

    # Only the first item of the generated batch is returned; it is moved to
    # the CPU and detached before the NumPy conversion.
    waveform = generated[0].cpu().detach().numpy()

    # Report the rate declared by the loaded checkpoint instead of a fixed
    # 16 kHz: a mismatched rate makes playback too slow or too fast. The
    # previous value is kept as the fallback when no rate is declared.
    model_config = getattr(model, "config", None)
    sample_rate = getattr(model_config, "sampling_rate", None) or 16000
    return int(sample_rate), waveform
|
|
|
|
|
# Build the web UI: a single text input feeding `text_to_speech`, whose
# return value is rendered by a single audio output.
interface = gr.Interface(
    text_to_speech,
    "text",
    "audio",
    title="Text to Speech",
    description="Convert text to speech using the parler-tts/parler_tts model",
)

# Start serving the UI as soon as this module is executed.
interface.launch()
|
|