"""Gradio demo that turns typed text into speech with a pretrained TTS model."""

import gradio as gr
import torch
from transformers import AutoModelForTextToWaveform, AutoTokenizer

# Checkpoint used for both the model weights and the tokenizer.
# NOTE(review): Parler-TTS checkpoints are normally loaded through the separate
# `parler_tts` package (ParlerTTSForConditionalGeneration) and are usually
# driven by a voice description plus a prompt -- confirm that this checkpoint
# id exists and can be loaded/generated through the generic auto class below.
MODEL_ID = "parler-tts/parler_tts"

# Sample rate reported when the model config does not declare one.
DEFAULT_SAMPLE_RATE = 16000

# Load the model and tokenizer once at start-up so every request reuses them.
model = AutoModelForTextToWaveform.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


def text_to_speech(text: str) -> tuple:
    """Synthesize speech for ``text``.

    Args:
        text: The sentence(s) to speak.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is the first
        generated sequence converted to a NumPy array. This is the pair that
        the ``"audio"`` output of the interface below receives.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of feeding the model
        # an empty prompt.
        raise gr.Error("Please enter some text to synthesize.")

    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        speech = model.generate(**inputs)

    # Keep only the first item of the batch and move it to host memory.
    audio = speech[0].cpu().detach().numpy()

    # Prefer the rate declared by the model config; a wrong rate makes the
    # playback too fast or too slow. Fall back to the previous fixed value.
    sample_rate = getattr(model.config, "sampling_rate", DEFAULT_SAMPLE_RATE)
    return sample_rate, audio


# Gradio interface
interface = gr.Interface(
    fn=text_to_speech,
    inputs="text",
    outputs="audio",
    title="Text to Speech",
    description="Convert text to speech using the parler-tts/parler_tts model",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, so importing this
    # module (e.g. to reuse text_to_speech) does not block on launch().
    interface.launch()