"""Gradio front end for voice cloning with a locally stored Coqui TTS model."""

import tempfile
from typing import Optional

import gradio as gr
import soundfile as sf
import torch
from TTS.api import TTS

# Run on the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model from the local path.
tts = TTS(
    model_path="models/xtts_v2/model_file.pth",
    config_path="models/xtts_v2/config.json",
).to(device)

# Language codes offered in the UI and accepted by clone_voice().
AVAILABLE_LANGUAGES = ["en", "de", "fr", "it", "es", "tr"]

# Used only when the loaded model does not expose its own output rate.
FALLBACK_SAMPLE_RATE = 22050


def _output_sample_rate() -> int:
    """Return the sample rate (Hz) to use when saving synthesized audio.

    The rate is looked up, in order, in the model's audio config
    (``output_sample_rate``), on the synthesizer object itself, and finally
    falls back to ``FALLBACK_SAMPLE_RATE``.
    """
    synthesizer = getattr(tts, "synthesizer", None)
    audio_cfg = getattr(getattr(synthesizer, "tts_config", None), "audio", None)
    # NOTE(review): XTTS v2 configs are expected to declare
    # ``output_sample_rate`` (24 kHz); confirm against
    # models/xtts_v2/config.json.
    candidates = (
        getattr(audio_cfg, "output_sample_rate", None),
        getattr(synthesizer, "output_sample_rate", None),
    )
    for rate in candidates:
        if rate:
            return int(rate)
    return FALLBACK_SAMPLE_RATE


def clone_voice(text: str, language: str, speaker_wav: Optional[str]) -> str:
    """Synthesize *text* in the voice of the reference recording.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        language: One of ``AVAILABLE_LANGUAGES``.
        speaker_wav: Path to the reference voice recording.

    Returns:
        Path to a newly written WAV file containing the synthesized speech.

    Raises:
        ValueError: If the language is unsupported, the text is empty, or no
            reference recording was supplied.
    """
    if language not in AVAILABLE_LANGUAGES:
        raise ValueError(
            f"Language '{language}' is not supported. "
            f"Available languages: {AVAILABLE_LANGUAGES}"
        )
    if not text or not text.strip():
        raise ValueError("Please enter some text to speak.")
    if not speaker_wav:
        raise ValueError("Please upload or record a reference voice.")

    wav = tts.tts(text=text, speaker_wav=speaker_wav, language=language)

    # Give every request its own file so overlapping requests cannot
    # overwrite each other's audio. The file is not deleted here because the
    # caller still has to read it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_file = handle.name
    sf.write(output_file, wav, _output_sample_rate())
    return output_file


def gradio_interface(text: str, language: str, speaker_wav: Optional[str]) -> str:
    """Gradio callback: run clone_voice() and surface failures in the UI.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: Wraps any failure. The output component is an audio
            player, so returning the message as a string would be treated
            as a file path instead of being shown to the user.
    """
    try:
        return clone_voice(text, language, speaker_wav)
    except Exception as exc:  # UI boundary: report every failure to the user.
        raise gr.Error(str(exc)) from exc


# Define Gradio inputs and outputs.
inputs = [
    gr.Textbox(label="Text to speak", placeholder="Enter text here..."),
    gr.Dropdown(label="Language", choices=AVAILABLE_LANGUAGES, value="en"),
    gr.Audio(label="Reference Voice (Upload or Record)", type="filepath"),
]
outputs = gr.Audio(label="Cloned Voice Output")

# Create the Gradio interface.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=inputs,
    outputs=outputs,
    title="Voice Cloning with Coqui TTS",
    description=(
        "Upload or record a reference voice, enter text, and select a "
        "language to generate a cloned voice."
    ),
    live=False,
)

# Start the web server only when executed as a script, so importing this
# module (e.g. to reuse clone_voice) does not block on the server.
if __name__ == "__main__":
    interface.launch()