"""Gradio front-end for the Coqui XTTS v2 text-to-speech model, run on CPU."""
import sys
import os

from fastapi import Request

# By using XTTS you agree to the CPML license: https://coqui.ai/cpml
# The agreement flag is exported before the TTS package is imported.
os.environ["COQUI_TOS_AGREED"] = "1"

import gradio as gr
from TTS.api import TTS
from TTS.utils.manage import ModelManager

# Debug output: dump the attributes of whatever the model listing returns.
model_names = TTS().list_models()
print(model_names.__dict__)
print(model_names.__dir__())

model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

# An explicit pre-download step is currently disabled; kept for reference:
#   m = ModelManager().download_model(model_name)
#   print(m)
m = model_name

tts = TTS(model_name, gpu=False)
# CPU target (no GPU, or an AMD card). On CUDA hardware the alternative
# is tts.to("cuda").
tts.to("cpu")


def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree, request: gr.Request):
    """Handle one submission of the interface.

    The parameters mirror the six input components in order (text prompt,
    language, reference audio path, microphone audio path, "use microphone"
    flag, licence agreement flag), followed by the Gradio request object.

    Returns:
        A ``(video, audio)`` pair for the two output components. This is
        currently a stub and always yields ``(None, None)``.
    """
    return None, None


title = "XTTS Glz's remake (Fonctional Text-2-Speech)"
description = ""
article = ""
examples = []

# Input components, created in the same order as predict()'s parameters.
prompt_box = gr.Textbox(
    label="Text Prompt",
    info="One or two sentences at a time is better",
    value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
)
language_dropdown = gr.Dropdown(
    label="Language",
    info="Select an output language for the synthesised speech",
    choices=[
        "en",
        "es",
        "fr",
        "de",
        "it",
        "pt",
        "pl",
        "tr",
        "ru",
        "nl",
        "cs",
        "ar",
        "zh-cn",
    ],
    max_choices=1,
    value="en",
)
# Hint text kept as a comment (the `info=` argument is disabled here):
#   "Click on the ✎ button to upload your own target speaker audio"
reference_audio = gr.Audio(
    label="Reference Audio",
    type="filepath",
    value="examples/female.wav",
)
# Hint text kept as a comment (the `info=` argument is disabled here):
#   "Use your microphone to record audio"
microphone_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    label="Use Microphone for Reference",
)
use_mic_checkbox = gr.Checkbox(
    label="Check to use Microphone as Reference",
    value=False,
    info="Notice: Microphone input may not work properly under traffic",
)
agree_checkbox = gr.Checkbox(
    label="Agree",
    value=True,
    info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
)

# Output components, matching the two values returned by predict().
waveform_video = gr.Video(label="Waveform Visual")
synthesised_audio = gr.Audio(label="Synthesised Audio")

demo = gr.Interface(
    fn=predict,
    inputs=[
        prompt_box,
        language_dropdown,
        reference_audio,
        microphone_audio,
        use_mic_checkbox,
        agree_checkbox,
    ],
    outputs=[waveform_video, synthesised_audio],
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Enable request queuing, then start the server in debug mode.
demo.queue().launch(debug=True)