TTS_PT

Sleeping

File size: 4,560 Bytes

b59e10d
 
 
 
 
69aeb7e
b59e10d
 
69aeb7e
 
 
a295d2a
 
 
 
 
 
 
 
 
b59e10d
69aeb7e
b59e10d
69aeb7e
 
b59e10d
 
69aeb7e
b59e10d
bd8bee9
 
b59e10d
 
69aeb7e
 
 
a295d2a
69aeb7e
bd8bee9
 
69aeb7e
b59e10d
69aeb7e
 
 
b59e10d
 
 
 
 
69aeb7e
b59e10d
 
 
 
 
 
 
 
 
 
 
 
bd8bee9
69aeb7e
 
 
 
 
bd8bee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c6803e
bd8bee9
 
9c6803e
bd8bee9
 
 
 
 
 
 
 
 
 
 
 
 
9c6803e
bd8bee9
 
 
9c6803e
bd8bee9

import gradio as gr
import tempfile
from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download

# Hugging Face Hub repository that hosts the model files
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-vits-tts"

# Interface title and description (user-facing, shown in Portuguese)
my_title = "🇵🇹 Sintetizador de Fala em Português com Coqui TTS"
my_description = (
    "Um sintetizador de fala em português baseado no modelo YourTTS da Coqui.ai. "
    "Insira o texto e gere o áudio!"
)

# Sample text meant to fill the input limit (described as 500 tokens).
# Written as explicit literals so the trailing space on every line is visible.
# NOTE(review): the string is slightly longer than 500 characters, while the
# input Textbox is created with max_length=500 -- confirm which unit is meant.
max_tokens_text = (
    "\n"
    "O vento norte e o Sol discutiam quem era o mais forte, quando surgiu um viajante envolvido numa capa. \n"
    "O vento começou a soprar com toda a força, mas quanto mais soprava, mais o viajante se enrolava em sua capa. \n"
    "Então, o Sol começou a brilhar suavemente, e o viajante, sentindo o calor, logo tirou a capa. \n"
    "Assim, o Sol provou que o calor e a gentileza são mais eficazes do que a força bruta. \n"
    "Esta história nos ensina que, muitas vezes, a delicadeza e a paciência são mais poderosas do que a agressividade. \n"
)

# Voices offered for Portuguese: "Ed" is the male voice, "Linda" the female one
TTS_VOICES = ["Ed", "Linda"]

# One example row per voice, all using the same sample text
my_examples = [[max_tokens_text, voice_name] for voice_name in TTS_VOICES]

# Article with additional information (user guide rendered next to the output)
my_article = """
<h3>Guia do Usuário</h3>
<p>1. Insira o texto em português no campo de entrada (até 500 tokens).</p>
<p>2. Selecione a voz desejada (masculina ou feminina).</p>
<p>3. Clique em "Gerar Áudio" para criar o áudio correspondente.</p>
<p>4. Ouça o áudio gerado ou faça o download diretamente do player.</p>
"""

# Speech synthesis

# Process-wide synthesizer, built on first use. Resolving the model files and
# constructing the Synthesizer is costly, so it is done once rather than on
# every request.
_SYNTHESIZER = None


def _get_synthesizer():
    """Return the shared Synthesizer, downloading the model files on first use."""
    global _SYNTHESIZER
    if _SYNTHESIZER is None:
        best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
        config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
        speakers_path = hf_hub_download(repo_id=REPO_ID, filename="speakers.pth")
        languages_path = hf_hub_download(repo_id=REPO_ID, filename="language_ids.json")
        speaker_encoder_model_path = hf_hub_download(repo_id=REPO_ID, filename="model_se.pth")
        speaker_encoder_config_path = hf_hub_download(repo_id=REPO_ID, filename="config_se.json")

        # Arguments stay positional, in the same order as before.
        # NOTE(review): the two None slots are presumably the vocoder
        # checkpoint/config and the trailing False the CUDA switch -- confirm
        # against the installed TTS version's Synthesizer signature.
        _SYNTHESIZER = Synthesizer(
            best_model_path,
            config_path,
            speakers_path,
            languages_path,
            None,
            None,
            speaker_encoder_model_path,
            speaker_encoder_config_path,
            False,
        )
    return _SYNTHESIZER


def tts(text: str, speaker_idx: str) -> str:
    """Synthesize Portuguese speech for *text* and return the path of a WAV file.

    Args:
        text: Text to speak. Must contain at least one non-whitespace character.
        speaker_idx: Name of the voice passed to the synthesizer (the UI offers
            the entries of ``TTS_VOICES``).

    Returns:
        Path of a temporary ``.wav`` file. The file is created with
        ``delete=False`` so it outlives this call; it is not removed here.

    Raises:
        gr.Error: If *text* is empty or only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of a model-side failure.
        raise gr.Error("Por favor, insira um texto para sintetizar.")

    synthesizer = _get_synthesizer()
    wavs = synthesizer.tts(text, speaker_idx, "Português")

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name

# Build the Gradio interface

# Character limit enforced by the input Textbox (the UI text calls these "tokens").
_MAX_INPUT_CHARS = 500


def _format_token_counter(text: str) -> str:
    """Return the counter caption for the current content of the input box.

    Counts characters, because that is the unit the Textbox ``max_length``
    is set in.
    """
    return f"Tokens usados: {len(text or '')} / {_MAX_INPUT_CHARS}"


def _synthesize_for_ui(text: str, voice: str):
    """Run ``tts`` and return the WAV path twice: for the player and for the download widget."""
    wav_path = tts(text, voice)
    return wav_path, wav_path


with gr.Blocks(title=my_title, css="""
    .gradio-container {
        max-width: 800px; 
        margin: auto; 
        font-family: 'Arial', sans-serif;
    }
    .header { 
        text-align: center; 
        margin-bottom: 20px; 
    }
    .footer {
        font-size: 0.85rem; 
        color: gray; 
        text-align: center; 
        margin-top: 30px; 
    }
""") as demo:
    # Header
    with gr.Row(variant="compact"):
        gr.Markdown(f"<h1 class='header'>{my_title}</h1>")
    gr.Markdown(my_description)

    # User input (left column) and results (right column)
    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                lines=10,
                label="Texto em Português",
                placeholder="Insira o texto aqui... (até 500 tokens)",
                max_length=_MAX_INPUT_CHARS,
            )
            token_counter = gr.Label(value=_format_token_counter(""), label="Progresso")
            voice_selector = gr.Radio(
                label="Voz",
                choices=TTS_VOICES,
                value="Ed",
            )
            submit_button = gr.Button("Gerar Áudio", variant="primary")
        with gr.Column(scale=1):
            audio_output = gr.Audio(type="filepath", label="Áudio Gerado", interactive=False)
            download_button = gr.File(label="Download do Áudio")
            gr.Markdown(my_article)

    # Keep the counter in sync with the text box; it was previously static.
    text_input.change(
        fn=_format_token_counter,
        inputs=[text_input],
        outputs=[token_counter],
        show_progress="hidden",  # avoid a loading overlay on every keystroke
    )

    # Usage examples. Caching requires the function that produces the outputs,
    # so `fn` must be supplied together with `cache_examples=True`.
    gr.Examples(
        examples=my_examples,
        inputs=[text_input, voice_selector],
        outputs=[audio_output, download_button],
        fn=_synthesize_for_ui,
        cache_examples=True,
    )

    # Generate audio on click; the same file feeds the player and the download widget.
    submit_button.click(
        fn=_synthesize_for_ui,
        inputs=[text_input, voice_selector],
        outputs=[audio_output, download_button],
        show_progress="full",
    )

    # Footer
    gr.Markdown("<p class='footer'>Desenvolvido com ❤️ usando Gradio e Coqui TTS</p>")

# Start the app
demo.launch(share=True)