Edge-TTS-Text-to-Speech

Sleeping

File size: 4,962 Bytes

63f1d6d
 
 
 
 
299c2df
 
 
63f1d6d
299c2df
63f1d6d
 
 
 
299c2df
63f1d6d
 
8529fe9
63f1d6d
8529fe9
63f1d6d
 
 
 
 
 
 
 
 
 
299c2df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8529fe9
299c2df
 
 
 
 
 
 
 
 
 
 
 
 
 
d81bde6
63f1d6d
 
 
d81bde6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63f1d6d
 
 
 
 
 
299c2df
 
63f1d6d
 
 
299c2df
63f1d6d
 
 
d81bde6
 
63f1d6d
4f5115c
 
63f1d6d
 
 
8529fe9
 
 
 
 
63f1d6d
299c2df

import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from pydub import AudioSegment
from pydub.playback import play
import math

# Fetch the voices offered by the Edge TTS service
async def get_voices():
    """Return a dict mapping a display label to the voice's short name.

    Every entry returned by ``edge_tts.list_voices()`` contributes one item
    whose key is ``"<ShortName> - <Locale> (<Gender>)"`` and whose value is
    the entry's ``ShortName``.
    """
    catalog = {}
    for entry in await edge_tts.list_voices():
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        catalog[label] = entry['ShortName']
    return catalog

# Main text-to-speech conversion routine
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` into a temporary MP3 file with Edge TTS.

    Args:
        text: The text to speak. ``None``, empty or whitespace-only text is
            rejected with a warning message.
        voice: A voice label of the form ``"<ShortName> - ..."``; only the
            part before the first ``" - "`` is passed to Edge TTS.
        rate: Speech-rate adjustment in percent (rounded to an integer).
        pitch: Pitch adjustment in Hz (rounded to an integer).

    Returns:
        ``(path, None)`` on success, where ``path`` is the generated MP3 file
        (the caller is responsible for deleting it), or ``(None, message)``
        when the input is rejected.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # UI sliders may deliver floats (e.g. 5.0); the "+d" format code only
    # accepts integers, so normalize before formatting.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Only reserve the file name here and close the handle right away, so the
    # synthesizer can open the path itself (an open handle blocks this on
    # Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Do not leave an empty/partial file behind when synthesis fails or
        # the task is cancelled.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, None

# Mix a background music track underneath the generated speech
def add_background_music(speech_file, background_music_file, output_file, music_gain_db=-16):
    """Overlay background music on a speech recording and export it as MP3.

    Args:
        speech_file: Path to the speech MP3.
        background_music_file: Path to the music track. The container format
            is auto-detected, so the file does not have to be an MP3.
        output_file: Destination path for the mixed MP3.
        music_gain_db: Gain in dB applied to the music before mixing. The
            default of -16 dB leaves roughly 16% of the original amplitude.

    The music is looped when it is shorter than the speech and then trimmed
    to the speech length. If the music track has zero length, the speech is
    exported unchanged.
    """
    speech = AudioSegment.from_mp3(speech_file)
    # The music comes from a user upload whose format is not known in
    # advance, so let pydub/ffmpeg detect it instead of forcing MP3 decoding.
    background_music = AudioSegment.from_file(background_music_file)

    if len(background_music) == 0:
        # Nothing to loop or overlay; this also avoids dividing by zero below.
        final_audio = speech
    else:
        background_music = background_music.apply_gain(music_gain_db)

        # Loop the music until it is at least as long as the speech.
        if len(background_music) < len(speech):
            repetitions = math.ceil(len(speech) / len(background_music))
            background_music = background_music * repetitions

        # Trim the music to the speech duration (lengths are in milliseconds).
        background_music = background_music[:len(speech)]

        final_audio = speech.overlay(background_music)

    final_audio.export(output_file, format="mp3")
    print(f"Archivo generado exitosamente: {output_file}")

# Gradio callback
async def tts_interface(text, voice, rate, pitch, background_music):
    """Generate speech and optionally mix it with background music.

    Args:
        text, voice, rate, pitch: Forwarded unchanged to ``text_to_speech``.
        background_music: Path to a music file, or ``None``/``""`` when no
            music should be added.

    Returns:
        A 3-tuple matching the interface outputs: the audio file path (or
        ``None`` when the input was rejected), ``None`` for the second
        output, and for the third output either ``None`` or the result of
        ``gr.Warning(message)``.

    Raises:
        Whatever ``add_background_music`` raises; both temporary files are
        removed before the exception propagates.
    """
    speech_file, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        # NOTE(review): gr.Warning is called here and its result is returned
        # as the third output - confirm this matches the installed Gradio
        # version's intended usage.
        return None, None, gr.Warning(warning)

    # No music supplied: hand back the plain speech file.
    if not background_music:
        return speech_file, None, None

    # Reserve the output name and close the handle before the mixer writes to
    # the path (an open handle blocks this on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        output_file = tmp_file.name
    try:
        add_background_music(speech_file, background_music, output_file)
    except BaseException:
        # Mixing failed: neither temporary file will be returned to anyone,
        # so remove both instead of leaking them.
        for leftover in (speech_file, output_file):
            try:
                os.remove(leftover)
            except OSError:
                pass
        raise

    # The intermediate speech-only file is no longer needed.
    os.remove(speech_file)
    return output_file, None, None

async def create_demo():
    """Build the Gradio interface for the text-to-speech app.

    Fetches the voice list via ``get_voices()`` and wires ``tts_interface``
    to five inputs (text, voice, rate, pitch, background music) and three
    outputs (audio, a hidden image, a hidden markdown field).

    Returns:
        The configured ``gr.Interface`` (not yet queued or launched).
    """
    voices = await get_voices()
    
    description = """
    Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
    
    🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥
    
    Take your content creation to the next level with our cutting-edge Text-to-Video Converter! 
    Transform your words into stunning, professional-quality videos in just a few clicks. 
    
    ✨ Features:
    • Convert text to engaging videos with customizable visuals
    • Choose from 40+ languages and 300+ voices
    • Perfect for creating audiobooks, storytelling, and language learning materials
    • Ideal for educators, content creators, and language enthusiasts
    
    Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
    """
    
    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # The leading empty choice is the default, so the user has to pick
            # a voice explicitly.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
            # No `optional=True`: that keyword is not accepted by Gradio 4.x
            # components. tts_interface already handles a missing upload.
            gr.Audio(label="Background Music", type="filepath")
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Image(label="Visualization", visible=False),
            gr.Markdown(label="Warning", visible=False)
        ],
        title="Edge TTS Text-to-Speech",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name=None
    )
    return demo

async def main():
    """Build the demo, enable its request queue and launch the web app."""
    app = await create_demo()
    # Queue requests with a default concurrency limit of 5.
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)

# Script entry point: run the async main() only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())