File size: 4,962 Bytes
63f1d6d 299c2df 63f1d6d 299c2df 63f1d6d 299c2df 63f1d6d 8529fe9 63f1d6d 8529fe9 63f1d6d 299c2df 8529fe9 299c2df d81bde6 63f1d6d d81bde6 63f1d6d 299c2df 63f1d6d 299c2df 63f1d6d d81bde6 63f1d6d 4f5115c 63f1d6d 8529fe9 63f1d6d 299c2df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from pydub import AudioSegment
from pydub.playback import play
import math
# Fetch the available voices
async def get_voices():
    """Return a mapping of human-readable voice labels to Edge TTS short names.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
    to the voice's ``ShortName`` as reported by ``edge_tts.list_voices()``.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = entry['ShortName']
    return labelled
# Main text-to-speech conversion function
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` with Edge TTS and save it to a temporary MP3 file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text is
            rejected with a warning message instead of being synthesized.
        voice: A voice short name, or a label of the form
            ``"<ShortName> - <Locale> (<Gender>)"``; only the part before the
            first ``" - "`` is passed to Edge TTS.
        rate: Speech-rate adjustment in percent (formatted as e.g. ``+10%``).
        pitch: Pitch adjustment in Hz (formatted as e.g. ``-5Hz``).

    Returns:
        ``(path, None)`` on success, where ``path`` is a temporary ``.mp3``
        file that the caller must delete, or ``(None, message)`` when the
        input fails validation.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # The "d" format code rejects floats, so coerce to int in case the caller
    # hands over values such as 5.0.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Only reserve a unique path here and close the handle straight away: a
    # NamedTemporaryFile that is still open cannot be reopened by name on
    # Windows, and the audio is written through the path, not this handle.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nothing else removes the file; BaseException is
        # caught so that task cancellation is covered too. Always re-raised.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    return tmp_path, None
# Mix background music into the speech audio
def add_background_music(speech_file, background_music_file, output_file, *, music_attenuation_db=16):
    """Overlay attenuated, looped background music on a speech recording.

    Args:
        speech_file: Path to the speech audio, decoded as MP3.
        background_music_file: Path to the music file. The container format
            is left to the decoder to detect, so it need not be MP3.
        output_file: Destination path; the mix is exported as MP3.
        music_attenuation_db: Decibels subtracted from the music before
            mixing. The default of 16 dB is the original approximation of
            playing the music at about 15 % volume.

    The music is repeated when it is shorter than the speech and then trimmed
    so that the result is exactly as long as the speech. If the music decodes
    to zero length, the speech is exported unchanged.
    """
    speech = AudioSegment.from_mp3(speech_file)
    # from_mp3 would force the MP3 demuxer and fail on WAV/OGG/... uploads.
    background_music = AudioSegment.from_file(background_music_file)

    speech_len = len(speech)
    music_len = len(background_music)

    if music_len == 0:
        # Nothing to mix in; also avoids dividing by zero below.
        final_audio = speech
    else:
        background_music = background_music - music_attenuation_db
        if music_len < speech_len:
            # Loop the music often enough to cover the whole speech.
            background_music = background_music * math.ceil(speech_len / music_len)
        # Trim so the music never outlasts the speech.
        background_music = background_music[:speech_len]
        final_audio = speech.overlay(background_music)

    final_audio.export(output_file, format="mp3")
    print(f"Archivo generado exitosamente: {output_file}")
# Gradio interface callback
async def tts_interface(text, voice, rate, pitch, background_music):
    """Gradio callback: synthesize speech and optionally mix in background music.

    Args:
        text: Text to convert.
        voice: Selected voice label (may be empty).
        rate: Speech-rate adjustment in percent.
        pitch: Pitch adjustment in Hz.
        background_music: Path to a music file, or ``None``/empty for none.

    Returns:
        A 3-tuple. On success the first item is the path of the generated MP3
        and the other two are ``None``. When validation fails the first two
        items are ``None`` and the third is the result of
        ``gr.Warning(message)``.
    """
    speech_file, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        return None, None, gr.Warning(warning)

    # No music supplied: hand back the plain speech file.
    if not background_music:
        return speech_file, None, None

    # Reserve the output path and close the handle before the mix is exported
    # to it; an open NamedTemporaryFile cannot be reopened by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        output_file = tmp_file.name
    try:
        add_background_music(speech_file, background_music, output_file)
    except BaseException:
        # Do not leave a half-written output file behind; always re-raised.
        if os.path.exists(output_file):
            os.remove(output_file)
        raise
    finally:
        # The intermediate speech file is no longer needed whether or not
        # mixing succeeded.
        if os.path.exists(speech_file):
            os.remove(speech_file)
    return output_file, None, None
async def create_demo():
    """Build the Gradio interface for the text-to-speech tool.

    Fetches the voice list via ``get_voices()`` to populate the voice
    dropdown and wires ``tts_interface`` as the callback.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    voices = await get_voices()
    # NOTE: the lines of this literal are deliberately kept flush left so the
    # rendered description text stays exactly as it was.
    description = """
Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥
Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
Transform your words into stunning, professional-quality videos in just a few clicks.
✨ Features:
• Convert text to engaging videos with customizable visuals
• Choose from 40+ languages and 300+ voices
• Perfect for creating audiobooks, storytelling, and language learning materials
• Ideal for educators, content creators, and language enthusiasts
Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
"""
    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # The leading empty choice lets the form start with no voice
            # selected; the callback then asks the user to pick one.
            gr.Dropdown(choices=["", *voices], label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
            # The legacy `optional=True` keyword is not accepted by current
            # component constructors; the callback already treats a missing
            # upload (None/empty) as "no background music".
            gr.Audio(label="Background Music", type="filepath"),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Image(label="Visualization", visible=False),
            gr.Markdown(label="Warning", visible=False),
        ],
        title="Edge TTS Text-to-Speech",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name=None,
    )
    return demo
async def main():
    """Create the demo, enable its queue with a concurrency limit of 5, and launch it."""
    app = await create_demo()
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())