File size: 6,882 Bytes
800530a 63f1d6d 800530a 299c2df 2e5ba54 4aea5de 800530a 2e5ba54 800530a 2e5ba54 63f1d6d 011525c 55cc4ac 8676795 011525c 8676795 011525c 8676795 011525c 0945a8f 011525c 0945a8f 011525c 299c2df 0945a8f 011525c 0945a8f 011525c 0945a8f 011525c 2e5ba54 011525c 2e5ba54 011525c 0945a8f 2e5ba54 0945a8f 011525c 0945a8f 2e5ba54 0945a8f 011525c 0945a8f 011525c 2e5ba54 0945a8f 2e5ba54 0945a8f 011525c 0945a8f 011525c 2e5ba54 011525c 0945a8f d81bde6 0945a8f 63f1d6d 55cc4ac 63f1d6d 011525c 800530a 209bf9f 63f1d6d 800530a 011525c 63f1d6d a241f1d d068ede 011525c d068ede |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import math
from pydub import AudioSegment
import subprocess
# Fetch the available voices
async def get_voices():
    """Return the Edge TTS voices as a display-label -> short-name mapping.

    Returns:
        dict: Keys are labels of the form
        ``"<ShortName> - <Locale> (<Gender>)"``; values are the matching
        ``ShortName`` strings.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = entry['ShortName']
    return labelled
# Text-to-speech conversion
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` into a temporary MP3 file using Edge TTS.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only values are
            rejected with a validation message.
        voice: Voice label; the part before the first ``" - "`` is passed to
            Edge TTS as the voice name.
        rate: Speaking-rate offset in percent. Rounded to a whole number
            because the rate string is formatted as a signed integer.
        pitch: Pitch offset in Hz. Rounded to a whole number for the same
            reason.

    Returns:
        tuple: ``(path, None)`` on success, where ``path`` is a temporary
        ``.mp3`` file that the caller must delete, or ``(None, message)``
        when validation or synthesis fails.
    """
    has_text = bool(text and text.strip())
    if not has_text and not voice:
        return None, "Please enter text and select a voice"
    if not has_text:
        return None, "Please enter text"
    if not voice:
        return None, "Please select a voice"

    output_path = None
    try:
        # Reserve the path and close our handle before the library writes to
        # it by name; writing to a file that is still open fails on Windows.
        fd, output_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],
            # Sliders may deliver floats; the "+d" format only accepts ints.
            rate=f"{int(round(rate)):+d}%",
            pitch=f"{int(round(pitch)):+d}Hz",
        )
        await communicate.save(output_path)
        return output_path, None
    except Exception as e:
        # Do not leave a half-written or empty temp file behind on failure.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Speech generation failed: {str(e)}"
# Add background music (deletes the original speech file on success)
def add_background_music(speech_path, bg_music_path, music_gain_db=-16, tail_ms=3000):
    """Mix background music under a speech track and append a faded music tail.

    The music is attenuated, looped if it is shorter than the speech plus the
    tail, overlaid on the speech, and then continues alone for ``tail_ms``
    milliseconds while fading out.

    Args:
        speech_path: Path to the speech audio file. It is deleted once the
            mixed file has been written successfully.
        bg_music_path: Path to the background music file.
        music_gain_db: Gain in dB applied to the music before mixing. The
            default of -16 dB is roughly 16% of the original amplitude.
        tail_ms: Length in milliseconds of the music-only fade-out appended
            after the speech. Must be positive.

    Returns:
        str: Path to a new temporary ``.mp3`` file containing the mix. The
        caller is responsible for deleting it.

    Raises:
        ValueError: If ``tail_ms`` is not positive or the music file contains
            no audio.
    """
    if tail_ms <= 0:
        raise ValueError("tail_ms must be positive")

    speech = AudioSegment.from_file(speech_path)
    background = AudioSegment.from_file(bg_music_path).apply_gain(music_gain_db)
    if len(background) == 0:
        # An empty track cannot be looped; fail clearly instead of dividing
        # by zero below.
        raise ValueError("Background music file contains no audio")

    speech_ms = len(speech)
    needed_ms = speech_ms + tail_ms
    if len(background) < needed_ms:
        # Loop the music until it covers the speech and the tail.
        background = background * math.ceil(needed_ms / len(background))

    mixed = speech.overlay(background[:speech_ms])
    tail = background[speech_ms:needed_ms].fade_out(tail_ms)
    final_audio = mixed + tail

    # Reserve the output path and close our handle so the exporter can open
    # the file by name on every platform.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        # export() returns the open output handle; close it explicitly.
        final_audio.export(output_path, format="mp3").close()
    except Exception:
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    # Remove the original speech only after the mix exists on disk.
    if os.path.exists(speech_path):
        os.remove(speech_path)
    return output_path
# Process multiple videos (cleans up its intermediate temporary files)
def process_videos(audio_path, video_files, fade_seconds=3):
    """Concatenate the uploaded clips, loop them under the audio, and fade out.

    The clips are joined with ffmpeg's concat demuxer, the joined video is
    looped for the length of the audio plus ``fade_seconds``, and the picture
    fades out during those final seconds while the audio is padded with
    silence.

    Args:
        audio_path: Path to the narration audio file.
        video_files: Non-empty sequence of uploaded clips, given either as
            path strings or as objects exposing the path in ``.name``.
        fade_seconds: Length in seconds of the closing video fade that
            follows the audio. Must be positive.

    Returns:
        str: Path to the rendered temporary ``.mp4``. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If ``video_files`` is empty or ``fade_seconds`` is not
            positive.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    if not video_files:
        raise ValueError("No video files were provided")
    if fade_seconds <= 0:
        raise ValueError("fade_seconds must be positive")

    audio_duration = AudioSegment.from_file(audio_path).duration_seconds

    scratch_paths = []
    final_path = None
    completed = False
    try:
        # Intermediate file holding the concatenated clips.
        fd, concat_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        scratch_paths.append(concat_path)

        # List file consumed by the concat demuxer.
        fd, list_path = tempfile.mkstemp(suffix=".txt")
        scratch_paths.append(list_path)
        with os.fdopen(fd, "w", encoding="utf-8") as list_file:
            for video in video_files:
                source = os.fspath(getattr(video, "name", video))
                # Inside a single-quoted entry a literal quote must be
                # written as '\'' or ffmpeg misparses the path.
                escaped = source.replace("'", "'\\''")
                list_file.write(f"file '{escaped}'\n")

        subprocess.run([
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_path,
            "-c", "copy",
            concat_path,
        ], check=True)

        # Final render.
        fd, final_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        subprocess.run([
            "ffmpeg", "-y",
            "-stream_loop", "-1",
            "-i", concat_path,
            "-i", audio_path,
            # Pick the streams explicitly: otherwise ffmpeg's automatic
            # selection may prefer the clips' own audio track (for example
            # when it has more channels than the narration).
            "-map", "0:v:0",
            "-map", "1:a:0",
            "-t", str(audio_duration + fade_seconds),
            "-c:v", "libx264",
            "-c:a", "aac",
            "-vf", f"fade=t=out:st={audio_duration}:d={fade_seconds}",
            # Pad the audio with silence so the output runs through the
            # fade; "-t" above bounds the otherwise endless streams.
            "-af", "apad",
            final_path,
        ], check=True)
        completed = True
        return final_path
    finally:
        # Always drop the intermediates, and the output too if we failed.
        if not completed and final_path is not None:
            scratch_paths.append(final_path)
        for path in scratch_paths:
            try:
                os.remove(path)
            except OSError:
                pass
# Main handler (deletes the uploaded source videos after rendering)
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    """Run the pipeline: speech synthesis, optional music, optional video.

    Args:
        text: Text to synthesize.
        voice: Selected voice label.
        rate: Speaking-rate offset in percent.
        pitch: Pitch offset in Hz.
        bg_music: Optional path to a background music file.
        video_files: Optional sequence of uploaded video clips.

    Returns:
        tuple: ``(audio_path, video_path, None)`` on success, where
        ``video_path`` is ``None`` when no clips were supplied, or
        ``(None, None, <result of gr.Warning>)`` when validation or
        processing fails.
    """
    audio_path = None
    succeeded = False
    try:
        # Generate the main speech track.
        audio_path, warning = await text_to_speech(text, voice, rate, pitch)
        if warning:
            return None, None, gr.Warning(warning)

        # Add background music.
        if bg_music:
            audio_path = add_background_music(audio_path, bg_music)

        # Render the video.
        video_path = None
        if video_files:
            video_path = process_videos(audio_path, video_files)
            # Delete the uploaded originals; best effort, a leftover upload
            # must not fail a request whose outputs are already rendered.
            for video in video_files:
                try:
                    os.remove(getattr(video, "name", video))
                except (OSError, TypeError):
                    pass

        succeeded = True
        return audio_path, video_path, None
    except Exception as e:
        return None, None, gr.Warning(f"Processing error: {str(e)}")
    finally:
        # The audio file is handed back to the caller on success, so it must
        # survive; remove it only when the request did not complete.
        if not succeeded and audio_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass
# Build the interface
async def create_demo():
    """Construct the Gradio interface wired to ``tts_interface``.

    The voice dropdown is populated from ``get_voices()`` with a leading
    empty choice. Inputs are created before outputs, and both before the
    interface itself.

    Returns:
        The configured ``gr.Interface`` instance (not yet launched).
    """
    voice_labels = await get_voices()

    input_components = [
        gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
        gr.Dropdown(choices=["", *voice_labels.keys()], label="Select Voice"),
        gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
        gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
        gr.Audio(label="Background Music", type="filepath"),
        gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple"),
    ]
    output_components = [
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Video(label="Final Video"),
        gr.Markdown(visible=False),
    ]

    # User-facing description; kept flush-left so the text is unchanged.
    description_text = """
Este script permite crear videos personalizados combinando texto, audio y múltiples clips de video.
Convierte texto en voz usando tecnología avanzada de síntesis de voz (Text-to-Speech),
opcionalmente añade música de fondo para enriquecer el audio generado y procesa varios videos subidos por el usuario
para reproducirlos en secuencia y en bucle infinito.
El resultado final es un video que sincroniza el audio con la concatenación de los clips,
asegurando una transición suave entre ellos y un fade-out al final de cada ciclo. Además, el script está diseñado para
limpiar automáticamente los archivos temporales y los videos originales subidos, evitando acumulación innecesaria en el servidor.
Es ideal para generar contenido dinámico como videos motivacionales, presentaciones automáticas o material promocional.
"""

    return gr.Interface(
        fn=tts_interface,
        inputs=input_components,
        outputs=output_components,
        title="Multi-Video TTS con Bucle",
        description=description_text,
        css="#component-0 {max-width: 800px}",
    )
async def main():
    """Build the Gradio app, enable its request queue, and launch it."""
    app = await create_demo()
    app.queue()
    app.launch()
if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    asyncio.run(main())