# Creador-de-videos-con-imagen
#
# Running
#
# File size: 6,882 Bytes

import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import math
from pydub import AudioSegment
import subprocess

# Fetch the available voices
async def get_voices():
    """Return a mapping from display label to edge-tts voice short name.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
    to the voice's ``ShortName`` as reported by ``edge_tts.list_voices()``.
    """
    catalog = await edge_tts.list_voices()
    labelled = {}
    for entry in catalog:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled

# Text-to-speech conversion
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* into a temporary MP3 file using edge-tts.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text is
            rejected with a message instead of being sent to the service.
        voice: Voice label of the form ``"<ShortName> - <Locale> (<Gender>)"``;
            only the part before the first ``" - "`` is passed to edge-tts.
        rate: Speech-rate offset, formatted as a signed percentage. Float
            values are truncated to an integer.
        pitch: Pitch offset, formatted as a signed value in Hz. Float values
            are truncated to an integer.

    Returns:
        ``(path, None)`` on success, where *path* is the generated MP3 file
        (the caller owns and must delete it), or ``(None, message)`` when the
        input is invalid or synthesis fails.
    """
    # Whitespace-only text counts as missing text, so it gets the "enter text"
    # message rather than the misleading "select a voice" one.
    if not text or not text.strip():
        return None, "Please enter text and select a voice"
    if not voice:
        return None, "Please select a voice"

    output_path = None
    try:
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],
            # int() keeps the ``+d`` format valid when a slider yields a float.
            rate=f"{int(rate):+d}%",
            pitch=f"{int(pitch):+d}Hz",
        )

        # Reserve a unique path, then close the handle before edge-tts writes
        # to it so the file is never opened twice at once.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            output_path = tmp_file.name
        await communicate.save(output_path)
        return output_path, None
    except Exception as e:
        # Do not leave a partial or empty file behind when synthesis fails.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Speech generation failed: {str(e)}"

# Add background music (also deletes the original speech file)
def add_background_music(speech_path, bg_music_path):
    """Mix background music under a speech track and return the new MP3 path.

    The music is attenuated by 16 dB, repeated if it is shorter than the
    speech plus a 3-second tail, overlaid on the speech, and followed by a
    3-second fade-out of the music alone.

    Args:
        speech_path: Path of the speech audio file. It is deleted once the
            mixed file has been written successfully.
        bg_music_path: Path of the background music file.

    Returns:
        Path of a new temporary MP3 holding the mix (the caller owns it).
        If the background track is empty, nothing is mixed and *speech_path*
        is returned unchanged (and not deleted).
    """
    tail_ms = 3000
    speech = AudioSegment.from_file(speech_path)
    background = AudioSegment.from_file(bg_music_path) - 16  # lower gain by 16 dB

    # An empty track cannot be looped (it would divide by zero below), so the
    # speech is passed through untouched.
    if len(background) == 0:
        return speech_path

    required_ms = len(speech) + tail_ms
    if len(background) < required_ms:
        background = background * math.ceil(required_ms / len(background))

    combined = speech.overlay(background[:len(speech)])
    fade_out = background[len(speech):required_ms].fade_out(tail_ms)
    final_audio = combined + fade_out

    # Reserve a unique path and close the handle before pydub writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        output_path = tmp_file.name
    try:
        final_audio.export(output_path, format="mp3")
    except Exception:
        # Do not leak the reserved file when the export fails.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    # The speech-only file is superseded by the mix.
    try:
        os.remove(speech_path)
    except FileNotFoundError:
        pass
    return output_path

# Process multiple videos (also deletes the intermediate temporary files)
def process_videos(audio_path, video_files):
    """Concatenate the given videos, loop them under the audio, and encode.

    Args:
        audio_path: Path of the audio track for the final video.
        video_files: Non-empty sequence of uploaded videos, each either a
            path string or an object exposing the path as ``.name``.

    Returns:
        Path of the final temporary MP4 (the caller owns it).

    Raises:
        ValueError: If *video_files* is empty.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    if not video_files:
        raise ValueError("No video files provided")

    temp_files = []
    final_path = None
    try:
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds

        # Each handle is closed as soon as its unique path is reserved, so
        # ffmpeg never reads or writes a file this process still holds open.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".txt", encoding="utf-8"
        ) as list_file:
            temp_files.append(list_file.name)
            list_file.write(
                "\n".join(_concat_entry(_upload_path(v)) for v in video_files)
            )
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
            temp_files.append(concat_video.name)

        # Concatenate the videos without re-encoding
        subprocess.run([
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_file.name,
            "-c", "copy",
            concat_video.name
        ], check=True)

        # Build the final video
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
            final_path = final_video.name

        # NOTE(review): both fades start at the end of the audio while
        # -shortest is also set, so the output may end before the fade-out is
        # visible -- confirm the intended timing.
        subprocess.run([
            "ffmpeg", "-y",
            "-stream_loop", "-1",
            "-i", concat_video.name,
            "-i", audio_path,
            "-t", str(audio_duration + 3),
            "-c:v", "libx264",
            "-c:a", "aac",
            "-vf", "fade=t=out:st={}:d=3".format(audio_duration),
            "-af", "afade=t=out:st={}:d=3".format(audio_duration),
            "-shortest",
            final_path
        ], check=True)

        # Hand ownership to the caller; clearing final_path keeps the
        # cleanup below from deleting the result.
        result, final_path = final_path, None
        return result

    finally:
        # Remove the intermediates, plus the final file if encoding failed
        leftovers = list(temp_files)
        if final_path is not None:
            leftovers.append(final_path)
        for leftover in leftovers:
            try:
                os.remove(leftover)
            except OSError:
                pass


def _upload_path(upload):
    """Return the filesystem path of an upload (path string or ``.name`` holder)."""
    return getattr(upload, "name", upload)


def _concat_entry(path):
    """Format *path* as a ``file`` line for ffmpeg's concat demuxer, escaping quotes."""
    # Inside single quotes a literal quote is written as '\'' (close the
    # quote, add an escaped quote, reopen).
    escaped = str(path).replace("'", "'\\''")
    return f"file '{escaped}'"

# Main entry point (also deletes the originally uploaded videos)
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    """Run the full pipeline: speech synthesis, optional music, optional video.

    Args:
        text: Text to synthesize.
        voice: Voice label selected in the dropdown.
        rate: Speech-rate offset passed to ``text_to_speech``.
        pitch: Pitch offset passed to ``text_to_speech``.
        bg_music: Path of a background music file, or a falsy value for none.
        video_files: Uploaded videos, or a falsy value for none. After a
            video has been produced, uploads exposing ``.name`` are deleted.

    Returns:
        A triple ``(audio_path, video_path, message)``. On success the
        message is ``None`` and ``video_path`` is ``None`` when no videos
        were given. On invalid input or any error the first two items are
        ``None`` and the third is the result of ``gr.Warning(...)``.
    """
    temp_audio = None
    audio_returned = False
    try:
        # Generate the main speech audio
        temp_audio, warning = await text_to_speech(text, voice, rate, pitch)
        if warning:
            return None, None, gr.Warning(warning)

        # Add background music
        if bg_music:
            temp_audio = add_background_music(temp_audio, bg_music)

        # Process the videos
        video_path = None
        if video_files:
            video_path = process_videos(temp_audio, video_files)
            # Delete the uploaded originals. This is best effort: a failed
            # delete must not discard the video that was just produced.
            for video in video_files:
                if hasattr(video, 'name'):
                    try:
                        os.remove(video.name)
                    except OSError:
                        pass

        audio_returned = True
        return temp_audio, video_path, None

    except Exception as e:
        return None, None, gr.Warning(f"Processing error: {str(e)}")
    finally:
        # The audio file is returned to the caller on success, so it must be
        # kept then. It is removed only when the pipeline failed and the path
        # is not part of the result.
        if temp_audio and not audio_returned:
            try:
                os.remove(temp_audio)
            except OSError:
                pass

# Build the interface
async def create_demo():
    """Create the Gradio interface wired to ``tts_interface``.

    The voice dropdown lists an empty choice followed by the labels returned
    by ``get_voices()``. Components are created in the order text, voice,
    rate, pitch, background music, videos, then the three outputs.
    """
    voice_map = await get_voices()
    voice_choices = [""] + list(voice_map.keys())

    input_components = [
        gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
        gr.Dropdown(choices=voice_choices, label="Select Voice"),
        gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
        gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
        gr.Audio(label="Background Music", type="filepath"),
        gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple"),
    ]
    output_components = [
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Video(label="Final Video"),
        gr.Markdown(visible=False),
    ]

    # Written line by line so the trailing spaces and newlines of the
    # description text stay explicit.
    summary = (
        "\n"
        "Este script permite crear videos personalizados combinando texto, audio y múltiples clips de video. \n"
        "Convierte texto en voz usando tecnología avanzada de síntesis de voz (Text-to-Speech), \n"
        "opcionalmente añade música de fondo para enriquecer el audio generado y procesa varios videos subidos por el usuario \n"
        "para reproducirlos en secuencia y en bucle infinito. \n"
        "El resultado final es un video que sincroniza el audio con la concatenación de los clips, \n"
        "asegurando una transición suave entre ellos y un fade-out al final de cada ciclo. Además, el script está diseñado para \n"
        "limpiar automáticamente los archivos temporales y los videos originales subidos, evitando acumulación innecesaria en el servidor. \n"
        "Es ideal para generar contenido dinámico como videos motivacionales, presentaciones automáticas o material promocional.\n"
    )

    return gr.Interface(
        fn=tts_interface,
        inputs=input_components,
        outputs=output_components,
        title="Multi-Video TTS con Bucle",
        description=summary,
        css="#component-0 {max-width: 800px}",
    )

async def main():
    """Build the demo, then call its ``queue()`` and ``launch()`` methods."""
    app = await create_demo()
    app.queue()
    app.launch()

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())