File size: 5,444 Bytes
800530a 63f1d6d 800530a 299c2df 2e5ba54 4aea5de 800530a 2e5ba54 800530a 2e5ba54 63f1d6d 011525c 55cc4ac 8676795 011525c 8676795 011525c 8676795 011525c 299c2df 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 8529fe9 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c 2e5ba54 011525c d81bde6 2e5ba54 63f1d6d 55cc4ac 63f1d6d 011525c 800530a 209bf9f 63f1d6d 800530a 011525c 63f1d6d 2e5ba54 011525c d068ede 011525c d068ede |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import math
from pydub import AudioSegment
import subprocess
# Fetch the catalogue of available Edge TTS voices
async def get_voices():
    """Return a mapping of human-readable labels to voice short names.

    Keys look like "en-US-JennyNeural - en-US (Female)"; values are the
    ShortName identifiers that edge_tts.Communicate expects.
    """
    catalogue = await edge_tts.list_voices()
    labels = {}
    for entry in catalogue:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labels[label] = entry['ShortName']
    return labels
# Convert text to speech with edge-tts
async def text_to_speech(text, voice, rate, pitch):
    """Generate speech audio from *text* using the selected Edge TTS voice.

    Args:
        text: Text to synthesize.
        voice: Dropdown label ("ShortName - Locale (Gender)"); the ShortName
            prefix is extracted before the first " - ".
        rate: Speech rate adjustment in percent (may arrive as float from a slider).
        pitch: Pitch adjustment in Hz (may arrive as float from a slider).

    Returns:
        (mp3_path, None) on success, or (None, warning_message) on failure.
    """
    # Validate inputs first; report the most specific problem.
    # (Fixed: whitespace-only text previously produced the "select a voice"
    # message even when a voice was chosen.)
    if not text or not text.strip():
        return None, "Please enter text and select a voice"
    if not voice:
        return None, "Please select a voice"
    try:
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],
            # Coerce to int: Gradio sliders can deliver floats, and the
            # "+d" format code raises ValueError on a float.
            rate=f"{int(rate):+d}%",
            pitch=f"{int(pitch):+d}Hz"
        )
        # delete=False so the file survives for Gradio to serve;
        # the caller is responsible for cleanup.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            await communicate.save(tmp_file.name)
        return tmp_file.name, None
    except Exception as e:
        return None, f"Speech generation failed: {str(e)}"
# Mix background music under the generated speech
def add_background_music(speech_path, bg_music_path):
    """Overlay attenuated background music beneath the speech track.

    The music is lowered by 16 dB, looped until it covers the speech plus a
    3-second tail, overlaid under the speech, and the tail is faded out.
    Returns the path of a new temporary MP3 file (caller cleans it up).
    """
    voice_track = AudioSegment.from_file(speech_path)
    music_track = AudioSegment.from_file(bg_music_path) - 16  # 15% volume

    speech_ms = len(voice_track)
    tail_ms = 3000  # 3-second musical fade-out after the speech ends

    # Loop the music until it spans speech + tail.
    required = speech_ms + tail_ms
    if len(music_track) < required:
        repeats = math.ceil(required / len(music_track))
        music_track = music_track * repeats

    # Speech over music, followed by the faded tail.
    mixed = voice_track.overlay(music_track[:speech_ms])
    tail = music_track[speech_ms:speech_ms + tail_ms].fade_out(tail_ms)
    result = mixed + tail

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        result.export(tmp_file.name, format="mp3")
    return tmp_file.name
# Build the final looping video synced to the audio track
def process_videos(audio_path, video_files):
    """Concatenate the uploaded videos, loop them, and mux in the audio.

    The uploaded clips are concatenated (stream copy) via ffmpeg's concat
    demuxer, then looped indefinitely and trimmed to the audio duration plus
    a 3-second fade-out on both video and audio.
    Returns the path of the final temporary MP4 (caller cleans it up).
    """
    audio_duration = AudioSegment.from_file(audio_path).duration_seconds

    # Step 1: concatenate all uploaded clips into one video.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
        # The concat demuxer reads file paths from a text manifest.
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as list_file:
            manifest = "\n".join(f"file '{v.name}'" for v in video_files)
            list_file.write(manifest)
            list_file.close()
        subprocess.run([
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_file.name,
            "-c", "copy",
            concat_video.name
        ], check=True)

    # Step 2: loop the concatenated video for the audio's duration + fade tail.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
        subprocess.run([
            "ffmpeg", "-y",
            "-stream_loop", "-1",
            "-i", concat_video.name,
            "-i", audio_path,
            "-t", str(audio_duration + 3),
            "-c:v", "libx264",
            "-c:a", "aac",
            "-vf", "fade=t=out:st={}:d=3".format(audio_duration),
            "-af", "afade=t=out:st={}:d=3".format(audio_duration),
            "-shortest",
            final_video.name
        ], check=True)
    return final_video.name
# Main pipeline: TTS -> optional background music -> optional video
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    """Run the full pipeline and return (audio_path, video_path, warning).

    Any stage failure is surfaced as a gr.Warning in the third slot with the
    first two slots set to None.
    """
    # Generate the main speech audio
    audio_path, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        return None, None, gr.Warning(warning)
    try:
        # Mix in background music, replacing the speech-only file.
        if bg_music:
            speech_only = audio_path
            audio_path = add_background_music(audio_path, bg_music)
            # The speech-only temp file is now superseded; remove it so it
            # doesn't leak. (Fixed: the old `finally` block deleted the
            # *returned* audio file on every call, breaking the success path.)
            if os.path.exists(speech_only):
                os.remove(speech_only)
        # Build the video only when clips were uploaded.
        if video_files:
            video_path = process_videos(audio_path, video_files)
        else:
            video_path = None
        return audio_path, video_path, None
    except Exception as e:
        # Clean up the audio temp file only on failure — on success Gradio
        # still needs it to serve the result.
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
        return None, None, gr.Warning(f"Processing error: {str(e)}")
# Build the Gradio interface
async def create_demo():
    """Assemble and return the Gradio Interface for the TTS/video app."""
    voices = await get_voices()

    input_widgets = [
        gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
        gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice"),
        gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
        gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
        gr.Audio(label="Background Music", type="filepath"),
        gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple"),
    ]
    output_widgets = [
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Video(label="Final Video"),
        gr.Markdown(visible=False),
    ]
    sample_inputs = [
        ["Hello world! This is a test with multiple videos.",
         "en-US-JennyNeural - en-US (Female)",
         0, 0, None, None]
    ]

    return gr.Interface(
        fn=tts_interface,
        inputs=input_widgets,
        outputs=output_widgets,
        title="Multi-Video TTS with Loop",
        description="Create videos with: 1. Text-to-speech 2. Background music 3. Multiple video loop",
        examples=sample_inputs,
        css="#component-0 {max-width: 800px}"
    )
async def main():
    """Build the demo, enable request queueing, and launch the server."""
    demo = await create_demo()
    demo.queue()
    demo.launch()


if __name__ == "__main__":
    asyncio.run(main())