File size: 6,882 Bytes
800530a
 
63f1d6d
 
800530a
299c2df
2e5ba54
4aea5de
800530a
2e5ba54
800530a
 
 
 
2e5ba54
63f1d6d
011525c
 
55cc4ac
8676795
011525c
 
 
 
 
 
 
8676795
011525c
 
8676795
011525c
 
0945a8f
011525c
 
 
 
 
 
 
 
 
 
 
 
 
0945a8f
 
 
011525c
299c2df
0945a8f
011525c
0945a8f
 
 
011525c
0945a8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
011525c
2e5ba54
 
011525c
 
 
 
 
 
 
 
 
 
2e5ba54
011525c
 
0945a8f
 
 
 
 
 
2e5ba54
0945a8f
011525c
0945a8f
2e5ba54
0945a8f
 
 
 
 
011525c
 
0945a8f
011525c
2e5ba54
0945a8f
2e5ba54
0945a8f
 
 
 
 
011525c
0945a8f
011525c
2e5ba54
 
011525c
0945a8f
 
 
 
 
 
 
d81bde6
0945a8f
63f1d6d
 
55cc4ac
63f1d6d
 
 
011525c
 
 
 
800530a
209bf9f
63f1d6d
 
800530a
011525c
 
63f1d6d
a241f1d
 
 
 
 
 
 
 
 
 
 
 
d068ede
 
 
 
 
011525c
 
d068ede
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import math
from pydub import AudioSegment
import subprocess

# Fetch the available voices
async def get_voices():
    """Return a mapping of display label -> voice short name.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and is
    built from the entries returned by ``edge_tts.list_voices()``.
    """
    label_to_name = {}
    for entry in await edge_tts.list_voices():
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        label_to_name[label] = short_name
    return label_to_name

# Text-to-speech conversion
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` into a temporary MP3 file using edge-tts.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only values are
            rejected.
        voice: Voice label; only the part before the first ``" - "`` is
            passed to edge-tts as the voice name.
        rate: Speech-rate offset in percent. Rounded to the nearest integer
            because it is rendered with an integer format (e.g. ``+10%``).
        pitch: Pitch offset in Hz. Rounded like ``rate`` (e.g. ``-5Hz``).

    Returns:
        ``(path, None)`` on success, where ``path`` is the generated MP3, or
        ``(None, message)`` when validation or synthesis fails.
    """
    # Whitespace-only text is as unusable as empty text, so both get the
    # "enter text" message rather than falling through to the voice check.
    if not text or not text.strip():
        return None, "Please enter text and select a voice"
    if not voice:
        return None, "Please select a voice"

    output_path = None
    try:
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],
            # Sliders may deliver floats; the ":+d" format only accepts ints.
            rate=f"{int(round(rate)):+d}%",
            pitch=f"{int(round(pitch)):+d}Hz",
        )

        # Reserve a path and close the handle before edge-tts writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            output_path = tmp_file.name
        await communicate.save(output_path)
        return output_path, None
    except Exception as e:
        # Do not leave a partial/empty file behind when synthesis fails.
        if output_path and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError:
                pass  # best-effort cleanup; report the original failure
        return None, f"Speech generation failed: {str(e)}"

# Add background music (also deletes the original speech file)
def add_background_music(speech_path, bg_music_path):
    """Mix background music under a speech track and export the mix as MP3.

    The music is attenuated by 16 dB, repeated if needed so that it covers the
    speech plus a 3-second tail, overlaid on the speech, and the tail is faded
    out after the speech ends. Once the mix has been exported, the file at
    ``speech_path`` is deleted.

    Args:
        speech_path: Path of the speech audio file (removed on success).
        bg_music_path: Path of the background music file.

    Returns:
        Path of the newly created MP3 containing the mix.

    Raises:
        ValueError: If the background music track has zero length.
    """
    tail_ms = 3000  # music keeps playing (and fades) this long after speech

    speech = AudioSegment.from_file(speech_path)
    background = AudioSegment.from_file(bg_music_path) - 16  # -16 dB, roughly 15% amplitude

    # A zero-length track cannot be looped (and would divide by zero below).
    if len(background) == 0:
        raise ValueError("Background music track is empty")

    needed_ms = len(speech) + tail_ms
    if len(background) < needed_ms:
        background = background * math.ceil(needed_ms / len(background))

    combined = speech.overlay(background[:len(speech)])
    fade_out = background[len(speech):needed_ms].fade_out(tail_ms)
    final_audio = combined + fade_out

    # Reserve a path and close the handle before exporting into it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        output_path = tmp_file.name
    try:
        final_audio.export(output_path, format="mp3")
    except Exception:
        # Do not leak the reserved file when the export fails.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise

    # The speech-only file is superseded by the mix; remove it.
    if os.path.exists(speech_path):
        os.remove(speech_path)
    return output_path

# Process multiple videos (also removes its temporary files)
def process_videos(audio_path, video_files):
    """Build the final MP4: the uploaded clips, joined and looped, under the audio.

    Two ffmpeg passes are run:

    1. The clips are joined with the concat demuxer using stream copy.
    2. The joined video is looped (``-stream_loop -1``), paired with the audio
       at ``audio_path`` and re-encoded to H.264/AAC with fade-out filters.

    Intermediate files (the concat list and the joined video) are always
    removed; the final video is removed as well if encoding fails.

    Args:
        audio_path: Path of the audio file used as the soundtrack.
        video_files: Uploaded clips, either objects exposing a ``.name`` path
            or plain path strings.

    Returns:
        Path of the encoded MP4.

    Raises:
        subprocess.CalledProcessError: If either ffmpeg invocation fails.
    """
    audio_duration = AudioSegment.from_file(audio_path).duration_seconds

    # Accept both file-like uploads (with ``.name``) and plain path strings.
    clip_paths = [str(getattr(clip, "name", clip)) for clip in video_files]

    temp_files = []
    final_path = None
    try:
        # Reserve output paths and close the handles before ffmpeg writes to
        # them; "-y" lets ffmpeg overwrite the empty placeholder files.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
            concat_path = concat_video.name
        temp_files.append(concat_path)

        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as list_file:
            list_path = list_file.name
            temp_files.append(list_path)
            # Inside single quotes ffmpeg needs a literal quote written as '\''.
            list_file.write("\n".join(
                "file '{}'".format(path.replace("'", "'\\''"))
                for path in clip_paths
            ))

        # Pass 1: concatenate the clips without re-encoding.
        subprocess.run([
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_path,
            "-c", "copy",
            concat_path
        ], check=True)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
            final_path = final_video.name

        # Pass 2: loop the joined video under the audio track.
        # NOTE(review): with "-shortest" the output presumably ends when the
        # audio does, so fades that start at audio_duration may never be
        # seen/heard -- confirm the intended fade start.
        subprocess.run([
            "ffmpeg", "-y",
            "-stream_loop", "-1",
            "-i", concat_path,
            "-i", audio_path,
            # Explicit mapping: picture from the clips, sound from audio_path.
            # Without it ffmpeg's default stream selection may choose the
            # clips' own audio track instead.
            "-map", "0:v:0",
            "-map", "1:a:0",
            "-t", str(audio_duration + 3),
            "-c:v", "libx264",
            "-c:a", "aac",
            "-vf", f"fade=t=out:st={audio_duration}:d=3",
            "-af", f"afade=t=out:st={audio_duration}:d=3",
            "-shortest",
            final_path
        ], check=True)

        return final_path

    except Exception:
        # A failed encode must not leave a partial output file behind.
        if final_path is not None:
            temp_files.append(final_path)
        raise
    finally:
        # Remove intermediate files (and the final one after a failure).
        for leftover in temp_files:
            if os.path.exists(leftover):
                os.remove(leftover)

# Main handler (also deletes the original uploaded videos)
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    """Gradio handler: synthesize speech, optionally add music and build a video.

    Args:
        text: Text to convert to speech.
        voice: Selected voice label.
        rate: Speech-rate offset passed to ``text_to_speech``.
        pitch: Pitch offset passed to ``text_to_speech``.
        bg_music: Path of an optional background music file (falsy to skip).
        video_files: Optional uploaded clips (falsy to skip). After a video
            has been produced the uploaded originals are deleted.

    Returns:
        ``(audio_path, video_path, None)`` on success (``video_path`` is
        ``None`` when no clips were given), or
        ``(None, None, gr.Warning(...))`` when validation or processing fails.
    """
    temp_audio = None
    video_path = None  # defined up front so the error path can always read it
    try:
        # Generate the main speech track.
        temp_audio, warning = await text_to_speech(text, voice, rate, pitch)
        if warning:
            return None, None, gr.Warning(warning)

        # Mix in background music (replaces temp_audio with the mixed file).
        if bg_music:
            temp_audio = add_background_music(temp_audio, bg_music)

        # Build the video and drop the uploaded originals.
        if video_files:
            video_path = process_videos(temp_audio, video_files)
            for video in video_files:
                upload_path = getattr(video, "name", video)
                if isinstance(upload_path, str) and os.path.exists(upload_path):
                    os.remove(upload_path)

        # temp_audio is returned as an output, so it must still exist after
        # this function returns; it is deliberately NOT deleted here.
        return temp_audio, video_path, None

    except Exception as e:
        # Nothing is returned on failure, so remove whatever was produced.
        for leftover in (temp_audio, video_path):
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError:
                    pass  # best-effort cleanup; report the original error
        return None, None, gr.Warning(f"Processing error: {str(e)}")

# Build the interface
async def create_demo():
    """Build the Gradio interface wired to ``tts_interface``.

    The voice dropdown is filled with the labels returned by ``get_voices()``
    plus an empty first entry. Inputs and outputs are declared in the order
    ``tts_interface`` takes its parameters and returns its values.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    voices = await get_voices()

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice"),
            # step=1 keeps the values integral: the speech code renders rate
            # and pitch with an integer format, and without an explicit step
            # Gradio may derive a fractional one from the slider range.
            gr.Slider(-50, 50, 0, step=1, label="Speech Rate (%)"),
            gr.Slider(-20, 20, 0, step=1, label="Pitch (Hz)"),
            gr.Audio(label="Background Music", type="filepath"),
            gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple")
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Final Video"),
            gr.Markdown(visible=False)
        ],
        title="Multi-Video TTS con Bucle",
        description="""
Este script permite crear videos personalizados combinando texto, audio y múltiples clips de video. 
Convierte texto en voz usando tecnología avanzada de síntesis de voz (Text-to-Speech), 
opcionalmente añade música de fondo para enriquecer el audio generado y procesa varios videos subidos por el usuario 
para reproducirlos en secuencia y en bucle infinito. 
El resultado final es un video que sincroniza el audio con la concatenación de los clips, 
asegurando una transición suave entre ellos y un fade-out al final de cada ciclo. Además, el script está diseñado para 
limpiar automáticamente los archivos temporales y los videos originales subidos, evitando acumulación innecesaria en el servidor. 
Es ideal para generar contenido dinámico como videos motivacionales, presentaciones automáticas o material promocional.
""",
        css="#component-0 {max-width: 800px}"
    )
    return demo

async def main():
    """Create the interface, enable its request queue and start the server."""
    app = await create_demo()
    app.queue()
    app.launch()

# Script entry point: run the async main() when executed directly.
if __name__ == "__main__":
    asyncio.run(main())