import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import math
from pydub import AudioSegment
import subprocess
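
# Note: besides the Python packages above, the ffmpeg binary must be on PATH;
# pydub uses it to export MP3 and the video steps below invoke it directly via subprocess.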

# Fetch the available edge-tts voices
async def get_voices():
    voices = await edge_tts.list_voices()
    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}

# Convert text to speech
async def text_to_speech(text, voice, rate, pitch):
    if not text.strip():
        return None, "Please enter some text to convert"
    if not voice:
        return None, "Please select a voice"

    try:
        communicate = edge_tts.Communicate(
            text,
            voice.split(" - ")[0],
            rate=f"{rate:+d}%",
            pitch=f"{pitch:+d}Hz"
        )
        
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            await communicate.save(tmp_file.name)
            return tmp_file.name, None
    except Exception as e:
        return None, f"Speech generation failed: {str(e)}"

# Mix background music under the generated speech
def add_background_music(speech_path, bg_music_path):
    speech = AudioSegment.from_file(speech_path)
    background = AudioSegment.from_file(bg_music_path) - 16  # lower by 16 dB (~15% of original amplitude)
    
    # Make sure the background track is at least as long as the speech plus a 3 s fade-out tail
    if len(background) < len(speech) + 3000:
        background = background * math.ceil((len(speech)+3000)/len(background))
    
    # Overlay the speech on the background, then append a 3 s faded-out tail
    combined = speech.overlay(background[:len(speech)])
    fade_out = background[len(speech):len(speech)+3000].fade_out(3000)
    final_audio = combined + fade_out
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        final_audio.export(tmp_file.name, format="mp3")
        return tmp_file.name

# Process multiple videos: concatenate, loop to cover the audio, and add fades
def process_videos(audio_path, video_files):
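    # Assumption: video_files are Gradio file objects exposing a .name path
    # (gr.File uploads in older Gradio releases); if they arrive as plain path
    # strings instead, use v rather than v.name in the concat list below.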
    audio_duration = AudioSegment.from_file(audio_path).duration_seconds
    
    # Concatenate the uploaded clips into a single video
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as concat_video:
        # Write the ffmpeg concat demuxer list file
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as list_file:
            list_file.write("\n".join([f"file '{v.name}'" for v in video_files]))
            list_file.close()
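            # close() flushes the list to disk before ffmpeg reads it, and
            # "-safe 0" lets the concat demuxer accept absolute temp-file paths.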
            
            subprocess.run([
                "ffmpeg", "-y",
                "-f", "concat",
                "-safe", "0",
                "-i", list_file.name,
                "-c", "copy",
                concat_video.name
            ], check=True)
        
        # Loop the concatenated video and mux it with the audio
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as final_video:
            subprocess.run([
                "ffmpeg", "-y",
                "-stream_loop", "-1",
                "-i", concat_video.name,
                "-i", audio_path,
                "-t", str(audio_duration + 3),
                "-c:v", "libx264",
                "-c:a", "aac",
                "-vf", "fade=t=out:st={}:d=3".format(audio_duration),
                "-af", "afade=t=out:st={}:d=3".format(audio_duration),
                "-shortest",
                final_video.name
            ], check=True)
            
            return final_video.name

# Main pipeline driving the Gradio interface
async def tts_interface(text, voice, rate, pitch, bg_music, video_files):
    # Generate the speech track
    audio_path, warning = await text_to_speech(text, voice, rate, pitch)
    if warning:
        return None, None, gr.Warning(warning)
    
    try:
        # Mix in background music if provided
        if bg_music:
            audio_path = add_background_music(audio_path, bg_music)
        
        # Build the looped video if any clips were uploaded
        if video_files:
            video_path = process_videos(audio_path, video_files)
        else:
            video_path = None
        
        return audio_path, video_path, None
    
    except Exception as e:
        # Clean up the intermediate audio only on failure; on success the path is
        # returned to Gradio, so the file must remain on disk to be served.
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
        return None, None, gr.Warning(f"Processing error: {str(e)}")

# Build the Gradio interface
async def create_demo():
    voices = await get_voices()

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice"),
            gr.Slider(-50, 50, 0, label="Speech Rate (%)"),
            gr.Slider(-20, 20, 0, label="Pitch (Hz)"),
            gr.Audio(label="Background Music", type="filepath"),
            gr.File(label="Upload Videos", file_types=[".mp4", ".mov"], file_count="multiple")
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Final Video"),
            gr.Markdown(visible=False)
        ],
        title="Multi-Video TTS with Loop",
        description="Create videos with: 1. Text-to-speech 2. Background music 3. Multiple video loop",
        examples=[
            ["Hello world! This is a test with multiple videos.", 
             "en-US-JennyNeural - en-US (Female)", 
             0, 0, None, None]
        ],
        css="#component-0 {max-width: 800px}"
    )
    return demo

async def main():
    demo = await create_demo()
    demo.queue()
    demo.launch()

if __name__ == "__main__":
    asyncio.run(main())