Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,24 +7,20 @@ import edge_tts
|
|
7 |
import gradio as gr
|
8 |
from pydub import AudioSegment
|
9 |
|
10 |
-
# Configuración de Logs
|
11 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
12 |
|
13 |
-
# CONSTANTES DE ARCHIVOS
|
14 |
INTRO_VIDEO = "introvideo.mp4"
|
15 |
OUTRO_VIDEO = "outrovideo.mp4"
|
16 |
MUSIC_BG = "musicafondo.mp3"
|
17 |
EJEMPLO_VIDEO = "ejemplo.mp4"
|
18 |
|
19 |
-
# Validar existencia de archivos
|
20 |
for file in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, EJEMPLO_VIDEO]:
|
21 |
if not os.path.exists(file):
|
22 |
logging.error(f"Falta archivo necesario: {file}")
|
23 |
raise FileNotFoundError(f"Falta: {file}")
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
TRANSITION_DURATION = 1.5 # Duración del efecto slide
|
28 |
|
29 |
def eliminar_archivo_tiempo(ruta, delay=1800):
|
30 |
def eliminar():
|
@@ -44,11 +40,8 @@ async def generar_tts(texto, voz, duracion_total):
|
|
44 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_tts:
|
45 |
await communicate.save(tmp_tts.name)
|
46 |
tts_audio = AudioFileClip(tmp_tts.name)
|
47 |
-
|
48 |
-
# Asegurar que el TTS no exceda la duración del video
|
49 |
if tts_audio.duration > duracion_total:
|
50 |
tts_audio = tts_audio.subclip(0, duracion_total)
|
51 |
-
|
52 |
return tts_audio, tmp_tts.name
|
53 |
except Exception as e:
|
54 |
logging.error(f"Fallo en TTS: {str(e)}")
|
@@ -60,25 +53,19 @@ def crear_musica_fondo(duracion_total):
|
|
60 |
repeticiones = needed_ms // len(bg_music) + 1
|
61 |
bg_music = bg_music * repeticiones
|
62 |
bg_music = bg_music[:needed_ms].fade_out(1000)
|
63 |
-
|
64 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_bg:
|
65 |
bg_music.export(tmp_bg.name, format="mp3")
|
66 |
return AudioFileClip(tmp_bg.name).volumex(0.15), tmp_bg.name
|
67 |
|
68 |
def create_slide_transition(clip1, clip2, duration=TRANSITION_DURATION):
|
69 |
-
"""Transición slide con movimiento más pronunciado"""
|
70 |
-
# Tomar la última parte del clip1 y la primera parte del clip2
|
71 |
part1 = clip1.subclip(clip1.duration - duration)
|
72 |
part2 = clip2.subclip(0, duration)
|
73 |
-
|
74 |
-
# Crear animación de deslizamiento
|
75 |
transition = CompositeVideoClip([
|
76 |
part1.fx(vfx.fadeout, duration),
|
77 |
part2.fx(vfx.fadein, duration).set_position(
|
78 |
-
lambda t: ('center', 720 - (720 * (t/duration)))
|
79 |
)
|
80 |
], size=(1280, 720)).set_duration(duration)
|
81 |
-
|
82 |
return transition
|
83 |
|
84 |
async def procesar_video(video_input, texto_tts, voz_seleccionada):
|
@@ -88,70 +75,38 @@ async def procesar_video(video_input, texto_tts, voz_seleccionada):
|
|
88 |
logging.info("Iniciando procesamiento")
|
89 |
video_original = VideoFileClip(video_input, target_resolution=(720, 1280))
|
90 |
duracion_video = video_original.duration
|
91 |
-
|
92 |
-
# Generar TTS y música de fondo
|
93 |
tts_audio, tts_path = await generar_tts(texto_tts, voz_seleccionada, duracion_video)
|
94 |
bg_audio, bg_path = crear_musica_fondo(duracion_video)
|
95 |
temp_files.extend([tts_path, bg_path])
|
96 |
-
|
97 |
-
# Combinar audios
|
98 |
audio_original = video_original.audio.volumex(0.7) if video_original.audio else None
|
99 |
audios = [bg_audio.set_duration(duracion_video)]
|
100 |
if audio_original:
|
101 |
audios.append(audio_original)
|
102 |
audios.append(tts_audio.set_start(0).volumex(0.85))
|
103 |
audio_final = CompositeAudioClip(audios).set_duration(duracion_video)
|
104 |
-
|
105 |
-
# CORRECCIÓN: Simplificar la creación de segmentos y transiciones
|
106 |
-
# Dividir el video en segmentos exactos de 30 segundos y aplicar transiciones solo en esos puntos
|
107 |
-
|
108 |
-
# Crear un único clip con todo el contenido, con transiciones solo cada 30 segundos
|
109 |
video_final = video_original.copy()
|
110 |
-
|
111 |
-
# Si es necesario, realizar cortes y transiciones cada 30 segundos
|
112 |
if duracion_video > SEGMENT_DURATION:
|
113 |
clips = []
|
114 |
num_segments = int(duracion_video // SEGMENT_DURATION) + (1 if duracion_video % SEGMENT_DURATION > 0 else 0)
|
115 |
-
|
116 |
for i in range(num_segments):
|
117 |
start_time = i * SEGMENT_DURATION
|
118 |
end_time = min(start_time + SEGMENT_DURATION, duracion_video)
|
119 |
-
|
120 |
-
# Obtener segmento actual
|
121 |
segment = video_original.subclip(start_time, end_time)
|
122 |
-
|
123 |
-
# Para el primer segmento, solo añadirlo al resultado
|
124 |
if i == 0:
|
125 |
clips.append(segment)
|
126 |
else:
|
127 |
-
# Para los demás segmentos, añadir transición
|
128 |
prev_segment = clips[-1]
|
129 |
-
# Crear transición entre segmentos
|
130 |
transition = create_slide_transition(prev_segment, segment)
|
131 |
-
|
132 |
-
# Recortar el segmento anterior para que termine justo antes de la transición
|
133 |
prev_end = prev_segment.duration - TRANSITION_DURATION
|
134 |
if prev_end > 0:
|
135 |
clips[-1] = prev_segment.subclip(0, prev_end)
|
136 |
-
|
137 |
-
# Añadir la transición
|
138 |
clips.append(transition)
|
139 |
-
|
140 |
-
# Añadir el segmento actual, empezando después de la transición
|
141 |
clips.append(segment)
|
142 |
-
|
143 |
-
# Combinar todos los clips
|
144 |
video_final = concatenate_videoclips(clips, method="compose")
|
145 |
-
|
146 |
-
# Establecer el audio final
|
147 |
video_final = video_final.set_audio(audio_final)
|
148 |
-
|
149 |
-
# Agregar intro y outro
|
150 |
intro = VideoFileClip(INTRO_VIDEO, target_resolution=(720, 1280))
|
151 |
outro = VideoFileClip(OUTRO_VIDEO, target_resolution=(720, 1280))
|
152 |
video_final = concatenate_videoclips([intro, video_final, outro], method="compose")
|
153 |
-
|
154 |
-
# Renderizado final
|
155 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
|
156 |
video_final.write_videofile(
|
157 |
tmp.name,
|
@@ -171,7 +126,6 @@ async def procesar_video(video_input, texto_tts, voz_seleccionada):
|
|
171 |
eliminar_archivo_tiempo(tmp.name, 1800)
|
172 |
logging.info(f"Video final guardado: {tmp.name}")
|
173 |
return tmp.name
|
174 |
-
|
175 |
except Exception as e:
|
176 |
logging.error(f"Fallo general: {str(e)}")
|
177 |
raise
|
@@ -191,10 +145,8 @@ async def procesar_video(video_input, texto_tts, voz_seleccionada):
|
|
191 |
except Exception as e:
|
192 |
logging.warning(f"Error al cerrar recursos: {str(e)}")
|
193 |
|
194 |
-
# Interfaz Gradio
|
195 |
with gr.Blocks() as demo:
|
196 |
gr.Markdown("# Editor de Video con IA")
|
197 |
-
|
198 |
with gr.Tab("Principal"):
|
199 |
video_input = gr.Video(label="Subir video")
|
200 |
texto_tts = gr.Textbox(
|
@@ -203,75 +155,73 @@ with gr.Blocks() as demo:
|
|
203 |
placeholder="Escribe aquí tu texto..."
|
204 |
)
|
205 |
voz_seleccionada = gr.Dropdown(
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
)
|
259 |
procesar_btn = gr.Button("Generar Video")
|
260 |
video_output = gr.Video(label="Video Procesado")
|
261 |
-
|
262 |
with gr.Accordion("Ejemplos de Uso", open=False):
|
263 |
gr.Examples(
|
264 |
examples=[[EJEMPLO_VIDEO, "¡Hola! Esto es una prueba. Suscríbete al canal."]],
|
265 |
inputs=[video_input, texto_tts],
|
266 |
label="Ejemplos"
|
267 |
)
|
268 |
-
|
269 |
procesar_btn.click(
|
270 |
procesar_video,
|
271 |
inputs=[video_input, texto_tts, voz_seleccionada],
|
272 |
outputs=video_output
|
273 |
)
|
274 |
-
|
275 |
gr.Markdown("""
|
276 |
### ℹ️ Notas importantes:
|
277 |
- Las transiciones ocurren solamente cada 30 segundos
|
@@ -280,10 +230,5 @@ with gr.Blocks() as demo:
|
|
280 |
- Para mejores resultados, usa videos de dimensiones 720p o 1080p
|
281 |
""")
|
282 |
|
283 |
-
procesar_btn.click(
|
284 |
-
procesar_video,
|
285 |
-
inputs=[video_input, texto_tts, voz_seleccionada],
|
286 |
-
outputs=video_output
|
287 |
-
)
|
288 |
if __name__ == "__main__":
|
289 |
demo.queue().launch()
|
|
|
7 |
import gradio as gr
|
8 |
from pydub import AudioSegment
|
9 |
|
|
|
10 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
11 |
|
|
|
12 |
INTRO_VIDEO = "introvideo.mp4"
|
13 |
OUTRO_VIDEO = "outrovideo.mp4"
|
14 |
MUSIC_BG = "musicafondo.mp3"
|
15 |
EJEMPLO_VIDEO = "ejemplo.mp4"
|
16 |
|
|
|
17 |
for file in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, EJEMPLO_VIDEO]:
|
18 |
if not os.path.exists(file):
|
19 |
logging.error(f"Falta archivo necesario: {file}")
|
20 |
raise FileNotFoundError(f"Falta: {file}")
|
21 |
|
22 |
+
SEGMENT_DURATION = 30
|
23 |
+
TRANSITION_DURATION = 1.5
|
|
|
24 |
|
25 |
def eliminar_archivo_tiempo(ruta, delay=1800):
|
26 |
def eliminar():
|
|
|
40 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_tts:
|
41 |
await communicate.save(tmp_tts.name)
|
42 |
tts_audio = AudioFileClip(tmp_tts.name)
|
|
|
|
|
43 |
if tts_audio.duration > duracion_total:
|
44 |
tts_audio = tts_audio.subclip(0, duracion_total)
|
|
|
45 |
return tts_audio, tmp_tts.name
|
46 |
except Exception as e:
|
47 |
logging.error(f"Fallo en TTS: {str(e)}")
|
|
|
53 |
repeticiones = needed_ms // len(bg_music) + 1
|
54 |
bg_music = bg_music * repeticiones
|
55 |
bg_music = bg_music[:needed_ms].fade_out(1000)
|
|
|
56 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_bg:
|
57 |
bg_music.export(tmp_bg.name, format="mp3")
|
58 |
return AudioFileClip(tmp_bg.name).volumex(0.15), tmp_bg.name
|
59 |
|
60 |
def create_slide_transition(clip1, clip2, duration=TRANSITION_DURATION):
|
|
|
|
|
61 |
part1 = clip1.subclip(clip1.duration - duration)
|
62 |
part2 = clip2.subclip(0, duration)
|
|
|
|
|
63 |
transition = CompositeVideoClip([
|
64 |
part1.fx(vfx.fadeout, duration),
|
65 |
part2.fx(vfx.fadein, duration).set_position(
|
66 |
+
lambda t: ('center', 720 - (720 * (t/duration)))
|
67 |
)
|
68 |
], size=(1280, 720)).set_duration(duration)
|
|
|
69 |
return transition
|
70 |
|
71 |
async def procesar_video(video_input, texto_tts, voz_seleccionada):
|
|
|
75 |
logging.info("Iniciando procesamiento")
|
76 |
video_original = VideoFileClip(video_input, target_resolution=(720, 1280))
|
77 |
duracion_video = video_original.duration
|
|
|
|
|
78 |
tts_audio, tts_path = await generar_tts(texto_tts, voz_seleccionada, duracion_video)
|
79 |
bg_audio, bg_path = crear_musica_fondo(duracion_video)
|
80 |
temp_files.extend([tts_path, bg_path])
|
|
|
|
|
81 |
audio_original = video_original.audio.volumex(0.7) if video_original.audio else None
|
82 |
audios = [bg_audio.set_duration(duracion_video)]
|
83 |
if audio_original:
|
84 |
audios.append(audio_original)
|
85 |
audios.append(tts_audio.set_start(0).volumex(0.85))
|
86 |
audio_final = CompositeAudioClip(audios).set_duration(duracion_video)
|
|
|
|
|
|
|
|
|
|
|
87 |
video_final = video_original.copy()
|
|
|
|
|
88 |
if duracion_video > SEGMENT_DURATION:
|
89 |
clips = []
|
90 |
num_segments = int(duracion_video // SEGMENT_DURATION) + (1 if duracion_video % SEGMENT_DURATION > 0 else 0)
|
|
|
91 |
for i in range(num_segments):
|
92 |
start_time = i * SEGMENT_DURATION
|
93 |
end_time = min(start_time + SEGMENT_DURATION, duracion_video)
|
|
|
|
|
94 |
segment = video_original.subclip(start_time, end_time)
|
|
|
|
|
95 |
if i == 0:
|
96 |
clips.append(segment)
|
97 |
else:
|
|
|
98 |
prev_segment = clips[-1]
|
|
|
99 |
transition = create_slide_transition(prev_segment, segment)
|
|
|
|
|
100 |
prev_end = prev_segment.duration - TRANSITION_DURATION
|
101 |
if prev_end > 0:
|
102 |
clips[-1] = prev_segment.subclip(0, prev_end)
|
|
|
|
|
103 |
clips.append(transition)
|
|
|
|
|
104 |
clips.append(segment)
|
|
|
|
|
105 |
video_final = concatenate_videoclips(clips, method="compose")
|
|
|
|
|
106 |
video_final = video_final.set_audio(audio_final)
|
|
|
|
|
107 |
intro = VideoFileClip(INTRO_VIDEO, target_resolution=(720, 1280))
|
108 |
outro = VideoFileClip(OUTRO_VIDEO, target_resolution=(720, 1280))
|
109 |
video_final = concatenate_videoclips([intro, video_final, outro], method="compose")
|
|
|
|
|
110 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
|
111 |
video_final.write_videofile(
|
112 |
tmp.name,
|
|
|
126 |
eliminar_archivo_tiempo(tmp.name, 1800)
|
127 |
logging.info(f"Video final guardado: {tmp.name}")
|
128 |
return tmp.name
|
|
|
129 |
except Exception as e:
|
130 |
logging.error(f"Fallo general: {str(e)}")
|
131 |
raise
|
|
|
145 |
except Exception as e:
|
146 |
logging.warning(f"Error al cerrar recursos: {str(e)}")
|
147 |
|
|
|
148 |
with gr.Blocks() as demo:
|
149 |
gr.Markdown("# Editor de Video con IA")
|
|
|
150 |
with gr.Tab("Principal"):
|
151 |
video_input = gr.Video(label="Subir video")
|
152 |
texto_tts = gr.Textbox(
|
|
|
155 |
placeholder="Escribe aquí tu texto..."
|
156 |
)
|
157 |
voz_seleccionada = gr.Dropdown(
|
158 |
+
label="Voz",
|
159 |
+
choices=[
|
160 |
+
"es-ES-AlvaroNeural", "es-MX-BeatrizNeural",
|
161 |
+
"es-ES-ElviraNeural", "es-MX-JavierNeural",
|
162 |
+
"es-AR-ElenaNeural", "es-AR-TomasNeural",
|
163 |
+
"es-CL-CatalinaNeural", "es-CL-LorenzoNeural",
|
164 |
+
"es-CO-SofiaNeural", "es-CO-GonzaloNeural",
|
165 |
+
"es-PE-CamilaNeural", "es-PE-AlexNeural",
|
166 |
+
"es-VE-MariaNeural", "es-VE-ManuelNeural",
|
167 |
+
"es-US-AlonsoNeural", "es-US-PalomaNeural",
|
168 |
+
"es-ES-AbrilNeural", "es-ES-DarioNeural",
|
169 |
+
"es-ES-HelenaRUS", "es-ES-LauraNeural",
|
170 |
+
"es-ES-PabloNeural", "es-ES-TriniNeural",
|
171 |
+
"en-US-AriaNeural", "en-US-GuyNeural",
|
172 |
+
"en-US-JennyNeural", "en-US-AmberNeural",
|
173 |
+
"en-US-AnaNeural", "en-US-AshleyNeural",
|
174 |
+
"en-US-BrandonNeural", "en-US-ChristopherNeural",
|
175 |
+
"en-US-CoraNeural", "en-US-DavisNeural",
|
176 |
+
"en-US-ElizabethNeural", "en-US-EricNeural",
|
177 |
+
"en-US-GinaNeural", "en-US-JacobNeural",
|
178 |
+
"en-US-JaneNeural", "en-US-JasonNeural",
|
179 |
+
"en-US-MichelleNeural", "en-US-MonicaNeural",
|
180 |
+
"en-US-SaraNeural", "en-US-SteffanNeural",
|
181 |
+
"en-US-TonyNeural", "en-US-YaraNeural",
|
182 |
+
"fr-FR-AlainNeural", "fr-FR-BrigitteNeural",
|
183 |
+
"fr-FR-CelesteNeural", "fr-FR-ClaudeNeural",
|
184 |
+
"fr-FR-CoralieNeural", "fr-FR-DeniseNeural",
|
185 |
+
"fr-FR-EloiseNeural", "fr-FR-HenriNeural",
|
186 |
+
"fr-FR-JacquelineNeural", "fr-FR-JeromeNeural",
|
187 |
+
"fr-FR-JosephineNeural", "fr-FR-MauriceNeural",
|
188 |
+
"fr-FR-YvesNeural", "fr-FR-YvetteNeural",
|
189 |
+
"de-DE-AmalaNeural", "de-DE-BerndNeural",
|
190 |
+
"de-DE-ChristophNeural", "de-DE-ConradNeural",
|
191 |
+
"de-DE-ElkeNeural", "de-DE-GiselaNeural",
|
192 |
+
"de-DE-KasperNeural", "de-DE-KatjaNeural",
|
193 |
+
"de-DE-KillianNeural", "de-DE-KlarissaNeural",
|
194 |
+
"de-DE-KlausNeural", "de-DE-LouisaNeural",
|
195 |
+
"de-DE-MajaNeural", "de-DE-RalfNeural",
|
196 |
+
"de-DE-TanjaNeural", "de-DE-ViktoriaNeural",
|
197 |
+
"it-IT-BenignoNeural", "it-IT-CalimeroNeural",
|
198 |
+
"it-IT-CataldoNeural", "it-IT-DiegoNeural",
|
199 |
+
"it-IT-ElsaNeural", "it-IT-FabiolaNeural",
|
200 |
+
"it-IT-GianniNeural", "it-IT-ImeldaNeural",
|
201 |
+
"it-IT-IrmaNeural", "it-IT-IsabellaNeural",
|
202 |
+
"it-IT-LisandroNeural", "it-IT-PalmiraNeural",
|
203 |
+
"it-IT-PierinaNeural", "it-IT-RinaldoNeural",
|
204 |
+
"ja-JP-AoiNeural", "ja-JP-DaichiNeural",
|
205 |
+
"ja-JP-HarukaNeural", "ja-JP-KeitaNeural",
|
206 |
+
"ja-JP-MayuNeural", "ja-JP-NanamiNeural",
|
207 |
+
"ja-JP-NaokiNeural", "ja-JP-ShioriNeural"
|
208 |
+
],
|
209 |
+
value="es-ES-AlvaroNeural"
|
210 |
+
)
|
211 |
procesar_btn = gr.Button("Generar Video")
|
212 |
video_output = gr.Video(label="Video Procesado")
|
|
|
213 |
with gr.Accordion("Ejemplos de Uso", open=False):
|
214 |
gr.Examples(
|
215 |
examples=[[EJEMPLO_VIDEO, "¡Hola! Esto es una prueba. Suscríbete al canal."]],
|
216 |
inputs=[video_input, texto_tts],
|
217 |
label="Ejemplos"
|
218 |
)
|
|
|
219 |
procesar_btn.click(
|
220 |
procesar_video,
|
221 |
inputs=[video_input, texto_tts, voz_seleccionada],
|
222 |
outputs=video_output
|
223 |
)
|
224 |
+
|
225 |
gr.Markdown("""
|
226 |
### ℹ️ Notas importantes:
|
227 |
- Las transiciones ocurren solamente cada 30 segundos
|
|
|
230 |
- Para mejores resultados, usa videos de dimensiones 720p o 1080p
|
231 |
""")
|
232 |
|
|
|
|
|
|
|
|
|
|
|
233 |
if __name__ == "__main__":
|
234 |
demo.queue().launch()
|