import tempfile
import logging
import os
from threading import Timer

import edge_tts
import gradio as gr
# Requires MoviePy 1.x: the moviepy.editor module was removed in MoviePy 2.0
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    VideoFileClip,
    concatenate_videoclips,
    vfx,
)
from pydub import AudioSegment

# Logging configuration
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Asset file constants
INTRO_VIDEO = "introvideo.mp4"
OUTRO_VIDEO = "outrovideo.mp4"
MUSIC_BG = "musicafondo.mp3"
EJEMPLO_VIDEO = "ejemplo.mp4"

# Validate that the required assets exist
for file in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, EJEMPLO_VIDEO]:
    if not os.path.exists(file):
        logging.error(f"Missing required file: {file}")
        raise FileNotFoundError(f"Missing: {file}")

# Chunking configuration
SEGMENT_DURATION = 30      # Exact duration between transitions (no overlap)
TRANSITION_DURATION = 1.5  # Duration of the slide effect


def eliminar_archivo_tiempo(ruta, delay=1800):
    """Delete the file at `ruta` after `delay` seconds (default 30 minutes)."""
    def eliminar():
        try:
            if os.path.exists(ruta):
                os.remove(ruta)
                logging.info(f"File deleted: {ruta}")
        except Exception as e:
            logging.error(f"Error deleting {ruta}: {e}")
    Timer(delay, eliminar).start()


async def generar_tts(texto, voz, duracion_total):
    """Render `texto` with edge-tts, capped at the video's duration."""
    try:
        logging.info("Generating TTS")
        communicate = edge_tts.Communicate(texto, voz)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_tts:
            await communicate.save(tmp_tts.name)
        tts_audio = AudioFileClip(tmp_tts.name)
        # Make sure the TTS does not outlast the video
        if tts_audio.duration > duracion_total:
            tts_audio = tts_audio.subclip(0, duracion_total)
        return tts_audio, tmp_tts.name
    except Exception as e:
        logging.error(f"TTS failed: {str(e)}")
        raise


def crear_musica_fondo(duracion_total):
    """Loop the background track to cover the whole video, with a fade-out."""
    bg_music = AudioSegment.from_mp3(MUSIC_BG)
    needed_ms = int(duracion_total * 1000)
    repeticiones = needed_ms // len(bg_music) + 1
    bg_music = bg_music * repeticiones
    bg_music = bg_music[:needed_ms].fade_out(1000)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_bg:
        bg_music.export(tmp_bg.name, format="mp3")
    return AudioFileClip(tmp_bg.name).volumex(0.15), tmp_bg.name


def create_slide_transition(clip1, clip2, duration=TRANSITION_DURATION):
    """Slide transition with a more pronounced movement."""
    # Take the tail of clip1 and the head of clip2
    part1 = clip1.subclip(clip1.duration - duration)
    part2 = clip2.subclip(0, duration)

    # Slide animation: clip1 fades out while clip2 slides in from below
    transition = CompositeVideoClip([
        part1.fx(vfx.fadeout, duration),
        part2.fx(vfx.fadein, duration).set_position(
            lambda t: ('center', 720 - (720 * (t / duration)))  # moves up from below
        )
    ], size=(1280, 720)).set_duration(duration)
    return transition


async def procesar_video(video_input, texto_tts, voz_seleccionada):
    temp_files = []
    intro, outro, video_original = None, None, None
    try:
        logging.info("Starting processing")
        video_original = VideoFileClip(video_input, target_resolution=(720, 1280))
        duracion_video = video_original.duration

        # Generate the TTS track and the background music
        tts_audio, tts_path = await generar_tts(texto_tts, voz_seleccionada, duracion_video)
        bg_audio, bg_path = crear_musica_fondo(duracion_video)
        temp_files.extend([tts_path, bg_path])

        # Mix the audio tracks
        audio_original = video_original.audio.volumex(0.7) if video_original.audio else None
        audios = [bg_audio.set_duration(duracion_video)]
        if audio_original:
            audios.append(audio_original)
        audios.append(tts_audio.set_start(0).volumex(0.85))
        audio_final = CompositeAudioClip(audios).set_duration(duracion_video)

        # FIX: simplify segment and transition handling. Split the video into
        # exact 30-second segments and apply transitions only at those points.
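        # Duration accounting for the cut below (a sketch of the arithmetic,
        # given SEGMENT_DURATION = 30 and TRANSITION_DURATION = 1.5): each
        # boundary trims 1.5 s from the tail of the previous segment and
        # 1.5 s from the head of the next one, then inserts a 1.5 s transition
        # built from exactly that removed material, so every boundary shortens
        # the cut by a net 1.5 s. With N segments there are N - 1 boundaries:
        #
        #     final_duration = duracion_video - (N - 1) * TRANSITION_DURATION
        #
        # e.g. a 90 s video -> 3 segments, 2 boundaries -> 87 s of content.
        # This is why the audio is re-trimmed to video_final.duration further
        # down instead of being left at duracion_video.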
        # Single clip with all the content; cuts and slide transitions only
        # every 30 seconds
        video_final = video_original.copy()

        # If needed, cut the video and insert a transition every 30 seconds
        if duracion_video > SEGMENT_DURATION:
            clips = []
            num_segments = int(duracion_video // SEGMENT_DURATION) + (
                1 if duracion_video % SEGMENT_DURATION > 0 else 0
            )

            for i in range(num_segments):
                start_time = i * SEGMENT_DURATION
                end_time = min(start_time + SEGMENT_DURATION, duracion_video)

                # Current segment
                segment = video_original.subclip(start_time, end_time)

                # The first segment is appended as-is
                if i == 0:
                    clips.append(segment)
                    continue

                # A trailing segment shorter than the transition cannot host
                # one; append it with a hard cut instead
                if segment.duration <= TRANSITION_DURATION:
                    clips.append(segment)
                    continue

                prev_segment = clips[-1]

                # Build the transition between the two segments
                transition = create_slide_transition(prev_segment, segment)

                # Trim the previous segment so it ends exactly where the
                # transition takes over
                prev_end = prev_segment.duration - TRANSITION_DURATION
                if prev_end > 0:
                    clips[-1] = prev_segment.subclip(0, prev_end)

                # Append the transition
                clips.append(transition)

                # Append the current segment starting right after the
                # transition (the original code appended the full segment,
                # which replayed its first 1.5 s inside the transition and
                # then again at the start of the segment)
                clips.append(segment.subclip(TRANSITION_DURATION))

            # Join all the pieces
            video_final = concatenate_videoclips(clips, method="compose")

        # Attach the audio, trimmed to the (possibly shortened) final cut
        video_final = video_final.set_audio(audio_final.set_duration(video_final.duration))

        # Add the intro and outro
        intro = VideoFileClip(INTRO_VIDEO, target_resolution=(720, 1280))
        outro = VideoFileClip(OUTRO_VIDEO, target_resolution=(720, 1280))
        video_final = concatenate_videoclips([intro, video_final, outro], method="compose")

        # Final render
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            video_final.write_videofile(
                tmp.name,
                codec="libx264",
                audio_codec="aac",
                fps=24,
                threads=2,
                bitrate="3M",
                ffmpeg_params=[
                    "-preset", "ultrafast",
                    "-crf", "28",
                    "-movflags", "+faststart",
                    "-vf", "scale=1280:720"
                ],
                verbose=False
            )
            eliminar_archivo_tiempo(tmp.name, 1800)
            logging.info(f"Final video saved: {tmp.name}")
            return tmp.name
    except Exception as e:
        logging.error(f"General failure: {str(e)}")
        raise
    finally:
        try:
            if video_original:
                video_original.close()
            if intro:
                intro.close()
            if outro:
                outro.close()
            for file in temp_files:
                try:
                    os.remove(file)
                except Exception as e:
                    logging.warning(f"Error cleaning up {file}: {e}")
        except Exception as e:
            logging.warning(f"Error closing resources: {str(e)}")


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# AI Video Editor")
    with gr.Tab("Main"):
        video_input = gr.Video(label="Upload video")
        texto_tts = gr.Textbox(
            label="TTS text",
            lines=3,
            placeholder="Write your text here..."
        )
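        # The voice list below is a static snapshot. edge-tts can also
        # enumerate the voices currently offered by the service at runtime
        # (assumes network access); a minimal sketch:
        #
        #     import asyncio
        #     voces = [v["ShortName"] for v in asyncio.run(edge_tts.list_voices())]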
        voz_seleccionada = gr.Dropdown(
            label="Voice",
            choices=[
                # Spanish
                "es-ES-AlvaroNeural", "es-MX-BeatrizNeural", "es-ES-ElviraNeural",
                "es-MX-JavierNeural", "es-AR-ElenaNeural", "es-AR-TomasNeural",
                "es-CL-CatalinaNeural", "es-CL-LorenzoNeural", "es-CO-SofiaNeural",
                "es-CO-GonzaloNeural", "es-PE-CamilaNeural", "es-PE-AlexNeural",
                "es-VE-MariaNeural", "es-VE-ManuelNeural", "es-US-AlonsoNeural",
                "es-US-PalomaNeural", "es-ES-AbrilNeural", "es-ES-DarioNeural",
                "es-ES-HelenaRUS", "es-ES-LauraNeural", "es-ES-PabloNeural",
                "es-ES-TriniNeural",
                # English (US)
                "en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural",
                "en-US-AmberNeural", "en-US-AnaNeural", "en-US-AshleyNeural",
                "en-US-BrandonNeural", "en-US-ChristopherNeural", "en-US-CoraNeural",
                "en-US-DavisNeural", "en-US-ElizabethNeural", "en-US-EricNeural",
                "en-US-GinaNeural", "en-US-JacobNeural", "en-US-JaneNeural",
                "en-US-JasonNeural", "en-US-MichelleNeural", "en-US-MonicaNeural",
                "en-US-SaraNeural", "en-US-SteffanNeural", "en-US-TonyNeural",
                "en-US-YaraNeural",
                # French
                "fr-FR-AlainNeural", "fr-FR-BrigitteNeural", "fr-FR-CelesteNeural",
                "fr-FR-ClaudeNeural", "fr-FR-CoralieNeural", "fr-FR-DeniseNeural",
                "fr-FR-EloiseNeural", "fr-FR-HenriNeural", "fr-FR-JacquelineNeural",
                "fr-FR-JeromeNeural", "fr-FR-JosephineNeural", "fr-FR-MauriceNeural",
                "fr-FR-YvesNeural", "fr-FR-YvetteNeural",
                # German
                "de-DE-AmalaNeural", "de-DE-BerndNeural", "de-DE-ChristophNeural",
                "de-DE-ConradNeural", "de-DE-ElkeNeural", "de-DE-GiselaNeural",
                "de-DE-KasperNeural", "de-DE-KatjaNeural", "de-DE-KillianNeural",
                "de-DE-KlarissaNeural", "de-DE-KlausNeural", "de-DE-LouisaNeural",
                "de-DE-MajaNeural", "de-DE-RalfNeural", "de-DE-TanjaNeural",
                "de-DE-ViktoriaNeural",
                # Italian
                "it-IT-BenignoNeural", "it-IT-CalimeroNeural", "it-IT-CataldoNeural",
                "it-IT-DiegoNeural", "it-IT-ElsaNeural", "it-IT-FabiolaNeural",
                "it-IT-GianniNeural", "it-IT-ImeldaNeural", "it-IT-IrmaNeural",
                "it-IT-IsabellaNeural", "it-IT-LisandroNeural", "it-IT-PalmiraNeural",
                "it-IT-PierinaNeural", "it-IT-RinaldoNeural",
                # Japanese
                "ja-JP-AoiNeural", "ja-JP-DaichiNeural", "ja-JP-HarukaNeural",
                "ja-JP-KeitaNeural", "ja-JP-MayuNeural", "ja-JP-NanamiNeural",
                "ja-JP-NaokiNeural", "ja-JP-ShioriNeural"
            ],
            value="es-ES-AlvaroNeural"
        )
        procesar_btn = gr.Button("Generate Video")
        video_output = gr.Video(label="Processed Video")

        with gr.Accordion("Usage Examples", open=False):
            gr.Examples(
                examples=[[EJEMPLO_VIDEO, "¡Hola! Esto es una prueba. Suscríbete al canal."]],
                inputs=[video_input, texto_tts],
                label="Examples"
            )

    procesar_btn.click(
        procesar_video,
        inputs=[video_input, texto_tts, voz_seleccionada],
        outputs=video_output
    )

    # Footer notes
    gr.Markdown("""
    ### ℹ️ Important notes:
    - Transitions happen only every 30 seconds
    - The video includes a predefined intro and outro
    - The generated file is deleted after 30 minutes
    - For best results, use 720p or 1080p source videos
    """)

if __name__ == "__main__":
    demo.queue().launch()
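

# Optional quick check for create_slide_transition (a minimal sketch, not part
# of the app): renders the transition between two solid-color clips so the
# slide effect can be inspected in isolation. The ColorClip settings and the
# output filename are illustrative assumptions; call it manually when needed.
def _demo_transicion(salida="transition_test.mp4"):
    from moviepy.editor import ColorClip
    clip_a = ColorClip((1280, 720), color=(200, 30, 30)).set_duration(3).set_fps(24)
    clip_b = ColorClip((1280, 720), color=(30, 30, 200)).set_duration(3).set_fps(24)
    create_slide_transition(clip_a, clip_b).write_videofile(salida, fps=24)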