import os import math import tempfile from pydub import AudioSegment from moviepy.editor import ( VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips, CompositeVideoClip, concatenate_audioclips ) import edge_tts import gradio as gr import asyncio # CONSTANTES DE ARCHIVOS INTRO_VIDEO = "introvideo.mp4" OUTRO_VIDEO = "outrovideo.mp4" MUSIC_BG = "musicafondo.mp3" FX_SOUND = "fxsound.mp3" WATERMARK = "watermark.png" # Validar existencia de archivos obligatorios for file in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, FX_SOUND, WATERMARK]: if not os.path.exists(file): raise FileNotFoundError(f"Falta archivo necesario: {file}") def cortar_video(video_path, metodo="inteligente", duracion=10): video = VideoFileClip(video_path) if metodo == "manual": return [video.subclip(i*duracion, (i+1)*duracion) for i in range(math.ceil(video.duration/duracion))] # Implementación básica de cortes por voz (requeriría VAD real) clips = [] ultimo_corte = 0 for i in range(1, math.ceil(video.duration)): if i % 5 == 0: # Simulación de detección de pausas clips.append(video.subclip(ultimo_corte, i)) ultimo_corte = i return clips def procesar_audio(texto, voz, clips_duracion): communicate = edge_tts.Communicate(texto, voz) with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp: asyncio.run(communicate.save(tmp.name)) tts_audio = AudioFileClip(tmp.name) # Ajustar TTS a duración de clips if tts_audio.duration < clips_duracion: tts_audio = tts_audio.loop(duration=clips_duracion) else: tts_audio = tts_audio.subclip(0, clips_duracion) # Mezclar con música de fondo bg_music = AudioSegment.from_mp3(MUSIC_BG) if len(bg_music) < clips_duracion*1000: bg_music = bg_music * math.ceil(clips_duracion*1000 / len(bg_music)) bg_music = bg_music[:clips_duracion*1000].fade_out(3000) with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp: bg_music.export(tmp.name, format="mp3") bg_audio = AudioFileClip(tmp.name).volumex(0.10) return CompositeAudioClip([bg_audio, tts_audio.volumex(0.9)]) def agregar_transiciones(clips): fx_audio = AudioFileClip(FX_SOUND).set_duration(2.5) transicion = ImageClip(WATERMARK).set_duration(2.5) transicion = transicion.resize(height=clips[0].h).set_position(("center", 0.1)) clips_con_fx = [] for i, clip in enumerate(clips): # Agregar watermark clip_watermarked = CompositeVideoClip([clip, transicion]) clips_con_fx.append(clip_watermarked) if i < len(clips)-1: clips_con_fx.append( CompositeVideoClip([transicion.set_position("center")]) .set_audio(fx_audio) ) return concatenate_videoclips(clips_con_fx) async def procesar_video( video_input, texto_tts, voz_seleccionada, metodo_corte, duracion_corte ): # Procesar video principal clips = cortar_video(video_input, metodo_corte, duracion_corte) video_editado = agregar_transiciones(clips) # Agregar intro/outro intro = VideoFileClip(INTRO_VIDEO) outro = VideoFileClip(OUTRO_VIDEO) video_final = concatenate_videoclips([intro, video_editado, outro]) # Procesar audio audio_final = procesar_audio( texto_tts, voz_seleccionada, video_editado.duration ) # Combinar y renderizar video_final = video_final.set_audio(audio_final) with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp: video_final.write_videofile(tmp.name, codec="libx264", fps=24) return tmp.name # Interfaz Gradio with gr.Blocks() as demo: gr.Markdown("# Video Editor IA") with gr.Tab("Principal"): video_input = gr.Video(label="Subir video") texto_tts = gr.Textbox(label="Texto para TTS", lines=3) voz_seleccionada = gr.Dropdown( label="Seleccionar voz", choices=["es-ES-AlvaroNeural", "es-MX-BeatrizNeural"] ) procesar_btn = gr.Button("Generar Video") video_output = gr.Video(label="Resultado") with gr.Tab("Ajustes"): metodo_corte = gr.Radio( ["inteligente", "manual"], label="Método de cortes", value="inteligente" ) duracion_corte = gr.Slider( 1, 60, 10, label="Segundos por corte (solo manual)" ) procesar_btn.click( procesar_video, inputs=[ video_input, texto_tts, voz_seleccionada, metodo_corte, duracion_corte ], outputs=video_output ) if __name__ == "__main__": demo.queue().launch()