|
import tempfile |
|
import logging |
|
import os |
|
import asyncio |
|
from moviepy.editor import * |
|
import edge_tts |
|
import gradio as gr |
|
from pydub import AudioSegment |
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Pre-rendered assets that must ship alongside this script.
INTRO_VIDEO = "introvideo.mp4"
OUTRO_VIDEO = "outrovideo.mp4"
MUSIC_BG = "musicafondo.mp3"
EJEMPLO_VIDEO = "ejemplo.mp4"

# Fail fast at import time: abort on the first missing asset.
for _asset in [INTRO_VIDEO, OUTRO_VIDEO, MUSIC_BG, EJEMPLO_VIDEO]:
    if not os.path.exists(_asset):
        logging.error(f"Falta archivo necesario: {_asset}")
        raise FileNotFoundError(f"Falta: {_asset}")

# Seconds of video per sub-clip between slide transitions.
SEGMENT_DURATION = 30
# Seconds each slide/fade transition lasts.
TRANSITION_DURATION = 1.5
|
|
|
def eliminar_archivo_tiempo(ruta, delay=1800):
    """Schedule deletion of the file at *ruta* after *delay* seconds.

    Args:
        ruta: Path of the file to remove.
        delay: Seconds to wait before deleting (default 1800 = 30 minutes).

    Deletion errors are logged, never raised (best-effort cleanup).
    """
    from threading import Timer

    def eliminar():
        try:
            if os.path.exists(ruta):
                os.remove(ruta)
                logging.info(f"Archivo eliminado: {ruta}")
        except Exception as e:
            logging.error(f"Error al eliminar {ruta}: {e}")

    timer = Timer(delay, eliminar)
    # Daemon thread: a pending cleanup must not keep the interpreter alive
    # for up to `delay` seconds after the app shuts down (the original
    # non-daemon Timer blocked process exit for as long as 30 minutes).
    timer.daemon = True
    timer.start()
|
|
|
async def generar_tts(texto, voz, duracion_total):
    """Synthesize *texto* with the edge-tts voice *voz* into an MP3 clip.

    Returns:
        Tuple ``(AudioFileClip, temp_path)``. The caller owns the temp file
        and is responsible for deleting it.

    The clip is truncated so the voice-over never exceeds *duracion_total*
    seconds. Any failure is logged and re-raised.
    """
    try:
        logging.info("Generando TTS")
        tts = edge_tts.Communicate(texto, voz)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
            ruta_mp3 = handle.name
            await tts.save(ruta_mp3)
            clip = AudioFileClip(ruta_mp3)

        # Never let the voice-over run past the end of the video.
        if clip.duration > duracion_total:
            clip = clip.subclip(0, duracion_total)

        return clip, ruta_mp3
    except Exception as e:
        logging.error(f"Fallo en TTS: {str(e)}")
        raise
|
|
|
def crear_musica_fondo(duracion_total):
    """Build a background-music track of exactly *duracion_total* seconds.

    Loops MUSIC_BG until it covers the target length, trims it, applies a
    1-second fade-out and lowers the volume to 15%.

    Returns:
        Tuple ``(AudioFileClip, temp_path)``; the caller must delete the
        temporary MP3 at *temp_path* when done.
    """
    base = AudioSegment.from_mp3(MUSIC_BG)
    objetivo_ms = int(duracion_total * 1000)
    # One extra repetition guarantees at least objetivo_ms of audio.
    veces = objetivo_ms // len(base) + 1
    pista = (base * veces)[:objetivo_ms].fade_out(1000)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_bg:
        pista.export(tmp_bg.name, format="mp3")
        return AudioFileClip(tmp_bg.name).volumex(0.15), tmp_bg.name
|
|
|
def create_slide_transition(clip1, clip2, duration=TRANSITION_DURATION):
    """Slide transition: *clip1*'s tail fades out while *clip2*'s head
    slides up from below the frame, fading in.

    Returns:
        A 1280x720 CompositeVideoClip lasting exactly *duration* seconds.
    """
    tail = clip1.subclip(clip1.duration - duration)
    head = clip2.subclip(0, duration)

    outgoing = tail.fx(vfx.fadeout, duration)
    # Incoming clip starts 720 px below the frame and reaches center
    # exactly at t == duration.
    incoming = head.fx(vfx.fadein, duration).set_position(
        lambda t: ('center', 720 - (720 * (t / duration)))
    )

    # Layer order matters: the incoming clip is drawn on top.
    return CompositeVideoClip(
        [outgoing, incoming], size=(1280, 720)
    ).set_duration(duration)
|
|
|
async def procesar_video(video_input, texto_tts, voz_seleccionada):
    """Full pipeline: mix TTS + background music, add slide transitions
    every SEGMENT_DURATION seconds, and wrap with intro/outro clips.

    Args:
        video_input: Path to the uploaded source video.
        texto_tts: Text to synthesize as the voice-over.
        voz_seleccionada: edge-tts voice name.

    Returns:
        Path to the rendered MP4 — a temp file scheduled for deletion
        30 minutes after creation (see eliminar_archivo_tiempo).

    Raises:
        Any failure from the underlying libraries, after logging it.
    """
    # Temp audio files (TTS + looped music) removed in `finally`.
    temp_files = []
    intro, outro, video_original = None, None, None
    try:
        logging.info("Iniciando procesamiento")
        video_original = VideoFileClip(video_input, target_resolution=(720, 1280))
        duracion_video = video_original.duration

        # Voice-over is capped at the video length; music is looped to match it.
        tts_audio, tts_path = await generar_tts(texto_tts, voz_seleccionada, duracion_video)
        bg_audio, bg_path = crear_musica_fondo(duracion_video)
        temp_files.extend([tts_path, bg_path])

        # Mix layers: background music (0.15, set in crear_musica_fondo),
        # optional original audio at 0.7, TTS on top at 0.85.
        audio_original = video_original.audio.volumex(0.7) if video_original.audio else None
        audios = [bg_audio.set_duration(duracion_video)]
        if audio_original:
            audios.append(audio_original)
        audios.append(tts_audio.set_start(0).volumex(0.85))
        audio_final = CompositeAudioClip(audios).set_duration(duracion_video)

        video_final = video_original.copy()

        # Only videos longer than one segment get transitions.
        if duracion_video > SEGMENT_DURATION:
            clips = []
            # Ceiling division: one extra segment for any remainder.
            num_segments = int(duracion_video // SEGMENT_DURATION) + (1 if duracion_video % SEGMENT_DURATION > 0 else 0)

            for i in range(num_segments):
                start_time = i * SEGMENT_DURATION
                end_time = min(start_time + SEGMENT_DURATION, duracion_video)

                segment = video_original.subclip(start_time, end_time)

                if i == 0:
                    clips.append(segment)
                else:
                    prev_segment = clips[-1]

                    # Transition built from prev tail + this segment's head.
                    transition = create_slide_transition(prev_segment, segment)

                    # Trim the tail that the transition now covers.
                    prev_end = prev_segment.duration - TRANSITION_DURATION
                    if prev_end > 0:
                        clips[-1] = prev_segment.subclip(0, prev_end)

                    clips.append(transition)

                    # NOTE(review): `segment` is appended uncut, so its first
                    # TRANSITION_DURATION seconds also appear inside
                    # `transition` — those frames play twice. Confirm whether
                    # that is intentional.
                    clips.append(segment)

            video_final = concatenate_videoclips(clips, method="compose")

        video_final = video_final.set_audio(audio_final)

        intro = VideoFileClip(INTRO_VIDEO, target_resolution=(720, 1280))
        outro = VideoFileClip(OUTRO_VIDEO, target_resolution=(720, 1280))
        video_final = concatenate_videoclips([intro, video_final, outro], method="compose")

        # Render before `finally` runs: the lazy audio clips still read from
        # the temp files in temp_files, which are deleted only afterwards.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            video_final.write_videofile(
                tmp.name,
                codec="libx264",
                audio_codec="aac",
                fps=24,
                threads=2,
                bitrate="3M",
                ffmpeg_params=[
                    "-preset", "ultrafast",
                    "-crf", "28",
                    "-movflags", "+faststart",
                    "-vf", "scale=1280:720"
                ],
                verbose=False
            )
        # Output is served to the user now and garbage-collected in 30 min.
        eliminar_archivo_tiempo(tmp.name, 1800)
        logging.info(f"Video final guardado: {tmp.name}")
        return tmp.name

    except Exception as e:
        logging.error(f"Fallo general: {str(e)}")
        raise
    finally:
        # Best-effort cleanup on both success and failure.
        try:
            if video_original:
                video_original.close()
            if intro:
                intro.close()
            if outro:
                outro.close()
            for file in temp_files:
                try:
                    os.remove(file)
                except Exception as e:
                    logging.warning(f"Error limpiando {file}: {e}")
        except Exception as e:
            logging.warning(f"Error al cerrar recursos: {str(e)}")
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Editor de Video con IA")

    with gr.Tab("Principal"):
        video_input = gr.Video(label="Subir video")
        texto_tts = gr.Textbox(
            label="Texto para TTS",
            lines=3,
            # Fixed mojibake ("aqu铆" -> "aquí"): the file was saved with a
            # broken encoding round-trip.
            placeholder="Escribe aquí tu texto..."
        )
        voz_seleccionada = gr.Dropdown(
            label="Voz",
            choices=[
                "es-ES-AlvaroNeural", "es-MX-BeatrizNeural",
                "es-ES-ElviraNeural", "es-MX-JavierNeural",
                "es-AR-ElenaNeural", "es-AR-TomasNeural",
                "es-CL-CatalinaNeural", "es-CL-LorenzoNeural",
                "es-CO-SofiaNeural", "es-CO-GonzaloNeural",
                "es-PE-CamilaNeural", "es-PE-AlexNeural",
                "es-VE-MariaNeural", "es-VE-ManuelNeural",
                "es-US-AlonsoNeural", "es-US-PalomaNeural",
                "es-ES-AbrilNeural", "es-ES-DarioNeural",
                "es-ES-HelenaRUS", "es-ES-LauraNeural",
                "es-ES-PabloNeural", "es-ES-TriniNeural",
                "en-US-AriaNeural", "en-US-GuyNeural",
                "en-US-JennyNeural", "en-US-AmberNeural",
                "en-US-AnaNeural", "en-US-AshleyNeural",
                "en-US-BrandonNeural", "en-US-ChristopherNeural",
                "en-US-CoraNeural", "en-US-DavisNeural",
                "en-US-ElizabethNeural", "en-US-EricNeural",
                "en-US-GinaNeural", "en-US-JacobNeural",
                "en-US-JaneNeural", "en-US-JasonNeural",
                "en-US-MichelleNeural", "en-US-MonicaNeural",
                "en-US-SaraNeural", "en-US-SteffanNeural",
                "en-US-TonyNeural", "en-US-YaraNeural",
                "fr-FR-AlainNeural", "fr-FR-BrigitteNeural",
                "fr-FR-CelesteNeural", "fr-FR-ClaudeNeural",
                "fr-FR-CoralieNeural", "fr-FR-DeniseNeural",
                "fr-FR-EloiseNeural", "fr-FR-HenriNeural",
                "fr-FR-JacquelineNeural", "fr-FR-JeromeNeural",
                "fr-FR-JosephineNeural", "fr-FR-MauriceNeural",
                "fr-FR-YvesNeural", "fr-FR-YvetteNeural",
                "de-DE-AmalaNeural", "de-DE-BerndNeural",
                "de-DE-ChristophNeural", "de-DE-ConradNeural",
                "de-DE-ElkeNeural", "de-DE-GiselaNeural",
                "de-DE-KasperNeural", "de-DE-KatjaNeural",
                "de-DE-KillianNeural", "de-DE-KlarissaNeural",
                "de-DE-KlausNeural", "de-DE-LouisaNeural",
                "de-DE-MajaNeural", "de-DE-RalfNeural",
                "de-DE-TanjaNeural", "de-DE-ViktoriaNeural",
                "it-IT-BenignoNeural", "it-IT-CalimeroNeural",
                "it-IT-CataldoNeural", "it-IT-DiegoNeural",
                "it-IT-ElsaNeural", "it-IT-FabiolaNeural",
                "it-IT-GianniNeural", "it-IT-ImeldaNeural",
                "it-IT-IrmaNeural", "it-IT-IsabellaNeural",
                "it-IT-LisandroNeural", "it-IT-PalmiraNeural",
                "it-IT-PierinaNeural", "it-IT-RinaldoNeural",
                "ja-JP-AoiNeural", "ja-JP-DaichiNeural",
                "ja-JP-HarukaNeural", "ja-JP-KeitaNeural",
                "ja-JP-MayuNeural", "ja-JP-NanamiNeural",
                "ja-JP-NaokiNeural", "ja-JP-ShioriNeural"
            ],
            value="es-ES-AlvaroNeural"
        )
        procesar_btn = gr.Button("Generar Video")
        video_output = gr.Video(label="Video Procesado")

    with gr.Accordion("Ejemplos de Uso", open=False):
        gr.Examples(
            # Fixed mojibake in the sample text ("隆Hola!", "Suscr铆bete").
            examples=[[EJEMPLO_VIDEO, "¡Hola! Esto es una prueba. Suscríbete al canal."]],
            inputs=[video_input, texto_tts],
            label="Ejemplos"
        )

    # Register the handler exactly ONCE. The original file registered this
    # same callback twice, so every button press ran the whole (expensive)
    # video pipeline two times.
    procesar_btn.click(
        procesar_video,
        inputs=[video_input, texto_tts, voz_seleccionada],
        outputs=video_output
    )

    gr.Markdown("""
    ### ℹ️ Notas importantes:
    - Las transiciones ocurren solamente cada 30 segundos
    - El video contiene intro y outro predefinidos
    - El archivo generado se elimina después de 30 minutos
    - Para mejores resultados, usa videos de dimensiones 720p o 1080p
    """)

if __name__ == "__main__":
    demo.queue().launch()