fantaxy committed on
Commit 3fbccb1 · verified · 1 Parent(s): 6545e86

Update app.py

Files changed (1)
  1. app.py +794 -288
app.py CHANGED
@@ -1,111 +1,386 @@
1
  import spaces
2
- from functools import lru_cache
3
  import gradio as gr
4
  from gradio_toggle import Toggle
5
  import torch
6
- from huggingface_hub import snapshot_download
7
- from transformers import CLIPProcessor, CLIPModel, pipeline
8
- import random
9
  from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
10
  from xora.models.transformers.transformer3d import Transformer3DModel
11
  from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier
12
  from xora.schedulers.rf import RectifiedFlowScheduler
13
  from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
14
- from transformers import T5EncoderModel, T5Tokenizer
15
  from xora.utils.conditioning_method import ConditioningMethod
16
- from pathlib import Path
17
- import safetensors.torch
18
- import json
19
- import numpy as np
20
- import cv2
21
- from PIL import Image
22
- import tempfile
23
- import os
24
- import gc
25
- import csv
26
- from datetime import datetime
27
- from openai import OpenAI
28
-
29
- # Initialize the Korean-to-English translator
30
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
31
 
32
- torch.backends.cuda.matmul.allow_tf32 = False
33
- torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
34
- torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
35
- torch.backends.cudnn.allow_tf32 = False
36
- torch.backends.cudnn.deterministic = False
37
- torch.backends.cuda.preferred_blas_library="cublas"
38
- torch.set_float32_matmul_precision("highest")
39
 
 
40
  MAX_SEED = np.iinfo(np.int32).max
 
 
41
 
42
- # Load Hugging Face token if needed
43
- hf_token = os.getenv("HF_TOKEN")
44
- openai_api_key = os.getenv("OPENAI_API_KEY")
45
- client = OpenAI(api_key=openai_api_key)
46
-
47
- system_prompt_t2v_path = "assets/system_prompt_t2v.txt"
48
- with open(system_prompt_t2v_path, "r") as f:
49
- system_prompt_t2v = f.read()
50
-
51
- # Set model download directory within Hugging Face Spaces
52
- model_path = "asset"
 
 
53
 
54
- commit_hash='c7c8ad4c2ddba847b94e8bfaefbd30bd8669fafc'
 
 
 
 
55
 
56
- if not os.path.exists(model_path):
57
- snapshot_download("Lightricks/LTX-Video", revision=commit_hash, local_dir=model_path, repo_type="model", token=hf_token)
 
 
 
58
 
59
- # Global variables to load components
60
- vae_dir = Path(model_path) / "vae"
61
- unet_dir = Path(model_path) / "unet"
62
- scheduler_dir = Path(model_path) / "scheduler"
63
 
64
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
65
 
66
- clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(torch.device("cuda:0"))
67
- clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 
 
68
 
69
  def process_prompt(prompt):
70
- # Check whether the prompt contains Korean
71
  if any(ord('가') <= ord(char) <= ord('힣') for char in prompt):
72
- # Translate Korean to English
73
  translated = translator(prompt)[0]['translation_text']
 
74
  return translated
75
  return prompt
76
 
77
- def compute_clip_embedding(text=None):
78
- inputs = clip_processor(text=text, return_tensors="pt", padding=True).to(device)
79
- outputs = clip_model.get_text_features(**inputs)
80
- embedding = outputs.detach().cpu().numpy().flatten().tolist()
81
- return embedding
82
-
83
- def load_vae(vae_dir):
84
- vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
85
- vae_config_path = vae_dir / "config.json"
86
- with open(vae_config_path, "r") as f:
87
- vae_config = json.load(f)
88
- vae = CausalVideoAutoencoder.from_config(vae_config)
89
- vae_state_dict = safetensors.torch.load_file(vae_ckpt_path)
90
- vae.load_state_dict(vae_state_dict)
91
- return vae.to(device).to(torch.bfloat16)
92
-
93
- def load_unet(unet_dir):
94
- unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
95
- unet_config_path = unet_dir / "config.json"
96
- transformer_config = Transformer3DModel.load_config(unet_config_path)
97
- transformer = Transformer3DModel.from_config(transformer_config)
98
- unet_state_dict = safetensors.torch.load_file(unet_ckpt_path)
99
- transformer.load_state_dict(unet_state_dict, strict=True)
100
- return transformer.to(device).to(torch.bfloat16)
101
-
102
- def load_scheduler(scheduler_dir):
103
- scheduler_config_path = scheduler_dir / "scheduler_config.json"
104
- scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
105
- return RectifiedFlowScheduler.from_config(scheduler_config)
106
-
107
- # Preset options for resolution and frame configuration
108
- preset_options = [
 
 
109
  {"label": "1216x704, 41 frames", "width": 1216, "height": 704, "num_frames": 41},
110
  {"label": "1088x704, 49 frames", "width": 1088, "height": 704, "num_frames": 49},
111
  {"label": "1056x640, 57 frames", "width": 1056, "height": 640, "num_frames": 57},
@@ -123,106 +398,99 @@ preset_options = [
123
  {"label": "512x512, 200 frames", "width": 512, "height": 512, "num_frames": 200},
124
  ]
125
 
126
- def preset_changed(preset):
127
- if preset != "Custom":
128
- selected = next(item for item in preset_options if item["label"] == preset)
129
- return (
130
- selected["height"],
131
- selected["width"],
132
- selected["num_frames"],
133
- gr.update(visible=False),
134
- gr.update(visible=False),
135
- gr.update(visible=False),
136
- )
137
- else:
138
- return (
139
- None,
140
- None,
141
- None,
142
- gr.update(visible=True),
143
- gr.update(visible=True),
144
- gr.update(visible=True),
145
- )
146
-
147
- # Load models
148
- vae = load_vae(vae_dir)
149
- unet = load_unet(unet_dir)
150
- scheduler = load_scheduler(scheduler_dir)
151
- patchifier = SymmetricPatchifier(patch_size=1)
152
- text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(torch.device("cuda:0"))
153
- tokenizer = T5Tokenizer.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer")
154
-
155
- pipeline = XoraVideoPipeline(
156
- transformer=unet,
157
- patchifier=patchifier,
158
- text_encoder=text_encoder,
159
- tokenizer=tokenizer,
160
- scheduler=scheduler,
161
- vae=vae,
162
- ).to(torch.device("cuda:0"))
163
-
164
- def enhance_prompt_if_enabled(prompt, enhance_toggle):
165
- if not enhance_toggle:
166
- print("Enhance toggle is off, Prompt: ", prompt)
167
- return prompt
168
-
169
- messages = [
170
- {"role": "system", "content": system_prompt_t2v},
171
- {"role": "user", "content": prompt},
172
- ]
173
-
174
  try:
175
- response = client.chat.completions.create(
176
- model="gpt-4-mini",
177
- messages=messages,
178
- max_tokens=200,
179
- )
180
- print("Enhanced Prompt: ", response.choices[0].message.content.strip())
181
- return response.choices[0].message.content.strip()
 
 
182
  except Exception as e:
183
- print(f"Error: {e}")
184
- return prompt
 
 
 
 
185
 
186
  @spaces.GPU(duration=90)
187
- def generate_video_from_text_90(
188
- prompt="",
189
- enhance_prompt_toggle=False,
190
- negative_prompt="",
191
- frame_rate=25,
192
- seed=random.randint(0, MAX_SEED),
193
- num_inference_steps=30,
194
- guidance_scale=3.2,
195
- height=768,
196
- width=768,
197
- num_frames=60,
198
- progress=gr.Progress(),
199
  ):
200
- # Preprocess the prompt (Korean -> English)
201
- prompt = process_prompt(prompt)
202
- negative_prompt = process_prompt(negative_prompt)
203
-
204
- if len(prompt.strip()) < 50:
205
- raise gr.Error(
206
- "Prompt must be at least 50 characters long. Please provide more details for the best results.",
207
- duration=5,
208
- )
209
 
210
- prompt = enhance_prompt_if_enabled(prompt, enhance_prompt_toggle)
 
211
 
212
- sample = {
213
- "prompt": prompt,
214
- "prompt_attention_mask": None,
215
- "negative_prompt": negative_prompt,
216
- "negative_prompt_attention_mask": None,
217
- "media_items": None,
218
- }
219
 
220
- generator = torch.Generator(device="cuda").manual_seed(seed)
 
221
 
222
- def gradio_progress_callback(self, step, timestep, kwargs):
223
- progress((step + 1) / num_inference_steps)
 
 
224
 
225
- try:
226
  with torch.no_grad():
227
  images = pipeline(
228
  num_inference_steps=num_inference_steps,
@@ -239,137 +507,375 @@ def generate_video_from_text_90(
239
  vae_per_channel_normalize=True,
240
  conditioning_method=ConditioningMethod.UNCONDITIONAL,
241
  mixed_precision=True,
242
- callback_on_step_end=gradio_progress_callback,
243
  ).images
244
- except Exception as e:
245
- raise gr.Error(
246
- f"An error occurred while generating the video. Please try again. Error: {e}",
247
- duration=5,
248
- )
249
- finally:
250
- torch.cuda.empty_cache()
251
- gc.collect()
252
 
253
- output_path = tempfile.mktemp(suffix=".mp4")
254
- video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
255
- video_np = (video_np * 255).astype(np.uint8)
256
- height, width = video_np.shape[1:3]
257
- out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height))
258
- for frame in video_np[..., ::-1]:
259
- out.write(frame)
260
- out.release()
261
- del images
262
- del video_np
263
- torch.cuda.empty_cache()
264
- return output_path
265
-
266
- def create_advanced_options():
267
- with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
268
- seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
269
- inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
270
- guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=4.2)
271
 
272
- height_slider = gr.Slider(
273
- label="4.4 Height",
274
- minimum=256,
275
- maximum=1024,
276
- step=64,
277
- value=768,
278
- visible=False,
279
  )
280
- width_slider = gr.Slider(
281
- label="4.5 Width",
282
- minimum=256,
283
- maximum=1024,
284
- step=64,
285
- value=768,
286
- visible=False,
287
- )
288
- num_frames_slider = gr.Slider(
289
- label="4.5 Number of Frames",
290
- minimum=1,
291
- maximum=500,
292
- step=1,
293
- value=60,
294
- visible=False,
295
- )
296
-
297
- return [
298
- seed,
299
- inference_steps,
300
- guidance_scale,
301
- height_slider,
302
- width_slider,
303
- num_frames_slider,
304
- ]
305
-
306
- with gr.Blocks(theme=gr.themes.Soft()) as iface:
307
 
308
- with gr.Column():
309
- txt2vid_prompt = gr.Textbox(
310
- label="Step 1: Enter Your Prompt (ํ•œ๊ธ€ ๋˜๋Š” ์˜์–ด)",
311
- placeholder="์ƒ์„ฑํ•˜๊ณ  ์‹ถ์€ ๋น„๋””์˜ค๋ฅผ ์„ค๋ช…ํ•˜์„ธ์š” (์ตœ์†Œ 50์ž)...",
312
- value="๊ธด ๊ฐˆ์ƒ‰ ๋จธ๋ฆฌ์™€ ๋ฐ์€ ํ”ผ๋ถ€๋ฅผ ๊ฐ€์ง„ ์—ฌ์„ฑ์ด ๊ธด ๊ธˆ๋ฐœ ๋จธ๋ฆฌ๋ฅผ ๊ฐ€์ง„ ๋‹ค๋ฅธ ์—ฌ์„ฑ์„ ํ–ฅํ•ด ๋ฏธ์†Œ ์ง“์Šต๋‹ˆ๋‹ค. ๊ฐˆ์ƒ‰ ๋จธ๋ฆฌ ์—ฌ์„ฑ์€ ๊ฒ€์€ ์žฌํ‚ท์„ ์ž…๊ณ  ์žˆ์œผ๋ฉฐ ์˜ค๋ฅธ์ชฝ ๋บจ์— ์ž‘๊ณ  ๊ฑฐ์˜ ๋ˆˆ์— ๋„์ง€ ์•Š๋Š” ์ ์ด ์žˆ์Šต๋‹ˆ๋‹ค. ์นด๋ฉ”๋ผ ์•ต๊ธ€์€ ๊ฐˆ์ƒ‰ ๋จธ๋ฆฌ ์—ฌ์„ฑ์˜ ์–ผ๊ตด์— ์ดˆ์ ์„ ๋งž์ถ˜ ํด๋กœ์ฆˆ์—…์ž…๋‹ˆ๋‹ค. ์กฐ๋ช…์€ ๋”ฐ๋œปํ•˜๊ณ  ์ž์—ฐ์Šค๋Ÿฌ์šฐ๋ฉฐ, ์•„๋งˆ๋„ ์ง€๋Š” ํ•ด์—์„œ ๋‚˜์˜ค๋Š” ๊ฒƒ ๊ฐ™์•„ ์žฅ๋ฉด์— ๋ถ€๋“œ๋Ÿฌ์šด ๋น›์„ ๋น„์ถฅ๋‹ˆ๋‹ค.",
313
- lines=5,
314
- )
315
 
 
 
 
 
 
 
 
316
 
317
- txt2vid_enhance_toggle = Toggle(
318
- label="Enhance Prompt",
319
- value=False,
320
- interactive=True,
 
 
 
321
  )
322
 
323
- txt2vid_negative_prompt = gr.Textbox(
324
- label="Step 2: Enter Negative Prompt",
325
- placeholder="๋น„๋””์˜ค์—์„œ ์›ํ•˜์ง€ ์•Š๋Š” ์š”์†Œ๋ฅผ ์„ค๋ช…ํ•˜์„ธ์š”...",
326
- value="low quality, worst quality, deformed, distorted, damaged, motion blur, motion artifacts, fused fingers, incorrect anatomy, strange hands, ugly",
327
- lines=2,
328
- )
 
 
329
 
330
- txt2vid_preset = gr.Dropdown(
331
- choices=[p["label"] for p in preset_options],
332
- value="512x512, 160 frames",
333
- label="Step 3.1: Choose Resolution Preset",
 
 
334
  )
335
 
336
- txt2vid_frame_rate = gr.Slider(
337
- label="Step 3.2: Frame Rate",
338
- minimum=6,
339
- maximum=60,
340
- step=1,
341
- value=20,
 
 
342
  )
343
 
344
- txt2vid_advanced = create_advanced_options()
345
- txt2vid_generate = gr.Button(
346
- "Step 5: Generate Video",
347
- variant="primary",
348
- size="lg",
349
  )
350
 
351
- txt2vid_output = gr.Video(label="Generated Output")
352
-
353
- txt2vid_preset.change(
354
- fn=preset_changed,
355
- inputs=[txt2vid_preset],
356
- outputs=txt2vid_advanced[3:],
357
- )
358
-
359
- txt2vid_generate.click(
360
- fn=generate_video_from_text_90,
361
- inputs=[
362
- txt2vid_prompt,
363
- txt2vid_enhance_toggle,
364
- txt2vid_negative_prompt,
365
- txt2vid_frame_rate,
366
- *txt2vid_advanced,
367
- ],
368
- outputs=txt2vid_output,
369
- concurrency_limit=1,
370
- concurrency_id="generate_video",
371
- queue=True,
372
  )
373
 
374
- iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)
375
- # ===== Application Startup at 2024-12-20 01:30:34 =====
 
1
  import spaces
2
+ import argparse
3
+ import os
4
+ import time
5
+ from os import path
6
+ import shutil
7
+ from datetime import datetime
8
+ from safetensors.torch import load_file
9
+ from huggingface_hub import hf_hub_download, snapshot_download
10
  import gradio as gr
11
  from gradio_toggle import Toggle
12
  import torch
13
+ from diffusers import FluxPipeline
14
+ from diffusers.pipelines.stable_diffusion import safety_checker
15
+ from PIL import Image
16
+ from transformers import pipeline, CLIPProcessor, CLIPModel, T5EncoderModel, T5Tokenizer
17
+ import replicate
18
+ import logging
19
+ import requests
20
+ from pathlib import Path
21
+ import cv2
22
+ import numpy as np
23
+ import sys
24
+ import io
25
+ import json
26
+ import gc
27
+ import csv
+ import random
28
+ from openai import OpenAI
29
  from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
30
  from xora.models.transformers.transformer3d import Transformer3DModel
31
  from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier
32
  from xora.schedulers.rf import RectifiedFlowScheduler
33
  from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
 
34
  from xora.utils.conditioning_method import ConditioningMethod
35
+ from functools import lru_cache
 
 
36
 
37
+ # Logging configuration
38
+ logging.basicConfig(
39
+ level=logging.INFO,
40
+ format='%(asctime)s - %(levelname)s - %(message)s'
41
+ )
42
+ logger = logging.getLogger(__name__)
 
43
 
44
+ # Constants and environment variables
45
  MAX_SEED = np.iinfo(np.int32).max
46
+ PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", ".")
47
+ MODEL_PATH = "asset"
48
+ CACHE_PATH = path.join(path.dirname(path.abspath(__file__)), "models")
49
+ GALLERY_PATH = path.join(PERSISTENT_DIR, "gallery")
50
+ VIDEO_GALLERY_PATH = path.join(PERSISTENT_DIR, "video_gallery")
51
+
52
+ # API keys
53
+ HF_TOKEN = os.getenv("HF_TOKEN")
54
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
55
+ CATBOX_USER_HASH = "e7a96fc68dd4c7d2954040cd5"
56
+ REPLICATE_API_TOKEN = os.getenv("API_KEY")
57
+
58
+ # Load the system prompt
59
+ SYSTEM_PROMPT_PATH = "assets/system_prompt_t2v.txt"
60
+ with open(SYSTEM_PROMPT_PATH, "r") as f:
61
+ SYSTEM_PROMPT = f.read()
62
+
63
+ # Directory initialization
64
+ def init_directories():
65
+ """ํ•„์š”ํ•œ ๋””๋ ‰ํ† ๋ฆฌ๋“ค์„ ์ƒ์„ฑ"""
66
+ directories = [GALLERY_PATH, VIDEO_GALLERY_PATH, CACHE_PATH]
67
+ for directory in directories:
68
+ os.makedirs(directory, exist_ok=True)
69
+ logger.info(f"Directory initialized: {directory}")
70
+
71
+ # CUDA settings
72
+ def setup_cuda():
73
+ """CUDA ๊ด€๋ จ ์„ค์ • ์ดˆ๊ธฐํ™”"""
74
+ torch.backends.cuda.matmul.allow_tf32 = False
75
+ torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
76
+ torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
77
+ torch.backends.cudnn.allow_tf32 = False
78
+ torch.backends.cudnn.deterministic = False
79
+ torch.backends.cuda.preferred_blas_library = "cublas"
80
+ torch.set_float32_matmul_precision("highest")
81
+ logger.info("CUDA settings initialized")
82
+
83
+ # Model management class
84
+ class ModelManager:
85
+ def __init__(self):
86
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
87
+ self.models = {}
88
+ self.current_model = None
89
+ logger.info(f"ModelManager initialized with device: {self.device}")
90
+
91
+ def load_model(self, model_name):
92
+ """๋ชจ๋ธ์„ ๋™์ ์œผ๋กœ ๋กœ๋“œ"""
93
+ if self.current_model == model_name and model_name in self.models:
94
+ return self.models[model_name]
95
+
96
+ # Unload the currently loaded model
97
+ self.unload_current_model()
98
+
99
+ logger.info(f"Loading model: {model_name}")
100
+ try:
101
+ if model_name == "flux":
102
+ model = self._load_flux_model()
103
+ elif model_name == "xora":
104
+ model = self._load_xora_model()
105
+ elif model_name == "clip":
106
+ model = self._load_clip_model()
107
+ else:
108
+ raise ValueError(f"Unknown model: {model_name}")
109
+
110
+ self.models[model_name] = model
111
+ self.current_model = model_name
112
+ return model
113
+
114
+ except Exception as e:
115
+ logger.error(f"Error loading model {model_name}: {str(e)}")
116
+ raise
117
+
118
+ def unload_current_model(self):
119
+ """ํ˜„์žฌ ๏ฟฝ๏ฟฝ๋“œ๋œ ๋ชจ๋ธ ์–ธ๋กœ๋“œ"""
120
+ if self.current_model:
121
+ logger.info(f"Unloading model: {self.current_model}")
122
+ if self.current_model in self.models:
123
+ del self.models[self.current_model]
124
+ self.current_model = None
125
+ torch.cuda.empty_cache()
126
+ gc.collect()
127
+
128
+ def _load_flux_model(self):
129
+ """Flux ๋ชจ๋ธ ๋กœ๋“œ"""
130
+ pipe = FluxPipeline.from_pretrained(
131
+ "black-forest-labs/FLUX.1-dev",
132
+ torch_dtype=torch.bfloat16
133
+ )
134
+ pipe.load_lora_weights(
135
+ hf_hub_download(
136
+ "ByteDance/Hyper-SD",
137
+ "Hyper-FLUX.1-dev-8steps-lora.safetensors"
138
+ )
139
+ )
140
+ pipe.fuse_lora(lora_scale=0.125)
141
+ pipe.to(device=self.device, dtype=torch.bfloat16)
142
+ pipe.safety_checker = safety_checker.StableDiffusionSafetyChecker.from_pretrained(
143
+ "CompVis/stable-diffusion-safety-checker"
144
+ )
145
+ return pipe
146
+
147
+ def _load_xora_model(self):
148
+ """Xora ๋ชจ๋ธ ๋กœ๋“œ"""
149
+ if not path.exists(MODEL_PATH):
150
+ snapshot_download(
151
+ "Lightricks/LTX-Video",
152
+ revision='c7c8ad4c2ddba847b94e8bfaefbd30bd8669fafc',
153
+ local_dir=MODEL_PATH,
154
+ repo_type="model",
155
+ token=HF_TOKEN
156
+ )
157
+
158
+ vae = load_vae(Path(MODEL_PATH) / "vae")
159
+ unet = load_unet(Path(MODEL_PATH) / "unet")
160
+ scheduler = load_scheduler(Path(MODEL_PATH) / "scheduler")
161
+ patchifier = SymmetricPatchifier(patch_size=1)
162
+ text_encoder = T5EncoderModel.from_pretrained(
163
+ "PixArt-alpha/PixArt-XL-2-1024-MS",
164
+ subfolder="text_encoder"
165
+ ).to(self.device)
166
+ tokenizer = T5Tokenizer.from_pretrained(
167
+ "PixArt-alpha/PixArt-XL-2-1024-MS",
168
+ subfolder="tokenizer"
169
+ )
170
 
171
+ return XoraVideoPipeline(
172
+ transformer=unet,
173
+ patchifier=patchifier,
174
+ text_encoder=text_encoder,
175
+ tokenizer=tokenizer,
176
+ scheduler=scheduler,
177
+ vae=vae
178
+ ).to(self.device)
179
+
180
+ def _load_clip_model(self):
181
+ """CLIP ๋ชจ๋ธ ๋กœ๋“œ"""
182
+ model = CLIPModel.from_pretrained(
183
+ "openai/clip-vit-base-patch32",
184
+ cache_dir=MODEL_PATH
185
+ ).to(self.device)
186
+ processor = CLIPProcessor.from_pretrained(
187
+ "openai/clip-vit-base-patch32",
188
+ cache_dir=MODEL_PATH
189
+ )
190
+ return {"model": model, "processor": processor}
191
 
192
+ # Translator initialization
193
+ @lru_cache(maxsize=None)
194
+ def get_translator():
195
+ """๋ฒˆ์—ญ๊ธฐ๋ฅผ lazy loading์œผ๋กœ ์ดˆ๊ธฐํ™”"""
196
+ return pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
197
 
198
+ # OpenAI client initialization
199
+ @lru_cache(maxsize=None)
200
+ def get_openai_client():
201
+ """OpenAI ํด๋ผ์ด์–ธํŠธ๋ฅผ lazy loading์œผ๋กœ ์ดˆ๊ธฐํ™”"""
202
+ return OpenAI(api_key=OPENAI_API_KEY)
203
 
 
 
 
 
204
 
 
205
 
206
+ # Utility functions
207
+ class Timer:
208
+ """์ž‘์—… ์‹œ๊ฐ„ ์ธก์ •์„ ์œ„ํ•œ ์ปจํ…์ŠคํŠธ ๋งค๋‹ˆ์ €"""
209
+ def __init__(self, method_name="timed process"):
210
+ self.method = method_name
211
+
212
+ def __enter__(self):
213
+ self.start = time.time()
214
+ logger.info(f"{self.method} starts")
215
+
216
+ def __exit__(self, exc_type, exc_val, exc_tb):
217
+ end = time.time()
218
+ logger.info(f"{self.method} took {str(round(end - self.start, 2))}s")
219
 
220
  def process_prompt(prompt):
221
+ """ํ”„๋กฌํ”„ํŠธ ์ „์ฒ˜๋ฆฌ (ํ•œ๊ธ€ ๋ฒˆ์—ญ ๋ฐ ํ•„ํ„ฐ๋ง)"""
222
  if any(ord('가') <= ord(char) <= ord('힣') for char in prompt):
223
+ translator = get_translator()
224
  translated = translator(prompt)[0]['translation_text']
225
+ logger.info(f"Translated prompt: {translated}")
226
  return translated
227
  return prompt
228
 
229
+ def filter_prompt(prompt):
230
+ """๋ถ€์ ์ ˆํ•œ ๋‚ด์šฉ ํ•„ํ„ฐ๋ง"""
231
+ inappropriate_keywords = [
232
+ "nude", "naked", "nsfw", "porn", "sex", "explicit", "adult",
233
+ "xxx", "erotic", "sensual", "seductive", "provocative",
234
+ "intimate", "violence", "gore", "blood", "death", "kill",
235
+ "murder", "torture", "drug", "suicide", "abuse", "hate",
236
+ "discrimination"
237
+ ]
238
+
239
+ prompt_lower = prompt.lower()
240
+ for keyword in inappropriate_keywords:
241
+ if keyword in prompt_lower:
242
+ logger.warning(f"Inappropriate content detected: {keyword}")
243
+ return False, "The prompt contains inappropriate content."
244
+ return True, prompt
245
+
246
+ def enhance_prompt(prompt, enhance_toggle):
247
+ """GPT๋ฅผ ์‚ฌ์šฉํ•œ ํ”„๋กฌํ”„ํŠธ ๊ฐœ์„ """
248
+ if not enhance_toggle:
249
+ logger.info("Prompt enhancement disabled")
250
+ return prompt
251
+
252
+ try:
253
+ client = get_openai_client()
254
+ messages = [
255
+ {"role": "system", "content": SYSTEM_PROMPT},
256
+ {"role": "user", "content": prompt},
257
+ ]
258
+
259
+ response = client.chat.completions.create(
260
+ model="gpt-4-mini",
261
+ messages=messages,
262
+ max_tokens=200,
263
+ )
264
+
265
+ enhanced_prompt = response.choices[0].message.content.strip()
266
+ logger.info(f"Enhanced prompt: {enhanced_prompt}")
267
+ return enhanced_prompt
268
+ except Exception as e:
269
+ logger.error(f"Prompt enhancement failed: {str(e)}")
270
+ return prompt
271
+
272
+ def save_image(image, directory=GALLERY_PATH):
273
+ """์ƒ์„ฑ๋œ ์ด๋ฏธ์ง€ ์ €์žฅ"""
274
+ try:
275
+ os.makedirs(directory, exist_ok=True)
276
+
277
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
278
+ random_suffix = os.urandom(4).hex()
279
+ filename = f"generated_{timestamp}_{random_suffix}.png"
280
+ filepath = os.path.join(directory, filename)
281
+
282
+ if not isinstance(image, Image.Image):
283
+ image = Image.fromarray(image)
284
+
285
+ if image.mode != 'RGB':
286
+ image = image.convert('RGB')
287
+
288
+ image.save(filepath, format='PNG', optimize=True, quality=100)
289
+ logger.info(f"Image saved: {filepath}")
290
+ return filepath
291
+ except Exception as e:
292
+ logger.error(f"Error saving image: {str(e)}")
293
+ return None
294
+
295
+ def add_watermark(video_path):
296
+ """๋น„๋””์˜ค์— ์›Œํ„ฐ๋งˆํฌ ์ถ”๊ฐ€"""
297
+ try:
298
+ cap = cv2.VideoCapture(video_path)
299
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
300
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
301
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
302
+
303
+ text = "GiniGEN.AI"
304
+ font = cv2.FONT_HERSHEY_SIMPLEX
305
+ font_scale = height * 0.05 / 30
306
+ thickness = 2
307
+ color = (255, 255, 255)
308
+
309
+ (text_width, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness)
310
+ margin = int(height * 0.02)
311
+ x_pos = width - text_width - margin
312
+ y_pos = height - margin
313
+
314
+ output_path = os.path.join(VIDEO_GALLERY_PATH, f"watermarked_{os.path.basename(video_path)}")
315
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
316
+ out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
317
+
318
+ while cap.isOpened():
319
+ ret, frame = cap.read()
320
+ if not ret:
321
+ break
322
+ cv2.putText(frame, text, (x_pos, y_pos), font, font_scale, color, thickness)
323
+ out.write(frame)
324
+
325
+ cap.release()
326
+ out.release()
327
+ logger.info(f"Video watermarked: {output_path}")
328
+ return output_path
329
+
330
+ except Exception as e:
331
+ logger.error(f"Error adding watermark: {str(e)}")
332
+ return video_path
333
+
334
+ def upload_to_catbox(file_path):
335
+ """ํŒŒ์ผ์„ catbox.moe์— ์—…๋กœ๋“œ"""
336
+ try:
337
+ logger.info(f"Uploading file: {file_path}")
338
+ url = "https://catbox.moe/user/api.php"
339
+
340
+ file_extension = Path(file_path).suffix.lower()
341
+ supported_extensions = {
342
+ '.jpg': 'image/jpeg',
343
+ '.jpeg': 'image/jpeg',
344
+ '.png': 'image/png',
345
+ '.gif': 'image/gif',
346
+ '.mp4': 'video/mp4'
347
+ }
348
+
349
+ if file_extension not in supported_extensions:
350
+ logger.error(f"Unsupported file type: {file_extension}")
351
+ return None
352
+
353
+ files = {
354
+ 'fileToUpload': (
355
+ os.path.basename(file_path),
356
+ open(file_path, 'rb'),
357
+ supported_extensions[file_extension]
358
+ )
359
+ }
360
+
361
+ data = {
362
+ 'reqtype': 'fileupload',
363
+ 'userhash': CATBOX_USER_HASH
364
+ }
365
+
366
+ response = requests.post(url, files=files, data=data)
367
+
368
+ if response.status_code == 200 and response.text.startswith('http'):
369
+ logger.info(f"Upload successful: {response.text}")
370
+ return response.text
371
+ else:
372
+ raise Exception(f"Upload failed: {response.text}")
373
+
374
+ except Exception as e:
375
+ logger.error(f"Upload error: {str(e)}")
376
+ return None
377
+
378
+ # Create the model manager instance
379
+ model_manager = ModelManager()
380
+
381
+
382
+ # Constants and settings for the Gradio interface
383
+ PRESET_OPTIONS = [
384
  {"label": "1216x704, 41 frames", "width": 1216, "height": 704, "num_frames": 41},
385
  {"label": "1088x704, 49 frames", "width": 1088, "height": 704, "num_frames": 49},
386
  {"label": "1056x640, 57 frames", "width": 1056, "height": 640, "num_frames": 57},
 
398
  {"label": "512x512, 200 frames", "width": 512, "height": 512, "num_frames": 200},
399
  ]
400
 
401
+ # Main processing functions
402
+ @spaces.GPU(duration=90)
403
+ def generate_image(
404
+ prompt,
405
+ height,
406
+ width,
407
+ steps,
408
+ scales,
409
+ seed,
410
+ enhance_prompt_toggle=False,
411
+ progress=gr.Progress()
412
+ ):
413
+ """์ด๋ฏธ์ง€ ์ƒ์„ฑ ํ•จ์ˆ˜"""
 
 
414
  try:
415
+ # Prompt preprocessing
416
+ processed_prompt = process_prompt(prompt)
417
+ is_safe, filtered_prompt = filter_prompt(processed_prompt)
418
+ if not is_safe:
419
+ raise gr.Error("The prompt contains inappropriate content.")
420
+
421
+ if enhance_prompt_toggle:
422
+ filtered_prompt = enhance_prompt(filtered_prompt, True)
423
+
424
+ # Load the Flux model
425
+ pipe = model_manager.load_model("flux")
426
+
427
+ with Timer("Image generation"), torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
428
+ generated_image = pipe(
429
+ prompt=[filtered_prompt],
430
+ generator=torch.Generator().manual_seed(int(seed)),
431
+ num_inference_steps=int(steps),
432
+ guidance_scale=float(scales),
433
+ height=int(height),
434
+ width=int(width),
435
+ max_sequence_length=256
436
+ ).images[0]
437
+
438
+ # Save and return the image
439
+ saved_path = save_image(generated_image)
440
+ if saved_path is None:
441
+ raise gr.Error("Failed to save the image.")
442
+
443
+ return Image.open(saved_path)
444
+
445
  except Exception as e:
446
+ logger.error(f"Image generation error: {str(e)}")
447
+ raise gr.Error(f"An error occurred during image generation: {str(e)}")
448
+ finally:
449
+ model_manager.unload_current_model()
450
+ torch.cuda.empty_cache()
451
+ gc.collect()
452
 
453
  @spaces.GPU(duration=90)
454
+ def generate_video_xora(
455
+ prompt,
456
+ enhance_prompt_toggle,
457
+ negative_prompt,
458
+ frame_rate,
459
+ seed,
460
+ num_inference_steps,
461
+ guidance_scale,
462
+ height,
463
+ width,
464
+ num_frames,
465
+ progress=gr.Progress()
466
  ):
467
+ """Xora ๋น„๋””์˜ค ์ƒ์„ฑ ํ•จ์ˆ˜"""
468
+ try:
469
+ # Prompt processing
470
+ prompt = process_prompt(prompt)
471
+ negative_prompt = process_prompt(negative_prompt)
 
 
 
 
472
 
473
+ if len(prompt.strip()) < 50:
474
+ raise gr.Error("The prompt must be at least 50 characters long.")
475
 
476
+ prompt = enhance_prompt(prompt, enhance_prompt_toggle)
 
 
 
 
 
 
477
 
478
+ # Load the Xora model
479
+ pipeline = model_manager.load_model("xora")
480
 
481
+ sample = {
482
+ "prompt": prompt,
483
+ "prompt_attention_mask": None,
484
+ "negative_prompt": negative_prompt,
485
+ "negative_prompt_attention_mask": None,
486
+ "media_items": None,
487
+ }
488
+
489
+ generator = torch.Generator(device="cuda").manual_seed(seed)
490
+
491
+ def progress_callback(step, timestep, kwargs):
492
+ progress((step + 1) / num_inference_steps)
493
 
 
494
  with torch.no_grad():
495
  images = pipeline(
496
  num_inference_steps=num_inference_steps,
 
507
  vae_per_channel_normalize=True,
508
  conditioning_method=ConditioningMethod.UNCONDITIONAL,
509
  mixed_precision=True,
510
+ callback_on_step_end=progress_callback,
511
  ).images
 
 
 
 
 
 
 
 
512
 
513
+ # Save the video
514
+ output_path = os.path.join(VIDEO_GALLERY_PATH, f"generated_{int(time.time())}.mp4")
515
+ video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
516
+ video_np = (video_np * 255).astype(np.uint8)
 
 
517
 
518
+ out = cv2.VideoWriter(
519
+ output_path,
520
+ cv2.VideoWriter_fourcc(*"mp4v"),
521
+ frame_rate,
522
+ (width, height)
 
 
523
  )
524
+
525
+ for frame in video_np[..., ::-1]:
526
+ out.write(frame)
527
+ out.release()
 
 
528
 
529
+ # Add a watermark
530
+ final_path = add_watermark(output_path)
531
+ return final_path
 
 
 
 
532
 
533
+ except Exception as e:
534
+ logger.error(f"Video generation error: {str(e)}")
535
+ raise gr.Error(f"An error occurred during video generation: {str(e)}")
536
+ finally:
537
+ model_manager.unload_current_model()
538
+ torch.cuda.empty_cache()
539
+ gc.collect()
540
 
541
+ def generate_video_replicate(image, prompt):
542
+ """Replicate API๋ฅผ ์‚ฌ์šฉํ•œ ๋น„๋””์˜ค ์ƒ์„ฑ ํ•จ์ˆ˜"""
543
+ try:
544
+ is_safe, filtered_prompt = filter_prompt(prompt)
545
+ if not is_safe:
546
+ raise gr.Error("The prompt contains inappropriate content.")
547
+
548
+ if not image:
549
+ raise gr.Error("Please upload an image.")
550
+
551
+ # Upload the image
552
+ image_url = upload_to_catbox(image)
553
+ if not image_url:
554
+ raise gr.Error("Failed to upload the image.")
555
+
556
+ # Call the Replicate API
557
+ client = replicate.Client(api_token=REPLICATE_API_TOKEN)
558
+ output = client.run(
559
+ "minimax/video-01-live",
560
+ input={
561
+ "prompt": filtered_prompt,
562
+ "first_frame_image": image_url
563
+ }
564
  )
565
 
566
+ # Save the resulting video
567
+ output_path = os.path.join(VIDEO_GALLERY_PATH, f"replicate_{int(time.time())}.mp4")
568
+
569
+ if hasattr(output, 'read'):
570
+ with open(output_path, "wb") as f:
571
+ f.write(output.read())
572
+ elif isinstance(output, str):
573
+ response = requests.get(output)
574
+ with open(output_path, "wb") as f:
575
+ f.write(response.content)
576
+
577
+ # Add a watermark
578
+ final_path = add_watermark(output_path)
579
+ return final_path
580
 
581
+ except Exception as e:
582
+ logger.error(f"Replicate video generation error: {str(e)}")
583
+ raise gr.Error(f"An error occurred during video generation: {str(e)}")
584
+
585
+
586
+ # Gradio UI styles
587
+ css = """
588
+ .gradio-container {
589
+ font-family: 'Pretendard', 'Noto Sans KR', sans-serif !important;
590
+ }
591
+
592
+ .title {
593
+ text-align: center;
594
+ font-size: 2.5rem;
595
+ font-weight: bold;
596
+ color: #2a9d8f;
597
+ margin: 1rem 0;
598
+ padding: 1rem;
599
+ background: linear-gradient(to right, #264653, #2a9d8f);
600
+ -webkit-background-clip: text;
601
+ -webkit-text-fill-color: transparent;
602
+ }
603
+
604
+ .generate-btn {
605
+ background: linear-gradient(to right, #2a9d8f, #264653) !important;
606
+ border: none !important;
607
+ color: white !important;
608
+ font-weight: bold !important;
609
+ transition: all 0.3s ease !important;
610
+ }
611
+
612
+ .generate-btn:hover {
613
+ transform: translateY(-2px) !important;
614
+ box-shadow: 0 5px 15px rgba(42, 157, 143, 0.4) !important;
615
+ }
616
+
617
+ .gallery {
618
+ display: grid;
619
+ grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
620
+ gap: 1rem;
621
+ padding: 1rem;
622
+ }
623
+
624
+ .gallery img {
625
+ width: 100%;
626
+ height: auto;
627
+ border-radius: 8px;
628
+ transition: transform 0.3s ease;
629
+ }
630
+
631
+ .gallery img:hover {
632
+ transform: scale(1.05);
633
+ }
634
+ """
635
+
636
+ # Build the Gradio interface
637
+ def create_ui():
638
+ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
639
+ gr.HTML('<div class="title">AI Image & Video Generator</div>')
640
+
641
+ with gr.Tabs():
642
+ # Image generation tab
643
+ with gr.Tab("Image Generation"):
644
+ with gr.Row():
645
+ with gr.Column(scale=3):
646
+ img_prompt = gr.Textbox(
647
+ label="Image Description",
648
+ placeholder="์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์ž…๋ ฅํ•˜์„ธ์š”... (ํ•œ๊ธ€ ์ž…๋ ฅ ๊ฐ€๋Šฅ)",
649
+ lines=3
650
+ )
651
+
652
+ img_enhance_toggle = Toggle(
653
+ label="Enhance Prompt",
654
+ value=False,
655
+ interactive=True,
656
+ )
657
+
658
+ with gr.Accordion("Advanced Settings", open=False):
659
+ with gr.Row():
660
+ img_height = gr.Slider(
661
+ label="Height",
662
+ minimum=256,
663
+ maximum=1024,
664
+ step=64,
665
+ value=768
666
+ )
667
+ img_width = gr.Slider(
668
+ label="Width",
669
+ minimum=256,
670
+ maximum=1024,
671
+ step=64,
672
+ value=768
673
+ )
674
+
675
+ with gr.Row():
676
+ steps = gr.Slider(
677
+ label="Inference Steps",
678
+ minimum=6,
679
+ maximum=25,
680
+ step=1,
681
+ value=8
682
+ )
683
+ scales = gr.Slider(
684
+ label="Guidance Scale",
685
+ minimum=0.0,
686
+ maximum=5.0,
687
+ step=0.1,
688
+ value=3.5
689
+ )
690
+
691
+ seed = gr.Number(
692
+ label="Seed",
693
+ value=random.randint(0, MAX_SEED),
694
+ precision=0
695
+ )
696
+
697
+ img_generate_btn = gr.Button(
698
+ "Generate Image",
699
+ variant="primary",
700
+ elem_classes=["generate-btn"]
701
+ )
702
+
703
+ with gr.Column(scale=4):
704
+ img_output = gr.Image(
705
+ label="Generated Image",
706
+ type="pil",
707
+ format="png"
708
+ )
709
+ img_gallery = gr.Gallery(
710
+ label="Image Gallery",
711
+ show_label=True,
712
+ elem_id="gallery",
713
+ columns=[4],
714
+ rows=[2],
715
+ height="auto",
716
+ object_fit="cover"
717
+ )
718
+
719
+ # Xora video generation tab
720
+ with gr.Tab("Xora Video Generation"):
721
+ with gr.Row():
722
+ with gr.Column(scale=3):
723
+ xora_prompt = gr.Textbox(
724
+ label="Video Description",
725
+ placeholder="๋น„๋””์˜ค ์„ค๋ช…์„ ์ž…๋ ฅํ•˜์„ธ์š”... (์ตœ์†Œ 50์ž)",
726
+ lines=5
727
+ )
728
+
729
+ xora_enhance_toggle = Toggle(
730
+ label="Enhance Prompt",
731
+ value=False
732
+ )
733
+
734
+ xora_negative_prompt = gr.Textbox(
735
+ label="Negative Prompt",
736
+ value="low quality, worst quality, deformed, distorted",
737
+ lines=2
738
+ )
739
+
740
+ xora_preset = gr.Dropdown(
741
+ choices=[p["label"] for p in PRESET_OPTIONS],
742
+ value="512x512, 160 frames",
743
+ label="Resolution Preset"
744
+ )
745
+
746
+ xora_frame_rate = gr.Slider(
747
+ label="Frame Rate",
748
+ minimum=6,
749
+ maximum=60,
750
+ step=1,
751
+ value=20
752
+ )
753
+
754
+ with gr.Accordion("Advanced Settings", open=False):
755
+ xora_seed = gr.Slider(
756
+ label="Seed",
757
+ minimum=0,
758
+ maximum=MAX_SEED,
759
+ step=1,
760
+ value=random.randint(0, MAX_SEED)
761
+ )
762
+ xora_steps = gr.Slider(
763
+ label="Inference Steps",
764
+ minimum=5,
765
+ maximum=150,
766
+ step=5,
767
+ value=40
768
+ )
769
+ xora_guidance = gr.Slider(
770
+ label="Guidance Scale",
771
+ minimum=1.0,
772
+ maximum=10.0,
773
+ step=0.1,
774
+ value=4.2
775
+ )
776
+
777
+ xora_generate_btn = gr.Button(
778
+ "Generate Video",
779
+ variant="primary",
780
+ elem_classes=["generate-btn"]
781
+ )
782
+
783
+ with gr.Column(scale=4):
784
+ xora_output = gr.Video(label="Generated Video")
785
+ xora_gallery = gr.Gallery(
786
+ label="Video Gallery",
787
+ show_label=True,
788
+ columns=[4],
789
+ rows=[2],
790
+ height="auto",
791
+ object_fit="cover"
792
+ )
793
+
794
+ # Replicate (image-to-video) tab
795
+ with gr.Tab("Image to Video"):
796
+ with gr.Row():
797
+ with gr.Column(scale=3):
798
+ upload_image = gr.Image(
799
+ type="filepath",
800
+ label="Upload First Frame Image"
801
+ )
802
+ replicate_prompt = gr.Textbox(
803
+ label="Video Description",
804
+ placeholder="๋น„๋””์˜ค ์„ค๋ช…์„ ์ž…๋ ฅํ•˜์„ธ์š”...",
805
+ lines=3
806
+ )
807
+ replicate_generate_btn = gr.Button(
808
+ "Generate Video",
809
+ variant="primary",
810
+ elem_classes=["generate-btn"]
811
+ )
812
+
813
+ with gr.Column(scale=4):
814
+ replicate_output = gr.Video(label="Generated Video")
815
+ replicate_gallery = gr.Gallery(
816
+ label="Video Gallery",
817
+ show_label=True,
818
+ columns=[4],
819
+ rows=[2],
820
+ height="auto",
821
+ object_fit="cover"
822
+ )
823
+
824
+ # Wire up event handlers
825
+ img_generate_btn.click(
826
+ fn=generate_image,
827
+ inputs=[
828
+ img_prompt,
829
+ img_height,
830
+ img_width,
831
+ steps,
832
+ scales,
833
+ seed,
834
+ img_enhance_toggle
835
+ ],
836
+ outputs=img_output
837
  )
838
 
839
+ xora_generate_btn.click(
840
+ fn=generate_video_xora,
841
+ inputs=[
842
+ xora_prompt,
843
+ xora_enhance_toggle,
844
+ xora_negative_prompt,
845
+ xora_frame_rate,
846
+ xora_seed,
847
+ xora_steps,
848
+ xora_guidance,
849
+ img_height,
850
+ img_width,
851
+ gr.Slider(label="Number of Frames", value=60)
852
+ ],
853
+ outputs=xora_output
854
  )
855
 
856
+ replicate_generate_btn.click(
857
+ fn=generate_video_replicate,
858
+ inputs=[upload_image, replicate_prompt],
859
+ outputs=replicate_output
 
860
  )
861
 
862
+ # Auto-refresh the galleries
863
+ demo.load(lambda: None, None, [img_gallery, xora_gallery, replicate_gallery], every=30)
864
+
865
+ return demo
866
+
867
+ if __name__ == "__main__":
868
+ # Initialization
869
+ init_directories()
870
+ setup_cuda()
871
+
872
+ # Launch the UI
873
+ demo = create_ui()
874
+ demo.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
875
+ share=True,
876
+ show_api=False,
877
+ server_name="0.0.0.0",
878
+ server_port=7860,
879
+ debug=False
 
 
 
880
  )
881