Spaces:

openfree
/

ginigen-sora

Paused

App Files Files Community

openfree committed on Nov 26, 2024

Commit

5039c8f

verified ·

1 Parent(s): ef46da3

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -1228

app.py CHANGED Viewed

@@ -1,1229 +1,2 @@
-import gradio as gr
-from gradio_toggle import Toggle
-import torch
-from huggingface_hub import snapshot_download
-from transformers import pipeline
-from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
-from xora.models.transformers.transformer3d import Transformer3DModel
-from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier
-from xora.schedulers.rf import RectifiedFlowScheduler
-from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
-from transformers import T5EncoderModel, T5Tokenizer
-from xora.utils.conditioning_method import ConditioningMethod
-from pathlib import Path
-import safetensors.torch
-import json
-import numpy as np
-import cv2
-from PIL import Image
-import tempfile
 import os
-import gc
-from openai import OpenAI
-import re
-import time
-# System prompts (Korean) sent to the chat model when prompt enhancement is on.
-# Each asks for one detailed paragraph of at most 200 words covering action,
-# appearance, setting, camera movement, lighting and sudden changes.
-# Text-to-video variant:
-system_prompt_t2v = """당신은 비디오 생성을 위한 프롬프트 전문가입니다.
-주어진 프롬프트를 다음 구조에 맞게 개선해주세요:
-1. 주요 동작을 명확한 한 문장으로 시작
-2. 구체적인 동작과 제스처를 시간 순서대로 설명
-3. 캐릭터/객체의 외모를 상세히 묘사
-4. 배경과 환경 세부 사항을 구체적으로 포함
-5. 카메라 각도와 움직임을 명시
-6. 조명과 색상을 자세히 설명
-7. 변화나 갑작스러운 사건을 자연스럽게 포함
-모든 설명은 하나의 자연스러운 문단으로 작성하고,
-촬영 감독이 촬영 목록을 설명하는 것처럼 구체적이고 시각적으로 작성하세요.
-200단어를 넘지 않도록 하되, 최대한 상세하게 작성하세요."""
-# Image-to-video variant; differs from the text-to-video prompt only in its first line.
-system_prompt_i2v = """당신은 이미지 기반 비디오 생성을 위한 프롬프트 전문가입니다.
-주어진 프롬프트를 다음 구조에 맞게 개선해주세요:
-1. 주요 동작을 명확한 한 문장으로 시작
-2. 구체적인 동작과 제스처를 시간 순서대로 설명
-3. 캐릭터/객체의 외모를 상세히 묘사
-4. 배경과 환경 세부 사항을 구체적으로 포함
-5. 카메라 각도와 움직임을 명시
-6. 조명과 색상을 자세히 설명
-7. 변화나 갑작스러운 사건을 자연스럽게 포함
-모든 설명은 하나의 자연스러운 문단으로 작성하고,
-촬영 감독이 촬영 목록을 설명하는 것처럼 구체적이고 시각적으로 작성하세요.
-200단어를 넘지 않도록 하되, 최대한 상세하게 작성하세요."""
-# Credentials come from the environment; os.getenv returns None when a variable is unset.
-hf_token = os.getenv("HF_TOKEN")
-openai_api_key = os.getenv("OPENAI_API_KEY")
-client = OpenAI(api_key=openai_api_key)
-# Korean-to-English translation pipeline, placed on CUDA when it is available.
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-translator = pipeline(
-    "translation",
-    model="Helsinki-NLP/opus-mt-ko-en",
-    device=device,
-    clean_up_tokenization_spaces=True
-)
-# Korean text detection function
-def contains_korean(text):
-    korean_pattern = re.compile('[ㄱ-ㅎㅏ-ㅣ가-힣]')
-    return bool(korean_pattern.search(text))
-def translate_korean_prompt(prompt, max_length=450):
-    """
-    Translate Korean prompt to English if Korean text is detected
-    Split long text into chunks if necessary
-    """
-    if not contains_korean(prompt):
-        return prompt
-    # Split long text into chunks
-    def split_text(text, max_length):
-        words = text.split()
-        chunks = []
-        current_chunk = []
-        current_length = 0
-        for word in words:
-            if current_length + len(word) + 1 > max_length:
-                chunks.append(' '.join(current_chunk))
-                current_chunk = [word]
-                current_length = len(word)
-            else:
-                current_chunk.append(word)
-                current_length += len(word) + 1
-        if current_chunk:
-            chunks.append(' '.join(current_chunk))
-        return chunks
-    try:
-        if len(prompt) > max_length:
-            chunks = split_text(prompt, max_length)
-            translated_chunks = []
-            for chunk in chunks:
-                translated = translator(chunk, max_length=512)[0]['translation_text']
-                translated_chunks.append(translated)
-            final_translation = ' '.join(translated_chunks)
-        else:
-            final_translation = translator(prompt, max_length=512)[0]['translation_text']
-        print(f"Original Korean prompt: {prompt}")
-        print(f"Translated English prompt: {final_translation}")
-        return final_translation
-    except Exception as e:
-        print(f"Translation error: {e}")
-        return prompt  # Return original prompt if translation fails
-def enhance_prompt(prompt, type="t2v"):
-    system_prompt = system_prompt_t2v if type == "t2v" else system_prompt_i2v
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": prompt},
-    ]
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4-1106-preview",
-            messages=messages,
-            max_tokens=2000,
-        )
-        enhanced_prompt = response.choices[0].message.content.strip()
-        print("\n=== 프롬프트 증강 결과 ===")
-        print("Original Prompt:")
-        print(prompt)
-        print("\nEnhanced Prompt:")
-        print(enhanced_prompt)
-        print("========================\n")
-        return enhanced_prompt
-    except Exception as e:
-        print(f"Error during prompt enhancement: {e}")
-        return prompt
-def update_prompt_t2v(prompt, enhance_toggle):
-    return update_prompt(prompt, enhance_toggle, "t2v")
-def update_prompt_i2v(prompt, enhance_toggle):
-    return update_prompt(prompt, enhance_toggle, "i2v")
-def update_prompt(prompt, enhance_toggle, type="t2v"):
-    if enhance_toggle:
-        return enhance_prompt(prompt, type)
-    return prompt
-# Set model download directory within Hugging Face Spaces.
-# The snapshot is downloaded only when the "asset" directory does not exist yet.
-model_path = "asset"
-if not os.path.exists(model_path):
-    snapshot_download(
-        "Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token
-    )
-# Global variables to load components: sub-directories of the downloaded snapshot.
-vae_dir = Path(model_path) / "vae"
-unet_dir = Path(model_path) / "unet"
-scheduler_dir = Path(model_path) / "scheduler"
-# NOTE(review): `device` is already assigned with the same expression earlier in the file.
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def load_vae(vae_dir):
-    vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
-    vae_config_path = vae_dir / "config.json"
-    with open(vae_config_path, "r") as f:
-        vae_config = json.load(f)
-    vae = CausalVideoAutoencoder.from_config(vae_config)
-    vae_state_dict = safetensors.torch.load_file(vae_ckpt_path)
-    vae.load_state_dict(vae_state_dict)
-    return vae.to(device=device, dtype=torch.bfloat16)
-def load_unet(unet_dir):
-    unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
-    unet_config_path = unet_dir / "config.json"
-    transformer_config = Transformer3DModel.load_config(unet_config_path)
-    transformer = Transformer3DModel.from_config(transformer_config)
-    unet_state_dict = safetensors.torch.load_file(unet_ckpt_path)
-    transformer.load_state_dict(unet_state_dict, strict=True)
-    return transformer.to(device=device, dtype=torch.bfloat16)
-def load_scheduler(scheduler_dir):
-    scheduler_config_path = scheduler_dir / "scheduler_config.json"
-    scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
-    return RectifiedFlowScheduler.from_config(scheduler_config)
-def center_crop_and_resize(frame, target_height, target_width):
-    # State 객체인 경우 value 값을 가져옴
-    if isinstance(target_height, gr.State):
-        target_height = target_height.value
-    if isinstance(target_width, gr.State):
-        target_width = target_width.value
-    h, w, _ = frame.shape
-    aspect_ratio_target = target_width / target_height
-    aspect_ratio_frame = w / h
-    if aspect_ratio_frame > aspect_ratio_target:
-        new_width = int(h * aspect_ratio_target)
-        x_start = (w - new_width) // 2
-        frame_cropped = frame[:, x_start : x_start + new_width]
-    else:
-        new_height = int(w / aspect_ratio_target)
-        y_start = (h - new_height) // 2
-        frame_cropped = frame[y_start : y_start + new_height, :]
-    frame_resized = cv2.resize(frame_cropped, (target_width, target_height))
-    return frame_resized
-def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
-    image = Image.open(image_path).convert("RGB")
-    image_np = np.array(image)
-    frame_resized = center_crop_and_resize(image_np, target_height, target_width)
-    frame_tensor = torch.tensor(frame_resized).permute(2, 0, 1).float()
-    frame_tensor = (frame_tensor / 127.5) - 1.0
-    return frame_tensor.unsqueeze(0).unsqueeze(2)
-# Load models once at import time; they are shared by every request.
-vae = load_vae(vae_dir)
-unet = load_unet(unet_dir)
-scheduler = load_scheduler(scheduler_dir)
-patchifier = SymmetricPatchifier(patch_size=1)
-# Text encoder and tokenizer come from the PixArt checkpoint's sub-folders.
-text_encoder = T5EncoderModel.from_pretrained(
-    "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder"
-).to(device)
-tokenizer = T5Tokenizer.from_pretrained(
-    "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer"
-)
-# NOTE(review): this rebinds `pipeline`, which was imported from transformers
-# above and used to build `translator`; from here on the name refers to the
-# video pipeline instance.
-pipeline = XoraVideoPipeline(
-    transformer=unet,
-    patchifier=patchifier,
-    text_encoder=text_encoder,
-    tokenizer=tokenizer,
-    scheduler=scheduler,
-    vae=vae,
-).to(device)
-# Preset options for resolution and frame configuration.
-# Each entry has a display label plus the width, height and frame count it selects.
-# The seconds shown in each label are num_frames converted assuming 25 FPS.
-preset_options = [
-    {"label": "[16:9 HD] 1216x704, 1.6초", "width": 1216, "height": 704, "num_frames": 41},
-    {"label": "[16:9] 1088x704, 2.0초", "width": 1088, "height": 704, "num_frames": 49},
-    {"label": "[16:9] 1056x640, 2.3초", "width": 1056, "height": 640, "num_frames": 57},
-    {"label": "[16:9] 896x608, 2.9초", "width": 896, "height": 608, "num_frames": 73},
-    {"label": "[16:9] 800x512, 3.9초", "width": 800, "height": 512, "num_frames": 97},
-    {"label": "[16:9] 736x480, 4.5초", "width": 736, "height": 480, "num_frames": 113},
-    {"label": "[16:9] 704x448, 5.2초", "width": 704, "height": 448, "num_frames": 129},
-    {"label": "[16:9] 608x352, 7.7초", "width": 608, "height": 352, "num_frames": 193},
-    {"label": "[16:9] 576x352, 8.0초", "width": 576, "height": 352, "num_frames": 201},
-    {"label": "[16:9] 544x320, 9.6초", "width": 544, "height": 320, "num_frames": 241},
-    {"label": "[16:9] 512x320, 10.3초", "width": 512, "height": 320, "num_frames": 257},
-    {"label": "[3:2] 704x480, 4.8초", "width": 704, "height": 480, "num_frames": 121},
-    {"label": "[3:2] 512x352, 9.3초", "width": 512, "height": 352, "num_frames": 233},
-    {"label": "[1:1] 704x704, 2.3초", "width": 704, "height": 704, "num_frames": 57},
-    {"label": "[9:16] 608x1088, 2.0초", "width": 608, "height": 1088, "num_frames": 49},
-    {"label": "[9:16] 448x800, 4.2초", "width": 448, "height": 800, "num_frames": 105},
-]
-def preset_changed(preset):
-    selected = next((item for item in preset_options if item["label"] == preset), None)
-    if selected is None:
-        raise gr.Error("Invalid preset selected")
-    return [
-        gr.State(value=selected["height"]),
-        gr.State(value=selected["width"]),
-        gr.State(value=selected["num_frames"]),
-        gr.update(visible=False),
-        gr.update(visible=False),
-        gr.update(visible=False),
-    ]
-def generate_video_from_text(
-    prompt,
-    enhance_prompt_toggle,
-    negative_prompt,
-    frame_rate,
-    seed,
-    num_inference_steps,
-    guidance_scale,
-    height,
-    width,
-    num_frames,
-    progress=gr.Progress(),
-):
-    # State 객체의 value 값을 가져옴
-    height = height.value if isinstance(height, gr.State) else height
-    width = width.value if isinstance(width, gr.State) else width
-    num_frames = num_frames.value if isinstance(num_frames, gr.State) else num_frames
-    if len(prompt.strip()) < 50:
-        raise gr.Error(
-            "프롬프트는 최소 50자 이상이어야 합니다. 더 자세한 설명을 제공해주세요.",
-            duration=5,
-        )
-    # 프롬프트 개선이 활성화된 경우
-    if enhance_prompt_toggle:
-        prompt = enhance_prompt(prompt, "t2v")
-    # Translate Korean prompts to English
-    prompt = translate_korean_prompt(prompt)
-    negative_prompt = translate_korean_prompt(negative_prompt)
-    # 기본값 설정
-    height = height or 320
-    width = width or 512
-    num_frames = num_frames or 257
-    frame_rate = frame_rate or 25
-    seed = seed or 171198
-    num_inference_steps = num_inference_steps or 41
-    guidance_scale = guidance_scale or 4.0
-    sample = {
-        "prompt": prompt,
-        "prompt_attention_mask": None,
-        "negative_prompt": negative_prompt,
-        "negative_prompt_attention_mask": None,
-        "media_items": None,
-    }
-    generator = torch.Generator(device="cpu").manual_seed(seed)
-    def gradio_progress_callback(self, step, timestep, kwargs):
-        progress((step + 1) / num_inference_steps)
-    try:
-        with torch.no_grad():
-            images = pipeline(
-                num_inference_steps=num_inference_steps,
-                num_images_per_prompt=1,
-                guidance_scale=guidance_scale,
-                generator=generator,
-                output_type="pt",
-                height=height,
-                width=width,
-                num_frames=num_frames,
-                frame_rate=frame_rate,
-                **sample,
-                is_video=True,
-                vae_per_channel_normalize=True,
-                conditioning_method=ConditioningMethod.UNCONDITIONAL,
-                mixed_precision=True,
-                callback_on_step_end=gradio_progress_callback,
-            ).images
-    except Exception as e:
-        raise gr.Error(
-            f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
-            duration=5,
-        )
-    finally:
-        torch.cuda.empty_cache()
-        gc.collect()
-    output_path = tempfile.mktemp(suffix=".mp4")
-    video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
-    video_np = (video_np * 255).astype(np.uint8)
-    height, width = video_np.shape[1:3]
-    out = cv2.VideoWriter(
-        output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
-    )
-    for frame in video_np[..., ::-1]:
-        out.write(frame)
-    out.release()
-    del images
-    del video_np
-    torch.cuda.empty_cache()
-    return output_path
-def generate_video_from_image(
-    image_path,
-    prompt,
-    enhance_prompt_toggle,
-    negative_prompt,
-    frame_rate,
-    seed,
-    num_inference_steps,
-    guidance_scale,
-    height,
-    width,
-    num_frames,
-    progress=gr.Progress(),
-):
-    # State 객체의 value 값을 가져옴
-    height = height.value if isinstance(height, gr.State) else height
-    width = width.value if isinstance(width, gr.State) else width
-    num_frames = num_frames.value if isinstance(num_frames, gr.State) else num_frames
-    if not image_path:
-        raise gr.Error("입력 이미지를 제공해주세요.", duration=5)
-    if len(prompt.strip()) < 50:
-        raise gr.Error(
-            "프롬프트는 최소 50자 이상이어야 합니다. 더 자세한 설명을 제공해주세요.",
-            duration=5,
-        )
-    # 프롬프트 개선이 활성화된 경우
-    if enhance_prompt_toggle:
-        prompt = enhance_prompt(prompt, "i2v")
-    # Translate Korean prompts to English
-    prompt = translate_korean_prompt(prompt)
-    negative_prompt = translate_korean_prompt(negative_prompt)
-    # 기본값 설정
-    height = height or 320
-    width = width or 512
-    num_frames = num_frames or 257
-    frame_rate = frame_rate or 25
-    seed = seed or 171198
-    num_inference_steps = num_inference_steps or 41
-    guidance_scale = guidance_scale or 4.0
-    # 이미지 로드 및 전처리
-    media_items = (
-        load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
-    )
-    sample = {
-        "prompt": prompt,
-        "prompt_attention_mask": None,
-        "negative_prompt": negative_prompt,
-        "negative_prompt_attention_mask": None,
-        "media_items": media_items,
-    }
-    generator = torch.Generator(device="cpu").manual_seed(seed)
-    def gradio_progress_callback(self, step, timestep, kwargs):
-        progress((step + 1) / num_inference_steps)
-    try:
-        with torch.no_grad():
-            images = pipeline(
-                num_inference_steps=num_inference_steps,
-                num_images_per_prompt=1,
-                guidance_scale=guidance_scale,
-                generator=generator,
-                output_type="pt",
-                height=height,
-                width=width,
-                num_frames=num_frames,
-                frame_rate=frame_rate,
-                **sample,
-                is_video=True,
-                vae_per_channel_normalize=True,
-                conditioning_method=ConditioningMethod.FIRST_FRAME,
-                mixed_precision=True,
-                callback_on_step_end=gradio_progress_callback,
-            ).images
-        output_path = tempfile.mktemp(suffix=".mp4")
-        video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
-        video_np = (video_np * 255).astype(np.uint8)
-        height, width = video_np.shape[1:3]
-        out = cv2.VideoWriter(
-            output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
-        )
-        for frame in video_np[..., ::-1]:
-            out.write(frame)
-        out.release()
-    except Exception as e:
-        raise gr.Error(
-            f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
-            duration=5,
-        )
-    finally:
-        torch.cuda.empty_cache()
-        gc.collect()
-        if 'images' in locals():
-            del images
-        if 'video_np' in locals():
-            del video_np
-        if 'media_items' in locals():
-            del media_items
-    return output_path
-def create_advanced_options():
-    with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
-        seed = gr.Slider(
-            label="Seed",
-            minimum=0,
-            maximum=1000000,
-            step=1,
-            value=171198
-        )
-        inference_steps = gr.Slider(
-            label="4.2 Inference Steps",
-            minimum=1,
-            maximum=50,
-            step=1,
-            value=41,
-            visible=False
-        )
-        guidance_scale = gr.Slider(
-            label="4.3 Guidance Scale",
-            minimum=1.0,
-            maximum=5.0,
-            step=0.1,
-            value=4.0,
-            visible=False
-        )
-        height_slider = gr.Slider(
-            label="4.4 Height",
-            minimum=256,
-            maximum=1024,
-            step=64,
-            value=320,
-            visible=False,
-        )
-        width_slider = gr.Slider(
-            label="4.5 Width",
-            minimum=256,
-            maximum=1024,
-            step=64,
-            value=512,
-            visible=False,
-        )
-        num_frames_slider = gr.Slider(
-            label="4.5 Number of Frames",
-            minimum=1,
-            maximum=200,
-            step=1,
-            value=257,
-            visible=False,
-        )
-        return [
-            seed,
-            inference_steps,
-            guidance_scale,
-            height_slider,
-            width_slider,
-            num_frames_slider,
-        ]
-system_prompt_scenario = """당신은 영상 스크립트에 맞는 배경 영상을 생성하기 위한 프롬프트 전문가입니다.
-주어진 스크립트의 분위기와 맥락을 시각적 배경으로 표현하되, 다음 원칙을 반드시 준수하세요:
-1. ���품이나 서비스를 직접적으로 묘사하지 말 것
-2. 스크립트의 감성과 톤앤매너를 표현하는 배경 영상에 집중할 것
-3. 5개 섹션이 하나의 이야기처럼 자연스럽게 연결되도록 할 것
-4. 추상적이고 은유적인 시각 표현을 활용할 것
-각 섹션별 프롬프트 작성 가이드:
-1. 배경 및 필요성: 주제의 전반적인 분위기를 표현하는 배경 씬
-2. 문제 제기: 긴장감이나 갈등을 암시하는 분위기 있는 배경
-3. 해결책 제시: 희망적이고 밝은 톤의 배경 전환
-4. 본론: 안정감 있고 신뢰도를 높이는 배경
-5. 결론: 임팩트 있는 마무리를 위한 역동적인 배경
-모든 섹션이 일관된 스타일과 톤을 유지하면서도 자연스럽게 이어지도록 구성하세요.
-각 섹션의 프롬프트 작성시 반드시 다음 구조에 맞게 개선해주세요:
-1. 주요 동작을 명확한 한 문장으로 시작
-2. 구체적인 동작과 제스처를 시간 순서대로 설명
-3. 캐릭터/객체의 외모를 상세히 묘사
-4. 배경과 환경 세부 사항을 구체적으로 포함
-5. 카메라 각도와 움직임을 명시
-6. 조명과 색상을 자세히 설명
-7. 변화나 갑작스러운 사건을 자연스럽게 포함
-모든 설명은 하나의 자연스러운 문단으로 작성하고,
-촬영 감독이 촬영 목록을 설명하는 것처럼 구체적이고 시각적으로 작성하세요.
-200단어를 넘지 않도록 하되, 최대한 상세하게 작성하세요.
-"""
-def analyze_scenario(scenario):
-    """시나리오를 분석하여 각 섹션별 배경 영상용 프롬프트 생성"""
-    try:
-        # 각 섹션별 프롬프트 생성을 위한 메시지 구성
-        section_prompts = []
-        for section_num in range(1, 6):
-            section_descriptions = {
-                1: "배경 및 필요성: 주제의 전반적인 분위기를 표현하는 배경 씬",
-                2: "흥미 유발: 긴장감이나 갈등을 암시하는 분위기 있는 배경",
-                3: "해결책 제시: 희망적이고 밝은 톤의 배경 전환",
-                4: "본론: 안정감 있고 신뢰도를 높이는 배경",
-                5: "결론: 임팩트 있는 마무리를 위한 역동적인 배경"
-            }
-            messages = [
-                {"role": "system", "content": system_prompt_scenario},
-                {"role": "user", "content": f"""
-다음 스크립트의 {section_num}번째 섹션({section_descriptions[section_num]})에 대한
-배경 영상 프롬프트를 생성해주세요.
-스크립트:
-{scenario}
-주의사항:
-1. 해당 섹션의 특성({section_descriptions[section_num]})에 맞는 분위기와 톤을 반영하세요.
-2. 직접적인 제품/서비스 묘사는 피하고, 감성적이고 은유적인 배경 영상에 집중하세요.
-3. 다음 구조를 반드시 포함하세요:
-   - 주요 동작을 명확한 한 문장으로 시작
-   - 구체적인 동작과 제스처를 시간 순서대로 설명
-   - 배경과 환경 세부 사항을 구체적으로 포함
-   - 카메라 각도와 움직임을 명시
-   - 조명과 색상을 자세히 설명
-   - 변화나 갑작스러운 사건을 자연스럽게 포함"""}
-            ]
-            response = client.chat.completions.create(
-                model="gpt-4-1106-preview",
-                messages=messages,
-                max_tokens=1000,
-                temperature=0.7
-            )
-            section_prompt = response.choices[0].message.content.strip()
-            section_prompts.append(f"{section_num}. {section_prompt}")
-            # API 요청 사이에 짧은 딜레이 추가
-            time.sleep(1)
-        return section_prompts
-    except Exception as e:
-        print(f"Error during scenario analysis: {e}")
-        return ["Error occurred during analysis"] * 5
-def generate_section_video(prompt, preset, section_number=1, base_seed=171198, progress=gr.Progress()):
-    """각 섹션의 비디오 생성"""
-    try:
-        if not prompt or len(prompt.strip()) < 50:
-            raise gr.Error("프롬프트는 최소 50자 이상이어야 합니다.")
-        if not preset:
-            raise gr.Error("해상도 프리셋을 선택해주세요.")
-        selected = next((item for item in preset_options if item["label"] == preset), None)
-        if not selected:
-            raise gr.Error("올바르지 않은 프리셋입니다.")
-        section_seed = base_seed + section_number
-        return generate_video_from_text(
-            prompt=prompt,
-            enhance_prompt_toggle=False,  # 섹션 생성시는 프롬프트 증강 비활성화
-            negative_prompt="low quality, worst quality, deformed, distorted, warped",
-            frame_rate=25,
-            seed=section_seed,
-            num_inference_steps=41,
-            guidance_scale=4.0,
-            height=selected["height"],
-            width=selected["width"],
-            num_frames=selected["num_frames"],
-            progress=progress
-        )
-    except Exception as e:
-        print(f"Error in section {section_number}: {e}")
-        raise gr.Error(f"섹션 {section_number} 생성 중 오류: {str(e)}")
-    finally:
-        torch.cuda.empty_cache()
-        gc.collect()
-def generate_single_section_prompt(scenario, section_number):
-    """개별 섹션에 대한 프롬프트 생성"""
-    section_descriptions = {
-        1: "배경 및 필요성: 주제의 전반적인 분위기를 표현하는 배경 씬",
-        2: "흥미 유발: 흥미를 유발하고 기대감을 증폭시키는 배경",
-        3: "해결책 제시: 희망적이고 밝은 톤의 배경 전환",
-        4: "본론: 안정감 있고 신뢰도를 높이는 배경",
-        5: "결론: 임팩트 있는 마무리를 위한 역동적인 배경"
-    }
-    messages = [
-        {"role": "system", "content": system_prompt_scenario},
-        {"role": "user", "content": f"""
-다음 스크립트의 {section_number}번째 섹션({section_descriptions[section_number]})에 대한
-배경 영상 프롬프트를 생성해주세요.
-스크립트:
-{scenario}
-주의사항:
-1. 해당 섹션의 특성({section_descriptions[section_number]})에 맞는 분위기와 톤을 반영하세요.
-2. 직접적인 제품/서비스 묘사는 피하고, 감성적이고 은유적인 배경 영상에 집중하세요.
-3. 다음 구조를 반드시 포함하세요:
-   - 주요 동작을 명확한 한 문장으로 시작
-   - 구체적인 동작과 제스처를 시간 순서대로 설명
-   - 배경과 환경 세부 사항을 구체적으로 포함
-   - 카메라 각도와 움직임을 명시
-   - 조명과 색상을 자세히 설명
-   - 변화나 갑작스러운 사건을 자연스럽게 포함"""}
-    ]
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4-1106-preview",
-            messages=messages,
-            max_tokens=1000,  # 토큰 수 증가
-            temperature=0.7
-        )
-        generated_prompt = response.choices[0].message.content.strip()
-        return f"{section_number}. {generated_prompt}"
-    except Exception as e:
-        print(f"Error during prompt generation for section {section_number}: {e}")
-        return f"Error occurred during prompt generation for section {section_number}"
-# 비디오 결합 함수 추가
-def combine_videos(video_paths, output_path):
-    """여러 비디오를 하나로 결합"""
-    if not all(video_paths):
-        raise gr.Error("모든 섹션의 영상이 생성되어야 합니다.")
-    try:
-        # 첫 번째 비디오의 속성 가져오기
-        cap = cv2.VideoCapture(video_paths[0])
-        fps = int(cap.get(cv2.CAP_PROP_FPS))
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        cap.release()
-        # 출력 비디오 설정
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        # 각 비디오 순차적으로 결합
-        for video_path in video_paths:
-            if video_path and os.path.exists(video_path):
-                cap = cv2.VideoCapture(video_path)
-                while True:
-                    ret, frame = cap.read()
-                    if not ret:
-                        break
-                    out.write(frame)
-                cap.release()
-        out.release()
-        return output_path
-    except Exception as e:
-        raise gr.Error(f"비디오 결합 중 오류 발생: {e}")
-def merge_section_videos(section1, section2, section3, section4, section5):
-    """섹션 비디오들을 하나로 결합"""
-    videos = []
-    # 각 섹션 비디오 확인 및 처리
-    for i, video_path in enumerate([section1, section2, section3, section4, section5], 1):
-        if video_path:
-            if os.path.exists(video_path):
-                try:
-                    # 비디오 파일 검증
-                    cap = cv2.VideoCapture(video_path)
-                    if cap.isOpened():
-                        videos.append(video_path)
-                        cap.release()
-                    else:
-                        raise gr.Error(f"섹션 {i}의 영상 파일이 손상되었거나 읽을 수 없습니다.")
-                except Exception as e:
-                    raise gr.Error(f"섹션 {i} 영상 처리 중 오류: {str(e)}")
-            else:
-                raise gr.Error(f"섹션 {i}의 영상 파일을 찾을 수 없습니다.")
-        else:
-            raise gr.Error(f"섹션 {i}의 영상이 없습니다.")
-    if not videos:
-        raise gr.Error("결합할 영상이 없습니다.")
-    try:
-        output_path = tempfile.mktemp(suffix=".mp4")
-        # 첫 번째 비디오의 속성 가져오기
-        cap = cv2.VideoCapture(videos[0])
-        fps = int(cap.get(cv2.CAP_PROP_FPS))
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        cap.release()
-        # 출력 비디오 설정
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        # 각 비디오 순차적으로 결합
-        for video_path in videos:
-            cap = cv2.VideoCapture(video_path)
-            while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
-                # 프레임 크기가 다른 경우 리사이즈
-                if frame.shape[:2] != (height, width):
-                    frame = cv2.resize(frame, (width, height))
-                out.write(frame)
-            cap.release()
-        out.release()
-        print(f"Successfully merged {len(videos)} videos")
-        return output_path
-    except Exception as e:
-        raise gr.Error(f"비디오 결합 중 오류 발생: {e}")
-def generate_script(topic):
-    """주제에 맞는 스크립트 생성"""
-    if not topic:
-        return "주제를 입력해주세요."
-    messages = [
-        {"role": "system", "content": """당신은 영상 스크립트 작성 전문가입니다.
-주어진 주제로 다음 구조에 맞는 5개 섹션의 스크립트를 작성해주세요:
-1. 배경 및 필요성: 주제 소개와 시청자의 흥미 유발
-2. 흥미 유발: 구체적인 내용 전개와 호기심 자극
-3. 해결책 제시: 핵심 내용과 해결방안 제시
-4. 본론: 상세한 설명과 장점 부각
-5. 결론: 핵심 메시지 강조와 행동 유도
-각 섹션은 자연스럽게 연결되어야 하며,
-전체적으로 일관된 톤과 분위기를 유지하면서도
-시청자의 관심을 끝까지 유지할 수 있도록 작성해주세요."""},
-        {"role": "user", "content": f"다음 주제로 영상 스크립트를 작성해주세요: {topic}"}
-    ]
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4-1106-preview",
-            messages=messages,
-            max_tokens=2000,
-            temperature=0.7
-        )
-        return response.choices[0].message.content.strip()
-    except Exception as e:
-        print(f"Error during script generation: {e}")
-        return "스크립트 생성 중 오류가 발생했습니다."
-def cleanup():
-    """Free memory: empty the CUDA cache, then run the garbage collector."""
-    torch.cuda.empty_cache()
-    gc.collect()
-with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
-     # State 변수들의 초기화
-     txt2vid_current_height = gr.State(value=320)
-     txt2vid_current_width = gr.State(value=512)
-     txt2vid_current_num_frames = gr.State(value=257)
-     img2vid_current_height = gr.State(value=320)
-     img2vid_current_width = gr.State(value=512)
-     img2vid_current_num_frames = gr.State(value=257)
-     with gr.Tabs():
-          # Text to Video Tab
-          with gr.TabItem("텍스트로 비디오 만들기"):
-               with gr.Row():
-                    with gr.Column():
-                         txt2vid_prompt = gr.Textbox(
-                              label="Step 1: 프롬프트 입력",
-                              placeholder="생성하고 싶은 비디오를 설명하세요 (최소 50자)...",
-                              value="귀여운 고양이",
-                              lines=5,
-                         )
-                         txt2vid_enhance_toggle = Toggle(
-                              label="프롬프트 증강",
-                              value=False,
-                              interactive=True,
-                         )
-                         txt2vid_negative_prompt = gr.Textbox(
-                              label="Step 2: 네거티브 프롬프트 입력",
-                              placeholder="비디오에서 원하지 않는 요소를 설명하세요...",
-                              value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
-                              lines=2,
-                              visible=False
-                         )
-                         txt2vid_preset = gr.Dropdown(
-                              choices=[p["label"] for p in preset_options],
-                              value="[16:9] 512x320, 10.3초",
-                              label="Step 2: 해상도 프리셋 선택",
-                         )
-                         txt2vid_frame_rate = gr.Slider(
-                              label="Step 3: 프레임 레이트",
-                              minimum=21,
-                              maximum=30,
-                              step=1,
-                              value=25,
-                              visible=False
-                         )
-                         txt2vid_advanced = create_advanced_options()
-                         txt2vid_generate = gr.Button(
-                              "Step 3: 비디오 생성",
-                              variant="primary",
-                              size="lg",
-                         )
-                    with gr.Column():
-                         txt2vid_output = gr.Video(label="생성된 비디오")
-# Image to Video Tab: image upload, prompt, enhancement toggle, resolution preset, and generate button
-          with gr.TabItem("이미지로 비디오 만들기"):
-               with gr.Row():
-                    with gr.Column():  # first column: input controls
-                         img2vid_image = gr.Image(
-                              type="filepath",  # per Gradio docs, handlers receive the upload as a file path string
-                              label="Step 1: 입력 이미지 업로드",
-                              elem_id="image_upload",
-                         )
-                         img2vid_prompt = gr.Textbox(
-                              label="Step 2: 프롬프트 입력",
-                              placeholder="이미지를 어떻게 애니메이션화할지 설명하세요 (최소 50자)...",
-                              value="귀여운 고양이",
-                              lines=5,
-                         )
-                         img2vid_enhance_toggle = Toggle(  # its change event rewrites the prompt via update_prompt_i2v (see handlers)
-                              label="프롬프트 증강",
-                              value=False,
-                              interactive=True,
-                         )
-                         img2vid_negative_prompt = gr.Textbox(
-                              label="Step 3: 네거티브 프롬프트 입력",
-                              placeholder="비디오에서 원하지 않는 요소를 설명하세요...",
-                              value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
-                              lines=2,
-                              visible=False  # hidden in the UI, but still listed as an input of img2vid_generate.click
-                         )
-                         img2vid_preset = gr.Dropdown(
-                              choices=[p["label"] for p in preset_options],  # preset_options is defined outside this excerpt
-                              value="[16:9] 512x320, 10.3초",  # presumably must match one of the preset labels — TODO confirm
-                              label="Step 3: 해상도 프리셋 선택",
-                         )
-                         img2vid_frame_rate = gr.Slider(
-                              label="Step 4: 프레임 레이트",
-                              minimum=21,
-                              maximum=30,
-                              step=1,
-                              value=25,
-                              visible=False  # hidden in the UI, but still listed as an input of img2vid_generate.click
-                         )
-                         img2vid_advanced = create_advanced_options()  # indexed [0]..[5] by the handlers (seed, steps, guidance, height/width/num_frames sliders)
-                         img2vid_generate = gr.Button(
-                              "Step 4: 비디오 생성",
-                              variant="primary",
-                              size="lg",
-                         )
-                    with gr.Column():  # second column: generated video output
-                         img2vid_output = gr.Video(label="생성된 비디오")
-# Scenario Tab: script input on one side, five section prompt/video editors and a merged-video row on the other
-          with gr.TabItem("시나리오로 비디오 만들기(숏폼)"):
-               with gr.Row():
-                    with gr.Column(scale=1):  # script topic, scenario text, preset, and analyze button
-                         script_topic = gr.Textbox(
-                              label="스크립트 생성",
-                              placeholder="겨울 일본 온천 여행을 주제로 밝은 느낌으로 스크립트 생성하라",
-                              lines=2
-                         )
-                         generate_script_btn = gr.Button("스크립트 생성", variant="primary")
-                         scenario_input = gr.Textbox(  # filled by generate_script_btn and read by the analyze/regenerate handlers
-                              label="영상 스크립트 입력",
-                              placeholder="전체 시나리오를 입력하세요...",
-                              lines=10
-                         )
-                         scenario_preset = gr.Dropdown(  # passed to generate_section_video by each section's generate button
-                              choices=[p["label"] for p in preset_options],
-                              value="[16:9] 512x320, 10.3초",
-                              label="화면 크기 선택"
-                         )
-                         analyze_btn = gr.Button("시나리오 분석 및 프롬프트 생성", variant="primary")
-                    with gr.Column(scale=2):  # section editors laid out in rows of two, then the merge row
-                         with gr.Row():
-                              # Section 1
-                              with gr.Column():
-                                   section1_prompt = gr.Textbox(
-                                        label="1. 배경 및 필요성",
-                                        lines=4
-                                   )
-                                   with gr.Row():
-                                        section1_regenerate = gr.Button("🔄 프롬프트 생성")
-                                        section1_generate = gr.Button("🔄 영상 생성")
-                                   section1_video = gr.Video(label="섹션 1 영상")
-                              # Section 2
-                              with gr.Column():
-                                   section2_prompt = gr.Textbox(
-                                        label="2. 흥미 유발",
-                                        lines=4
-                                   )
-                                   with gr.Row():
-                                        section2_regenerate = gr.Button("🔄 프롬프트 생성")
-                                        section2_generate = gr.Button("🔄 영상 생성")
-                                   section2_video = gr.Video(label="섹션 2 영상")
-                         with gr.Row():
-                              # Section 3
-                              with gr.Column():
-                                   section3_prompt = gr.Textbox(
-                                        label="3. 해결책 제시",
-                                        lines=4
-                                   )
-                                   with gr.Row():
-                                        section3_regenerate = gr.Button("🔄 프롬프트 생성")
-                                        section3_generate = gr.Button("🔄 영상 생성")
-                                   section3_video = gr.Video(label="섹션 3 영상")
-                              # Section 4
-                              with gr.Column():
-                                   section4_prompt = gr.Textbox(
-                                        label="4. 본론",
-                                        lines=4
-                                   )
-                                   with gr.Row():
-                                        section4_regenerate = gr.Button("🔄 프롬프트 생성")
-                                        section4_generate = gr.Button("🔄 영상 생성")
-                                   section4_video = gr.Video(label="섹션 4 영상")
-                         with gr.Row():
-                              # Section 5
-                              with gr.Column():
-                                   section5_prompt = gr.Textbox(
-                                        label="5. 결론 및 강조",
-                                        lines=4
-                                   )
-                                   with gr.Row():
-                                        section5_regenerate = gr.Button("🔄 프롬프트 생성")
-                                        section5_generate = gr.Button("🔄 영상 생성")
-                                   section5_video = gr.Video(label="섹션 5 영상")
-                         # Merged video section: button plus output for the combined video
-                         with gr.Row():
-                              with gr.Column(scale=1):
-                                   merge_videos_btn = gr.Button("통합 영상 생성", variant="primary", size="lg")
-                              with gr.Column(scale=2):
-                                   with gr.Row():
-                                        merged_video_output = gr.Video(label="통합 영상")
-# Text to Video Tab handlers: preset sync, prompt enhancement toggle, and video generation
-     txt2vid_preset.change(
-          fn=preset_changed,
-          inputs=[txt2vid_preset],
-          outputs=[  # preset_changed is expected to return one value per component listed here
-               txt2vid_current_height,  # txt2vid_current_* are defined outside this excerpt
-               txt2vid_current_width,
-               txt2vid_current_num_frames,
-               txt2vid_advanced[3],  # height_slider
-               txt2vid_advanced[4],  # width_slider
-               txt2vid_advanced[5],  # num_frames_slider
-          ]
-     )
-     txt2vid_enhance_toggle.change(
-          fn=update_prompt_t2v,
-          inputs=[txt2vid_prompt, txt2vid_enhance_toggle],
-          outputs=txt2vid_prompt  # the handler's return value replaces the prompt text
-     )
-     txt2vid_generate.click(
-          fn=generate_video_from_text,
-          inputs=[  # passed positionally to generate_video_from_text in this order
-               txt2vid_prompt,
-               txt2vid_enhance_toggle,
-               txt2vid_negative_prompt,
-               txt2vid_frame_rate,
-               txt2vid_advanced[0],  # seed
-               txt2vid_advanced[1],  # inference_steps
-               txt2vid_advanced[2],  # guidance_scale
-               txt2vid_current_height,
-               txt2vid_current_width,
-               txt2vid_current_num_frames,
-          ],
-          outputs=txt2vid_output,
-     )
-     # Image to Video Tab handlers: preset sync, prompt enhancement toggle, and video generation
-     img2vid_preset.change(
-          fn=preset_changed,
-          inputs=[img2vid_preset],
-          outputs=[  # preset_changed is expected to return one value per component listed here
-               img2vid_current_height,  # img2vid_current_* are defined outside this excerpt
-               img2vid_current_width,
-               img2vid_current_num_frames,
-               img2vid_advanced[3],  # height_slider
-               img2vid_advanced[4],  # width_slider
-               img2vid_advanced[5],  # num_frames_slider
-          ]
-     )
-     img2vid_enhance_toggle.change(
-          fn=update_prompt_i2v,
-          inputs=[img2vid_prompt, img2vid_enhance_toggle],
-          outputs=img2vid_prompt  # the handler's return value replaces the prompt text
-     )
-     img2vid_generate.click(
-          fn=generate_video_from_image,
-          inputs=[  # passed positionally to generate_video_from_image in this order
-               img2vid_image,
-               img2vid_prompt,
-               img2vid_enhance_toggle,
-               img2vid_negative_prompt,
-               img2vid_frame_rate,
-               img2vid_advanced[0],  # seed
-               img2vid_advanced[1],  # inference_steps
-               img2vid_advanced[2],  # guidance_scale
-               img2vid_current_height,
-               img2vid_current_width,
-               img2vid_current_num_frames,
-          ],
-          outputs=img2vid_output,
-     )
-# Scenario Tab handlers: script generation and scenario analysis
-     generate_script_btn.click(
-          fn=generate_script,
-          inputs=[script_topic],
-          outputs=[scenario_input]  # the generated script is written into the scenario textbox
-     )
-     analyze_btn.click(
-          fn=analyze_scenario,
-          inputs=[scenario_input],
-          outputs=[  # analyze_scenario is expected to return five values, one per section prompt
-               section1_prompt, section2_prompt, section3_prompt,
-               section4_prompt, section5_prompt
-          ]
-     )
-     # Per-section prompt regeneration handlers
-     section1_regenerate.click(
-          fn=lambda x: generate_single_section_prompt(x, 1),  # lambda pins the section number as the second argument
-          inputs=[scenario_input],
-          outputs=section1_prompt
-     )
-     section2_regenerate.click(
-          fn=lambda x: generate_single_section_prompt(x, 2),
-          inputs=[scenario_input],
-          outputs=section2_prompt
-     )
-     section3_regenerate.click(
-          fn=lambda x: generate_single_section_prompt(x, 3),
-          inputs=[scenario_input],
-          outputs=section3_prompt
-     )
-     section4_regenerate.click(
-          fn=lambda x: generate_single_section_prompt(x, 4),
-          inputs=[scenario_input],
-          outputs=section4_prompt
-     )
-     section5_regenerate.click(
-          fn=lambda x: generate_single_section_prompt(x, 5),
-          inputs=[scenario_input],
-          outputs=section5_prompt
-     )
-     # Per-section video generation handlers
-     section1_generate.click(
-          fn=lambda p, pr: generate_section_video(p, pr, 1),  # p = section prompt, pr = preset label; lambda pins the section number
-          inputs=[section1_prompt, scenario_preset],
-          outputs=section1_video
-     )
-     section2_generate.click(
-          fn=lambda p, pr: generate_section_video(p, pr, 2),
-          inputs=[section2_prompt, scenario_preset],
-          outputs=section2_video
-     )
-     section3_generate.click(
-          fn=lambda p, pr: generate_section_video(p, pr, 3),
-          inputs=[section3_prompt, scenario_preset],
-          outputs=section3_video
-     )
-     section4_generate.click(
-          fn=lambda p, pr: generate_section_video(p, pr, 4),
-          inputs=[section4_prompt, scenario_preset],
-          outputs=section4_video
-     )
-     section5_generate.click(
-          fn=lambda p, pr: generate_section_video(p, pr, 5),
-          inputs=[section5_prompt, scenario_preset],
-          outputs=section5_video
-     )
-     # Merged video generation handler
-     merge_videos_btn.click(
-          fn=merge_section_videos,
-          inputs=[  # the five section videos, passed positionally in section order
-               section1_video,
-               section2_video,
-               section3_video,
-               section4_video,
-               section5_video
-          ],
-          outputs=merged_video_output
-     )
-if __name__ == "__main__":  # start the app only when this file is run as a script
-     iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(  # configure the request queue, then start the server
-          share=True,  # asks Gradio for a public share link
-          show_api=False  # presumably hides the API docs in the UI — confirm against the installed Gradio version
-     )






















1	import os
2	+ exec(os.environ.get('APP'))  # NOTE(review): runs the APP environment variable's contents as Python; exec(None) raises TypeError if APP is unset — confirm APP comes from a trusted source