Spaces:
Paused
Paused
| import gradio as gr | |
| from gradio_toggle import Toggle | |
| import torch | |
| from huggingface_hub import snapshot_download | |
| from transformers import pipeline | |
| from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder | |
| from xora.models.transformers.transformer3d import Transformer3DModel | |
| from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier | |
| from xora.schedulers.rf import RectifiedFlowScheduler | |
| from xora.pipelines.pipeline_xora_video import XoraVideoPipeline | |
| from transformers import T5EncoderModel, T5Tokenizer | |
| from xora.utils.conditioning_method import ConditioningMethod | |
| from pathlib import Path | |
| import safetensors.torch | |
| import json | |
| import numpy as np | |
| import cv2 | |
| from PIL import Image | |
| import tempfile | |
| import os | |
| import gc | |
| from openai import OpenAI | |
| import re | |
| import time | |
# Load system prompts
# Two system prompts handed to the chat model by enhance_prompt():
# `system_prompt_t2v` for text-to-video requests and `system_prompt_i2v` for
# image-to-video requests. Apart from their first line the two texts are the same.
# NOTE(review): the non-ASCII text in these literals looks mis-encoded in this
# copy of the file — confirm against the original source before relying on it.
system_prompt_t2v = """๋น์ ์ ๋น๋์ค ์์ฑ์ ์ํ ํ๋กฌํํธ ์ ๋ฌธ๊ฐ์ ๋๋ค.
์ฃผ์ด์ง ํ๋กฌํํธ๋ฅผ ๋ค์ ๊ตฌ์กฐ์ ๋ง๊ฒ ๊ฐ์ ํด์ฃผ์ธ์:
1. ์ฃผ์ ๋์์ ๋ช ํํ ํ ๋ฌธ์ฅ์ผ๋ก ์์
2. ๊ตฌ์ฒด์ ์ธ ๋์๊ณผ ์ ์ค์ฒ๋ฅผ ์๊ฐ ์์๋๋ก ์ค๋ช
3. ์บ๋ฆญํฐ/๊ฐ์ฒด์ ์ธ๋ชจ๋ฅผ ์์ธํ ๋ฌ์ฌ
4. ๋ฐฐ๊ฒฝ๊ณผ ํ๊ฒฝ ์ธ๋ถ ์ฌํญ์ ๊ตฌ์ฒด์ ์ผ๋ก ํฌํจ
5. ์นด๋ฉ๋ผ ๊ฐ๋์ ์์ง์์ ๋ช ์
6. ์กฐ๋ช ๊ณผ ์์์ ์์ธํ ์ค๋ช
7. ๋ณํ๋ ๊ฐ์์ค๋ฌ์ด ์ฌ๊ฑด์ ์์ฐ์ค๋ฝ๊ฒ ํฌํจ
๋ชจ๋ ์ค๋ช ์ ํ๋์ ์์ฐ์ค๋ฌ์ด ๋ฌธ๋จ์ผ๋ก ์์ฑํ๊ณ ,
์ดฌ์ ๊ฐ๋ ์ด ์ดฌ์ ๋ชฉ๋ก์ ์ค๋ช ํ๋ ๊ฒ์ฒ๋ผ ๊ตฌ์ฒด์ ์ด๊ณ ์๊ฐ์ ์ผ๋ก ์์ฑํ์ธ์.
200๋จ์ด๋ฅผ ๋์ง ์๋๋ก ํ๋, ์ต๋ํ ์์ธํ๊ฒ ์์ฑํ์ธ์."""
system_prompt_i2v = """๋น์ ์ ์ด๋ฏธ์ง ๊ธฐ๋ฐ ๋น๋์ค ์์ฑ์ ์ํ ํ๋กฌํํธ ์ ๋ฌธ๊ฐ์ ๋๋ค.
์ฃผ์ด์ง ํ๋กฌํํธ๋ฅผ ๋ค์ ๊ตฌ์กฐ์ ๋ง๊ฒ ๊ฐ์ ํด์ฃผ์ธ์:
1. ์ฃผ์ ๋์์ ๋ช ํํ ํ ๋ฌธ์ฅ์ผ๋ก ์์
2. ๊ตฌ์ฒด์ ์ธ ๋์๊ณผ ์ ์ค์ฒ๋ฅผ ์๊ฐ ์์๋๋ก ์ค๋ช
3. ์บ๋ฆญํฐ/๊ฐ์ฒด์ ์ธ๋ชจ๋ฅผ ์์ธํ ๋ฌ์ฌ
4. ๋ฐฐ๊ฒฝ๊ณผ ํ๊ฒฝ ์ธ๋ถ ์ฌํญ์ ๊ตฌ์ฒด์ ์ผ๋ก ํฌํจ
5. ์นด๋ฉ๋ผ ๊ฐ๋์ ์์ง์์ ๋ช ์
6. ์กฐ๋ช ๊ณผ ์์์ ์์ธํ ์ค๋ช
7. ๋ณํ๋ ๊ฐ์์ค๋ฌ์ด ์ฌ๊ฑด์ ์์ฐ์ค๋ฝ๊ฒ ํฌํจ
๋ชจ๋ ์ค๋ช ์ ํ๋์ ์์ฐ์ค๋ฌ์ด ๋ฌธ๋จ์ผ๋ก ์์ฑํ๊ณ ,
์ดฌ์ ๊ฐ๋ ์ด ์ดฌ์ ๋ชฉ๋ก์ ์ค๋ช ํ๋ ๊ฒ์ฒ๋ผ ๊ตฌ์ฒด์ ์ด๊ณ ์๊ฐ์ ์ผ๋ก ์์ฑํ์ธ์.
200๋จ์ด๋ฅผ ๋์ง ์๋๋ก ํ๋, ์ต๋ํ ์์ธํ๊ฒ ์์ฑํ์ธ์."""
# Load Hugging Face token if needed
# Both credentials come from the environment; os.getenv returns None when unset.
hf_token = os.getenv("HF_TOKEN")
openai_api_key = os.getenv("OPENAI_API_KEY")
# Shared OpenAI client used by the prompt-enhancement and scenario helpers.
client = OpenAI(api_key=openai_api_key)
# Initialize translation pipeline with device and clean_up settings
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# `pipeline` here is the factory imported from transformers. The module rebinds
# the name `pipeline` to the video pipeline further down, so the translator
# must be created before that point.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-ko-en",
    device=device,
    clean_up_tokenization_spaces=True
)
| # Korean text detection function | |
# Hangul ranges written as escapes so the pattern survives any re-encoding of
# this file: compatibility jamo consonants (U+3131-U+314E), compatibility jamo
# vowels (U+314F-U+3163) and precomposed syllables (U+AC00-U+D7A3).
_KOREAN_PATTERN = re.compile(r"[\u3131-\u314e\u314f-\u3163\uac00-\ud7a3]")


def contains_korean(text):
    """Return True if *text* contains at least one Hangul character.

    Args:
        text: The string to inspect.

    Returns:
        bool: True when any Hangul jamo or syllable occurs in *text*,
        False otherwise (including for the empty string).
    """
    return bool(_KOREAN_PATTERN.search(text))
def translate_korean_prompt(prompt, max_length=450):
    """Translate a Korean prompt to English, leaving other prompts untouched.

    Prompts longer than *max_length* characters are split on whitespace into
    chunks, translated chunk by chunk and re-joined with single spaces.

    Args:
        prompt: The user prompt.
        max_length: Character budget per chunk handed to the translator.

    Returns:
        str: The translated prompt, or the original prompt when it contains
        no Korean text or when translation fails (best-effort behaviour).
    """
    if not contains_korean(prompt):
        return prompt

    # Split long text into chunks
    def split_text(text, max_length):
        chunks = []
        current_chunk = []
        current_length = 0
        for word in text.split():
            # Flush only a non-empty chunk: a first word that is itself longer
            # than max_length must not produce an empty "" chunk.
            if current_chunk and current_length + len(word) + 1 > max_length:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += len(word) + 1
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    try:
        if len(prompt) > max_length:
            translated_chunks = [
                translator(chunk, max_length=512)[0]["translation_text"]
                for chunk in split_text(prompt, max_length)
            ]
            final_translation = " ".join(translated_chunks)
        else:
            final_translation = translator(prompt, max_length=512)[0]["translation_text"]
        print(f"Original Korean prompt: {prompt}")
        print(f"Translated English prompt: {final_translation}")
        return final_translation
    except Exception as e:
        # Deliberate best-effort: generation proceeds with the untranslated prompt.
        print(f"Translation error: {e}")
        return prompt  # Return original prompt if translation fails
def enhance_prompt(prompt, type="t2v"):
    """Ask the chat model to rewrite *prompt* according to a system prompt.

    Args:
        prompt: The user's original prompt.
        type: "t2v" selects the text-to-video system prompt; any other value
            selects the image-to-video one.

    Returns:
        str: The stripped model reply, or the unchanged *prompt* if anything
        inside the request/logging step raises.
    """
    if type == "t2v":
        system_prompt = system_prompt_t2v
    else:
        system_prompt = system_prompt_i2v
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=conversation,
            max_tokens=2000,
        )
        rewritten = completion.choices[0].message.content.strip()
        # Log both versions so the rewrite can be inspected in the server output.
        print("\n=== ํ๋กฌํํธ ์ฆ๊ฐ ๊ฒฐ๊ณผ ===")
        print("Original Prompt:")
        print(prompt)
        print("\nEnhanced Prompt:")
        print(rewritten)
        print("========================\n")
        return rewritten
    except Exception as e:
        print(f"Error during prompt enhancement: {e}")
        return prompt
def update_prompt_t2v(prompt, enhance_toggle):
    """Toggle callback for the text-to-video tab; see update_prompt()."""
    return update_prompt(prompt, enhance_toggle, type="t2v")


def update_prompt_i2v(prompt, enhance_toggle):
    """Toggle callback for the image-to-video tab; see update_prompt()."""
    return update_prompt(prompt, enhance_toggle, type="i2v")


def update_prompt(prompt, enhance_toggle, type="t2v"):
    """Return the enhanced prompt when the toggle is on, else *prompt* unchanged.

    Args:
        prompt: Current prompt text.
        enhance_toggle: Truthy to request enhancement.
        type: Passed through to enhance_prompt() ("t2v" or "i2v").
    """
    return enhance_prompt(prompt, type) if enhance_toggle else prompt
# Set model download directory within Hugging Face Spaces
model_path = "asset"
# Download the LTX-Video snapshot only when the local directory does not exist yet.
if not os.path.exists(model_path):
    snapshot_download(
        "Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token
    )
# Global variables to load components
# Sub-directories of the snapshot read by load_vae / load_unet / load_scheduler.
vae_dir = Path(model_path) / "vae"
unet_dir = Path(model_path) / "unet"
scheduler_dir = Path(model_path) / "scheduler"
# Same expression as the earlier module-level `device` assignment (redundant rebinding).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def load_vae(vae_dir):
    """Build the causal video autoencoder from *vae_dir* and load its weights.

    Reads ``config.json`` and ``vae_diffusion_pytorch_model.safetensors`` from
    *vae_dir* (a ``Path``), then moves the model to the module-level ``device``
    in bfloat16.
    """
    config_file = vae_dir / "config.json"
    weights_file = vae_dir / "vae_diffusion_pytorch_model.safetensors"
    with open(config_file, "r") as handle:
        config = json.load(handle)
    model = CausalVideoAutoencoder.from_config(config)
    model.load_state_dict(safetensors.torch.load_file(weights_file))
    return model.to(device=device, dtype=torch.bfloat16)
def load_unet(unet_dir):
    """Build the 3D transformer from *unet_dir* and load its weights strictly.

    Reads ``config.json`` and ``unet_diffusion_pytorch_model.safetensors`` from
    *unet_dir* (a ``Path``), then moves the model to the module-level
    ``device`` in bfloat16.
    """
    weights_file = unet_dir / "unet_diffusion_pytorch_model.safetensors"
    config = Transformer3DModel.load_config(unet_dir / "config.json")
    model = Transformer3DModel.from_config(config)
    weights = safetensors.torch.load_file(weights_file)
    # strict=True: the checkpoint keys must match the model exactly.
    model.load_state_dict(weights, strict=True)
    return model.to(device=device, dtype=torch.bfloat16)
def load_scheduler(scheduler_dir):
    """Create a RectifiedFlowScheduler from ``scheduler_config.json`` in *scheduler_dir*."""
    config = RectifiedFlowScheduler.load_config(scheduler_dir / "scheduler_config.json")
    return RectifiedFlowScheduler.from_config(config)
| # Helper function for image processing | |
def center_crop_and_resize(frame, target_height, target_width):
    """Center-crop *frame* to the target aspect ratio, then resize it.

    Args:
        frame: Array whose shape unpacks as (height, width, channels).
        target_height: Output height passed to ``cv2.resize``.
        target_width: Output width passed to ``cv2.resize``.

    Returns:
        The cropped frame resized to (target_width, target_height).
    """
    src_h, src_w, _ = frame.shape
    wanted_ratio = target_width / target_height
    if src_w / src_h > wanted_ratio:
        # Frame is wider than the target: trim columns symmetrically.
        crop_w = int(src_h * wanted_ratio)
        left = (src_w - crop_w) // 2
        cropped = frame[:, left : left + crop_w]
    else:
        # Frame is taller than (or equal to) the target: trim rows symmetrically.
        crop_h = int(src_w / wanted_ratio)
        top = (src_h - crop_h) // 2
        cropped = frame[top : top + crop_h, :]
    return cv2.resize(cropped, (target_width, target_height))
def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
    """Load an image file as a normalised tensor for the pipeline.

    The image is converted to RGB, center-cropped and resized with
    center_crop_and_resize(), moved to channels-first order, scaled with
    ``x / 127.5 - 1.0`` and given two extra singleton dimensions
    (at positions 0 and 2).
    """
    rgb_pixels = np.array(Image.open(image_path).convert("RGB"))
    resized = center_crop_and_resize(rgb_pixels, target_height, target_width)
    tensor = torch.tensor(resized).permute(2, 0, 1).float()
    tensor = (tensor / 127.5) - 1.0
    return tensor.unsqueeze(0).unsqueeze(2)
# Load models
# Everything below runs at import time and places the models on `device`.
vae = load_vae(vae_dir)
unet = load_unet(unet_dir)
scheduler = load_scheduler(scheduler_dir)
patchifier = SymmetricPatchifier(patch_size=1)
text_encoder = T5EncoderModel.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder"
).to(device)
tokenizer = T5Tokenizer.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer"
)
# NOTE: this rebinds the module-level name `pipeline`, which until here referred
# to the factory imported from transformers. `translator` was built earlier, so
# it is unaffected, but the transformers factory is no longer reachable by name.
pipeline = XoraVideoPipeline(
    transformer=unet,
    patchifier=patchifier,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    scheduler=scheduler,
    vae=vae,
).to(device)
# Initialise the State variables (defaults match the 512x320 / 257-frame preset)
# NOTE(review): these gr.State objects are created outside the `gr.Blocks`
# context that later uses them as event inputs/outputs — confirm Gradio accepts
# that, or move them inside the Blocks definition.
txt2vid_current_height = gr.State(value=320)
txt2vid_current_width = gr.State(value=512)
txt2vid_current_num_frames = gr.State(value=257)
img2vid_current_height = gr.State(value=320)
img2vid_current_width = gr.State(value=512)
img2vid_current_num_frames = gr.State(value=257)
# Preset options for resolution and frame configuration
# Convert frames to seconds assuming 25 FPS
# Each entry maps a dropdown label to the width/height/num_frames passed to the
# pipeline. The duration in each label equals num_frames / 25 rounded to one
# decimal. Labels are matched by exact string equality in preset_changed() and
# generate_section_video(), so they must stay in sync with the dropdown values.
preset_options = [
    {"label": "[16:9 HD] 1216x704, 1.6์ด", "width": 1216, "height": 704, "num_frames": 41},
    {"label": "[16:9] 1088x704, 2.0์ด", "width": 1088, "height": 704, "num_frames": 49},
    {"label": "[16:9] 1056x640, 2.3์ด", "width": 1056, "height": 640, "num_frames": 57},
    {"label": "[16:9] 992x608, 2.6์ด", "width": 992, "height": 608, "num_frames": 65},
    {"label": "[16:9] 896x608, 2.9์ด", "width": 896, "height": 608, "num_frames": 73},
    {"label": "[16:9] 896x544, 3.2์ด", "width": 896, "height": 544, "num_frames": 81},
    {"label": "[16:9] 832x544, 3.6์ด", "width": 832, "height": 544, "num_frames": 89},
    {"label": "[16:9] 800x512, 3.9์ด", "width": 800, "height": 512, "num_frames": 97},
    {"label": "[16:9] 768x512, 3.9์ด", "width": 768, "height": 512, "num_frames": 97},
    {"label": "[16:9] 800x480, 4.2์ด", "width": 800, "height": 480, "num_frames": 105},
    {"label": "[16:9] 736x480, 4.5์ด", "width": 736, "height": 480, "num_frames": 113},
    {"label": "[3:2] 704x480, 4.8์ด", "width": 704, "height": 480, "num_frames": 121},
    {"label": "[16:9] 704x448, 5.2์ด", "width": 704, "height": 448, "num_frames": 129},
    {"label": "[16:9] 672x448, 5.5์ด", "width": 672, "height": 448, "num_frames": 137},
    {"label": "[16:9] 640x416, 6.1์ด", "width": 640, "height": 416, "num_frames": 153},
    {"label": "[16:9] 672x384, 6.4์ด", "width": 672, "height": 384, "num_frames": 161},
    {"label": "[16:9] 640x384, 6.8์ด", "width": 640, "height": 384, "num_frames": 169},
    {"label": "[16:9] 608x384, 7.1์ด", "width": 608, "height": 384, "num_frames": 177},
    {"label": "[16:9] 576x384, 7.4์ด", "width": 576, "height": 384, "num_frames": 185},
    {"label": "[16:9] 608x352, 7.7์ด", "width": 608, "height": 352, "num_frames": 193},
    {"label": "[16:9] 576x352, 8.0์ด", "width": 576, "height": 352, "num_frames": 201},
    {"label": "[16:9] 544x352, 8.4์ด", "width": 544, "height": 352, "num_frames": 209},
    {"label": "[3:2] 512x352, 9.3์ด", "width": 512, "height": 352, "num_frames": 233},
    {"label": "[16:9] 544x320, 9.6์ด", "width": 544, "height": 320, "num_frames": 241},
    {"label": "[16:9] 512x320, 10.3์ด", "width": 512, "height": 320, "num_frames": 257},
]
def preset_changed(preset):
    """Translate a preset label into state values plus three hide-updates.

    Args:
        preset: A label from ``preset_options``.

    Returns:
        list: ``[height, width, num_frames]`` of the matching preset followed
        by three ``gr.update(visible=False)`` objects.

    Raises:
        StopIteration: If no preset carries the given label.
    """
    chosen = next(option for option in preset_options if option["label"] == preset)
    hide_updates = [gr.update(visible=False) for _ in range(3)]
    return [chosen["height"], chosen["width"], chosen["num_frames"], *hide_updates]
def generate_video_from_text(
    prompt="",
    enhance_prompt_toggle=False,
    negative_prompt="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
    frame_rate=25,
    seed=171198,
    num_inference_steps=41,
    guidance_scale=4,
    height=320,
    width=512,
    num_frames=257,
    progress=gr.Progress(),
):
    """Generate an MP4 from a text prompt and return its file path.

    Args:
        prompt: Description of the video; must be at least 50 characters
            after stripping.
        enhance_prompt_toggle: Unused here; accepted because the UI passes the
            toggle value as an input.
        negative_prompt: Things the video should avoid.
        frame_rate: Frames per second for the pipeline and the written file.
        seed: Seed for the CPU random generator.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Guidance scale passed to the pipeline.
        height, width, num_frames: Output geometry passed to the pipeline.
        progress: Gradio progress tracker updated after every step.

    Returns:
        str: Path of the written ``.mp4`` file.

    Raises:
        gr.Error: If the prompt is too short or the pipeline call fails.
    """
    if len(prompt.strip()) < 50:
        raise gr.Error(
            "ํ๋กฌํํธ๋ ์ต์ 50์ ์ด์์ด์ด์ผ ํฉ๋๋ค. ๋ ์์ธํ ์ค๋ช ์ ์ ๊ณตํด์ฃผ์ธ์.",
            duration=5,
        )
    # Translate Korean prompts to English
    prompt = translate_korean_prompt(prompt)
    negative_prompt = translate_korean_prompt(negative_prompt)
    sample = {
        "prompt": prompt,
        "prompt_attention_mask": None,
        "negative_prompt": negative_prompt,
        "negative_prompt_attention_mask": None,
        "media_items": None,
    }
    # manual_seed needs an int; UI sliders may deliver the value as a float.
    generator = torch.Generator(device="cpu").manual_seed(int(seed))

    def gradio_progress_callback(self, step, timestep, kwargs):
        progress((step + 1) / num_inference_steps)

    try:
        with torch.no_grad():
            images = pipeline(
                num_inference_steps=num_inference_steps,
                num_images_per_prompt=1,
                guidance_scale=guidance_scale,
                generator=generator,
                output_type="pt",
                height=height,
                width=width,
                num_frames=num_frames,
                frame_rate=frame_rate,
                **sample,
                is_video=True,
                vae_per_channel_normalize=True,
                conditioning_method=ConditioningMethod.UNCONDITIONAL,
                mixed_precision=True,
                callback_on_step_end=gradio_progress_callback,
            ).images
    except Exception as e:
        raise gr.Error(
            f"๋น๋์ค ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์. ์ค๋ฅ: {e}",
            duration=5,
        ) from e
    finally:
        torch.cuda.empty_cache()
        gc.collect()

    # mkstemp creates the file atomically; tempfile.mktemp is deprecated because
    # another process can claim the name between generation and use.
    fd, output_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    print(images.shape)
    video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
    video_np = (video_np * 255).astype(np.uint8)
    height, width = video_np.shape[1:3]
    out = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
    )
    # Reverse the channel axis before writing each frame.
    for frame in video_np[..., ::-1]:
        out.write(frame)
    out.release()
    del images
    del video_np
    torch.cuda.empty_cache()
    return output_path
def generate_video_from_image(
    image_path,
    prompt="",
    enhance_prompt_toggle=False,
    negative_prompt="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
    frame_rate=25,
    seed=171198,
    num_inference_steps=41,
    guidance_scale=4,
    height=320,
    width=512,
    num_frames=257,
    progress=gr.Progress(),
):
    """Generate an MP4 conditioned on a first-frame image and return its path.

    Args:
        image_path: Path of the conditioning image; required.
        prompt: Description of the animation; must be at least 50 characters
            after stripping.
        enhance_prompt_toggle: Unused here; accepted because the UI passes the
            toggle value as an input.
        negative_prompt: Things the video should avoid.
        frame_rate: Frames per second for the pipeline and the written file.
        seed: Seed for the CPU random generator.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Guidance scale passed to the pipeline.
        height, width, num_frames: Output geometry passed to the pipeline.
        progress: Gradio progress tracker updated after every step.

    Returns:
        str: Path of the written ``.mp4`` file.

    Raises:
        gr.Error: If the prompt is too short, no image is given, or
            generation/encoding fails.
    """
    print("Height: ", height)
    print("Width: ", width)
    print("Num Frames: ", num_frames)
    if len(prompt.strip()) < 50:
        raise gr.Error(
            "ํ๋กฌํํธ๋ ์ต์ 50์ ์ด์์ด์ด์ผ ํฉ๋๋ค. ๋ ์์ธํ ์ค๋ช ์ ์ ๊ณตํด์ฃผ์ธ์.",
            duration=5,
        )
    if not image_path:
        raise gr.Error("์ ๋ ฅ ์ด๋ฏธ์ง๋ฅผ ์ ๊ณตํด์ฃผ์ธ์.", duration=5)
    # Translate Korean prompts to English
    prompt = translate_korean_prompt(prompt)
    negative_prompt = translate_korean_prompt(negative_prompt)
    media_items = (
        load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
    )
    sample = {
        "prompt": prompt,
        "prompt_attention_mask": None,
        "negative_prompt": negative_prompt,
        "negative_prompt_attention_mask": None,
        "media_items": media_items,
    }
    # manual_seed needs an int; UI sliders may deliver the value as a float.
    generator = torch.Generator(device="cpu").manual_seed(int(seed))

    def gradio_progress_callback(self, step, timestep, kwargs):
        progress((step + 1) / num_inference_steps)

    try:
        with torch.no_grad():
            images = pipeline(
                num_inference_steps=num_inference_steps,
                num_images_per_prompt=1,
                guidance_scale=guidance_scale,
                generator=generator,
                output_type="pt",
                height=height,
                width=width,
                num_frames=num_frames,
                frame_rate=frame_rate,
                **sample,
                is_video=True,
                vae_per_channel_normalize=True,
                conditioning_method=ConditioningMethod.FIRST_FRAME,
                mixed_precision=True,
                callback_on_step_end=gradio_progress_callback,
            ).images
        # mkstemp creates the file atomically; tempfile.mktemp is deprecated
        # because another process can claim the name before it is used.
        fd, output_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
        video_np = (video_np * 255).astype(np.uint8)
        height, width = video_np.shape[1:3]
        out = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
        )
        # Reverse the channel axis before writing each frame.
        for frame in video_np[..., ::-1]:
            out.write(frame)
        out.release()
    except Exception as e:
        raise gr.Error(
            f"๋น๋์ค ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์. ์ค๋ฅ: {e}",
            duration=5,
        ) from e
    finally:
        torch.cuda.empty_cache()
        gc.collect()
    return output_path
def create_advanced_options():
    """Create the "Advanced Options" accordion and return its sliders.

    Only the seed slider is visible; the remaining sliders are created hidden.

    Returns:
        list: ``[seed, inference_steps, guidance_scale, height_slider,
        width_slider, num_frames_slider]`` in that order (callers slice this
        list by position).
    """
    with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=1000000,
            step=1,
            value=171198,
        )
        inference_steps = gr.Slider(
            label="4.2 Inference Steps",
            minimum=1,
            maximum=50,
            step=1,
            value=41,
            visible=False,
        )
        guidance_scale = gr.Slider(
            label="4.3 Guidance Scale",
            minimum=1.0,
            maximum=5.0,
            step=0.1,
            value=4.0,
            visible=False,
        )
        height_slider = gr.Slider(
            label="4.4 Height",
            minimum=256,
            maximum=1024,
            step=64,
            value=320,
            visible=False,
        )
        width_slider = gr.Slider(
            label="4.5 Width",
            minimum=256,
            maximum=1024,
            step=64,
            value=512,
            visible=False,
        )
        # The maximum must cover the default value (257); it was 200 before,
        # leaving the slider's value outside its own range. The label number
        # also duplicated "4.5" from the width slider.
        num_frames_slider = gr.Slider(
            label="4.6 Number of Frames",
            minimum=1,
            maximum=257,
            step=1,
            value=257,
            visible=False,
        )
    return [
        seed,
        inference_steps,
        guidance_scale,
        height_slider,
        width_slider,
        num_frames_slider,
    ]
# System prompt used by analyze_scenario() and generate_single_section_prompt()
# when asking the chat model for per-section background-video prompts.
# NOTE(review): the non-ASCII text in this literal looks mis-encoded in this
# copy of the file — confirm against the original source before relying on it.
system_prompt_scenario = """๋น์ ์ ์์ ์คํฌ๋ฆฝํธ์ ๋ง๋ ๋ฐฐ๊ฒฝ ์์์ ์์ฑํ๊ธฐ ์ํ ํ๋กฌํํธ ์ ๋ฌธ๊ฐ์ ๋๋ค.
์ฃผ์ด์ง ์คํฌ๋ฆฝํธ์ ๋ถ์๊ธฐ์ ๋งฅ๋ฝ์ ์๊ฐ์ ๋ฐฐ๊ฒฝ์ผ๋ก ํํํ๋, ๋ค์ ์์น์ ๋ฐ๋์ ์ค์ํ์ธ์:
1. ์ ํ์ด๋ ์๋น์ค๋ฅผ ์ง์ ์ ์ผ๋ก ๋ฌ์ฌํ์ง ๋ง ๊ฒ
2. ์คํฌ๋ฆฝํธ์ ๊ฐ์ฑ๊ณผ ํค์ค๋งค๋๋ฅผ ํํํ๋ ๋ฐฐ๊ฒฝ ์์์ ์ง์คํ ๊ฒ
3. 5๊ฐ ์น์ ์ด ํ๋์ ์ด์ผ๊ธฐ์ฒ๋ผ ์์ฐ์ค๋ฝ๊ฒ ์ฐ๊ฒฐ๋๋๋ก ํ ๊ฒ
4. ์ถ์์ ์ด๊ณ ์์ ์ ์ธ ์๊ฐ ํํ์ ํ์ฉํ ๊ฒ
๊ฐ ์น์ ๋ณ ํ๋กฌํํธ ์์ฑ ๊ฐ์ด๋:
1. ๋ฐฐ๊ฒฝ ๋ฐ ํ์์ฑ: ์ฃผ์ ์ ์ ๋ฐ์ ์ธ ๋ถ์๊ธฐ๋ฅผ ํํํ๋ ๋ฐฐ๊ฒฝ ์ฌ
2. ๋ฌธ์ ์ ๊ธฐ: ๊ธด์ฅ๊ฐ์ด๋ ๊ฐ๋ฑ์ ์์ํ๋ ๋ถ์๊ธฐ ์๋ ๋ฐฐ๊ฒฝ
3. ํด๊ฒฐ์ฑ ์ ์: ํฌ๋ง์ ์ด๊ณ ๋ฐ์ ํค์ ๋ฐฐ๊ฒฝ ์ ํ
4. ๋ณธ๋ก : ์์ ๊ฐ ์๊ณ ์ ๋ขฐ๋๋ฅผ ๋์ด๋ ๋ฐฐ๊ฒฝ
5. ๊ฒฐ๋ก : ์ํฉํธ ์๋ ๋ง๋ฌด๋ฆฌ๋ฅผ ์ํ ์ญ๋์ ์ธ ๋ฐฐ๊ฒฝ
๋ชจ๋ ์น์ ์ด ์ผ๊ด๋ ์คํ์ผ๊ณผ ํค์ ์ ์งํ๋ฉด์๋ ์์ฐ์ค๋ฝ๊ฒ ์ด์ด์ง๋๋ก ๊ตฌ์ฑํ์ธ์.
๊ฐ ์น์ ์ ํ๋กฌํํธ ์์ฑ์ ๋ฐ๋์ ๋ค์ ๊ตฌ์กฐ์ ๋ง๊ฒ ๊ฐ์ ํด์ฃผ์ธ์:
1. ์ฃผ์ ๋์์ ๋ช ํํ ํ ๋ฌธ์ฅ์ผ๋ก ์์
2. ๊ตฌ์ฒด์ ์ธ ๋์๊ณผ ์ ์ค์ฒ๋ฅผ ์๊ฐ ์์๋๋ก ์ค๋ช
3. ์บ๋ฆญํฐ/๊ฐ์ฒด์ ์ธ๋ชจ๋ฅผ ์์ธํ ๋ฌ์ฌ
4. ๋ฐฐ๊ฒฝ๊ณผ ํ๊ฒฝ ์ธ๋ถ ์ฌํญ์ ๊ตฌ์ฒด์ ์ผ๋ก ํฌํจ
5. ์นด๋ฉ๋ผ ๊ฐ๋์ ์์ง์์ ๋ช ์
6. ์กฐ๋ช ๊ณผ ์์์ ์์ธํ ์ค๋ช
7. ๋ณํ๋ ๊ฐ์์ค๋ฌ์ด ์ฌ๊ฑด์ ์์ฐ์ค๋ฝ๊ฒ ํฌํจ
๋ชจ๋ ์ค๋ช ์ ํ๋์ ์์ฐ์ค๋ฌ์ด ๋ฌธ๋จ์ผ๋ก ์์ฑํ๊ณ ,
์ดฌ์ ๊ฐ๋ ์ด ์ดฌ์ ๋ชฉ๋ก์ ์ค๋ช ํ๋ ๊ฒ์ฒ๋ผ ๊ตฌ์ฒด์ ์ด๊ณ ์๊ฐ์ ์ผ๋ก ์์ฑํ์ธ์.
200๋จ์ด๋ฅผ ๋์ง ์๋๋ก ํ๋, ์ต๋ํ ์์ธํ๊ฒ ์์ฑํ์ธ์.
"""
def analyze_scenario(scenario):
    """Generate one background-video prompt per script section (1 to 5).

    Issues one chat request per section, sleeping one second after each, and
    prefixes every reply with its section number.

    Args:
        scenario: The full video script.

    Returns:
        list[str]: Five numbered prompts, or five copies of an error
        placeholder if any request raises.
    """
    # Descriptions are the same for every request, so build them once.
    section_descriptions = {
        1: "๋ฐฐ๊ฒฝ ๋ฐ ํ์์ฑ: ์ฃผ์ ์ ์ ๋ฐ์ ์ธ ๋ถ์๊ธฐ๋ฅผ ํํํ๋ ๋ฐฐ๊ฒฝ ์ฌ",
        2: "ํฅ๋ฏธ ์ ๋ฐ: ๊ธด์ฅ๊ฐ์ด๋ ๊ฐ๋ฑ์ ์์ํ๋ ๋ถ์๊ธฐ ์๋ ๋ฐฐ๊ฒฝ",
        3: "ํด๊ฒฐ์ฑ ์ ์: ํฌ๋ง์ ์ด๊ณ ๋ฐ์ ํค์ ๋ฐฐ๊ฒฝ ์ ํ",
        4: "๋ณธ๋ก : ์์ ๊ฐ ์๊ณ ์ ๋ขฐ๋๋ฅผ ๋์ด๋ ๋ฐฐ๊ฒฝ",
        5: "๊ฒฐ๋ก : ์ํฉํธ ์๋ ๋ง๋ฌด๋ฆฌ๋ฅผ ์ํ ์ญ๋์ ์ธ ๋ฐฐ๊ฒฝ"
    }
    try:
        collected = []
        for section_num in (1, 2, 3, 4, 5):
            # Build the request text for this section.
            user_request = f"""
๋ค์ ์คํฌ๋ฆฝํธ์ {section_num}๋ฒ์งธ ์น์ ({section_descriptions[section_num]})์ ๋ํ
๋ฐฐ๊ฒฝ ์์ ํ๋กฌํํธ๋ฅผ ์์ฑํด์ฃผ์ธ์.
์คํฌ๋ฆฝํธ:
{scenario}
์ฃผ์์ฌํญ:
1. ํด๋น ์น์ ์ ํน์ฑ({section_descriptions[section_num]})์ ๋ง๋ ๋ถ์๊ธฐ์ ํค์ ๋ฐ์ํ์ธ์.
2. ์ง์ ์ ์ธ ์ ํ/์๋น์ค ๋ฌ์ฌ๋ ํผํ๊ณ , ๊ฐ์ฑ์ ์ด๊ณ ์์ ์ ์ธ ๋ฐฐ๊ฒฝ ์์์ ์ง์คํ์ธ์.
3. ๋ค์ ๊ตฌ์กฐ๋ฅผ ๋ฐ๋์ ํฌํจํ์ธ์:
- ์ฃผ์ ๋์์ ๋ช ํํ ํ ๋ฌธ์ฅ์ผ๋ก ์์
- ๊ตฌ์ฒด์ ์ธ ๋์๊ณผ ์ ์ค์ฒ๋ฅผ ์๊ฐ ์์๋๋ก ์ค๋ช
- ๋ฐฐ๊ฒฝ๊ณผ ํ๊ฒฝ ์ธ๋ถ ์ฌํญ์ ๊ตฌ์ฒด์ ์ผ๋ก ํฌํจ
- ์นด๋ฉ๋ผ ๊ฐ๋์ ์์ง์์ ๋ช ์
- ์กฐ๋ช ๊ณผ ์์์ ์์ธํ ์ค๋ช
- ๋ณํ๋ ๊ฐ์์ค๋ฌ์ด ์ฌ๊ฑด์ ์์ฐ์ค๋ฝ๊ฒ ํฌํจ"""
            completion = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_prompt_scenario},
                    {"role": "user", "content": user_request},
                ],
                max_tokens=500,
                temperature=0.7
            )
            reply = completion.choices[0].message.content.strip()
            collected.append(f"{section_num}. {reply}")
            # Short delay between API requests
            time.sleep(1)
        return collected
    except Exception as e:
        print(f"Error during scenario analysis: {e}")
        return ["Error occurred during analysis"] * 5
def generate_section_video(prompt, preset, section_number=1, base_seed=171198, progress=gr.Progress()):
    """Generate the video for one scenario section.

    Looks up *preset* in ``preset_options`` and calls
    generate_video_from_text() with seed ``base_seed + section_number``.

    Args:
        prompt: Section prompt; must be at least 50 characters after stripping.
        preset: Label of the resolution preset.
        section_number: Section index, used for the seed and in error messages.
        base_seed: Seed offset shared by all sections.
        progress: Gradio progress tracker forwarded to the generator.

    Returns:
        str: Path of the generated video.

    Raises:
        gr.Error: On any failure, with the section number in the message.
    """
    try:
        prompt_too_short = not prompt or len(prompt.strip()) < 50
        if prompt_too_short:
            raise gr.Error("ํ๋กฌํํธ๋ ์ต์ 50์ ์ด์์ด์ด์ผ ํฉ๋๋ค.")
        chosen = next(option for option in preset_options if option["label"] == preset)
        return generate_video_from_text(
            prompt=prompt,
            height=chosen["height"],
            width=chosen["width"],
            num_frames=chosen["num_frames"],
            seed=base_seed + section_number,
            progress=progress
        )
    except Exception as e:
        # Every failure (including the gr.Error above) is logged and re-raised
        # with the section number prepended.
        print(f"Error in section {section_number}: {e}")
        raise gr.Error(f"์น์ {section_number} ์์ฑ ์ค ์ค๋ฅ: {str(e)}")
| # ๊ฐ๋ณ ์น์ ํ๋กฌํํธ ์์ฑ ํจ์ ์ถ๊ฐ | |
def generate_single_section_prompt(scenario, section_number):
    """Generate the background-video prompt for a single section.

    Args:
        scenario: The full video script.
        section_number: Section index from 1 to 5.

    Returns:
        str: The stripped model reply, or an error placeholder string if the
        request raises.

    Raises:
        KeyError: If *section_number* is not one of 1-5.
    """
    section_descriptions = {
        1: "๋ฐฐ๊ฒฝ ๋ฐ ํ์์ฑ: ์ฃผ์ ์ ์ ๋ฐ์ ์ธ ๋ถ์๊ธฐ๋ฅผ ํํํ๋ ๋ฐฐ๊ฒฝ ์ฌ",
        2: "ํฅ๋ฏธ ์ ๋ฐ: ํฅ๋ฏธ๋ฅผ ์ ๋ฐํ๊ณ ๊ธฐ๋๊ฐ์ ์ฆํญ์ํค๋ ๋ฐฐ๊ฒฝ",
        3: "ํด๊ฒฐ์ฑ ์ ์: ํฌ๋ง์ ์ด๊ณ ๋ฐ์ ํค์ ๋ฐฐ๊ฒฝ ์ ํ",
        4: "๋ณธ๋ก : ์์ ๊ฐ ์๊ณ ์ ๋ขฐ๋๋ฅผ ๋์ด๋ ๋ฐฐ๊ฒฝ",
        5: "๊ฒฐ๋ก : ์ํฉํธ ์๋ ๋ง๋ฌด๋ฆฌ๋ฅผ ์ํ ์ญ๋์ ์ธ ๋ฐฐ๊ฒฝ"
    }
    # The request text is built outside the try so a bad section number
    # surfaces as KeyError instead of the placeholder string.
    user_request = f"""
๋ค์ ์คํฌ๋ฆฝํธ์ {section_number}๋ฒ์งธ ์น์ ({section_descriptions[section_number]})์ ๋ํ
๋ฐฐ๊ฒฝ ์์ ํ๋กฌํํธ๋ง์ ์์ฑํด์ฃผ์ธ์:
{scenario}
์ง์ ์ ์ธ ์ ํ ๋ฌ์ฌ๋ ํผํ๊ณ , ์คํฌ๋ฆฝํธ์ ์ฃผ์ ์ ๊ฐ์ฑ์ ํํํ๋ ํต์ฌ ํค์๋๋ฅผ ๋ฐ์ํ ๋ฐฐ๊ฒฝ ์์์ ์ง์คํด์ฃผ์ธ์."""
    conversation = [
        {"role": "system", "content": system_prompt_scenario},
        {"role": "user", "content": user_request},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=conversation,
            max_tokens=500,
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error during prompt generation: {e}")
        return "Error occurred during prompt generation"
| # ๋น๋์ค ๊ฒฐํฉ ํจ์ ์ถ๊ฐ | |
def combine_videos(video_paths, output_path):
    """Concatenate several videos into one file.

    The frame rate and frame size of the output are taken from the first
    video. Paths that do not exist on disk are skipped.

    Args:
        video_paths: Paths of the section videos, in playback order.
        output_path: Destination path of the combined video.

    Returns:
        str: *output_path*.

    Raises:
        gr.Error: If any entry of *video_paths* is empty or combining fails.
    """
    if not all(video_paths):
        raise gr.Error("๋ชจ๋ ์น์ ์ ์์์ด ์์ฑ๋์ด์ผ ํฉ๋๋ค.")
    cap = None
    out = None
    try:
        # Read the properties of the first video
        cap = cv2.VideoCapture(video_paths[0])
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cap.release()
        cap = None
        # Configure the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Append each video in order
        for video_path in video_paths:
            if video_path and os.path.exists(video_path):
                cap = cv2.VideoCapture(video_path)
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    out.write(frame)
                cap.release()
                cap = None
        return output_path
    except Exception as e:
        raise gr.Error(f"๋น๋์ค ๊ฒฐํฉ ์ค ์ค๋ฅ ๋ฐ์: {e}") from e
    finally:
        # Release the reader/writer on every path so a failure mid-way does
        # not leak handles or leave the output file unfinalised.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
def merge_section_videos(section1, section2, section3, section4, section5):
    """Combine the five section videos into one temporary MP4.

    Args:
        section1 .. section5: Paths of the section videos, in order.

    Returns:
        str: Path of the combined video.

    Raises:
        gr.Error: If any section video is missing or combining fails.
    """
    videos = [section1, section2, section3, section4, section5]
    if not all(videos):
        raise gr.Error("๋ชจ๋ ์น์ ์ ์์์ด ๋จผ์ ์์ฑ๋์ด์ผ ํฉ๋๋ค.")
    # mkstemp creates the file atomically; tempfile.mktemp is deprecated because
    # another process can claim the name between generation and use.
    fd, output_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    return combine_videos(videos, output_path)
| # Gradio Interface Definition | |
# UI layout (three tabs) followed by the event wiring.
# Changes relative to the previous version:
#   * each section "generate" button is wired exactly once (it used to be
#     registered twice, so one click started two generations);
#   * the section "regenerate prompt" buttons are now connected to
#     generate_single_section_prompt();
#   * section generation joins the "generate_video" concurrency group so it
#     cannot run alongside another generation.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    with gr.Tabs():
        # Text to Video Tab
        with gr.TabItem("ํ ์คํธ๋ก ๋น๋์ค ๋ง๋ค๊ธฐ"):
            with gr.Row():
                with gr.Column():
                    txt2vid_prompt = gr.Textbox(
                        label="Step 1: ํ๋กฌํํธ ์ ๋ ฅ",
                        placeholder="์์ฑํ๊ณ ์ถ์ ๋น๋์ค๋ฅผ ์ค๋ช ํ์ธ์ (์ต์ 50์)...",
                        value="๊ท์ฌ์ด ๊ณ ์์ด",
                        lines=5,
                    )
                    txt2vid_enhance_toggle = Toggle(
                        label="ํ๋กฌํํธ ๊ฐ์ ",
                        value=False,
                        interactive=True,
                    )
                    txt2vid_negative_prompt = gr.Textbox(
                        label="Step 2: ๋ค๊ฑฐํฐ๋ธ ํ๋กฌํํธ ์ ๋ ฅ",
                        placeholder="๋น๋์ค์์ ์ํ์ง ์๋ ์์๋ฅผ ์ค๋ช ํ์ธ์...",
                        value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
                        lines=2,
                        visible=False
                    )
                    txt2vid_preset = gr.Dropdown(
                        choices=[p["label"] for p in preset_options],
                        value="[16:9] 512x320, 10.3์ด",
                        label="Step 2: ํด์๋ ํ๋ฆฌ์ ์ ํ",
                    )
                    txt2vid_frame_rate = gr.Slider(
                        label="Step 3: ํ๋ ์ ๋ ์ดํธ",
                        minimum=21,
                        maximum=30,
                        step=1,
                        value=25,
                        visible=False
                    )
                    txt2vid_advanced = create_advanced_options()
                    txt2vid_generate = gr.Button(
                        "Step 3: ๋น๋์ค ์์ฑ",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column():
                    txt2vid_output = gr.Video(label="์์ฑ๋ ๋น๋์ค")
        # Image to Video Tab
        with gr.TabItem("์ด๋ฏธ์ง๋ก ๋น๋์ค ๋ง๋ค๊ธฐ"):
            with gr.Row():
                with gr.Column():
                    img2vid_image = gr.Image(
                        type="filepath",
                        label="Step 1: ์ ๋ ฅ ์ด๋ฏธ์ง ์ ๋ก๋",
                        elem_id="image_upload",
                    )
                    img2vid_prompt = gr.Textbox(
                        label="Step 2: ํ๋กฌํํธ ์ ๋ ฅ",
                        placeholder="์ด๋ฏธ์ง๋ฅผ ์ด๋ป๊ฒ ์ ๋๋ฉ์ด์ ํํ ์ง ์ค๋ช ํ์ธ์ (์ต์ 50์)...",
                        value="๊ท์ฌ์ด ๊ณ ์์ด",
                        lines=5,
                    )
                    img2vid_enhance_toggle = Toggle(
                        label="ํ๋กฌํํธ ์ฆ๊ฐ",
                        value=False,
                        interactive=True,
                    )
                    img2vid_negative_prompt = gr.Textbox(
                        label="Step 3: ๋ค๊ฑฐํฐ๋ธ ํ๋กฌํํธ ์ ๋ ฅ",
                        placeholder="๋น๋์ค์์ ์ํ์ง ์๋ ์์๋ฅผ ์ค๋ช ํ์ธ์...",
                        value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
                        lines=2,
                        visible=False
                    )
                    img2vid_preset = gr.Dropdown(
                        choices=[p["label"] for p in preset_options],
                        value="[16:9] 512x320, 10.3์ด",
                        label="Step 3: ํด์๋ ํ๋ฆฌ์ ์ ํ",
                    )
                    img2vid_frame_rate = gr.Slider(
                        label="Step 4: ํ๋ ์ ๋ ์ดํธ",
                        minimum=21,
                        maximum=30,
                        step=1,
                        value=25,
                        visible=False
                    )
                    img2vid_advanced = create_advanced_options()
                    img2vid_generate = gr.Button(
                        "Step 4: ๋น๋์ค ์์ฑ",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column():
                    img2vid_output = gr.Video(label="์์ฑ๋ ๋น๋์ค")
        # Scenario to Video Tab (Modified)
        with gr.TabItem("์๋๋ฆฌ์ค๋ก ๋น๋์ค ๋ง๋ค๊ธฐ(์ํผ)"):
            with gr.Row():
                with gr.Column(scale=1):
                    scenario_input = gr.Textbox(
                        label="์์ ์คํฌ๋ฆฝํธ ์ ๋ ฅ",
                        placeholder="์ ์ฒด ์๋๋ฆฌ์ค๋ฅผ ์ ๋ ฅํ์ธ์...",
                        lines=10
                    )
                    scenario_preset = gr.Dropdown(
                        choices=[p["label"] for p in preset_options],
                        value="[16:9] 512x320, 10.3์ด",
                        label="ํ๋ฉด ํฌ๊ธฐ ์ ํ"
                    )
                    analyze_btn = gr.Button("์๋๋ฆฌ์ค ๋ถ์ ๋ฐ ํ๋กฌํํธ ์์ฑ", variant="primary")
                with gr.Column(scale=2):
                    with gr.Row():
                        # Section 1
                        with gr.Column():
                            section1_prompt = gr.Textbox(
                                label="1. ๋ฐฐ๊ฒฝ ๋ฐ ํ์์ฑ",
                                lines=4
                            )
                            with gr.Row():
                                section1_regenerate = gr.Button("๐ ํ๋กฌํํธ ์์ฑ")
                                section1_generate = gr.Button("๐ ์์ ์์ฑ")
                            section1_video = gr.Video(label="์น์ 1 ์์")
                        # Section 2
                        with gr.Column():
                            section2_prompt = gr.Textbox(
                                label="2. ํฅ๋ฏธ ์ ๋ฐ",
                                lines=4
                            )
                            with gr.Row():
                                section2_regenerate = gr.Button("๐ ํ๋กฌํํธ ์์ฑ")
                                section2_generate = gr.Button("๐ ์์ ์์ฑ")
                            section2_video = gr.Video(label="์น์ 2 ์์")
                    with gr.Row():
                        # Section 3
                        with gr.Column():
                            section3_prompt = gr.Textbox(
                                label="3. ํด๊ฒฐ์ฑ ์ ์",
                                lines=4
                            )
                            with gr.Row():
                                section3_regenerate = gr.Button("๐ ํ๋กฌํํธ ์์ฑ")
                                section3_generate = gr.Button("๐ ์์ ์์ฑ")
                            section3_video = gr.Video(label="์น์ 3 ์์")
                        # Section 4
                        with gr.Column():
                            section4_prompt = gr.Textbox(
                                label="4. ๋ณธ๋ก ",
                                lines=4
                            )
                            with gr.Row():
                                section4_regenerate = gr.Button("๐ ํ๋กฌํํธ ์์ฑ")
                                section4_generate = gr.Button("๐ ์์ ์์ฑ")
                            section4_video = gr.Video(label="์น์ 4 ์์")
                    with gr.Row():
                        # Section 5
                        with gr.Column():
                            section5_prompt = gr.Textbox(
                                label="5. ๊ฒฐ๋ก ๋ฐ ๊ฐ์กฐ",
                                lines=4
                            )
                            with gr.Row():
                                section5_regenerate = gr.Button("๐ ํ๋กฌํํธ ์์ฑ")
                                section5_generate = gr.Button("๐ ์์ ์์ฑ")
                            section5_video = gr.Video(label="์น์ 5 ์์")
            # Merged-video area: trigger button on the left, result on the right
            with gr.Row():
                with gr.Column(scale=1):
                    merge_videos_btn = gr.Button("ํตํฉ ์์ ์์ฑ", variant="primary", size="lg")
                with gr.Column(scale=2):
                    with gr.Row():
                        merged_video_output = gr.Video(label="ํตํฉ ์์")
    # Event handlers
    txt2vid_preset.change(
        fn=preset_changed,
        inputs=[txt2vid_preset],
        outputs=[
            txt2vid_current_height,
            txt2vid_current_width,
            txt2vid_current_num_frames,
            *txt2vid_advanced[3:]
        ]
    )
    txt2vid_enhance_toggle.change(
        fn=update_prompt_t2v,
        inputs=[txt2vid_prompt, txt2vid_enhance_toggle],
        outputs=txt2vid_prompt
    )
    txt2vid_generate.click(
        fn=generate_video_from_text,
        inputs=[
            txt2vid_prompt,
            txt2vid_enhance_toggle,
            txt2vid_negative_prompt,
            txt2vid_frame_rate,
            *txt2vid_advanced[:3],
            txt2vid_current_height,
            txt2vid_current_width,
            txt2vid_current_num_frames,
        ],
        outputs=txt2vid_output,
        concurrency_limit=1,
        concurrency_id="generate_video",
        queue=True,
    )
    img2vid_preset.change(
        fn=preset_changed,
        inputs=[img2vid_preset],
        outputs=[
            img2vid_current_height,
            img2vid_current_width,
            img2vid_current_num_frames,
            *img2vid_advanced[3:]
        ]
    )
    img2vid_enhance_toggle.change(
        fn=update_prompt_i2v,
        inputs=[img2vid_prompt, img2vid_enhance_toggle],
        outputs=img2vid_prompt
    )
    img2vid_generate.click(
        fn=generate_video_from_image,
        inputs=[
            img2vid_image,
            img2vid_prompt,
            img2vid_enhance_toggle,
            img2vid_negative_prompt,
            img2vid_frame_rate,
            *img2vid_advanced[:3],
            img2vid_current_height,
            img2vid_current_width,
            img2vid_current_num_frames,
        ],
        outputs=img2vid_output,
        concurrency_limit=1,
        concurrency_id="generate_video",
        queue=True,
    )
    # Scenario tab event handlers
    analyze_btn.click(
        fn=analyze_scenario,
        inputs=[scenario_input],
        outputs=[
            section1_prompt, section2_prompt, section3_prompt,
            section4_prompt, section5_prompt
        ]
    )
    # Per-section prompt regeneration (these buttons previously had no handler)
    section1_regenerate.click(
        fn=lambda scenario: generate_single_section_prompt(scenario, 1),
        inputs=[scenario_input],
        outputs=section1_prompt,
    )
    section2_regenerate.click(
        fn=lambda scenario: generate_single_section_prompt(scenario, 2),
        inputs=[scenario_input],
        outputs=section2_prompt,
    )
    section3_regenerate.click(
        fn=lambda scenario: generate_single_section_prompt(scenario, 3),
        inputs=[scenario_input],
        outputs=section3_prompt,
    )
    section4_regenerate.click(
        fn=lambda scenario: generate_single_section_prompt(scenario, 4),
        inputs=[scenario_input],
        outputs=section4_prompt,
    )
    section5_regenerate.click(
        fn=lambda scenario: generate_single_section_prompt(scenario, 5),
        inputs=[scenario_input],
        outputs=section5_prompt,
    )
    # Section video generation: one handler per button, sharing the
    # "generate_video" concurrency group with the other generators.
    section1_generate.click(
        fn=generate_section_video,
        inputs=[section1_prompt, scenario_preset],
        outputs=section1_video,
        api_name="generate_section1",
        concurrency_limit=1,
        concurrency_id="generate_video",
    )
    section2_generate.click(
        fn=lambda p, pr: generate_section_video(p, pr, 2),
        inputs=[section2_prompt, scenario_preset],
        outputs=section2_video,
        api_name="generate_section2",
        concurrency_limit=1,
        concurrency_id="generate_video",
    )
    section3_generate.click(
        fn=lambda p, pr: generate_section_video(p, pr, 3),
        inputs=[section3_prompt, scenario_preset],
        outputs=section3_video,
        api_name="generate_section3",
        concurrency_limit=1,
        concurrency_id="generate_video",
    )
    section4_generate.click(
        fn=lambda p, pr: generate_section_video(p, pr, 4),
        inputs=[section4_prompt, scenario_preset],
        outputs=section4_video,
        api_name="generate_section4",
        concurrency_limit=1,
        concurrency_id="generate_video",
    )
    section5_generate.click(
        fn=lambda p, pr: generate_section_video(p, pr, 5),
        inputs=[section5_prompt, scenario_preset],
        outputs=section5_video,
        api_name="generate_section5",
        concurrency_limit=1,
        concurrency_id="generate_video",
    )
    # Merge the five section videos into one
    merge_videos_btn.click(
        fn=merge_section_videos,
        inputs=[
            section1_video,
            section2_video,
            section3_video,
            section4_video,
            section5_video
        ],
        outputs=merged_video_output
    )
# Script entry point: enable the request queue (up to 64 waiting requests, one
# concurrent run per event by default, queue API closed) and start the server
# with a share link requested and the API page hidden.
if __name__ == "__main__":
    iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
        share=True, show_api=False
    )