openfree committed
Commit 44f4dde · verified · 1 Parent(s): d256587

Create app-backup1.py

Files changed (1)
  1. app-backup1.py +679 -0
app-backup1.py ADDED
@@ -0,0 +1,679 @@
+ import gradio as gr
+ from gradio_toggle import Toggle
+ import torch
+ from huggingface_hub import snapshot_download
+ from transformers import pipeline
+ from transformers import T5EncoderModel, T5Tokenizer
+
+ from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
+ from xora.models.transformers.transformer3d import Transformer3DModel
+ from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier
+ from xora.schedulers.rf import RectifiedFlowScheduler
+ from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
+ from xora.utils.conditioning_method import ConditioningMethod
+ from pathlib import Path
+ import safetensors.torch
+ import json
+ import numpy as np
+ import cv2
+ from PIL import Image
+ import tempfile
+ import os
+ import gc
+ from openai import OpenAI
+ import re
+
+ # System prompts for the GPT-based prompt enhancer
+ system_prompt_t2v = """You are a prompt expert for video generation.
+ Improve the given prompt to fit the following structure:
+ 1. Start with the main action in a single clear sentence
+ 2. Describe specific movements and gestures in chronological order
+ 3. Describe the appearance of characters/objects in detail
+ 4. Include concrete background and environment details
+ 5. Specify camera angles and movements
+ 6. Describe lighting and colors in detail
+ 7. Include changes or sudden events naturally
+ Write the entire description as one natural paragraph,
+ as concrete and visual as a director describing a shot list.
+ Stay under 200 words, but be as detailed as possible."""
+
+ system_prompt_i2v = """You are a prompt expert for image-based video generation.
+ Improve the given prompt to fit the following structure:
+ 1. Start with the main action in a single clear sentence
+ 2. Describe specific movements and gestures in chronological order
+ 3. Describe the appearance of characters/objects in detail
+ 4. Include concrete background and environment details
+ 5. Specify camera angles and movements
+ 6. Describe lighting and colors in detail
+ 7. Include changes or sudden events naturally
+ Write the entire description as one natural paragraph,
+ as concrete and visual as a director describing a shot list.
+ Stay under 200 words, but be as detailed as possible."""
+
+ # Load the Hugging Face and OpenAI credentials from the environment
+ hf_token = os.getenv("HF_TOKEN")
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+ client = OpenAI(api_key=openai_api_key)
+
+ # Initialize the Korean-to-English translation pipeline
+ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
+
+ # Korean text detection: matches Hangul jamo and composed syllables
+ def contains_korean(text):
+     korean_pattern = re.compile("[ㄱ-ㅎㅏ-ㅣ가-힣]")
+     return bool(korean_pattern.search(text))
+
+ def translate_korean_prompt(prompt):
+     """
+     Translate a Korean prompt to English if Korean text is detected
+     """
+     if contains_korean(prompt):
+         translated = translator(prompt)[0]["translation_text"]
+         print(f"Original Korean prompt: {prompt}")
+         print(f"Translated English prompt: {translated}")
+         return translated
+     return prompt
+
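+ # Illustrative behavior (a sketch, not executed at import time):
+ #   contains_korean("A cat on a beach") -> False, so the prompt passes through unchanged
+ #   contains_korean("해변 위의 고양이") -> True, so the prompt is machine-translated to English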
+ def enhance_prompt(prompt, prompt_type="t2v"):
+     system_prompt = system_prompt_t2v if prompt_type == "t2v" else system_prompt_i2v
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": prompt},
+     ]
+
+     try:
+         response = client.chat.completions.create(
+             model="gpt-4-1106-preview",
+             messages=messages,
+             max_tokens=2000,
+         )
+         enhanced_prompt = response.choices[0].message.content.strip()
+
+         print("\n=== Prompt Enhancement Result ===")
+         print("Original Prompt:")
+         print(prompt)
+         print("\nEnhanced Prompt:")
+         print(enhanced_prompt)
+         print("========================\n")
+
+         return enhanced_prompt
+     except Exception as e:
+         print(f"Error during prompt enhancement: {e}")
+         return prompt
+
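+ # Usage sketch (assumes OPENAI_API_KEY is set; on any API failure the
+ # function falls back to returning the original prompt unchanged):
+ #   enhanced = enhance_prompt("a dog runs through a park", prompt_type="t2v")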
+ def update_prompt_t2v(prompt, enhance_toggle):
+     return update_prompt(prompt, enhance_toggle, "t2v")
+
+ def update_prompt_i2v(prompt, enhance_toggle):
+     return update_prompt(prompt, enhance_toggle, "i2v")
+
+ def update_prompt(prompt, enhance_toggle, prompt_type="t2v"):
+     if enhance_toggle:
+         return enhance_prompt(prompt, prompt_type)
+     return prompt
+
+ # Set model download directory within Hugging Face Spaces
+ model_path = "asset"
+ if not os.path.exists(model_path):
+     snapshot_download(
+         "Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token
+     )
+
+ # Component directories inside the downloaded checkpoint
+ vae_dir = Path(model_path) / "vae"
+ unet_dir = Path(model_path) / "unet"
+ scheduler_dir = Path(model_path) / "scheduler"
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ def load_vae(vae_dir):
+     vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
+     vae_config_path = vae_dir / "config.json"
+     with open(vae_config_path, "r") as f:
+         vae_config = json.load(f)
+     vae = CausalVideoAutoencoder.from_config(vae_config)
+     vae_state_dict = safetensors.torch.load_file(vae_ckpt_path)
+     vae.load_state_dict(vae_state_dict)
+     return vae.to(device=device, dtype=torch.bfloat16)
+
+ def load_unet(unet_dir):
+     unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
+     unet_config_path = unet_dir / "config.json"
+     transformer_config = Transformer3DModel.load_config(unet_config_path)
+     transformer = Transformer3DModel.from_config(transformer_config)
+     unet_state_dict = safetensors.torch.load_file(unet_ckpt_path)
+     transformer.load_state_dict(unet_state_dict, strict=True)
+     return transformer.to(device=device, dtype=torch.bfloat16)
+
+ def load_scheduler(scheduler_dir):
+     scheduler_config_path = scheduler_dir / "scheduler_config.json"
+     scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
+     return RectifiedFlowScheduler.from_config(scheduler_config)
+
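+ # Both checkpoints above are cast to bfloat16, halving memory relative to
+ # float32. This is intended for GPU inference; on a CPU-only machine the
+ # models still load, but generation would be impractically slow.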
+ # Helper functions for image processing
+ def center_crop_and_resize(frame, target_height, target_width):
+     h, w, _ = frame.shape
+     aspect_ratio_target = target_width / target_height
+     aspect_ratio_frame = w / h
+     if aspect_ratio_frame > aspect_ratio_target:
+         # Frame is wider than the target: crop the width
+         new_width = int(h * aspect_ratio_target)
+         x_start = (w - new_width) // 2
+         frame_cropped = frame[:, x_start : x_start + new_width]
+     else:
+         # Frame is taller than the target: crop the height
+         new_height = int(w / aspect_ratio_target)
+         y_start = (h - new_height) // 2
+         frame_cropped = frame[y_start : y_start + new_height, :]
+     frame_resized = cv2.resize(frame_cropped, (target_width, target_height))
+     return frame_resized
+
+ def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
+     image = Image.open(image_path).convert("RGB")
+     image_np = np.array(image)
+     frame_resized = center_crop_and_resize(image_np, target_height, target_width)
+     frame_tensor = torch.tensor(frame_resized).permute(2, 0, 1).float()
+     frame_tensor = (frame_tensor / 127.5) - 1.0  # scale pixel values to [-1, 1]
+     return frame_tensor.unsqueeze(0).unsqueeze(2)
+
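+ # The returned tensor has shape (1, 3, 1, H, W): a batch of one video whose
+ # single frame is the conditioning image expected by the pipeline.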
+ # Load models
+ vae = load_vae(vae_dir)
+ unet = load_unet(unet_dir)
+ scheduler = load_scheduler(scheduler_dir)
+ patchifier = SymmetricPatchifier(patch_size=1)
+ text_encoder = T5EncoderModel.from_pretrained(
+     "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder"
+ ).to(device)
+ tokenizer = T5Tokenizer.from_pretrained(
+     "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer"
+ )
+
+ pipeline = XoraVideoPipeline(
+     transformer=unet,
+     patchifier=patchifier,
+     text_encoder=text_encoder,
+     tokenizer=tokenizer,
+     scheduler=scheduler,
+     vae=vae,
+ ).to(device)
+
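+ # Note: the assignment above shadows the transformers.pipeline function
+ # imported earlier. The translator was already constructed, so nothing
+ # breaks, but from here on the name `pipeline` refers to the video pipeline.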
+ # Preset options for resolution and frame configuration
+ preset_options = [
+     {"label": "1216x704, 41 frames", "width": 1216, "height": 704, "num_frames": 41},
+     {"label": "1088x704, 49 frames", "width": 1088, "height": 704, "num_frames": 49},
+     {"label": "1056x640, 57 frames", "width": 1056, "height": 640, "num_frames": 57},
+     {"label": "992x608, 65 frames", "width": 992, "height": 608, "num_frames": 65},
+     {"label": "896x608, 73 frames", "width": 896, "height": 608, "num_frames": 73},
+     {"label": "896x544, 81 frames", "width": 896, "height": 544, "num_frames": 81},
+     {"label": "832x544, 89 frames", "width": 832, "height": 544, "num_frames": 89},
+     {"label": "800x512, 97 frames", "width": 800, "height": 512, "num_frames": 97},
+     {"label": "768x512, 97 frames", "width": 768, "height": 512, "num_frames": 97},
+     {"label": "800x480, 105 frames", "width": 800, "height": 480, "num_frames": 105},
+     {"label": "736x480, 113 frames", "width": 736, "height": 480, "num_frames": 113},
+     {"label": "704x480, 121 frames", "width": 704, "height": 480, "num_frames": 121},
+     {"label": "704x448, 129 frames", "width": 704, "height": 448, "num_frames": 129},
+     {"label": "672x448, 137 frames", "width": 672, "height": 448, "num_frames": 137},
+     {"label": "640x416, 153 frames", "width": 640, "height": 416, "num_frames": 153},
+     {"label": "672x384, 161 frames", "width": 672, "height": 384, "num_frames": 161},
+     {"label": "640x384, 169 frames", "width": 640, "height": 384, "num_frames": 169},
+     {"label": "608x384, 177 frames", "width": 608, "height": 384, "num_frames": 177},
+     {"label": "576x384, 185 frames", "width": 576, "height": 384, "num_frames": 185},
+     {"label": "608x352, 193 frames", "width": 608, "height": 352, "num_frames": 193},
+     {"label": "576x352, 201 frames", "width": 576, "height": 352, "num_frames": 201},
+     {"label": "544x352, 209 frames", "width": 544, "height": 352, "num_frames": 209},
+     {"label": "512x352, 225 frames", "width": 512, "height": 352, "num_frames": 225},
+     {"label": "512x352, 233 frames", "width": 512, "height": 352, "num_frames": 233},
+     {"label": "544x320, 241 frames", "width": 544, "height": 320, "num_frames": 241},
+     {"label": "512x320, 249 frames", "width": 512, "height": 320, "num_frames": 249},
+     {"label": "512x320, 257 frames", "width": 512, "height": 320, "num_frames": 257},
+ ]
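+ # Note the trade-off the presets encode: as spatial resolution drops, the
+ # frame budget rises, keeping the total pixel count per clip roughly constant.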
+
+ def preset_changed(preset):
+     # Return one gr.update per slider (height, width, num_frames) so the
+     # number of return values matches the three output components wired to
+     # this handler below.
+     if preset != "Custom":
+         selected = next(item for item in preset_options if item["label"] == preset)
+         return (
+             gr.update(value=selected["height"], visible=False),
+             gr.update(value=selected["width"], visible=False),
+             gr.update(value=selected["num_frames"], visible=False),
+         )
+     else:
+         return (
+             gr.update(visible=True),
+             gr.update(visible=True),
+             gr.update(visible=True),
+         )
+
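+ # Selecting a preset hides the three sliders and pins their values; a
+ # "Custom" choice (if added to the dropdown choices) would re-show them.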
+ def generate_video_from_text(
+     prompt="",
+     enhance_prompt_toggle=False,
+     negative_prompt="",
+     frame_rate=25,
+     seed=171198,
+     num_inference_steps=30,
+     guidance_scale=3,
+     height=512,
+     width=768,
+     num_frames=121,
+     progress=gr.Progress(),
+ ):
+     if len(prompt.strip()) < 50:
+         raise gr.Error(
+             "The prompt must be at least 50 characters long. Please provide a more detailed description.",
+             duration=5,
+         )
+
+     # Translate Korean prompts to English
+     prompt = translate_korean_prompt(prompt)
+     negative_prompt = translate_korean_prompt(negative_prompt)
+
+     sample = {
+         "prompt": prompt,
+         "prompt_attention_mask": None,
+         "negative_prompt": negative_prompt,
+         "negative_prompt_attention_mask": None,
+         "media_items": None,
+     }
+
+     # Seed on the CPU so results are reproducible across devices
+     generator = torch.Generator(device="cpu").manual_seed(seed)
+
+     # Called by the pipeline after each denoising step; the pipeline passes
+     # itself as the first argument
+     def gradio_progress_callback(self, step, timestep, kwargs):
+         progress((step + 1) / num_inference_steps)
+
+     try:
+         with torch.no_grad():
+             images = pipeline(
+                 num_inference_steps=num_inference_steps,
+                 num_images_per_prompt=1,
+                 guidance_scale=guidance_scale,
+                 generator=generator,
+                 output_type="pt",
+                 height=height,
+                 width=width,
+                 num_frames=num_frames,
+                 frame_rate=frame_rate,
+                 **sample,
+                 is_video=True,
+                 vae_per_channel_normalize=True,
+                 conditioning_method=ConditioningMethod.UNCONDITIONAL,
+                 mixed_precision=True,
+                 callback_on_step_end=gradio_progress_callback,
+             ).images
+     except Exception as e:
+         raise gr.Error(
+             f"An error occurred while generating the video. Please try again. Error: {e}",
+             duration=5,
+         )
+     finally:
+         torch.cuda.empty_cache()
+         gc.collect()
+
+     output_path = tempfile.mktemp(suffix=".mp4")
+     print(images.shape)
+     # (B, C, F, H, W) -> (F, H, W, C), then rescale to 8-bit pixel values
+     video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
+     video_np = (video_np * 255).astype(np.uint8)
+     height, width = video_np.shape[1:3]
+     out = cv2.VideoWriter(
+         output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
+     )
+     for frame in video_np[..., ::-1]:  # reverse channels: RGB -> BGR for OpenCV
+         out.write(frame)
+     out.release()
+     del images
+     del video_np
+     torch.cuda.empty_cache()
+     return output_path
+
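+ # tempfile.mktemp is race-prone: it reserves a name without creating the
+ # file. A safer pattern, sketched here but not wired in, would be:
+ #     fd, output_path = tempfile.mkstemp(suffix=".mp4")
+ #     os.close(fd)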
+ def generate_video_from_image(
+     image_path,
+     prompt="",
+     enhance_prompt_toggle=False,
+     negative_prompt="",
+     frame_rate=25,
+     seed=171198,
+     num_inference_steps=30,
+     guidance_scale=3,
+     height=512,
+     width=768,
+     num_frames=121,
+     progress=gr.Progress(),
+ ):
+     print("Height: ", height)
+     print("Width: ", width)
+     print("Num Frames: ", num_frames)
+
+     if len(prompt.strip()) < 50:
+         raise gr.Error(
+             "The prompt must be at least 50 characters long. Please provide a more detailed description.",
+             duration=5,
+         )
+
+     if not image_path:
+         raise gr.Error("Please provide an input image.", duration=5)
+
+     # Translate Korean prompts to English
+     prompt = translate_korean_prompt(prompt)
+     negative_prompt = translate_korean_prompt(negative_prompt)
+
+     # The uploaded image becomes the first frame the generation is conditioned on
+     media_items = (
+         load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
+     )
+
+     sample = {
+         "prompt": prompt,
+         "prompt_attention_mask": None,
+         "negative_prompt": negative_prompt,
+         "negative_prompt_attention_mask": None,
+         "media_items": media_items,
+     }
+
+     generator = torch.Generator(device="cpu").manual_seed(seed)
+
+     def gradio_progress_callback(self, step, timestep, kwargs):
+         progress((step + 1) / num_inference_steps)
+
+     try:
+         with torch.no_grad():
+             images = pipeline(
+                 num_inference_steps=num_inference_steps,
+                 num_images_per_prompt=1,
+                 guidance_scale=guidance_scale,
+                 generator=generator,
+                 output_type="pt",
+                 height=height,
+                 width=width,
+                 num_frames=num_frames,
+                 frame_rate=frame_rate,
+                 **sample,
+                 is_video=True,
+                 vae_per_channel_normalize=True,
+                 conditioning_method=ConditioningMethod.FIRST_FRAME,
+                 mixed_precision=True,
+                 callback_on_step_end=gradio_progress_callback,
+             ).images
+
+         output_path = tempfile.mktemp(suffix=".mp4")
+         video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
+         video_np = (video_np * 255).astype(np.uint8)
+         height, width = video_np.shape[1:3]
+         out = cv2.VideoWriter(
+             output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
+         )
+         for frame in video_np[..., ::-1]:
+             out.write(frame)
+         out.release()
+     except Exception as e:
+         raise gr.Error(
+             f"An error occurred while generating the video. Please try again. Error: {e}",
+             duration=5,
+         )
+     finally:
+         torch.cuda.empty_cache()
+         gc.collect()
+
+     return output_path
+
+ def create_advanced_options():
+     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
+         seed = gr.Slider(
+             label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=171198
+         )
+         inference_steps = gr.Slider(
+             label="4.2 Inference Steps", minimum=1, maximum=50, step=1, value=30
+         )
+         guidance_scale = gr.Slider(
+             label="4.3 Guidance Scale", minimum=1.0, maximum=5.0, step=0.1, value=3.0
+         )
+
+         height_slider = gr.Slider(
+             label="4.4 Height",
+             minimum=256,
+             maximum=1024,
+             step=64,
+             value=512,
+             visible=False,
+         )
+         width_slider = gr.Slider(
+             label="4.5 Width",
+             minimum=256,
+             maximum=1024,
+             step=64,
+             value=768,
+             visible=False,
+         )
+         num_frames_slider = gr.Slider(
+             label="4.6 Number of Frames",
+             minimum=1,
+             maximum=257,  # allow the largest preset (257 frames)
+             step=1,
+             value=121,
+             visible=False,
+         )
+
+     return [
+         seed,
+         inference_steps,
+         guidance_scale,
+         height_slider,
+         width_slider,
+         num_frames_slider,
+     ]
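+ # The first three entries are always-visible sampling controls; the last
+ # three (height, width, num_frames) are the hidden sliders that
+ # preset_changed drives via the advanced[3:] slice in the handlers below.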
+
+ # Gradio Interface Definition
+ with gr.Blocks(theme=gr.themes.Soft()) as iface:
+     with gr.Tabs():
+         # Text to Video Tab
+         with gr.TabItem("Text to Video"):
+             with gr.Row():
+                 with gr.Column():
+                     txt2vid_prompt = gr.Textbox(
+                         label="Step 1: Enter Prompt",
+                         placeholder="Describe the video you want to generate (at least 50 characters)...",
+                         value="A woman with long brown hair smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small mole on her right cheek. The camera angle is a close-up on the face of the woman with brown hair. The lighting is natural and warm, with a soft glow as if from a sunset illuminating the scene. The scene looks like real footage.",
+                         lines=5,
+                     )
+                     txt2vid_enhance_toggle = Toggle(
+                         label="Enhance Prompt",
+                         value=False,
+                         interactive=True,
+                     )
+
+                     txt2vid_negative_prompt = gr.Textbox(
+                         label="Step 2: Enter Negative Prompt",
+                         placeholder="Describe elements you do not want in the video...",
+                         value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                         lines=2,
+                     )
+
+                     txt2vid_preset = gr.Dropdown(
+                         choices=[p["label"] for p in preset_options],
+                         value="768x512, 97 frames",
+                         label="Step 3.1: Select Resolution Preset",
+                     )
+
+                     txt2vid_frame_rate = gr.Slider(
+                         label="Step 3.2: Frame Rate",
+                         minimum=21,
+                         maximum=30,
+                         step=1,
+                         value=25,
+                     )
+
+                     txt2vid_advanced = create_advanced_options()
+                     txt2vid_generate = gr.Button(
+                         "Step 5: Generate Video",
+                         variant="primary",
+                         size="lg",
+                     )
+
+                 with gr.Column():
+                     txt2vid_output = gr.Video(label="Generated Video")
+
+             with gr.Row():
+                 gr.Examples(
+                     examples=[
+                         [
+                             "A young woman in a traditional Mongolian dress peeks through a sheer white curtain, her expression a mix of curiosity and apprehension. She has long black hair styled in two braids adorned with white beads, and her eyes are wide with a hint of surprise. Her dress is a vivid blue with ornate gold embroidery, and she wears a headband of a similar design. The background is a plain white curtain that creates a sense of mystery and intrigue.",
+                             "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                             "assets/t2v_2.mp4",
+                         ],
+                         [
+                             "A young man with blond hair in a yellow jacket stands in a forest and looks around. He has light skin and his hair is parted in the middle. He looks to the left, then to the right, pausing to gaze briefly in each direction. The camera is fixed at a low angle, looking up at the man. The background is slightly blurred, showing green trees and the sun shining brightly behind him. The lighting is natural and warm, and the sunlight creates a lens flare across the man's face. The scene is captured like real footage.",
+                             "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                             "assets/t2v_1.mp4",
+                         ],
+                         [
+                             "A cyclist races along a winding mountain road. Clad in aerodynamic gear, he pedals hard, beads of sweat glistening on his forehead. The camera alternates between his determined expression and the breathtaking landscape. Pine trees rush past, and the sky is a vivid blue. The scene conveys an energetic, competitive atmosphere.",
+                             "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                             "assets/t2v_0.mp4",
+                         ],
+                     ],
+                     inputs=[txt2vid_prompt, txt2vid_negative_prompt, txt2vid_output],
+                     label="Text-to-Video Generation Examples",
+                 )
+
+         # Image to Video Tab
+         with gr.TabItem("Image to Video"):
+             with gr.Row():
+                 with gr.Column():
+                     img2vid_image = gr.Image(
+                         type="filepath",
+                         label="Step 1: Upload Input Image",
+                         elem_id="image_upload",
+                     )
+                     img2vid_prompt = gr.Textbox(
+                         label="Step 2: Enter Prompt",
+                         placeholder="Describe how you want to animate the image (at least 50 characters)...",
+                         value="A woman with long brown hair smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small mole on her right cheek. The camera angle is a close-up on the face of the woman with brown hair. The lighting is natural and warm, with a soft glow as if from a sunset illuminating the scene. The scene looks like real footage.",
+                         lines=5,
+                     )
+                     img2vid_enhance_toggle = Toggle(
+                         label="Enhance Prompt",
+                         value=False,
+                         interactive=True,
+                     )
+                     img2vid_negative_prompt = gr.Textbox(
+                         label="Step 3: Enter Negative Prompt",
+                         placeholder="Describe elements you do not want in the video...",
+                         value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                         lines=2,
+                     )
+
+                     img2vid_preset = gr.Dropdown(
+                         choices=[p["label"] for p in preset_options],
+                         value="768x512, 97 frames",
+                         label="Step 3.1: Select Resolution Preset",
+                     )
+
+                     img2vid_frame_rate = gr.Slider(
+                         label="Step 3.2: Frame Rate",
+                         minimum=21,
+                         maximum=30,
+                         step=1,
+                         value=25,
+                     )
+
+                     img2vid_advanced = create_advanced_options()
+                     img2vid_generate = gr.Button(
+                         "Step 5: Generate Video", variant="primary", size="lg"
+                     )
+
+                 with gr.Column():
+                     img2vid_output = gr.Video(label="Generated Video")
+
+             with gr.Row():
+                 gr.Examples(
+                     examples=[
+                         [
+                             "assets/i2v_i2.png",
+                             "A woman stirs a pot of boiling water on a white electric burner. Her hands, with purple nail polish, move a wooden spoon in circles inside the white pot. The pot sits on a white electric burner with black buttons and a digital display. The burner rests on a white countertop, with a red-and-white checkered cloth partially visible in the bottom-right corner. The camera angle looks straight down from directly overhead and stays fixed throughout the scene. The lighting is bright, even, neutral white light. The scene looks like real footage.",
+                             "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                             "assets/i2v_2.mp4",
+                         ],
+                         [
+                             "assets/i2v_i0.png",
+                             "A woman in a long flowing dress stands in a field with her back to the camera, gazing toward the horizon. Her hair is long and light, flowing down her back. She stands beneath the sprawling branches of a large oak tree. To the left, a classic American car is parked on dried grass. In the distance, a wrecked car lies on its side. The sky above is a dramatic canvas of bright white clouds against a dark sky. The entire image is in black and white, emphasizing the contrast of light and shadow. The woman walks slowly toward the car.",
+                             "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                             "assets/i2v_0.mp4",
+                         ],
+                         [
+                             "assets/i2v_i1.png",
+                             "A pair of hands shapes a piece of clay on a pottery wheel, gradually forming a cone shape. The hands of a person out of frame are covered in clay, gently pressing a lump of clay at the center of the spinning pottery wheel. The hands move in a circular motion, gradually forming a cone shape at the top of the clay. The camera is positioned directly above the pottery wheel, providing a bird's-eye view of the clay being shaped. The lighting is bright and even, illuminating the clay and the hands working it. The scene is captured like real footage.",
+                             "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
+                             "assets/i2v_1.mp4",
+                         ],
+                     ],
+                     inputs=[
+                         img2vid_image,
+                         img2vid_prompt,
+                         img2vid_negative_prompt,
+                         img2vid_output,
+                     ],
+                     label="Image-to-Video Generation Examples",
+                 )
+
+     # Event handlers
+     txt2vid_preset.change(
+         fn=preset_changed,
+         inputs=[txt2vid_preset],
+         outputs=txt2vid_advanced[3:],
+     )
+
+     txt2vid_enhance_toggle.change(
+         fn=update_prompt_t2v,
+         inputs=[txt2vid_prompt, txt2vid_enhance_toggle],
+         outputs=txt2vid_prompt,
+     )
+
+     txt2vid_generate.click(
+         fn=generate_video_from_text,
+         inputs=[
+             txt2vid_prompt,
+             txt2vid_enhance_toggle,
+             txt2vid_negative_prompt,
+             txt2vid_frame_rate,
+             *txt2vid_advanced,
+         ],
+         outputs=txt2vid_output,
+         concurrency_limit=1,
+         concurrency_id="generate_video",
+         queue=True,
+     )
+
+     img2vid_preset.change(
+         fn=preset_changed,
+         inputs=[img2vid_preset],
+         outputs=img2vid_advanced[3:],
+     )
+
+     img2vid_enhance_toggle.change(
+         fn=update_prompt_i2v,
+         inputs=[img2vid_prompt, img2vid_enhance_toggle],
+         outputs=img2vid_prompt,
+     )
+
+     img2vid_generate.click(
+         fn=generate_video_from_image,
+         inputs=[
+             img2vid_image,
+             img2vid_prompt,
+             img2vid_enhance_toggle,
+             img2vid_negative_prompt,
+             img2vid_frame_rate,
+             *img2vid_advanced,
+         ],
+         outputs=img2vid_output,
+         concurrency_limit=1,
+         concurrency_id="generate_video",
+         queue=True,
+     )
+
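+ # Queue settings: both generate buttons share the "generate_video"
+ # concurrency group with a limit of 1, so only one video renders at a time;
+ # up to 64 requests can wait in the queue.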
+ if __name__ == "__main__":
+     iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
+         share=True, show_api=False
+     )