openfree committed on
Commit
b7abbfc
·
verified ·
1 Parent(s): 5039c8f

Delete app-backup1.py

Files changed (1)
  1. app-backup1.py +0 -1217
app-backup1.py DELETED
@@ -1,1217 +0,0 @@
1
- import gradio as gr
2
- from gradio_toggle import Toggle
3
- import torch
4
- from huggingface_hub import snapshot_download
5
- from transformers import pipeline
6
-
7
- from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
8
- from xora.models.transformers.transformer3d import Transformer3DModel
9
- from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier
10
- from xora.schedulers.rf import RectifiedFlowScheduler
11
- from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
12
- from transformers import T5EncoderModel, T5Tokenizer
13
- from xora.utils.conditioning_method import ConditioningMethod
14
- from pathlib import Path
15
- import safetensors.torch
16
- import json
17
- import numpy as np
18
- import cv2
19
- from PIL import Image
20
- import tempfile
21
- import os
22
- import gc
23
- from openai import OpenAI
24
- import re
25
- import time
26
- # Load system prompts
27
- system_prompt_t2v = """당신은 λΉ„λ””μ˜€ 생성을 μœ„ν•œ ν”„λ‘¬ν”„νŠΈ μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
28
- 주어진 ν”„λ‘¬ν”„νŠΈλ₯Ό λ‹€μŒ ꡬ쑰에 맞게 κ°œμ„ ν•΄μ£Όμ„Έμš”:
29
- 1. μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
30
- 2. ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
31
- 3. 캐릭터/객체의 μ™Έλͺ¨λ₯Ό μƒμ„Ένžˆ λ¬˜μ‚¬
32
- 4. λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
33
- 5. 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
34
- 6. μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
35
- 7. λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함
36
- λͺ¨λ“  μ„€λͺ…은 ν•˜λ‚˜μ˜ μžμ—°μŠ€λŸ¬μš΄ λ¬Έλ‹¨μœΌλ‘œ μž‘μ„±ν•˜κ³ ,
37
- 촬영 감독이 촬영 λͺ©λ‘μ„ μ„€λͺ…ν•˜λŠ” κ²ƒμ²˜λŸΌ ꡬ체적이고 μ‹œκ°μ μœΌλ‘œ μž‘μ„±ν•˜μ„Έμš”.
38
- 200단어λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ ν•˜λ˜, μ΅œλŒ€ν•œ μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”."""
39
-
40
- system_prompt_i2v = """당신은 이미지 기반 λΉ„λ””μ˜€ 생성을 μœ„ν•œ ν”„λ‘¬ν”„νŠΈ μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
41
- 주어진 ν”„λ‘¬ν”„νŠΈλ₯Ό λ‹€μŒ ꡬ쑰에 맞게 κ°œμ„ ν•΄μ£Όμ„Έμš”:
42
- 1. μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
43
- 2. ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
44
- 3. 캐릭터/객체의 μ™Έλͺ¨λ₯Ό μƒμ„Ένžˆ λ¬˜μ‚¬
45
- 4. λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
46
- 5. 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
47
- 6. μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
48
- 7. λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함
49
- λͺ¨λ“  μ„€λͺ…은 ν•˜λ‚˜μ˜ μžμ—°μŠ€λŸ¬μš΄ λ¬Έλ‹¨μœΌλ‘œ μž‘μ„±ν•˜κ³ ,
50
- 촬영 감독이 촬영 λͺ©λ‘μ„ μ„€λͺ…ν•˜λŠ” κ²ƒμ²˜λŸΌ ꡬ체적이고 μ‹œκ°μ μœΌλ‘œ μž‘μ„±ν•˜μ„Έμš”.
51
- 200단어λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ ν•˜λ˜, μ΅œλŒ€ν•œ μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”."""
52
-
53
- # Load Hugging Face token if needed
54
- hf_token = os.getenv("HF_TOKEN")
55
- openai_api_key = os.getenv("OPENAI_API_KEY")
56
- client = OpenAI(api_key=openai_api_key)
57
-
58
- # Initialize translation pipeline with device and clean_up settings
59
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
60
- translator = pipeline(
61
- "translation",
62
- model="Helsinki-NLP/opus-mt-ko-en",
63
- device=device,
64
- clean_up_tokenization_spaces=True
65
- )
66
-
67
- # Korean text detection function
68
- def contains_korean(text):
69
- korean_pattern = re.compile('[γ„±-γ…Žγ…-γ…£κ°€-힣]')
70
- return bool(korean_pattern.search(text))
71
-
72
- def translate_korean_prompt(prompt, max_length=450):
73
- """
74
- Translate Korean prompt to English if Korean text is detected
75
- Split long text into chunks if necessary
76
- """
77
- if not contains_korean(prompt):
78
- return prompt
79
-
80
- # Split long text into chunks
81
- def split_text(text, max_length):
82
- words = text.split()
83
- chunks = []
84
- current_chunk = []
85
- current_length = 0
86
-
87
- for word in words:
88
- if current_length + len(word) + 1 > max_length:
89
- chunks.append(' '.join(current_chunk))
90
- current_chunk = [word]
91
- current_length = len(word)
92
- else:
93
- current_chunk.append(word)
94
- current_length += len(word) + 1
95
-
96
- if current_chunk:
97
- chunks.append(' '.join(current_chunk))
98
- return chunks
99
-
100
- try:
101
- if len(prompt) > max_length:
102
- chunks = split_text(prompt, max_length)
103
- translated_chunks = []
104
-
105
- for chunk in chunks:
106
- translated = translator(chunk, max_length=512)[0]['translation_text']
107
- translated_chunks.append(translated)
108
-
109
- final_translation = ' '.join(translated_chunks)
110
- else:
111
- final_translation = translator(prompt, max_length=512)[0]['translation_text']
112
-
113
- print(f"Original Korean prompt: {prompt}")
114
- print(f"Translated English prompt: {final_translation}")
115
- return final_translation
116
-
117
- except Exception as e:
118
- print(f"Translation error: {e}")
119
- return prompt # Return original prompt if translation fails
120
-
121
- def enhance_prompt(prompt, type="t2v"):
122
- system_prompt = system_prompt_t2v if type == "t2v" else system_prompt_i2v
123
- messages = [
124
- {"role": "system", "content": system_prompt},
125
- {"role": "user", "content": prompt},
126
- ]
127
-
128
- try:
129
- response = client.chat.completions.create(
130
- model="gpt-4-1106-preview",
131
- messages=messages,
132
- max_tokens=2000,
133
- )
134
- enhanced_prompt = response.choices[0].message.content.strip()
135
-
136
- print("\n=== ν”„λ‘¬ν”„νŠΈ 증강 κ²°κ³Ό ===")
137
- print("Original Prompt:")
138
- print(prompt)
139
- print("\nEnhanced Prompt:")
140
- print(enhanced_prompt)
141
- print("========================\n")
142
-
143
- return enhanced_prompt
144
- except Exception as e:
145
- print(f"Error during prompt enhancement: {e}")
146
- return prompt
147
-
148
- def update_prompt_t2v(prompt, enhance_toggle):
149
- return update_prompt(prompt, enhance_toggle, "t2v")
150
-
151
- def update_prompt_i2v(prompt, enhance_toggle):
152
- return update_prompt(prompt, enhance_toggle, "i2v")
153
-
154
- def update_prompt(prompt, enhance_toggle, type="t2v"):
155
- if enhance_toggle:
156
- return enhance_prompt(prompt, type)
157
- return prompt
158
-
159
- # Set model download directory within Hugging Face Spaces
160
- model_path = "asset"
161
- if not os.path.exists(model_path):
162
- snapshot_download(
163
- "Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token
164
- )
165
-
166
- # Global variables to load components
167
- vae_dir = Path(model_path) / "vae"
168
- unet_dir = Path(model_path) / "unet"
169
- scheduler_dir = Path(model_path) / "scheduler"
170
-
171
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
172
-
173
- def load_vae(vae_dir):
174
- vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
175
- vae_config_path = vae_dir / "config.json"
176
- with open(vae_config_path, "r") as f:
177
- vae_config = json.load(f)
178
- vae = CausalVideoAutoencoder.from_config(vae_config)
179
- vae_state_dict = safetensors.torch.load_file(vae_ckpt_path)
180
- vae.load_state_dict(vae_state_dict)
181
- return vae.to(device=device, dtype=torch.bfloat16)
182
-
183
- def load_unet(unet_dir):
184
- unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
185
- unet_config_path = unet_dir / "config.json"
186
- transformer_config = Transformer3DModel.load_config(unet_config_path)
187
- transformer = Transformer3DModel.from_config(transformer_config)
188
- unet_state_dict = safetensors.torch.load_file(unet_ckpt_path)
189
- transformer.load_state_dict(unet_state_dict, strict=True)
190
- return transformer.to(device=device, dtype=torch.bfloat16)
191
-
192
- def load_scheduler(scheduler_dir):
193
- scheduler_config_path = scheduler_dir / "scheduler_config.json"
194
- scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
195
- return RectifiedFlowScheduler.from_config(scheduler_config)
196
-
197
- # Helper function for image processing
198
- def center_crop_and_resize(frame, target_height, target_width):
199
- h, w, _ = frame.shape
200
- aspect_ratio_target = target_width / target_height
201
- aspect_ratio_frame = w / h
202
- if aspect_ratio_frame > aspect_ratio_target:
203
- new_width = int(h * aspect_ratio_target)
204
- x_start = (w - new_width) // 2
205
- frame_cropped = frame[:, x_start : x_start + new_width]
206
- else:
207
- new_height = int(w / aspect_ratio_target)
208
- y_start = (h - new_height) // 2
209
- frame_cropped = frame[y_start : y_start + new_height, :]
210
- frame_resized = cv2.resize(frame_cropped, (target_width, target_height))
211
- return frame_resized
212
-
213
- def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
214
- image = Image.open(image_path).convert("RGB")
215
- image_np = np.array(image)
216
- frame_resized = center_crop_and_resize(image_np, target_height, target_width)
217
- frame_tensor = torch.tensor(frame_resized).permute(2, 0, 1).float()
218
- frame_tensor = (frame_tensor / 127.5) - 1.0
219
- return frame_tensor.unsqueeze(0).unsqueeze(2)
220
-
221
- # Load models
222
- vae = load_vae(vae_dir)
223
- unet = load_unet(unet_dir)
224
- scheduler = load_scheduler(scheduler_dir)
225
- patchifier = SymmetricPatchifier(patch_size=1)
226
- text_encoder = T5EncoderModel.from_pretrained(
227
- "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder"
228
- ).to(device)
229
- tokenizer = T5Tokenizer.from_pretrained(
230
- "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer"
231
- )
232
-
233
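- # Assemble the Xora video generation pipeline from the loaded components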
- pipeline = XoraVideoPipeline(
234
- transformer=unet,
235
- patchifier=patchifier,
236
- text_encoder=text_encoder,
237
- tokenizer=tokenizer,
238
- scheduler=scheduler,
239
- vae=vae,
240
- ).to(device)
241
-
242
-
243
-
244
- # Preset options for resolution and frame configuration
245
- # Convert frames to seconds assuming 25 FPS
246
- preset_options = [
247
- {"label": "[16:9 HD] 1216x704, 1.6초", "width": 1216, "height": 704, "num_frames": 41},
248
- {"label": "[16:9] 1088x704, 2.0초", "width": 1088, "height": 704, "num_frames": 49},
249
- {"label": "[16:9] 1056x640, 2.3초", "width": 1056, "height": 640, "num_frames": 57},
250
- {"label": "[16:9] 992x608, 2.6초", "width": 992, "height": 608, "num_frames": 65},
251
- {"label": "[16:9] 896x608, 2.9초", "width": 896, "height": 608, "num_frames": 73},
252
- {"label": "[16:9] 896x544, 3.2초", "width": 896, "height": 544, "num_frames": 81},
253
- {"label": "[16:9] 832x544, 3.6초", "width": 832, "height": 544, "num_frames": 89},
254
- {"label": "[16:9] 800x512, 3.9초", "width": 800, "height": 512, "num_frames": 97},
255
- {"label": "[16:9] 768x512, 3.9초", "width": 768, "height": 512, "num_frames": 97},
256
- {"label": "[16:9] 800x480, 4.2초", "width": 800, "height": 480, "num_frames": 105},
257
- {"label": "[16:9] 736x480, 4.5초", "width": 736, "height": 480, "num_frames": 113},
258
- {"label": "[3:2] 704x480, 4.8초", "width": 704, "height": 480, "num_frames": 121},
259
- {"label": "[16:9] 704x448, 5.2초", "width": 704, "height": 448, "num_frames": 129},
260
- {"label": "[16:9] 672x448, 5.5초", "width": 672, "height": 448, "num_frames": 137},
261
- {"label": "[16:9] 640x416, 6.1초", "width": 640, "height": 416, "num_frames": 153},
262
- {"label": "[16:9] 672x384, 6.4초", "width": 672, "height": 384, "num_frames": 161},
263
- {"label": "[16:9] 640x384, 6.8초", "width": 640, "height": 384, "num_frames": 169},
264
- {"label": "[16:9] 608x384, 7.1초", "width": 608, "height": 384, "num_frames": 177},
265
- {"label": "[16:9] 576x384, 7.4초", "width": 576, "height": 384, "num_frames": 185},
266
- {"label": "[16:9] 608x352, 7.7초", "width": 608, "height": 352, "num_frames": 193},
267
- {"label": "[16:9] 576x352, 8.0초", "width": 576, "height": 352, "num_frames": 201},
268
- {"label": "[16:9] 544x352, 8.4초", "width": 544, "height": 352, "num_frames": 209},
269
- {"label": "[3:2] 512x352, 9.3초", "width": 512, "height": 352, "num_frames": 233},
270
- {"label": "[16:9] 544x320, 9.6초", "width": 544, "height": 320, "num_frames": 241},
271
- {"label": "[16:9] 512x320, 10.3초", "width": 512, "height": 320, "num_frames": 257},
272
- ]
273
-
274
- def preset_changed(preset):
275
- selected = next((item for item in preset_options if item["label"] == preset), None)
276
- if selected is None:
277
- raise gr.Error("Invalid preset selected")
278
- return [
279
- gr.State(value=selected["height"]),
280
- gr.State(value=selected["width"]),
281
- gr.State(value=selected["num_frames"]),
282
- gr.update(visible=False),
283
- gr.update(visible=False),
284
- gr.update(visible=False),
285
- ]
286
-
287
- def generate_video_from_text(
288
- prompt,
289
- enhance_prompt_toggle,
290
- negative_prompt,
291
- frame_rate,
292
- seed,
293
- num_inference_steps,
294
- guidance_scale,
295
- height,
296
- width,
297
- num_frames,
298
- progress=gr.Progress(),
299
- ):
300
- if len(prompt.strip()) < 50:
301
- raise gr.Error(
302
- "ν”„λ‘¬ν”„νŠΈλŠ” μ΅œμ†Œ 50자 이상이어야 ν•©λ‹ˆλ‹€. 더 μžμ„Έν•œ μ„€λͺ…을 μ œκ³΅ν•΄μ£Όμ„Έμš”.",
303
- duration=5,
304
- )
305
-
306
- # If prompt enhancement is enabled
307
- if enhance_prompt_toggle:
308
- prompt = enhance_prompt(prompt, "t2v")
309
-
310
- # Translate Korean prompts to English
311
- prompt = translate_korean_prompt(prompt)
312
- negative_prompt = translate_korean_prompt(negative_prompt)
313
-
314
- # Set default values
315
- height = height or 320
316
- width = width or 512
317
- num_frames = num_frames or 257
318
- frame_rate = frame_rate or 25
319
- seed = seed or 171198
320
- num_inference_steps = num_inference_steps or 41
321
- guidance_scale = guidance_scale or 4.0
322
-
323
- sample = {
324
- "prompt": prompt,
325
- "prompt_attention_mask": None,
326
- "negative_prompt": negative_prompt,
327
- "negative_prompt_attention_mask": None,
328
- "media_items": None,
329
- }
330
-
331
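- # Seed a CPU generator so sampling is reproducible for a given seed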
- generator = torch.Generator(device="cpu").manual_seed(seed)
332
-
333
- def gradio_progress_callback(self, step, timestep, kwargs):
334
- progress((step + 1) / num_inference_steps)
335
-
336
- try:
337
- with torch.no_grad():
338
- images = pipeline(
339
- num_inference_steps=num_inference_steps,
340
- num_images_per_prompt=1,
341
- guidance_scale=guidance_scale,
342
- generator=generator,
343
- output_type="pt",
344
- height=height,
345
- width=width,
346
- num_frames=num_frames,
347
- frame_rate=frame_rate,
348
- **sample,
349
- is_video=True,
350
- vae_per_channel_normalize=True,
351
- conditioning_method=ConditioningMethod.UNCONDITIONAL,
352
- mixed_precision=True,
353
- callback_on_step_end=gradio_progress_callback,
354
- ).images
355
- except Exception as e:
356
- raise gr.Error(
357
- f"λΉ„λ””μ˜€ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”. 였λ₯˜: {e}",
358
- duration=5,
359
- )
360
- finally:
361
- torch.cuda.empty_cache()
362
- gc.collect()
363
-
364
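- # Write the frames to a temporary MP4; the channel flip below converts RGB to BGR for OpenCV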
- output_path = tempfile.mktemp(suffix=".mp4")
365
- video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
366
- video_np = (video_np * 255).astype(np.uint8)
367
- height, width = video_np.shape[1:3]
368
- out = cv2.VideoWriter(
369
- output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
370
- )
371
- for frame in video_np[..., ::-1]:
372
- out.write(frame)
373
- out.release()
374
- del images
375
- del video_np
376
- torch.cuda.empty_cache()
377
- return output_path
378
-
379
- def generate_video_from_image(
380
- image_path,
381
- prompt,
382
- enhance_prompt_toggle,
383
- negative_prompt,
384
- frame_rate,
385
- seed,
386
- num_inference_steps,
387
- guidance_scale,
388
- height,
389
- width,
390
- num_frames,
391
- progress=gr.Progress(),
392
- ):
393
- if not image_path:
394
- raise gr.Error("μž…λ ₯ 이미지λ₯Ό μ œκ³΅ν•΄μ£Όμ„Έμš”.", duration=5)
395
-
396
- if len(prompt.strip()) < 50:
397
- raise gr.Error(
398
- "ν”„λ‘¬ν”„νŠΈλŠ” μ΅œμ†Œ 50자 이상이어야 ν•©λ‹ˆλ‹€. 더 μžμ„Έν•œ μ„€λͺ…을 μ œκ³΅ν•΄μ£Όμ„Έμš”.",
399
- duration=5,
400
- )
401
-
402
- # If prompt enhancement is enabled
403
- if enhance_prompt_toggle:
404
- prompt = enhance_prompt(prompt, "i2v")
405
-
406
- # Translate Korean prompts to English
407
- prompt = translate_korean_prompt(prompt)
408
- negative_prompt = translate_korean_prompt(negative_prompt)
409
-
410
- # Set default values
411
- height = height or 320
412
- width = width or 512
413
- num_frames = num_frames or 257
414
- frame_rate = frame_rate or 25
415
- seed = seed or 171198
416
- num_inference_steps = num_inference_steps or 41
417
- guidance_scale = guidance_scale or 4.0
418
-
419
- # Load and preprocess the input image
420
- media_items = (
421
- load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
422
- )
423
-
424
- sample = {
425
- "prompt": prompt,
426
- "prompt_attention_mask": None,
427
- "negative_prompt": negative_prompt,
428
- "negative_prompt_attention_mask": None,
429
- "media_items": media_items,
430
- }
431
-
432
- generator = torch.Generator(device="cpu").manual_seed(seed)
433
-
434
- def gradio_progress_callback(self, step, timestep, kwargs):
435
- progress((step + 1) / num_inference_steps)
436
-
437
- try:
438
- with torch.no_grad():
439
- images = pipeline(
440
- num_inference_steps=num_inference_steps,
441
- num_images_per_prompt=1,
442
- guidance_scale=guidance_scale,
443
- generator=generator,
444
- output_type="pt",
445
- height=height,
446
- width=width,
447
- num_frames=num_frames,
448
- frame_rate=frame_rate,
449
- **sample,
450
- is_video=True,
451
- vae_per_channel_normalize=True,
452
- conditioning_method=ConditioningMethod.FIRST_FRAME,
453
- mixed_precision=True,
454
- callback_on_step_end=gradio_progress_callback,
455
- ).images
456
-
457
- output_path = tempfile.mktemp(suffix=".mp4")
458
- video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
459
- video_np = (video_np * 255).astype(np.uint8)
460
- height, width = video_np.shape[1:3]
461
- out = cv2.VideoWriter(
462
- output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
463
- )
464
- for frame in video_np[..., ::-1]:
465
- out.write(frame)
466
- out.release()
467
-
468
- except Exception as e:
469
- raise gr.Error(
470
- f"λΉ„λ””μ˜€ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”. 였λ₯˜: {e}",
471
- duration=5,
472
- )
473
-
474
- finally:
475
- torch.cuda.empty_cache()
476
- gc.collect()
477
- if 'images' in locals():
478
- del images
479
- if 'video_np' in locals():
480
- del video_np
481
- if 'media_items' in locals():
482
- del media_items
483
-
484
- return output_path
485
-
486
- def create_advanced_options():
487
- with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
488
- seed = gr.Slider(
489
- label="Seed",
490
- minimum=0,
491
- maximum=1000000,
492
- step=1,
493
- value=171198
494
- )
495
- inference_steps = gr.Slider(
496
- label="4.2 Inference Steps",
497
- minimum=1,
498
- maximum=50,
499
- step=1,
500
- value=41,
501
- visible=False
502
- )
503
- guidance_scale = gr.Slider(
504
- label="4.3 Guidance Scale",
505
- minimum=1.0,
506
- maximum=5.0,
507
- step=0.1,
508
- value=4.0,
509
- visible=False
510
- )
511
- height_slider = gr.Slider(
512
- label="4.4 Height",
513
- minimum=256,
514
- maximum=1024,
515
- step=64,
516
- value=320,
517
- visible=False,
518
- )
519
- width_slider = gr.Slider(
520
- label="4.5 Width",
521
- minimum=256,
522
- maximum=1024,
523
- step=64,
524
- value=512,
525
- visible=False,
526
- )
527
- num_frames_slider = gr.Slider(
528
- label="4.5 Number of Frames",
529
- minimum=1,
530
- maximum=257,
531
- step=1,
532
- value=257,
533
- visible=False,
534
- )
535
-
536
- return [
537
- seed,
538
- inference_steps,
539
- guidance_scale,
540
- height_slider,
541
- width_slider,
542
- num_frames_slider,
543
- ]
544
-
545
- system_prompt_scenario = """당신은 μ˜μƒ μŠ€ν¬λ¦½νŠΈμ— λ§žλŠ” λ°°κ²½ μ˜μƒμ„ μƒμ„±ν•˜κΈ° μœ„ν•œ ν”„λ‘¬ν”„νŠΈ μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
546
- 주어진 슀크립트의 λΆ„μœ„κΈ°μ™€ λ§₯락을 μ‹œκ°μ  배경으둜 ν‘œν˜„ν•˜λ˜, λ‹€μŒ 원칙을 λ°˜λ“œμ‹œ μ€€μˆ˜ν•˜μ„Έμš”:
547
-
548
- 1. μ œν’ˆμ΄λ‚˜ μ„œλΉ„μŠ€λ₯Ό μ§μ ‘μ μœΌλ‘œ λ¬˜μ‚¬ν•˜μ§€ 말 것
549
- 2. 슀크립트의 감성과 ν†€μ•€λ§€λ„ˆλ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ μ˜μƒμ— 집쀑할 것
550
- 3. 5개 μ„Ήμ…˜μ΄ ν•˜λ‚˜μ˜ μ΄μ•ΌκΈ°μ²˜λŸΌ μžμ—°μŠ€λŸ½κ²Œ μ—°κ²°λ˜λ„λ‘ ν•  것
551
- 4. 좔상적이고 μ€μœ μ μΈ μ‹œκ° ν‘œν˜„μ„ ν™œμš©ν•  것
552
-
553
- 각 μ„Ήμ…˜λ³„ ν”„λ‘¬ν”„νŠΈ μž‘μ„± κ°€μ΄λ“œ:
554
- 1. λ°°κ²½ 및 ν•„μš”μ„±: 주제의 μ „λ°˜μ μΈ λΆ„μœ„κΈ°λ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ 씬
555
- 2. 문제 제기: κΈ΄μž₯κ°μ΄λ‚˜ κ°ˆλ“±μ„ μ•”μ‹œν•˜λŠ” λΆ„μœ„κΈ° μžˆλŠ” λ°°κ²½
556
- 3. ν•΄κ²°μ±… μ œμ‹œ: 희망적이고 밝은 ν†€μ˜ λ°°κ²½ μ „ν™˜
557
- 4. λ³Έλ‘ : μ•ˆμ •κ° 있고 신뒰도λ₯Ό λ†’μ΄λŠ” λ°°κ²½
558
- 5. κ²°λ‘ : μž„νŒ©νŠΈ μžˆλŠ” 마무리λ₯Ό μœ„ν•œ 역동적인 λ°°κ²½
559
-
560
- λͺ¨λ“  μ„Ήμ…˜μ΄ μΌκ΄€λœ μŠ€νƒ€μΌκ³Ό 톀을 μœ μ§€ν•˜λ©΄μ„œλ„ μžμ—°μŠ€λŸ½κ²Œ 이어지도둝 κ΅¬μ„±ν•˜μ„Έμš”.
561
-
562
- 각 μ„Ήμ…˜μ˜ ν”„λ‘¬ν”„νŠΈ μž‘μ„±μ‹œ λ°˜λ“œμ‹œ λ‹€μŒ ꡬ쑰에 맞게 κ°œμ„ ν•΄μ£Όμ„Έμš”:
563
- 1. μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
564
- 2. ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
565
- 3. 캐릭터/객체의 μ™Έλͺ¨λ₯Ό μƒμ„Ένžˆ λ¬˜μ‚¬
566
- 4. λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
567
- 5. 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
568
- 6. μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
569
- 7. λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함
570
- λͺ¨λ“  μ„€λͺ…은 ν•˜λ‚˜μ˜ μžμ—°μŠ€λŸ¬μš΄ λ¬Έλ‹¨μœΌλ‘œ μž‘μ„±ν•˜κ³ ,
571
- 촬영 감독이 촬영 λͺ©λ‘μ„ μ„€λͺ…ν•˜λŠ” κ²ƒμ²˜λŸΌ ꡬ체적이고 μ‹œκ°μ μœΌλ‘œ μž‘μ„±ν•˜μ„Έμš”.
572
- 200단어λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ ν•˜λ˜, μ΅œλŒ€ν•œ μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”.
573
-
574
- """
575
-
576
-
577
- def analyze_scenario(scenario):
578
- """μ‹œλ‚˜λ¦¬μ˜€λ₯Ό λΆ„μ„ν•˜μ—¬ 각 μ„Ήμ…˜λ³„ λ°°κ²½ μ˜μƒμš© ν”„λ‘¬ν”„νŠΈ 생성"""
579
- try:
580
- # Build the messages used to generate each section's prompt
581
- section_prompts = []
582
-
583
- for section_num in range(1, 6):
584
- section_descriptions = {
585
- 1: "λ°°κ²½ 및 ν•„μš”μ„±: 주제의 μ „λ°˜μ μΈ λΆ„μœ„κΈ°λ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ 씬",
586
- 2: "ν₯λ―Έ 유발: κΈ΄μž₯κ°μ΄λ‚˜ κ°ˆλ“±μ„ μ•”μ‹œν•˜λŠ” λΆ„μœ„κΈ° μžˆλŠ” λ°°κ²½",
587
- 3: "ν•΄κ²°μ±… μ œμ‹œ: 희망적이고 밝은 ν†€μ˜ λ°°κ²½ μ „ν™˜",
588
- 4: "λ³Έλ‘ : μ•ˆμ •κ° 있고 신뒰도λ₯Ό λ†’μ΄λŠ” λ°°κ²½",
589
- 5: "κ²°λ‘ : μž„νŒ©νŠΈ μžˆλŠ” 마무리λ₯Ό μœ„ν•œ 역동적인 λ°°κ²½"
590
- }
591
-
592
- messages = [
593
- {"role": "system", "content": system_prompt_scenario},
594
- {"role": "user", "content": f"""
595
- λ‹€μŒ 슀크립트의 {section_num}번째 μ„Ήμ…˜({section_descriptions[section_num]})에 λŒ€ν•œ
596
- λ°°κ²½ μ˜μƒ ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•΄μ£Όμ„Έμš”.
597
-
598
- 슀크립트:
599
- {scenario}
600
-
601
- μ£Όμ˜μ‚¬ν•­:
602
- 1. ν•΄λ‹Ή μ„Ήμ…˜μ˜ νŠΉμ„±({section_descriptions[section_num]})에 λ§žλŠ” λΆ„μœ„κΈ°μ™€ 톀을 λ°˜μ˜ν•˜μ„Έμš”.
603
- 2. 직접적인 μ œν’ˆ/μ„œλΉ„μŠ€ λ¬˜μ‚¬λŠ” ν”Όν•˜κ³ , 감성적이고 μ€μœ μ μΈ λ°°κ²½ μ˜μƒμ— μ§‘μ€‘ν•˜μ„Έμš”.
604
- 3. λ‹€μŒ ꡬ쑰λ₯Ό λ°˜λ“œμ‹œ ν¬ν•¨ν•˜μ„Έμš”:
605
- - μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
606
- - ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
607
- - λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
608
- - 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
609
- - μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
610
- - λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함"""}
611
- ]
612
-
613
- response = client.chat.completions.create(
614
- model="gpt-4-1106-preview",
615
- messages=messages,
616
- max_tokens=1000,
617
- temperature=0.7
618
- )
619
-
620
- section_prompt = response.choices[0].message.content.strip()
621
- section_prompts.append(f"{section_num}. {section_prompt}")
622
-
623
- # Add a short delay between API requests
624
- time.sleep(1)
625
-
626
- return section_prompts
627
-
628
- except Exception as e:
629
- print(f"Error during scenario analysis: {e}")
630
- return ["Error occurred during analysis"] * 5
631
-
632
- def generate_section_video(prompt, preset, section_number=1, base_seed=171198, progress=gr.Progress()):
633
- """각 μ„Ήμ…˜μ˜ λΉ„λ””μ˜€ 생성"""
634
- try:
635
- if not prompt or len(prompt.strip()) < 50:
636
- raise gr.Error("ν”„λ‘¬ν”„νŠΈλŠ” μ΅œμ†Œ 50자 이상이어야 ν•©λ‹ˆλ‹€.")
637
-
638
- if not preset:
639
- raise gr.Error("해상도 프리셋을 μ„ νƒν•΄μ£Όμ„Έμš”.")
640
-
641
- selected = next((item for item in preset_options if item["label"] == preset), None)
642
- if not selected:
643
- raise gr.Error("μ˜¬λ°”λ₯΄μ§€ μ•Šμ€ ν”„λ¦¬μ…‹μž…λ‹ˆλ‹€.")
644
-
645
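- # Offset the base seed per section so each section is deterministic but distinct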
- section_seed = base_seed + section_number
646
-
647
- return generate_video_from_text(
648
- prompt=prompt,
649
- enhance_prompt_toggle=False,  # disable prompt enhancement when generating section videos
650
- negative_prompt="low quality, worst quality, deformed, distorted, warped",
651
- frame_rate=25,
652
- seed=section_seed,
653
- num_inference_steps=41,
654
- guidance_scale=4.0,
655
- height=selected["height"],
656
- width=selected["width"],
657
- num_frames=selected["num_frames"],
658
- progress=progress
659
- )
660
- except Exception as e:
661
- print(f"Error in section {section_number}: {e}")
662
- raise gr.Error(f"μ„Ήμ…˜ {section_number} 생성 쀑 였λ₯˜: {str(e)}")
663
- finally:
664
- torch.cuda.empty_cache()
665
- gc.collect()
666
-
667
- def generate_single_section_prompt(scenario, section_number):
668
- """κ°œλ³„ μ„Ήμ…˜μ— λŒ€ν•œ ν”„λ‘¬ν”„νŠΈ 생성"""
669
- section_descriptions = {
670
- 1: "λ°°κ²½ 및 ν•„μš”μ„±: 주제의 μ „λ°˜μ μΈ λΆ„μœ„κΈ°λ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ 씬",
671
- 2: "ν₯λ―Έ 유발: ν₯λ―Έλ₯Ό μœ λ°œν•˜κ³  κΈ°λŒ€κ°μ„ μ¦ν­μ‹œν‚€λŠ” λ°°κ²½",
672
- 3: "ν•΄κ²°μ±… μ œμ‹œ: 희망적이고 밝은 ν†€μ˜ λ°°κ²½ μ „ν™˜",
673
- 4: "λ³Έλ‘ : μ•ˆμ •κ° 있고 신뒰도λ₯Ό λ†’μ΄λŠ” λ°°κ²½",
674
- 5: "κ²°λ‘ : μž„νŒ©νŠΈ μžˆλŠ” 마무리λ₯Ό μœ„ν•œ 역동적인 λ°°κ²½"
675
- }
676
-
677
- messages = [
678
- {"role": "system", "content": system_prompt_scenario},
679
- {"role": "user", "content": f"""
680
- λ‹€μŒ 슀크립트의 {section_number}번째 μ„Ήμ…˜({section_descriptions[section_number]})에 λŒ€ν•œ
681
- λ°°κ²½ μ˜μƒ ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•΄μ£Όμ„Έμš”.
682
-
683
- 슀크립트:
684
- {scenario}
685
-
686
- μ£Όμ˜μ‚¬ν•­:
687
- 1. ν•΄λ‹Ή μ„Ήμ…˜μ˜ νŠΉμ„±({section_descriptions[section_number]})에 λ§žλŠ” λΆ„μœ„κΈ°μ™€ 톀을 λ°˜μ˜ν•˜μ„Έμš”.
688
- 2. 직접적인 μ œν’ˆ/μ„œλΉ„μŠ€ λ¬˜μ‚¬λŠ” ν”Όν•˜κ³ , 감성적이고 μ€μœ μ μΈ λ°°κ²½ μ˜μƒμ— μ§‘μ€‘ν•˜μ„Έμš”.
689
- 3. λ‹€μŒ ꡬ쑰λ₯Ό λ°˜λ“œμ‹œ ν¬ν•¨ν•˜μ„Έμš”:
690
- - μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
691
- - ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
692
- - λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
693
- - 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
694
- - μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
695
- - λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함"""}
696
- ]
697
-
698
- try:
699
- response = client.chat.completions.create(
700
- model="gpt-4-1106-preview",
701
- messages=messages,
702
- max_tokens=1000,  # increased token budget
703
- temperature=0.7
704
- )
705
- generated_prompt = response.choices[0].message.content.strip()
706
- return f"{section_number}. {generated_prompt}"
707
- except Exception as e:
708
- print(f"Error during prompt generation for section {section_number}: {e}")
709
- return f"Error occurred during prompt generation for section {section_number}"
710
-
711
-
712
- # Video concatenation helper
713
- def combine_videos(video_paths, output_path):
714
- """μ—¬λŸ¬ λΉ„λ””μ˜€λ₯Ό ν•˜λ‚˜λ‘œ κ²°ν•©"""
715
- if not all(video_paths):
716
- raise gr.Error("λͺ¨λ“  μ„Ήμ…˜μ˜ μ˜μƒμ΄ μƒμ„±λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€.")
717
-
718
- try:
719
- # Read properties from the first video
720
- cap = cv2.VideoCapture(video_paths[0])
721
- fps = int(cap.get(cv2.CAP_PROP_FPS))
722
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
723
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
724
- cap.release()
725
-
726
- # Configure the output video writer
727
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
728
- out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
729
-
730
- # Append each video in sequence
731
- for video_path in video_paths:
732
- if video_path and os.path.exists(video_path):
733
- cap = cv2.VideoCapture(video_path)
734
- while True:
735
- ret, frame = cap.read()
736
- if not ret:
737
- break
738
- out.write(frame)
739
- cap.release()
740
-
741
- out.release()
742
- return output_path
743
- except Exception as e:
744
- raise gr.Error(f"λΉ„λ””μ˜€ κ²°ν•© 쀑 였λ₯˜ λ°œμƒ: {e}")
745
-
746
- def merge_section_videos(section1, section2, section3, section4, section5):
747
- """μ„Ήμ…˜ λΉ„λ””μ˜€λ“€μ„ ν•˜λ‚˜λ‘œ κ²°ν•©"""
748
- videos = []
749
-
750
- # Validate and collect each section video
751
- for i, video_path in enumerate([section1, section2, section3, section4, section5], 1):
752
- if video_path:
753
- if os.path.exists(video_path):
754
- try:
755
- # Verify the video file can be opened
756
- cap = cv2.VideoCapture(video_path)
757
- if cap.isOpened():
758
- videos.append(video_path)
759
- cap.release()
760
- else:
761
- raise gr.Error(f"μ„Ήμ…˜ {i}의 μ˜μƒ 파일이 μ†μƒλ˜μ—ˆκ±°λ‚˜ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€.")
762
- except Exception as e:
763
- raise gr.Error(f"μ„Ήμ…˜ {i} μ˜μƒ 처리 쀑 였λ₯˜: {str(e)}")
764
- else:
765
- raise gr.Error(f"μ„Ήμ…˜ {i}의 μ˜μƒ νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
766
- else:
767
- raise gr.Error(f"μ„Ήμ…˜ {i}의 μ˜μƒμ΄ μ—†μŠ΅λ‹ˆλ‹€.")
768
-
769
- if not videos:
770
- raise gr.Error("κ²°ν•©ν•  μ˜μƒμ΄ μ—†μŠ΅λ‹ˆλ‹€.")
771
-
772
- try:
773
- output_path = tempfile.mktemp(suffix=".mp4")
774
-
775
- # Read properties from the first video
776
- cap = cv2.VideoCapture(videos[0])
777
- fps = int(cap.get(cv2.CAP_PROP_FPS))
778
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
779
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
780
- cap.release()
781
-
782
- # Configure the output video writer
783
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
784
- out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
785
-
786
- # Append each video in sequence
787
- for video_path in videos:
788
- cap = cv2.VideoCapture(video_path)
789
- while True:
790
- ret, frame = cap.read()
791
- if not ret:
792
- break
793
- # Resize frames that do not match the target size
794
- if frame.shape[:2] != (height, width):
795
- frame = cv2.resize(frame, (width, height))
796
- out.write(frame)
797
- cap.release()
798
-
799
- out.release()
800
- print(f"Successfully merged {len(videos)} videos")
801
- return output_path
802
-
803
- except Exception as e:
804
- raise gr.Error(f"λΉ„λ””μ˜€ κ²°ν•© 쀑 였λ₯˜ λ°œμƒ: {e}")
805
-
806
- def generate_script(topic):
807
- """μ£Όμ œμ— λ§žλŠ” 슀크립트 생성"""
808
- if not topic:
809
- return "주제λ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”."
810
-
811
- messages = [
812
- {"role": "system", "content": """당신은 μ˜μƒ 슀크립트 μž‘μ„± μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
813
- 주어진 주제둜 λ‹€μŒ ꡬ쑰에 λ§žλŠ” 5개 μ„Ήμ…˜μ˜ 슀크립트λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”:
814
-
815
- 1. λ°°κ²½ 및 ν•„μš”μ„±: 주제 μ†Œκ°œμ™€ μ‹œμ²­μžμ˜ ν₯λ―Έ 유발
816
- 2. ν₯λ―Έ 유발: ꡬ체적인 λ‚΄μš© μ „κ°œμ™€ ν˜ΈκΈ°μ‹¬ 자극
817
- 3. ν•΄κ²°μ±… μ œμ‹œ: 핡심 λ‚΄μš©κ³Ό ν•΄κ²°λ°©μ•ˆ μ œμ‹œ
818
- 4. λ³Έλ‘ : μƒμ„Έν•œ μ„€λͺ…κ³Ό μž₯점 뢀각
819
- 5. κ²°λ‘ : 핡심 λ©”μ‹œμ§€ 강쑰와 행동 μœ λ„
820
-
821
- 각 μ„Ήμ…˜μ€ μžμ—°μŠ€λŸ½κ²Œ μ—°κ²°λ˜μ–΄μ•Ό ν•˜λ©°,
822
- μ „μ²΄μ μœΌλ‘œ μΌκ΄€λœ 톀과 λΆ„μœ„κΈ°λ₯Ό μœ μ§€ν•˜λ©΄μ„œλ„
823
- μ‹œμ²­μžμ˜ 관심을 λκΉŒμ§€ μœ μ§€ν•  수 μžˆλ„λ‘ μž‘μ„±ν•΄μ£Όμ„Έμš”."""},
824
- {"role": "user", "content": f"λ‹€μŒ 주제둜 μ˜μƒ 슀크립트λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”: {topic}"}
825
- ]
826
-
827
- try:
828
- response = client.chat.completions.create(
829
- model="gpt-4-1106-preview",
830
- messages=messages,
831
- max_tokens=2000,
832
- temperature=0.7
833
- )
834
- return response.choices[0].message.content.strip()
835
- except Exception as e:
836
- print(f"Error during script generation: {e}")
837
- return "슀크립트 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
838
-
839
-
840
- def cleanup():
841
- """λ©”λͺ¨λ¦¬ 정리 ν•¨μˆ˜"""
842
- torch.cuda.empty_cache()
843
- gc.collect()
844
-
845
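- # Build the Gradio UI: text-to-video, image-to-video, and scenario (short-form) tabs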
- with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
846
- # Initialize State variables
847
- txt2vid_current_height = gr.State(value=320)
848
- txt2vid_current_width = gr.State(value=512)
849
- txt2vid_current_num_frames = gr.State(value=257)
850
-
851
- img2vid_current_height = gr.State(value=320)
852
- img2vid_current_width = gr.State(value=512)
853
- img2vid_current_num_frames = gr.State(value=257)
854
-
855
- with gr.Tabs():
856
- # Text to Video Tab
857
- with gr.TabItem("ν…μŠ€νŠΈλ‘œ λΉ„λ””μ˜€ λ§Œλ“€κΈ°"):
858
- with gr.Row():
859
- with gr.Column():
860
- txt2vid_prompt = gr.Textbox(
861
- label="Step 1: ν”„λ‘¬ν”„νŠΈ μž…λ ₯",
862
- placeholder="μƒμ„±ν•˜κ³  싢은 λΉ„λ””μ˜€λ₯Ό μ„€λͺ…ν•˜μ„Έμš” (μ΅œμ†Œ 50자)...",
863
- value="κ·€μ—¬μš΄ 고양이",
864
- lines=5,
865
- )
866
- txt2vid_enhance_toggle = Toggle(
867
- label="ν”„λ‘¬ν”„νŠΈ 증강",
868
- value=False,
869
- interactive=True,
870
- )
871
- txt2vid_negative_prompt = gr.Textbox(
872
- label="Step 2: λ„€κ±°ν‹°λΈŒ ν”„λ‘¬ν”„νŠΈ μž…λ ₯",
873
- placeholder="λΉ„λ””μ˜€μ—μ„œ μ›ν•˜μ§€ μ•ŠλŠ” μš”μ†Œλ₯Ό μ„€λͺ…ν•˜μ„Έμš”...",
874
- value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
875
- lines=2,
876
- visible=False
877
- )
878
- txt2vid_preset = gr.Dropdown(
879
- choices=[p["label"] for p in preset_options],
880
- value="[16:9] 512x320, 10.3초",
881
- label="Step 2: 해상도 프리셋 선택",
882
- )
883
- txt2vid_frame_rate = gr.Slider(
884
- label="Step 3: ν”„λ ˆμž„ 레이트",
885
- minimum=21,
886
- maximum=30,
887
- step=1,
888
- value=25,
889
- visible=False
890
- )
891
- txt2vid_advanced = create_advanced_options()
892
- txt2vid_generate = gr.Button(
893
- "Step 3: λΉ„λ””μ˜€ 생성",
894
- variant="primary",
895
- size="lg",
896
- )
897
- with gr.Column():
898
- txt2vid_output = gr.Video(label="μƒμ„±λœ λΉ„λ””μ˜€")
899
-
900
-
901
- # Image to Video Tab
902
- with gr.TabItem("μ΄λ―Έμ§€λ‘œ λΉ„λ””μ˜€ λ§Œλ“€κΈ°"):
903
- with gr.Row():
904
- with gr.Column():
905
- img2vid_image = gr.Image(
906
- type="filepath",
907
- label="Step 1: μž…λ ₯ 이미지 μ—…λ‘œλ“œ",
908
- elem_id="image_upload",
909
- )
910
- img2vid_prompt = gr.Textbox(
911
- label="Step 2: ν”„λ‘¬ν”„νŠΈ μž…λ ₯",
912
- placeholder="이미지λ₯Ό μ–΄λ–»κ²Œ μ• λ‹ˆλ©”μ΄μ…˜ν™”ν• μ§€ μ„€λͺ…ν•˜μ„Έμš” (μ΅œμ†Œ 50자)...",
913
- value="κ·€μ—¬μš΄ 고양이",
914
- lines=5,
915
- )
916
- img2vid_enhance_toggle = Toggle(
917
- label="ν”„λ‘¬ν”„νŠΈ 증강",
918
- value=False,
919
- interactive=True,
920
- )
921
- img2vid_negative_prompt = gr.Textbox(
922
- label="Step 3: λ„€κ±°ν‹°λΈŒ ν”„λ‘¬ν”„νŠΈ μž…λ ₯",
923
- placeholder="λΉ„λ””μ˜€μ—μ„œ μ›ν•˜μ§€ μ•ŠλŠ” μš”μ†Œλ₯Ό μ„€λͺ…ν•˜μ„Έμš”...",
924
- value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
925
- lines=2,
926
- visible=False
927
- )
928
- img2vid_preset = gr.Dropdown(
929
- choices=[p["label"] for p in preset_options],
930
- value="[16:9] 512x320, 10.3초",
931
- label="Step 3: 해상도 프리셋 선택",
932
- )
933
- img2vid_frame_rate = gr.Slider(
934
- label="Step 4: ν”„λ ˆμž„ 레이트",
935
- minimum=21,
936
- maximum=30,
937
- step=1,
938
- value=25,
939
- visible=False
940
- )
941
- img2vid_advanced = create_advanced_options()
942
- img2vid_generate = gr.Button(
943
- "Step 4: λΉ„λ””μ˜€ 생성",
944
- variant="primary",
945
- size="lg",
946
- )
947
- with gr.Column():
948
- img2vid_output = gr.Video(label="μƒμ„±λœ λΉ„λ””μ˜€")
949
-
950
-
951
- # Scenario Tab
952
- with gr.TabItem("μ‹œλ‚˜λ¦¬μ˜€λ‘œ λΉ„λ””μ˜€ λ§Œλ“€κΈ°(숏폼)"):
953
- with gr.Row():
954
- with gr.Column(scale=1):
955
- script_topic = gr.Textbox(
956
- label="슀크립트 생성",
957
- placeholder="겨울 일본 온천 여행을 주제둜 밝은 λŠλ‚ŒμœΌλ‘œ 슀크립트 μƒμ„±ν•˜λΌ",
958
- lines=2
959
- )
960
- generate_script_btn = gr.Button("슀크립트 생성", variant="primary")
961
-
962
- scenario_input = gr.Textbox(
963
- label="μ˜μƒ 슀크립트 μž…λ ₯",
964
- placeholder="전체 μ‹œλ‚˜λ¦¬μ˜€λ₯Ό μž…λ ₯ν•˜μ„Έμš”...",
965
- lines=10
966
- )
967
- scenario_preset = gr.Dropdown(
968
- choices=[p["label"] for p in preset_options],
969
- value="[16:9] 512x320, 10.3초",
970
- label="ν™”λ©΄ 크기 선택"
971
- )
972
- analyze_btn = gr.Button("μ‹œλ‚˜λ¦¬μ˜€ 뢄석 및 ν”„λ‘¬ν”„νŠΈ 생성", variant="primary")
973
-
974
- with gr.Column(scale=2):
975
- with gr.Row():
976
- # Section 1
977
- with gr.Column():
978
- section1_prompt = gr.Textbox(
979
- label="1. λ°°κ²½ 및 ν•„μš”μ„±",
980
- lines=4
981
- )
982
- with gr.Row():
983
- section1_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
984
- section1_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
985
- section1_video = gr.Video(label="μ„Ήμ…˜ 1 μ˜μƒ")
986
-
987
- # Section 2
988
- with gr.Column():
989
- section2_prompt = gr.Textbox(
990
- label="2. ν₯λ―Έ 유발",
991
- lines=4
992
- )
993
- with gr.Row():
994
- section2_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
995
- section2_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
996
- section2_video = gr.Video(label="μ„Ήμ…˜ 2 μ˜μƒ")
997
-
998
-
999
-
1000
- with gr.Row():
1001
- # Section 3
1002
- with gr.Column():
1003
- section3_prompt = gr.Textbox(
1004
- label="3. ν•΄κ²°μ±… μ œμ‹œ",
1005
- lines=4
1006
- )
1007
- with gr.Row():
1008
- section3_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
1009
- section3_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
1010
- section3_video = gr.Video(label="μ„Ήμ…˜ 3 μ˜μƒ")
1011
-
1012
- # Section 4
1013
- with gr.Column():
1014
- section4_prompt = gr.Textbox(
1015
- label="4. λ³Έλ‘ ",
1016
- lines=4
1017
- )
1018
- with gr.Row():
1019
- section4_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
1020
- section4_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
1021
- section4_video = gr.Video(label="μ„Ήμ…˜ 4 μ˜μƒ")
1022
-
1023
- with gr.Row():
1024
- # Section 5
1025
- with gr.Column():
1026
- section5_prompt = gr.Textbox(
1027
- label="5. κ²°λ‘  및 κ°•μ‘°",
1028
- lines=4
1029
- )
1030
- with gr.Row():
1031
- section5_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
1032
- section5_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
1033
- section5_video = gr.Video(label="μ„Ήμ…˜ 5 μ˜μƒ")
1034
-
1035
- # 톡합 μ˜μƒ μ„Ήμ…˜
1036
- with gr.Row():
1037
- with gr.Column(scale=1):
1038
- merge_videos_btn = gr.Button("톡합 μ˜μƒ 생성", variant="primary", size="lg")
1039
-
1040
- with gr.Column(scale=2):
1041
- with gr.Row():
1042
- merged_video_output = gr.Video(label="톡합 μ˜μƒ")
1043
-
1044
-
1045
- # Text to Video Tab handlers
1046
- txt2vid_preset.change(
1047
- fn=preset_changed,
1048
- inputs=[txt2vid_preset],
1049
- outputs=[
1050
- txt2vid_current_height,
1051
- txt2vid_current_width,
1052
- txt2vid_current_num_frames,
1053
- txt2vid_advanced[3], # height_slider
1054
- txt2vid_advanced[4], # width_slider
1055
- txt2vid_advanced[5], # num_frames_slider
1056
- ]
1057
- )
1058
-
1059
- txt2vid_enhance_toggle.change(
1060
- fn=update_prompt_t2v,
1061
- inputs=[txt2vid_prompt, txt2vid_enhance_toggle],
1062
- outputs=txt2vid_prompt
1063
- )
1064
-
1065
- txt2vid_generate.click(
1066
- fn=generate_video_from_text,
1067
- inputs=[
1068
- txt2vid_prompt,
1069
- txt2vid_enhance_toggle,
1070
- txt2vid_negative_prompt,
1071
- txt2vid_frame_rate,
1072
- txt2vid_advanced[0], # seed
1073
- txt2vid_advanced[1], # inference_steps
1074
- txt2vid_advanced[2], # guidance_scale
1075
- txt2vid_current_height,
1076
- txt2vid_current_width,
1077
- txt2vid_current_num_frames,
1078
- ],
1079
- outputs=txt2vid_output,
1080
- )
1081
-
1082
- # Image to Video Tab handlers
1083
- img2vid_preset.change(
1084
- fn=preset_changed,
1085
- inputs=[img2vid_preset],
1086
- outputs=[
1087
- img2vid_current_height,
1088
- img2vid_current_width,
1089
- img2vid_current_num_frames,
1090
- img2vid_advanced[3], # height_slider
1091
- img2vid_advanced[4], # width_slider
1092
- img2vid_advanced[5], # num_frames_slider
1093
- ]
1094
- )
1095
-
1096
- img2vid_enhance_toggle.change(
1097
- fn=update_prompt_i2v,
1098
- inputs=[img2vid_prompt, img2vid_enhance_toggle],
1099
- outputs=img2vid_prompt
1100
- )
1101
-
1102
- img2vid_generate.click(
1103
- fn=generate_video_from_image,
1104
- inputs=[
1105
- img2vid_image,
1106
- img2vid_prompt,
1107
- img2vid_enhance_toggle,
1108
- img2vid_negative_prompt,
1109
- img2vid_frame_rate,
1110
- img2vid_advanced[0], # seed
1111
- img2vid_advanced[1], # inference_steps
1112
- img2vid_advanced[2], # guidance_scale
1113
- img2vid_current_height,
1114
- img2vid_current_width,
1115
- img2vid_current_num_frames,
1116
- ],
1117
- outputs=img2vid_output,
1118
- )
1119
-
1120
-
1121
-
1122
- # Scenario Tab handlers
1123
- generate_script_btn.click(
1124
- fn=generate_script,
1125
- inputs=[script_topic],
1126
- outputs=[scenario_input]
1127
- )
1128
-
1129
- analyze_btn.click(
1130
- fn=analyze_scenario,
1131
- inputs=[scenario_input],
1132
- outputs=[
1133
- section1_prompt, section2_prompt, section3_prompt,
1134
- section4_prompt, section5_prompt
1135
- ]
1136
- )
1137
-
1138
- # Per-section prompt regeneration handlers
1139
- section1_regenerate.click(
1140
- fn=lambda x: generate_single_section_prompt(x, 1),
1141
- inputs=[scenario_input],
1142
- outputs=section1_prompt
1143
- )
1144
-
1145
- section2_regenerate.click(
1146
- fn=lambda x: generate_single_section_prompt(x, 2),
1147
- inputs=[scenario_input],
1148
- outputs=section2_prompt
1149
- )
1150
-
1151
- section3_regenerate.click(
1152
- fn=lambda x: generate_single_section_prompt(x, 3),
1153
- inputs=[scenario_input],
1154
- outputs=section3_prompt
1155
- )
1156
-
1157
- section4_regenerate.click(
1158
- fn=lambda x: generate_single_section_prompt(x, 4),
1159
- inputs=[scenario_input],
1160
- outputs=section4_prompt
1161
- )
1162
-
1163
- section5_regenerate.click(
1164
- fn=lambda x: generate_single_section_prompt(x, 5),
1165
- inputs=[scenario_input],
1166
- outputs=section5_prompt
1167
- )
1168
-
1169
- # Per-section video generation handlers
1170
- section1_generate.click(
1171
- fn=lambda p, pr: generate_section_video(p, pr, 1),
1172
- inputs=[section1_prompt, scenario_preset],
1173
- outputs=section1_video
1174
- )
1175
-
1176
- section2_generate.click(
1177
- fn=lambda p, pr: generate_section_video(p, pr, 2),
1178
- inputs=[section2_prompt, scenario_preset],
1179
- outputs=section2_video
1180
- )
1181
-
1182
- section3_generate.click(
1183
- fn=lambda p, pr: generate_section_video(p, pr, 3),
1184
- inputs=[section3_prompt, scenario_preset],
1185
- outputs=section3_video
1186
- )
1187
-
1188
- section4_generate.click(
1189
- fn=lambda p, pr: generate_section_video(p, pr, 4),
1190
- inputs=[section4_prompt, scenario_preset],
1191
- outputs=section4_video
1192
- )
1193
-
1194
- section5_generate.click(
1195
- fn=lambda p, pr: generate_section_video(p, pr, 5),
1196
- inputs=[section5_prompt, scenario_preset],
1197
- outputs=section5_video
1198
- )
1199
-
1200
- # 톡합 μ˜μƒ 생성 ν•Έλ“€λŸ¬
1201
- merge_videos_btn.click(
1202
- fn=merge_section_videos,
1203
- inputs=[
1204
- section1_video,
1205
- section2_video,
1206
- section3_video,
1207
- section4_video,
1208
- section5_video
1209
- ],
1210
- outputs=merged_video_output
1211
- )
1212
-
1213
- if __name__ == "__main__":
1214
- iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
1215
- share=True,
1216
- show_api=False
1217
- )