Spaces:
Running
Running
import gradio as gr | |
from gradio_toggle import Toggle | |
import torch | |
from huggingface_hub import snapshot_download | |
from transformers import pipeline | |
from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder | |
from xora.models.transformers.transformer3d import Transformer3DModel | |
from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier | |
from xora.schedulers.rf import RectifiedFlowScheduler | |
from xora.pipelines.pipeline_xora_video import XoraVideoPipeline | |
from transformers import T5EncoderModel, T5Tokenizer | |
from xora.utils.conditioning_method import ConditioningMethod | |
from pathlib import Path | |
import safetensors.torch | |
import json | |
import numpy as np | |
import cv2 | |
from PIL import Image | |
import tempfile | |
import os | |
import gc | |
from openai import OpenAI | |
import re | |
import time | |
# System prompts sent as the "system" message when a user prompt is enhanced.
# The text-to-video and image-to-video prompts differ only in their opening
# line, so the shared guideline list is written once and appended to both.
_PROMPT_GUIDELINES = """μ£Όμ΄μ§ ν둬ννΈλ₯Ό λ€μ ꡬ쑰μ λ§κ² κ°μ ν΄μ£ΌμΈμ:
1. μ£Όμ λμμ λͺ νν ν λ¬Έμ₯μΌλ‘ μμ
2. ꡬ체μ μΈ λμκ³Ό μ μ€μ²λ₯Ό μκ° μμλλ‘ μ€λͺ
3. μΊλ¦ν°/κ°μ²΄μ μΈλͺ¨λ₯Ό μμΈν λ¬μ¬
4. λ°°κ²½κ³Ό νκ²½ μΈλΆ μ¬νμ ꡬ체μ μΌλ‘ ν¬ν¨
5. μΉ΄λ©λΌ κ°λμ μμ§μμ λͺ μ
6. μ‘°λͺ κ³Ό μμμ μμΈν μ€λͺ
7. λ³νλ κ°μμ€λ¬μ΄ μ¬κ±΄μ μμ°μ€λ½κ² ν¬ν¨
λͺ¨λ μ€λͺ μ νλμ μμ°μ€λ¬μ΄ λ¬Έλ¨μΌλ‘ μμ±νκ³ ,
촬μ κ°λ μ΄ μ΄¬μ λͺ©λ‘μ μ€λͺ νλ κ²μ²λΌ ꡬ체μ μ΄κ³ μκ°μ μΌλ‘ μμ±νμΈμ.
200λ¨μ΄λ₯Ό λμ§ μλλ‘ νλ, μ΅λν μμΈνκ² μμ±νμΈμ."""

system_prompt_t2v = "\n".join(
    (
        "λΉμ μ λΉλμ€ μμ±μ μν ν둬ννΈ μ λ¬Έκ°μ λλ€.",
        _PROMPT_GUIDELINES,
    )
)
system_prompt_i2v = "\n".join(
    (
        "λΉμ μ μ΄λ―Έμ§ κΈ°λ° λΉλμ€ μμ±μ μν ν둬ννΈ μ λ¬Έκ°μ λλ€.",
        _PROMPT_GUIDELINES,
    )
)
# Credentials come from the environment; os.getenv returns None when a
# variable is not set.
hf_token = os.getenv("HF_TOKEN")
openai_api_key = os.getenv("OPENAI_API_KEY")
# Chat-completions client shared by every prompt/script generation helper.
client = OpenAI(api_key=openai_api_key)
# Korean-to-English translation pipeline, placed on CUDA when available and
# on the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-ko-en",
    device=device,
    clean_up_tokenization_spaces=True
)
# Korean text detection function.
# The character class covers Hangul compatibility jamo (U+3131-U+314E
# consonants, U+314F-U+3163 vowels) and precomposed syllables
# (U+AC00-U+D7A3). It is written with escapes so the pattern cannot be
# corrupted when the file is re-encoded.
_HANGUL_PATTERN = re.compile("[\u3131-\u314e\u314f-\u3163\uac00-\ud7a3]")


def contains_korean(text):
    """Return True when *text* contains at least one Hangul character.

    Args:
        text: String to inspect. ``None`` and the empty string are treated
            as containing no Korean.

    Returns:
        bool: True if any Hangul jamo or syllable is present.
    """
    if not text:
        return False
    return bool(_HANGUL_PATTERN.search(text))
def translate_korean_prompt(prompt, max_length=450):
    """Translate a Korean prompt to English, chunking long text.

    Args:
        prompt: Text to translate. Returned unchanged when it is empty/None
            or when ``contains_korean`` finds no Korean characters in it.
        max_length: Maximum number of characters sent to the translator in
            one call; longer prompts are split on whitespace.

    Returns:
        The English translation, or the original ``prompt`` when translation
        is not needed or the translator raises (best-effort fallback).
    """
    # Guard empty/None first so the regex search is never handed a non-string.
    if not prompt or not contains_korean(prompt):
        return prompt

    def split_text(text, max_length):
        """Greedily pack whitespace-separated words into chunks."""
        chunks = []
        current_chunk = []
        current_length = 0
        for word in text.split():
            if current_length + len(word) + 1 > max_length:
                # Flush only a non-empty chunk: when the very first word is
                # already longer than max_length there is nothing to flush,
                # and an empty string must not be sent to the translator.
                if current_chunk:
                    chunks.append(' '.join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += len(word) + 1
        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks

    try:
        if len(prompt) > max_length:
            translated_chunks = [
                translator(chunk, max_length=512)[0]['translation_text']
                for chunk in split_text(prompt, max_length)
            ]
            final_translation = ' '.join(translated_chunks)
        else:
            final_translation = translator(prompt, max_length=512)[0]['translation_text']
        print(f"Original Korean prompt: {prompt}")
        print(f"Translated English prompt: {final_translation}")
        return final_translation
    except Exception as e:
        # Deliberate best-effort: generation proceeds with the untranslated text.
        print(f"Translation error: {e}")
        return prompt  # Return original prompt if translation fails
def enhance_prompt(prompt, type="t2v"):
    """Ask the chat model to rewrite *prompt* using the matching system prompt.

    Args:
        prompt: The user's prompt text.
        type: ``"t2v"`` selects ``system_prompt_t2v``; any other value
            selects ``system_prompt_i2v``.

    Returns:
        The stripped model response, or the original ``prompt`` if anything
        in the request or logging raises.
    """
    if type == "t2v":
        instructions = system_prompt_t2v
    else:
        instructions = system_prompt_i2v
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=conversation,
            max_tokens=2000,
        )
        rewritten = completion.choices[0].message.content.strip()
        # Log the before/after pair; kept inside the try so a failing print
        # also falls back to the original prompt.
        for line in (
            "\n=== ν둬ννΈ μ¦κ° κ²°κ³Ό ===",
            "Original Prompt:",
            prompt,
            "\nEnhanced Prompt:",
            rewritten,
            "========================\n",
        ):
            print(line)
        return rewritten
    except Exception as e:
        print(f"Error during prompt enhancement: {e}")
        return prompt
def update_prompt_t2v(prompt, enhance_toggle):
    """Text-to-video wrapper around ``update_prompt``."""
    return update_prompt(prompt, enhance_toggle, type="t2v")


def update_prompt_i2v(prompt, enhance_toggle):
    """Image-to-video wrapper around ``update_prompt``."""
    return update_prompt(prompt, enhance_toggle, type="i2v")


def update_prompt(prompt, enhance_toggle, type="t2v"):
    """Return the enhanced prompt when the toggle is on, else *prompt* as is.

    Args:
        prompt: The user's prompt text.
        enhance_toggle: Truthy to route the prompt through ``enhance_prompt``.
        type: Forwarded to ``enhance_prompt`` (``"t2v"`` or ``"i2v"``).
    """
    return enhance_prompt(prompt, type) if enhance_toggle else prompt
# Set model download directory within Hugging Face Spaces
model_path = "asset"
# Download the Lightricks/LTX-Video snapshot only when the directory does not
# exist yet; an existing directory is used as-is without re-checking contents.
if not os.path.exists(model_path):
    snapshot_download(
        "Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token
    )
# Global variables to load components
vae_dir = Path(model_path) / "vae"
unet_dir = Path(model_path) / "unet"
scheduler_dir = Path(model_path) / "scheduler"
# NOTE(review): same expression as the earlier `device` assignment made
# before the translator is built; this reassignment looks redundant.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def load_vae(vae_dir):
    """Build the causal video autoencoder from the files in *vae_dir*.

    Reads ``config.json`` and ``vae_diffusion_pytorch_model.safetensors``
    from the directory, loads the weights into a model created from the
    config, and returns it on the module-level ``device`` in bfloat16.
    """
    with open(vae_dir / "config.json", "r") as config_file:
        config = json.load(config_file)
    model = CausalVideoAutoencoder.from_config(config)
    weights = safetensors.torch.load_file(
        vae_dir / "vae_diffusion_pytorch_model.safetensors"
    )
    model.load_state_dict(weights)
    return model.to(device=device, dtype=torch.bfloat16)
def load_unet(unet_dir):
    """Build the 3D transformer from the files in *unet_dir*.

    Loads the config from ``config.json``, instantiates the model, loads
    ``unet_diffusion_pytorch_model.safetensors`` with ``strict=True``, and
    returns the model on the module-level ``device`` in bfloat16.
    """
    config = Transformer3DModel.load_config(unet_dir / "config.json")
    model = Transformer3DModel.from_config(config)
    weights = safetensors.torch.load_file(
        unet_dir / "unet_diffusion_pytorch_model.safetensors"
    )
    model.load_state_dict(weights, strict=True)
    return model.to(device=device, dtype=torch.bfloat16)
def load_scheduler(scheduler_dir):
    """Create the rectified-flow scheduler from ``scheduler_config.json``."""
    config = RectifiedFlowScheduler.load_config(
        scheduler_dir / "scheduler_config.json"
    )
    return RectifiedFlowScheduler.from_config(config)
# Helper function for image processing
def center_crop_and_resize(frame, target_height, target_width):
    """Center-crop *frame* to the target aspect ratio, then resize it.

    Args:
        frame: Array whose ``shape`` unpacks into three values, read here as
            (height, width, channels).
        target_height: Output height in pixels.
        target_width: Output width in pixels.

    Returns:
        The result of ``cv2.resize`` on the cropped frame.
    """
    src_h, src_w, _ = frame.shape
    wanted_ratio = target_width / target_height
    if src_w / src_h > wanted_ratio:
        # Source is wider than the target: trim equal margins left and right.
        crop_w = int(src_h * wanted_ratio)
        left = (src_w - crop_w) // 2
        cropped = frame[:, left : left + crop_w]
    else:
        # Source is taller (or equal): trim equal margins top and bottom.
        crop_h = int(src_w / wanted_ratio)
        top = (src_h - crop_h) // 2
        cropped = frame[top : top + crop_h, :]
    return cv2.resize(cropped, (target_width, target_height))
def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
    """Load an image file as a normalized 5-D float tensor.

    Args:
        image_path: Path of the image to open.
        target_height: Height passed to ``center_crop_and_resize``.
        target_width: Width passed to ``center_crop_and_resize``.

    Returns:
        A float tensor with values mapped from 0..255 to -1..1; the resized
        H x W x C array is permuted to C x H x W, then singleton dimensions
        are inserted at positions 0 and 2.
    """
    # Open inside a context manager so the file handle is closed right after
    # the pixel data has been copied into the numpy array.
    with Image.open(image_path) as source_image:
        image_np = np.array(source_image.convert("RGB"))
    frame_resized = center_crop_and_resize(image_np, target_height, target_width)
    frame_tensor = torch.tensor(frame_resized).permute(2, 0, 1).float()
    frame_tensor = (frame_tensor / 127.5) - 1.0
    return frame_tensor.unsqueeze(0).unsqueeze(2)
# Load models
# All components are built once at import time and shared by every request.
vae = load_vae(vae_dir)
unet = load_unet(unet_dir)
scheduler = load_scheduler(scheduler_dir)
patchifier = SymmetricPatchifier(patch_size=1)
# The text encoder and tokenizer are taken from the PixArt-XL checkpoint.
text_encoder = T5EncoderModel.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder"
).to(device)
tokenizer = T5Tokenizer.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer"
)
# NOTE: this rebinds the module-level name `pipeline`, which until here was
# the `pipeline` factory imported from transformers (already used to build
# `translator`). From this point on `pipeline` is the video pipeline.
pipeline = XoraVideoPipeline(
    transformer=unet,
    patchifier=patchifier,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    scheduler=scheduler,
    vae=vae,
).to(device)
# Preset options for resolution and frame configuration.
# Each row is (aspect tag, width, height, duration text, num_frames); the
# duration text is num_frames converted to seconds assuming 25 FPS.
_PRESET_ROWS = (
    ("[16:9 HD]", 1216, 704, "1.6", 41),
    ("[16:9]", 1088, 704, "2.0", 49),
    ("[16:9]", 1056, 640, "2.3", 57),
    ("[16:9]", 992, 608, "2.6", 65),
    ("[16:9]", 896, 608, "2.9", 73),
    ("[16:9]", 896, 544, "3.2", 81),
    ("[16:9]", 832, 544, "3.6", 89),
    ("[16:9]", 800, 512, "3.9", 97),
    ("[16:9]", 768, 512, "3.9", 97),
    ("[16:9]", 800, 480, "4.2", 105),
    ("[16:9]", 736, 480, "4.5", 113),
    ("[3:2]", 704, 480, "4.8", 121),
    ("[16:9]", 704, 448, "5.2", 129),
    ("[16:9]", 672, 448, "5.5", 137),
    ("[16:9]", 640, 416, "6.1", 153),
    ("[16:9]", 672, 384, "6.4", 161),
    ("[16:9]", 640, 384, "6.8", 169),
    ("[16:9]", 608, 384, "7.1", 177),
    ("[16:9]", 576, 384, "7.4", 185),
    ("[16:9]", 608, 352, "7.7", 193),
    ("[16:9]", 576, 352, "8.0", 201),
    ("[16:9]", 544, 352, "8.4", 209),
    ("[3:2]", 512, 352, "9.3", 233),
    ("[16:9]", 544, 320, "9.6", 241),
    ("[16:9]", 512, 320, "10.3", 257),
)
preset_options = [
    {
        "label": f"{tag} {width}x{height}, {seconds}μ΄",
        "width": width,
        "height": height,
        "num_frames": num_frames,
    }
    for tag, width, height, seconds, num_frames in _PRESET_ROWS
]
def preset_changed(preset):
    """Translate a preset label into state values plus three hide-updates.

    Args:
        preset: Label of one entry in ``preset_options``.

    Returns:
        A list of six items: ``gr.State`` objects for height, width and
        num_frames (in that order), followed by three
        ``gr.update(visible=False)`` values.

    Raises:
        gr.Error: If no preset carries the given label.
    """
    for option in preset_options:
        if option["label"] == preset:
            break
    else:
        raise gr.Error("Invalid preset selected")
    states = [gr.State(value=option[key]) for key in ("height", "width", "num_frames")]
    hidden = [gr.update(visible=False) for _ in range(3)]
    return states + hidden
def generate_video_from_text(
    prompt,
    enhance_prompt_toggle,
    negative_prompt,
    frame_rate,
    seed,
    num_inference_steps,
    guidance_scale,
    height,
    width,
    num_frames,
    progress=gr.Progress(),
):
    """Generate an MP4 from a text prompt and return its file path.

    Args:
        prompt: Description of the video; must be at least 50 characters
            after stripping.
        enhance_prompt_toggle: When truthy the prompt is first rewritten by
            ``enhance_prompt``.
        negative_prompt: Text describing what to avoid.
        frame_rate, seed, num_inference_steps, guidance_scale, height, width,
        num_frames: Generation settings; falsy values fall back to the
            defaults below (``seed`` only falls back when it is ``None``).
        progress: Gradio progress tracker updated after every step.

    Returns:
        Path of the written ``.mp4`` file.

    Raises:
        gr.Error: If the prompt is too short or the pipeline call fails.
    """
    if not prompt or len(prompt.strip()) < 50:
        raise gr.Error(
            "ν둬ννΈλ μ΅μ 50μ μ΄μμ΄μ΄μΌ ν©λλ€. λ μμΈν μ€λͺ μ μ 곡ν΄μ£ΌμΈμ.",
            duration=5,
        )
    # Prompt enhancement is enabled
    if enhance_prompt_toggle:
        prompt = enhance_prompt(prompt, "t2v")
    # Translate Korean prompts to English
    prompt = translate_korean_prompt(prompt)
    negative_prompt = translate_korean_prompt(negative_prompt)
    # Default values
    height = height or 320
    width = width or 512
    num_frames = num_frames or 257
    frame_rate = frame_rate or 25
    # 0 is a valid seed (the UI slider starts at 0), so only a missing seed
    # falls back to the default; int() guards against a float from the UI.
    seed = 171198 if seed is None else int(seed)
    num_inference_steps = num_inference_steps or 41
    guidance_scale = guidance_scale or 4.0
    sample = {
        "prompt": prompt,
        "prompt_attention_mask": None,
        "negative_prompt": negative_prompt,
        "negative_prompt_attention_mask": None,
        "media_items": None,
    }
    generator = torch.Generator(device="cpu").manual_seed(seed)

    def gradio_progress_callback(self, step, timestep, kwargs):
        progress((step + 1) / num_inference_steps)

    try:
        with torch.no_grad():
            images = pipeline(
                num_inference_steps=num_inference_steps,
                num_images_per_prompt=1,
                guidance_scale=guidance_scale,
                generator=generator,
                output_type="pt",
                height=height,
                width=width,
                num_frames=num_frames,
                frame_rate=frame_rate,
                **sample,
                is_video=True,
                vae_per_channel_normalize=True,
                conditioning_method=ConditioningMethod.UNCONDITIONAL,
                mixed_precision=True,
                callback_on_step_end=gradio_progress_callback,
            ).images
    except Exception as e:
        raise gr.Error(
            f"λΉλμ€ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ. μ€λ₯: {e}",
            duration=5,
        ) from e
    finally:
        torch.cuda.empty_cache()
        gc.collect()
    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returns a name and is documented as deprecated and race-prone.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        output_path = tmp_file.name
    video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
    # NOTE(review): assumes pipeline output lies in [0, 1]; values outside
    # that range would wrap in the uint8 cast - confirm against the pipeline.
    video_np = (video_np * 255).astype(np.uint8)
    height, width = video_np.shape[1:3]
    out = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
    )
    try:
        # Reverse the channel axis before handing frames to OpenCV.
        for frame in video_np[..., ::-1]:
            out.write(frame)
    finally:
        out.release()
    del images
    del video_np
    torch.cuda.empty_cache()
    return output_path
def generate_video_from_image(
    image_path,
    prompt,
    enhance_prompt_toggle,
    negative_prompt,
    frame_rate,
    seed,
    num_inference_steps,
    guidance_scale,
    height,
    width,
    num_frames,
    progress=gr.Progress(),
):
    """Generate an MP4 conditioned on a first-frame image; return its path.

    Args:
        image_path: Path of the conditioning image; required.
        prompt: Description of the video; must be at least 50 characters
            after stripping.
        enhance_prompt_toggle: When truthy the prompt is first rewritten by
            ``enhance_prompt``.
        negative_prompt: Text describing what to avoid.
        frame_rate, seed, num_inference_steps, guidance_scale, height, width,
        num_frames: Generation settings; falsy values fall back to the
            defaults below (``seed`` only falls back when it is ``None``).
        progress: Gradio progress tracker updated after every step.

    Returns:
        Path of the written ``.mp4`` file.

    Raises:
        gr.Error: If the image or prompt is missing/too short, or if
            generation or video writing fails.
    """
    if not image_path:
        raise gr.Error("μ λ ₯ μ΄λ―Έμ§λ₯Ό μ 곡ν΄μ£ΌμΈμ.", duration=5)
    if not prompt or len(prompt.strip()) < 50:
        raise gr.Error(
            "ν둬ννΈλ μ΅μ 50μ μ΄μμ΄μ΄μΌ ν©λλ€. λ μμΈν μ€λͺ μ μ 곡ν΄μ£ΌμΈμ.",
            duration=5,
        )
    # Prompt enhancement is enabled
    if enhance_prompt_toggle:
        prompt = enhance_prompt(prompt, "i2v")
    # Translate Korean prompts to English
    prompt = translate_korean_prompt(prompt)
    negative_prompt = translate_korean_prompt(negative_prompt)
    # Default values
    height = height or 320
    width = width or 512
    num_frames = num_frames or 257
    frame_rate = frame_rate or 25
    # 0 is a valid seed (the UI slider starts at 0), so only a missing seed
    # falls back to the default; int() guards against a float from the UI.
    seed = 171198 if seed is None else int(seed)
    num_inference_steps = num_inference_steps or 41
    guidance_scale = guidance_scale or 4.0
    # Load and preprocess the image
    media_items = (
        load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
    )
    sample = {
        "prompt": prompt,
        "prompt_attention_mask": None,
        "negative_prompt": negative_prompt,
        "negative_prompt_attention_mask": None,
        "media_items": media_items,
    }
    generator = torch.Generator(device="cpu").manual_seed(seed)

    def gradio_progress_callback(self, step, timestep, kwargs):
        progress((step + 1) / num_inference_steps)

    images = None
    video_np = None
    out = None
    try:
        with torch.no_grad():
            images = pipeline(
                num_inference_steps=num_inference_steps,
                num_images_per_prompt=1,
                guidance_scale=guidance_scale,
                generator=generator,
                output_type="pt",
                height=height,
                width=width,
                num_frames=num_frames,
                frame_rate=frame_rate,
                **sample,
                is_video=True,
                vae_per_channel_normalize=True,
                conditioning_method=ConditioningMethod.FIRST_FRAME,
                mixed_precision=True,
                callback_on_step_end=gradio_progress_callback,
            ).images
        # NamedTemporaryFile creates the file atomically; tempfile.mktemp
        # only returns a name and is documented as deprecated and race-prone.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
        video_np = (video_np * 255).astype(np.uint8)
        height, width = video_np.shape[1:3]
        out = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
        )
        # Reverse the channel axis before handing frames to OpenCV.
        for frame in video_np[..., ::-1]:
            out.write(frame)
    except Exception as e:
        raise gr.Error(
            f"λΉλμ€ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ. μ€λ₯: {e}",
            duration=5,
        ) from e
    finally:
        if out is not None:
            out.release()
        # Drop every reference to the large tensors BEFORE collecting and
        # emptying the CUDA cache, otherwise the memory cannot be returned.
        images = video_np = media_items = sample = None
        gc.collect()
        torch.cuda.empty_cache()
    return output_path
def create_advanced_options():
    """Create the "Advanced Options" accordion and return its sliders.

    Returns:
        A list ``[seed, inference_steps, guidance_scale, height_slider,
        width_slider, num_frames_slider]``. Only the seed slider is visible;
        the others are created with ``visible=False``.
    """
    with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=1000000,
            step=1,
            value=171198
        )
        inference_steps = gr.Slider(
            label="4.2 Inference Steps",
            minimum=1,
            maximum=50,
            step=1,
            value=41,
            visible=False
        )
        guidance_scale = gr.Slider(
            label="4.3 Guidance Scale",
            minimum=1.0,
            maximum=5.0,
            step=0.1,
            value=4.0,
            visible=False
        )
        # NOTE(review): the height/width maxima (1024) are below the largest
        # preset (1216x704); confirm whether these hidden sliders ever
        # receive preset values before widening them.
        height_slider = gr.Slider(
            label="4.4 Height",
            minimum=256,
            maximum=1024,
            step=64,
            value=320,
            visible=False,
        )
        width_slider = gr.Slider(
            label="4.5 Width",
            minimum=256,
            maximum=1024,
            step=64,
            value=512,
            visible=False,
        )
        # The maximum was 200 while the default is 257, which put the default
        # outside the slider's own range; 257 is also the largest
        # num_frames among the presets.
        # NOTE(review): the "4.5" prefix duplicates the width label.
        num_frames_slider = gr.Slider(
            label="4.5 Number of Frames",
            minimum=1,
            maximum=257,
            step=1,
            value=257,
            visible=False,
        )
    return [
        seed,
        inference_steps,
        guidance_scale,
        height_slider,
        width_slider,
        num_frames_slider,
    ]
# System-role message shared by the scenario prompt generators
# (analyze_scenario and generate_single_section_prompt). The text lists
# general rules, a five-section guide, and the same seven-point prompt
# structure used by the enhancement prompts. The string ends with a newline.
system_prompt_scenario = """λΉμ μ μμ μ€ν¬λ¦½νΈμ λ§λ λ°°κ²½ μμμ μμ±νκΈ° μν ν둬ννΈ μ λ¬Έκ°μ λλ€.
μ£Όμ΄μ§ μ€ν¬λ¦½νΈμ λΆμκΈ°μ λ§₯λ½μ μκ°μ λ°°κ²½μΌλ‘ νννλ, λ€μ μμΉμ λ°λμ μ€μνμΈμ:
1. μ νμ΄λ μλΉμ€λ₯Ό μ§μ μ μΌλ‘ λ¬μ¬νμ§ λ§ κ²
2. μ€ν¬λ¦½νΈμ κ°μ±κ³Ό ν€μ€λ§€λλ₯Ό νννλ λ°°κ²½ μμμ μ§μ€ν κ²
3. 5κ° μΉμ μ΄ νλμ μ΄μΌκΈ°μ²λΌ μμ°μ€λ½κ² μ°κ²°λλλ‘ ν κ²
4. μΆμμ μ΄κ³ μμ μ μΈ μκ° ννμ νμ©ν κ²
κ° μΉμ λ³ ν둬ννΈ μμ± κ°μ΄λ:
1. λ°°κ²½ λ° νμμ±: μ£Όμ μ μ λ°μ μΈ λΆμκΈ°λ₯Ό νννλ λ°°κ²½ μ¬
2. λ¬Έμ μ κΈ°: κΈ΄μ₯κ°μ΄λ κ°λ±μ μμνλ λΆμκΈ° μλ λ°°κ²½
3. ν΄κ²°μ± μ μ: ν¬λ§μ μ΄κ³ λ°μ ν€μ λ°°κ²½ μ ν
4. λ³Έλ‘ : μμ κ° μκ³ μ λ’°λλ₯Ό λμ΄λ λ°°κ²½
5. κ²°λ‘ : μν©νΈ μλ λ§λ¬΄λ¦¬λ₯Ό μν μλμ μΈ λ°°κ²½
λͺ¨λ μΉμ μ΄ μΌκ΄λ μ€νμΌκ³Ό ν€μ μ μ§νλ©΄μλ μμ°μ€λ½κ² μ΄μ΄μ§λλ‘ κ΅¬μ±νμΈμ.
κ° μΉμ μ ν둬ννΈ μμ±μ λ°λμ λ€μ ꡬ쑰μ λ§κ² κ°μ ν΄μ£ΌμΈμ:
1. μ£Όμ λμμ λͺ νν ν λ¬Έμ₯μΌλ‘ μμ
2. ꡬ체μ μΈ λμκ³Ό μ μ€μ²λ₯Ό μκ° μμλλ‘ μ€λͺ
3. μΊλ¦ν°/κ°μ²΄μ μΈλͺ¨λ₯Ό μμΈν λ¬μ¬
4. λ°°κ²½κ³Ό νκ²½ μΈλΆ μ¬νμ ꡬ체μ μΌλ‘ ν¬ν¨
5. μΉ΄λ©λΌ κ°λμ μμ§μμ λͺ μ
6. μ‘°λͺ κ³Ό μμμ μμΈν μ€λͺ
7. λ³νλ κ°μμ€λ¬μ΄ μ¬κ±΄μ μμ°μ€λ½κ² ν¬ν¨
λͺ¨λ μ€λͺ μ νλμ μμ°μ€λ¬μ΄ λ¬Έλ¨μΌλ‘ μμ±νκ³ ,
촬μ κ°λ μ΄ μ΄¬μ λͺ©λ‘μ μ€λͺ νλ κ²μ²λΌ ꡬ체μ μ΄κ³ μκ°μ μΌλ‘ μμ±νμΈμ.
200λ¨μ΄λ₯Ό λμ§ μλλ‘ νλ, μ΅λν μμΈνκ² μμ±νμΈμ.
"""
def analyze_scenario(scenario):
    """Analyze the scenario and generate a background-video prompt per section.

    Makes one chat-completion request for each of the five sections, pausing
    one second after every request.

    Args:
        scenario: The script text embedded in each request.

    Returns:
        A list of five strings of the form ``"<n>. <prompt>"``; if any
        request raises, five copies of a fixed error string instead.
    """
    # Section titles embedded in the user message; constant across the loop.
    section_descriptions = {
        1: "λ°°κ²½ λ° νμμ±: μ£Όμ μ μ λ°μ μΈ λΆμκΈ°λ₯Ό νννλ λ°°κ²½ μ¬",
        2: "ν₯λ―Έ μ λ°: κΈ΄μ₯κ°μ΄λ κ°λ±μ μμνλ λΆμκΈ° μλ λ°°κ²½",
        3: "ν΄κ²°μ± μ μ: ν¬λ§μ μ΄κ³ λ°μ ν€μ λ°°κ²½ μ ν",
        4: "λ³Έλ‘ : μμ κ° μκ³ μ λ’°λλ₯Ό λμ΄λ λ°°κ²½",
        5: "κ²°λ‘ : μν©νΈ μλ λ§λ¬΄λ¦¬λ₯Ό μν μλμ μΈ λ°°κ²½"
    }
    try:
        section_prompts = []
        for section_num in section_descriptions:
            # Build the user message for this section.
            user_message = f"""
λ€μ μ€ν¬λ¦½νΈμ {section_num}λ²μ§Έ μΉμ ({section_descriptions[section_num]})μ λν
λ°°κ²½ μμ ν둬ννΈλ₯Ό μμ±ν΄μ£ΌμΈμ.
μ€ν¬λ¦½νΈ:
{scenario}
μ£Όμμ¬ν:
1. ν΄λΉ μΉμ μ νΉμ±({section_descriptions[section_num]})μ λ§λ λΆμκΈ°μ ν€μ λ°μνμΈμ.
2. μ§μ μ μΈ μ ν/μλΉμ€ λ¬μ¬λ νΌνκ³ , κ°μ±μ μ΄κ³ μμ μ μΈ λ°°κ²½ μμμ μ§μ€νμΈμ.
3. λ€μ ꡬ쑰λ₯Ό λ°λμ ν¬ν¨νμΈμ:
- μ£Όμ λμμ λͺ νν ν λ¬Έμ₯μΌλ‘ μμ
- ꡬ체μ μΈ λμκ³Ό μ μ€μ²λ₯Ό μκ° μμλλ‘ μ€λͺ
- λ°°κ²½κ³Ό νκ²½ μΈλΆ μ¬νμ ꡬ체μ μΌλ‘ ν¬ν¨
- μΉ΄λ©λΌ κ°λμ μμ§μμ λͺ μ
- μ‘°λͺ κ³Ό μμμ μμΈν μ€λͺ
- λ³νλ κ°μμ€λ¬μ΄ μ¬κ±΄μ μμ°μ€λ½κ² ν¬ν¨"""
            completion = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_prompt_scenario},
                    {"role": "user", "content": user_message},
                ],
                max_tokens=1000,
                temperature=0.7
            )
            answer = completion.choices[0].message.content.strip()
            section_prompts.append(f"{section_num}. {answer}")
            # Short delay between API requests.
            time.sleep(1)
        return section_prompts
    except Exception as e:
        print(f"Error during scenario analysis: {e}")
        return ["Error occurred during analysis"] * 5
def generate_section_video(prompt, preset, section_number=1, base_seed=171198, progress=gr.Progress()):
    """Generate the video for one section.

    Args:
        prompt: Section prompt; must be at least 50 characters after stripping.
        preset: Label of an entry in ``preset_options``.
        section_number: Added to ``base_seed`` to get this section's seed.
        base_seed: Seed offset shared by all sections.
        progress: Gradio progress tracker forwarded to the generator.

    Returns:
        The path returned by ``generate_video_from_text``.

    Raises:
        gr.Error: For any failure, including the validation errors raised
            inside, re-wrapped with the section number.
    """
    try:
        if not prompt or len(prompt.strip()) < 50:
            raise gr.Error("ν둬ννΈλ μ΅μ 50μ μ΄μμ΄μ΄μΌ ν©λλ€.")
        if not preset:
            raise gr.Error("ν΄μλ ν리μ μ μ νν΄μ£ΌμΈμ.")
        chosen = None
        for option in preset_options:
            if option["label"] == preset:
                chosen = option
                break
        if not chosen:
            raise gr.Error("μ¬λ°λ₯΄μ§ μμ ν리μ μ λλ€.")
        settings = dict(
            prompt=prompt,
            enhance_prompt_toggle=False,  # prompt enhancement is disabled for section generation
            negative_prompt="low quality, worst quality, deformed, distorted, warped",
            frame_rate=25,
            seed=base_seed + section_number,
            num_inference_steps=41,
            guidance_scale=4.0,
            height=chosen["height"],
            width=chosen["width"],
            num_frames=chosen["num_frames"],
            progress=progress,
        )
        return generate_video_from_text(**settings)
    except Exception as e:
        print(f"Error in section {section_number}: {e}")
        raise gr.Error(f"μΉμ {section_number} μμ± μ€ μ€λ₯: {str(e)}")
    finally:
        torch.cuda.empty_cache()
        gc.collect()
def generate_single_section_prompt(scenario, section_number):
    """Generate the prompt for one individual section.

    Args:
        scenario: The script text embedded in the request.
        section_number: Key (1-5) into the section description table; other
            values raise ``KeyError`` before any request is made.

    Returns:
        ``"<section_number>. <prompt>"`` on success, or a fixed error string
        naming the section if the request raises.
    """
    section_descriptions = {
        1: "λ°°κ²½ λ° νμμ±: μ£Όμ μ μ λ°μ μΈ λΆμκΈ°λ₯Ό νννλ λ°°κ²½ μ¬",
        2: "ν₯λ―Έ μ λ°: ν₯λ―Έλ₯Ό μ λ°νκ³ κΈ°λκ°μ μ¦νμν€λ λ°°κ²½",
        3: "ν΄κ²°μ± μ μ: ν¬λ§μ μ΄κ³ λ°μ ν€μ λ°°κ²½ μ ν",
        4: "λ³Έλ‘ : μμ κ° μκ³ μ λ’°λλ₯Ό λμ΄λ λ°°κ²½",
        5: "κ²°λ‘ : μν©νΈ μλ λ§λ¬΄λ¦¬λ₯Ό μν μλμ μΈ λ°°κ²½"
    }
    system_message = {"role": "system", "content": system_prompt_scenario}
    user_message = {"role": "user", "content": f"""
λ€μ μ€ν¬λ¦½νΈμ {section_number}λ²μ§Έ μΉμ ({section_descriptions[section_number]})μ λν
λ°°κ²½ μμ ν둬ννΈλ₯Ό μμ±ν΄μ£ΌμΈμ.
μ€ν¬λ¦½νΈ:
{scenario}
μ£Όμμ¬ν:
1. ν΄λΉ μΉμ μ νΉμ±({section_descriptions[section_number]})μ λ§λ λΆμκΈ°μ ν€μ λ°μνμΈμ.
2. μ§μ μ μΈ μ ν/μλΉμ€ λ¬μ¬λ νΌνκ³ , κ°μ±μ μ΄κ³ μμ μ μΈ λ°°κ²½ μμμ μ§μ€νμΈμ.
3. λ€μ ꡬ쑰λ₯Ό λ°λμ ν¬ν¨νμΈμ:
- μ£Όμ λμμ λͺ νν ν λ¬Έμ₯μΌλ‘ μμ
- ꡬ체μ μΈ λμκ³Ό μ μ€μ²λ₯Ό μκ° μμλλ‘ μ€λͺ
- λ°°κ²½κ³Ό νκ²½ μΈλΆ μ¬νμ ꡬ체μ μΌλ‘ ν¬ν¨
- μΉ΄λ©λΌ κ°λμ μμ§μμ λͺ μ
- μ‘°λͺ κ³Ό μμμ μμΈν μ€λͺ
- λ³νλ κ°μμ€λ¬μ΄ μ¬κ±΄μ μμ°μ€λ½κ² ν¬ν¨"""}
    try:
        completion = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=[system_message, user_message],
            max_tokens=1000,  # raised token limit
            temperature=0.7
        )
        answer = completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error during prompt generation for section {section_number}: {e}")
        return f"Error occurred during prompt generation for section {section_number}"
    return f"{section_number}. {answer}"
# Video concatenation helper
def combine_videos(video_paths, output_path):
    """Concatenate several videos into one MP4 written to *output_path*.

    The frame rate and frame size are taken from the first video; frames of
    a different size are resized to match, as ``merge_section_videos`` does.

    Args:
        video_paths: Non-empty sequence of video file paths, all truthy.
        output_path: Destination file path.

    Returns:
        ``output_path``.

    Raises:
        gr.Error: If the list is empty, any entry is falsy, or reading or
            writing fails.
    """
    # An empty list used to slip past all() and fail later with an
    # IndexError message; reject it up front with the intended message.
    if not video_paths or not all(video_paths):
        raise gr.Error("λͺ¨λ μΉμ μ μμμ΄ μμ±λμ΄μΌ ν©λλ€.")
    out = None
    try:
        # Take the output properties from the first video.
        cap = cv2.VideoCapture(video_paths[0])
        try:
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            cap.release()
        # Configure the output video.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Append each video in order; paths missing on disk are skipped.
        for video_path in video_paths:
            if not os.path.exists(video_path):
                continue
            cap = cv2.VideoCapture(video_path)
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # The writer was opened for one fixed frame size.
                    if frame.shape[:2] != (height, width):
                        frame = cv2.resize(frame, (width, height))
                    out.write(frame)
            finally:
                cap.release()
        return output_path
    except Exception as e:
        raise gr.Error(f"λΉλμ€ κ²°ν© μ€ μ€λ₯ λ°μ: {e}") from e
    finally:
        # Release the writer on success and failure alike so the file is
        # finalized and the handle is not leaked.
        if out is not None:
            out.release()
def merge_section_videos(section1, section2, section3, section4, section5):
    """Merge the five section videos into a single MP4.

    Every section must be a path to an existing file that OpenCV can open.
    The frame rate and size are taken from the first video; frames of a
    different size are resized to match.

    Returns:
        Path of the merged temporary ``.mp4`` file.

    Raises:
        gr.Error: If a section is missing, its file does not exist or cannot
            be opened, or merging fails.
    """
    videos = []
    # Validate each section video with guard clauses. Any failure raises, so
    # reaching the end of the loop means all five paths were accepted.
    for i, video_path in enumerate([section1, section2, section3, section4, section5], 1):
        if not video_path:
            raise gr.Error(f"μΉμ {i}μ μμμ΄ μμ΅λλ€.")
        if not os.path.exists(video_path):
            raise gr.Error(f"μΉμ {i}μ μμ νμΌμ μ°Ύμ μ μμ΅λλ€.")
        try:
            cap = cv2.VideoCapture(video_path)
            try:
                readable = cap.isOpened()
            finally:
                # Release on every path, including the unreadable one.
                cap.release()
        except Exception as e:
            raise gr.Error(f"μΉμ {i} μμ μ²λ¦¬ μ€ μ€λ₯: {str(e)}") from e
        # Raised outside the try so this message is not caught and
        # re-wrapped by the generic handler above.
        if not readable:
            raise gr.Error(f"μΉμ {i}μ μμ νμΌμ΄ μμλμκ±°λ μ½μ μ μμ΅λλ€.")
        videos.append(video_path)
    out = None
    try:
        # NamedTemporaryFile creates the file atomically; tempfile.mktemp
        # only returns a name and is documented as deprecated and race-prone.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        # Take the output properties from the first video.
        cap = cv2.VideoCapture(videos[0])
        try:
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            cap.release()
        # Configure the output video.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Append each video in order.
        for video_path in videos:
            cap = cv2.VideoCapture(video_path)
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # Resize frames whose size differs from the first video.
                    if frame.shape[:2] != (height, width):
                        frame = cv2.resize(frame, (width, height))
                    out.write(frame)
            finally:
                cap.release()
        print(f"Successfully merged {len(videos)} videos")
        return output_path
    except Exception as e:
        raise gr.Error(f"λΉλμ€ κ²°ν© μ€ μ€λ₯ λ°μ: {e}") from e
    finally:
        # Release the writer on success and failure alike.
        if out is not None:
            out.release()
def generate_script(topic):
    """Generate a five-section video script for the given topic.

    Args:
        topic: Subject of the script. A falsy value short-circuits with a
            fixed message and no request is made.

    Returns:
        The stripped model response, or a fixed error message if the
        request raises.
    """
    if not topic:
        return "μ£Όμ λ₯Ό μ λ ₯ν΄μ£ΌμΈμ."
    system_message = """λΉμ μ μμ μ€ν¬λ¦½νΈ μμ± μ λ¬Έκ°μ λλ€.
μ£Όμ΄μ§ μ£Όμ λ‘ λ€μ ꡬ쑰μ λ§λ 5κ° μΉμ μ μ€ν¬λ¦½νΈλ₯Ό μμ±ν΄μ£ΌμΈμ:
1. λ°°κ²½ λ° νμμ±: μ£Όμ μκ°μ μμ²μμ ν₯λ―Έ μ λ°
2. ν₯λ―Έ μ λ°: ꡬ체μ μΈ λ΄μ© μ κ°μ νΈκΈ°μ¬ μκ·Ή
3. ν΄κ²°μ± μ μ: ν΅μ¬ λ΄μ©κ³Ό ν΄κ²°λ°©μ μ μ
4. λ³Έλ‘ : μμΈν μ€λͺ κ³Ό μ₯μ λΆκ°
5. κ²°λ‘ : ν΅μ¬ λ©μμ§ κ°μ‘°μ νλ μ λ
κ° μΉμ μ μμ°μ€λ½κ² μ°κ²°λμ΄μΌ νλ©°,
μ 체μ μΌλ‘ μΌκ΄λ ν€κ³Ό λΆμκΈ°λ₯Ό μ μ§νλ©΄μλ
μμ²μμ κ΄μ¬μ λκΉμ§ μ μ§ν μ μλλ‘ μμ±ν΄μ£ΌμΈμ."""
    user_message = f"λ€μ μ£Όμ λ‘ μμ μ€ν¬λ¦½νΈλ₯Ό μμ±ν΄μ£ΌμΈμ: {topic}"
    try:
        completion = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            max_tokens=2000,
            temperature=0.7
        )
        script = completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error during script generation: {e}")
        return "μ€ν¬λ¦½νΈ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€."
    return script
def cleanup():
    """Free unreachable Python objects, then release cached CUDA memory.

    Returns:
        None.
    """
    # Collect first: tensors that are only kept alive by reference cycles are
    # handed back to the caching allocator when the garbage collector frees
    # them, so emptying the cache before collecting would leave those blocks
    # cached until the next call.
    gc.collect()
    torch.cuda.empty_cache()
# NOTE(review): the container nesting below was reconstructed from a copy of
# this file whose indentation had been flattened -- confirm the hierarchy
# (in particular the placement of the merged-video row) against the running UI.

# Default values shared by several widgets.
_NEGATIVE_PROMPT_DEFAULT = "low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive"
_PRESET_DEFAULT = "[16:9] 512x320, 10.3μ΄"


def _preset_labels():
    """Return a fresh list with the label of every entry in ``preset_options``."""
    return [option["label"] for option in preset_options]


def _scenario_section(prompt_label, video_label):
    """Lay out one scenario section inside a new column.

    Widgets are created in this order: prompt textbox, a row with the
    "regenerate prompt" and "generate video" buttons, then the video player.

    Returns:
        Tuple ``(prompt_box, regenerate_button, generate_button, video)``.
    """
    with gr.Column():
        prompt_box = gr.Textbox(label=prompt_label, lines=4)
        with gr.Row():
            regenerate_button = gr.Button("π ν둬ννΈ μμ±")
            generate_button = gr.Button("π μμ μμ±")
        video = gr.Video(label=video_label)
    return prompt_box, regenerate_button, generate_button, video


def _section_prompt_handler(section_number):
    """Return a one-argument callback regenerating the prompt of one section."""
    return lambda x: generate_single_section_prompt(x, section_number)


def _section_video_handler(section_number):
    """Return a two-argument callback rendering the video of one section."""
    return lambda p, pr: generate_section_video(p, pr, section_number)


with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
    # State values written by the preset-change handlers and read back by the
    # generate handlers of the matching tab.
    txt2vid_current_height = gr.State(value=320)
    txt2vid_current_width = gr.State(value=512)
    txt2vid_current_num_frames = gr.State(value=257)
    img2vid_current_height = gr.State(value=320)
    img2vid_current_width = gr.State(value=512)
    img2vid_current_num_frames = gr.State(value=257)

    with gr.Tabs():
        # ---- Text-to-video tab ------------------------------------------
        with gr.TabItem("ν μ€νΈλ‘ λΉλμ€ λ§λ€κΈ°"):
            with gr.Row():
                with gr.Column():
                    txt2vid_prompt = gr.Textbox(
                        label="Step 1: ν둬ννΈ μ λ ₯",
                        placeholder="μμ±νκ³ μΆμ λΉλμ€λ₯Ό μ€λͺ νμΈμ (μ΅μ 50μ)...",
                        value="κ·μ¬μ΄ κ³ μμ΄",
                        lines=5,
                    )
                    txt2vid_enhance_toggle = Toggle(
                        label="ν둬ννΈ μ¦κ°", value=False, interactive=True
                    )
                    # Hidden widget: only its default value reaches the handler.
                    txt2vid_negative_prompt = gr.Textbox(
                        label="Step 2: λ€κ±°ν°λΈ ν둬ννΈ μ λ ₯",
                        placeholder="λΉλμ€μμ μνμ§ μλ μμλ₯Ό μ€λͺ νμΈμ...",
                        value=_NEGATIVE_PROMPT_DEFAULT,
                        lines=2,
                        visible=False,
                    )
                    txt2vid_preset = gr.Dropdown(
                        choices=_preset_labels(),
                        value=_PRESET_DEFAULT,
                        label="Step 2: ν΄μλ ν리μ μ ν",
                    )
                    # Hidden widget: only its default value reaches the handler.
                    txt2vid_frame_rate = gr.Slider(
                        label="Step 3: νλ μ λ μ΄νΈ",
                        minimum=21,
                        maximum=30,
                        step=1,
                        value=25,
                        visible=False,
                    )
                    txt2vid_advanced = create_advanced_options()
                    txt2vid_generate = gr.Button(
                        "Step 3: λΉλμ€ μμ±", variant="primary", size="lg"
                    )
                with gr.Column():
                    txt2vid_output = gr.Video(label="μμ±λ λΉλμ€")

        # ---- Image-to-video tab -----------------------------------------
        with gr.TabItem("μ΄λ―Έμ§λ‘ λΉλμ€ λ§λ€κΈ°"):
            with gr.Row():
                with gr.Column():
                    img2vid_image = gr.Image(
                        type="filepath",
                        label="Step 1: μ λ ₯ μ΄λ―Έμ§ μ λ‘λ",
                        elem_id="image_upload",
                    )
                    img2vid_prompt = gr.Textbox(
                        label="Step 2: ν둬ννΈ μ λ ₯",
                        placeholder="μ΄λ―Έμ§λ₯Ό μ΄λ»κ² μ λλ©μ΄μ νν μ§ μ€λͺ νμΈμ (μ΅μ 50μ)...",
                        value="κ·μ¬μ΄ κ³ μμ΄",
                        lines=5,
                    )
                    img2vid_enhance_toggle = Toggle(
                        label="ν둬ννΈ μ¦κ°", value=False, interactive=True
                    )
                    # Hidden widget: only its default value reaches the handler.
                    img2vid_negative_prompt = gr.Textbox(
                        label="Step 3: λ€κ±°ν°λΈ ν둬ννΈ μ λ ₯",
                        placeholder="λΉλμ€μμ μνμ§ μλ μμλ₯Ό μ€λͺ νμΈμ...",
                        value=_NEGATIVE_PROMPT_DEFAULT,
                        lines=2,
                        visible=False,
                    )
                    img2vid_preset = gr.Dropdown(
                        choices=_preset_labels(),
                        value=_PRESET_DEFAULT,
                        label="Step 3: ν΄μλ ν리μ μ ν",
                    )
                    # Hidden widget: only its default value reaches the handler.
                    img2vid_frame_rate = gr.Slider(
                        label="Step 4: νλ μ λ μ΄νΈ",
                        minimum=21,
                        maximum=30,
                        step=1,
                        value=25,
                        visible=False,
                    )
                    img2vid_advanced = create_advanced_options()
                    img2vid_generate = gr.Button(
                        "Step 4: λΉλμ€ μμ±", variant="primary", size="lg"
                    )
                with gr.Column():
                    img2vid_output = gr.Video(label="μμ±λ λΉλμ€")

        # ---- Scenario tab -----------------------------------------------
        with gr.TabItem("μλ리μ€λ‘ λΉλμ€ λ§λ€κΈ°(μνΌ)"):
            with gr.Row():
                # Left column: script generation and scenario input.
                with gr.Column(scale=1):
                    script_topic = gr.Textbox(
                        label="μ€ν¬λ¦½νΈ μμ±",
                        placeholder="κ²¨μΈ μΌλ³Έ μ¨μ² μ¬νμ μ£Όμ λ‘ λ°μ λλμΌλ‘ μ€ν¬λ¦½νΈ μμ±νλΌ",
                        lines=2,
                    )
                    generate_script_btn = gr.Button("μ€ν¬λ¦½νΈ μμ±", variant="primary")
                    scenario_input = gr.Textbox(
                        label="μμ μ€ν¬λ¦½νΈ μ λ ₯",
                        placeholder="μ 체 μλ리μ€λ₯Ό μ λ ₯νμΈμ...",
                        lines=10,
                    )
                    scenario_preset = gr.Dropdown(
                        choices=_preset_labels(),
                        value=_PRESET_DEFAULT,
                        label="νλ©΄ ν¬κΈ° μ ν",
                    )
                    analyze_btn = gr.Button("μλλ¦¬μ€ λΆμ λ° ν둬ννΈ μμ±", variant="primary")

                # Right column: the five sections, laid out two per row.
                with gr.Column(scale=2):
                    with gr.Row():
                        (
                            section1_prompt,
                            section1_regenerate,
                            section1_generate,
                            section1_video,
                        ) = _scenario_section("1. λ°°κ²½ λ° νμμ±", "μΉμ 1 μμ")
                        (
                            section2_prompt,
                            section2_regenerate,
                            section2_generate,
                            section2_video,
                        ) = _scenario_section("2. ν₯λ―Έ μ λ°", "μΉμ 2 μμ")
                    with gr.Row():
                        (
                            section3_prompt,
                            section3_regenerate,
                            section3_generate,
                            section3_video,
                        ) = _scenario_section("3. ν΄κ²°μ± μ μ", "μΉμ 3 μμ")
                        (
                            section4_prompt,
                            section4_regenerate,
                            section4_generate,
                            section4_video,
                        ) = _scenario_section("4. λ³Έλ‘ ", "μΉμ 4 μμ")
                    with gr.Row():
                        (
                            section5_prompt,
                            section5_regenerate,
                            section5_generate,
                            section5_video,
                        ) = _scenario_section("5. κ²°λ‘ λ° κ°μ‘°", "μΉμ 5 μμ")

            # Merged-video area: trigger button on the left, player on the right.
            with gr.Row():
                with gr.Column(scale=1):
                    merge_videos_btn = gr.Button("ν΅ν© μμ μμ±", variant="primary", size="lg")
                with gr.Column(scale=2):
                    with gr.Row():
                        merged_video_output = gr.Video(label="ν΅ν© μμ")

    # ---- Text-to-video handlers -----------------------------------------
    txt2vid_preset.change(
        fn=preset_changed,
        inputs=[txt2vid_preset],
        outputs=[
            txt2vid_current_height,
            txt2vid_current_width,
            txt2vid_current_num_frames,
            txt2vid_advanced[3],  # height_slider
            txt2vid_advanced[4],  # width_slider
            txt2vid_advanced[5],  # num_frames_slider
        ],
    )
    txt2vid_enhance_toggle.change(
        fn=update_prompt_t2v,
        inputs=[txt2vid_prompt, txt2vid_enhance_toggle],
        outputs=txt2vid_prompt,
    )
    txt2vid_generate.click(
        fn=generate_video_from_text,
        inputs=[
            txt2vid_prompt,
            txt2vid_enhance_toggle,
            txt2vid_negative_prompt,
            txt2vid_frame_rate,
            txt2vid_advanced[0],  # seed
            txt2vid_advanced[1],  # inference_steps
            txt2vid_advanced[2],  # guidance_scale
            txt2vid_current_height,
            txt2vid_current_width,
            txt2vid_current_num_frames,
        ],
        outputs=txt2vid_output,
    )

    # ---- Image-to-video handlers ----------------------------------------
    img2vid_preset.change(
        fn=preset_changed,
        inputs=[img2vid_preset],
        outputs=[
            img2vid_current_height,
            img2vid_current_width,
            img2vid_current_num_frames,
            img2vid_advanced[3],  # height_slider
            img2vid_advanced[4],  # width_slider
            img2vid_advanced[5],  # num_frames_slider
        ],
    )
    img2vid_enhance_toggle.change(
        fn=update_prompt_i2v,
        inputs=[img2vid_prompt, img2vid_enhance_toggle],
        outputs=img2vid_prompt,
    )
    img2vid_generate.click(
        fn=generate_video_from_image,
        inputs=[
            img2vid_image,
            img2vid_prompt,
            img2vid_enhance_toggle,
            img2vid_negative_prompt,
            img2vid_frame_rate,
            img2vid_advanced[0],  # seed
            img2vid_advanced[1],  # inference_steps
            img2vid_advanced[2],  # guidance_scale
            img2vid_current_height,
            img2vid_current_width,
            img2vid_current_num_frames,
        ],
        outputs=img2vid_output,
    )

    # ---- Scenario handlers ----------------------------------------------
    _section_prompts = [
        section1_prompt,
        section2_prompt,
        section3_prompt,
        section4_prompt,
        section5_prompt,
    ]
    _section_regenerate_buttons = [
        section1_regenerate,
        section2_regenerate,
        section3_regenerate,
        section4_regenerate,
        section5_regenerate,
    ]
    _section_generate_buttons = [
        section1_generate,
        section2_generate,
        section3_generate,
        section4_generate,
        section5_generate,
    ]
    _section_videos = [
        section1_video,
        section2_video,
        section3_video,
        section4_video,
        section5_video,
    ]

    generate_script_btn.click(
        fn=generate_script,
        inputs=[script_topic],
        outputs=[scenario_input],
    )
    analyze_btn.click(
        fn=analyze_scenario,
        inputs=[scenario_input],
        outputs=list(_section_prompts),
    )

    # Regenerate the prompt of a single section from the full scenario text.
    for _number, (_button, _prompt_box) in enumerate(
        zip(_section_regenerate_buttons, _section_prompts), start=1
    ):
        _button.click(
            fn=_section_prompt_handler(_number),
            inputs=[scenario_input],
            outputs=_prompt_box,
        )

    # Render the video of a single section from its prompt and the preset.
    for _number, (_button, _prompt_box, _video) in enumerate(
        zip(_section_generate_buttons, _section_prompts, _section_videos), start=1
    ):
        _button.click(
            fn=_section_video_handler(_number),
            inputs=[_prompt_box, scenario_preset],
            outputs=_video,
        )

    # Merge the five section videos into one.
    merge_videos_btn.click(
        fn=merge_section_videos,
        inputs=list(_section_videos),
        outputs=merged_video_output,
    )
if __name__ == "__main__":
    # Enable the request queue (max_size=64, default_concurrency_limit=1,
    # api_open=False), then start the server with share=True and the API
    # page hidden.
    _queued_iface = iface.queue(
        max_size=64,
        default_concurrency_limit=1,
        api_open=False,
    )
    _queued_iface.launch(share=True, show_api=False)