# Spaces: Running on Zero
import spaces  # kept first, ahead of the torch/diffusers imports, as in the original order

import gc
import os
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video
from huggingface_hub import snapshot_download
# ---------------------------------------------------------------------------
# One-time process setup: runs at import, before the UI is built.
# ---------------------------------------------------------------------------

# Start from a clean allocator state; autograd is switched off globally
# because this script only performs inference.
gc.collect()
torch.cuda.empty_cache()
torch.set_grad_enabled(False)

# Deterministic cuDNN kernels, no autotuning.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Mirror the base pipeline repository into a fixed local directory.
model_id = "hunyuanvideo-community/HunyuanVideo"
base_path = f"/home/user/app/{model_id}"
ckp_path = Path(base_path)
ckp_path.mkdir(parents=True, exist_ok=True)
snapshot_download(repo_id=model_id, local_dir=base_path)

# GGUF-quantized transformer, loaded from a single remote checkpoint file and
# moved to the GPU right away.
gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
transformer = HunyuanVideoTransformer3DModel.from_single_file(
    transformer_path,
    torch_dtype=torch.bfloat16,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
).to("cuda")

# The remaining pipeline components come from the local snapshot; the
# pre-built transformer above is injected instead of the snapshot's own.
pipe = HunyuanVideoPipeline.from_pretrained(
    ckp_path,
    torch_dtype=torch.float16,
    transformer=transformer,
)

if pipe.text_encoder:
    pipe.text_encoder = pipe.text_encoder.to("cuda")
    pipe.text_encoder.eval()

# VAE: turn on tiled and sliced decoding, then place it on the GPU.
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
pipe.vae.eval()
pipe.vae = pipe.vae.to("cuda")

pipe = pipe.to("cuda")

# Attach the LoRA adapter and weight it at 1.2.
pipe.load_lora_weights(
    "sergidev/IllustrationTTV",
    adapter_name="hyvid_lora_adapter",
    weight_name="hunyuan_flat_color_v2.safetensors",
)
pipe.set_adapters("hyvid_lora_adapter", 1.2)

# Drop whatever temporaries loading left behind.
gc.collect()
torch.cuda.empty_cache()

# Bounds used by the UI sliders and by random seed selection.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
def generate(
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed_value,
    fps,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the video pipeline for *prompt* and return the path of the MP4 written.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        height: Forwarded to the pipeline as ``height``.
        width: Forwarded to the pipeline as ``width``.
        num_frames: Forwarded to the pipeline as ``num_frames``.
        num_inference_steps: Forwarded to the pipeline as
            ``num_inference_steps``.
        seed_value: Seed for the CUDA generator. ``-1`` means "draw a random
            seed in ``[0, MAX_SEED)``".
        fps: Frame rate passed to ``export_to_video``.
        progress: Gradio progress tracker created with ``track_tqdm=True``;
            it is not referenced in the body.

    Returns:
        The path of the exported video file (``"output.mp4"``).
    """
    # NOTE(review): `spaces` is imported at the top of the file but no
    # `@spaces.GPU` decorator is applied to this function - confirm whether
    # the deployment target needs one.

    # UI widgets may hand numbers over as floats; the generator seed and the
    # pipeline's size/step arguments need real integers.
    height = int(height)
    width = int(width)
    num_frames = int(num_frames)
    num_inference_steps = int(num_inference_steps)
    fps = int(fps)
    seed_value = int(seed_value)

    with torch.cuda.device(0):
        if seed_value == -1:
            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
        generator = torch.Generator("cuda").manual_seed(seed_value)

        try:
            # inference_mode() already disables gradient tracking, so the
            # extra no_grad() context is not needed.
            with torch.autocast("cuda", dtype=torch.bfloat16), torch.inference_mode():
                output = pipe(
                    prompt=prompt,
                    height=height,
                    width=width,
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                    generator=generator,
                ).frames[0]

            # NOTE(review): this fixed filename is shared by every request;
            # confirm that requests are serialized before relying on it.
            output_path = "output.mp4"
            export_to_video(output, output_path, fps=fps)
            return output_path
        finally:
            # Release cached GPU memory even when generation or encoding
            # fails, so one failed request does not starve the next.
            torch.cuda.empty_cache()
            gc.collect()
def apply_preset(preset_name, *current_values):
    """Return the slider values that belong to *preset_name*.

    Preset values are ordered to match the outputs the preset buttons are
    wired to: ``[height, width, num_frames, num_inference_steps, fps]``.

    Args:
        preset_name: ``"Higher Resolution"`` or ``"More Frames"``.
        *current_values: Values to hand back unchanged when *preset_name* is
            not a known preset.

    Returns:
        A new list on every call: the preset's values, or a copy of
        *current_values* for an unknown name. Previously the fall-through
        returned a tuple while presets returned a list.
    """
    presets = {
        "Higher Resolution": (608, 448, 24, 29, 12),
        "More Frames": (512, 320, 42, 27, 14),
    }
    return list(presets.get(preset_name, current_values))
# Custom stylesheet passed to gr.Blocks(css=...); selectors match the
# elem_id / elem_classes values used when the layout is built.
css = """
#col-container {
margin: 0 auto;
max-width: 850px;
}
.dark-theme {
background-color: #1f1f1f;
color: #ffffff;
}
.container {
margin: 0 auto;
padding: 20px;
border-radius: 10px;
background-color: #2d2d2d;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.title {
text-align: center;
margin-bottom: 1em;
color: #ffffff;
}
.description {
text-align: center;
margin-bottom: 2em;
color: #cccccc;
font-size: 0.95em;
line-height: 1.5;
}
.prompt-container {
background-color: #363636;
padding: 15px;
border-radius: 8px;
margin-bottom: 1em;
width: 100%;
}
.prompt-textbox {
min-height: 80px !important;
}
.preset-buttons {
display: flex;
gap: 10px;
justify-content: center;
margin-bottom: 1em;
}
.support-text {
text-align: center;
margin-top: 1em;
color: #cccccc;
font-size: 0.9em;
}
a {
color: #00a7e1;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
"""
# Build the Gradio interface and wire its events.
# NOTE(review): the listing this was restored from had lost all indentation,
# so which container each component sits in is a best-effort reconstruction -
# confirm against the deployed layout.
with gr.Blocks(theme="dark", css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Header: title plus a short description of the Space.
        gr.Markdown("# 🎬 Illustration TTV", elem_classes=["title"])
        gr.Markdown(
            """Transform your text descriptions into illustrative videos using HunyuanVideo for free!
This space uses the 'hunyuan flat color v2' LORA by Motimalu to generate better 2d animated sequences. Prompt only handles 77 tokens.
If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
            elem_classes=["description"],
        )

        # Prompt entry.
        with gr.Column(elem_classes=["prompt-container"]):
            prompt = gr.Textbox(
                lines=3,
                label="Prompt",
                show_label=False,
                placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
                elem_classes=["prompt-textbox"],
            )

        # Primary action.
        with gr.Row():
            run_button = gr.Button("🎨 Generate", size="lg", variant="primary")

        # One-click parameter presets.
        with gr.Row(elem_classes=["preset-buttons"]):
            preset_high_res = gr.Button("📺 Higher Resolution Preset")
            preset_more_frames = gr.Button("🎞️ More Frames Preset")

        # Output area.
        with gr.Row():
            result = gr.Video(label="Generated Video")

        # Tunable generation parameters, collapsed by default.
        with gr.Accordion("⚙️ Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed (-1 for random)",
                value=-1,
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
            )

            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    value=608,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=16,
                )
                width = gr.Slider(
                    label="Width",
                    value=448,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=16,
                )

            with gr.Row():
                num_frames = gr.Slider(
                    label="Number of frames to generate",
                    value=24,
                    minimum=1.0,
                    maximum=257.0,
                    step=1,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    value=29,
                    minimum=1,
                    maximum=50,
                    step=1,
                )
                # NOTE(review): assumed to share this row with the two
                # sliders above - confirm.
                fps = gr.Slider(
                    label="Frames per second",
                    value=12,
                    minimum=1,
                    maximum=60,
                    step=1,
                )

    # Generate button: run the pipeline with the current settings and show
    # the resulting file in the video player.
    run_button.click(
        fn=generate,
        inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
        outputs=[result],
    )

    # Preset buttons: overwrite the five sliders with the preset's values.
    preset_high_res.click(
        fn=lambda: apply_preset("Higher Resolution"),
        outputs=[height, width, num_frames, num_inference_steps, fps],
    )
    preset_more_frames.click(
        fn=lambda: apply_preset("More Frames"),
        outputs=[height, width, num_frames, num_inference_steps, fps],
    )