import gc
import os
from pathlib import Path

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video
from huggingface_hub import snapshot_download
from PIL import Image
# Configuration
gc.collect()
torch.cuda.empty_cache()
torch.set_grad_enabled(False)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
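# Deterministic cuDNN kernels trade some speed for reproducible results from a
# fixed seed; benchmark mode is disabled for the same reason.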
# Download the base model into the Space's local storage
model_id = "hunyuanvideo-community/HunyuanVideo"
base_path = f"/home/user/app/{model_id}"
os.makedirs(base_path, exist_ok=True)
snapshot_download(repo_id=model_id, local_dir=base_path)
# Load transformer | |
ckp_path = Path(base_path) | |
gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf" | |
transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}" | |
transformer = HunyuanVideoTransformer3DModel.from_single_file( | |
transformer_path, | |
quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), | |
torch_dtype=torch.bfloat16, | |
).to('cuda') | |
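# Note: the Q4_0 GGUF file stores 4-bit weights, roughly a quarter of the bf16
# footprint, which is what lets the transformer fit alongside the rest of the
# pipeline on a single GPU; compute still runs in bfloat16 via compute_dtype.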
# Initialize pipeline
pipe = HunyuanVideoPipeline.from_pretrained(
    ckp_path,
    transformer=transformer,
    torch_dtype=torch.float16,
).to("cuda")
# Configure VAE
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
pipe.vae.eval()
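# Tiling decodes the latents in spatial tiles and slicing splits the batch, so
# peak VRAM during VAE decode stays bounded at a small cost in speed.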
# Available LoRAs in the TTV4ME repository
TTV4ME_Loras = {
    "Top_Off.safetensors": "Top_Off.safetensors",
    "huanyan_helper.safetensors": "huanyan_helper.safetensors",
    "huanyan_helper_alpha.safetensors": "huanyan_helper_alpha.safetensors",
    "hunyuan-t-solo-v1.0.safetensors": "hunyuan-t-solo-v1.0.safetensors",
    "stripe_v2.safetensors": "stripe_v2.safetensors",
}
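# Keys double as the CheckboxGroup choice labels and as the weight_name passed
# to load_lora_weights, so the dict is effectively an ordered list of filenames.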
# Illustration LoRA
ILLUSTRATION_LORA = "sergidev/IllustrationTTV"
ILLUSTRATION_LORA_NAME = "hunyuan_flat_color_v2.safetensors"
ILLUSTRATION_ADAPTER_NAME = "hyvid_lora_adapter"
# Load default LoRA adapters
pipe.load_lora_weights(
    "Sergidev/TTV4ME",  # Private repository
    weight_name="stripe_v2.safetensors",
    adapter_name="hunyuanvideo-lora",
    token=os.environ.get("HF_TOKEN"),  # Access token from Space secrets
)
pipe.load_lora_weights(
    ILLUSTRATION_LORA,
    weight_name=ILLUSTRATION_LORA_NAME,
    adapter_name=ILLUSTRATION_ADAPTER_NAME,
)
# Set combined adapter weights
pipe.set_adapters(["hunyuanvideo-lora", ILLUSTRATION_ADAPTER_NAME], [0.9, 0.8])
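# Both adapters stay active by default; the second argument scales each
# adapter's contribution (0.9 for stripe_v2, 0.8 for the illustration LoRA).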
# Memory cleanup
gc.collect()
torch.cuda.empty_cache()

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

# Track TTV4ME adapters already loaded into the pipeline; hasattr() on the
# pipeline cannot detect loaded adapters, so they are recorded explicitly.
LOADED_TTV4ME_ADAPTERS = set()
@spaces.GPU  # Request a GPU for each call on ZeroGPU hardware; no-op otherwise
def generate(
    prompt,
    uploaded_image,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed_value,
    fps,
    lora_names,
    *lora_weights,  # One weight slider per entry in TTV4ME_Loras, in order
    progress=gr.Progress(track_tqdm=True),
):
    with torch.cuda.device(0):
        if seed_value == -1:
            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
        generator = torch.Generator('cuda').manual_seed(seed_value)

        # Handle image input; resize to the requested resolution so it always
        # matches the specified width and height
        if uploaded_image is not None:
            init_image = Image.open(uploaded_image).convert("RGB").resize((width, height))
        else:
            init_image = None
        # Configure LoRA adapters
        adapter_names = [ILLUSTRATION_ADAPTER_NAME]  # Always include the illustration LoRA
        adapter_weights = [0.8]  # Illustration LoRA weight
        all_loras = list(TTV4ME_Loras.keys())
        for lora_name in lora_names:
            if lora_name == "None":
                continue
            adapter_name = "ttv4me_" + lora_name.split('.')[0]  # Unique adapter name
            adapter_names.append(adapter_name)
            # Weight sliders are positional over the full LoRA list, so look the
            # weight up by the LoRA's index rather than by selection order
            adapter_weights.append(lora_weights[all_loras.index(lora_name)])
            # Load the LoRA on first use only
            if adapter_name not in LOADED_TTV4ME_ADAPTERS:
                pipe.load_lora_weights(
                    "Sergidev/TTV4ME",  # Private repository
                    weight_name=lora_name,
                    adapter_name=adapter_name,
                    token=os.environ.get("HF_TOKEN"),  # Access token from Space secrets
                )
                LOADED_TTV4ME_ADAPTERS.add(adapter_name)
        pipe.set_adapters(adapter_names, adapter_weights)
        with torch.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode():
            # Forward the init image only when one was provided; the pipeline
            # must support image conditioning for that path to work
            call_kwargs = dict(
                prompt=prompt,
                height=height,
                width=width,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                generator=generator,
            )
            if init_image is not None:
                call_kwargs["image"] = init_image
            output = pipe(**call_kwargs).frames[0]

        output_path = "output.mp4"
        export_to_video(output, output_path, fps=fps)
        torch.cuda.empty_cache()
        gc.collect()
        return output_path
def apply_preset(preset_name, *current_values):
    if preset_name == "Higher Resolution":
        return [608, 448, 24, 29, 12]
    elif preset_name == "More Frames":
        return [512, 320, 42, 27, 14]
    return current_values
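# Preset values map positionally onto [height, width, num_frames,
# num_inference_steps, fps], matching the outputs wired to the preset buttons.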
css = """ | |
#col-container { | |
margin: 0 auto; | |
max-width: 850px; | |
} | |
.dark-theme { | |
background-color: #1f1f1f; | |
color: #ffffff; | |
} | |
.container { | |
margin: 0 auto; | |
padding: 20px; | |
border-radius: 10px; | |
background-color: #2d2d2d; | |
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); | |
} | |
.title { | |
text-align: center; | |
margin-bottom: 1em; | |
color: #ffffff; | |
} | |
.description { | |
text-align: center; | |
margin-bottom: 2em; | |
color: #cccccc; | |
font-size: 0.95em; | |
line-height: 1.5; | |
} | |
.prompt-container { | |
background-color: #363636; | |
padding: 15px; | |
border-radius: 8px; | |
margin-bottom: 1em; | |
width: 100%; | |
} | |
.prompt-textbox { | |
min-height: 80px !important; | |
} | |
.preset-buttons { | |
display: flex; | |
gap: 10px; | |
justify-content: center; | |
margin-bottom: 1em; | |
} | |
.support-text { | |
text-align: center; | |
margin-top: 1em; | |
color: #cccccc; | |
font-size: 0.9em; | |
} | |
a { | |
color: #00a7e1; | |
text-decoration: none; | |
} | |
a:hover { | |
text-decoration: underline; | |
} | |
""" | |
with gr.Blocks(css=css, theme="dark") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# 🎬 Hunyuan Studio", elem_classes=["title"])
        gr.Markdown(
            """Image-to-video and text-to-video generation, with multiple LoRAs to choose from.
            This space uses the 'hunyuan flat color v2' LoRA by Motimalu to generate better 2D animated sequences. The prompt is limited to 77 tokens.
            If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
            elem_classes=["description"]
        )
        with gr.Column(elem_classes=["prompt-container"]):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt here (include the terms 'flat color, no lineart, blending' for 2D illustration)",
                show_label=False,
                elem_classes=["prompt-textbox"],
                lines=3
            )
        with gr.Column(elem_classes=["prompt-container"]):
            # gr.Image has no image_types parameter; use type="filepath" so the
            # handler can open the upload with PIL
            image_input = gr.Image(label="Upload Image (Optional)", type="filepath")
        with gr.Row():
            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
        with gr.Row(elem_classes=["preset-buttons"]):
            preset_high_res = gr.Button("📺 Higher Resolution Preset")
            preset_more_frames = gr.Button("🎞️ More Frames Preset")
        with gr.Row():
            result = gr.Video(label="Generated Video")
        with gr.Accordion("⚙️ Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed (-1 for random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=16,
                    value=608,
                )
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=16,
                    value=448,
                )
            with gr.Row():
                num_frames = gr.Slider(
                    label="Number of frames to generate",
                    minimum=1,
                    maximum=257,
                    step=1,
                    value=24,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=29,
                )
                fps = gr.Slider(
                    label="Frames per second",
                    minimum=1,
                    maximum=60,
                    step=1,
                    value=12,
                )
            # LoRA selection
            lora_names = gr.CheckboxGroup(
                choices=list(TTV4ME_Loras.keys()),
                label="Select TTV4ME LoRAs"
            )
            # One weight slider per LoRA, labeled by filename so each slider's
            # target is unambiguous
            lora_weights = []
            for lora_file in TTV4ME_Loras:
                lora_weights.append(gr.Slider(
                    label=f"Weight for {lora_file}",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                    visible=False  # Hidden until the LoRA is selected
                ))
    def update_lora_visibility(selected_loras):
        # Return visibility updates (not raw booleans) so the sliders toggle
        return [gr.update(visible=(lora in selected_loras)) for lora in TTV4ME_Loras]

    lora_names.change(
        update_lora_visibility,
        inputs=[lora_names],
        outputs=lora_weights
    )
    # Event handling
    input_components = [prompt, image_input, height, width, num_frames, num_inference_steps, seed, fps, lora_names]
    input_components.extend(lora_weights)

    run_button.click(
        fn=generate,
        inputs=input_components,
        outputs=[result],
    )
    # Preset button handlers
    preset_high_res.click(
        fn=lambda: apply_preset("Higher Resolution"),
        outputs=[height, width, num_frames, num_inference_steps, fps]
    )
    preset_more_frames.click(
        fn=lambda: apply_preset("More Frames"),
        outputs=[height, width, num_frames, num_inference_steps, fps]
    )
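
# Entry point, assuming this file runs as the Space's app.py; queue() is an
# optional addition here to serialize concurrent GPU requests.
if __name__ == "__main__":
    demo.queue().launch()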