Spaces:

Sergidev
/

Huanyan-Studio

Build error

App Files Files Community

Sergidev committed on Feb 21

Commit

0250a5e

1 Parent(s): eb9cf96

v2 p3

Browse files

Files changed (1) hide show

demo_app.py +334 -102

demo_app.py CHANGED Viewed

@@ -1,121 +1,353 @@
 import spaces
 import gradio as gr
 import numpy as np
-import torch
-from diffusers import HunyuanVideoPipeline
 from huggingface_hub import snapshot_download
 from PIL import Image
-import os
 # Configuration
-LORA_CHOICES = [
-    "Top_Off.safetensors",
-    "huanyan_helper.safetensors",
-    "huanyan_helper_alpha.safetensors",
-    "hunyuan-t-solo-v1.0.safetensors",
-    "stripe_v2.safetensors"
-]
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-# Initialize pipeline with ZeroGPU optimizations
-model_id = "Tencent-Hunyuan/Hunyuan-Video-Lite"
 pipe = HunyuanVideoPipeline.from_pretrained(
-    model_id,
     torch_dtype=torch.float16
 ).to("cuda")
-# Load LoRA adapters
-for lora_file in LORA_CHOICES:
-    pipe.load_lora_weights(
-        "Sergidev/TTV4ME",
-        weight_name=lora_file,
-        adapter_name=lora_file.split('.')[0],
-        token=os.environ.get("HF_TOKEN")
-    )
 @spaces.GPU(duration=300)
-def generate(prompt, image_input, height, width, num_frames,
-            num_inference_steps, seed_value, fps, selected_loras, lora_weights):
-    # Image validation
-    if image_input is not None:
-        img = Image.open(image_input)
-        if img.size != (width, height):
-            raise gr.Error(f"Image resolution {img.size} must match video resolution {width}x{height}")
-        prompt = f"Image prompt: {prompt}" if prompt else "Based on uploaded image"
-    # Set active LoRAs
-    active_adapters = []
-    adapter_weights = []
-    for idx, selected in enumerate(selected_loras):
-        if selected:
-            active_adapters.append(LORA_CHOICES[idx].split('.')[0])
-            adapter_weights.append(lora_weights[idx])
-    pipe.set_adapters(active_adapters, adapter_weights)
-    # Generate video
-    generator = torch.Generator('cuda').manual_seed(seed_value if seed_value != -1 else torch.seed())
-    if image_input:
-        output = pipe.image_to_video(
-            Image.open(image_input).convert("RGB"),
-            prompt=prompt,
-            height=height,
-            width=width,
-            num_frames=num_frames,
-            num_inference_steps=num_inference_steps,
-            generator=generator,
-        )
-    else:
-        output = pipe.text_to_video(
-            prompt=prompt,
-            height=height,
-            width=width,
-            num_frames=num_frames,
-            num_inference_steps=num_inference_steps,
-            generator=generator,
-        )
-    return output.frames[0]
-with gr.Blocks(theme="dark") as demo:
-    with gr.Column():
-        gr.Markdown("# 🎬 Hunyuan Studio")
-        with gr.Row():
-            with gr.Column():
-                prompt = gr.Textbox(label="Prompt")
-                image_input = gr.Image(label="Input Image", type="filepath")
-                with gr.Accordion("Advanced Settings"):
-                    resolution = gr.Dropdown(
-                        choices=["512x512", "768x768", "1024x1024"],
-                        value="512x512",
-                        label="Output Resolution"
                     )
-                    seed = gr.Slider(-1, MAX_SEED, value=-1, label="Seed")
-                    num_frames = gr.Slider(1, 257, 24, label="Frame Count")
-                    num_inference_steps = gr.Slider(1, 50, 25, label="Inference Steps")
-                    fps = gr.Slider(1, 60, 12, label="FPS")
-                    with gr.Accordion("LoRA Configuration"):
-                        lora_components = []
-                        for lora in LORA_CHOICES:
-                            lora_components.append(gr.Checkbox(label=f"Enable {lora}"))
-                            lora_components.append(gr.Slider(0.0, 1.0, 0.8, label=f"{lora} Weight"))
-                generate_btn = gr.Button("Generate Video")
-            with gr.Column():
-                output_video = gr.Video(label="Result")
-    generate_btn.click(
-        fn=generate,
-        inputs=[prompt, image_input,
-               gr.Number(512), gr.Number(512),  # Height/width from resolution
-               num_frames, num_inference_steps, seed, fps,
-               *lora_components],
-        outputs=output_video
-    )

 import spaces
+import gc
 import gradio as gr
 import numpy as np
+import os
+from pathlib import Path
+from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+from diffusers.utils import export_to_video
 from huggingface_hub import snapshot_download
+import torch
 from PIL import Image
 # Configuration
+# Module-level setup: runs once at import time. It downloads the base model,
+# builds the pipeline around a GGUF-quantized transformer, loads the default
+# LoRA adapters and defines the UI limits. Statement order matters here.
+# Release Python garbage and cached CUDA memory before loading anything.
+gc.collect()
+torch.cuda.empty_cache()
+# This app only runs inference, so autograd is switched off globally.
+torch.set_grad_enabled(False)
+# Ask cuDNN for deterministic algorithms and turn off its autotuner.
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+# Download the base model repository into a fixed directory under /home/user/app.
+model_id = "hunyuanvideo-community/HunyuanVideo"
+base_path = f"/home/user/app/{model_id}"
+os.makedirs(base_path, exist_ok=True)
+snapshot_download(repo_id=model_id, local_dir=base_path)
+# Load the transformer from a single GGUF file referenced by URL, with
+# bfloat16 as both the GGUF compute dtype and the torch dtype.
+ckp_path = Path(base_path)
+gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
+transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
+transformer = HunyuanVideoTransformer3DModel.from_single_file(
+    transformer_path,
+    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
+    torch_dtype=torch.bfloat16,
+).to('cuda')
+# Build the pipeline from the downloaded checkpoint directory, substituting the
+# transformer loaded above. The pipeline itself is requested in float16.
 pipe = HunyuanVideoPipeline.from_pretrained(
+    ckp_path,
+    transformer=transformer,
     torch_dtype=torch.float16
 ).to("cuda")
+# Configure the VAE: tiled and sliced decoding, eval mode.
+# NOTE(review): tiling/slicing are presumably here to lower peak VRAM - confirm.
+pipe.vae.enable_tiling()
+pipe.vae.enable_slicing()
+pipe.vae.eval()
+# LoRA weight files available in the TTV4ME repository. Each key maps to
+# itself; the keys are what the UI offers as checkbox choices.
+TTV4ME_Loras = {
+    "Top_Off.safetensors": "Top_Off.safetensors",
+    "huanyan_helper.safetensors": "huanyan_helper.safetensors",
+    "huanyan_helper_alpha.safetensors": "huanyan_helper_alpha.safetensors",
+    "hunyuan-t-solo-v1.0.safetensors": "hunyuan-t-solo-v1.0.safetensors",
+    "stripe_v2.safetensors": "stripe_v2.safetensors"
+}
+# Illustration LoRA identifiers.
+# NOTE(review): these three constants are not used by the load call below,
+# which repeats the same literals - keep the two in sync or use the constants.
+ILLUSTRATION_LORA = "sergidev/IllustrationTTV"
+ILLUSTRATION_LORA_NAME = "hunyuan_flat_color_v2.safetensors"
+ILLUSTRATION_ADAPTER_NAME = "hyvid_lora_adapter"
+# Load the two default LoRA adapters; the first needs HF_TOKEN from the environment.
+pipe.load_lora_weights(
+    "Sergidev/TTV4ME",  # Private repository
+    weight_name="stripe_v2.safetensors",
+    adapter_name="hunyuanvideo-lora",
+    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+)
+pipe.load_lora_weights(
+    "sergidev/IllustrationTTV",
+    weight_name="hunyuan_flat_color_v2.safetensors",
+    adapter_name="hyvid_lora_adapter"
+)
+# Activate both adapters together: 0.9 for "hunyuanvideo-lora", 0.8 for "hyvid_lora_adapter".
+pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
+# Memory cleanup after loading.
+gc.collect()
+torch.cuda.empty_cache()
+# UI limits: largest accepted seed (int32 max) and the upper bound of the
+# height/width sliders.
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1024
 @spaces.GPU(duration=300)
+def generate(
+        prompt,
+        uploaded_image,
+        height,
+        width,
+        num_frames,
+        num_inference_steps,
+        seed_value,
+        fps,
+        lora_names,
+        lora_weights,
+        progress=gr.Progress(track_tqdm=True),
+        *extra_lora_weights
+):
+    """Generate a video for `prompt`, write it to "output.mp4" and return that path.
+    Args:
+        prompt: Text prompt forwarded to the pipeline.
+        uploaded_image: Optional conditioning image as a file path, a PIL image
+            or a numpy array; None or an empty string means text-to-video.
+        height, width, num_frames, num_inference_steps, fps: Numeric settings;
+            each is coerced to int because sliders may deliver floats.
+        seed_value: Seed for the CUDA generator; -1 draws a random seed.
+        lora_names: Names of the selected TTV4ME LoRA files (may be empty or None).
+        lora_weights: Either a sequence of weights or the first of several
+            weights passed as separate positional values.
+        progress: Gradio progress tracker (not used directly in the body).
+        *extra_lora_weights: Remaining weights when the caller passes one
+            positional value per slider, as the click handler in this file does.
+    Returns:
+        The path of the exported MP4 file.
+    Raises:
+        gr.Error: If a selected LoRA has no matching weight, or an image is
+            supplied but the loaded pipeline has no `image` argument.
+    """
+    import inspect
+    # Sliders can hand over floats; the pipeline, the RNG and the video
+    # writer need integers.
+    height = int(height)
+    width = int(width)
+    num_frames = int(num_frames)
+    num_inference_steps = int(num_inference_steps)
+    fps = int(fps)
+    seed_value = int(seed_value)
+    # The UI passes one positional value per weight slider, so the first lands
+    # in `lora_weights` and the rest in `extra_lora_weights`. A caller that
+    # passes a single list/tuple keeps working.
+    # NOTE(review): this relies on Gradio injecting the Progress default at the
+    # position of the `progress` parameter - confirm on the installed version.
+    if lora_weights is None:
+        slider_weights = []
+    elif isinstance(lora_weights, (list, tuple)):
+        slider_weights = list(lora_weights)
+    else:
+        slider_weights = [lora_weights]
+    slider_weights.extend(extra_lora_weights)
+    slider_order = list(TTV4ME_Loras)
+    with torch.cuda.device(0):
+        if seed_value == -1:
+            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
+        generator = torch.Generator('cuda').manual_seed(seed_value)
+        # Handle image input. `is None` is used instead of truthiness because
+        # a numpy array has no unambiguous truth value.
+        if uploaded_image is None or (isinstance(uploaded_image, str) and not uploaded_image):
+            init_image = None
+        else:
+            if isinstance(uploaded_image, Image.Image):
+                source_image = uploaded_image
+            elif isinstance(uploaded_image, np.ndarray):
+                source_image = Image.fromarray(uploaded_image)
+            else:
+                source_image = Image.open(uploaded_image)
+            # Resizing makes the image match the requested resolution, so no
+            # separate size check is needed afterwards.
+            init_image = source_image.convert("RGB").resize((width, height))
+            # Fail early with a readable message if the pipeline cannot take an image.
+            if "image" not in inspect.signature(pipe.__call__).parameters:
+                raise gr.Error("The loaded pipeline does not accept an input image; remove the image to run text-to-video.")
+        # Configure LoRA adapters
+        adapter_names = ["hyvid_lora_adapter"]  # Always include the illustration Lora
+        adapter_weights = [0.8]  # Illustration Lora weight
+        # Ask the pipeline which adapters it already holds; adapter names are
+        # not attributes of the pipeline object, so hasattr() cannot tell.
+        loaded_adapters = {
+            name
+            for component_adapters in pipe.get_list_adapters().values()
+            for name in component_adapters
+        }
+        for position, lora_name in enumerate(lora_names or []):
+            if lora_name == "None":
+                continue
+            adapter_name = "ttv4me_" + lora_name.split('.')[0]  # Create unique adapter name
+            if len(slider_weights) == len(slider_order) and lora_name in slider_order:
+                # One weight per known LoRA: pick the slider that belongs to this LoRA.
+                weight = slider_weights[slider_order.index(lora_name)]
+            elif position < len(slider_weights):
+                # Weights aligned with the selection order.
+                weight = slider_weights[position]
+            else:
+                raise gr.Error(f"No weight was supplied for LoRA '{lora_name}'.")
+            if adapter_name not in loaded_adapters:
+                pipe.load_lora_weights(
+                    "Sergidev/TTV4ME",  # Private repository
+                    weight_name=lora_name,
+                    adapter_name=adapter_name,
+                    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+                )
+                loaded_adapters.add(adapter_name)
+            adapter_names.append(adapter_name)
+            adapter_weights.append(float(weight))
+        pipe.set_adapters(adapter_names, adapter_weights)
+        call_kwargs = {
+            "prompt": prompt,
+            "height": height,
+            "width": width,
+            "num_frames": num_frames,
+            "num_inference_steps": num_inference_steps,
+            "generator": generator,
+        }
+        # Only forward an image when one was actually provided.
+        if init_image is not None:
+            call_kwargs["image"] = init_image
+        with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
+            output = pipe(**call_kwargs).frames[0]
+        output_path = "output.mp4"
+        export_to_video(output, output_path, fps=fps)
+        torch.cuda.empty_cache()
+        gc.collect()
+        return output_path
+def apply_preset(preset_name, *current_values):
+    """Return the slider values for a named preset.
+    The values are ordered as the preset buttons expect them:
+    height, width, frame count, inference steps, fps. An unrecognised
+    preset name returns `current_values` unchanged.
+    """
+    preset_table = {
+        "Higher Resolution": [608, 448, 24, 29, 12],
+        "More Frames": [512, 320, 42, 27, 14],
+    }
+    chosen = preset_table.get(preset_name)
+    if chosen is None:
+        return current_values
+    return chosen
+# Stylesheet handed to gr.Blocks(css=css). The selectors pair with the elem_id /
+# elem_classes set on the components: #col-container, .title, .description,
+# .prompt-container, .prompt-textbox and .preset-buttons. The .dark-theme,
+# .container and .support-text rules are not referenced by any component
+# visible in this part of the file.
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 850px;
+}
+.dark-theme {
+    background-color: #1f1f1f;
+    color: #ffffff;
+}
+.container {
+    margin: 0 auto;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: #2d2d2d;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.title {
+    text-align: center;
+    margin-bottom: 1em;
+    color: #ffffff;
+}
+.description {
+    text-align: center;
+    margin-bottom: 2em;
+    color: #cccccc;
+    font-size: 0.95em;
+    line-height: 1.5;
+}
+.prompt-container {
+    background-color: #363636;
+    padding: 15px;
+    border-radius: 8px;
+    margin-bottom: 1em;
+    width: 100%;
+}
+.prompt-textbox {
+    min-height: 80px !important;
+}
+.preset-buttons {
+    display: flex;
+    gap: 10px;
+    justify-content: center;
+    margin-bottom: 1em;
+}
+.support-text {
+    text-align: center;
+    margin-top: 1em;
+    color: #cccccc;
+    font-size: 0.9em;
+}
+a {
+    color: #00a7e1;
+    text-decoration: none;
+}
+a:hover {
+    text-decoration: underline;
+}
+"""
+with gr.Blocks(css=css, theme="dark") as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 🎬 Huanyan Studio", elem_classes=["title"])
+        gr.Markdown(
+            """Image-to-video, text-to-video, with multiple LORAS to use.
+            This space uses the 'hunyuan flat color v2' LORA by Motimalu to generate better 2d animated sequences. Prompt only handles 77 tokens.
+            If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
+            elem_classes=["description"]
+        )
+        with gr.Column(elem_classes=["prompt-container"]):
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
+                show_label=False,
+                elem_classes=["prompt-textbox"],
+                lines=3
+            )
+        with gr.Column(elem_classes=["prompt-container"]):
+            # Optional conditioning image. `type="filepath"` hands generate() a
+            # path it can open with PIL; gr.Image has no `image_types` argument,
+            # so that keyword was dropped.
+            image_input = gr.Image(label="Upload Image (Optional)", type="filepath")
+        with gr.Row():
+            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
+        with gr.Row(elem_classes=["preset-buttons"]):
+            preset_high_res = gr.Button("📺 Higher Resolution Preset")
+            preset_more_frames = gr.Button("🎞️ More Frames Preset")
+        with gr.Row():
+            result = gr.Video(label="Generated Video")
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
+            seed = gr.Slider(
+                label="Seed (-1 for random)",
+                minimum=-1,
+                maximum=MAX_SEED,
+                step=1,
+                value=-1,
+            )
+            with gr.Row():
+                height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=608,
+                )
+                width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=448,
+                )
+            with gr.Row():
+                num_frames = gr.Slider(
+                    label="Number of frames to generate",
+                    minimum=1.0,
+                    maximum=257.0,
+                    step=1,
+                    value=24,
+                )
+                num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=1,
+                    maximum=50,
+                    step=1,
+                    value=29,
+                )
+            fps = gr.Slider(
+                label="Frames per second",
+                minimum=1,
+                maximum=60,
+                step=1,
+                value=12,
+            )
+            # LoRA Selection
+            lora_names = gr.CheckboxGroup(
+                choices=list(TTV4ME_Loras.keys()),
+                label="Select TTV4ME LoRAs"
+            )
+            lora_weights = []
+            for i in range(len(TTV4ME_Loras)):
+                lora_weights.append(gr.Slider(
+                    label=f"Weight for LoRA {i + 1}",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.5,
+                    visible=False  # Initially hidden
+                ))
+            def update_lora_visibility(selected_loras):
+                """Show the weight slider of every selected LoRA and hide the rest.
+                Returns one update per slider, in the key order of TTV4ME_Loras.
+                Returning plain booleans would overwrite the slider values
+                instead of changing their visibility.
+                """
+                chosen = set(selected_loras or [])
+                return [gr.update(visible=lora in chosen) for lora in TTV4ME_Loras]
+            lora_names.change(
+                update_lora_visibility,
+                inputs=[lora_names],
+                outputs=lora_weights
+            )
+        # Event handling
+        input_components = [prompt, image_input, height, width, num_frames, num_inference_steps, seed, fps, lora_names]
+        input_components.extend(lora_weights)
+        run_button.click(
+            fn=generate,
+            inputs=input_components,
+            outputs=[result],
+        )
+        # Preset button handlers
+        preset_high_res.click(
+            fn=lambda: apply_preset("Higher Resolution"),
+            outputs=[height, width, num_frames, num_inference_steps, fps]
+        )
+        preset_more_frames.click(
+            fn=lambda: apply_preset("More Frames"),
+            outputs=[height, width, num_frames, num_inference_steps, fps]