Sergidev committed
Commit 2dd154f · 1 Parent(s): bd113ad
Files changed (1):
  1. demo_app.py +264 -96
demo_app.py CHANGED
@@ -1,110 +1,278 @@
 import gradio as gr
-from PIL import Image
 import torch
-from diffusers import HunyuanVideoPipeline
-# ... other imports ...
-
-# Add LORA configuration
-LORA_LIST = [
-    "Top_Off.safetensors",
-    "huanyan_helper.safetensors",
-    "huanyan_helper_alpha.safetensors",
-    "hunyuan-t-solo-v1.0.safetensors",
-    "stripe_v2.safetensors"
-]
-
-def create_advanced_settings():
-    with gr.Accordion("Advanced Settings", open=False):
-        # LORA Selection
-        lora_choices = gr.CheckboxGroup(
-            choices=LORA_LIST,
-            label="Select LORAs",
-            value=[LORA_LIST[0]]
-        )
-
-        lora_weights = {}
-        for lora in LORA_LIST:
-            lora_weights[lora] = gr.Slider(0.0, 1.0, value=0.8,
-                                           label=f"{lora} Weight")
-
-        # Resolution Settings
-        resolution = gr.Dropdown(
-            choices=["512x512", "768x768", "1024x1024"],
-            value="512x512",
-            label="Output Resolution"
-        )
-
-    return lora_choices, lora_weights, resolution
-
-def validate_image_resolution(image, resolution):
-    if image is None:
-        return
-    img = Image.open(image)
-    w, h = img.size
-    if f"{w}x{h}" != resolution:
-        raise gr.Error(f"Image resolution ({w}x{h}) must match output resolution ({resolution})")
-
-def generate_video(prompt, negative_prompt, lora_choices, lora_weights,
-                   resolution, image_input=None, steps=30):
-    # Validate image resolution if provided
-    if image_input:
-        validate_image_resolution(image_input, resolution)
-
-    # Load base model
-    pipe = HunyuanVideoPipeline.from_pretrained(
-        "Tencent-Hunyuan/Hunyuan-Video-Lite",
-        torch_dtype=torch.float16
-    ).to("cuda")
-
-    # Apply selected LORAs
-    for lora in lora_choices:
-        pipe.load_lora_weights(
-            f"TTV4ME/{lora}",
-            adapter_name="hunyuanvideo-lora",
-            weight_name=lora_weights[lora]
-        )
-
-    # Generate from image or text
-    if image_input:
-        image = Image.open(image_input).convert("RGB")
-        output = pipe.image_to_video(
-            image,
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            num_frames=24,
-            height=int(resolution.split("x")[1]),
-            width=int(resolution.split("x")[0]),
-            num_inference_steps=steps
-        )
-    else:
-        output = pipe.text_to_video(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=int(resolution.split("x")[1]),
-            width=int(resolution.split("x")[0]),
-            num_inference_steps=steps
         )

-    return output.video

-# Update interface
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            prompt = gr.Textbox(label="Prompt")
-            negative_prompt = gr.Textbox(label="Negative Prompt")
-            image_input = gr.Image(label="Input Image", type="filepath")
-
-            lora_choices, lora_weights, resolution = create_advanced_settings()
-
-            generate_btn = gr.Button("Generate Video")
-
-        with gr.Column():
-            output_video = gr.Video(label="Generated Video")
-
-    generate_btn.click(
-        fn=generate_video,
-        inputs=[prompt, negative_prompt, lora_choices,
-                lora_weights, resolution, image_input],
-        outputs=output_video
     )
+import spaces
+import gc
 import gradio as gr
+import numpy as np
+import os
+from pathlib import Path
+from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+from diffusers.utils import export_to_video
+from huggingface_hub import snapshot_download
 import torch

+# Configuration
+gc.collect()
+torch.cuda.empty_cache()
+torch.set_grad_enabled(False)
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False

+# Load base model
+model_id = "hunyuanvideo-community/HunyuanVideo"
+base_path = f"/home/user/app/{model_id}"
+os.makedirs(base_path, exist_ok=True)
+snapshot_download(repo_id=model_id, local_dir=base_path)

+# Load transformer
+ckp_path = Path(base_path)
+gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
+transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
+transformer = HunyuanVideoTransformer3DModel.from_single_file(
+    transformer_path,
+    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
+    torch_dtype=torch.bfloat16,
+).to('cuda')

+# Initialize pipeline
+pipe = HunyuanVideoPipeline.from_pretrained(
+    ckp_path,
+    transformer=transformer,
+    torch_dtype=torch.float16
+).to("cuda")
+
+# Configure VAE
+pipe.vae.enable_tiling()
+pipe.vae.enable_slicing()
+pipe.vae.eval()
+
+# Load multiple LoRA adapters
+pipe.load_lora_weights(
+    "Sergidev/TTV4ME",  # Private repository
+    weight_name="stripe_v2.safetensors",
+    adapter_name="hunyuanvideo-lora",
+    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+)
+
+pipe.load_lora_weights(
+    "Sergidev/TTV4ME",  # Private repository
+    weight_name="Top_Off.safetensors",
+    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+)
+
+pipe.load_lora_weights(
+    "sergidev/IllustrationTTV",
+    weight_name="hunyuan_flat_color_v2.safetensors",
+    adapter_name="hyvid_lora_adapter"
+)
+
+# Set combined adapter weights
+pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
+
+# Memory cleanup
+gc.collect()
+torch.cuda.empty_cache()
+
+# Remaining code unchanged...
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1024
+
+@spaces.GPU(duration=300)
+def generate(
+    prompt,
+    height,
+    width,
+    num_frames,
+    num_inference_steps,
+    seed_value,
+    fps,
+    progress=gr.Progress(track_tqdm=True)
+):
+    with torch.cuda.device(0):
+        if seed_value == -1:
+            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
+        generator = torch.Generator('cuda').manual_seed(seed_value)
+
+        with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
+            output = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_frames=num_frames,
+                num_inference_steps=num_inference_steps,
+                generator=generator,
+            ).frames[0]
+
+        output_path = "output.mp4"
+        export_to_video(output, output_path, fps=fps)
+        torch.cuda.empty_cache()
+        gc.collect()
+        return output_path
+
+def apply_preset(preset_name, *current_values):
+    if preset_name == "Higher Resolution":
+        return [608, 448, 24, 29, 12]
+    elif preset_name == "More Frames":
+        return [512, 320, 42, 27, 14]
+    return current_values
+
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 850px;
+}
+
+.dark-theme {
+    background-color: #1f1f1f;
+    color: #ffffff;
+}
+
+.container {
+    margin: 0 auto;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: #2d2d2d;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+
+.title {
+    text-align: center;
+    margin-bottom: 1em;
+    color: #ffffff;
+}
+
+.description {
+    text-align: center;
+    margin-bottom: 2em;
+    color: #cccccc;
+    font-size: 0.95em;
+    line-height: 1.5;
+}
+
+.prompt-container {
+    background-color: #363636;
+    padding: 15px;
+    border-radius: 8px;
+    margin-bottom: 1em;
+    width: 100%;
+}
+
+.prompt-textbox {
+    min-height: 80px !important;
+}
+
+.preset-buttons {
+    display: flex;
+    gap: 10px;
+    justify-content: center;
+    margin-bottom: 1em;
+}
+
+.support-text {
+    text-align: center;
+    margin-top: 1em;
+    color: #cccccc;
+    font-size: 0.9em;
+}
+
+a {
+    color: #00a7e1;
+    text-decoration: none;
+}
+
+a:hover {
+    text-decoration: underline;
+}
+"""
+
+with gr.Blocks(css=css, theme="dark") as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 🎬 Anime TTV", elem_classes=["title"])
+        gr.Markdown(
+            """Duplicate of Illustration TTV, but for anime. Results may be unpredictable. THIS IS A PRO VERSION: you may need an account, as each generation is allocated a 300-second GPU duration.
+            This space uses the 'hunyuan flat color v2' LoRA by Motimalu to generate better 2D animated sequences. The prompt only handles 77 tokens.
+
+            If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
+            elem_classes=["description"]
         )

+        with gr.Column(elem_classes=["prompt-container"]):
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
+                show_label=False,
+                elem_classes=["prompt-textbox"],
+                lines=3
+            )
+
+        with gr.Row():
+            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
+
+        with gr.Row(elem_classes=["preset-buttons"]):
+            preset_high_res = gr.Button("📺 Higher Resolution Preset")
+            preset_more_frames = gr.Button("🎞️ More Frames Preset")
+
+        with gr.Row():
+            result = gr.Video(label="Generated Video")
+
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
+            seed = gr.Slider(
+                label="Seed (-1 for random)",
+                minimum=-1,
+                maximum=MAX_SEED,
+                step=1,
+                value=-1,
+            )
+            with gr.Row():
+                height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=608,
+                )
+                width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=448,
+                )
+            with gr.Row():
+                num_frames = gr.Slider(
+                    label="Number of frames to generate",
+                    minimum=1.0,
+                    maximum=257.0,
+                    step=1,
+                    value=24,
+                )
+                num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=1,
+                    maximum=50,
+                    step=1,
+                    value=29,
+                )
+                fps = gr.Slider(
+                    label="Frames per second",
+                    minimum=1,
+                    maximum=60,
+                    step=1,
+                    value=12,
+                )
+
+    # Event handling
+    run_button.click(
+        fn=generate,
+        inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
+        outputs=[result],
+    )
+
+    # Preset button handlers
+    preset_high_res.click(
+        fn=lambda: apply_preset("Higher Resolution"),
+        outputs=[height, width, num_frames, num_inference_steps, fps]
+    )

+    preset_more_frames.click(
+        fn=lambda: apply_preset("More Frames"),
+        outputs=[height, width, num_frames, num_inference_steps, fps]
     )
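
For anyone adapting this commit locally, here is a minimal sketch of how the stacked-LoRA setup in the new `demo_app.py` could be sanity-checked before generating. It is not part of the commit: it assumes the `pipe` object built above, a recent `diffusers` release with the PEFT-backed LoRA loader (where `get_active_adapters()` is available), and `peft` installed; the printed adapter list and the 0.9/0.8 scales simply mirror the `set_adapters` call in the diff.

```python
# Hedged sketch, not part of the commit: inspect and re-apply the LoRA blend
# configured in demo_app.py. Assumes `pipe` is the HunyuanVideoPipeline built
# above and that diffusers' PEFT integration is installed (pip install peft).
active = pipe.get_active_adapters()
print("Active LoRA adapters:", active)
# e.g. ['hunyuanvideo-lora', 'hyvid_lora_adapter'] after the set_adapters call above.

# Same two adapters and weights as the commit; adjust the scales here when
# experimenting with how strongly each LoRA influences the output.
pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
```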