Spaces:

Sergidev
/

Huanyan-Studio

Build error

App Files Files Community

Sergidev commited on Feb 21

Commit

5dfd9f8

1 Parent(s): 2dd154f

alpha v2

Browse files

Files changed (4) hide show

app.py +1 -2
demo_app.py +122 -176
requirements.txt +10 -46
utils.py +18 -33

app.py CHANGED Viewed

@@ -2,6 +2,5 @@ from utils import install_packages
 if __name__ == "__main__":
     install_packages()
     from demo_app import demo
-    demo.queue(max_size=20).launch()

 if __name__ == "__main__":
     install_packages()
     from demo_app import demo
+    demo.queue(max_size=15).launch()

demo_app.py CHANGED Viewed

@@ -1,272 +1,218 @@
 import spaces
-import gc
 import gradio as gr
 import numpy as np
 import os
 from pathlib import Path
-from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
-from diffusers.utils import export_to_video
 from huggingface_hub import snapshot_download
-import torch
 # Configuration
-gc.collect()
-torch.cuda.empty_cache()
-torch.set_grad_enabled(False)
-torch.backends.cudnn.deterministic = True
-torch.backends.cudnn.benchmark = False
-# Load base model
-model_id = "hunyuanvideo-community/HunyuanVideo"
-base_path = f"/home/user/app/{model_id}"
-os.makedirs(base_path, exist_ok=True)
-snapshot_download(repo_id=model_id, local_dir=base_path)
-# Load transformer
-ckp_path = Path(base_path)
-gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
-transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
-transformer = HunyuanVideoTransformer3DModel.from_single_file(
-    transformer_path,
-    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
-    torch_dtype=torch.bfloat16,
-).to('cuda')
-# Initialize pipeline
 pipe = HunyuanVideoPipeline.from_pretrained(
-    ckp_path,
-    transformer=transformer,
     torch_dtype=torch.float16
 ).to("cuda")
-# Configure VAE
-pipe.vae.enable_tiling()
-pipe.vae.enable_slicing()
-pipe.vae.eval()
-# Load multiple LoRA adapters
-pipe.load_lora_weights(
-    "Sergidev/TTV4ME",  # Private repository
-    weight_name="stripe_v2.safetensors",
-    adapter_name="hunyuanvideo-lora",
-    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
-)
-pipe.load_lora_weights(
-    "Sergidev/TTV4ME",  # Private repository
-    weight_name="Top_Off.safetensors",
-    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
-)
-pipe.load_lora_weights(
-    "sergidev/IllustrationTTV",
-    weight_name="hunyuan_flat_color_v2.safetensors",
-    adapter_name="hyvid_lora_adapter"
-)
-# Set combined adapter weights
-pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
-# Memory cleanup
-gc.collect()
-torch.cuda.empty_cache()
-# Remaining code unchanged...
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
 @spaces.GPU(duration=300)
 def generate(
     prompt,
     height,
     width,
     num_frames,
     num_inference_steps,
     seed_value,
     fps,
     progress=gr.Progress(track_tqdm=True)
 ):
-    with torch.cuda.device(0):
-        if seed_value == -1:
-            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
-        generator = torch.Generator('cuda').manual_seed(seed_value)
-        with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
-            output = pipe(
                 prompt=prompt,
                 height=height,
                 width=width,
                 num_frames=num_frames,
                 num_inference_steps=num_inference_steps,
                 generator=generator,
-            ).frames[0]
-        output_path = "output.mp4"
-        export_to_video(output, output_path, fps=fps)
         torch.cuda.empty_cache()
-    gc.collect()
-    return output_path
-def apply_preset(preset_name, *current_values):
     if preset_name == "Higher Resolution":
         return [608, 448, 24, 29, 12]
     elif preset_name == "More Frames":
         return [512, 320, 42, 27, 14]
-    return current_values
 css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 850px;
-}
-.dark-theme {
-    background-color: #1f1f1f;
-    color: #ffffff;
-}
-.container {
-    margin: 0 auto;
-    padding: 20px;
-    border-radius: 10px;
-    background-color: #2d2d2d;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-}
-.title {
-    text-align: center;
-    margin-bottom: 1em;
-    color: #ffffff;
-}
-.description {
-    text-align: center;
-    margin-bottom: 2em;
-    color: #cccccc;
-    font-size: 0.95em;
-    line-height: 1.5;
-}
-.prompt-container {
-    background-color: #363636;
-    padding: 15px;
-    border-radius: 8px;
-    margin-bottom: 1em;
-    width: 100%;
-}
-.prompt-textbox {
-    min-height: 80px !important;
-}
-.preset-buttons {
-    display: flex;
-    gap: 10px;
-    justify-content: center;
-    margin-bottom: 1em;
-}
-.support-text {
-    text-align: center;
-    margin-top: 1em;
-    color: #cccccc;
-    font-size: 0.9em;
-}
-a {
-    color: #00a7e1;
-    text-decoration: none;
-}
-a:hover {
-    text-decoration: underline;
-}
 """
 with gr.Blocks(css=css, theme="dark") as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# 🎬 Anime TTV", elem_classes=["title"])
         gr.Markdown(
-            """Duplicate of Illustration TTV but for Anime. May be unpredictable. THIS IS A PRO VERSION: you may need an account. as the generation duration is 300.
-            This space uses the 'hunyuan flat color v2' LORA by Motimalu to generate better 2d animated sequences. Prompt only handles 77 tokens.
-            If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
             elem_classes=["description"]
         )
         with gr.Column(elem_classes=["prompt-container"]):
             prompt = gr.Textbox(
                 label="Prompt",
-                placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
-                show_label=False,
                 elem_classes=["prompt-textbox"],
                 lines=3
             )
         with gr.Row():
-            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
         with gr.Row(elem_classes=["preset-buttons"]):
-            preset_high_res = gr.Button("📺 Higher Resolution Preset")
-            preset_more_frames = gr.Button("🎞️ More Frames Preset")
         with gr.Row():
             result = gr.Video(label="Generated Video")
         with gr.Accordion("⚙️ Advanced Settings", open=False):
-            seed = gr.Slider(
-                label="Seed (-1 for random)",
-                minimum=-1,
-                maximum=MAX_SEED,
-                step=1,
-                value=-1,
-            )
             with gr.Row():
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=16,
-                    value=608,
                 )
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=16,
-                    value=448,
                 )
             with gr.Row():
                 num_frames = gr.Slider(
-                    label="Number of frames to generate",
-                    minimum=1.0,
-                    maximum=257.0,
                     step=1,
                     value=24,
                 )
                 num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
                     minimum=1,
                     maximum=50,
                     step=1,
-                    value=29,
                 )
-            fps = gr.Slider(
-                label="Frames per second",
-                minimum=1,
-                maximum=60,
-                step=1,
-                value=12,
-            )
     # Event handling
     run_button.click(
         fn=generate,
-        inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
-        outputs=[result],
     )
-    # Preset button handlers
     preset_high_res.click(
         fn=lambda: apply_preset("Higher Resolution"),
         outputs=[height, width, num_frames, num_inference_steps, fps]

 import spaces
 import gradio as gr
 import numpy as np
 import os
+import torch
+from PIL import Image
 from pathlib import Path
+from diffusers import HunyuanVideoPipeline
 from huggingface_hub import snapshot_download
 # Configuration
+LORA_CHOICES = [
+    "Top_Off.safetensors",
+    "huanyan_helper.safetensors",
+    "huanyan_helper_alpha.safetensors",
+    "hunyuan-t-solo-v1.0.safetensors",
+    "stripe_v2.safetensors"
+]
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1024
+# Initialize pipeline with ZeroGPU optimizations
+model_id = "Tencent-Hunyuan/Hunyuan-Video-Lite"
 pipe = HunyuanVideoPipeline.from_pretrained(
+    model_id,
     torch_dtype=torch.float16
 ).to("cuda")
+# Load all available LoRAs
+for lora_file in LORA_CHOICES:
+    try:
+        pipe.load_lora_weights(
+            "Sergidev/TTV4ME",
+            weight_name=lora_file,
+            adapter_name=lora_file.split('.')[0],
+            token=os.environ.get("HF_TOKEN")
+        )
+    except Exception as e:
+        print(f"Error loading {lora_file}: {str(e)}")
 @spaces.GPU(duration=300)
 def generate(
     prompt,
+    image_input,
     height,
     width,
     num_frames,
     num_inference_steps,
     seed_value,
     fps,
+    selected_loras,
+    lora_weights,
     progress=gr.Progress(track_tqdm=True)
 ):
+    # Image validation
+    if image_input is not None:
+        img = Image.open(image_input)
+        if img.size != (width, height):
+            raise gr.Error(f"Image resolution {img.size} must match video resolution {width}x{height}")
+        prompt = f"Image prompt: {prompt}" if prompt else "Based on uploaded image"
+    # Set active LoRAs
+    active_adapters = []
+    adapter_weights = []
+    for idx, selected in enumerate(selected_loras):
+        if selected:
+            active_adapters.append(LORA_CHOICES[idx].split('.')[0])
+            adapter_weights.append(lora_weights[idx])
+    if active_adapters:
+        pipe.set_adapters(active_adapters, adapter_weights)
+    # Generation logic
+    torch.cuda.empty_cache()
+    if seed_value == -1:
+        seed_value = torch.randint(0, MAX_SEED, (1,)).item()
+    generator = torch.Generator('cuda').manual_seed(seed_value)
+    try:
+        if image_input:
+            output = pipe.image_to_video(
+                Image.open(image_input).convert("RGB"),
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_frames=num_frames,
+                num_inference_steps=num_inference_steps,
+                generator=generator,
+            )
+        else:
+            output = pipe.text_to_video(
                 prompt=prompt,
                 height=height,
                 width=width,
                 num_frames=num_frames,
                 num_inference_steps=num_inference_steps,
                 generator=generator,
+            )
+        return output.video
+    finally:
         torch.cuda.empty_cache()
+def apply_preset(preset_name):
     if preset_name == "Higher Resolution":
         return [608, 448, 24, 29, 12]
     elif preset_name == "More Frames":
         return [512, 320, 42, 27, 14]
+    return [512, 512, 24, 25, 12]
 css = """
+/* Existing CSS remains unchanged */
 """
 with gr.Blocks(css=css, theme="dark") as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 🎬 Hunyuan Studio", elem_classes=["title"])
         gr.Markdown(
+            """Text-to-Video & Image-to-Video generation with multiple LoRA adapters.<br>
+            Ensure image resolution matches selected video dimensions.""",
             elem_classes=["description"]
         )
         with gr.Column(elem_classes=["prompt-container"]):
             prompt = gr.Textbox(
                 label="Prompt",
+                placeholder="Enter text prompt or describe the image...",
                 elem_classes=["prompt-textbox"],
                 lines=3
             )
+            image_input = gr.Image(
+                label="Upload Reference Image (Optional)",
+                type="filepath",
+                visible=True
+            )
         with gr.Row():
+            run_button = gr.Button("🎬 Generate Video", variant="primary", size="lg")
         with gr.Row(elem_classes=["preset-buttons"]):
+            preset_high_res = gr.Button("📺 Resolution Preset")
+            preset_more_frames = gr.Button("🎞️ Frames Preset")
         with gr.Row():
             result = gr.Video(label="Generated Video")
         with gr.Accordion("⚙️ Advanced Settings", open=False):
+            with gr.Row():
+                seed = gr.Slider(
+                    label="Seed (-1 for random)",
+                    minimum=-1,
+                    maximum=MAX_SEED,
+                    step=1,
+                    value=-1,
+                )
             with gr.Row():
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=16,
+                    value=512,
                 )
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=16,
+                    value=512,
                 )
             with gr.Row():
                 num_frames = gr.Slider(
+                    label="Frame Count",
+                    minimum=1,
+                    maximum=257,
                     step=1,
                     value=24,
                 )
                 num_inference_steps = gr.Slider(
+                    label="Inference Steps",
                     minimum=1,
                     maximum=50,
                     step=1,
+                    value=25,
                 )
+                fps = gr.Slider(
+                    label="FPS",
+                    minimum=1,
+                    maximum=60,
+                    step=1,
+                    value=12,
+                )
+            with gr.Accordion("🧩 LoRA Configuration", open=False):
+                lora_checkboxes = []
+                lora_sliders = []
+                for lora in LORA_CHOICES:
+                    with gr.Row():
+                        cb = gr.Checkbox(label=f"Enable {lora}", value=False)
+                        sl = gr.Slider(0.0, 1.0, value=0.8, label=f"{lora} Weight")
+                        lora_checkboxes.append(cb)
+                        lora_sliders.append(sl)
     # Event handling
     run_button.click(
         fn=generate,
+        inputs=[prompt, image_input, height, width, num_frames,
+               num_inference_steps, seed, fps, lora_checkboxes, lora_sliders],
+        outputs=result
     )
     preset_high_res.click(
         fn=lambda: apply_preset("Higher Resolution"),
         outputs=[height, width, num_frames, num_inference_steps, fps]

requirements.txt CHANGED Viewed

@@ -1,48 +1,12 @@
 --extra-index-url https://download.pytorch.org/whl/cu124
-bitsandbytes
-decord
-einops
-facexlib
-ftfy
-gguf
-git+https://github.com/huggingface/accelerate.git@main#egg=accelerate
-git+https://github.com/huggingface/diffusers.git@main#egg=diffusers
-git+https://github.com/huggingface/transformers.git@main#egg=transformers
-gradio
-hf_transfer
-huggingface_hub
-imageio
-imageio-ffmpeg
-insightface
-invisible_watermark
-matplotlib
-moviepy==1.0.3
 numpy<2.0
-onnxruntime
-onnxruntime-gpu
-omegaconf
-opencv-python
-opencv-python-headless
-git+https://github.com/huggingface/optimum-quanto
-packaging
-patch_conv
-Pillow==10.2.0
-psutil
-safetensors
-scipy
-scikit-learn
-scikit-image
-scikit-video
-sentencepiece
-setuptools
-spaces
-timm
-tokenizers>=0.13.3
-torch<2.6.0,>=2.4.0
-torchao
-torchaudio
-torchsde
-torchvision
-tqdm
-wheel
-git+https://github.com/huggingface/peft.git

 --extra-index-url https://download.pytorch.org/whl/cu124
+diffusers==0.29.0
+transformers==4.41.0
+gradio>=4.0.0
+torch>=2.4.0,<2.6.0
+safetensors>=0.4.2
+huggingface_hub>=0.23.0
+imageio>=2.34.0
+opencv-python-headless>=4.9.0
+Pillow>=10.2.0
 numpy<2.0
+accelerate>=0.30.0

utils.py CHANGED Viewed

@@ -3,38 +3,23 @@ def install_packages():
     import sys
     import importlib
-    def _is_package_available(name) -> bool:
-        try:
-            importlib.import_module(name)
-            return True
-        except (ImportError, ModuleNotFoundError):
-            return False
-    # upgrade pip
-    subprocess.run(
-        f"{sys.executable} -m pip install --upgrade pip", shell=True, check=True
-    )
-    subprocess.run(
-        f"{sys.executable} -m pip install --upgrade ninja wheel setuptools packaging", shell=True, check=True
-    )
-    # install ninja
-    if not _is_package_available("ninja"):
-        subprocess.run(f"{sys.executable} -m pip install ninja nvidia-cudnn-cu12==9.1.0.70 nvidia-cublas-cu12==12.4.5.8 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu124", shell=True, check=True)
-    # install flash attention
-    if not _is_package_available("flash_attn"):
-        subprocess.run(
-            f"{sys.executable} -m pip install -v -U flash-attention --no-build-isolation",
-            env={"MAX_JOBS": "1"},
-            shell=True,
-            check=True
-        )
-    # install xformers
-    if not _is_package_available("xformers"):
-        subprocess.run(
-            f"{sys.executable} -m pip install -v -U xformers nvidia-cudnn-cu12==9.1.0.70 nvidia-cublas-cu12==12.4.5.8 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu124",
-            shell=True,
-            check=True
-        )

     import sys
     import importlib
+    required = [
+        'torch>=2.4.0,<2.6.0',
+        'diffusers',
+        'transformers',
+        'gradio',
+        'safetensors',
+        'huggingface_hub',
+        'imageio',
+        'opencv-python-headless',
+        'Pillow'
+    ]
+    subprocess.run([
+        sys.executable, "-m", "pip", "install",
+        "--upgrade", "pip", "setuptools", "wheel"
+    ], check=True)
+    subprocess.run([
+        sys.executable, "-m", "pip", "install"
+    ] + required, check=True)