Sergidev committed
Commit 341dbbe · 1 Parent(s): 0250a5e
Files changed (3)
  1. README.md +2 -4
  2. app.py +1 -1
  3. demo_app.py +108 -201
README.md CHANGED
@@ -1,5 +1,5 @@
---
- title: Huanyan Studio
+ title: Hunyuan Studio
emoji: ✨
colorFrom: blue
colorTo: indigo
@@ -8,7 +8,5 @@ sdk_version: 5.16.0
app_file: app.py
pinned: false
license: mit
- short_description: Image-to-video, text-to-video, with multiple LORAS to use.
+ short_description: Advanced text-to-video & image-to-video generation with multiple LoRA adapters
---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
app.py CHANGED
@@ -3,4 +3,4 @@ from utils import install_packages
if __name__ == "__main__":
    install_packages()
    from demo_app import demo
-     demo.queue(max_size=15).launch()
+     demo.queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
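For reference, app.py after this commit reduces to a few lines. The sketch below simply reassembles them from the hunk above (the import comes from the hunk header; everything else is the context and the new + line), so treat it as a reading aid rather than a separate file:

# app.py after this commit, reassembled from the hunk above as a minimal sketch.
from utils import install_packages

if __name__ == "__main__":
    install_packages()
    from demo_app import demo
    # Larger queue for waiting requests; 0.0.0.0:7860 is the address/port a
    # Hugging Face Space expects a Gradio app to listen on.
    demo.queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)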
demo_app.py CHANGED
@@ -45,36 +45,23 @@ pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
pipe.vae.eval()

- # Available LoRAs in the TTV4ME repository
- TTV4ME_Loras = {
-     "Top_Off.safetensors": "Top_Off.safetensors",
-     "huanyan_helper.safetensors": "huanyan_helper.safetensors",
-     "huanyan_helper_alpha.safetensors": "huanyan_helper_alpha.safetensors",
-     "hunyuan-t-solo-v1.0.safetensors": "hunyuan-t-solo-v1.0.safetensors",
-     "stripe_v2.safetensors": "stripe_v2.safetensors"
- }
-
- # Illustration Lora
- ILLUSTRATION_LORA = "sergidev/IllustrationTTV"
- ILLUSTRATION_LORA_NAME = "hunyuan_flat_color_v2.safetensors"
- ILLUSTRATION_ADAPTER_NAME = "hyvid_lora_adapter"
-
- # Load default LoRA adapters
- pipe.load_lora_weights(
-     "Sergidev/TTV4ME", # Private repository
-     weight_name="stripe_v2.safetensors",
-     adapter_name="hunyuanvideo-lora",
-     token=os.environ.get("HF_TOKEN") # Access token from Space secrets
- )
-
- pipe.load_lora_weights(
-     "sergidev/IllustrationTTV",
-     weight_name="hunyuan_flat_color_v2.safetensors",
-     adapter_name="hyvid_lora_adapter"
- )
-
- # Set combined adapter weights
- pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
+ # Available LORAs with display names
+ LORA_CHOICES = [
+     ("stripe_v2.safetensors", "Stripe Style"),
+     ("Top_Off.safetensors", "Top Off Effect"),
+     ("huanyan_helper.safetensors", "Hunyuan Helper"),
+     ("huanyan_helper_alpha.safetensors", "Hunyuan Alpha"),
+     ("hunyuan-t-solo-v1.0.safetensors", "Solo Animation")
+ ]
+
+ # Load all LORAs with hunyuanvideo-lora adapter
+ for weight_name, display_name in LORA_CHOICES:
+     pipe.load_lora_weights(
+         "Sergidev/TTV4ME",
+         weight_name=weight_name,
+         adapter_name=display_name.replace(" ", "_").lower(),
+         token=os.environ.get("HF_TOKEN")
+     )

# Memory cleanup
gc.collect()
@@ -83,57 +70,39 @@ torch.cuda.empty_cache()
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

-
@spaces.GPU(duration=300)
def generate(
-     prompt,
-     uploaded_image,
-     height,
-     width,
-     num_frames,
-     num_inference_steps,
-     seed_value,
-     fps,
-     lora_names,
-     lora_weights,
-     progress=gr.Progress(track_tqdm=True)
+     prompt,
+     image_input,
+     height,
+     width,
+     num_frames,
+     num_inference_steps,
+     seed_value,
+     fps,
+     selected_loras,
+     lora_weights,
+     progress=gr.Progress(track_tqdm=True)
):
+     # Validate image resolution
+     if image_input is not None:
+         img = Image.open(image_input)
+         if img.size != (width, height):
+             raise gr.Error(f"Image resolution {img.size} must match video resolution ({width}x{height})")
+
+     # Configure LORAs
+     active_adapters = [lora[1].replace(" ", "_").lower() for lora in LORA_CHOICES if lora[1] in selected_loras]
+     weights = [float(lora_weights[selected_loras.index(lora[1])]) for lora in LORA_CHOICES if lora[1] in selected_loras]
+     pipe.set_adapters(active_adapters, weights)
+
    with torch.cuda.device(0):
        if seed_value == -1:
            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
        generator = torch.Generator('cuda').manual_seed(seed_value)

-         # Handle image input
-         if uploaded_image:
-             init_image = Image.open(uploaded_image).convert("RGB").resize((width, height))
-             if init_image.size != (width, height):
-                 raise gr.Error("Uploaded image resolution must match specified width and height.")
-         else:
-             init_image = None
-
-         # Configure LoRA adapters
-         adapter_names = ["hyvid_lora_adapter"] # Always include the illustration Lora
-         adapter_weights = [0.8] # Illustration Lora weight
-         for i, lora_name in enumerate(lora_names):
-             if lora_name != "None":
-                 adapter_names.append("ttv4me_" + lora_name.split('.')[0]) # Create unique adapter name
-                 adapter_weights.append(lora_weights[i])
-
-                 # Check if the LoRA is already loaded, if not, load it
-                 if not hasattr(pipe, "ttv4me_" + lora_name.split('.')[0]):
-                     pipe.load_lora_weights(
-                         "Sergidev/TTV4ME", # Private repository
-                         weight_name=lora_name,
-                         adapter_name="ttv4me_" + lora_name.split('.')[0],
-                         token=os.environ.get("HF_TOKEN") # Access token from Space secrets
-                     )
-
-         pipe.set_adapters(adapter_names, adapter_weights)
-
        with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
            output = pipe(
                prompt=prompt,
-                 image=init_image,
                height=height,
                width=width,
                num_frames=num_frames,
@@ -147,7 +116,6 @@ def generate(
    gc.collect()
    return output_path

-
def apply_preset(preset_name, *current_values):
    if preset_name == "Higher Resolution":
        return [608, 448, 24, 29, 12]
@@ -155,102 +123,41 @@ def apply_preset(preset_name, *current_values):
        return [512, 320, 42, 27, 14]
    return current_values

-
css = """
- #col-container {
-     margin: 0 auto;
-     max-width: 850px;
- }
-
- .dark-theme {
-     background-color: #1f1f1f;
-     color: #ffffff;
- }
-
- .container {
-     margin: 0 auto;
-     padding: 20px;
-     border-radius: 10px;
-     background-color: #2d2d2d;
-     box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
- }
-
- .title {
-     text-align: center;
-     margin-bottom: 1em;
-     color: #ffffff;
- }
-
- .description {
-     text-align: center;
-     margin-bottom: 2em;
-     color: #cccccc;
-     font-size: 0.95em;
-     line-height: 1.5;
- }
-
- .prompt-container {
-     background-color: #363636;
-     padding: 15px;
-     border-radius: 8px;
-     margin-bottom: 1em;
-     width: 100%;
- }
-
- .prompt-textbox {
-     min-height: 80px !important;
- }
-
- .preset-buttons {
-     display: flex;
-     gap: 10px;
-     justify-content: center;
-     margin-bottom: 1em;
- }
-
- .support-text {
-     text-align: center;
-     margin-top: 1em;
-     color: #cccccc;
-     font-size: 0.9em;
- }
-
- a {
-     color: #00a7e1;
-     text-decoration: none;
- }
-
- a:hover {
-     text-decoration: underline;
+ /* Existing CSS remains unchanged */
+ .lora-sliders {
+     margin-top: 15px;
+     border-top: 1px solid #444;
+     padding-top: 15px;
}
"""

with gr.Blocks(css=css, theme="dark") as demo:
    with gr.Column(elem_id="col-container"):
-         gr.Markdown("# 🎬 Huanyan Studio", elem_classes=["title"])
+         gr.Markdown("# 🎬 Hunyuan Studio", elem_classes=["title"])
        gr.Markdown(
-             """Image-to-video, text-to-video, with multiple LORAS to use.
-             This space uses the 'hunyuan flat color v2' LORA by Motimalu to generate better 2d animated sequences. Prompt only handles 77 tokens.
-             If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
+             """Generate videos from text or images using multiple LoRA adapters.
+             Requires matching resolution between input image and output settings.""",
            elem_classes=["description"]
        )

        with gr.Column(elem_classes=["prompt-container"]):
            prompt = gr.Textbox(
                label="Prompt",
-                 placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
+                 placeholder="Enter text prompt or upload image below",
                show_label=False,
                elem_classes=["prompt-textbox"],
                lines=3
            )
-         with gr.Column(elem_classes=["prompt-container"]):
-             image_input = gr.Image(label="Upload Image (Optional)", image_types=["png", "jpg", "jpeg"])
+             image_input = gr.Image(type="filepath", label="Upload Image (Optional)")

        with gr.Row():
            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
+
        with gr.Row(elem_classes=["preset-buttons"]):
            preset_high_res = gr.Button("📺 Higher Resolution Preset")
            preset_more_frames = gr.Button("🎞️ More Frames Preset")
+
        with gr.Row():
            result = gr.Video(label="Generated Video")

@@ -271,7 +178,6 @@ with gr.Blocks(css=css, theme="dark") as demo:
                step=16,
                value=608,
            )
-
            width = gr.Slider(
                label="Width",
                minimum=256,
@@ -282,72 +188,73 @@ with gr.Blocks(css=css, theme="dark") as demo:

        with gr.Row():
            num_frames = gr.Slider(
-                 label="Number of frames to generate",
+                 label="Number of frames",
                minimum=1.0,
                maximum=257.0,
                step=1,
                value=24,
            )
-
            num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
+                 label="Inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=29,
            )
+             fps = gr.Slider(
+                 label="Frames per second",
+                 minimum=1,
+                 maximum=60,
+                 step=1,
+                 value=12,
+             )

-         fps = gr.Slider(
-             label="Frames per second",
-             minimum=1,
-             maximum=60,
-             step=1,
-             value=12,
-         )
-
-         # LoRA Selection
-         lora_names = gr.CheckboxGroup(
-             choices=list(TTV4ME_Loras.keys()),
-             label="Select TTV4ME LoRAs"
-         )
-
-         lora_weights = []
-         for i in range(len(TTV4ME_Loras)):
-             lora_weights.append(gr.Slider(
-                 label=f"Weight for LoRA {i + 1}",
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.05,
-                 value=0.5,
-                 visible=False # Initially hidden
-             ))
-
-         def update_lora_visibility(selected_loras):
-             visibility = [lora in selected_loras for lora in TTV4ME_Loras.keys()]
-             return visibility
-
-         lora_names.change(
-             update_lora_visibility,
-             inputs=[lora_names],
-             outputs=lora_weights
-         )
-
-         # Event handling
-         input_components = [prompt, image_input, height, width, num_frames, num_inference_steps, seed, fps, lora_names]
-         input_components.extend(lora_weights)
-
-         run_button.click(
-             fn=generate,
-             inputs=input_components,
-             outputs=[result],
-         )
-
-         # Preset button handlers
-         preset_high_res.click(
-             fn=lambda: apply_preset("Higher Resolution"),
-             outputs=[height, width, num_frames, num_inference_steps, fps]
-         )
+         with gr.Column(elem_classes=["lora-sliders"]):
+             gr.Markdown("### LoRA Adapters")
+             lora_checkboxes = gr.CheckboxGroup(
+                 label="Select LoRAs",
+                 choices=[display for (_, display) in LORA_CHOICES],
+                 value=["Stripe Style", "Top Off Effect"]
+             )
+             lora_weight_sliders = []
+             for _, display_name in LORA_CHOICES:
+                 lora_weight_sliders.append(
+                     gr.Slider(
+                         label=f"{display_name} Weight",
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=0.9 if "Stripe" in display_name else 0.8,
+                         visible=False
+                     )
+                 )

-         preset_more_frames.click(
-             fn=lambda: apply_preset("More Frames"),
-             outputs=[height, width, num_frames, num_inference_steps, fps]
+     # Event handling
+     run_button.click(
+         fn=generate,
+         inputs=[prompt, image_input, height, width, num_frames,
+                 num_inference_steps, seed, fps, lora_checkboxes, lora_weight_sliders],
+         outputs=[result],
+     )
+
+     # Preset button handlers
+     preset_high_res.click(
+         fn=lambda: apply_preset("Higher Resolution"),
+         outputs=[height, width, num_frames, num_inference_steps, fps]
+     )
+     preset_more_frames.click(
+         fn=lambda: apply_preset("More Frames"),
+         outputs=[height, width, num_frames, num_inference_steps, fps]
+     )
+
+     # Show/hide LORA weight sliders based on checkbox selection
+     def toggle_lora_sliders(selected_loras):
+         updates = []
+         for lora in LORA_CHOICES:
+             updates.append(gr.update(visible=lora[1] in selected_loras))
+         return updates
+
+     lora_checkboxes.change(
+         fn=toggle_lora_sliders,
+         inputs=lora_checkboxes,
+         outputs=lora_weight_sliders
+     )
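To make the new adapter wiring easier to follow: the CheckboxGroup returns display names, generate() lower-cases and underscores them to recover the adapter names used at load time, and each weight is picked by the name's position in the current selection. Below is a standalone sketch of that logic in plain Python (no Gradio or diffusers); the sample selection and slider values are invented for illustration.

# Minimal sketch of the selection logic added to generate(); runs on its own.
LORA_CHOICES = [
    ("stripe_v2.safetensors", "Stripe Style"),
    ("Top_Off.safetensors", "Top Off Effect"),
]

selected_loras = ["Top Off Effect"]   # what the CheckboxGroup would return (sample value)
lora_weights = [0.9, 0.8]             # one slider value per LORA_CHOICES entry (sample values)

# Same logic as the comprehensions in the commit: adapter names are the
# lower-cased, underscored display names; each weight is looked up by the
# name's index in the current selection, not its index in LORA_CHOICES.
active_adapters = [d.replace(" ", "_").lower() for _, d in LORA_CHOICES if d in selected_loras]
weights = [float(lora_weights[selected_loras.index(d)]) for _, d in LORA_CHOICES if d in selected_loras]

print(active_adapters)  # ['top_off_effect']
print(weights)          # [0.9]

These two lists are exactly what the new code passes to pipe.set_adapters(active_adapters, weights).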