thankfulcarp committed on
Commit 10f11a1 · 1 Parent(s): b6b20fb

Major Lora and Resolution enhancements

Files changed (1):
  1. app.py (+110 -28)
app.py CHANGED
@@ -8,7 +8,7 @@ import tempfile
 import re
 import os
 import traceback
-
+from huggingface_hub import list_repo_files
 from huggingface_hub import hf_hub_download
 import numpy as np
 from PIL import Image
@@ -20,6 +20,10 @@ I2V_BASE_MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers" # Used for VAE/encode
 I2V_FUSIONX_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
 I2V_FUSIONX_FILENAME = "Wan14Bi2vFusioniX.safetensors"
 
+# --- I2V LoRA Configuration ---
+I2V_LORA_REPO_ID = "DeepBeepMeep/Wan2.1"
+I2V_LORA_SUBFOLDER = "loras_i2v"
+
 # --- Load Pipelines ---
 print("🚀 Loading I2V pipeline from single file...")
 i2v_pipe = None
@@ -58,15 +62,30 @@ except Exception as e:
     print(f"❌ Critical Error: Failed to load I2V pipeline from single file.")
     traceback.print_exc()
 
+# --- LoRA Discovery ---
+def get_available_loras(repo_id, subfolder):
+    """Fetches the list of available LoRA files from a Hugging Face Hub repo subfolder."""
+    try:
+        files = list_repo_files(repo_id=repo_id, repo_type='model', subfolder=subfolder)
+        # Filter for .safetensors and get just the filename
+        safetensors_files = [f.split('/')[-1] for f in files if f.endswith('.safetensors')]
+        print(f"✅ Discovered {len(safetensors_files)} LoRAs in {repo_id}/{subfolder}")
+        return ["None"] + sorted(safetensors_files)
+    except Exception as e:
+        print(f"⚠️ Warning: Could not fetch LoRAs from {repo_id}. LoRA selection will be disabled. Error: {e}")
+        return ["None"]
+
+available_i2v_loras = get_available_loras(I2V_LORA_REPO_ID, I2V_LORA_SUBFOLDER) if i2v_pipe else ["None"]
+
 
 # --- Constants and Configuration ---
-MOD_VALUE = 32
-DEFAULT_H_SLIDER_VALUE = 640
-DEFAULT_W_SLIDER_VALUE = 1024
-NEW_FORMULA_MAX_AREA = 640.0 * 1024.0
+MOD_VALUE = 8
+DEFAULT_H_SLIDER_VALUE = 512
+DEFAULT_W_SLIDER_VALUE = 768
+NEW_FORMULA_MAX_AREA = 768.0 * 512.0
 
-SLIDER_MIN_H, SLIDER_MAX_H = 128, 1024
-SLIDER_MIN_W, SLIDER_MAX_W = 128, 1024
+SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
+SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
 MAX_SEED = np.iinfo(np.int32).max
 
 FIXED_FPS = 16
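The helper above feeds the new LoRA dropdown. A quick standalone sketch to preview what it will discover; note that huggingface_hub's list_repo_files returns repo-relative paths for the whole repo, so this sketch filters by path prefix rather than passing a subfolder argument:

```python
# Sketch (not part of the commit): preview the dropdown contents.
from huggingface_hub import list_repo_files

repo_id, subfolder = "DeepBeepMeep/Wan2.1", "loras_i2v"
files = list_repo_files(repo_id=repo_id, repo_type="model")  # full repo listing
loras = sorted(
    path.rsplit("/", 1)[-1]
    for path in files
    if path.startswith(subfolder + "/") and path.endswith(".safetensors")
)
print(["None"] + loras)  # same shape as available_i2v_loras
```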
@@ -87,6 +106,25 @@ def sanitize_prompt_for_filename(prompt: str, max_len: int = 60) -> str:
     sanitized = re.sub(r'[\s_-]+', '_', sanitized)
     return sanitized[:max_len]
 
+def update_linked_dimension(driving_value, other_value, aspect_ratio, mod_val, mode):
+    """Updates a dimension slider based on the other, maintaining aspect ratio."""
+    # aspect_ratio is stored as W/H
+    if aspect_ratio is None or aspect_ratio == 0:
+        return gr.update()  # Do nothing if aspect ratio is not set
+
+    if mode == 'h_drives_w':
+        # new_w = h * (W/H)
+        new_other_value = driving_value * aspect_ratio
+    else:  # 'w_drives_h'
+        # new_h = w / (W/H)
+        new_other_value = driving_value / aspect_ratio
+
+    # Round to the nearest multiple of mod_val
+    new_other_value = max(mod_val, (round(new_other_value / mod_val)) * mod_val)
+
+    # Return an update only if the value has changed to prevent infinite loops
+    return gr.update(value=new_other_value) if int(new_other_value) != int(other_value) else gr.update()
+
 def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
                                   min_slider_h, max_slider_h,
                                   min_slider_w, max_slider_w,
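A worked example of update_linked_dimension's snapping arithmetic, using the new MOD_VALUE = 8 and the default aspect ratio W/H = 768/512 = 1.5 (illustrative values only):

```python
# Same arithmetic as update_linked_dimension, inlined for two sample heights.
aspect, mod = 768 / 512, 8           # W/H = 1.5, slider step

h = 500                              # height drives width ('h_drives_w')
w = max(mod, round(h * aspect / mod) * mod)
assert w == 752                      # 500 * 1.5 = 750 -> nearest multiple of 8

h = 520
w = max(mod, round(h * aspect / mod) * mod)
assert w == 784                      # 780 is not a multiple of 8, snaps to 784
```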
@@ -104,18 +142,25 @@ def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
     return new_h, new_w
 
 def handle_image_upload_for_dims_wan(uploaded_pil_image):
+    default_aspect = DEFAULT_W_SLIDER_VALUE / DEFAULT_H_SLIDER_VALUE
     if uploaded_pil_image is None:
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
+        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE), default_aspect
     try:
+        # This function calculates initial slider positions based on a max area
        new_h, new_w = _calculate_new_dimensions_wan(
            uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
            SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
            DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
        )
+
+        # We need the original image's true aspect ratio (W/H) for locking the sliders
+        orig_w, orig_h = uploaded_pil_image.size
+        aspect_ratio = orig_w / orig_h if orig_h > 0 else default_aspect
+
+        return gr.update(value=new_h), gr.update(value=new_w), aspect_ratio
    except Exception as e:
        gr.Warning("Error calculating new dimensions. Resetting to default.")
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
+        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE), default_aspect
 
 # --- GPU Duration Estimators for @spaces.GPU ---
 def get_i2v_duration(steps, duration_seconds):
@@ -135,12 +180,14 @@ def get_t2v_duration(steps, duration_seconds):
 @spaces.GPU(duration_from_args=get_i2v_duration)
 def generate_i2v_video(input_image, prompt, height, width,
                        negative_prompt, duration_seconds,
-                       guidance_scale, steps,
-                       seed, randomize_seed,
+                       guidance_scale, steps, seed, randomize_seed,
+                       lora_name, lora_weight,
                        progress=gr.Progress(track_tqdm=True)):
     """Generates a video from an initial image and a prompt."""
     if input_image is None:
         raise gr.Error("Please upload an input image for Image-to-Video generation.")
+    if i2v_pipe is None:
+        raise gr.Error("Image-to-Video pipeline is not available due to a loading error.")
 
     target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
     target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
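Note the asymmetry: the linked sliders round to the nearest MOD_VALUE step, while the generator floor-snaps with integer division (and clamps to at least MOD_VALUE). A tiny worked example:

```python
# Floor-snapping as done in generate_i2v_video (values for illustration).
MOD_VALUE = 8
height, width = 514, 769
target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
print(target_h, target_w)  # 512 768
```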
@@ -153,18 +200,39 @@ def generate_i2v_video(input_image, prompt, height, width,
     resized_image = input_image.resize((target_w, target_h))
     enhanced_prompt = f"{prompt}, cinematic quality, smooth motion, detailed animation, dynamic lighting"
 
-    with torch.inference_mode():
-        output_frames_list = i2v_pipe(
-            image=resized_image,
-            prompt=enhanced_prompt,
-            negative_prompt=negative_prompt,
-            height=target_h,
-            width=target_w,
-            num_frames=num_frames,
-            guidance_scale=float(guidance_scale),
-            num_inference_steps=int(steps),
-            generator=torch.Generator(device="cuda").manual_seed(current_seed)
-        ).frames[0]
+    adapter_name = "i2v_lora"
+    try:
+        # Dynamically load the selected LoRA
+        if lora_name and lora_name != "None":
+            print(f"🚀 Loading LoRA: {lora_name} with weight {lora_weight}")
+            i2v_pipe.load_lora_weights(
+                I2V_LORA_REPO_ID,
+                weight_name=lora_name,
+                adapter_name=adapter_name,
+                subfolder=I2V_LORA_SUBFOLDER
+            )
+            i2v_pipe.set_adapters([adapter_name], adapter_weights=[float(lora_weight)])
+
+        with torch.inference_mode():
+            output_frames_list = i2v_pipe(
+                image=resized_image,
+                prompt=enhanced_prompt,
+                negative_prompt=negative_prompt,
+                height=target_h,
+                width=target_w,
+                num_frames=num_frames,
+                guidance_scale=float(guidance_scale),
+                num_inference_steps=int(steps),
+                generator=torch.Generator(device="cuda").manual_seed(current_seed)
+            ).frames[0]
+    finally:
+        # Unload the LoRA to ensure a clean state for the next run
+        if lora_name and lora_name != "None" and hasattr(i2v_pipe, "unload_lora_weights"):
+            print(f"🧹 Unloading LoRA: {lora_name}")
+            i2v_pipe.unload_lora_weights()
+        # Clear GPU cache to free up memory for the next run
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 
     sanitized_prompt = sanitize_prompt_for_filename(prompt)
     filename = f"i2v_{sanitized_prompt}_{current_seed}.mp4"
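The try/finally above follows the standard diffusers adapter lifecycle: load the weights, set the adapter strength, run inference, then unload so the next request starts from a clean pipeline. A minimal self-contained sketch of the same cycle; the model and file names here are placeholders, not values from this commit:

```python
# Sketch of the load -> weight -> generate -> unload cycle on a
# LoRA-capable diffusers pipeline. Repo and file names are placeholders.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "some/base-model", torch_dtype=torch.float16
).to("cuda")
try:
    pipe.load_lora_weights("some/lora-repo", weight_name="style.safetensors",
                           adapter_name="style")
    pipe.set_adapters(["style"], adapter_weights=[0.8])
    result = pipe(prompt="a test prompt")
finally:
    pipe.unload_lora_weights()    # leave the pipeline clean for the next run
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # release cached GPU memory
```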
@@ -177,6 +245,7 @@
 # --- Gradio UI Layout ---
 with gr.Blocks() as demo:
     with gr.Column(elem_classes=["main-container"]):
+        i2v_aspect_ratio = gr.State(value=DEFAULT_W_SLIDER_VALUE / DEFAULT_H_SLIDER_VALUE)
         gr.Markdown("# ⚡ FusionX Enhanced Wan 2.1 Video Suite")
 
         with gr.Tabs(elem_classes=["gr-tabs"]):
@@ -203,9 +272,12 @@
                 i2v_neg_prompt = gr.Textbox(label="❌ Negative Prompt", value=default_negative_prompt, lines=4)
                 i2v_seed = gr.Slider(label="🎲 Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 i2v_rand_seed = gr.Checkbox(label="🔀 Randomize seed", value=True, interactive=True)
+                i2v_lora_name = gr.Dropdown(label="🎨 LoRA Style", choices=available_i2v_loras, value="None", info="Dynamically loaded from Hugging Face.", interactive=len(available_i2v_loras) > 1)
+                i2v_lora_weight = gr.Slider(label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.1, value=0.8, interactive=True)
                 with gr.Row():
                     i2v_height = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"📏 Height ({MOD_VALUE}px steps)")
                     i2v_width = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"📏 Width ({MOD_VALUE}px steps)")
+                gr.Markdown("<p style='color: #ffcc00; font-size: 0.9em;'>⚠️ High resolutions can lead to out-of-memory errors. If generation fails, try a smaller size.</p>")
                 i2v_steps = gr.Slider(minimum=1, maximum=20, step=1, value=8, label="🚀 Inference Steps", info="8-10 recommended for great results.")
                 i2v_guidance = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="🎯 Guidance Scale", visible=False)
 
@@ -222,18 +294,28 @@
             i2v_input_image.upload(
                 fn=handle_image_upload_for_dims_wan,
                 inputs=[i2v_input_image],
-                outputs=[i2v_height, i2v_width]
+                outputs=[i2v_height, i2v_width, i2v_aspect_ratio]
             )
             i2v_input_image.clear(
-                fn=lambda: (DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE),
+                fn=lambda: (DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE / DEFAULT_H_SLIDER_VALUE),
                 inputs=[],
-                outputs=[i2v_height, i2v_width]
+                outputs=[i2v_height, i2v_width, i2v_aspect_ratio]
             )
             i2v_generate_btn.click(
                 fn=generate_i2v_video,
-                inputs=[i2v_input_image, i2v_prompt, i2v_height, i2v_width, i2v_neg_prompt, i2v_duration, i2v_guidance, i2v_steps, i2v_seed, i2v_rand_seed],
+                inputs=[i2v_input_image, i2v_prompt, i2v_height, i2v_width, i2v_neg_prompt, i2v_duration, i2v_guidance, i2v_steps, i2v_seed, i2v_rand_seed, i2v_lora_name, i2v_lora_weight],
                 outputs=[i2v_output_video, i2v_seed, i2v_download]
             )
+            i2v_height.release(
+                fn=update_linked_dimension,
+                inputs=[i2v_height, i2v_width, i2v_aspect_ratio, gr.State(MOD_VALUE), gr.State('h_drives_w')],
+                outputs=[i2v_width]
+            )
+            i2v_width.release(
+                fn=update_linked_dimension,
+                inputs=[i2v_width, i2v_height, i2v_aspect_ratio, gr.State(MOD_VALUE), gr.State('w_drives_h')],
+                outputs=[i2v_height]
+            )
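The .release listener fires once when the user lets go of a slider, unlike .change, which fires on every step; combined with update_linked_dimension returning an empty gr.update() when nothing changed, this keeps the two sliders from re-triggering each other. A minimal standalone demo of the same pattern (a sketch, not code from app.py):

```python
# Two sliders locked to a fixed aspect ratio via .release events.
import gradio as gr

MOD = 8
with gr.Blocks() as demo:
    aspect = gr.State(1.5)  # W/H
    h = gr.Slider(128, 896, value=512, step=MOD, label="Height")
    w = gr.Slider(128, 896, value=768, step=MOD, label="Width")
    h.release(lambda hv, a: round(hv * a / MOD) * MOD, inputs=[h, aspect], outputs=[w])
    w.release(lambda wv, a: round(wv / a / MOD) * MOD, inputs=[w, aspect], outputs=[h])

demo.launch()
```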
 
 
 
 
 
 
 
 
 
 