Spaces:

multimodalart
/

wan2-1-fast

Running on Zero

App Files Files Community

multimodalart HF Staff committed 24 days ago

Commit

5158fc3

verified ·

1 Parent(s): 9762ac2

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -229

app.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import torch
 from diffusers import AutoencoderKLWan, WanPipeline, UniPCMultistepScheduler
 from diffusers.utils import export_to_video
-from diffusers.loaders.lora_conversion_utils import _convert_non_diffusers_lora_to_diffusers # Keep this if it's the base
 import gradio as gr
 import tempfile
 import os
 import spaces
 from huggingface_hub import hf_hub_download
 import logging # For better logging
 # --- Global Model Loading & LoRA Handling ---
 MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
@@ -18,245 +19,175 @@ LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# This dictionary will store the manual patches extracted by the converter
-MANUAL_PATCHES_STORE = {}
 def _custom_convert_non_diffusers_wan_lora_to_diffusers(state_dict):
     global MANUAL_PATCHES_STORE
-    MANUAL_PATCHES_STORE = {} # Clear previous patches
-    peft_state_dict = {}
     unhandled_keys = []
-    original_keys = list(state_dict.keys())
-    processed_state_dict = {}
     for k, v in state_dict.items():
         if k.startswith("diffusion_model."):
-            processed_state_dict[k[len("diffusion_model."):]] = v
         elif k.startswith("difusion_model."): # Handle potential typo
-             processed_state_dict[k[len("difusion_model."):]] = v
         else:
-            unhandled_keys.append(k) # Will be logged later if not handled by diff/diff_b
-    block_indices = set()
-    for k_proc in processed_state_dict:
-        if k_proc.startswith("blocks."):
-            try:
-                block_idx_str = k_proc.split("blocks.")[1].split(".")[0]
-                if block_idx_str.isdigit():
-                    block_indices.add(int(block_idx_str))
-            except IndexError:
-                pass # Will be handled as a non-block key or logged
-    num_blocks = 0
-    if block_indices:
-        num_blocks = max(block_indices) + 1
-    is_i2v_lora = any("k_img" in k for k in processed_state_dict) and \
-                  any("v_img" in k for k in processed_state_dict)
-    handled_original_keys = set()
-    # --- Handle Block-level LoRAs & Diffs ---
-    for i in range(num_blocks):
-        # Self-attention (maps to attn1 in WanTransformerBlock)
-        for o_lora, c_diffusers in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]):
-            lora_down_key_proc = f"blocks.{i}.self_attn.{o_lora}.lora_down.weight"
-            lora_up_key_proc = f"blocks.{i}.self_attn.{o_lora}.lora_up.weight"
-            diff_b_key_proc = f"blocks.{i}.self_attn.{o_lora}.diff_b"
-            diff_w_key_proc = f"blocks.{i}.self_attn.{o_lora}.diff" # Assuming .diff for weight
-            if lora_down_key_proc in processed_state_dict and lora_up_key_proc in processed_state_dict:
-                peft_state_dict[f"transformer.blocks.{i}.attn1.{c_diffusers}.lora_A.weight"] = processed_state_dict[lora_down_key_proc]
-                peft_state_dict[f"transformer.blocks.{i}.attn1.{c_diffusers}.lora_B.weight"] = processed_state_dict[lora_up_key_proc]
-                handled_original_keys.add(f"diffusion_model.{lora_down_key_proc}")
-                handled_original_keys.add(f"diffusion_model.{lora_up_key_proc}")
-            if diff_b_key_proc in processed_state_dict:
-                target_bias_key = f"transformer.blocks.{i}.attn1.{c_diffusers}.bias"
-                MANUAL_PATCHES_STORE[target_bias_key] = ("diff_b", processed_state_dict[diff_b_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_b_key_proc}")
-            if diff_w_key_proc in processed_state_dict:
-                target_weight_key = f"transformer.blocks.{i}.attn1.{c_diffusers}.weight"
-                MANUAL_PATCHES_STORE[target_weight_key] = ("diff", processed_state_dict[diff_w_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_w_key_proc}")
-        # Cross-attention (maps to attn2 in WanTransformerBlock)
-        for o_lora, c_diffusers in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]):
-            lora_down_key_proc = f"blocks.{i}.cross_attn.{o_lora}.lora_down.weight"
-            lora_up_key_proc = f"blocks.{i}.cross_attn.{o_lora}.lora_up.weight"
-            diff_b_key_proc = f"blocks.{i}.cross_attn.{o_lora}.diff_b"
-            diff_w_key_proc = f"blocks.{i}.cross_attn.{o_lora}.diff"
-            norm_q_diff_key_proc = f"blocks.{i}.cross_attn.norm_q.diff" # specific norm diff
-            norm_k_diff_key_proc = f"blocks.{i}.cross_attn.norm_k.diff" # specific norm diff
-            if lora_down_key_proc in processed_state_dict and lora_up_key_proc in processed_state_dict:
-                peft_state_dict[f"transformer.blocks.{i}.attn2.{c_diffusers}.lora_A.weight"] = processed_state_dict[lora_down_key_proc]
-                peft_state_dict[f"transformer.blocks.{i}.attn2.{c_diffusers}.lora_B.weight"] = processed_state_dict[lora_up_key_proc]
-                handled_original_keys.add(f"diffusion_model.{lora_down_key_proc}")
-                handled_original_keys.add(f"diffusion_model.{lora_up_key_proc}")
-            if diff_b_key_proc in processed_state_dict:
-                target_bias_key = f"transformer.blocks.{i}.attn2.{c_diffusers}.bias"
-                MANUAL_PATCHES_STORE[target_bias_key] = ("diff_b", processed_state_dict[diff_b_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_b_key_proc}")
-            if diff_w_key_proc in processed_state_dict:
-                target_weight_key = f"transformer.blocks.{i}.attn2.{c_diffusers}.weight"
-                MANUAL_PATCHES_STORE[target_weight_key] = ("diff", processed_state_dict[diff_w_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_w_key_proc}")
-            if norm_q_diff_key_proc in processed_state_dict: # Assuming norm_q on q_proj
-                MANUAL_PATCHES_STORE[f"transformer.blocks.{i}.attn2.norm_q.weight"] = ("diff", processed_state_dict[norm_q_diff_key_proc])
-                handled_original_keys.add(f"diffusion_model.{norm_q_diff_key_proc}")
-            if norm_k_diff_key_proc in processed_state_dict: # Assuming norm_k on k_proj
-                MANUAL_PATCHES_STORE[f"transformer.blocks.{i}.attn2.norm_k.weight"] = ("diff", processed_state_dict[norm_k_diff_key_proc])
-                handled_original_keys.add(f"diffusion_model.{norm_k_diff_key_proc}")
-        if is_i2v_lora:
-            for o_lora, c_diffusers in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]):
-                lora_down_key_proc = f"blocks.{i}.cross_attn.{o_lora}.lora_down.weight"
-                lora_up_key_proc = f"blocks.{i}.cross_attn.{o_lora}.lora_up.weight"
-                diff_b_key_proc = f"blocks.{i}.cross_attn.{o_lora}.diff_b"
-                diff_w_key_proc = f"blocks.{i}.cross_attn.{o_lora}.diff"
-                if lora_down_key_proc in processed_state_dict and lora_up_key_proc in processed_state_dict:
-                    peft_state_dict[f"transformer.blocks.{i}.attn2.{c_diffusers}.lora_A.weight"] = processed_state_dict[lora_down_key_proc]
-                    peft_state_dict[f"transformer.blocks.{i}.attn2.{c_diffusers}.lora_B.weight"] = processed_state_dict[lora_up_key_proc]
-                    handled_original_keys.add(f"diffusion_model.{lora_down_key_proc}")
-                    handled_original_keys.add(f"diffusion_model.{lora_up_key_proc}")
-                if diff_b_key_proc in processed_state_dict:
-                    target_bias_key = f"transformer.blocks.{i}.attn2.{c_diffusers}.bias"
-                    MANUAL_PATCHES_STORE[target_bias_key] = ("diff_b", processed_state_dict[diff_b_key_proc])
-                    handled_original_keys.add(f"diffusion_model.{diff_b_key_proc}")
-                if diff_w_key_proc in processed_state_dict:
-                    target_weight_key = f"transformer.blocks.{i}.attn2.{c_diffusers}.weight"
-                    MANUAL_PATCHES_STORE[target_weight_key] = ("diff", processed_state_dict[diff_w_key_proc])
-                    handled_original_keys.add(f"diffusion_model.{diff_w_key_proc}")
-        # FFN
-        for o_lora_suffix, c_diffusers_path in zip([".0", ".2"], ["net.0.proj", "net.2"]):
-            lora_down_key_proc = f"blocks.{i}.ffn{o_lora_suffix}.lora_down.weight"
-            lora_up_key_proc = f"blocks.{i}.ffn{o_lora_suffix}.lora_up.weight"
-            diff_b_key_proc = f"blocks.{i}.ffn{o_lora_suffix}.diff_b"
-            diff_w_key_proc = f"blocks.{i}.ffn{o_lora_suffix}.diff" # Assuming .diff for weight
-            if lora_down_key_proc in processed_state_dict and lora_up_key_proc in processed_state_dict:
-                peft_state_dict[f"transformer.blocks.{i}.ffn.{c_diffusers_path}.lora_A.weight"] = processed_state_dict[lora_down_key_proc]
-                peft_state_dict[f"transformer.blocks.{i}.ffn.{c_diffusers_path}.lora_B.weight"] = processed_state_dict[lora_up_key_proc]
-                handled_original_keys.add(f"diffusion_model.{lora_down_key_proc}")
-                handled_original_keys.add(f"diffusion_model.{lora_up_key_proc}")
-            if diff_b_key_proc in processed_state_dict:
-                target_bias_key = f"transformer.blocks.{i}.ffn.{c_diffusers_path}.bias"
-                MANUAL_PATCHES_STORE[target_bias_key] = ("diff_b", processed_state_dict[diff_b_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_b_key_proc}")
-            if diff_w_key_proc in processed_state_dict:
-                target_weight_key = f"transformer.blocks.{i}.ffn.{c_diffusers_path}.weight"
-                MANUAL_PATCHES_STORE[target_weight_key] = ("diff", processed_state_dict[diff_w_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_w_key_proc}")
-        # Block norm3 diffs (assuming norm3 applies to the output of the FFN in the original Wan block structure)
-        norm3_diff_key_proc = f"blocks.{i}.norm3.diff"
-        norm3_diff_b_key_proc = f"blocks.{i}.norm3.diff_b"
-        if norm3_diff_key_proc in processed_state_dict:
-            MANUAL_PATCHES_STORE[f"transformer.blocks.{i}.norm3.weight"] = ("diff", processed_state_dict[norm3_diff_key_proc]) # Norms usually have .weight
-            handled_original_keys.add(f"diffusion_model.{norm3_diff_key_proc}")
-        if norm3_diff_b_key_proc in processed_state_dict:
-            MANUAL_PATCHES_STORE[f"transformer.blocks.{i}.norm3.bias"] = ("diff_b", processed_state_dict[norm3_diff_b_key_proc]) # And .bias
-            handled_original_keys.add(f"diffusion_model.{norm3_diff_b_key_proc}")
-    # --- Handle Top-level LoRAs & Diffs ---
-    top_level_mappings = [
-        # (lora_base_path_proc, diffusers_base_path, lora_suffixes, diffusers_suffixes)
-        ("text_embedding", "transformer.condition_embedder.text_embedder", ["0", "2"], ["linear_1", "linear_2"]),
-        ("time_embedding", "transformer.condition_embedder.time_embedder", ["0", "2"], ["linear_1", "linear_2"]),
-        ("time_projection", "transformer.condition_embedder.time_proj", ["1"], [""]), # Wan has .1, Diffusers has no suffix
-        ("head", "transformer.proj_out", ["head"], [""]), # Wan has .head, Diffusers has no suffix
-    ]
-    for lora_base_proc, diffusers_base, lora_suffixes, diffusers_suffixes in top_level_mappings:
-        for l_suffix, d_suffix in zip(lora_suffixes, diffusers_suffixes):
-            actual_lora_path_proc = f"{lora_base_proc}.{l_suffix}" if l_suffix else lora_base_proc
-            actual_diffusers_path = f"{diffusers_base}.{d_suffix}" if d_suffix else diffusers_base
-            lora_down_key_proc = f"{actual_lora_path_proc}.lora_down.weight"
-            lora_up_key_proc = f"{actual_lora_path_proc}.lora_up.weight"
-            diff_b_key_proc = f"{actual_lora_path_proc}.diff_b"
-            diff_w_key_proc = f"{actual_lora_path_proc}.diff"
-            if lora_down_key_proc in processed_state_dict and lora_up_key_proc in processed_state_dict:
-                peft_state_dict[f"{actual_diffusers_path}.lora_A.weight"] = processed_state_dict[lora_down_key_proc]
-                peft_state_dict[f"{actual_diffusers_path}.lora_B.weight"] = processed_state_dict[lora_up_key_proc]
-                handled_original_keys.add(f"diffusion_model.{lora_down_key_proc}")
-                handled_original_keys.add(f"diffusion_model.{lora_up_key_proc}")
-            if diff_b_key_proc in processed_state_dict:
-                MANUAL_PATCHES_STORE[f"{actual_diffusers_path}.bias"] = ("diff_b", processed_state_dict[diff_b_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_b_key_proc}")
-            if diff_w_key_proc in processed_state_dict:
-                MANUAL_PATCHES_STORE[f"{actual_diffusers_path}.weight"] = ("diff", processed_state_dict[diff_w_key_proc])
-                handled_original_keys.add(f"diffusion_model.{diff_w_key_proc}")
-    # Patch Embedding
-    patch_emb_diff_b_key = "patch_embedding.diff_b"
-    if patch_emb_diff_b_key in processed_state_dict:
-        MANUAL_PATCHES_STORE["transformer.patch_embedding.bias"] = ("diff_b", processed_state_dict[patch_emb_diff_b_key])
-        handled_original_keys.add(f"diffusion_model.{patch_emb_diff_b_key}")
-    # Assuming .diff might exist for patch_embedding.weight, though not explicitly in your example list
-    patch_emb_diff_w_key = "patch_embedding.diff"
-    if patch_emb_diff_w_key in processed_state_dict:
-        MANUAL_PATCHES_STORE["transformer.patch_embedding.weight"] = ("diff", processed_state_dict[patch_emb_diff_w_key])
-        handled_original_keys.add(f"diffusion_model.{patch_emb_diff_w_key}")
-    # Log unhandled keys
-    final_unhandled_keys = []
-    for k_orig in original_keys:
-        # Reconstruct the processed key to check if it was actually handled by diff/diff_b or lora A/B logic
-        k_proc = None
-        if k_orig.startswith("diffusion_model."):
-            k_proc = k_orig[len("diffusion_model."):]
-        elif k_orig.startswith("difusion_model."):
-            k_proc = k_orig[len("difusion_model."):]
-        if k_orig not in handled_original_keys and (k_proc is None or not any(k_proc.endswith(s) for s in [".lora_down.weight", ".lora_up.weight", ".diff", ".diff_b", ".alpha"])):
-            final_unhandled_keys.append(k_orig)
-    if final_unhandled_keys:
-        logger.warning(
-            f"The following keys from the Wan 2.1 LoRA checkpoint were not converted to PEFT LoRA A/B format "
-            f"nor identified as manual diff patches: {final_unhandled_keys}."
-        )
-    if not peft_state_dict and not MANUAL_PATCHES_STORE:
-        logger.warning("No valid LoRA A/B weights or manual diff patches found after conversion.")
-    return peft_state_dict
-def apply_manual_diff_patches(pipe_model_component, patches_store, strength_model=1.0):
-    if not patches_store:
-        logger.info("No manual diff patches to apply.")
         return
-    logger.info(f"Applying {len(patches_store)} manual diff patches...")
-    for target_key, (patch_type, diff_tensor) in patches_store.items():
         try:
-            module_path, param_name = target_key.rsplit('.', 1)
-            module = pipe_model_component.get_submodule(module_path)
-            original_param = getattr(module, param_name)
             if original_param.shape != diff_tensor.shape:
-                logger.warning(f"Shape mismatch for {target_key}: model {original_param.shape}, LoRA {diff_tensor.shape}. Skipping patch.")
                 continue
             with torch.no_grad():
-                # Ensure diff_tensor is on the same device and dtype as the original parameter
-                diff_tensor_casted = diff_tensor.to(device=original_param.device, dtype=original_param.dtype)
-                scaled_diff = diff_tensor_casted * strength_model
-                original_param.add_(scaled_diff)
-            # logger.info(f"Applied {patch_type} to {target_key} with strength {strength_model}")
         except AttributeError:
-            logger.warning(f"Could not find parameter {target_key} in the model component. Skipping patch.")
         except Exception as e:
-            logger.error(f"Error applying patch to {target_key}: {e}")
-    logger.info("Finished applying manual diff patches.")
 # --- Model Loading ---
 logger.info(f"Loading VAE for {MODEL_ID}...")
@@ -271,7 +202,7 @@ pipe = WanPipeline.from_pretrained(
     vae=vae,
     torch_dtype=torch.bfloat16 # bfloat16 for pipeline
 )
-flow_shift = 8.0  # 5.0 for 720P, 3.0 for 480P
 pipe.scheduler = UniPCMultistepScheduler.from_config(
     pipe.scheduler.config, flow_shift=flow_shift
 )
@@ -284,14 +215,13 @@ causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
 logger.info("Loading LoRA weights with custom converter...")
-# lora_state_dict_raw = WanPipeline.lora_state_dict(causvid_path) # This might already do some conversion
-# Alternative: Load raw state_dict and then convert
 from safetensors.torch import load_file as load_safetensors
 raw_lora_state_dict = load_safetensors(causvid_path)
 peft_state_dict = _custom_convert_non_diffusers_wan_lora_to_diffusers(raw_lora_state_dict)
 if peft_state_dict:
     pipe.load_lora_weights(
         peft_state_dict,
@@ -301,8 +231,8 @@ if peft_state_dict:
 else:
     logger.warning("No PEFT-compatible LoRA weights found after conversion.")
-lora_strength = 1.0
-apply_manual_diff_patches(pipe.transformer, MANUAL_PATCHES_STORE, strength_model=lora_strength)
 logger.info("Manual diff_b/diff patches applied.")
@@ -334,7 +264,7 @@ def generate_video(prompt, negative_prompt, height, width, num_frames, guidance_
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
     export_to_video(output_frames_list, video_path, fps=fps)
     logger.info(f"Video successfully generated and saved to {video_path}")
     return video_path
@@ -350,7 +280,6 @@ with gr.Blocks() as demo:
     Model is loaded into memory when the app starts. This might take a few minutes.
     Ensure you have a GPU with sufficient VRAM (e.g., ~24GB+ for these default settings).
     """)
-    # ... (rest of your Gradio UI definition remains the same) ...
     with gr.Row():
         with gr.Column(scale=2):
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=3)
@@ -363,7 +292,7 @@ with gr.Blocks() as demo:
                 height_input = gr.Slider(minimum=256, maximum=768, step=64, value=480, label="Height (multiple of 8)")
                 width_input = gr.Slider(minimum=256, maximum=1024, step=64, value=832, label="Width (multiple of 8)")
             with gr.Row():
-                num_frames_input = gr.Slider(minimum=16, maximum=100, step=1, value=25, label="Number of Frames")
                 fps_input = gr.Slider(minimum=5, maximum=30, step=1, value=15, label="Output FPS")
             steps = gr.Slider(minimum=1.0, maximum=30.0, value=4.0, label="Steps")
             guidance_scale_input = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale")

 import torch
 from diffusers import AutoencoderKLWan, WanPipeline, UniPCMultistepScheduler
 from diffusers.utils import export_to_video
+from diffusers.loaders.lora_conversion_utils import _convert_non_diffusers_wan_lora_to_diffusers # Keep this if it's the base for standard LoRA parts
 import gradio as gr
 import tempfile
 import os
 import spaces
 from huggingface_hub import hf_hub_download
 import logging # For better logging
+import re # For key manipulation
 # --- Global Model Loading & LoRA Handling ---
 MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+MANUAL_PATCHES_STORE = {"diff": {}, "diff_b": {}}
 def _custom_convert_non_diffusers_wan_lora_to_diffusers(state_dict):
     global MANUAL_PATCHES_STORE
+    MANUAL_PATCHES_STORE = {"diff": {}, "diff_b": {}} # Reset for each conversion
+    peft_compatible_state_dict = {}
     unhandled_keys = []
+    original_keys_map_to_diffusers = {}
+    # Mapping based on ComfyUI's WanModel structure and PeftAdapterMixin logic
+    # This needs to map the original LoRA key naming to Diffusers' expected PEFT keys
+    # diffusion_model.blocks.0.self_attn.q.lora_down.weight -> transformer.blocks.0.attn1.to_q.lora_A.weight
+    # diffusion_model.blocks.0.ffn.0.lora_down.weight -> transformer.blocks.0.ffn.net.0.proj.lora_A.weight
+    # diffusion_model.text_embedding.0.lora_down.weight -> transformer.condition_embedder.text_embedder.linear_1.lora_A.weight (example)
+    # Strip "diffusion_model." and map
     for k, v in state_dict.items():
+        original_k = k # Keep for logging/debugging
         if k.startswith("diffusion_model."):
+            k_stripped = k[len("diffusion_model."):]
         elif k.startswith("difusion_model."): # Handle potential typo
+            k_stripped = k[len("difusion_model."):]
+            logger.warning(f"Key '{original_k}' starts with 'difusion_model.' (potential typo), processing as 'diffusion_model.'.")
         else:
+            unhandled_keys.append(original_k)
+            continue
+        # Handle .diff and .diff_b keys by storing them separately
+        if k_stripped.endswith(".diff"):
+            target_model_key = k_stripped[:-len(".diff")] + ".weight"
+            MANUAL_PATCHES_STORE["diff"][target_model_key] = v
+            continue
+        elif k_stripped.endswith(".diff_b"):
+            target_model_key = k_stripped[:-len(".diff_b")] + ".bias"
+            MANUAL_PATCHES_STORE["diff_b"][target_model_key] = v
+            continue
+        # Handle standard LoRA A/B matrices
+        if ".lora_down.weight" in k_stripped:
+            diffusers_key_base = k_stripped.replace(".lora_down.weight", "")
+            # Apply transformations similar to _convert_non_diffusers_wan_lora_to_diffusers from diffusers
+            # but adapt to the PEFT naming convention (lora_A/lora_B)
+            # This part needs careful mapping based on WanTransformer3DModel structure
+            # Example mappings (these need to be comprehensive for all layers)
+            if diffusers_key_base.startswith("blocks."):
+                parts = diffusers_key_base.split(".")
+                block_idx = parts[1]
+                attn_type = parts[2] # self_attn or cross_attn
+                proj_type = parts[3] # q, k, v, o
+                if attn_type == "self_attn":
+                    diffusers_peft_key = f"transformer.blocks.{block_idx}.attn1.to_{proj_type}.lora_A.weight"
+                elif attn_type == "cross_attn":
+                    # WanTransformer3DModel uses attn2 for cross-attention like features
+                    diffusers_peft_key = f"transformer.blocks.{block_idx}.attn2.to_{proj_type}.lora_A.weight"
+                else: # ffn
+                    ffn_idx = proj_type # "0" or "2"
+                    diffusers_peft_key = f"transformer.blocks.{block_idx}.ffn.net.{ffn_idx}.proj.lora_A.weight"
+            elif diffusers_key_base.startswith("text_embedding."):
+                idx_map = {"0": "linear_1", "2": "linear_2"}
+                idx = diffusers_key_base.split(".")[1]
+                diffusers_peft_key = f"transformer.condition_embedder.text_embedder.{idx_map[idx]}.lora_A.weight"
+            elif diffusers_key_base.startswith("time_embedding."):
+                idx_map = {"0": "linear_1", "2": "linear_2"}
+                idx = diffusers_key_base.split(".")[1]
+                diffusers_peft_key = f"transformer.condition_embedder.time_embedder.{idx_map[idx]}.lora_A.weight"
+            elif diffusers_key_base.startswith("time_projection."): # Assuming '1' from your example
+                diffusers_peft_key = f"transformer.condition_embedder.time_proj.lora_A.weight"
+            elif diffusers_key_base.startswith("patch_embedding"):
+                # WanTransformer3DModel has 'patch_embedding' at the top level
+                diffusers_peft_key = f"transformer.patch_embedding.lora_A.weight" # This needs to match how PEFT would name it
+            elif diffusers_key_base.startswith("head.head"):
+                 diffusers_peft_key = f"transformer.proj_out.lora_A.weight"
+            else:
+                unhandled_keys.append(original_k)
+                continue
+            peft_compatible_state_dict[diffusers_peft_key] = v
+            original_keys_map_to_diffusers[k_stripped] = diffusers_peft_key
+        elif ".lora_up.weight" in k_stripped:
+            # Find the corresponding lora_down key to determine the base name
+            down_key_stripped = k_stripped.replace(".lora_up.weight", ".lora_down.weight")
+            if down_key_stripped in original_keys_map_to_diffusers:
+                diffusers_peft_key_A = original_keys_map_to_diffusers[down_key_stripped]
+                diffusers_peft_key_B = diffusers_peft_key_A.replace(".lora_A.weight", ".lora_B.weight")
+                peft_compatible_state_dict[diffusers_peft_key_B] = v
+            else:
+                unhandled_keys.append(original_k)
+        elif not (k_stripped.endswith(".alpha") or k_stripped.endswith(".dora_scale")): # Alphas are handled by PEFT if lora_A/B present
+            unhandled_keys.append(original_k)
+    if unhandled_keys:
+        logger.warning(f"Custom Wan LoRA Converter: Unhandled keys: {unhandled_keys}")
+    return peft_compatible_state_dict
+def apply_manual_diff_patches(pipe_model, patches_store, lora_strength=1.0):
+    if not hasattr(pipe_model, "transformer"):
+        logger.error("Pipeline model does not have a 'transformer' attribute to patch.")
         return
+    transformer = pipe_model.transformer
+    changed_params_count = 0
+    for key_base, diff_tensor in patches_store.get("diff", {}).items():
+        # key_base is like "blocks.0.self_attn.q.weight"
+        # We need to prepend "transformer." to match diffusers internal naming
+        target_key_full = f"transformer.{key_base}"
         try:
+            module_path_parts = target_key_full.split('.')
+            param_name = module_path_parts[-1]
+            module_path = ".".join(module_path_parts[:-1])
+            module = transformer
+            for part in module_path.split('.')[1:]: # Skip the first 'transformer'
+                module = getattr(module, part)
+            original_param = getattr(module, param_name)
             if original_param.shape != diff_tensor.shape:
+                logger.warning(f"Shape mismatch for diff patch on {target_key_full}: model {original_param.shape}, lora {diff_tensor.shape}. Skipping.")
                 continue
             with torch.no_grad():
+                scaled_diff = (lora_strength * diff_tensor.to(original_param.device, original_param.dtype))
+                original_param.data.add_(scaled_diff)
+                changed_params_count +=1
         except AttributeError:
+            logger.warning(f"Could not find parameter {target_key_full} in transformer to apply diff patch.")
         except Exception as e:
+            logger.error(f"Error applying diff patch to {target_key_full}: {e}")
+    for key_base, diff_b_tensor in patches_store.get("diff_b", {}).items():
+        # key_base is like "blocks.0.self_attn.q.bias"
+        target_key_full = f"transformer.{key_base}"
+        try:
+            module_path_parts = target_key_full.split('.')
+            param_name = module_path_parts[-1]
+            module_path = ".".join(module_path_parts[:-1])
+            module = transformer
+            for part in module_path.split('.')[1:]:
+                module = getattr(module, part)
+            original_param = getattr(module, param_name)
+            if original_param is None:
+                logger.warning(f"Bias parameter {target_key_full} is None in model. Skipping diff_b patch.")
+                continue
+            if original_param.shape != diff_b_tensor.shape:
+                logger.warning(f"Shape mismatch for diff_b patch on {target_key_full}: model {original_param.shape}, lora {diff_b_tensor.shape}. Skipping.")
+                continue
+            with torch.no_grad():
+                scaled_diff_b = (lora_strength * diff_b_tensor.to(original_param.device, original_param.dtype))
+                original_param.data.add_(scaled_diff_b)
+                changed_params_count +=1
+        except AttributeError:
+            logger.warning(f"Could not find parameter {target_key_full} in transformer to apply diff_b patch.")
+        except Exception as e:
+            logger.error(f"Error applying diff_b patch to {target_key_full}: {e}")
+    if changed_params_count > 0:
+        logger.info(f"Applied {changed_params_count} manual diff/diff_b patches.")
+    else:
+        logger.info("No manual diff/diff_b patches were applied.")
 # --- Model Loading ---
 logger.info(f"Loading VAE for {MODEL_ID}...")
     vae=vae,
     torch_dtype=torch.bfloat16 # bfloat16 for pipeline
 )
+flow_shift = 8.0
 pipe.scheduler = UniPCMultistepScheduler.from_config(
     pipe.scheduler.config, flow_shift=flow_shift
 )
 logger.info("Loading LoRA weights with custom converter...")
 from safetensors.torch import load_file as load_safetensors
 raw_lora_state_dict = load_safetensors(causvid_path)
+# Now call our custom converter which will populate MANUAL_PATCHES_STORE
 peft_state_dict = _custom_convert_non_diffusers_wan_lora_to_diffusers(raw_lora_state_dict)
+# Load the LoRA A/B matrices using PEFT
 if peft_state_dict:
     pipe.load_lora_weights(
         peft_state_dict,
 else:
     logger.warning("No PEFT-compatible LoRA weights found after conversion.")
+# Apply manual diff_b and diff patches
+apply_manual_diff_patches(pipe, MANUAL_PATCHES_STORE, lora_strength=1.0) # Assuming default strength 1.0
 logger.info("Manual diff_b/diff patches applied.")
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
     export_to_video(output_frames_list, video_path, fps=fps)
     logger.info(f"Video successfully generated and saved to {video_path}")
     return video_path
     Model is loaded into memory when the app starts. This might take a few minutes.
     Ensure you have a GPU with sufficient VRAM (e.g., ~24GB+ for these default settings).
     """)
     with gr.Row():
         with gr.Column(scale=2):
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=3)
                 height_input = gr.Slider(minimum=256, maximum=768, step=64, value=480, label="Height (multiple of 8)")
                 width_input = gr.Slider(minimum=256, maximum=1024, step=64, value=832, label="Width (multiple of 8)")
             with gr.Row():
+                num_frames_input = gr.Slider(minimum=16, maximum=100, step=1, value=25, label="Number of Frames")
                 fps_input = gr.Slider(minimum=5, maximum=30, step=1, value=15, label="Output FPS")
             steps = gr.Slider(minimum=1.0, maximum=30.0, value=4.0, label="Steps")
             guidance_scale_input = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale")