thankfulcarp committed
Commit 9530e57 · 1 Parent(s): 6d0f162

Awesome new LoRA

Files changed (1): app.py (+50, -57)
app.py CHANGED
@@ -5,6 +5,8 @@ from diffusers.utils import export_to_video
 from transformers import CLIPVisionModel
 import gradio as gr
 import tempfile
+import re
+import os
 
 from huggingface_hub import hf_hub_download
 import numpy as np
@@ -12,15 +14,12 @@ from PIL import Image
 import random
 
 # Base MODEL_ID (using original Wan model that's compatible with diffusers)
-MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
+MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"
 
-# FusionX enhancement LoRAs (based on FusionX composition)
-LORA_REPO_ID = "Kijai/WanVideo_comfy"
-LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
-
-# Additional enhancement LoRAs for FusionX-like quality
-ACCVIDEO_LORA_REPO = "alibaba-pai/Wan2.1-Fun-Reward-LoRAs"
-MPS_LORA_FILENAME = "Wan2.1-Fun-14B-InP-MPS.safetensors"
+# Merged FusionX enhancement LoRA
+LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
+LORA_FILENAME = "Wan2.1_I2V_14B_FusionX_LoRA.safetensors"
+LORA_SUBFOLDER = "FusionX_LoRa"
 
 # Load enhanced model components
 print("🚀 Loading FusionX Enhanced Wan2.1 I2V Model...")
@@ -34,37 +33,18 @@ pipe = WanImageToVideoPipeline.from_pretrained(
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
 pipe.to("cuda")
 
-# Load FusionX enhancement LoRAs
-lora_adapters = []
-lora_weights = []
-
-try:
-    # Load CausVid LoRA (strength 1.0 as per FusionX)
-    causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
-    pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
-    lora_adapters.append("causvid_lora")
-    lora_weights.append(1.0)  # FusionX uses 1.0 for CausVid
-    print("✅ CausVid LoRA loaded (strength: 1.0)")
-except Exception as e:
-    print(f"⚠️ CausVid LoRA not loaded: {e}")
-
+# Load and fuse the single merged FusionX LoRA
 try:
-    # Load MPS Rewards LoRA (strength 0.7 as per FusionX)
-    mps_path = hf_hub_download(repo_id=ACCVIDEO_LORA_REPO, filename=MPS_LORA_FILENAME)
-    pipe.load_lora_weights(mps_path, adapter_name="mps_lora")
-    lora_adapters.append("mps_lora")
-    lora_weights.append(0.7)  # FusionX uses 0.7 for MPS
-    print("✅ MPS Rewards LoRA loaded (strength: 0.7)")
+    lora_path = hf_hub_download(
+        repo_id=LORA_REPO_ID,
+        filename=LORA_FILENAME,
+        subfolder=LORA_SUBFOLDER
+    )
+    pipe.load_lora_weights(lora_path, adapter_name="fusionx")
+    print("✅ Merged FusionX LoRA loaded. Use the 'LoRA Strength' slider to control the effect.")
 except Exception as e:
-    print(f"⚠️ MPS LoRA not loaded: {e}")
-
-# Apply LoRA adapters if any were loaded
-if lora_adapters:
-    pipe.set_adapters(lora_adapters, adapter_weights=lora_weights)
-    pipe.fuse_lora()
-    print(f"🔥 FusionX Enhancement Applied: {len(lora_adapters)} LoRAs fused")
-else:
-    print("📝 No LoRAs loaded - using base Wan model")
+    print(f"⚠️ Merged FusionX LoRA not loaded: {e}")
+    print("📝 Using base Wan model without LoRA enhancement. The 'LoRA Strength' slider will have no effect.")
 
 MOD_VALUE = 32
 DEFAULT_H_SLIDER_VALUE = 576  # FusionX optimized default
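Note: unlike the replaced code, this version loads the merged adapter but never calls pipe.set_adapters() or pipe.fuse_lora(), so the adapter runs at its load-time strength unless the scale passed at call time (see the generation hunk below) is honored by the pipeline. A minimal per-request alternative, assuming diffusers' PEFT-backed LoRA and the adapter name "fusionx" from above:

def apply_lora_strength(pipe, scale: float) -> None:
    # set_adapters re-weights already-loaded adapters without fusing them
    pipe.set_adapters(["fusionx"], adapter_weights=[float(scale)])

Calling apply_lora_strength(pipe, lora_scale) at the top of generate_video would make the slider effective regardless of which call-time kwargs the pipeline accepts.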
@@ -288,6 +268,17 @@ input[type="checkbox"] {
 }
 """
 
+def sanitize_prompt_for_filename(prompt: str, max_len: int = 60) -> str:
+    """Sanitizes a prompt string to be used as a valid filename."""
+    if not prompt:
+        prompt = "video"
+    # Remove non-alphanumeric characters (except spaces, hyphens, underscores)
+    sanitized = re.sub(r'[^\w\s_-]', '', prompt).strip()
+    # Replace spaces and multiple hyphens/underscores with a single underscore
+    sanitized = re.sub(r'[\s_-]+', '_', sanitized)
+    # Truncate to max_len
+    return sanitized[:max_len]
+
 def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
                                   min_slider_h, max_slider_h,
                                   min_slider_w, max_slider_w,
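For clarity, the expected behavior of the new helper (illustrative examples, not part of the commit): punctuation is stripped, runs of whitespace, hyphens, and underscores collapse to single underscores, and an empty prompt falls back to "video".

assert sanitize_prompt_for_filename("A cat, running!") == "A_cat_running"
assert sanitize_prompt_for_filename("") == "video"
assert len(sanitize_prompt_for_filename("x " * 100)) <= 60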
@@ -325,7 +316,7 @@ def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_
 
 def get_duration(input_image, prompt, height, width,
                  negative_prompt, duration_seconds,
-                 guidance_scale, steps,
+                 guidance_scale, steps, lora_scale,
                  seed, randomize_seed,
                  progress):
     # FusionX optimized duration calculation
@@ -339,7 +330,7 @@ def get_duration(input_image, prompt, height, width,
 
 @spaces.GPU(duration=get_duration)
 def generate_video(input_image, prompt, height, width,
                    negative_prompt=default_negative_prompt, duration_seconds=3,
-                   guidance_scale=1, steps=8,  # FusionX optimized default
+                   guidance_scale=1, steps=8, lora_scale=1.0,
                    seed=42, randomize_seed=False,
                    progress=gr.Progress(track_tqdm=True)):
@@ -368,11 +359,17 @@ def generate_video(input_image, prompt, height, width,
         num_frames=num_frames,
         guidance_scale=float(guidance_scale),
         num_inference_steps=int(steps),
-        generator=torch.Generator(device="cuda").manual_seed(current_seed)
+        generator=torch.Generator(device="cuda").manual_seed(current_seed),
+        cross_attention_kwargs={"scale": float(lora_scale)}
     ).frames[0]
 
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
-        video_path = tmpfile.name
+    # Create a unique filename for download
+    sanitized_prompt = sanitize_prompt_for_filename(prompt)
+    filename = f"{sanitized_prompt}_{current_seed}.mp4"
+
+    temp_dir = tempfile.mkdtemp()
+    video_path = os.path.join(temp_dir, filename)
+
     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
     return video_path, current_seed
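Two cautions on this hunk. First, cross_attention_kwargs is the name used by the SD-family pipelines; recent diffusers Wan pipelines expose attention_kwargs instead, and an unexpected kwarg raises a TypeError at call time. A hedged guard that picks whichever name the installed pipeline actually accepts (an assumption about the pinned diffusers version, not from the commit):

import inspect

def lora_scale_kwargs(pipe, scale: float) -> dict:
    # Inspect the pipeline's __call__ signature and use the matching kwarg.
    params = inspect.signature(pipe.__call__).parameters
    if "attention_kwargs" in params:
        return {"attention_kwargs": {"scale": float(scale)}}
    if "cross_attention_kwargs" in params:
        return {"cross_attention_kwargs": {"scale": float(scale)}}
    return {}  # neither supported: adapter runs at its loaded strength

The call site would then splat **lora_scale_kwargs(pipe, lora_scale) into pipe(...). Second, tempfile.mkdtemp() creates a directory that is never deleted, so each generation leaks one directory for the life of the Space; scheduling cleanup of old temp dirs would be a sensible follow-up.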
 
@@ -439,6 +436,14 @@ with gr.Blocks() as demo:
            value=DEFAULT_W_SLIDER_VALUE,
            label=f"📐 Output Width (FusionX optimized: {MOD_VALUE} multiples)"
        )
+        lora_scale_slider = gr.Slider(
+            minimum=0.0,
+            maximum=2.5,
+            step=0.05,
+            value=1.0,
+            label="💪 FusionX LoRA Strength",
+            info="Control the intensity of the FusionX effect. >1.0 for stronger effect, <1.0 for less."
+        )
        steps_slider = gr.Slider(
            minimum=1,
            maximum=20,
@@ -466,7 +471,8 @@ with gr.Blocks() as demo:
        video_output = gr.Video(
            label="🎥 FusionX Enhanced Generated Video",
            autoplay=True,
-            interactive=False
+            interactive=False,
+            download=True
        )
 
    input_image_component.upload(
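Note: download=True is not a documented gr.Video constructor argument and would raise a TypeError on recent Gradio; the download toggle there is show_download_button (hedged, this depends on the Gradio version the Space pins):

video_output = gr.Video(
    label="🎥 FusionX Enhanced Generated Video",
    autoplay=True,
    interactive=False,
    show_download_button=True,  # assumed Gradio 4.x name for the download control
)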
@@ -484,23 +490,10 @@ with gr.Blocks() as demo:
    ui_inputs = [
        input_image_component, prompt_input, height_input, width_input,
        negative_prompt_input, duration_seconds_input,
-        guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
+        guidance_scale_input, steps_slider, lora_scale_slider, seed_input, randomize_seed_checkbox
    ]
    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
 
-    with gr.Column():
-        gr.Examples(
-            examples=[
-                ["peng.png", "a penguin gracefully dancing in the pristine snow, cinematic motion with detailed feathers", 576, 576],
-                ["frog.jpg", "the frog jumps energetically with smooth, lifelike motion and detailed texture", 576, 576],
-            ],
-            inputs=[input_image_component, prompt_input, height_input, width_input],
-            outputs=[video_output, seed_input],
-            fn=generate_video,
-            cache_examples="lazy",
-            label="🌟 FusionX Enhanced Example Gallery"
-        )
-
 
 if __name__ == "__main__":
    demo.queue().launch()
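Since Gradio passes component values to fn positionally, lora_scale_slider must occupy the same position in ui_inputs as lora_scale does in both generate_video and get_duration, and it does here: between steps and seed. A quick illustrative check, not part of the commit:

import inspect

expected = ["input_image", "prompt", "height", "width",
            "negative_prompt", "duration_seconds",
            "guidance_scale", "steps", "lora_scale",
            "seed", "randomize_seed"]
actual = list(inspect.signature(generate_video).parameters)[:len(expected)]
assert actual == expected, f"ui_inputs order mismatch: {actual}"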
 