thankfulcarp committed on
Commit 10f11a1 · 1 Parent(s): b6b20fb

Major Lora and Resolution enhancements

Files changed (1):
  1. app.py (+110 -28)
app.py CHANGED
@@ -8,7 +8,7 @@ import tempfile
 import re
 import os
 import traceback
-
+from huggingface_hub import list_repo_files
 from huggingface_hub import hf_hub_download
 import numpy as np
 from PIL import Image
@@ -20,6 +20,10 @@ I2V_BASE_MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers" # Used for VAE/encode
 I2V_FUSIONX_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
 I2V_FUSIONX_FILENAME = "Wan14Bi2vFusioniX.safetensors"
 
+# --- I2V LoRA Configuration ---
+I2V_LORA_REPO_ID = "DeepBeepMeep/Wan2.1"
+I2V_LORA_SUBFOLDER = "loras_i2v"
+
 # --- Load Pipelines ---
 print("🚀 Loading I2V pipeline from single file...")
 i2v_pipe = None
@@ -58,15 +62,30 @@ except Exception as e:
     print(f"❌ Critical Error: Failed to load I2V pipeline from single file.")
     traceback.print_exc()
 
+# --- LoRA Discovery ---
+def get_available_loras(repo_id, subfolder):
+    """Fetches the list of available LoRA files from a Hugging Face Hub repo subfolder."""
+    try:
+        files = list_repo_files(repo_id=repo_id, repo_type='model', subfolder=subfolder)
+        # Filter for .safetensors and get just the filename
+        safetensors_files = [f.split('/')[-1] for f in files if f.endswith('.safetensors')]
+        print(f"✅ Discovered {len(safetensors_files)} LoRAs in {repo_id}/{subfolder}")
+        return ["None"] + sorted(safetensors_files)
+    except Exception as e:
+        print(f"⚠️ Warning: Could not fetch LoRAs from {repo_id}. LoRA selection will be disabled. Error: {e}")
+        return ["None"]
+
+available_i2v_loras = get_available_loras(I2V_LORA_REPO_ID, I2V_LORA_SUBFOLDER) if i2v_pipe else ["None"]
+
 
 # --- Constants and Configuration ---
-MOD_VALUE = 32
-DEFAULT_H_SLIDER_VALUE = 640
-DEFAULT_W_SLIDER_VALUE = 1024
-NEW_FORMULA_MAX_AREA = 640.0 * 1024.0
+MOD_VALUE = 8
+DEFAULT_H_SLIDER_VALUE = 512
+DEFAULT_W_SLIDER_VALUE = 768
+NEW_FORMULA_MAX_AREA = 768.0 * 512.0
 
-SLIDER_MIN_H, SLIDER_MAX_H = 128, 1024
-SLIDER_MIN_W, SLIDER_MAX_W = 128, 1024
+SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
+SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
 MAX_SEED = np.iinfo(np.int32).max
 
 FIXED_FPS = 16
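The helper above feeds the new LoRA dropdown. A quick standalone sketch to preview what it will discover; note that huggingface_hub's list_repo_files returns repo-relative paths for the whole repo, so this sketch filters by path prefix rather than passing a subfolder argument:

```python
# Sketch (not part of the commit): preview the dropdown contents.
from huggingface_hub import list_repo_files

repo_id, subfolder = "DeepBeepMeep/Wan2.1", "loras_i2v"
files = list_repo_files(repo_id=repo_id, repo_type="model")  # full repo listing
loras = sorted(
    path.rsplit("/", 1)[-1]
    for path in files
    if path.startswith(subfolder + "/") and path.endswith(".safetensors")
)
print(["None"] + loras)  # same shape as available_i2v_loras
```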
@@ -87,6 +106,25 @@ def sanitize_prompt_for_filename(prompt: str, max_len: int = 60) -> str:
     sanitized = re.sub(r'[\s_-]+', '_', sanitized)
     return sanitized[:max_len]
 
+def update_linked_dimension(driving_value, other_value, aspect_ratio, mod_val, mode):
+    """Updates a dimension slider based on the other, maintaining aspect ratio."""
+    # aspect_ratio is stored as W/H
+    if aspect_ratio is None or aspect_ratio == 0:
+        return gr.update()  # Do nothing if aspect ratio is not set
+
+    if mode == 'h_drives_w':
+        # new_w = h * (W/H)
+        new_other_value = driving_value * aspect_ratio
+    else:  # 'w_drives_h'
+        # new_h = w / (W/H)
+        new_other_value = driving_value / aspect_ratio
+
+    # Round to the nearest multiple of mod_val
+    new_other_value = max(mod_val, (round(new_other_value / mod_val)) * mod_val)
+
+    # Return an update only if the value has changed to prevent infinite loops
+    return gr.update(value=new_other_value) if int(new_other_value) != int(other_value) else gr.update()
+
 def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
                                   min_slider_h, max_slider_h,
                                   min_slider_w, max_slider_w,
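A worked example of update_linked_dimension's snapping arithmetic, using the new MOD_VALUE = 8 and the default aspect ratio W/H = 768/512 = 1.5 (illustrative values only):

```python
# Same arithmetic as update_linked_dimension, inlined for two sample heights.
aspect, mod = 768 / 512, 8           # W/H = 1.5, slider step

h = 500                              # height drives width ('h_drives_w')
w = max(mod, round(h * aspect / mod) * mod)
assert w == 752                      # 500 * 1.5 = 750 -> nearest multiple of 8

h = 520
w = max(mod, round(h * aspect / mod) * mod)
assert w == 784                      # 780 is not a multiple of 8, snaps to 784
```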
@@ -104,18 +142,25 @@ def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
     return new_h, new_w
 
 def handle_image_upload_for_dims_wan(uploaded_pil_image):
+    default_aspect = DEFAULT_W_SLIDER_VALUE / DEFAULT_H_SLIDER_VALUE
     if uploaded_pil_image is None:
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
+        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE), default_aspect
     try:
+        # This function calculates initial slider positions based on a max area
        new_h, new_w = _calculate_new_dimensions_wan(
            uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
            SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
            DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
        )
+
+        # We need the original image's true aspect ratio (W/H) for locking the sliders
+        orig_w, orig_h = uploaded_pil_image.size
+        aspect_ratio = orig_w / orig_h if orig_h > 0 else default_aspect
+
+        return gr.update(value=new_h), gr.update(value=new_w), aspect_ratio
    except Exception as e:
        gr.Warning("Error calculating new dimensions. Resetting to default.")
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
+        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE), default_aspect
 
 # --- GPU Duration Estimators for @spaces.GPU ---
 def get_i2v_duration(steps, duration_seconds):
@@ -135,12 +180,14 @@ def get_t2v_duration(steps, duration_seconds):
 @spaces.GPU(duration_from_args=get_i2v_duration)
 def generate_i2v_video(input_image, prompt, height, width,
                        negative_prompt, duration_seconds,
-                       guidance_scale, steps,
-                       seed, randomize_seed,
+                       guidance_scale, steps, seed, randomize_seed,
+                       lora_name, lora_weight,
                        progress=gr.Progress(track_tqdm=True)):
     """Generates a video from an initial image and a prompt."""
     if input_image is None:
         raise gr.Error("Please upload an input image for Image-to-Video generation.")
+    if i2v_pipe is None:
+        raise gr.Error("Image-to-Video pipeline is not available due to a loading error.")
 
     target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
     target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
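Note the asymmetry: the linked sliders round to the nearest MOD_VALUE step, while the generator floor-snaps with integer division (and clamps to at least MOD_VALUE). A tiny worked example:

```python
# Floor-snapping as done in generate_i2v_video (values for illustration).
MOD_VALUE = 8
height, width = 514, 769
target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
print(target_h, target_w)  # 512 768
```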
@@ -153,18 +200,39 @@ def generate_i2v_video(input_image, prompt, height, width,
     resized_image = input_image.resize((target_w, target_h))
     enhanced_prompt = f"{prompt}, cinematic quality, smooth motion, detailed animation, dynamic lighting"
 
-    with torch.inference_mode():
-        output_frames_list = i2v_pipe(
-            image=resized_image,
-            prompt=enhanced_prompt,
-            negative_prompt=negative_prompt,
-            height=target_h,
-            width=target_w,
-            num_frames=num_frames,
-            guidance_scale=float(guidance_scale),
-            num_inference_steps=int(steps),
-            generator=torch.Generator(device="cuda").manual_seed(current_seed)
-        ).frames[0]
+    adapter_name = "i2v_lora"
+    try:
+        # Dynamically load the selected LoRA
+        if lora_name and lora_name != "None":
+            print(f"🚀 Loading LoRA: {lora_name} with weight {lora_weight}")
+            i2v_pipe.load_lora_weights(
+                I2V_LORA_REPO_ID,
+                weight_name=lora_name,
+                adapter_name=adapter_name,
+                subfolder=I2V_LORA_SUBFOLDER
+            )
+            i2v_pipe.set_adapters([adapter_name], adapter_weights=[float(lora_weight)])
+
+        with torch.inference_mode():
+            output_frames_list = i2v_pipe(
+                image=resized_image,
+                prompt=enhanced_prompt,
+                negative_prompt=negative_prompt,
+                height=target_h,
+                width=target_w,
+                num_frames=num_frames,
+                guidance_scale=float(guidance_scale),
+                num_inference_steps=int(steps),
+                generator=torch.Generator(device="cuda").manual_seed(current_seed)
+            ).frames[0]
+    finally:
+        # Unload the LoRA to ensure a clean state for the next run
+        if lora_name and lora_name != "None" and hasattr(i2v_pipe, "unload_lora_weights"):
+            print(f"🧹 Unloading LoRA: {lora_name}")
+            i2v_pipe.unload_lora_weights()
+        # Clear GPU cache to free up memory for the next run
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 
     sanitized_prompt = sanitize_prompt_for_filename(prompt)
     filename = f"i2v_{sanitized_prompt}_{current_seed}.mp4"
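The try/finally above follows the standard diffusers adapter lifecycle: load the weights, set the adapter strength, run inference, then unload so the next request starts from a clean pipeline. A minimal self-contained sketch of the same cycle; the model and file names here are placeholders, not values from this commit:

```python
# Sketch of the load -> weight -> generate -> unload cycle on a
# LoRA-capable diffusers pipeline. Repo and file names are placeholders.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "some/base-model", torch_dtype=torch.float16
).to("cuda")
try:
    pipe.load_lora_weights("some/lora-repo", weight_name="style.safetensors",
                           adapter_name="style")
    pipe.set_adapters(["style"], adapter_weights=[0.8])
    result = pipe(prompt="a test prompt")
finally:
    pipe.unload_lora_weights()    # leave the pipeline clean for the next run
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # release cached GPU memory
```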
@@ -177,6 +245,7 @@
 # --- Gradio UI Layout ---
 with gr.Blocks() as demo:
     with gr.Column(elem_classes=["main-container"]):
+        i2v_aspect_ratio = gr.State(value=DEFAULT_W_SLIDER_VALUE / DEFAULT_H_SLIDER_VALUE)
         gr.Markdown("# ⚡ FusionX Enhanced Wan 2.1 Video Suite")
 
         with gr.Tabs(elem_classes=["gr-tabs"]):
@@ -203,9 +272,12 @@
                 i2v_neg_prompt = gr.Textbox(label="❌ Negative Prompt", value=default_negative_prompt, lines=4)
                 i2v_seed = gr.Slider(label="🎲 Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 i2v_rand_seed = gr.Checkbox(label="🔀 Randomize seed", value=True, interactive=True)
+                i2v_lora_name = gr.Dropdown(label="🎨 LoRA Style", choices=available_i2v_loras, value="None", info="Dynamically loaded from Hugging Face.", interactive=len(available_i2v_loras) > 1)
+                i2v_lora_weight = gr.Slider(label="💪 LoRA Weight", minimum=0.0, maximum=2.0, step=0.1, value=0.8, interactive=True)
                 with gr.Row():
                     i2v_height = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"📏 Height ({MOD_VALUE}px steps)")
                     i2v_width = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"📏 Width ({MOD_VALUE}px steps)")
+                gr.Markdown("<p style='color: #ffcc00; font-size: 0.9em;'>⚠️ High resolutions can lead to out-of-memory errors. If generation fails, try a smaller size.</p>")
                 i2v_steps = gr.Slider(minimum=1, maximum=20, step=1, value=8, label="🚀 Inference Steps", info="8-10 recommended for great results.")
                 i2v_guidance = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="🎯 Guidance Scale", visible=False)
 
@@ -222,18 +294,28 @@
             i2v_input_image.upload(
                 fn=handle_image_upload_for_dims_wan,
                 inputs=[i2v_input_image],
-                outputs=[i2v_height, i2v_width]
+                outputs=[i2v_height, i2v_width, i2v_aspect_ratio]
             )
             i2v_input_image.clear(
-                fn=lambda: (DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE),
+                fn=lambda: (DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE / DEFAULT_H_SLIDER_VALUE),
                 inputs=[],
-                outputs=[i2v_height, i2v_width]
+                outputs=[i2v_height, i2v_width, i2v_aspect_ratio]
             )
             i2v_generate_btn.click(
                 fn=generate_i2v_video,
-                inputs=[i2v_input_image, i2v_prompt, i2v_height, i2v_width, i2v_neg_prompt, i2v_duration, i2v_guidance, i2v_steps, i2v_seed, i2v_rand_seed],
+                inputs=[i2v_input_image, i2v_prompt, i2v_height, i2v_width, i2v_neg_prompt, i2v_duration, i2v_guidance, i2v_steps, i2v_seed, i2v_rand_seed, i2v_lora_name, i2v_lora_weight],
                 outputs=[i2v_output_video, i2v_seed, i2v_download]
             )
+            i2v_height.release(
+                fn=update_linked_dimension,
+                inputs=[i2v_height, i2v_width, i2v_aspect_ratio, gr.State(MOD_VALUE), gr.State('h_drives_w')],
+                outputs=[i2v_width]
+            )
+            i2v_width.release(
+                fn=update_linked_dimension,
+                inputs=[i2v_width, i2v_height, i2v_aspect_ratio, gr.State(MOD_VALUE), gr.State('w_drives_h')],
+                outputs=[i2v_height]
+            )
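The .release listener fires once when the user lets go of a slider, unlike .change, which fires on every step; combined with update_linked_dimension returning an empty gr.update() when nothing changed, this keeps the two sliders from re-triggering each other. A minimal standalone demo of the same pattern (a sketch, not code from app.py):

```python
# Two sliders locked to a fixed aspect ratio via .release events.
import gradio as gr

MOD = 8
with gr.Blocks() as demo:
    aspect = gr.State(1.5)  # W/H
    h = gr.Slider(128, 896, value=512, step=MOD, label="Height")
    w = gr.Slider(128, 896, value=768, step=MOD, label="Width")
    h.release(lambda hv, a: round(hv * a / MOD) * MOD, inputs=[h, aspect], outputs=[w])
    w.release(lambda wv, a: round(wv / a / MOD) * MOD, inputs=[w, aspect], outputs=[h])

demo.launch()
```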
 
 
 
 
 
 
 
 
 
 