linoyts HF Staff committed on
Commit
12606cb
·
verified ·
1 Parent(s): aee98ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -35
app.py CHANGED
@@ -123,7 +123,6 @@ def process_video_for_canny(video):
123
 
124
  return canny_video
125
 
126
-
127
  @spaces.GPU()
128
  def process_video_for_pose(video):
129
  """
@@ -169,31 +168,55 @@ def process_video_for_pose(video):
169
 
170
  return pose_video
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def process_video_for_control(reference_video, control_type):
 
173
  video = load_video(reference_video)
174
- """Process video based on the selected control type"""
175
  if control_type == "canny":
 
176
  processed_video = process_video_for_canny(video)
177
  elif control_type == "depth":
178
  processed_video = process_video_for_depth(video)
179
  elif control_type == "pose":
180
  processed_video = process_video_for_pose(video)
181
  else:
182
- processed_video = reference_video
183
- # fps = 24
184
- # with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp2_file:
185
- # output2_path = tmp2_file.name
186
- # export_to_video(processed_video, output2_path, fps=fps)
187
- # return output2_path
188
  return processed_video
189
 
190
-
191
  @spaces.GPU(duration=160)
192
  def generate_video(
193
  reference_video,
 
194
  prompt,
195
  control_type,
196
- # current_lora_state,
197
  duration=3.0,
198
  negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
199
  height=768,
@@ -225,19 +248,18 @@ def generate_video(
225
  temporal_compression = pipeline.vae_temporal_compression_ratio
226
  num_frames = ((num_frames - 1) // temporal_compression) * temporal_compression + 1
227
 
228
-
229
-
230
- # Load the appropriate control LoRA and update state
231
- # updated_lora_state = load_control_lora(control_type, current_lora_state)
232
-
233
- # # Loads video into a list of pil images
234
- # video = load_video(reference_video)
235
- # progress(0.1, desc="Processing video for control...")
236
 
237
- # Process video based on control type
238
- processed_video = process_video_for_control(reference_video, control_type)
 
 
 
 
 
239
 
240
- processed_video = read_video(processed_video) # turns to tensor
 
241
 
242
  progress(0.2, desc="Preparing generation parameters...")
243
 
@@ -262,9 +284,7 @@ def generate_video(
262
  num_inference_steps=num_inference_steps,
263
  decode_timestep=0.05,
264
  decode_noise_scale=0.025,
265
- # image_cond_noise_scale=image_cond_noise_scale,
266
  guidance_scale=guidance_scale,
267
- # guidance_rescale=guidance_rescale,
268
  generator=torch.Generator().manual_seed(seed),
269
  output_type="latent",
270
  ).frames
@@ -294,7 +314,6 @@ def generate_video(
294
  guidance_scale=guidance_scale,
295
  decode_noise_scale = 0.025,
296
  image_cond_noise_scale=0.025,
297
- #guidance_rescale=guidance_rescale,
298
  generator=torch.Generator(device="cuda").manual_seed(seed),
299
  output_type="pil",
300
  ).frames[0]
@@ -325,8 +344,9 @@ with gr.Blocks() as demo:
325
  """
326
  )
327
 
328
- # State variable for tracking current LoRA
329
  current_lora_state = gr.State(value=None)
 
330
 
331
  with gr.Row():
332
  with gr.Column(scale=1):
@@ -402,8 +422,6 @@ with gr.Blocks() as demo:
402
  value=1.0
403
  )
404
 
405
-
406
-
407
  with gr.Row():
408
  randomize_seed = gr.Checkbox(
409
  label="Randomize Seed",
@@ -425,22 +443,21 @@ with gr.Blocks() as demo:
425
  height=400
426
  )
427
  control_video = gr.Video(
428
- label="Control Video",
429
  height=400,
430
- visible=False
431
  )
432
 
433
  gr.Examples(
434
  examples=[
435
- ["video_assets/vid_1.mp4", "A sleek cybernetic wolf sprinting through a neon-lit futuristic cityscape, its metallic form gleaming with electric blue circuits. The wolf's powerful stride carries it down rain-slicked streets between towering skyscrapers, while holographic advertisements cast colorful reflections on its chrome surface. Sparks of digital energy trail behind the creature as it moves with fluid mechanical precision through the urban maze, creating streaks of light in the misty night air.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
436
- ["video_assets/vid_2.mp4", "A translucent ghost floating in a moonlit cemetery, raising a glowing spectral lantern that casts eerie light through the darkness. The ethereal figure's wispy form shimmers as it lifts the phantom light above its head, illuminating weathered tombstones and gnarled trees. Pale mist swirls around the ghost as the lantern pulses with otherworldly energy, creating haunting shadows that dance across the graveyard in the dead of night.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
437
- ["video_assets/vid_3.mp4", "A sleek android assassin poised in a combat stance atop a futuristic skyscraper, arms positioned for perfect balance. The chrome-plated figure gleams under neon city lights as holographic data streams flow around its metallic form. Rain droplets bead on its polished surface while the sprawling cyberpunk metropolis stretches endlessly below. Electric circuits pulse beneath the android's transparent panels as it maintains its precise, calculated pose against the backdrop of flying vehicles and towering digital billboards.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
438
- ["video_assets/vid_4.mp4", "Luminescent video game characters with glowing outlines and neon-bright details wandering through a digital landscape. Their bodies emit soft, colorful light that pulses gently as they move, creating trails of radiance behind them. The characters have a futuristic, stylized appearance with smooth surfaces that reflect their inner glow. They navigate naturally through their environment, their movements fluid and purposeful, while their bioluminescent features cast dynamic shadows and illuminate the surrounding area. The scene has a cyberpunk aesthetic with the characters' radiant presence serving as the primary light source in an otherwise darkened digital world.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
439
  ],
440
  inputs=[reference_video,
441
  prompt,
442
  control_type,
443
- # current_lora_state,
444
  duration,
445
  negative_prompt,
446
  height,
@@ -454,13 +471,22 @@ with gr.Blocks() as demo:
454
  )
455
 
456
  # Event handlers
 
 
 
 
 
 
 
 
 
457
  generate_btn.click(
458
  fn=generate_video,
459
  inputs=[
460
  reference_video,
 
461
  prompt,
462
  control_type,
463
- # current_lora_state,
464
  duration,
465
  negative_prompt,
466
  height,
 
123
 
124
  return canny_video
125
 
 
126
  @spaces.GPU()
127
  def process_video_for_pose(video):
128
  """
 
168
 
169
  return pose_video
170
 
171
+ @spaces.GPU()
172
+ def process_input_video(reference_video):
173
+ """
174
+ Process the input video for canny edges and return both processed video and preview.
175
+ """
176
+ if reference_video is None:
177
+ return None
178
+
179
+ try:
180
+ # Load video into a list of PIL images
181
+ video = load_video(reference_video)
182
+
183
+ # Process video for canny edges
184
+ processed_video = process_video_for_canny(video)
185
+
186
+ # Create a preview video file for display
187
+ fps = 24
188
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
189
+ preview_path = tmp_file.name
190
+ export_to_video(processed_video, preview_path, fps=fps)
191
+
192
+ return preview_path
193
+
194
+ except Exception as e:
195
+ print(f"Error processing input video: {e}")
196
+ return None
197
+
198
  def process_video_for_control(reference_video, control_type):
199
+ """Process video based on the selected control type - now only used for non-canny types"""
200
  video = load_video(reference_video)
201
+
202
  if control_type == "canny":
203
+ # This should not be called for canny since it's pre-processed
204
  processed_video = process_video_for_canny(video)
205
  elif control_type == "depth":
206
  processed_video = process_video_for_depth(video)
207
  elif control_type == "pose":
208
  processed_video = process_video_for_pose(video)
209
  else:
210
+ processed_video = video
211
+
 
 
 
 
212
  return processed_video
213
 
 
214
  @spaces.GPU(duration=160)
215
  def generate_video(
216
  reference_video,
217
+ control_video, # New parameter for pre-processed video
218
  prompt,
219
  control_type,
 
220
  duration=3.0,
221
  negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
222
  height=768,
 
248
  temporal_compression = pipeline.vae_temporal_compression_ratio
249
  num_frames = ((num_frames - 1) // temporal_compression) * temporal_compression + 1
250
 
251
+ progress(0.1, desc="Preparing processed video...")
 
 
 
 
 
 
 
252
 
253
+ # Use pre-processed video frames if available (for canny), otherwise process on-demand
254
+ if control_video is not None:
255
+ # Use the pre-processed canny frames
256
+ processed_video = load_video(control_video)
257
+ else:
258
+ # Fallback to processing on-demand for other control types
259
+ processed_video = process_video_for_control(reference_video, control_type)
260
 
261
+ # Convert to tensor
262
+ processed_video = read_video(processed_video)
263
 
264
  progress(0.2, desc="Preparing generation parameters...")
265
 
 
284
  num_inference_steps=num_inference_steps,
285
  decode_timestep=0.05,
286
  decode_noise_scale=0.025,
 
287
  guidance_scale=guidance_scale,
 
288
  generator=torch.Generator().manual_seed(seed),
289
  output_type="latent",
290
  ).frames
 
314
  guidance_scale=guidance_scale,
315
  decode_noise_scale = 0.025,
316
  image_cond_noise_scale=0.025,
 
317
  generator=torch.Generator(device="cuda").manual_seed(seed),
318
  output_type="pil",
319
  ).frames[0]
 
344
  """
345
  )
346
 
347
+ # State variables
348
  current_lora_state = gr.State(value=None)
349
+ processed_video_state = gr.State(value=None) # Store processed video frames
350
 
351
  with gr.Row():
352
  with gr.Column(scale=1):
 
422
  value=1.0
423
  )
424
 
 
 
425
  with gr.Row():
426
  randomize_seed = gr.Checkbox(
427
  label="Randomize Seed",
 
443
  height=400
444
  )
445
  control_video = gr.Video(
446
+ label="Processed Control Video (Canny Edges)",
447
  height=400,
448
+ visible=True
449
  )
450
 
451
  gr.Examples(
452
  examples=[
453
+ ["video_assets/vid_1.mp4", None, "A sleek cybernetic wolf sprinting through a neon-lit futuristic cityscape, its metallic form gleaming with electric blue circuits. The wolf's powerful stride carries it down rain-slicked streets between towering skyscrapers, while holographic advertisements cast colorful reflections on its chrome surface. Sparks of digital energy trail behind the creature as it moves with fluid mechanical precision through the urban maze, creating streaks of light in the misty night air.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
454
+ ["video_assets/vid_2.mp4", None, "A translucent ghost floating in a moonlit cemetery, raising a glowing spectral lantern that casts eerie light through the darkness. The ethereal figure's wispy form shimmers as it lifts the phantom light above its head, illuminating weathered tombstones and gnarled trees. Pale mist swirls around the ghost as the lantern pulses with otherworldly energy, creating haunting shadows that dance across the graveyard in the dead of night.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
455
+ ["video_assets/vid_3.mp4", None,"A sleek android assassin poised in a combat stance atop a futuristic skyscraper, arms positioned for perfect balance. The chrome-plated figure gleams under neon city lights as holographic data streams flow around its metallic form. Rain droplets bead on its polished surface while the sprawling cyberpunk metropolis stretches endlessly below. Electric circuits pulse beneath the android's transparent panels as it maintains its precise, calculated pose against the backdrop of flying vehicles and towering digital billboards.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
456
+ ["video_assets/vid_4.mp4", None, "Luminescent video game characters with glowing outlines and neon-bright details wandering through a digital landscape. Their bodies emit soft, colorful light that pulses gently as they move, creating trails of radiance behind them. The characters have a futuristic, stylized appearance with smooth surfaces that reflect their inner glow. They navigate naturally through their environment, their movements fluid and purposeful, while their bioluminescent features cast dynamic shadows and illuminate the surrounding area. The scene has a cyberpunk aesthetic with the characters' radiant presence serving as the primary light source in an otherwise darkened digital world.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
457
  ],
458
  inputs=[reference_video,
459
  prompt,
460
  control_type,
 
461
  duration,
462
  negative_prompt,
463
  height,
 
471
  )
472
 
473
  # Event handlers
474
+
475
+ # Auto-process video when uploaded
476
+ reference_video.upload(
477
+ fn=process_input_video,
478
+ inputs=[reference_video],
479
+ outputs=[control_video],
480
+ show_progress=True
481
+ )
482
+
483
  generate_btn.click(
484
  fn=generate_video,
485
  inputs=[
486
  reference_video,
487
+ control_video, # Use pre-processed video
488
  prompt,
489
  control_type,
 
490
  duration,
491
  negative_prompt,
492
  height,