preSalesAIAutomation committed on
Commit
cdbcd1b
·
verified ·
1 Parent(s): dce339d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -4
app.py CHANGED
@@ -49,7 +49,18 @@ def generate_video(prompt, image_url):
49
  # Dimensions
50
  base_width, base_height = 512, 512
51
  downscale = 2 / 3
52
- w_d, h_d = round_to_nearest_resolution(int(base_width * downscale), int(base_height * downscale), pipe.vae_spatial_compression_ratio)
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Step 1: Generate latents
55
  latents = pipe(
@@ -75,13 +86,13 @@ def generate_video(prompt, image_url):
75
  torch.cuda.empty_cache()
76
  gc.collect()
77
 
78
- # Step 3: Decode to frames
79
  frames = pipe(
80
  prompt=prompt,
81
  image=image,
82
  latents=upscaled,
83
- width=base_width,
84
- height=base_height,
85
  num_frames=60,
86
  num_inference_steps=10,
87
  output_type="pil",
 
49
  # Dimensions
50
  base_width, base_height = 512, 512
51
  downscale = 2 / 3
52
+ # Use correct rounding for VAE compatibility
53
+ w_d, h_d = round_to_nearest_resolution(
54
+ int(base_width * downscale),
55
+ int(base_height * downscale),
56
+ ratio=pipe.vae_spatial_compression_ratio
57
+ )
58
+ # Upscaled dimensions must also be VAE-aligned
59
+ w_up, h_up = round_to_nearest_resolution(
60
+ base_width,
61
+ base_height,
62
+ ratio=pipe.vae_spatial_compression_ratio
63
+ )
64
 
65
  # Step 1: Generate latents
66
  latents = pipe(
 
86
  torch.cuda.empty_cache()
87
  gc.collect()
88
 
89
+ # Step 3: Decode to frames (must match rounded base)
90
  frames = pipe(
91
  prompt=prompt,
92
  image=image,
93
  latents=upscaled,
94
+ width=w_up,
95
+ height=h_up,
96
  num_frames=60,
97
  num_inference_steps=10,
98
  output_type="pil",