Spaces:

preSalesAIAutomation
/

LTXpipeline

Running on Zero

preSalesAIAutomation committed on Jul 21

Commit

cdbcd1b

verified ·

1 Parent(s): dce339d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,7 +49,18 @@ def generate_video(prompt, image_url):
     # Dimensions
     base_width, base_height = 512, 512
     downscale = 2 / 3
-    w_d, h_d = round_to_nearest_resolution(int(base_width * downscale), int(base_height * downscale), pipe.vae_spatial_compression_ratio)
     # Step 1: Generate latents
     latents = pipe(
@@ -75,13 +86,13 @@ def generate_video(prompt, image_url):
     torch.cuda.empty_cache()
     gc.collect()
-    # Step 3: Decode to frames
     frames = pipe(
         prompt=prompt,
         image=image,
         latents=upscaled,
-        width=base_width,
-        height=base_height,
         num_frames=60,
         num_inference_steps=10,
         output_type="pil",

     # Dimensions
     base_width, base_height = 512, 512
     downscale = 2 / 3
+    # Use correct rounding for VAE compatibility
+    w_d, h_d = round_to_nearest_resolution(
+        int(base_width * downscale),
+        int(base_height * downscale),
+        ratio=pipe.vae_spatial_compression_ratio
+    )
+    # Upscaled dimensions must also be VAE-aligned
+    w_up, h_up = round_to_nearest_resolution(
+        base_width,
+        base_height,
+        ratio=pipe.vae_spatial_compression_ratio
+    )
     # Step 1: Generate latents
     latents = pipe(
     torch.cuda.empty_cache()
     gc.collect()
+    # Step 3: Decode to frames (must match rounded base)
     frames = pipe(
         prompt=prompt,
         image=image,
         latents=upscaled,
+        width=w_up,
+        height=h_up,
         num_frames=60,
         num_inference_steps=10,
         output_type="pil",