Muhammad Taqi Raza committed
Commit 87f323a · 1 Parent(s): 23f29c8

modifying requirements.txt

inference/cli_demo_camera_i2v_pcd.py CHANGED
@@ -368,13 +368,13 @@ def generate_video(
 
     # ++++++++++++++++++++++++++++++++++++++
     latents = video_generate_all # This is a latent
-    # Fix the nested list structure
-    # if isinstance(latents, list) and len(latents) == 1 and isinstance(latents[0], list):
-    #     latents = latents[0] # ✅ Unwrap the inner list of PIL images
+
+    transform = T.ToTensor()
 
-    transform = T.ToTensor() # Converts PIL image to torch.FloatTensor in [0,1]
-
-    latents = torch.stack([transform(img) for img in latents]) # Shape: [B, C, H, W]
+    latents = [
+        torch.stack([transform(img) for img in sublist]) # [num_frames, C, H, W]
+        for sublist in latents
+    ] # List of [T, C, H, W] tensors
 
     print(f"Type of latents: {type(latents)}")
     print(f"Length of latents: {len(latents)}")
@@ -406,8 +406,11 @@ def generate_video(
 
     # Convert latents back to PIL images after processing
     latents = latents.clamp(0, 1) # Clamp values to [0,1]
-    latents = [T.ToPILImage()(frame.cpu()) for frame in latents]
-
+    to_pil = T.ToPILImage()
+    latents = [
+        [to_pil(frame.cpu()) for frame in video] # video: Tensor[T, C, H, W]
+        for video in latents
+    ]
    video_generate_all = latents
 
     # ++++++++++++++++++++++++++++++++++++++
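For readers following along: the first hunk replaces a single flat torch.stack over latents with a per-video stack, since video_generate_all holds a nested list (one list of PIL frames per video). A minimal standalone sketch of that forward conversion, assuming the nested-list layout described above (the names videos and tensors below are illustrative, not from the file):

import torch
import torchvision.transforms as T
from PIL import Image

# Illustrative stand-in for video_generate_all: 2 videos x 4 RGB frames (not from the file).
videos = [[Image.new("RGB", (64, 64)) for _ in range(4)] for _ in range(2)]

transform = T.ToTensor()  # PIL image -> FloatTensor [C, H, W] in [0, 1]

# One stacked tensor per video, mirroring the updated hunk.
tensors = [
    torch.stack([transform(img) for img in sublist])  # [T, C, H, W]
    for sublist in videos
]
print(tensors[0].shape)  # torch.Size([4, 3, 64, 64])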
 
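The second hunk inverts the mapping once processing is done. A matching self-contained sketch, again with illustrative names (tensors stands in for the processed per-video output; the file itself clamps to [0, 1] before converting):

import torch
import torchvision.transforms as T

# Illustrative stand-in for processed output: 2 videos of 4 frames (not from the file).
tensors = [torch.rand(4, 3, 64, 64) for _ in range(2)]

to_pil = T.ToPILImage()  # FloatTensor [C, H, W] in [0, 1] -> PIL image

videos_out = [
    [to_pil(frame.clamp(0, 1).cpu()) for frame in video]  # video: [T, C, H, W]
    for video in tensors
]
print(type(videos_out[0][0]))  # <class 'PIL.Image.Image'>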