Muhammad Taqi Raza committed on
Commit
38298be
·
1 Parent(s): ca22dfe

modifying requirements.txt

Browse files
inference/cli_demo_camera_i2v_pcd.py CHANGED
@@ -26,7 +26,7 @@ from PIL import Image
26
  import numpy as np
27
  import torchvision.transforms as transforms
28
  import cv2
29
-
30
  import cv2
31
  import numpy as np
32
  import torch
@@ -368,6 +368,14 @@ def generate_video(
368
 
369
  # ++++++++++++++++++++++++++++++++++++++
370
  latents = video_generate_all # This is a latent
 
 
 
 
 
 
 
 
371
  print(f"Type of latents: {type(latents)}")
372
  print(f"Length of latents: {len(latents)}")
373
 
@@ -387,8 +395,6 @@ def generate_video(
387
  print(f" Mode: {item.mode}")
388
  else:
389
  print(f" Value: {item}")
390
- # Only works if all elements are tensors of the same shape
391
- latents = torch.stack(latents)
392
 
393
  scale_status = True
394
  rife_status = True
 
26
  import numpy as np
27
  import torchvision.transforms as transforms
28
  import cv2
29
+ import torchvision.transforms as T
30
  import cv2
31
  import numpy as np
32
  import torch
 
368
 
369
  # ++++++++++++++++++++++++++++++++++++++
370
  latents = video_generate_all # This is a latent
371
+ # Fix the nested list structure
372
+ if isinstance(latents, list) and len(latents) == 1 and isinstance(latents[0], list):
373
+ latents = latents[0] # ✅ Unwrap the inner list of PIL images
374
+
375
+ transform = T.ToTensor() # Converts PIL image to torch.FloatTensor in [0,1]
376
+
377
+ latents = torch.stack([transform(img) for img in latents]) # Shape: [B, C, H, W]
378
+
379
  print(f"Type of latents: {type(latents)}")
380
  print(f"Length of latents: {len(latents)}")
381
 
 
395
  print(f" Mode: {item.mode}")
396
  else:
397
  print(f" Value: {item}")
 
 
398
 
399
  scale_status = True
400
  rife_status = True