jbilcke-hf HF staff committed on
Commit
03dc078
·
verified ·
1 Parent(s): c882a68

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +19 -19
gradio_app.py CHANGED
@@ -55,38 +55,38 @@ def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.
55
  print("[debug] alpha size:", alpha.size)
56
  rgba_image.putalpha(alpha)
57
  return rgba_image
58
-
59
  def create_batch(input_image: Image.Image) -> dict[str, Any]:
60
  """Prepare image batch for model input."""
61
- # Ensure input is RGBA
62
- if input_image.mode != 'RGBA':
63
- input_image = input_image.convert('RGBA')
64
-
65
- # Resize and convert to numpy array
66
- resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
67
- img_array = np.array(resized_image).astype(np.float32) / 255.0
68
-
69
  print("[debug] img_array shape:", img_array.shape)
70
 
71
- # Split into RGB and alpha
72
  rgb = torch.from_numpy(img_array[..., :3]).float()
73
- alpha = torch.from_numpy(img_array[..., 3:4]).float()
74
-
75
  print("[debug] rgb tensor shape:", rgb.shape)
76
- print("[debug] alpha tensor shape:", alpha.shape)
77
 
78
- # Create background blend using torch.lerp()
79
  bg_tensor = torch.tensor(BACKGROUND_COLOR)[None, None, :]
80
  print("[debug] bg_tensor shape:", bg_tensor.shape)
81
 
82
- rgb_cond = torch.lerp(bg_tensor, rgb, alpha)
 
83
  print("[debug] rgb_cond shape:", rgb_cond.shape)
84
 
 
 
 
 
 
 
85
  batch = {
86
- "rgb_cond": rgb_cond.unsqueeze(0),
87
- "mask_cond": alpha.unsqueeze(0),
88
  "c2w_cond": c2w_cond.unsqueeze(0),
89
- "intrinsic_cond": intrinsic.unsqueeze(0),
90
  "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
91
  }
92
 
@@ -130,7 +130,7 @@ def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, heig
130
  rgba_image = create_rgba_image(rgb_image, mask)
131
 
132
  # Auto crop with foreground
133
- print(f"[debug] auto-cromming the rgba_image using spar3d_utils.foreground_crop(...). newsize=(COND_WIDTH, COND_HEIGHT) = ({COND_WIDTH}, {COND_HEIGHT})")
134
  processed_image = spar3d_utils.foreground_crop(
135
  rgba_image,
136
  crop_ratio=1.3,
 
55
  print("[debug] alpha size:", alpha.size)
56
  rgba_image.putalpha(alpha)
57
  return rgba_image
58
+
59
  def create_batch(input_image: Image.Image) -> dict[str, Any]:
60
  """Prepare image batch for model input."""
61
+ # Convert input image to numpy array and normalize
62
+ img_array = np.array(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32) / 255.0
 
 
 
 
 
 
63
  print("[debug] img_array shape:", img_array.shape)
64
 
65
+ # Extract RGB and alpha channels
66
  rgb = torch.from_numpy(img_array[..., :3]).float()
67
+ mask = torch.from_numpy(img_array[..., 3:4]).float()
 
68
  print("[debug] rgb tensor shape:", rgb.shape)
69
+ print("[debug] mask tensor shape:", mask.shape)
70
 
71
+ # Create background blend
72
  bg_tensor = torch.tensor(BACKGROUND_COLOR)[None, None, :]
73
  print("[debug] bg_tensor shape:", bg_tensor.shape)
74
 
75
+ # Blend RGB with background using mask
76
+ rgb_cond = torch.lerp(bg_tensor, rgb, mask)
77
  print("[debug] rgb_cond shape:", rgb_cond.shape)
78
 
79
+ # Note: We need to permute the tensors to match the expected shape
80
+ rgb_cond = rgb_cond.permute(2, 0, 1) # Change from [H, W, C] to [C, H, W]
81
+ mask = mask.permute(2, 0, 1) # Change from [H, W, 1] to [1, H, W]
82
+ print("[debug] rgb_cond after permute shape:", rgb_cond.shape)
83
+ print("[debug] mask after permute shape:", mask.shape)
84
+
85
  batch = {
86
+ "rgb_cond": rgb_cond.unsqueeze(0), # Add batch dimension
87
+ "mask_cond": mask.unsqueeze(0),
88
  "c2w_cond": c2w_cond.unsqueeze(0),
89
+ "intrinsic_cond": intrinsic.unsqueeze(0),
90
  "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
91
  }
92
 
 
130
  rgba_image = create_rgba_image(rgb_image, mask)
131
 
132
  # Auto crop with foreground
133
+ print(f"[debug] auto-cropping the rgba_image using spar3d_utils.foreground_crop(...). newsize=(COND_WIDTH, COND_HEIGHT) = ({COND_WIDTH}, {COND_HEIGHT})")
134
  processed_image = spar3d_utils.foreground_crop(
135
  rgba_image,
136
  crop_ratio=1.3,