Spaces:

mtwohey2
/

Depth_Stitcher

Sleeping

App Files Community

mtwohey2 committed on Mar 3

Commit

767c99b

verified ·

1 Parent(s): b11843e

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -48

app.py CHANGED Viewed

@@ -36,65 +36,76 @@ def stitch_rgbd_videos(
         # For stitching: read the original video in full resolution (without downscaling).
         full_frames, target_fps = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
         depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)
         print(f"Depth frame shape: {depths[0].shape}, dtype: {depths[0].dtype}, min: {depths[0].min()}, max: {depths[0].max()}")
         # For each frame, create a visual depth image from the inferenced depths.
-        d_min, d_max = depths.min(), depths.max()
         stitched_frames = []
         for i in range(min(len(full_frames), len(depths))):
             rgb_full = full_frames[i]  # Full-resolution RGB frame.
-            depth_frame = depths[i].astype(np.uint8)  # Reduce memory footprint
-            print(f"Depth range: min={d_min}, max={d_max}, diff={d_max-d_min}")
-            # Add a small buffer to ensure range is never zero
-            d_min_adj = max(0, d_min - 10)
-            d_max_adj = min(255, d_max + 10)
-            # Normalize the depth frame to the range [0, 255].
-            depth_norm = ((depth_frame - d_min_adj) / (d_max_adj - d_min_adj) * 255).astype(np.uint8)
-            depth_norm = np.clip(depth_norm, 0, 255)
-            # Ensure depth_norm is 2D (remove singleton dimensions if present)
-            if depth_norm.ndim == 3:
-                depth_norm = np.squeeze(depth_norm)
-            # Generate depth visualization:
-            if grayscale:
-                if convert_from_color:
-                    # First, generate a color depth image using the inferno colormap,
-                    # then convert that color image to grayscale.
-                    cmap = matplotlib.colormaps.get_cmap("inferno")
-                    depth_color = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
-                    # Check if depth_color actually has 3 or 4 channels.
-                    if depth_color.ndim == 3 and depth_color.shape[2] in [3, 4]:
                         depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGB2GRAY)
                     else:
-                        # If it's not 3 or 4 channels, assume it's already grayscale.
-                        depth_gray = depth_color
-                    depth_vis = np.stack([depth_gray] * 3, axis=-1)
                 else:
-                    # Directly generate a grayscale image from the normalized depth values.
-                    depth_vis = np.stack([depth_norm] * 3, axis=-1)
-            else:
-                # Generate a color depth image using the inferno colormap.
-                cmap = matplotlib.colormaps.get_cmap("inferno")
-                depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
-            # Ensure depth_vis is valid and contiguous
-            #if depth_vis is None or depth_vis.size == 0:
-            #    raise ValueError("depth_vis is empty or not properly computed.")
-            #else:
-            #    depth_vis = np.ascontiguousarray(depth_vis)
-            # TODO FIX: Apply Gaussian blur if requested.
-            #if blur > 0:
-            #    kernel_size = int(blur * 20) * 2 + 1  # Ensures an odd kernel size.
-            #    depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
             # Resize the depth visualization to match the full-resolution RGB frame.
             H_full, W_full = rgb_full.shape[:2]
@@ -114,7 +125,6 @@ def stitch_rgbd_videos(
             del rgb_full, depth_vis_resized, stitched
             gc.collect()  # Force Python to free unused memory
         stitched_frames = np.array(stitched_frames)
         # Use only the first 20 characters of the base name for the output filename and append '_RGBD.mp4'

         # For stitching: read the original video in full resolution (without downscaling).
         full_frames, target_fps = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
         depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)
         print(f"Depth frame shape: {depths[0].shape}, dtype: {depths[0].dtype}, min: {depths[0].min()}, max: {depths[0].max()}")
         # For each frame, create a visual depth image from the inferenced depths.
+        d_min, d_max = np.min(depths), np.max(depths)
+        print(f"Depth range: min={d_min}, max={d_max}, diff={d_max-d_min}")
         stitched_frames = []
         for i in range(min(len(full_frames), len(depths))):
             rgb_full = full_frames[i]  # Full-resolution RGB frame.
+            depth_frame = depths[i]  # Already in uint8 format
+            # Handle the case where depth is already in a 3-channel format
+            if len(depth_frame.shape) == 3 and depth_frame.shape[2] == 3:
+                # The depth is already a color or grayscale image with 3 channels
+                if grayscale:
+                    if convert_from_color:
+                        # Convert to grayscale if it's a color image
+                        depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
+                        depth_vis = np.stack([depth_gray] * 3, axis=-1)
+                    else:
+                        # Assume it's already the right format
+                        depth_vis = depth_frame
+                else:
+                    if depth_frame.max() > 0:  # Ensure we have valid depth data
+                        # Use the inferno colormap if requested
+                        cmap = matplotlib.colormaps.get_cmap("inferno")
+                        # Convert to single channel first
+                        depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
+                        # Normalize to 0-1 range for colormap
+                        depth_norm = depth_gray / 255.0
+                        # Apply colormap
+                        depth_vis = (cmap(depth_norm)[..., :3] * 255).astype(np.uint8)
+                    else:
+                        # If zero depth, just use the original
+                        depth_vis = depth_frame
+            else:
+                # Process as in original code (single channel depth)
+                if d_max == d_min:
+                    d_max = d_min + 1
+                # Normalize the depth frame to the range [0, 255]
+                depth_norm = np.clip((depth_frame - d_min) / (d_max - d_min) * 255, 0, 255).astype(np.uint8)
+                # Ensure depth_norm is 2D (remove singleton dimensions if present)
+                if depth_norm.ndim == 3:
+                    depth_norm = np.squeeze(depth_norm)
+                # Generate depth visualization:
+                if grayscale:
+                    if convert_from_color:
+                        # First, generate a color depth image using the inferno colormap,
+                        # then convert that color image to grayscale.
+                        cmap = matplotlib.colormaps.get_cmap("inferno")
+                        depth_color = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
                         depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGB2GRAY)
+                        depth_vis = np.stack([depth_gray] * 3, axis=-1)
                     else:
+                        # Directly generate a grayscale image from the normalized depth values.
+                        depth_vis = np.stack([depth_norm] * 3, axis=-1)
                 else:
+                    # Generate a color depth image using the inferno colormap.
+                    cmap = matplotlib.colormaps.get_cmap("inferno")
+                    depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
+            # Apply Gaussian blur if requested
+            if blur > 0:
+                kernel_size = max(1, int(blur * 20) * 2 + 1)  # Ensures an odd kernel size.
+                kernel_size = min(kernel_size, 31)  # Cap kernel size at 31 (OpenCV limitation)
+                depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
             # Resize the depth visualization to match the full-resolution RGB frame.
             H_full, W_full = rgb_full.shape[:2]
             del rgb_full, depth_vis_resized, stitched
             gc.collect()  # Force Python to free unused memory
         stitched_frames = np.array(stitched_frames)
         # Use only the first 20 characters of the base name for the output filename and append '_RGBD.mp4'