Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -36,65 +36,76 @@ def stitch_rgbd_videos( | |
| 36 | 
             
                    # For stitching: read the original video in full resolution (without downscaling).
         | 
| 37 | 
             
                    full_frames, target_fps = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
         | 
| 38 | 
             
                    depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)
         | 
| 39 | 
            -
             | 
| 40 | 
             
                    print(f"Depth frame shape: {depths[0].shape}, dtype: {depths[0].dtype}, min: {depths[0].min()}, max: {depths[0].max()}")
         | 
| 41 |  | 
| 42 | 
             
                    # For each frame, create a visual depth image from the inferenced depths.
         | 
| 43 | 
            -
                    d_min, d_max =  | 
|  | |
|  | |
| 44 | 
             
                    stitched_frames = []
         | 
| 45 | 
             
                    for i in range(min(len(full_frames), len(depths))):
         | 
| 46 | 
             
                        rgb_full = full_frames[i]  # Full-resolution RGB frame.
         | 
| 47 | 
            -
                        depth_frame = depths[i] | 
| 48 | 
            -
             | 
| 49 | 
            -
                        print(f"Depth range: min={d_min}, max={d_max}, diff={d_max-d_min}")
         | 
| 50 | 
            -
                        
         | 
| 51 | 
            -
                        # Add a small buffer to ensure range is never zero
         | 
| 52 | 
            -
                        d_min_adj = max(0, d_min - 10)
         | 
| 53 | 
            -
                        d_max_adj = min(255, d_max + 10)
         | 
| 54 |  | 
| 55 | 
            -
                        #  | 
| 56 | 
            -
                         | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 63 |  | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
                             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
                                 | 
| 70 | 
            -
                                 | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
                                if  | 
|  | |
|  | |
|  | |
|  | |
| 74 | 
             
                                    depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGB2GRAY)
         | 
|  | |
| 75 | 
             
                                else:
         | 
| 76 | 
            -
                                    #  | 
| 77 | 
            -
                                     | 
| 78 | 
            -
                                
         | 
| 79 | 
            -
                                depth_vis = np.stack([depth_gray] * 3, axis=-1)
         | 
| 80 | 
             
                            else:
         | 
| 81 | 
            -
                                #  | 
| 82 | 
            -
                                 | 
| 83 | 
            -
             | 
| 84 | 
            -
                            # Generate a color depth image using the inferno colormap.
         | 
| 85 | 
            -
                            cmap = matplotlib.colormaps.get_cmap("inferno")
         | 
| 86 | 
            -
                            depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
         | 
| 87 | 
            -
                        
         | 
| 88 | 
            -
                        # Ensure depth_vis is valid and contiguous
         | 
| 89 | 
            -
                        #if depth_vis is None or depth_vis.size == 0:
         | 
| 90 | 
            -
                        #    raise ValueError("depth_vis is empty or not properly computed.")
         | 
| 91 | 
            -
                        #else:
         | 
| 92 | 
            -
                        #    depth_vis = np.ascontiguousarray(depth_vis)
         | 
| 93 |  | 
| 94 | 
            -
                        #  | 
| 95 | 
            -
                         | 
| 96 | 
            -
             | 
| 97 | 
            -
             | 
|  | |
| 98 |  | 
| 99 | 
             
                        # Resize the depth visualization to match the full-resolution RGB frame.
         | 
| 100 | 
             
                        H_full, W_full = rgb_full.shape[:2]
         | 
| @@ -114,7 +125,6 @@ def stitch_rgbd_videos( | |
| 114 |  | 
| 115 | 
             
                        del rgb_full, depth_vis_resized, stitched
         | 
| 116 | 
             
                        gc.collect()  # Force Python to free unused memory
         | 
| 117 | 
            -
             | 
| 118 |  | 
| 119 | 
             
                    stitched_frames = np.array(stitched_frames)
         | 
| 120 | 
             
                    # Use only the first 20 characters of the base name for the output filename and append '_RGBD.mp4'
         | 
|  | |
| 36 | 
             
                    # For stitching: read the original video in full resolution (without downscaling).
         | 
| 37 | 
             
                    full_frames, target_fps = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
         | 
| 38 | 
             
                    depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)
         | 
| 39 | 
            +
                    
         | 
| 40 | 
             
                    print(f"Depth frame shape: {depths[0].shape}, dtype: {depths[0].dtype}, min: {depths[0].min()}, max: {depths[0].max()}")
         | 
| 41 |  | 
| 42 | 
             
                    # For each frame, create a visual depth image from the inferenced depths.
         | 
| 43 | 
            +
                    d_min, d_max = np.min(depths), np.max(depths)
         | 
| 44 | 
            +
                    print(f"Depth range: min={d_min}, max={d_max}, diff={d_max-d_min}")
         | 
| 45 | 
            +
                    
         | 
| 46 | 
             
                    stitched_frames = []
         | 
| 47 | 
             
                    for i in range(min(len(full_frames), len(depths))):
         | 
| 48 | 
             
                        rgb_full = full_frames[i]  # Full-resolution RGB frame.
         | 
| 49 | 
            +
                        depth_frame = depths[i]  # Already in uint8 format
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 50 |  | 
| 51 | 
            +
                        # Handle the case where depth is already in a 3-channel format
         | 
| 52 | 
            +
                        if len(depth_frame.shape) == 3 and depth_frame.shape[2] == 3:
         | 
| 53 | 
            +
                            # The depth is already a color or grayscale image with 3 channels
         | 
| 54 | 
            +
                            if grayscale:
         | 
| 55 | 
            +
                                if convert_from_color:
         | 
| 56 | 
            +
                                    # Convert to grayscale if it's a color image
         | 
| 57 | 
            +
                                    depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
         | 
| 58 | 
            +
                                    depth_vis = np.stack([depth_gray] * 3, axis=-1)
         | 
| 59 | 
            +
                                else:
         | 
| 60 | 
            +
                                    # Assume it's already the right format
         | 
| 61 | 
            +
                                    depth_vis = depth_frame
         | 
| 62 | 
            +
                            else:
         | 
| 63 | 
            +
                                if depth_frame.max() > 0:  # Ensure we have valid depth data
         | 
| 64 | 
            +
                                    # Use the inferno colormap if requested 
         | 
| 65 | 
            +
                                    cmap = matplotlib.colormaps.get_cmap("inferno")
         | 
| 66 | 
            +
                                    # Convert to single channel first
         | 
| 67 | 
            +
                                    depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
         | 
| 68 | 
            +
                                    # Normalize to 0-1 range for colormap
         | 
| 69 | 
            +
                                    depth_norm = depth_gray / 255.0
         | 
| 70 | 
            +
                                    # Apply colormap
         | 
| 71 | 
            +
                                    depth_vis = (cmap(depth_norm)[..., :3] * 255).astype(np.uint8)
         | 
| 72 | 
            +
                                else:
         | 
| 73 | 
            +
                                    # If zero depth, just use the original
         | 
| 74 | 
            +
                                    depth_vis = depth_frame
         | 
| 75 | 
            +
                        else:
         | 
| 76 | 
            +
                            # Process as in original code (single channel depth)
         | 
| 77 | 
            +
                            if d_max == d_min:
         | 
| 78 | 
            +
                                d_max = d_min + 1
         | 
| 79 |  | 
| 80 | 
            +
                            # Normalize the depth frame to the range [0, 255]
         | 
| 81 | 
            +
                            depth_norm = np.clip((depth_frame - d_min) / (d_max - d_min) * 255, 0, 255).astype(np.uint8)
         | 
| 82 | 
            +
                            
         | 
| 83 | 
            +
                            # Ensure depth_norm is 2D (remove singleton dimensions if present)
         | 
| 84 | 
            +
                            if depth_norm.ndim == 3:
         | 
| 85 | 
            +
                                depth_norm = np.squeeze(depth_norm)
         | 
| 86 | 
            +
                                
         | 
| 87 | 
            +
                            # Generate depth visualization:
         | 
| 88 | 
            +
                            if grayscale:
         | 
| 89 | 
            +
                                if convert_from_color:
         | 
| 90 | 
            +
                                    # First, generate a color depth image using the inferno colormap,
         | 
| 91 | 
            +
                                    # then convert that color image to grayscale.
         | 
| 92 | 
            +
                                    cmap = matplotlib.colormaps.get_cmap("inferno")
         | 
| 93 | 
            +
                                    depth_color = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
         | 
| 94 | 
             
                                    depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGB2GRAY)
         | 
| 95 | 
            +
                                    depth_vis = np.stack([depth_gray] * 3, axis=-1)
         | 
| 96 | 
             
                                else:
         | 
| 97 | 
            +
                                    # Directly generate a grayscale image from the normalized depth values.
         | 
| 98 | 
            +
                                    depth_vis = np.stack([depth_norm] * 3, axis=-1)
         | 
|  | |
|  | |
| 99 | 
             
                            else:
         | 
| 100 | 
            +
                                # Generate a color depth image using the inferno colormap.
         | 
| 101 | 
            +
                                cmap = matplotlib.colormaps.get_cmap("inferno")
         | 
| 102 | 
            +
                                depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 103 |  | 
| 104 | 
            +
                        # Apply Gaussian blur if requested
         | 
| 105 | 
            +
                        if blur > 0:
         | 
| 106 | 
            +
                            kernel_size = max(1, int(blur * 20) * 2 + 1)  # Ensures an odd kernel size.
         | 
| 107 | 
            +
                            kernel_size = min(kernel_size, 31)  # Cap kernel size at 31 (OpenCV limitation)
         | 
| 108 | 
            +
                            depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
         | 
| 109 |  | 
| 110 | 
             
                        # Resize the depth visualization to match the full-resolution RGB frame.
         | 
| 111 | 
             
                        H_full, W_full = rgb_full.shape[:2]
         | 
|  | |
| 125 |  | 
| 126 | 
             
                        del rgb_full, depth_vis_resized, stitched
         | 
| 127 | 
             
                        gc.collect()  # Force Python to free unused memory
         | 
|  | |
| 128 |  | 
| 129 | 
             
                    stitched_frames = np.array(stitched_frames)
         | 
| 130 | 
             
                    # Use only the first 20 characters of the base name for the output filename and append '_RGBD.mp4'
         |