SAM2-Video-Predictor

Sleeping

App Files Files Community

fffiloni committed on Jan 23

Commit

f065d89

verified ·

1 Parent(s): 11d0cdc

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -24

app.py CHANGED Viewed

@@ -177,6 +177,19 @@ def show_mask(mask, ax, obj_id=None, random_color=False):
     mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
     ax.imshow(mask_image)
 def show_points(coords, labels, ax, marker_size=200):
     pos_points = coords[labels==1]
@@ -319,7 +332,7 @@ def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_
     # Initialize a list to store file paths of saved images
     jpeg_images = []
-    masks_frames = []
     # run propagation throughout the video and collect the results in a dict
     video_segments = {}  # video_segments contains the per-frame segmentation results
@@ -343,20 +356,6 @@ def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_
         for out_obj_id, out_mask in video_segments[out_frame_idx].items():
             show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
-            # Save the raw binary mask as a separate image
-            mask_filename = os.path.join(mask_frames_output_dir, f"mask_{out_frame_idx}.jpg")
-            binary_mask = np.squeeze(out_mask)  # Ensure the mask is 2D
-            binary_mask = (binary_mask * 255).astype(np.uint8)  # Scale mask to 0-255
-            if binary_mask.ndim != 2:  # Ensure it's 2D for PIL
-                raise ValueError(f"Mask has invalid dimensions: {binary_mask.shape}")
-            mask_image = Image.fromarray(binary_mask)
-            mask_image.save(mask_filename)  # Save the mask as a JPEG
-            masks_frames.append(mask_filename)  # Append to the list of masks
-        print(f"MASKS FRAMES: {masks_frames}")
         # Define the output filename and save the figure as a JPEG file
         output_filename = os.path.join(frames_output_dir, f"frame_{out_frame_idx}.jpg")
         plt.savefig(output_filename, format='jpg')
@@ -370,6 +369,23 @@ def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_
         if f"frame_{out_frame_idx}.jpg" not in available_frames_to_check:
             available_frames_to_check.append(f"frame_{out_frame_idx}.jpg")
     torch.cuda.empty_cache()
@@ -392,18 +408,30 @@ def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_
             codec='libx264'
         )
-        # Create the video clip
-        mask_clip = ImageSequenceClip(masks_frames, fps=fps)
-        # Define the output file path
-        mask_final_vid_output_path = "mask_output_video.mp4"
-        # Write the video to a file
-        mask_clip.write_videofile(mask_final_vid_output_path, codec='libx264')
-        return gr.update(value=None), gr.update(value=final_vid_output_path), working_frame, available_frames_to_check, gr.update(visible=True), mask_final_vid_output_path
 def update_ui(vis_frame_type):
     if vis_frame_type == "check":

     mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
     ax.imshow(mask_image)
+def show_white_mask(mask, ax):
+    """Draw ``mask`` as opaque white on ``ax`` over a black background.
+
+    Any strictly positive mask value is treated as foreground. The last two
+    dimensions of ``mask`` are taken as (height, width).
+
+    The black background is painted only when ``ax`` holds no image yet.
+    Painting it on every call would lay an opaque black layer over masks
+    drawn by earlier calls on the same axes, so with several objects per
+    frame only the last object's mask would remain visible.
+
+    Args:
+        mask: array-like mask that reshapes to (height, width, 1).
+        ax: matplotlib axes to draw on; for a clean black-and-white result
+            it should not already contain other images.
+    """
+    # Binarize: every positive value becomes 1.0, everything else 0.0
+    mask = (mask > 0).astype(float)
+    h, w = mask.shape[-2:]
+    # RGBA white, fully opaque; background pixels multiply out to alpha 0,
+    # so they stay transparent and never hide previously drawn masks
+    alpha = 1.0
+    color = np.array([1, 1, 1, alpha])
+    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+    # Paint the black background once per axes (see docstring)
+    if not ax.images:
+        ax.imshow(np.zeros((h, w, 3), dtype=float))
+    ax.imshow(mask_image)  # Overlay white mask
 def show_points(coords, labels, ax, marker_size=200):
     pos_points = coords[labels==1]
     # Initialize a list to store file paths of saved images
     jpeg_images = []
+    masks_images = []
     # run propagation throughout the video and collect the results in a dict
     video_segments = {}  # video_segments contains the per-frame segmentation results
         for out_obj_id, out_mask in video_segments[out_frame_idx].items():
             show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
         # Define the output filename and save the figure as a JPEG file
         output_filename = os.path.join(frames_output_dir, f"frame_{out_frame_idx}.jpg")
         plt.savefig(output_filename, format='jpg')
         if f"frame_{out_frame_idx}.jpg" not in available_frames_to_check:
             available_frames_to_check.append(f"frame_{out_frame_idx}.jpg")
+        # Step 2: Create and store a black-and-white mask image using show_white_mask
+        # Create a figure without displaying it for the white mask
+        fig, ax = plt.subplots(figsize=(6, 4))
+        ax.axis("off")  # Remove axes for a clean mask
+        # Overlay each mask as white on a black background
+        for out_mask in video_segments[out_frame_idx].values():
+            show_white_mask(out_mask, ax)
+        # Save the white mask figure as a JPEG file on disk
+        mask_filename = os.path.join(masks_output_dir, f"mask_{out_frame_idx}.jpg")
+        fig.savefig(mask_filename, format='jpg', bbox_inches="tight", pad_inches=0)
+        plt.close(fig)
+        # Add the saved mask image to the masks_images array
+        masks_images.append(mask_filename)
     torch.cuda.empty_cache()
             codec='libx264'
         )
+        print("MAKING MASK VIDEO ...")
+        # Create a video from the masks_images array
+        mask_video_filename = "final_masks_video.mp4"
+        # Get the dimensions of the first mask image
+        frame = cv2.imread(masks_images[0])
+        height, width, _ = frame.shape
+        # Define the video writer
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        fps = original_fps  # Frames per second
+        video_writer = cv2.VideoWriter(mask_video_filename, fourcc, fps, (width, height))
+        # Write each mask image to the video
+        for mask_path in masks_images:
+            frame = cv2.imread(mask_path)
+            video_writer.write(frame)
+        video_writer.release()
+        print(f"Mask Video saved at {mask_video_filename}")
+        return gr.update(value=None), gr.update(value=final_vid_output_path), working_frame, available_frames_to_check, gr.update(visible=True), mask_video_filename
 def update_ui(vis_frame_type):
     if vis_frame_type == "check":