Update app.py
app.py
CHANGED
@@ -167,51 +167,6 @@ def show_box(box, ax):
     w, h = box[2] - box[0], box[3] - box[1]
     ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2))
 
-def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_labels=None, borders=True):
-    combined_images = []  # List to store filenames of images with masks overlaid
-    mask_images = []  # List to store filenames of separate mask images
-
-    for i, (mask, score) in enumerate(zip(masks, scores)):
-        # ---- Original Image with Mask Overlaid ----
-        plt.figure(figsize=(10, 10))
-        plt.imshow(image)
-        show_mask(mask, plt.gca(), borders=borders)  # Draw the mask with borders
-        """
-        if point_coords is not None:
-            assert input_labels is not None
-            show_points(point_coords, input_labels, plt.gca())
-        """
-        if box_coords is not None:
-            show_box(box_coords, plt.gca())
-        if len(scores) > 1:
-            plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
-        plt.axis('off')
-
-        # Save the figure as a JPG file
-        combined_filename = f"combined_image_{i+1}.jpg"
-        plt.savefig(combined_filename, format='jpg', bbox_inches='tight')
-        combined_images.append(combined_filename)
-
-        plt.close()  # Close the figure to free up memory
-
-        # ---- Separate Mask Image (White Mask on Black Background) ----
-        # Create a black image
-        mask_image = np.zeros_like(image, dtype=np.uint8)
-
-        # The mask is a binary array where the masked area is 1, else 0.
-        # Convert the mask to a white color in the mask_image
-        mask_layer = (mask > 0).astype(np.uint8) * 255
-        for c in range(3):  # Assuming RGB, repeat mask for all channels
-            mask_image[:, :, c] = mask_layer
-
-        # Save the mask image
-        mask_filename = f"mask_image_{i+1}.png"
-        Image.fromarray(mask_image).save(mask_filename)
-        mask_images.append(mask_filename)
-
-        plt.close()  # Close the figure to free up memory
-
-    return combined_images, mask_images
 
 def load_model(checkpoint):
     # Load model accordingly to user's choice
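An aside on the deleted show_masks helper above: its per-channel loop that paints the binary mask white is equivalent to a single NumPy stack. A minimal, loop-free sketch; only `mask` comes from the code above, everything else is plain NumPy and not part of app.py:

import numpy as np

# Assumes `mask` is a 2-D array where the masked area is nonzero.
mask_layer = (mask > 0).astype(np.uint8) * 255  # white where masked
mask_image = np.dstack([mask_layer] * 3)        # replicate across R, G, B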
@@ -254,9 +209,11 @@ def sam_process(input_first_frame_image, checkpoint, tracking_points, trackings_
     # segment and track one object
     # predictor.reset_state(inference_state) # if any previous tracking, reset
 
+    new_working_frame = None
     # Add new point
     if working_frame == None:
         ann_frame_idx = 0 # the frame index we interact with
+        new_working_frame = "frames_output_images/frame_0.jpg"
     else:
         # Use a regular expression to find the integer
         match = re.search(r'frame_(\d+)', working_frame)
@@ -264,6 +221,7 @@ def sam_process(input_first_frame_image, checkpoint, tracking_points, trackings_
         # Extract the integer from the match
         frame_number = int(match.group(1))
         ann_frame_idx = frame_number
+        new_working_frame = f"frames_output_images/frame_{ann_frame_idx}.jpg"
 
     ann_obj_id = 1 # give a unique id to each object we interact with (it can be any integers)
 
@@ -292,7 +250,7 @@ def sam_process(input_first_frame_image, checkpoint, tracking_points, trackings_
     plt.close()
     torch.cuda.empty_cache()
 
-    return "output_first_frame.jpg", frame_names, inference_state
+    return "output_first_frame.jpg", frame_names, inference_state, new_working_frame
 
 def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_names, video_frames_dir, vis_frame_type, progress=gr.Progress(track_tqdm=True)):
     #### PROPAGATION ####
@@ -346,7 +304,7 @@ def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_
     print(f"JPEG_IMAGES: {jpeg_images}")
 
     if vis_frame_type == "check":
-        return gr.update(value=jpeg_images), gr.update(value=None), gr.update(choices=jpeg_images, value=None, visible=
+        return gr.update(value=jpeg_images), gr.update(value=None), gr.update(choices=jpeg_images, value=None, visible=True)
     elif vis_frame_type == "render":
         # Create a video clip from the image sequence
         original_fps = get_video_fps(video_in)
@@ -378,7 +336,7 @@ def switch_working_frame(working_frame, scanned_frames, video_frames_dir):
         frame_number = int(match.group(1))
         ann_frame_idx = frame_number
         new_working_frame = os.path.join(video_frames_dir, scanned_frames[ann_frame_idx])
-    return new_working_frame, gr.State([]), gr.State([]), new_working_frame, new_working_frame
+    return new_working_frame, gr.State([]), gr.State([]), new_working_frame, new_working_frame, new_working_frame
 
 with gr.Blocks() as demo:
     first_frame_path = gr.State()
@@ -453,19 +411,19 @@ with gr.Blocks() as demo:
         queue = False
     )
 
-
+
     working_frame.change(
        fn = switch_working_frame,
        inputs = [working_frame, scanned_frames, video_frames_dir],
-       outputs = [first_frame_path, tracking_points, trackings_input_label, input_first_frame_image, points_map],
+       outputs = [first_frame_path, tracking_points, trackings_input_label, input_first_frame_image, points_map, working_frame],
        queue=False
    )
-
+
 
    submit_btn.click(
        fn = sam_process,
        inputs = [input_first_frame_image, checkpoint, tracking_points, trackings_input_label, video_frames_dir, scanned_frames, working_frame],
-       outputs = [output_result, stored_frame_names, stored_inference_state]
+       outputs = [output_result, stored_frame_names, stored_inference_state, working_frame]
    )
 
    propagate_btn.click(
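Taken together, the additions thread the selected frame back through the UI: sam_process now returns new_working_frame, the submit_btn.click wiring feeds that value into the working_frame component, and propagate_to_all reveals the frame chooser via gr.update(choices=..., visible=True). A minimal sketch of this round-trip pattern, assuming Gradio is installed; the component names and handler logic here are illustrative stand-ins, not the ones in app.py:

import gradio as gr

def process(frame_name):
    # Stand-in for sam_process: do some work, then return the frame we
    # ended up on so the selector component reflects it (mirrors the
    # new fourth return value feeding back into working_frame).
    return f"processed {frame_name}", frame_name

with gr.Blocks() as demo:
    working_frame = gr.Radio(choices=["frame_0.jpg", "frame_1.jpg"],
                             value="frame_0.jpg", label="Working frame")
    output = gr.Textbox(label="Result")
    submit = gr.Button("Submit")
    # Listing working_frame among the outputs is what lets the handler
    # update the very component the user picks from.
    submit.click(fn=process, inputs=[working_frame],
                 outputs=[output, working_frame])

demo.launch()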