Update app.py
app.py CHANGED
@@ -408,6 +408,7 @@ def get_mask_sam_process(
     # return gr.update(visible=True), "output_first_frame.jpg", frame_names, predictor, inference_state, gr.update(choices=available_frames_to_check, value=working_frame, visible=True)
     return "output_first_frame.jpg", frame_names, predictor, inference_state, gr.update(choices=available_frames_to_check, value=working_frame, visible=False)
 
+'''
 #@spaces.GPU
 def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_names, video_frames_dir, vis_frame_type, available_frames_to_check, working_frame, progress=gr.Progress(track_tqdm=True)):
     # use bfloat16 for the entire notebook
@@ -505,6 +506,107 @@ def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_
         codec='libx264'
     )
 
+    return gr.update(value=None), gr.update(value=final_vid_output_path), working_frame, available_frames_to_check, gr.update(visible=True)
+'''
+
+import json
+import numpy as np
+
+def propagate_to_all(video_in, checkpoint, stored_inference_state, stored_frame_names, video_frames_dir, vis_frame_type, available_frames_to_check, working_frame, progress=gr.Progress(track_tqdm=True)):
+    # use bfloat16 for the entire notebook
+    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+
+    if torch.cuda.get_device_properties(0).major >= 8:
+        # turn on tfloat32 for Ampere GPUs
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+
+    #### PROPAGATION ####
+    sam2_checkpoint, model_cfg = load_model(checkpoint)
+    # set predictor
+    inference_state = stored_inference_state
+
+    if torch.cuda.is_available():
+        inference_state["device"] = 'cuda'
+        predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint)
+    else:
+        inference_state["device"] = 'cpu'
+        predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device='cpu')
+
+    frame_names = stored_frame_names
+    video_dir = video_frames_dir
+
+    # Define a directory to save the JPEG images
+    frames_output_dir = "frames_output_images"
+    os.makedirs(frames_output_dir, exist_ok=True)
+
+    # Initialize a list to store file paths of saved images
+    jpeg_images = []
+
+    # Initialize a list to store mask area ratios
+    mask_area_ratios = []
+
+    # run propagation throughout the video and collect the results in a dict
+    video_segments = {}  # video_segments contains the per-frame segmentation results
+    out_obj_ids, out_mask_logits = predictor.propagate_in_video(inference_state, start_frame_idx=0, reverse=False)
+    print(out_obj_ids)
+    for frame_idx in range(0, inference_state['num_frames']):
+        video_segments[frame_idx] = {out_obj_ids[0]: (out_mask_logits[frame_idx] > 0.0).cpu().numpy()}
+
+        # Calculate mask area ratio
+        mask = video_segments[frame_idx][out_obj_ids[0]]
+        mask_area = np.sum(mask)  # Number of True pixels in the mask
+        total_area = mask.shape[0] * mask.shape[1]  # Total number of pixels in the frame
+        mask_area_ratio = mask_area / total_area  # Ratio of mask area to total area
+
+        mask_area_ratio = mask_area / np.ones_like(mask).sum()
+
+        mask_area_ratios.append(mask_area_ratio)
+
+    # Save mask area ratios as a JSON file
+    mask_area_ratios_dict = {f"frame_{frame_idx}": ratio for frame_idx, ratio in enumerate(mask_area_ratios)}
+    with open("mask_area_ratios.json", "w") as f:
+        json.dump(mask_area_ratios_dict, f, indent=4)
+
+    # render the segmentation results every few frames
+    if vis_frame_type == "check":
+        vis_frame_stride = 15
+    elif vis_frame_type == "render":
+        vis_frame_stride = 1
+
+    plt.close("all")
+    for out_frame_idx in range(0, len(frame_names), vis_frame_stride):
+        plt.figure(figsize=(6, 4))
+        plt.title(f"frame {out_frame_idx}")
+        plt.imshow(Image.open(os.path.join(video_dir, frame_names[out_frame_idx])))
+        for out_obj_id, out_mask in video_segments[out_frame_idx].items():
+            show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
+
+        # Define the output filename and save the figure as a JPEG file
+        output_filename = os.path.join(frames_output_dir, f"frame_{out_frame_idx}.jpg")
+        plt.savefig(output_filename, format='jpg')
+
+        # Close the plot
+        plt.close()
+
+        # Append the file path to the list
+        jpeg_images.append(output_filename)
+
+        if f"frame_{out_frame_idx}.jpg" not in available_frames_to_check:
+            available_frames_to_check.append(f"frame_{out_frame_idx}.jpg")
+
+    torch.cuda.empty_cache()
+    print(f"JPEG_IMAGES: {jpeg_images}")
+
+    if vis_frame_type == "check":
+        return gr.update(value=jpeg_images), gr.update(value=None), gr.update(choices=available_frames_to_check, value=working_frame, visible=True), available_frames_to_check, gr.update(visible=True), mask_area_ratios_dict
+    elif vis_frame_type == "render":
+        # Create a video clip from the image sequence
+        original_fps = get_video_fps(video_in)
+        clip = ImageSequenceClip(jpeg_images, fps=original_fps // 6)
+        final_vid_output_path = "output_video.mp4"
+        clip.write_videofile(final_vid_output_path, codec='libx264')
+
     return gr.update(value=None), gr.update(value=final_vid_output_path), working_frame, available_frames_to_check, gr.update(visible=True)
 
 def update_ui(vis_frame_type):
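
A note on the area arithmetic in the new loop: mask_area_ratio is assigned twice, and only the second assignment, mask_area / np.ones_like(mask).sum(), survives to be appended. np.ones_like(mask).sum() counts every element of the array (the same as mask.size), which coincides with mask.shape[0] * mask.shape[1] only when the mask is strictly 2-D. A toy sketch of the arithmetic; the (1, H, W) leading channel axis is an assumption about the thresholded SAM2 logits, not something this commit states:

import numpy as np

# Toy stand-in for one frame's mask, as produced by
# (out_mask_logits[frame_idx] > 0.0).cpu().numpy().
# The (1, H, W) shape is an assumed example, not taken from the commit.
mask = np.zeros((1, 4, 6), dtype=bool)
mask[0, 1:3, 2:5] = True                 # 6 masked pixels out of 24

mask_area = np.sum(mask)                 # 6 True pixels
total_area = np.ones_like(mask).sum()    # 24, identical to mask.size
print(mask_area / total_area)            # 0.25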
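Downstream of this commit, mask_area_ratios.json maps frame keys to the fraction of the frame covered by the predicted mask, so each value lies in [0, 1]. A minimal sketch of how a separate script could consume the file; the 0.25 threshold is an illustrative assumption, not part of this Space:

import json

# Load the per-frame coverage written by the new propagate_to_all()
with open("mask_area_ratios.json") as f:
    ratios = json.load(f)  # e.g. {"frame_0": 0.12, "frame_1": 0.13, ...}

# Hypothetical use: flag frames where the tracked object covers
# more than a quarter of the image.
threshold = 0.25
flagged = [name for name, ratio in ratios.items() if ratio > threshold]
print(f"{len(flagged)} of {len(ratios)} frames exceed {threshold:.0%} coverage")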