SAM2-Video-Predictor

Sleeping

App Files Files Community

fffiloni committed on Aug 2, 2024

Commit

96e731e

verified ·

1 Parent(s): d5deb3f

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -23

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import subprocess
 import re
 # Define the command to be executed
 command = ["python", "setup.py", "build_ext", "--inplace"]
@@ -43,7 +44,7 @@ def get_video_fps(video_path):
     return fps
-def preprocess_image(image):
     # we clean all
     return [
         image,   # first_frame_path
@@ -59,10 +60,10 @@ def preprocess_video_in(video_path):
     unique_id = datetime.now().strftime('%Y%m%d%H%M%S')
     # Set directory with this ID to store video frames
-    output_dir = f'frames_{unique_id}'
     # Create the output directory
-    os.makedirs(output_dir, exist_ok=True)
     ### Process video frames ###
     # Open the video file
@@ -87,7 +88,7 @@ def preprocess_video_in(video_path):
             break
         # Format the frame filename as '00000.jpg'
-        frame_filename = os.path.join(output_dir, f'{frame_number:05d}.jpg')
         # Save the frame as a JPEG file
         cv2.imwrite(frame_filename, frame)
@@ -103,12 +104,11 @@ def preprocess_video_in(video_path):
     # scan all the JPEG frame names in this directory
     scanned_frames = [
-        p for p in os.listdir(output_dir)
         if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
     ]
     scanned_frames.sort(key=lambda p: int(os.path.splitext(p)[0]))
     print(f"SCANNED_FRAMES: {scanned_frames}")
     return [
         first_frame,           # first_frame_path
@@ -116,7 +116,7 @@ def preprocess_video_in(video_path):
         gr.State([]),          # trackings_input_label
         first_frame,           # input_first_frame_image
         first_frame,           # points_map
-        output_dir,            # video_frames_dir
         scanned_frames,        # scanned_frames
         None,                  # stored_inference_state
         None,                  # stored_frame_names
@@ -195,46 +195,61 @@ def load_model(checkpoint):
     if checkpoint == "tiny":
         sam2_checkpoint = "./checkpoints/sam2_hiera_tiny.pt"
         model_cfg = "sam2_hiera_t.yaml"
-        return sam2_checkpoint, model_cfg
     elif checkpoint == "samll":
         sam2_checkpoint = "./checkpoints/sam2_hiera_small.pt"
         model_cfg = "sam2_hiera_s.yaml"
-        return sam2_checkpoint, model_cfg
     elif checkpoint == "base-plus":
         sam2_checkpoint = "./checkpoints/sam2_hiera_base_plus.pt"
         model_cfg = "sam2_hiera_b+.yaml"
-        return sam2_checkpoint, model_cfg
     elif checkpoint == "large":
         sam2_checkpoint = "./checkpoints/sam2_hiera_large.pt"
         model_cfg = "sam2_hiera_l.yaml"
-        return sam2_checkpoint, model_cfg
-def sam_process(input_first_frame_image, checkpoint, tracking_points, trackings_input_label, video_frames_dir, scanned_frames, working_frame, progress=gr.Progress(track_tqdm=True)):
-    # 1. We need to preprocess the video and store frames in the right directory
-    # — Penser à utiliser un ID unique pour le dossier
     sam2_checkpoint, model_cfg = load_model(checkpoint)
     predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint)
     # `video_dir` a directory of JPEG frames with filenames like `<frame_index>.jpg`
     print(f"STATE FRAME OUTPUT DIRECTORY: {video_frames_dir}")
     video_dir = video_frames_dir
     # scan all the JPEG frame names in this directory
     frame_names = scanned_frames
     inference_state = predictor.init_state(video_path=video_dir)
     # segment and track one object
     # predictor.reset_state(inference_state) # if any previous tracking, reset
     new_working_frame = None
     # Add new point
-    if working_frame == None:
-        ann_frame_idx = 0  # the frame index we interact with
         new_working_frame = "frames_output_images/frame_0.jpg"
     else:
         # Use a regular expression to find the integer
@@ -244,6 +259,7 @@ def sam_process(input_first_frame_image, checkpoint, tracking_points, trackings_
             frame_number = int(match.group(1))
             ann_frame_idx = frame_number
             new_working_frame = f"frames_output_images/frame_{ann_frame_idx}.jpg"
     ann_obj_id = 1  # give a unique id to each object we interact with (it can be any integers)
@@ -458,7 +474,7 @@ with gr.Blocks() as demo:
     # Clear every points clicked and added to the map
     clear_points_btn.click(
-        fn = preprocess_image,
         inputs = input_first_frame_image, # we get the untouched hidden image
         outputs = [
             first_frame_path,
@@ -480,9 +496,21 @@ with gr.Blocks() as demo:
     """
     submit_btn.click(
-        fn = sam_process,
-        inputs = [input_first_frame_image, checkpoint, tracking_points, trackings_input_label, video_frames_dir, scanned_frames, working_frame],
-        outputs = [output_result, stored_frame_names, stored_inference_state]
     )
     propagate_btn.click(

 import subprocess
 import re
+from typing import List, Tuple, Optional
 # Define the command to be executed
 command = ["python", "setup.py", "build_ext", "--inplace"]
     return fps
+def clear_points(image):
     # we clean all
     return [
         image,   # first_frame_path
     unique_id = datetime.now().strftime('%Y%m%d%H%M%S')
     # Set directory with this ID to store video frames
+    extracted_frames_output_dir = f'frames_{unique_id}'
     # Create the output directory
+    os.makedirs(extracted_frames_output_dir, exist_ok=True)
     ### Process video frames ###
     # Open the video file
             break
         # Format the frame filename as '00000.jpg'
+        frame_filename = os.path.join(extracted_frames_output_dir, f'{frame_number:05d}.jpg')
         # Save the frame as a JPEG file
         cv2.imwrite(frame_filename, frame)
     # scan all the JPEG frame names in this directory
     scanned_frames = [
+        p for p in os.listdir(extracted_frames_output_dir)
         if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
     ]
     scanned_frames.sort(key=lambda p: int(os.path.splitext(p)[0]))
     print(f"SCANNED_FRAMES: {scanned_frames}")
     return [
         first_frame,           # first_frame_path
         gr.State([]),          # trackings_input_label
         first_frame,           # input_first_frame_image
         first_frame,           # points_map
+        extracted_frames_output_dir,            # video_frames_dir
         scanned_frames,        # scanned_frames
         None,                  # stored_inference_state
         None,                  # stored_frame_names
     if checkpoint == "tiny":
         sam2_checkpoint = "./checkpoints/sam2_hiera_tiny.pt"
         model_cfg = "sam2_hiera_t.yaml"
+        return [sam2_checkpoint, model_cfg]
     elif checkpoint == "samll":
         sam2_checkpoint = "./checkpoints/sam2_hiera_small.pt"
         model_cfg = "sam2_hiera_s.yaml"
+        return [sam2_checkpoint, model_cfg]
     elif checkpoint == "base-plus":
         sam2_checkpoint = "./checkpoints/sam2_hiera_base_plus.pt"
         model_cfg = "sam2_hiera_b+.yaml"
+        return [sam2_checkpoint, model_cfg]
     elif checkpoint == "large":
         sam2_checkpoint = "./checkpoints/sam2_hiera_large.pt"
         model_cfg = "sam2_hiera_l.yaml"
+        return [sam2_checkpoint, model_cfg]
+def get_mask_sam_process(
+    input_first_frame_image,
+    checkpoint,
+    tracking_points,
+    trackings_input_label,
+    video_frames_dir, # extracted_frames_output_dir defined in 'preprocess_video_in' function
+    scanned_frames,
+    working_frame: str = None, # current frame being added points
+    progress=gr.Progress(track_tqdm=True)
+):
+    # get model and model config paths
+    print(f"USER CHOSEN CHECKPOINT: {checkpoint}")
     sam2_checkpoint, model_cfg = load_model(checkpoint)
+    print("MODEL LOADED")
+    # set predictor
     predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint)
+    print("PREDICTOR READY")
     # `video_dir` a directory of JPEG frames with filenames like `<frame_index>.jpg`
     print(f"STATE FRAME OUTPUT DIRECTORY: {video_frames_dir}")
     video_dir = video_frames_dir
     # scan all the JPEG frame names in this directory
     frame_names = scanned_frames
+    # Init SAM2 inference_state
     inference_state = predictor.init_state(video_path=video_dir)
+    print("NEW INFERENCE_STATE INITIATED")
     # segment and track one object
     # predictor.reset_state(inference_state) # if any previous tracking, reset
+    ### HANDLING WORKING FRAME
     new_working_frame = None
     # Add new point
+    if working_frame is None:
+        ann_frame_idx = 0  # the frame index we interact with, 0 if it is the first frame
         new_working_frame = "frames_output_images/frame_0.jpg"
     else:
         # Use a regular expression to find the integer
             frame_number = int(match.group(1))
             ann_frame_idx = frame_number
             new_working_frame = f"frames_output_images/frame_{ann_frame_idx}.jpg"
+    print(f"NEW_WORKING_FRAME PATH: {new_working_frame}")
     ann_obj_id = 1  # give a unique id to each object we interact with (it can be any integers)
     # Clear every points clicked and added to the map
     clear_points_btn.click(
+        fn = clear_points,
         inputs = input_first_frame_image, # we get the untouched hidden image
         outputs = [
             first_frame_path,
     """
     submit_btn.click(
+        fn = get_mask_sam_process,
+        inputs = [
+            input_first_frame_image,
+            checkpoint,
+            tracking_points,
+            trackings_input_label,
+            video_frames_dir,
+            scanned_frames,
+            working_frame,
+        ],
+        outputs = [
+            output_result,
+            stored_frame_names,
+            stored_inference_state,
+        ]
     )
     propagate_btn.click(