fffiloni committed · verified
Commit d5deb3f · 1 parent: 5d3e654

Update app.py

Files changed (1):
  1. app.py +68 -14
app.py CHANGED
@@ -44,17 +44,27 @@ def get_video_fps(video_path):
     return fps
 
 def preprocess_image(image):
-    return image, gr.State([]), gr.State([]), image, gr.State()
+    # we clean all
+    return [
+        image,  # first_frame_path
+        [],     # tracking_points
+        [],     # trackings_input_label
+        image,  # points_map
+        None    # stored_inference_state
+    ]
 
 def preprocess_video_in(video_path):
 
     # Generate a unique ID based on the current date and time
     unique_id = datetime.now().strftime('%Y%m%d%H%M%S')
+
+    # Set directory with this ID to store video frames
     output_dir = f'frames_{unique_id}'
 
     # Create the output directory
     os.makedirs(output_dir, exist_ok=True)
-
+
+    ### Process video frames ###
     # Open the video file
     cap = cv2.VideoCapture(video_path)
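Note: besides adding comments, the preprocess_image change switches the gr.State outputs from fresh gr.State([]) wrappers to plain values. In Gradio, a callback wired to outputs that include gr.State components should return the states' plain Python values; the framework updates the state objects itself. A minimal sketch of that pattern (component names here are illustrative, not the app's):

    import gradio as gr

    def clear_points(image):
        # Reset both point lists, keep the untouched reference image
        return image, [], []

    with gr.Blocks() as demo:
        tracking_points = gr.State([])  # clicked coordinates
        point_labels = gr.State([])     # 1 = include, 0 = exclude
        image = gr.Image()
        clear_btn = gr.Button("Clear points")
        clear_btn.click(fn=clear_points, inputs=image,
                        outputs=[image, tracking_points, point_labels], queue=False)

    demo.launch()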
 
@@ -97,9 +107,21 @@ def preprocess_video_in(video_path):
         if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
     ]
     scanned_frames.sort(key=lambda p: int(os.path.splitext(p)[0]))
-
-    # 'image' is the first frame extracted from video_in
-    return first_frame, gr.State([]), gr.State([]), first_frame, first_frame, output_dir, scanned_frames, None, None, gr.update(open=False)
+    print(f"SCANNED_FRAMES: {scanned_frames}")
+
+
+    return [
+        first_frame,           # first_frame_path
+        gr.State([]),          # tracking_points
+        gr.State([]),          # trackings_input_label
+        first_frame,           # input_first_frame_image
+        first_frame,           # points_map
+        output_dir,            # video_frames_dir
+        scanned_frames,        # scanned_frames
+        None,                  # stored_inference_state
+        None,                  # stored_frame_names
+        gr.update(open=False)  # video_in_drawer
+    ]
 
 def get_point(point_type, tracking_points, trackings_input_label, first_frame_path, evt: gr.SelectData):
     print(f"You selected {evt.value} at {evt.index} from {evt.target}")
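Note: the part of preprocess_video_in that actually writes the frames to disk falls between the previous hunk and this one; it is unchanged by this commit, so the diff elides it. A sketch consistent with the surrounding context (the cv2.VideoCapture open above and the int(os.path.splitext(p)[0]) sort here, which requires bare numeric filenames like "0.jpg", "1.jpg", ...):

    import os
    import cv2

    def extract_frames(video_path, output_dir):
        os.makedirs(output_dir, exist_ok=True)
        cap = cv2.VideoCapture(video_path)
        frame_number = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream
            # Bare numeric names so the later sort by int(stem) works
            cv2.imwrite(os.path.join(output_dir, f"{frame_number}.jpg"), frame)
            frame_number += 1
        cap.release()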
@@ -395,27 +417,59 @@ with gr.Blocks() as demo:
     output_video = gr.Video(visible=False)
     # output_result_mask = gr.Image()
 
-    clear_points_btn.click(
-        fn = preprocess_image,
-        inputs = input_first_frame_image,
-        outputs = [first_frame_path, tracking_points, trackings_input_label, points_map, stored_inference_state],
-        queue=False
-    )
 
+
+    # When new video is uploaded
     video_in.upload(
         fn = preprocess_video_in,
         inputs = [video_in],
-        outputs = [first_frame_path, tracking_points, trackings_input_label, input_first_frame_image, points_map, video_frames_dir, scanned_frames, stored_inference_state, stored_frame_names, video_in_drawer],
+        outputs = [
+            first_frame_path,
+            tracking_points,          # update Tracking Points in the gr.State([]) object
+            trackings_input_label,    # update Tracking Labels in the gr.State([]) object
+            input_first_frame_image,  # hidden component used as ref when clearing points
+            points_map,               # Image component where we add new tracking points
+            video_frames_dir,         # Array where frames from video_in are deep stored
+            scanned_frames,           # Scanned frames by SAM2
+            stored_inference_state,   # Sam2 inference state
+            stored_frame_names,       #
+            video_in_drawer,          # Accordion to hide uploaded video player
+        ],
         queue = False
     )
 
+
+    # triggered when we click on image to add new points
     points_map.select(
         fn = get_point,
-        inputs = [point_type, tracking_points, trackings_input_label, first_frame_path],
-        outputs = [tracking_points, trackings_input_label, points_map],
+        inputs = [
+            point_type,             # "include" or "exclude"
+            tracking_points,        # get tracking_points values
+            trackings_input_label,  # get tracking label values
+            first_frame_path,       # gr.State() first frame path
+        ],
+        outputs = [
+            tracking_points,        # updated with new points
+            trackings_input_label,  # updated with corresponding labels
+            points_map,             # updated image with points
+        ],
        queue = False
     )
 
+    # Clear every points clicked and added to the map
+    clear_points_btn.click(
+        fn = preprocess_image,
+        inputs = input_first_frame_image,  # we get the untouched hidden image
+        outputs = [
+            first_frame_path,
+            tracking_points,
+            trackings_input_label,
+            points_map,
+            stored_inference_state,
+        ],
+        queue=False
+    )
+
     """
     working_frame.change(
         fn = switch_working_frame,
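Note: the points_map.select wiring relies on Gradio's event-data injection: a parameter annotated with gr.SelectData is filled in automatically and is not listed in inputs, and for an Image component evt.index carries the clicked [x, y] pixel coordinates. A minimal, self-contained sketch of the same wiring (names are illustrative, not the app's):

    import gradio as gr

    def get_point(point_type, points, labels, evt: gr.SelectData):
        points.append(evt.index)  # [x, y] of the click on the image
        labels.append(1 if point_type == "include" else 0)
        return points, labels

    with gr.Blocks() as demo:
        point_type = gr.Radio(["include", "exclude"], value="include")
        points = gr.State([])
        labels = gr.State([])
        points_map = gr.Image()
        points_map.select(fn=get_point, inputs=[point_type, points, labels],
                          outputs=[points, labels], queue=False)

    demo.launch()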
 