Spaces:

jhj0517
/

sam2-playground

Runtime error

App Files Files Community

jhj0517 committed on Aug 28, 2024

Commit

1b5d47b

1 Parent(s): ee4969b

Add `invert_mask` parameter to the functions

Browse files

Files changed (1) hide show

modules/sam_inference.py +30 -3

modules/sam_inference.py CHANGED Viewed

@@ -16,6 +16,7 @@ from modules.model_downloader import (
 from modules.paths import (MODELS_DIR, TEMP_OUT_DIR, TEMP_DIR, MODEL_CONFIGS, OUTPUT_DIR)
 from modules.constants import (BOX_PROMPT_MODE, AUTOMATIC_MODE, COLOR_FILTER, PIXELIZE_FILTER, IMAGE_FILE_EXT)
 from modules.mask_utils import (
     save_psd_with_masks,
     create_mask_combined_images,
     create_mask_gallery,
@@ -129,6 +130,7 @@ class SamInference:
     def generate_mask(self,
                       image: np.ndarray,
                       model_type: str,
                       **params) -> List[Dict[str, Any]]:
         """
         Generate masks with Automatic segmentation. Default hyperparameters are in './configs/default_hparams.yaml.'
@@ -136,6 +138,7 @@ class SamInference:
         Args:
             image (np.ndarray): The input image.
             model_type (str): The model type to load.
             **params: The hyperparameters for the mask generator.
         Returns:
@@ -154,6 +157,11 @@ class SamInference:
         except Exception as e:
             logger.exception(f"Error while auto generating masks : {e}")
             raise RuntimeError(f"Failed to generate masks") from e
         return generated_masks
     def predict_image(self,
@@ -162,6 +170,7 @@ class SamInference:
                       box: Optional[np.ndarray] = None,
                       point_coords: Optional[np.ndarray] = None,
                       point_labels: Optional[np.ndarray] = None,
                       **params) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Predict image with prompt data.
@@ -172,6 +181,7 @@ class SamInference:
             box (np.ndarray): The box prompt data.
             point_coords (np.ndarray): The point coordinates prompt data.
             point_labels (np.ndarray): The point labels prompt data.
             **params: The hyperparameters for the mask generator.
         Returns:
@@ -195,6 +205,10 @@ class SamInference:
         except Exception as e:
             logger.exception(f"Error while predicting image with prompt: {str(e)}")
             raise RuntimeError(f"Failed to predict image with prompt") from e
         return masks, scores, logits
     def add_prediction_to_frame(self,
@@ -291,6 +305,7 @@ class SamInference:
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
                               color_hex: Optional[str] = None,
                               ):
         """
         Add filter to the preview image with the prompt data. Specially made for gradio app.
@@ -302,6 +317,7 @@ class SamInference:
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
         Returns:
             np.ndarray: The filtered image output.
@@ -332,6 +348,9 @@ class SamInference:
             box=box
         )
         masks = (logits[0] > 0.0).cpu().numpy()
         generated_masks = self.format_to_auto_result(masks)
         if filter_mode == COLOR_FILTER:
@@ -347,7 +366,8 @@ class SamInference:
                               filter_mode: str,
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
-                              color_hex: Optional[str] = None
                               ):
         """
         Create a whole filtered video with video_inference_state. Currently only one frame tracking is supported.
@@ -359,6 +379,7 @@ class SamInference:
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
         Returns:
             str: The output video path.
@@ -390,12 +411,14 @@ class SamInference:
             inference_state=self.video_inference_state,
             points=point_coords,
             labels=point_labels,
-            box=box
         )
         video_segments = self.propagate_in_video(inference_state=self.video_inference_state)
         for frame_index, info in video_segments.items():
             orig_image, masks = info["image"], info["mask"]
             masks = self.format_to_auto_result(masks)
             if filter_mode == COLOR_FILTER:
@@ -423,6 +446,7 @@ class SamInference:
                      image_prompt_input_data: Dict,
                      input_mode: str,
                      model_type: str,
                      *params):
         """
         Divide the layer with the given prompt data and save psd file.
@@ -432,6 +456,7 @@ class SamInference:
             image_prompt_input_data (Dict): The image prompt data.
             input_mode (str): The input mode for the image prompt data. ["Automatic", "Box Prompt"]
             model_type (str): The model type to load.
             *params: The hyperparameters for the mask generator.
         Returns:
@@ -463,6 +488,7 @@ class SamInference:
             generated_masks = self.generate_mask(
                 image=image,
                 model_type=model_type,
                 **hparams
             )
@@ -481,7 +507,8 @@ class SamInference:
                 box=box,
                 point_coords=point_coords,
                 point_labels=point_labels,
-                multimask_output=hparams["multimask_output"]
             )
             generated_masks = self.format_to_auto_result(predicted_masks)

 from modules.paths import (MODELS_DIR, TEMP_OUT_DIR, TEMP_DIR, MODEL_CONFIGS, OUTPUT_DIR)
 from modules.constants import (BOX_PROMPT_MODE, AUTOMATIC_MODE, COLOR_FILTER, PIXELIZE_FILTER, IMAGE_FILE_EXT)
 from modules.mask_utils import (
+    invert_masks,
     save_psd_with_masks,
     create_mask_combined_images,
     create_mask_gallery,
     def generate_mask(self,
                       image: np.ndarray,
                       model_type: str,
+                      invert_mask: bool = False,
                       **params) -> List[Dict[str, Any]]:
         """
         Generate masks with Automatic segmentation. Default hyperparameters are in './configs/default_hparams.yaml.'
         Args:
             image (np.ndarray): The input image.
             model_type (str): The model type to load.
+            invert_mask (bool): Invert the mask output - used for background masking.
             **params: The hyperparameters for the mask generator.
         Returns:
         except Exception as e:
             logger.exception(f"Error while auto generating masks : {e}")
             raise RuntimeError(f"Failed to generate masks") from e
+        if invert_mask:
+            generated_masks = [{'segmentation': invert_masks(mask['segmentation']),
+                                'area': mask['area']} for mask in generated_masks]
         return generated_masks
     def predict_image(self,
                       box: Optional[np.ndarray] = None,
                       point_coords: Optional[np.ndarray] = None,
                       point_labels: Optional[np.ndarray] = None,
+                      invert_mask: bool = False,
                       **params) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Predict image with prompt data.
             box (np.ndarray): The box prompt data.
             point_coords (np.ndarray): The point coordinates prompt data.
             point_labels (np.ndarray): The point labels prompt data.
+            invert_mask (bool): Invert the mask output - used for background masking.
             **params: The hyperparameters for the mask generator.
         Returns:
         except Exception as e:
             logger.exception(f"Error while predicting image with prompt: {str(e)}")
             raise RuntimeError(f"Failed to predict image with prompt") from e
+        if invert_mask:
+            masks = invert_masks(masks)
         return masks, scores, logits
     def add_prediction_to_frame(self,
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
                               color_hex: Optional[str] = None,
+                              invert_mask: bool = False
                               ):
         """
         Add filter to the preview image with the prompt data. Specially made for gradio app.
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
+            invert_mask (bool): Invert the mask output - used for background masking.
         Returns:
             np.ndarray: The filtered image output.
             box=box
         )
         masks = (logits[0] > 0.0).cpu().numpy()
+        if invert_mask:
+            masks = invert_masks(masks)
         generated_masks = self.format_to_auto_result(masks)
         if filter_mode == COLOR_FILTER:
                               filter_mode: str,
                               frame_idx: int,
                               pixel_size: Optional[int] = None,
+                              color_hex: Optional[str] = None,
+                              invert_mask: bool = False
                               ):
         """
         Create a whole filtered video with video_inference_state. Currently only one frame tracking is supported.
             frame_idx (int): The frame index of the video.
             pixel_size (int): The pixel size for the pixelize filter.
             color_hex (str): The color hex code for the solid color filter.
+            invert_mask (bool): Invert the mask output - used for background masking.
         Returns:
             str: The output video path.
             inference_state=self.video_inference_state,
             points=point_coords,
             labels=point_labels,
+            box=box,
         )
         video_segments = self.propagate_in_video(inference_state=self.video_inference_state)
         for frame_index, info in video_segments.items():
             orig_image, masks = info["image"], info["mask"]
+            if invert_mask:
+                masks = invert_masks(masks)
             masks = self.format_to_auto_result(masks)
             if filter_mode == COLOR_FILTER:
                      image_prompt_input_data: Dict,
                      input_mode: str,
                      model_type: str,
+                     invert_mask: bool = False,
                      *params):
         """
         Divide the layer with the given prompt data and save psd file.
             image_prompt_input_data (Dict): The image prompt data.
             input_mode (str): The input mode for the image prompt data. ["Automatic", "Box Prompt"]
             model_type (str): The model type to load.
+            invert_mask (bool): Invert the mask output.
             *params: The hyperparameters for the mask generator.
         Returns:
             generated_masks = self.generate_mask(
                 image=image,
                 model_type=model_type,
+                invert_mask=invert_mask,
                 **hparams
             )
                 box=box,
                 point_coords=point_coords,
                 point_labels=point_labels,
+                multimask_output=hparams["multimask_output"],
+                invert_mask=invert_mask
             )
             generated_masks = self.format_to_auto_result(predicted_masks)