Spaces:

NN-BRD
/

hackathon_depth_segment

Runtime error

App Files Files Community

s194649 committed on Aug 19, 2023

Commit

39f3339

1 Parent(s): bcdfff1

fix

Browse files

Files changed (2) hide show

app.py +1 -0
inference.py +61 -0

app.py CHANGED Viewed

@@ -142,6 +142,7 @@ with block:
         print("encoding")
         # encode image on click
         embedding = sam.encode(inputs[input_image]).cpu()
         print("encoding done")
         return [inputs[input_image], embedding]
     sam_encode_btn.click(on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False)

         print("encoding")
         # encode image on click
         embedding = sam.encode(inputs[input_image]).cpu()
+        sam_cpu.dummy_encode(inputs[input_image])
         print("encoding done")
         return [inputs[input_image], embedding]
     sam_encode_btn.click(on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False)

inference.py CHANGED Viewed

@@ -263,6 +263,63 @@ class CustomSamPredictor(SamPredictor):
             return_logits=return_logits,
         )
 class SegmentPredictor:
     def __init__(self, device=None):
@@ -281,6 +338,10 @@ class SegmentPredictor:
     def encode(self, image):
         image = np.array(image)
         return self.conditioned_pred.encode_image(image)
     def cond_pred(self, embedding, pts, lbls):
         lbls = np.array(lbls)

             return_logits=return_logits,
         )
+    # NOTE(review): the docstring below appears to be copied from
+    # set_torch_image (the assert message still names it). In this method the
+    # image-encoder call is commented out, so self.features is not assigned
+    # here; only the size bookkeeping and the is_image_set flag are updated.
+    def dummy_set_torch_image(
+        self,
+        transformed_image: torch.Tensor,
+        original_image_size: Tuple[int, ...],
+    ) -> None:
+        """
+        Calculates the image embeddings for the provided image, allowing
+        masks to be predicted with the 'predict' method. Expects the input
+        image to be already transformed to the format expected by the model.
+        Arguments:
+          transformed_image (torch.Tensor): The input image, with shape
+            1x3xHxW, which has been transformed with ResizeLongestSide.
+          original_image_size (tuple(int, int)): The size of the image
+            before transformation, in (H, W) format.
+        """
+        # Require a 4-D tensor with 3 channels whose longer spatial side equals
+        # the image encoder's img_size.
+        assert (
+            len(transformed_image.shape) == 4
+            and transformed_image.shape[1] == 3
+            and max(*transformed_image.shape[2:]) == self.model.image_encoder.img_size
+        ), f"set_torch_image input must be BCHW with long side {self.model.image_encoder.img_size}."
+        self.reset_image()
+        # Record the pre-transform size and the (H, W) of the transformed tensor.
+        self.original_size = original_image_size
+        self.input_size = tuple(transformed_image.shape[-2:])
+        # NOTE(review): the preprocess result is not used below because the
+        # encoder call that would consume it is disabled.
+        input_image = self.model.preprocess(transformed_image)
+        # The encoder call is intentionally disabled to avoid encoding on CPU.
+        #self.features = self.model.image_encoder(input_image)
+        # Mark the image as set even though no features were computed here.
+        self.is_image_set = True
+    # NOTE(review): despite the docstring, this method delegates to
+    # dummy_set_torch_image, whose encoder call is commented out, so no
+    # embeddings are produced along this path.
+    def dummy_set_image(
+        self,
+        image: np.ndarray,
+        image_format: str = "RGB",
+    ) -> None:
+        """
+        Calculates the image embeddings for the provided image, allowing
+        masks to be predicted with the 'predict' method.
+        Arguments:
+          image (np.ndarray): The image for calculating masks. Expects an
+            image in HWC uint8 format, with pixel values in [0, 255].
+          image_format (str): The color format of the image, in ['RGB', 'BGR'].
+        """
+        assert image_format in [
+            "RGB",
+            "BGR",
+        ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
+        # Reverse the last (channel) axis when the caller's format differs from
+        # the format the model declares.
+        if image_format != self.model.image_format:
+            image = image[..., ::-1]
+        # Transform the image to the form expected by the model
+        input_image = self.transform.apply_image(image)
+        input_image_torch = torch.as_tensor(input_image, device=self.device)
+        # Reorder axes (2, 0, 1) and prepend a leading dimension of size 1.
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+        # Pass the first two dimensions of the (possibly channel-flipped) image
+        # as the original size.
+        self.dummy_set_torch_image(input_image_torch, image.shape[:2])
 class SegmentPredictor:
     def __init__(self, device=None):
     def encode(self, image):
         image = np.array(image)
         return self.conditioned_pred.encode_image(image)
+    # Converts `image` to a NumPy array and hands it to
+    # conditioned_pred.dummy_set_image. Unlike encode() above, which returns
+    # the result of encode_image, this method returns nothing.
+    def dummy_encode(self, image):
+        image = np.array(image)
+        self.conditioned_pred.dummy_set_image(image)
     def cond_pred(self, embedding, pts, lbls):
         lbls = np.array(lbls)