Spaces:

NandiniLokeshReddy
/

EfficientSAM_vs_SAM

Sleeping

App Files Files Community

NandiniLokeshReddy committed on Jul 13, 2024

Commit

7637f71

1 Parent(s): db81b45

Add Gradio app and requirements

Browse files

Files changed (2) hide show

app.py +158 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import gradio as gr
+import torch
+import numpy as np
+from torchvision.transforms import ToTensor
+from PIL import Image
+import cv2
+import zipfile
+# Ensure the necessary model files are available
+!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
+!mkdir -p weights
+!mv sam_vit_h_4b8939.pth weights/
+!git clone https://github.com/yformer/EfficientSAM.git
+import os
+os.chdir("EfficientSAM")
+!pip install git+https://github.com/facebookresearch/segment-anything.git
+from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+from efficient_sam.build_efficient_sam import build_efficient_sam_vits
+# Constants
+DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+MODEL_TYPE = "vit_h"
+CHECKPOINT_PATH = "weights/sam_vit_h_4b8939.pth"
+# Load SAM model
+sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH).to(device=DEVICE)
+mask_generator_sam = SamAutomaticMaskGenerator(sam)
+# Load EfficientSAM model
+with zipfile.ZipFile("weights/efficient_sam_vits.pt.zip", 'r') as zip_ref:
+    zip_ref.extractall("weights")
+efficient_sam_vits_model = build_efficient_sam_vits()
+from segment_anything.utils.amg import (
+    batched_mask_to_box,
+    calculate_stability_score,
+    mask_to_rle_pytorch,
+    remove_small_regions,
+    rle_to_mask,
+)
+from torchvision.ops.boxes import batched_nms, box_area
+def process_small_region(rles):
+    new_masks = []
+    scores = []
+    min_area = 100
+    nms_thresh = 0.7
+    for rle in rles:
+        mask = rle_to_mask(rle[0])
+        mask, changed = remove_small_regions(mask, min_area, mode="holes")
+        unchanged = not changed
+        mask, changed = remove_small_regions(mask, min_area, mode="islands")
+        unchanged = unchanged and not changed
+        new_masks.append(torch.as_tensor(mask).unsqueeze(0))
+        scores.append(float(unchanged))
+    masks = torch.cat(new_masks, dim=0)
+    boxes = batched_mask_to_box(masks)
+    keep_by_nms = batched_nms(
+        boxes.float(),
+        torch.as_tensor(scores),
+        torch.zeros_like(boxes[:, 0]),
+        iou_threshold=nms_thresh,
+    )
+    for i_mask in keep_by_nms:
+        if scores[i_mask] == 0.0:
+            mask_torch = masks[i_mask].unsqueeze(0)
+            rles[i_mask] = mask_to_rle_pytorch(mask_torch)
+    masks = [rle_to_mask(rles[i][0]) for i in keep_by_nms]
+    return masks
+def get_predictions_given_embeddings_and_queries(img, points, point_labels, model):
+    predicted_masks, predicted_iou = model(
+        img[None, ...], points, point_labels
+    )
+    sorted_ids = torch.argsort(predicted_iou, dim=-1, descending=True)
+    predicted_iou_scores = torch.take_along_dim(predicted_iou, sorted_ids, dim=2)
+    predicted_masks = torch.take_along_dim(
+        predicted_masks, sorted_ids[..., None, None], dim=2
+    )
+    predicted_masks = predicted_masks[0]
+    iou = predicted_iou_scores[0, :, 0]
+    index_iou = iou > 0.7
+    iou_ = iou[index_iou]
+    masks = predicted_masks[index_iou]
+    score = calculate_stability_score(masks, 0.0, 1.0)
+    score = score[:, 0]
+    index = score > 0.9
+    score_ = score[index]
+    masks = masks[index]
+    iou_ = iou_[index]
+    masks = torch.ge(masks, 0.0)
+    return masks, iou_
+def run_everything_ours(image_np, model):
+    model = model.cpu()
+    image = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
+    img_tensor = ToTensor()(image)
+    _, original_image_h, original_image_w = img_tensor.shape
+    xy = []
+    GRID_SIZE = 32
+    for i in range(GRID_SIZE):
+        curr_x = 0.5 + i / GRID_SIZE * original_image_w
+        for j in range(GRID_SIZE):
+            curr_y = 0.5 + j / GRID_SIZE * original_image_h
+            xy.append([curr_x, curr_y])
+    xy = torch.from_numpy(np.array(xy))
+    points = xy
+    num_pts = xy.shape[0]
+    point_labels = torch.ones(num_pts, 1)
+    with torch.no_grad():
+        predicted_masks, predicted_iou = get_predictions_given_embeddings_and_queries(
+            img_tensor.cpu(),
+            points.reshape(1, num_pts, 1, 2).cpu(),
+            point_labels.reshape(1, num_pts, 1).cpu(),
+            model.cpu(),
+        )
+    rle = [mask_to_rle_pytorch(m[0:1]) for m in predicted_masks]
+    predicted_masks = process_small_region(rle)
+    return predicted_masks
+def show_anns_ours(masks, image):
+    for mask in masks:
+        contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+        cv2.drawContours(image, contours, -1, (0, 255, 0), 2)
+    return image
+def process_image(image):
+    # Convert PIL image to numpy array
+    image_np = np.array(image)
+    # Process with SAM
+    image_rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
+    sam_result = mask_generator_sam.generate(image_rgb)
+    # Annotate SAM result
+    sam_annotated_image = image_np.copy()
+    for mask in sam_result:
+        sam_annotated_image[mask['segmentation']] = [0, 255, 0]
+    # Process with EfficientSAM
+    mask_efficient_sam_vits = run_everything_ours(image_np, efficient_sam_vits_model)
+    efficient_sam_annotated_image = show_anns_ours(mask_efficient_sam_vits, image_np.copy())
+    return [image, sam_annotated_image, efficient_sam_annotated_image]
+# Gradio interface
+interface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil"),
+    outputs=[gr.Image(type="pil", label="Original"), gr.Image(type="pil", label="SAM Segmented"), gr.Image(type="pil", label="EfficientSAM Segmented")],
+    title="SAM vs EfficientSAM Comparison",
+    description="Upload an image to compare the segmentation results of SAM and EfficientSAM."
+)
+interface.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+torch
+torchvision
+opencv-python-headless
+numpy
+Pillow