OWLSAM_DSA2024

Sleeping

App Files Community

andrewkatumba committed on Jun 4, 2024

Commit

6c899dd

verified ·

1 Parent(s): ee1bcff

Add plotters for bounding boxes

Browse files

Files changed (1) hide show

app.py +42 -30

app.py CHANGED Viewed

@@ -2,7 +2,9 @@ from transformers import pipeline, SamModel, SamProcessor
 import torch
 import numpy as np
 import spaces
 checkpoint = "google/owlvit-base-patch16"
 detector = pipeline(model=checkpoint, task="zero-shot-object-detection")
 sam_model = SamModel.from_pretrained("facebook/sam-vit-base").to("cuda")
@@ -10,40 +12,50 @@ sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
 @spaces.GPU
 def query(image, texts, threshold):
-  texts = texts.split(",")
-  predictions = detector(
-    image,
-    candidate_labels=texts,
-    threshold=threshold
-  )
-  result_labels = []
-  for pred in predictions:
-    box = pred["box"]
-    score = pred["score"]
-    label = pred["label"]
-    box = [round(pred["box"]["xmin"], 2), round(pred["box"]["ymin"], 2),
-        round(pred["box"]["xmax"], 2), round(pred["box"]["ymax"], 2)]
-    inputs = sam_processor(
             image,
-            input_boxes=[[[box]]],
             return_tensors="pt"
         ).to("cuda")
-    with torch.no_grad():
-        outputs = sam_model(**inputs)
-    mask = sam_processor.image_processor.post_process_masks(
-        outputs.pred_masks.cpu(),
-        inputs["original_sizes"].cpu(),
-        inputs["reshaped_input_sizes"].cpu()
-    )[0][0][0].numpy()
-    mask = mask[np.newaxis, ...]
-    result_labels.append((mask, label))
-  return image, result_labels
 import gradio as gr

 import torch
 import numpy as np
 import spaces
+from PIL import Image, ImageDraw
+# Load models (unchanged)
 checkpoint = "google/owlvit-base-patch16"
 detector = pipeline(model=checkpoint, task="zero-shot-object-detection")
 sam_model = SamModel.from_pretrained("facebook/sam-vit-base").to("cuda")
 @spaces.GPU
 def query(image, texts, threshold):
+    texts = texts.split(",")
+    # --- Object Detection (unchanged) ---
+    predictions = detector(
+        image,
+        candidate_labels=texts,
+        threshold=threshold
+    )
+    result_labels = []
+    draw = ImageDraw.Draw(image)  # Create a drawing object for the image
+    for pred in predictions:
+        box = pred["box"]
+        score = pred["score"]
+        label = pred["label"]
+        # Round box coordinates for display and SAM input (mostly unchanged)
+        box = [round(coord, 2) for coord in list(box.values())]
+        # --- Segmentation (unchanged) ---
+        inputs = sam_processor(
             image,
+            input_boxes=[[[box]]],  # Note: SAM expects a nested list
             return_tensors="pt"
         ).to("cuda")
+        with torch.no_grad():
+            outputs = sam_model(**inputs)
+        mask = sam_processor.image_processor.post_process_masks(
+            outputs.pred_masks.cpu(),
+            inputs["original_sizes"].cpu(),
+            inputs["reshaped_input_sizes"].cpu()
+        )[0][0][0].numpy()
+        mask = mask[np.newaxis, ...]
+        result_labels.append((mask, label))
+        # --- Draw Bounding Box ---
+        draw.rectangle(box, outline="red", width=3)  # Draw rectangle with a red outline
+        draw.text((box[0], box[1] - 10), label, fill="red")  # Add label above the box
+    return image, result_labels  # Return the modified image
 import gradio as gr