Spaces:

HaohuaLv
/

one-shot_object_detection

Running

App Files Community

HaohuaLv committed on Oct 25, 2023

Commit

293d766

1 Parent(s): a3597eb

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -18

app.py CHANGED Viewed

@@ -1,19 +1,16 @@
 import gradio as gr
 from PIL import Image, ImageDraw
 import torch
-from transformers import OwlViTProcessor, OwlViTForObjectDetection, OwlViTModel, OwlViTImageProcessor
 from transformers.image_transforms import center_to_corners_format
 from transformers.models.owlvit.modeling_owlvit import box_iou
 from functools import partial
-# from utils import iou
 processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
 model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
-from transformers.models.owlvit.modeling_owlvit import OwlViTImageGuidedObjectDetectionOutput, OwlViTClassPredictionHead
@@ -69,8 +66,6 @@ def class_predictor(
 def get_max_iou_indice(target_pred_boxes, query_box, target_sizes):
     boxes = center_to_corners_format(target_pred_boxes)
     img_h, img_w = target_sizes.unbind(1)
@@ -109,12 +104,6 @@ def box_guided_detection(
     batch_size, num_patches, num_patches, hidden_dim = feature_map.shape
     image_feats = torch.reshape(feature_map, (batch_size, num_patches * num_patches, hidden_dim))
-    # batch_size, num_patches, num_patches, hidden_dim = query_feature_map.shape
-    # query_image_feats = torch.reshape(query_feature_map, (batch_size, num_patches * num_patches, hidden_dim))
-    # # Get top class embedding and best box index for each query image in batch
-    # query_embeds, best_box_indices, query_pred_boxes = self.embed_image_query(query_image_feats, query_feature_map)
-    # Predict object boxes
     target_pred_boxes = self.box_predictor(image_feats, feature_map)
     # Get MAX IOU box corresponding embedding
@@ -124,9 +113,6 @@ def box_guided_detection(
     (pred_logits, class_embeds) = self.class_predictor(image_feats=image_feats, query_indice=query_indice)
     if not return_dict:
         output = (
             feature_map,
@@ -179,7 +165,7 @@ def threshold_change(xmin, ymin, xmax, ymax, image, threshold, nms):
     labels = list(zip(boxes, scores))
     labels.append((manul_box, "manual"))
-    cnt = len(boxes) - 1
     return (image, labels), cnt
@@ -198,7 +184,7 @@ def one_shot_detect(xmin, ymin, xmax, ymax, image, threshold, nms):
     labels = list(zip(boxes, scores))
     labels.append((manul_box, "manual"))
-    cnt = len(boxes) - 1
     return (image, labels), cnt

 import gradio as gr
 from PIL import Image, ImageDraw
 import torch
+from transformers import OwlViTProcessor, OwlViTForObjectDetection
 from transformers.image_transforms import center_to_corners_format
 from transformers.models.owlvit.modeling_owlvit import box_iou
 from functools import partial
 processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
 model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
+from transformers.models.owlvit.modeling_owlvit import OwlViTImageGuidedObjectDetectionOutput
 def get_max_iou_indice(target_pred_boxes, query_box, target_sizes):
     boxes = center_to_corners_format(target_pred_boxes)
     img_h, img_w = target_sizes.unbind(1)
     batch_size, num_patches, num_patches, hidden_dim = feature_map.shape
     image_feats = torch.reshape(feature_map, (batch_size, num_patches * num_patches, hidden_dim))
     target_pred_boxes = self.box_predictor(image_feats, feature_map)
     # Get MAX IOU box corresponding embedding
     (pred_logits, class_embeds) = self.class_predictor(image_feats=image_feats, query_indice=query_indice)
     if not return_dict:
         output = (
             feature_map,
     labels = list(zip(boxes, scores))
     labels.append((manul_box, "manual"))
+    cnt = len(boxes)
     return (image, labels), cnt
     labels = list(zip(boxes, scores))
     labels.append((manul_box, "manual"))
+    cnt = len(boxes)
     return (image, labels), cnt