Spaces:
No application file
No application file
File size: 7,070 Bytes
430de99 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import math
from typing import List, Tuple
import torch
from detectron2.layers import batched_nms, cat
from detectron2.structures import Boxes, Instances
logger = logging.getLogger(__name__)
def find_top_rpn_proposals(
proposals: List[torch.Tensor],
pred_objectness_logits: List[torch.Tensor],
image_sizes: List[Tuple[int, int]],
nms_thresh: float,
pre_nms_topk: int,
post_nms_topk: int,
min_box_size: float,
training: bool,
):
"""
For each feature map, select the `pre_nms_topk` highest scoring proposals,
apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
highest scoring proposals among all the feature maps for each image.
Args:
proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
All proposal predictions on the feature maps.
pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
image_sizes (list[tuple]): sizes (h, w) for each image
nms_thresh (float): IoU threshold to use for NMS
pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
When RPN is run on multiple feature maps (as in FPN) this number is per
feature map.
post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
When RPN is run on multiple feature maps (as in FPN) this number is total,
over all feature maps.
min_box_size (float): minimum proposal box side length in pixels (absolute units
wrt input images).
training (bool): True if proposals are to be used in training, otherwise False.
This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
comment.
Returns:
list[Instances]: list of N Instances. The i-th Instances
stores post_nms_topk object proposals for image i, sorted by their
objectness score in descending order.
"""
num_images = len(image_sizes)
device = proposals[0].device
# 1. Select top-k anchor for every level and every image
topk_scores = [] # #lvl Tensor, each of shape N x topk
topk_proposals = []
level_ids = [] # #lvl Tensor, each of shape (topk,)
batch_idx = torch.arange(num_images, device=device)
for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)):
Hi_Wi_A = logits_i.shape[1]
num_proposals_i = min(pre_nms_topk, Hi_Wi_A)
# sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
# topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
logits_i, idx = logits_i.sort(descending=True, dim=1)
topk_scores_i = logits_i[batch_idx, :num_proposals_i]
topk_idx = idx[batch_idx, :num_proposals_i]
# each is N x topk
topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4
topk_proposals.append(topk_proposals_i)
topk_scores.append(topk_scores_i)
level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device))
# 2. Concat all levels together
topk_scores = cat(topk_scores, dim=1)
topk_proposals = cat(topk_proposals, dim=1)
level_ids = cat(level_ids, dim=0)
# 3. For each image, run a per-level NMS, and choose topk results.
results: List[Instances] = []
for n, image_size in enumerate(image_sizes):
boxes = Boxes(topk_proposals[n])
scores_per_img = topk_scores[n]
lvl = level_ids
valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
if not valid_mask.all():
if training:
raise FloatingPointError(
"Predicted boxes or scores contain Inf/NaN. Training has diverged."
)
boxes = boxes[valid_mask]
scores_per_img = scores_per_img[valid_mask]
lvl = lvl[valid_mask]
boxes.clip(image_size)
# filter empty boxes
keep = boxes.nonempty(threshold=min_box_size)
if keep.sum().item() != len(boxes):
boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]
keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
# In Detectron1, there was different behavior during training vs. testing.
# (https://github.com/facebookresearch/Detectron/issues/459)
# During training, topk is over the proposals from *all* images in the training batch.
# During testing, it is over the proposals for each image separately.
# As a result, the training behavior becomes batch-dependent,
# and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
# This bug is addressed in Detectron2 to make the behavior independent of batch size.
keep = keep[:post_nms_topk] # keep is already sorted
res = Instances(image_size)
res.proposal_boxes = boxes[keep]
res.objectness_logits = scores_per_img[keep]
results.append(res)
return results
def add_ground_truth_to_proposals(gt_boxes, proposals):
"""
Call `add_ground_truth_to_proposals_single_image` for all images.
Args:
gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes
representing the gound-truth for image i.
proposals (list[Instances]): list of N elements. Element i is a Instances
representing the proposals for image i.
Returns:
list[Instances]: list of N Instances. Each is the proposals for the image,
with field "proposal_boxes" and "objectness_logits".
"""
assert gt_boxes is not None
assert len(proposals) == len(gt_boxes)
if len(proposals) == 0:
return proposals
return [
add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i)
for gt_boxes_i, proposals_i in zip(gt_boxes, proposals)
]
def add_ground_truth_to_proposals_single_image(gt_boxes, proposals):
"""
Augment `proposals` with ground-truth boxes from `gt_boxes`.
Args:
Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals
per image.
Returns:
Same as `add_ground_truth_to_proposals`, but for only one image.
"""
device = proposals.objectness_logits.device
# Assign all ground-truth boxes an objectness logit corresponding to
# P(object) = sigmoid(logit) =~ 1.
gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10)))
gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device)
# Concatenating gt_boxes with proposals requires them to have the same fields
gt_proposal = Instances(proposals.image_size)
gt_proposal.proposal_boxes = gt_boxes
gt_proposal.objectness_logits = gt_logits
new_proposals = Instances.cat([proposals, gt_proposal])
return new_proposals
|