from collections import OrderedDict

import cv2
import numpy as np
import torch

from third_party.PIPNet.FaceBoxesV2.detector import Detector
from third_party.PIPNet.FaceBoxesV2.utils.config import cfg
from third_party.PIPNet.FaceBoxesV2.utils.prior_box import PriorBox
from third_party.PIPNet.FaceBoxesV2.utils.nms_wrapper import nms
from third_party.PIPNet.FaceBoxesV2.utils.faceboxes import FaceBoxesV2
from third_party.PIPNet.FaceBoxesV2.utils.box_utils import decode


class FaceBoxesDetector(Detector):
    def __init__(self, model_arch, model_weights, use_gpu, device):
        super().__init__(model_arch, model_weights)
        self.name = "FaceBoxesDetector"
        self.net = FaceBoxesV2(phase="test", size=None, num_classes=2)  # initialize detector
        self.use_gpu = use_gpu
        self.device = device

        state_dict = torch.load(self.model_weights, map_location=self.device)
        # The checkpoint was saved from a DataParallel model; build a new
        # OrderedDict with the `module.` prefix stripped so the keys match
        # a plain single-device module.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        # load params
        self.net.load_state_dict(new_state_dict)
        self.net = self.net.to(self.device)
        self.net.eval()

    def detect(self, image, thresh=0.6, im_scale=None):
        # Auto-resize large images so the shorter side is at most 600 px.
        if im_scale is None:
            height, width, _ = image.shape
            if min(height, width) > 600:
                im_scale = 600.0 / min(height, width)
            else:
                im_scale = 1.0
        image_scale = cv2.resize(
            image, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
        )

        # Scale factors to map normalized box coordinates back to pixels
        # of the resized image (x uses width, y uses height).
        scale = torch.Tensor(
            [
                image_scale.shape[1],
                image_scale.shape[0],
                image_scale.shape[1],
                image_scale.shape[0],
            ]
        )
        # HWC (BGR) -> CHW, then subtract the per-channel BGR means.
        image_scale = (
            torch.from_numpy(image_scale.transpose(2, 0, 1)).to(self.device).int()
        )
        mean_tmp = torch.IntTensor([104, 117, 123]).to(self.device)
        mean_tmp = mean_tmp.unsqueeze(1).unsqueeze(2)
        image_scale -= mean_tmp
        image_scale = image_scale.float().unsqueeze(0)
        scale = scale.to(self.device)

        with torch.no_grad():
            out = self.net(image_scale)
            priorbox = PriorBox(
                cfg, image_size=(image_scale.size()[2], image_scale.size()[3])
            )
            priors = priorbox.forward()
            priors = priors.to(self.device)
            loc, conf = out
            prior_data = priors.data
            # Decode box regressions against the priors into corner coordinates.
            boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
            boxes = boxes * scale
            boxes = boxes.cpu().numpy()
            scores = conf.data.cpu().numpy()[:, 1]

            # Ignore low-confidence detections.
            inds = np.where(scores > thresh)[0]
            boxes = boxes[inds]
            scores = scores[inds]

            # Keep the top-K highest-scoring boxes before NMS.
            order = scores.argsort()[::-1][:5000]
            boxes = boxes[order]
            scores = scores[order]

            # Non-maximum suppression, then keep at most 750 detections.
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
                np.float32, copy=False
            )
            keep = nms(dets, 0.3)
            dets = dets[keep, :]
            dets = dets[:750, :]

            detections_scale = []
            for i in range(dets.shape[0]):
                xmin = int(dets[i][0])
                ymin = int(dets[i][1])
                xmax = int(dets[i][2])
                ymax = int(dets[i][3])
                score = dets[i][4]
                width = xmax - xmin
                height = ymax - ymin
                detections_scale.append(["face", score, xmin, ymin, width, height])

        # Map bounding boxes back to the original (unscaled) image size.
        if len(detections_scale) > 0:
            detections_scale = [
                [
                    det[0],
                    det[1],
                    int(det[2] / im_scale),
                    int(det[3] / im_scale),
                    int(det[4] / im_scale),
                    int(det[5] / im_scale),
                ]
                for det in detections_scale
            ]

        return detections_scale, im_scale
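

# --- Usage sketch (not part of the detector) --------------------------------
# A minimal example of running the detector on a single image, assuming a
# FaceBoxesV2 checkpoint is available locally. The checkpoint path
# ("weights/FaceBoxesV2.pth"), the model_arch label ("FaceBoxesV2"), and the
# input/output image paths are hypothetical placeholders; substitute the
# paths used in your setup.
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    detector = FaceBoxesDetector(
        "FaceBoxesV2",                  # model_arch (assumed label)
        "weights/FaceBoxesV2.pth",      # hypothetical checkpoint path
        use_gpu=(device.type == "cuda"),
        device=device,
    )
    image = cv2.imread("test.jpg")      # hypothetical input image (BGR)
    detections, im_scale = detector.detect(image, thresh=0.6)
    # Each detection is [label, score, xmin, ymin, width, height] in
    # original-image pixel coordinates.
    for label, score, x, y, w, h in detections:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imwrite("test_out.jpg", image)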