File size: 4,481 Bytes
b9be4e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from third_party.PIPNet.FaceBoxesV2.detector import Detector
import cv2, os
import numpy as np
import torch
import torch.nn as nn
from third_party.PIPNet.FaceBoxesV2.utils.config import cfg
from third_party.PIPNet.FaceBoxesV2.utils.prior_box import PriorBox
from third_party.PIPNet.FaceBoxesV2.utils.nms_wrapper import nms
from third_party.PIPNet.FaceBoxesV2.utils.faceboxes import FaceBoxesV2
from third_party.PIPNet.FaceBoxesV2.utils.box_utils import decode
import time


class FaceBoxesDetector(Detector):
    """Face detector that wraps a FaceBoxesV2 network.

    The network is built in "test" phase with two classes, its weights are
    loaded from ``model_weights``, and it is moved to ``device`` and put in
    eval mode. ``detect`` runs the network on one image and returns the
    surviving boxes after score filtering and NMS.
    """

    def __init__(self, model_arch, model_weights, use_gpu, device):
        """Build the network and load its weights.

        Args:
            model_arch: Forwarded to the ``Detector`` base constructor.
            model_weights: Forwarded to the ``Detector`` base constructor;
                the checkpoint is then read from ``self.model_weights``
                (presumably set by the base class — confirm).
            use_gpu: Stored on the instance; not otherwise used here.
            device: Torch device the weights are mapped to and the network
                is moved to.
        """
        super().__init__(model_arch, model_weights)
        self.name = "FaceBoxesDetector"
        self.net = FaceBoxesV2(
            phase="test", size=None, num_classes=2
        )  # initialize detector
        self.use_gpu = use_gpu
        self.device = device

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        state_dict = torch.load(self.model_weights, map_location=self.device)

        from collections import OrderedDict

        # Checkpoints saved from a DataParallel-wrapped model prefix every key
        # with `module.`. Strip that prefix only when it is actually present,
        # so checkpoints saved from a bare model load correctly too.
        prefix = "module."
        new_state_dict = OrderedDict(
            (key[len(prefix):] if key.startswith(prefix) else key, value)
            for key, value in state_dict.items()
        )
        # load params
        self.net.load_state_dict(new_state_dict)
        self.net = self.net.to(self.device)
        self.net.eval()

    def detect(self, image, thresh=0.6, im_scale=None):
        """Detect faces in ``image``.

        Args:
            image: Image array whose shape unpacks as (height, width,
                channels); three channels are assumed by the mean
                subtraction below — TODO confirm channel order with callers.
            thresh: Detections whose score is not above this value are
                discarded.
            im_scale: Resize factor applied before running the network. When
                None, images whose shorter side exceeds 600 pixels are scaled
                so that side becomes 600; smaller images are left at scale 1.

        Returns:
            A tuple ``(detections, im_scale)``. ``detections`` is a list of
            ``["face", score, xmin, ymin, width, height]`` entries with
            integer box values mapped back to the input image's resolution;
            ``im_scale`` is the resize factor that was used.
        """
        # auto resize for large images
        if im_scale is None:
            height, width, _ = image.shape
            short_side = min(height, width)
            im_scale = 600.0 / short_side if short_side > 600 else 1
        image_scale = cv2.resize(
            image, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
        )

        # (w, h, w, h) of the resized image; multiplies the decoded boxes to
        # bring them to resized-image pixel coordinates.
        scaled_h, scaled_w = image_scale.shape[0], image_scale.shape[1]
        scale = torch.Tensor([scaled_w, scaled_h, scaled_w, scaled_h])

        image_scale = (
            torch.from_numpy(image_scale.transpose(2, 0, 1)).to(self.device).int()
        )
        # Per-channel mean subtraction (presumably the training-time channel
        # means — confirm), done in integer space before the float cast.
        mean_tmp = torch.IntTensor([104, 117, 123]).to(self.device)
        mean_tmp = mean_tmp.unsqueeze(1).unsqueeze(2)
        image_scale -= mean_tmp
        image_scale = image_scale.float().unsqueeze(0)
        scale = scale.to(self.device)

        with torch.no_grad():
            out = self.net(image_scale)
            priorbox = PriorBox(
                cfg, image_size=(image_scale.size()[2], image_scale.size()[3])
            )
            priors = priorbox.forward()
            priors = priors.to(self.device)
            loc, conf = out
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
            boxes = boxes * scale
            boxes = boxes.cpu().numpy()
            # Column 1 is used as the detection score (presumably the "face"
            # class of the two-class output — confirm).
            scores = conf.data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > thresh)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:5000]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        keep = nms(dets, 0.3)
        dets = dets[keep, :]

        # keep at most 750 detections after NMS
        dets = dets[:750, :]

        detections_scale = []
        for x0, y0, x1, y1, score in dets:
            # Truncate to integers at the resized resolution first, then map
            # back to the original image size (same order as before, so the
            # resulting integers are unchanged).
            xmin = int(x0)
            ymin = int(y0)
            xmax = int(x1)
            ymax = int(y1)
            width = xmax - xmin
            height = ymax - ymin
            detections_scale.append(
                [
                    "face",
                    score,
                    int(xmin / im_scale),
                    int(ymin / im_scale),
                    int(width / im_scale),
                    int(height / im_scale),
                ]
            )

        return detections_scale, im_scale