File size: 5,557 Bytes
8ed2f16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#################################################
# Copyright (c) 2021-present, xiaobing.ai, Inc. #
# All rights reserved.                          #
#################################################
# CV Research, DEV(USA) xiaobing.               #
# Written by [email protected]             #
#################################################

##### Python internal and external packages
import numpy as np
import torch
import cv2


class FaceLdmkDetector:
    """
    A class for face detection and landmark detection (2D and 3D).
    """

    def __init__(self, facedetector, ldmkdetector, ldmk3ddetector):
        """
        Initialize the face landmark detector.

        Args:
            facedetector: Face detection model.
            ldmkdetector: 2D landmark detection model.
            ldmk3ddetector: 3D landmark detection model.
        """
        self.facedetector = facedetector
        self.ldmkdetector = ldmkdetector
        self.ldmk3ddetector = ldmk3ddetector

        self.last_frame = None
        self.frame_count = 0
        self.frame_margin = 15

    def reset(self):
        """Reset the last frame and frame count."""
        self.last_frame = None
        self.frame_count = 0

    def get_box_from_ldmk(self, ldmks):
        """
        Compute bounding boxes from landmark points.

        Args:
            ldmks (np.ndarray): Landmark points.

        Returns:
            np.ndarray: Bounding box coordinates.
        """
        boxes = [[np.min(ldmk[:, 0]), np.min(ldmk[:, 1]),
                  np.max(ldmk[:, 0]), np.max(ldmk[:, 1])]
                 for ldmk in ldmks]
        return np.array(boxes)

    def extend_box(self, boxes, ratio=1.5):
        """
        Extend bounding boxes by a given ratio.

        Args:
            boxes (np.ndarray): Bounding boxes.
            ratio (float): Scaling factor.

        Returns:
            np.ndarray: Extended bounding boxes.
        """
        extended_boxes = []
        for box in boxes:
            xmin, ymin, xmax, ymax = box
            center = [(xmin + xmax) / 2, (ymin + ymax) / 2]
            size = np.sqrt((ymax - ymin + 1) * (xmax - xmin + 1))
            extend_size = size * ratio

            xmine, ymine = center[0] - extend_size / 2, center[1] - extend_size / 2
            xmaxe, ymaxe = center[0] + extend_size / 2, center[1] + extend_size / 2

            extended_boxes.append([xmine, ymine, xmaxe, ymaxe])

        return np.array(extended_boxes)

    def ldmk_detect(self, img, boxes):
        """
        Perform landmark detection on given bounding boxes.

        Args:
            img (np.ndarray): Input image.
            boxes (np.ndarray): Bounding boxes.

        Returns:
            np.ndarray: Smoothed landmark points.
        """
        ldmks = []
        h, w, _ = img.shape

        for box in boxes:
            xmin, ymin, xmax, ymax = map(int, box)
            img_crop = np.zeros([ymax - ymin, xmax - xmin, 3])

            img_crop[
            0 - min(ymin, 0):(ymax - ymin) - (max(ymax, h) - h),
            0 - min(xmin, 0):(xmax - xmin) - (max(xmax, w) - w),
            ] = img[max(ymin, 0):min(ymax, h), max(xmin, 0):min(xmax, w)]

            ldmk = self.ldmkdetector(img_crop)
            ldmk = ldmk[0] + np.array([xmin, ymin]).reshape(1, 2)
            ldmks.append(ldmk)

        ldmks = np.array(ldmks)

        # Apply smoothing to landmarks
        ldmks_smooth = [ldmks[idx] if idx == 0 or idx == len(ldmks) - 1
                        else ldmks[idx - 1:idx + 2].mean(0)
                        for idx in range(len(ldmks))]

        return np.array(ldmks_smooth)

    def inference(self, img):
        """
        Perform face detection and landmark detection.

        Args:
            img (np.ndarray): Input image.

        Returns:
            tuple: 2D landmarks, 3D landmarks, bounding boxes.
        """
        h, w, _ = img.shape

        if self.last_frame is None and self.frame_count % self.frame_margin == 0:
            boxes_scores = self.facedetector(img)
            if len(boxes_scores) == 0:
                return None

            boxes = boxes_scores[:, :4]
            scores = boxes_scores[:, 4]

            # Compute box sizes and distances from the center
            box_sizes = ((boxes[:, 3] - boxes[:, 1]) + (boxes[:, 2] - boxes[:, 0])) / 2
            box_distances = ((boxes[:, 3] + boxes[:, 1]) / 2 - 255) ** 2 + ((boxes[:, 2] + boxes[:, 0]) / 2 - 255) ** 2

            # Select the largest detected face
            index = np.argmax(box_sizes)
            boxes = boxes[index:index + 1]

            # Extend the bounding box
            boxes_extend = self.extend_box(boxes, ratio=1.5)

            self.frame_count += 1

        else:
            # Use last detected landmarks to determine the bounding box
            boxes = self.get_box_from_ldmk(self.last_frame)
            box_sizes = ((boxes[:, 3] - boxes[:, 1]) + (boxes[:, 2] - boxes[:, 0])) / 2
            box_centers = np.concatenate([((boxes[:, 2] + boxes[:, 0]) / 2),
                                          ((boxes[:, 3] + boxes[:, 1]) / 2)], axis=0)

            # Extend the bounding box
            boxes_extend = self.extend_box(boxes, ratio=1.8)

            self.frame_count += 1

        # Perform landmark detection
        ldmks = self.ldmk_detect(img, boxes_extend)
        ldmks_3d = self.ldmk3ddetector(img, boxes)

        if self.last_frame is None:
            boxes = self.get_box_from_ldmk(ldmks)

        self.last_frame = ldmks

        return ldmks, ldmks_3d, boxes