Makhinur committed on
Commit
47b2ea7
·
verified ·
1 Parent(s): 6a41499

Update face_detection.py

Browse files
Files changed (1) hide show
  1. face_detection.py +85 -113
face_detection.py CHANGED
@@ -1,17 +1,14 @@
1
- # Copyright (c) 2021 Justin Pinkney
2
-
3
- import dlib
4
  import numpy as np
5
- import os
6
- from PIL import Image
7
- from PIL import ImageOps
8
- from scipy.ndimage import gaussian_filter
9
  import cv2
10
 
 
 
 
11
 
12
- MODEL_PATH = "shape_predictor_5_face_landmarks.dat"
13
- detector = dlib.get_frontal_face_detector()
14
-
15
 
16
  def align(image_in, face_index=0, output_size=256):
17
  try:
@@ -30,111 +27,86 @@ def align(image_in, face_index=0, output_size=256):
30
 
31
  return aligned_image, n_faces, quad
32
 
33
-
34
- def composite_images(quad, img, output):
35
- """Composite an image into and output canvas according to transformed co-ords"""
36
- output = output.convert("RGBA")
37
- img = img.convert("RGBA")
38
- input_size = img.size
39
- src = np.array(((0, 0), (0, input_size[1]), input_size, (input_size[0], 0)), dtype=np.float32)
40
- dst = np.float32(quad)
41
- mtx = cv2.getPerspectiveTransform(dst, src)
42
- img = img.transform(output.size, Image.PERSPECTIVE, mtx.flatten(), Image.BILINEAR)
43
- output.alpha_composite(img)
44
-
45
- return output.convert("RGB")
46
-
47
-
48
  def get_landmarks(image):
49
  """Get landmarks from PIL image"""
50
- shape_predictor = dlib.shape_predictor(MODEL_PATH)
51
-
52
- max_size = max(image.size)
53
- reduction_scale = int(max_size/512)
54
- if reduction_scale == 0:
55
- reduction_scale = 1
56
- downscaled = image.reduce(reduction_scale)
57
- img = np.array(downscaled)
58
- detections = detector(img, 0)
59
-
60
- for detection in detections:
61
- try:
62
- face_landmarks = [(reduction_scale*item.x, reduction_scale*item.y) for item in shape_predictor(img, detection).parts()]
63
- yield face_landmarks
64
- except Exception as e:
65
- print(e)
66
 
 
 
 
 
67
 
68
  def image_align(src_img, face_landmarks, output_size=512, transform_size=2048, enable_padding=True, x_scale=1, y_scale=1, em_scale=0.1, alpha=False):
69
- # Align function modified from ffhq-dataset
70
- # See https://github.com/NVlabs/ffhq-dataset for license
71
-
72
- lm = np.array(face_landmarks)
73
- lm_eye_left = lm[2:3] # left-clockwise
74
- lm_eye_right = lm[0:1] # left-clockwise
75
-
76
- # Calculate auxiliary vectors.
77
- eye_left = np.mean(lm_eye_left, axis=0)
78
- eye_right = np.mean(lm_eye_right, axis=0)
79
- eye_avg = (eye_left + eye_right) * 0.5
80
- eye_to_eye = 0.71*(eye_right - eye_left)
81
- mouth_avg = lm[4]
82
- eye_to_mouth = 1.35*(mouth_avg - eye_avg)
83
-
84
- # Choose oriented crop rectangle.
85
- x = eye_to_eye.copy()
86
- x /= np.hypot(*x)
87
- x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
88
- x *= x_scale
89
- y = np.flipud(x) * [-y_scale, y_scale]
90
- c = eye_avg + eye_to_mouth * em_scale
91
- quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
92
- quad_orig = quad.copy()
93
- qsize = np.hypot(*x) * 2
94
-
95
- img = src_img.convert('RGBA').convert('RGB')
96
-
97
- # Shrink.
98
- shrink = int(np.floor(qsize / output_size * 0.5))
99
- if shrink > 1:
100
- rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
101
- img = img.resize(rsize, Image.Resampling.LANCZOS)
102
- quad /= shrink
103
- qsize /= shrink
104
-
105
- # Crop.
106
- border = max(int(np.rint(qsize * 0.1)), 3)
107
- crop = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1]))))
108
- crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
109
- if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
110
- img = img.crop(crop)
111
- quad -= crop[0:2]
112
-
113
- # Pad.
114
- pad = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1]))))
115
- pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
116
- if enable_padding and max(pad) > border - 4:
117
- pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
118
- img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
119
- h, w, _ = img.shape
120
- y, x, _ = np.ogrid[:h, :w, :1]
121
- mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w-1-x) / pad[2]), 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h-1-y) / pad[3]))
122
- blur = qsize * 0.02
123
- img += (gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
124
- img += (np.median(img, axis=(0,1)) - img) * np.clip(mask, 0.0, 1.0)
125
- img = np.uint8(np.clip(np.rint(img), 0, 255))
126
- if alpha:
127
- mask = 1-np.clip(3.0 * mask, 0.0, 1.0)
128
- mask = np.uint8(np.clip(np.rint(mask*255), 0, 255))
129
- img = np.concatenate((img, mask), axis=2)
130
- img = Image.fromarray(img, 'RGBA')
131
- else:
132
- img = Image.fromarray(img, 'RGB')
133
- quad += pad[:2]
134
-
135
- # Transform.
136
- img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
137
- if output_size < transform_size:
138
- img = img.resize((output_size, output_size), Image.Resampling.LANCZOS)
139
-
140
- return img, quad_orig
 
1
+ import insightface
 
 
2
  import numpy as np
3
+ from PIL import Image, ImageOps
 
 
 
4
  import cv2
5
 
6
+ # Load InsightFace models
7
+ detector = insightface.model_zoo.get_model('retinaface_r50_v1')
8
+ detector.prepare(ctx_id=-1) # Use CPU, set ctx_id=0 for GPU
9
 
10
+ landmark_model = insightface.model_zoo.get_model('2d106det')
11
+ landmark_model.prepare(ctx_id=-1) # Use CPU, set ctx_id=0 for GPU
 
12
 
13
  def align(image_in, face_index=0, output_size=256):
14
  try:
 
27
 
28
  return aligned_image, n_faces, quad
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def get_landmarks(image):
    """Yield one landmark prediction per face detected in a PIL image.

    This is a generator: detection only runs once iteration starts.

    Args:
        image: PIL image to search for faces. It is converted to 3-channel
            RGB first, so palette, greyscale and alpha-carrying images are
            accepted.

    Yields:
        Whatever ``landmark_model.get`` returns for each detected bounding
        box, in the order the detector reported the boxes.
    """
    # Normalise the mode so the array is always H x W x 3; RGBA / L / P
    # inputs would otherwise hand the models a 4-channel or 2-D array.
    rgb = np.asarray(image.convert("RGB"))

    # InsightFace follows the OpenCV convention of BGR channel order, while
    # PIL produces RGB, so flip the channel axis.  The copy makes the
    # reversed view contiguous for downstream native code.
    img = np.ascontiguousarray(rgb[:, :, ::-1])

    # The detector's second return value is discarded here.
    # NOTE(review): that value is presumably the detector's own per-face
    # keypoints - confirm whether it should be used instead of a second model.
    bboxes, _ = detector.detect(img, threshold=0.5, scale=1.0)

    for bbox in bboxes:
        # NOTE(review): image_align() reads entries 0, 2 and 4 of each
        # landmark set as right eye, left eye and mouth.  Confirm that the
        # point ordering returned by this landmark model matches that layout.
        yield landmark_model.get(img, bbox)
39
 
def image_align(src_img, face_landmarks, output_size=512, transform_size=2048, enable_padding=True, x_scale=1, y_scale=1, em_scale=0.1, alpha=False):
    """Crop, rotate and rescale a face out of ``src_img`` using its landmarks.

    Align function modified from ffhq-dataset.
    See https://github.com/NVlabs/ffhq-dataset for license.

    Args:
        src_img: PIL image containing the face.
        face_landmarks: Sequence of 2-D points.  Only entries 0, 2 and 4 are
            read; they are used as the right eye, the left eye and the mouth
            respectively.
        output_size: Side length of the final square image.  The result is
            only resized to this when it is smaller than ``transform_size``.
        transform_size: Side length of the square produced by the quad
            transform.
        enable_padding: When true, reflect-pad, blur and fade the image
            where the crop rectangle runs past the image border.
        x_scale: Multiplier for the crop rectangle's extent along the eye axis.
        y_scale: Multiplier for the extent perpendicular to the eye axis.
        em_scale: Fraction of the eye-to-mouth vector by which the crop
            centre is shifted away from the midpoint between the eyes.
        alpha: When true and padding is actually applied, the result is RGBA
            with an alpha channel that fades out towards the padded border.
            If no padding is applied the image stays RGB.

    Returns:
        Tuple ``(img, quad_orig)``: the aligned PIL image and a 4x2 array
        holding the crop rectangle's corners in ``src_img`` coordinates.

    Raises:
        ValueError: If the two eye landmarks coincide (or are not finite),
            so no crop orientation can be derived.
    """
    lm = np.array(face_landmarks)
    lm_eye_left = lm[2:3]  # left-clockwise
    lm_eye_right = lm[0:1]  # left-clockwise

    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = 0.71 * (eye_right - eye_left)
    mouth_avg = lm[4]
    eye_to_mouth = 1.35 * (mouth_avg - eye_avg)

    # Choose oriented crop rectangle.
    eye_span = np.hypot(*eye_to_eye)
    if not eye_span > 0:
        # Without this guard the normalisation below divides by zero and the
        # failure only shows up later as an opaque NaN-to-int ValueError.
        raise ValueError("eye landmarks coincide; cannot orient the crop rectangle")
    x = eye_to_eye / eye_span
    x *= max(eye_span * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    x *= x_scale
    y = np.flipud(x) * [-y_scale, y_scale]
    c = eye_avg + eye_to_mouth * em_scale
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    quad_orig = quad.copy()  # returned untouched; `quad` is edited in place below
    qsize = np.hypot(*x) * 2

    img = src_img.convert('RGBA').convert('RGB')

    # Shrink.
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
        img = img.resize(rsize, Image.Resampling.LANCZOS)
        quad /= shrink
        qsize /= shrink

    # Crop.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        quad -= crop[0:2]

    # Pad.
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        # Imported here because the module no longer imports it at the top;
        # without this the padding branch fails with NameError.
        from scipy.ndimage import gaussian_filter

        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        rows, cols, _ = np.ogrid[:h, :w, :1]
        # `mask` is 1 on the outer edge of the padded image and decreases
        # with distance from the edge, relative to the pad width on each side.
        mask = np.maximum(
            1.0 - np.minimum(np.float32(cols) / pad[0], np.float32(w - 1 - cols) / pad[2]),
            1.0 - np.minimum(np.float32(rows) / pad[1], np.float32(h - 1 - rows) / pad[3]),
        )
        blur = qsize * 0.02
        # Blend towards a blurred copy, then towards the median colour,
        # increasingly so as the mask grows near the padded border.
        img += (gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.uint8(np.clip(np.rint(img), 0, 255))
        if alpha:
            mask = 1 - np.clip(3.0 * mask, 0.0, 1.0)
            mask = np.uint8(np.clip(np.rint(mask * 255), 0, 255))
            img = np.concatenate((img, mask), axis=2)
            img = Image.fromarray(img, 'RGBA')
        else:
            img = Image.fromarray(img, 'RGB')
        quad += pad[:2]

    # Transform.
    img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
    if output_size < transform_size:
        img = img.resize((output_size, output_size), Image.Resampling.LANCZOS)

    return img, quad_orig