# YuNet-face-detection / image_resizer.py
# Uploaded by sam749
# Last commit: "added window sliding (#1)" (58ae671, verified)
import cv2
import cv2 as cv
import numpy as np
from yunet import YuNet
# Valid (backend, target) combinations for the OpenCV DNN module.
# Every entry is a two-element list ``[backend, target]``; the order of the
# entries is kept as-is in case callers select a pair by index.
backend_target_pairs = [
    [backend, target]
    for backend, target in (
        (cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU),
        (cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA),
        (cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16),
        (cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU),
        (cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU),
    )
]
class ImageResizer:
    """Downscale an image, detect a face with YuNet and crop a square around it.

    The detector is the project's ``YuNet`` wrapper; all constructor
    arguments are forwarded to it unchanged.
    """

    def __init__(
        self,
        modelPath,
        input_size=(320, 320),
        conf_threshold=0.6,
        nms_threshold=0.3,
        top_k=5000,
        backend_id=0,
        target_id=0,
    ):
        """Create the underlying YuNet detector.

        Args:
            modelPath: Forwarded to ``YuNet`` as ``modelPath``.
            input_size: Forwarded as ``inputSize``. ``detect`` overrides it
                with the actual image size on every call.
            conf_threshold: Forwarded as ``confThreshold``.
            nms_threshold: Forwarded as ``nmsThreshold``.
            top_k: Forwarded as ``topK``.
            backend_id: Forwarded as ``backendId``.
            target_id: Forwarded as ``targetId``.
        """
        self.model = YuNet(
            modelPath=modelPath,
            inputSize=input_size,
            confThreshold=conf_threshold,
            nmsThreshold=nms_threshold,
            topK=top_k,
            backendId=backend_id,
            targetId=target_id,
        )

    def detect(self, image, num_faces=None):
        """Detect faces in ``image`` and draw their bounding boxes onto it.

        Args:
            image: Image array whose first two dimensions are
                (height, width). It is drawn on IN PLACE; pass a copy if the
                original must stay untouched. ``None`` is tolerated.
            num_faces: If truthy, only the first ``num_faces`` detections
                returned by the model are used; otherwise all of them.

        Returns:
            ``(image, bboxs)`` where ``bboxs`` is a list of ``np.int32``
            arrays ``[x, y, w, h]``, one per face. For ``image is None`` the
            result is ``(None, [])``.
        """
        # Nothing to run inference on: keep the two-tuple return shape so
        # callers that unpack the result do not crash.
        if image is None:
            return image, []

        img_h, img_w = image.shape[:2]

        # The detector must be told the size of the image it is about to see.
        self.model.setInputSize([img_w, img_h])
        results = self.model.infer(image)
        faces = results[:num_faces] if num_faces else results

        bboxs = []
        for face in faces:
            # First four values of a detection are treated as x, y, w, h.
            bbox = face[0:4].astype(np.int32)
            x, y, box_w, box_h = (int(v) for v in bbox)
            cv2.rectangle(
                image, (x, y), (x + box_w, y + box_h), (0, 0, 255), 2
            )
            bboxs.append(bbox)
        return image, bboxs

    @staticmethod
    def _slide_window(start, size, limit):
        """Return ``(start, end)`` of a ``size``-long window kept inside ``[0, limit]``.

        The window is shifted (not shrunk) when it would stick out on either
        side. Only when ``limit < size`` is it shortened to ``(0, limit)``.
        """
        start = max(0, min(int(start), limit - size))
        end = min(start + size, limit)
        return start, end

    def resize(self, image, target_size=512, above_head_ratio=0.5):
        """Scale ``image`` so its short side is ``target_size`` and crop a square around the first detected face.

        Args:
            image: Image array with shape (height, width, ...).
            target_size: Side length of the square crop in pixels; a falsy
                value falls back to 512.
            above_head_ratio: Fraction of ``target_size`` kept above the
                vertical centre of the face box (0.5 centres the face).

        Returns:
            ``(dt_image, cropped_image)``: the resized image with detection
            boxes drawn on it, and the ``target_size`` x ``target_size`` crop
            taken from the clean resized image. If no face is found the crop
            is anchored at the top-left corner.
        """
        height, width = image.shape[:2]
        if not target_size:
            target_size = 512

        # Scale so that the SHORT side equals target_size; the long side
        # keeps the aspect ratio, so a full square crop always fits.
        aspect = width / height
        if aspect >= 1:
            # Landscape or square
            new_width, new_height = int(target_size * aspect), target_size
        else:
            # Portrait
            new_width, new_height = target_size, int(target_size / aspect)

        resized = cv2.resize(
            image, (new_width, new_height), interpolation=cv2.INTER_AREA
        )

        # Detect on a copy so the crop below is free of drawn rectangles.
        dt_image, bboxes = self.detect(resized.copy())

        if len(bboxes) >= 1:
            x, y, w, h = (int(v) for v in bboxes[0])
        else:
            # No face: behave as if the "face" were the top-left square.
            x, y, w, h = 0, 0, target_size, target_size

        # Space reserved above the face centre (above_head_ratio * target_size).
        above_head_max = int(target_size * above_head_ratio)
        x_center = int((x + (x + w)) / 2)
        y_center = int((y + (y + h)) / 2)

        # Place the square window, sliding it back inside the image on both
        # axes when the face sits close to a border.
        resized_h, resized_w = resized.shape[:2]
        top, bottom = self._slide_window(
            y_center - above_head_max, target_size, resized_h
        )
        left, right = self._slide_window(
            x_center - target_size // 2, target_size, resized_w
        )

        cropped_image = resized[top:bottom, left:right]
        return dt_image, cropped_image