V
File size: 5,099 Bytes
47b2ea7
0e3fe1f
47b2ea7
0e3fe1f
cafcff1
0e3fe1f
47b2ea7
a8db7a5
cafcff1
a8db7a5
 
 
 
 
 
 
 
 
 
 
0e3fe1f
 
a8db7a5
 
 
 
0e3fe1f
 
 
a8db7a5
0e3fe1f
 
 
 
 
 
 
 
 
 
 
 
 
 
a8db7a5
 
 
47b2ea7
 
0e3fe1f
47b2ea7
 
 
 
0e3fe1f
 
47b2ea7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import insightface
import numpy as np
from PIL import Image, ImageOps
import cv2
from scipy.ndimage import gaussian_filter

# Load the InsightFace models once, at import time.
def _load_insightface_model(model_name, failure_prefix):
    """Fetch and prepare one model-zoo model, or return ``None`` on failure.

    Any ``Exception`` raised while fetching or preparing the model is
    reported on stdout as ``"<failure_prefix>: <error>"`` rather than
    propagated, so callers must check the result for ``None``.
    """
    try:
        model = insightface.model_zoo.get_model(model_name)
        model.prepare(ctx_id=-1)  # Use CPU, set ctx_id=0 for GPU
    except Exception as exc:
        print(f"{failure_prefix}: {exc}")
        return None
    return model


# Face detector; None when it could not be loaded.
detector = _load_insightface_model('retinaface_mnet025_v2', "Error loading face detection model")

# Landmark predictor; None when it could not be loaded.
landmark_model = _load_insightface_model('2d106det', "Error loading landmark model")

def align(image_in, face_index=0, output_size=256):
    """Detect faces in ``image_in`` and return an aligned crop of one of them.

    Args:
        image_in: PIL image to process.
        face_index: Index of the detected face to align. Values past the
            last detected face are clamped to the last face.
        output_size: Forwarded to ``image_align`` as ``output_size``.

    Returns:
        A tuple ``(aligned_image, n_faces, quad)``:

        * When the models are not loaded: ``(image_in, 0, None)`` with the
          image exactly as passed in.
        * When no face is found: the (EXIF-transposed, if that succeeded)
          input image, ``0`` and ``None``.
        * Otherwise: the image and quad returned by ``image_align`` and the
          number of detected faces.
    """
    if detector is None or landmark_model is None:
        print("Models not loaded properly.")
        return image_in, 0, None

    try:
        image_in = ImageOps.exif_transpose(image_in)
    except Exception:
        # Best effort only: a broken EXIF block must not stop alignment.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        print("Exif problem, not rotating")

    landmarks = list(get_landmarks(image_in))
    n_faces = len(landmarks)
    if n_faces == 0:
        # Nothing to align; hand the image back unchanged.
        return image_in, 0, None

    # Clamp so that asking for a face beyond the last one selects the last.
    face_index = min(n_faces - 1, face_index)
    aligned_image, quad = image_align(image_in, landmarks[face_index], output_size=output_size)
    return aligned_image, n_faces, quad

def get_landmarks(image):
    """Lazily yield one landmark prediction per face detected in a PIL image.

    Yields nothing when either the detector or the landmark model is
    unavailable. Detection itself only runs once iteration starts.
    """
    models_ready = detector is not None and landmark_model is not None
    if not models_ready:
        # Inside a generator this just ends the iteration with no items.
        return []

    pixels = np.array(image)
    face_boxes, _unused = detector.detect(pixels, threshold=0.5, scale=1.0)

    # One landmark-model call per detected bounding box, produced on demand.
    yield from (landmark_model.get(pixels, box) for box in face_boxes)

def image_align(src_img, face_landmarks, output_size=512, transform_size=2048, enable_padding=True, x_scale=1, y_scale=1, em_scale=0.1, alpha=False):
    """Cut an oriented square around a face and resample it to a square image.

    An oriented quadrilateral is built from the eye and mouth landmark
    positions. The source image is then optionally shrunk, cropped and
    reflect-padded, and finally the quad is resampled into a square.

    Args:
        src_img: PIL image containing the face.
        face_landmarks: Array-like of 2-component points. Rows 0, 2 and 4
            are read as right eye, left eye and mouth respectively.
            NOTE(review): these indices look like a 5-point landmark
            layout -- confirm they match what the landmark model returns.
        output_size: Side length of the result when it is smaller than
            ``transform_size``; otherwise the result keeps side length
            ``transform_size``.
        transform_size: Side length of the intermediate quad transform.
        enable_padding: When true, reflect-pad and feather the image where
            the quad reaches (nearly) past the image border.
        x_scale: Multiplier applied to the quad's extent along the eye axis.
        y_scale: Multiplier applied to the quad's extent across the eye axis.
        em_scale: Fraction of the eye-to-mouth vector by which the quad
            centre is shifted away from the midpoint between the eyes.
        alpha: When true AND the padding branch runs, the result is RGBA
            with an alpha channel that fades out towards the padded edges.
            It has no effect when no padding is applied.

    Returns:
        ``(img, quad_orig)``: the aligned PIL image and the 4x2 array of
        quad corners in the coordinates of the original ``src_img`` (copied
        before any shrink/crop/pad adjustment).
    """
    # Align function modified from ffhq-dataset
    # See https://github.com/NVlabs/ffhq-dataset for license

    lm = np.array(face_landmarks)
    # One-row slices, so the np.mean(..., axis=0) calls below reduce each
    # one to a single point.
    lm_eye_left = lm[2:3]  # left-clockwise
    lm_eye_right = lm[0:1]  # left-clockwise

    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = 0.71 * (eye_right - eye_left)
    mouth_avg = lm[4]
    eye_to_mouth = 1.35 * (mouth_avg - eye_avg)

    # Choose oriented crop rectangle.
    # x: unit vector along the eye axis, scaled to the larger of the two
    # face measurements (and by x_scale).
    x = eye_to_eye.copy()
    x /= np.hypot(*x)
    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    x *= x_scale
    # y: x rotated by 90 degrees, scaled by y_scale.
    y = np.flipud(x) * [-y_scale, y_scale]
    # c: quad centre, shifted from the eye midpoint towards the mouth.
    c = eye_avg + eye_to_mouth * em_scale
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    # Keep an untouched copy: quad is modified in place below.
    quad_orig = quad.copy()
    # Length of the quad's side along the x direction.
    qsize = np.hypot(*x) * 2

    # Normalise to a plain RGB image (via RGBA).
    img = src_img.convert('RGBA').convert('RGB')

    # Shrink.
    # Integer downscale, applied only when the quad is at least 4x larger
    # than the requested output (shrink >= 2).
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
        img = img.resize(rsize, Image.Resampling.LANCZOS)
        quad /= shrink
        qsize /= shrink

    # Crop.
    # Bounding box of the quad plus a border, clamped to the image bounds.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
    # Only crop when it actually removes pixels; then move the quad into
    # the cropped image's coordinates.
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        quad -= crop[0:2]

    # Pad.
    # pad = (left, top, right, bottom): how far the quad's bounding box plus
    # border reaches beyond each image edge (0 when it does not).
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        # Every side gets at least 0.3 * qsize of padding.
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        # From here until Image.fromarray below, img is a float32 ndarray.
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        # x and y are rebound to pixel coordinate grids here; the quad axis
        # vectors of the same names are not used again.
        y, x, _ = np.ogrid[:h, :w, :1]
        # mask is 1 at the outer edge of the padded image and decreases
        # inwards, reaching 0 one pad-width in from each side.
        mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]), 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]))
        blur = qsize * 0.02
        # Blend towards a blurred copy, weighted by clip(mask * 3 + 1, 0, 1).
        img += (gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        # Blend towards the per-channel median, weighted by clip(mask, 0, 1).
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.uint8(np.clip(np.rint(img), 0, 255))
        if alpha:
            # Invert the mask into an alpha channel: 0 near the padded edges,
            # 255 where mask <= 0.
            mask = 1 - np.clip(3.0 * mask, 0.0, 1.0)
            mask = np.uint8(np.clip(np.rint(mask * 255), 0, 255))
            img = np.concatenate((img, mask), axis=2)
            img = Image.fromarray(img, 'RGBA')
        else:
            img = Image.fromarray(img, 'RGB')
        # Move the quad into the padded image's coordinates.
        quad += pad[:2]

    # Transform.
    # Resample the quad into a transform_size square, then downsample to
    # output_size only when that is smaller.
    img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
    if output_size < transform_size:
        img = img.resize((output_size, output_size), Image.Resampling.LANCZOS)

    return img, quad_orig