File size: 4,693 Bytes
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#############################################################################
#
#   Source from:
#   https://www.tensorflow.org/hub/tutorials/movenet
#
#
#############################################################################
import functools

import numpy as np
import PIL.Image
import PIL.ImageOps
import tensorflow as tf
from huggingface_hub import snapshot_download
from PIL import ImageDraw


# Maps each joint name to its keypoint index. The index of a joint is simply
# its position in the ordered tuple of names below.
KEYPOINT_DICT = {
    joint_name: joint_index
    for joint_index, joint_name in enumerate((
        'nose',
        'left_eye', 'right_eye',
        'left_ear', 'right_ear',
        'left_shoulder', 'right_shoulder',
        'left_elbow', 'right_elbow',
        'left_wrist', 'right_wrist',
        'left_hip', 'right_hip',
        'left_knee', 'right_knee',
        'left_ankle', 'right_ankle',
    ))
}

# Maps each skeleton edge, given as a (start, end) pair of keypoint indices,
# to the color used to draw it. Entry order is kept as-is because the edges
# are emitted in dictionary iteration order.
COLOR_DICT = {
    (start_index, end_index): edge_color
    for start_index, end_index, edge_color in (
        (0, 1, 'Magenta'),    # nose -> left_eye
        (0, 2, 'Cyan'),       # nose -> right_eye
        (1, 3, 'Magenta'),    # left_eye -> left_ear
        (2, 4, 'Cyan'),       # right_eye -> right_ear
        (0, 5, 'Magenta'),    # nose -> left_shoulder
        (0, 6, 'Cyan'),       # nose -> right_shoulder
        (5, 7, 'Magenta'),    # left_shoulder -> left_elbow
        (7, 9, 'Magenta'),    # left_elbow -> left_wrist
        (6, 8, 'Cyan'),       # right_shoulder -> right_elbow
        (8, 10, 'Cyan'),      # right_elbow -> right_wrist
        (5, 6, 'Yellow'),     # left_shoulder -> right_shoulder
        (5, 11, 'Magenta'),   # left_shoulder -> left_hip
        (6, 12, 'Cyan'),      # right_shoulder -> right_hip
        (11, 12, 'Yellow'),   # left_hip -> right_hip
        (11, 13, 'Magenta'),  # left_hip -> left_knee
        (13, 15, 'Magenta'),  # left_knee -> left_ankle
        (12, 14, 'Cyan'),     # right_hip -> right_knee
        (14, 16, 'Cyan'),     # right_knee -> right_ankle
    )
}


def process_keypoints(keypoints, height, width, threshold=0.22):
    """Returns high confidence keypoints and edges for visualization.

      Args:
        keypoints: An array with shape [1, num_instances, 17, 3] (typically
          [1, 1, 17, 3]) holding, for every keypoint, the normalized
          (y, x) coordinates and the confidence score returned from the
          MoveNet model. Only the first batch entry is read.
        height: height of the image in pixels.
        width: width of the image in pixels.
        threshold: minimum confidence score for a keypoint to be
          visualized. A keypoint is kept only when its score is strictly
          greater than this value.

      Returns:
        A (joints, bones, colors) tuple containing:
          * joints: array of shape [K, 2] with the (x, y) pixel coordinates
            of all kept keypoints of all detected entities;
          * bones: array of shape [E, 2, 2] with the (x, y) pixel
            coordinates of the start and end point of every skeleton edge
            whose two keypoints were both kept;
          * colors: list of E color names, one per entry of bones.
      """
    keypoints = np.asarray(keypoints)
    # Instances live on axis 1 (they are indexed as keypoints[0, idx] below),
    # so the instance count has to be read from that same axis.
    _, num_instances, _, _ = keypoints.shape

    keypoints_all = []
    keypoint_edges_all = []
    colors = []
    for idx in range(num_instances):
        instance = keypoints[0, idx]
        kpts_scores = instance[:, 2]
        # The model stores (y, x); convert to absolute (x, y) pixel positions.
        kpts_absolute_xy = np.stack(
            [width * instance[:, 1], height * instance[:, 0]], axis=-1)
        visible = kpts_scores > threshold
        keypoints_all.append(kpts_absolute_xy[visible])

        for (start, end), color in COLOR_DICT.items():
            # An edge is drawn only when both of its endpoints are confident.
            if visible[start] and visible[end]:
                keypoint_edges_all.append(kpts_absolute_xy[[start, end]])
                colors.append(color)

    if keypoints_all:
        joints = np.concatenate(keypoints_all, axis=0)
    else:
        # Same trailing shape as the non-empty result, so callers can always
        # unpack rows as (x, y).
        joints = np.zeros((0, 2))

    if keypoint_edges_all:
        bones = np.stack(keypoint_edges_all, axis=0)
    else:
        bones = np.zeros((0, 2, 2))
    return joints, bones, colors


def draw_bones(pixmap: PIL.Image.Image, keypoints, *, line_width=4, joint_radius=3):
    """Draws the detected skeleton onto an image, modifying it in place.

        Args:
          pixmap: the PIL image to draw on; its width and height are used to
            scale the keypoint coordinates to pixels.
          keypoints: keypoint coordinates and scores, passed unchanged to
            process_keypoints.
          line_width: width in pixels of the lines drawn for skeleton edges.
          joint_radius: radius in pixels of the red dot drawn at each joint.

        Returns:
          None. The drawing is applied directly to pixmap.
    """
    draw = ImageDraw.Draw(pixmap)
    joints, bones, colors = process_keypoints(keypoints, pixmap.height, pixmap.width)

    # Edges first, so that the joint markers end up on top of the lines.
    for (start, end), color in zip(bones.tolist(), colors):
        draw.line((*start, *end), fill=color, width=line_width)

    for c_x, c_y in joints:
        bounding_box = [
            (c_x - joint_radius, c_y - joint_radius),
            (c_x + joint_radius, c_y + joint_radius),
        ]
        draw.ellipse(bounding_box, fill="red", outline="red")


@functools.lru_cache(maxsize=1)
def _load_movenet_module():
    """Fetches and loads the MoveNet SavedModel once, reusing it afterwards."""
    model_path = snapshot_download("leonelhs/movenet")
    return tf.saved_model.load(model_path)


def movenet(image):
    """Runs detection on an input image.

        The model is loaded on the first call and cached, so subsequent calls
        only pay for inference.

        Args:
          image: A [1, height, width, 3] tensor represents the input image
            pixels. Note that the height/width should already be resized and match the
            expected input resolution of the model before passing into this function.

        Returns:
          A [1, 1, 17, 3] float numpy array representing the predicted keypoint
          coordinates and scores.
    """
    # Keep a reference to the whole loaded module (not just the signature)
    # for as long as the cache lives.
    module = _load_movenet_module()
    model = module.signatures['serving_default']
    # SavedModel format expects tensor type of int32.
    image = tf.cast(image, dtype=tf.int32)
    # Run model inference.
    outputs = model(image)
    # Output is a [1, 1, 17, 3] tensor.
    return outputs['output_0'].numpy()