# Hugging Face Space: leonelhs / poser-tf
# Page metadata: Running - File size: 4,693 Bytes - 9a02a1c

#############################################################################
#
#   Source from:
#   https://www.tensorflow.org/hub/tutorials/movenet
#
#
#############################################################################
import functools

import PIL.Image
import PIL.ImageOps
import numpy as np
import tensorflow as tf
from PIL import ImageDraw
from huggingface_hub import snapshot_download


# Dictionary that maps from joint names to keypoint indices. Each joint's
# index is its position in the tuple below (nose == 0 ... right_ankle == 16).
KEYPOINT_DICT = {
    joint_name: joint_index
    for joint_index, joint_name in enumerate((
        'nose',
        'left_eye', 'right_eye',
        'left_ear', 'right_ear',
        'left_shoulder', 'right_shoulder',
        'left_elbow', 'right_elbow',
        'left_wrist', 'right_wrist',
        'left_hip', 'right_hip',
        'left_knee', 'right_knee',
        'left_ankle', 'right_ankle',
    ))
}

# Skeleton edges, keyed by (start, end) keypoint-index pairs and mapped to the
# color name used to draw each edge. Edges reaching a left-side joint are
# Magenta, edges reaching a right-side joint are Cyan, and left-to-right links
# are Yellow. Insertion order is the order in which edges are emitted.
COLOR_DICT = {
    (0, 1): 'Magenta',    # nose - left_eye
    (0, 2): 'Cyan',       # nose - right_eye
    (1, 3): 'Magenta',    # left_eye - left_ear
    (2, 4): 'Cyan',       # right_eye - right_ear
    (0, 5): 'Magenta',    # nose - left_shoulder
    (0, 6): 'Cyan',       # nose - right_shoulder
    (5, 7): 'Magenta',    # left_shoulder - left_elbow
    (7, 9): 'Magenta',    # left_elbow - left_wrist
    (6, 8): 'Cyan',       # right_shoulder - right_elbow
    (8, 10): 'Cyan',      # right_elbow - right_wrist
    (5, 6): 'Yellow',     # left_shoulder - right_shoulder
    (5, 11): 'Magenta',   # left_shoulder - left_hip
    (6, 12): 'Cyan',      # right_shoulder - right_hip
    (11, 12): 'Yellow',   # left_hip - right_hip
    (11, 13): 'Magenta',  # left_hip - left_knee
    (13, 15): 'Magenta',  # left_knee - left_ankle
    (12, 14): 'Cyan',     # right_hip - right_knee
    (14, 16): 'Cyan',     # right_knee - right_ankle
}


def process_keypoints(keypoints, height, width, threshold=0.22,
                      edge_colors=None):
    """Returns high confidence keypoints and edges for visualization.

    Args:
      keypoints: An array with shape [1, num_instances, 17, 3] holding the
        keypoint coordinates and scores (the MoveNet output used in this
        file is [1, 1, 17, 3]). The last axis is read as (y, x, score);
        y and x are multiplied by `height` and `width`, i.e. they are
        treated as fractions of the image size.
      height: height of the image in pixels.
      width: width of the image in pixels.
      threshold: minimum confidence score for a keypoint to be
        visualized.
      edge_colors: optional mapping from (start_index, end_index) keypoint
        pairs to the color of that edge. Defaults to the module-level
        COLOR_DICT.

    Returns:
      A (joints, bones, colors) tuple containing:
        * joints: [K, 2] array with the (x, y) pixel coordinates of every
          keypoint scoring above `threshold`, across all instances;
        * bones: [E, 2, 2] array with the (x, y) start and end points of
          every edge whose two keypoints both score above `threshold`;
        * colors: list of E colors, one per entry of `bones`.
      K and E are 0 (with the shapes above) when nothing qualifies.
    """
    if edge_colors is None:
        edge_colors = COLOR_DICT

    keypoints = np.asarray(keypoints)
    keypoints_all = []
    keypoint_edges_all = []
    colors = []
    # Instances are indexed on axis 1 below (keypoints[0, idx]), so that is
    # the axis that must be counted; axis 0 is the batch dimension.
    num_instances = keypoints.shape[1]
    for idx in range(num_instances):
        kpts_x = keypoints[0, idx, :, 1]
        kpts_y = keypoints[0, idx, :, 0]
        kpts_scores = keypoints[0, idx, :, 2]
        # Swap (y, x) to (x, y) and scale to pixel coordinates.
        kpts_absolute_xy = np.stack(
            [width * kpts_x, height * kpts_y], axis=-1)
        confident = kpts_scores > threshold
        keypoints_all.append(kpts_absolute_xy[confident])

        for (start, end), color in edge_colors.items():
            # An edge is drawn only when both of its endpoints are reliable.
            if confident[start] and confident[end]:
                keypoint_edges_all.append(kpts_absolute_xy[[start, end]])
                colors.append(color)

    if keypoints_all:
        joints = np.concatenate(keypoints_all, axis=0)
    else:
        # Same [K, 2] layout as the non-empty case, with K == 0.
        joints = np.zeros((0, 2))

    if keypoint_edges_all:
        bones = np.stack(keypoint_edges_all, axis=0)
    else:
        bones = np.zeros((0, 2, 2))
    return joints, bones, colors


def draw_bones(pixmap: PIL.Image.Image, keypoints) -> None:
    """Draws the detected skeleton onto `pixmap` in place.

    Args:
      pixmap: the PIL image to draw on. It is modified in place, and its
        height and width are used to convert keypoints to pixels.
      keypoints: keypoint coordinates and scores, passed unchanged to
        process_keypoints together with the image size.
    """
    joints, bones, colors = process_keypoints(keypoints, pixmap.height, pixmap.width)
    draw = ImageDraw.Draw(pixmap)

    # Bones first, so the joint markers end up painted on top of the lines.
    for (start, end), color in zip(bones.tolist(), colors):
        draw.line((*start, *end), fill=color, width=4)

    radius = 3

    # tolist() hands Pillow plain Python floats, as is done for the bones.
    for c_x, c_y in joints.tolist():
        bbox = [(c_x - radius, c_y - radius), (c_x + radius, c_y + radius)]
        draw.ellipse(bbox, fill="red", outline="red")


@functools.lru_cache(maxsize=1)
def _load_movenet_module():
    """Fetches the "leonelhs/movenet" snapshot and loads it as a SavedModel.

    The result is cached, so the download check and the model load happen
    once per process instead of once per inference. The whole loaded object
    is cached (not just one signature) so that it stays referenced for as
    long as its signatures are in use.
    """
    model_path = snapshot_download("leonelhs/movenet")
    return tf.saved_model.load(model_path)


def movenet(image):
    """Runs detection on an input image.

    Args:
      image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and
        match the expected input resolution of the model before passing
        into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    model = _load_movenet_module().signatures['serving_default']
    # SavedModel format expects tensor type of int32.
    image = tf.cast(image, dtype=tf.int32)
    # Run model inference.
    outputs = model(image)
    # Output is a [1, 1, 17, 3] tensor.
    return outputs['output_0'].numpy()