|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Shared utility functions for OmniGlue.""" |
|
|
|
import math |
|
from typing import Optional |
|
import cv2 |
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
|
|
def lookup_descriptor_bilinear(
    keypoint: np.ndarray, descriptor_map: np.ndarray
) -> np.ndarray:
    """Looks up descriptor value for keypoint from a dense descriptor map.

    Uses bilinear interpolation to find descriptor value at non-integer
    positions.

    Positions that fall inside the last column/row of the map (i.e. between
    `width - 1` and `width`, or `height - 1` and `height`) have no neighbor on
    the far side, so only the border sample contributes and its weight fades
    linearly towards zero.

    Args:
      keypoint: 2-dim numpy array containing (x, y) keypoint image coordinates.
        Any numeric dtype is accepted; values are read as Python floats.
      descriptor_map: (H, W, D) numpy array representing a dense descriptor map.

    Returns:
      D-dim descriptor value at the input 'keypoint' location.

    Raises:
      ValueError, if keypoint position is out of bounds, i.e. not within
      [0, W) horizontally and [0, H) vertically (NaN included).
    """
    height, width = np.shape(descriptor_map)[:2]

    # Convert once to builtin floats so that `.is_integer()` is available
    # regardless of the numpy scalar type stored in `keypoint`.
    x = float(keypoint[0])
    y = float(keypoint[1])

    # The upper bounds are exclusive: index `width` / `height` does not exist in
    # the map. Written as a positive range test so that NaN is rejected too.
    if not (0 <= x < width and 0 <= y < height):
        raise ValueError(
            'Keypoint position (%f, %f) is out of descriptor map bounds (%i w x'
            ' %i h).' % (x, y, width, height)
        )

    # Collect the integer sample positions that surround the keypoint. The
    # second sample is only needed for fractional coordinates, and only exists
    # when the keypoint is not inside the last column / row.
    x_range = [math.floor(x)]
    if not x.is_integer() and x < width - 1:
        x_range.append(x_range[0] + 1)
    y_range = [math.floor(y)]
    if not y.is_integer() and y < height - 1:
        y_range.append(y_range[0] + 1)

    bilinear_descriptor = np.zeros(np.shape(descriptor_map)[2])
    for curr_x in x_range:
        for curr_y in y_range:
            # Weight decreases linearly with the distance to the sample along
            # each axis.
            bilinear_scalar = (1.0 - abs(x - curr_x)) * (1.0 - abs(y - curr_y))
            bilinear_descriptor += (
                bilinear_scalar * descriptor_map[curr_y, curr_x, :]
            )
    return bilinear_descriptor
|
|
|
|
|
def soft_assignment_to_match_matrix(
    soft_assignment: tf.Tensor, match_threshold: float
) -> tf.Tensor:
    """Converts a matrix of soft assignment values to binary yes/no match matrix.

    Searches soft_assignment for row- and column-maximum values, which indicate
    mutual nearest neighbor matches between two unique sets of keypoints. Also,
    ensures that score values for matches are above the specified threshold.

    Non-mutual entries are stored as 0 before thresholding, so the result is
    only meaningful for a non-negative 'match_threshold'.

    Args:
      soft_assignment: (B, N, M) tensor, contains matching likelihood value
        between features of different sets. N is number of features in image0,
        and M is number of features in image1. Higher value indicates more
        likely to match. Per-batch results are collected in a float32
        TensorArray, so float32 scores are expected.
      match_threshold: float, thresholding value to consider a match valid.

    Returns:
      (B, N, M) boolean tensor. A True value at index (x, y) indicates a match
      between index 'x' (out of N) in image0 and index 'y' (out of M) in
      image 1.
    """

    def _range_like(x, dim):
        """Returns tensor with values (0, 1, ..., N - 1) for dimension in input x."""
        return tf.range(tf.shape(x)[dim], dtype=x.dtype)

    # TODO(omniglue): batched implementation; for now each batch element is
    # processed on its own and the results are stacked at the end.
    matches = tf.TensorArray(tf.float32, size=tf.shape(soft_assignment)[0])
    for i in range(tf.shape(soft_assignment)[0]):
        # Work directly on the (N, M) score matrix of this batch element. No
        # dummy batch axis is introduced, hence nothing has to be squeezed away
        # later (an axis-less squeeze would also drop the keypoint axis when
        # N == 1).
        scores = soft_assignment[i]

        # Best image1 candidate (and its score) for every image0 keypoint, and
        # best image0 candidate for every image1 keypoint.
        max0 = tf.math.reduce_max(scores, axis=1)
        indices0 = tf.math.argmax(scores, axis=1)
        indices1 = tf.math.argmax(scores, axis=0)

        # A pair is mutual when the best candidate of keypoint n points back to
        # n.
        row_ids = _range_like(indices0, 0)
        mutual = row_ids == tf.gather(indices1, indices0)

        # Scatter the row maxima into an (N, M) matrix, zeroing the scores of
        # non-mutual pairs. Indices are (row, best column) and are already in
        # row order.
        kp_ind_pairs = tf.stack([row_ids, indices0], axis=1)
        mutual_max0 = tf.where(mutual, max0, 0)
        sparse = tf.sparse.SparseTensor(
            kp_ind_pairs, mutual_max0, tf.shape(scores, out_type=tf.int64)
        )
        match_matrix = tf.sparse.to_dense(sparse)
        matches = matches.write(i, match_matrix)

    # Threshold on match_threshold value and convert to binary (0, 1) values.
    match_matrix = matches.stack()
    match_matrix = match_matrix > match_threshold
    return match_matrix
|
|
|
|
|
def visualize_matches(
    image0: np.ndarray,
    image1: np.ndarray,
    kp0: np.ndarray,
    kp1: np.ndarray,
    match_matrix: np.ndarray,
    match_labels: Optional[np.ndarray] = None,
    show_keypoints: bool = False,
    highlight_unmatched: bool = False,
    title: Optional[str] = None,
    line_width: int = 1,
    circle_radius: int = 4,
    circle_thickness: int = 2,
    rng: Optional['np.random.Generator'] = None,
):
    """Generates visualization of keypoints and matches for two images.

    Stacks image0 and image1 horizontally. In case the two images have different
    heights, scales image1 (and its keypoints) to match image0's height. Note
    that keypoints must be in (x, y) format, NOT (row, col). If match_matrix
    includes unmatched dustbins, the dustbins will be removed before visualizing
    matches. The caller's 'kp1' array is never modified.

    Colors are given as raw 3-channel tuples; how they appear depends on the
    channel order of the input images (e.g. (255, 0, 0) is red for RGB images
    and blue for BGR images).

    Args:
      image0: (H, W, 3) array containing image0 contents.
      image1: (H, W, 3) array containing image1 contents.
      kp0: (N, 2) array where each row represents (x, y) coordinates of keypoints
        in image0.
      kp1: (M, 2) array, where each row represents (x, y) coordinates of keypoints
        in image1. May be integer- or float-typed.
      match_matrix: (N, M) binary array, where values are non-zero for keypoint
        indices making up a match. An (N + 1, M + 1) array is interpreted as
        carrying an extra 'unmatched' dustbin row and column.
      match_labels: (N, M) binary array, where values are non-zero for keypoint
        indices making up a ground-truth match. When None, matches from
        'match_matrix' are colored randomly. Otherwise, matches from
        'match_matrix' are colored (0, 255, 0) when confirmed by the labels and
        (255, 0, 0) otherwise.
      show_keypoints: if True, all image0 and image1 keypoints (including
        unmatched ones) are visualized as circles of color (0, 0, 255).
      highlight_unmatched: if True (and match_matrix has dustbins), keypoints
        assigned to the dustbin are drawn with color (255, 0, 0) instead.
      title: if not None, adds title text to top left of visualization.
      line_width: width of correspondence line, in pixels.
      circle_radius: radius of keypoint circles, if visualized.
      circle_thickness: thickness of keypoint circles, if visualized.
      rng: np random number generator to generate the line colors. Each channel
        is drawn from [0, 255), the upper bound being exclusive.

    Returns:
      Numpy array of image0 and image1 side-by-side, with lines between matches
      according to match_matrix. If show_keypoints is True, keypoints from both
      images are also visualized.
    """
    correct_match_color = (0, 255, 0)
    wrong_match_color = (255, 0, 0)
    keypoint_color = (0, 0, 255)
    unmatched_color = (255, 0, 0)

    # Initialize RNG.
    if rng is None:
        rng = np.random.default_rng()

    # Make copy of input param that may be modified in this function. Integer
    # keypoints are promoted to float so that the in-place rescaling below
    # cannot fail on an int <- float cast.
    kp1 = np.copy(kp1)
    if not np.issubdtype(kp1.dtype, np.floating):
        kp1 = kp1.astype(np.float64)

    # Detect unmatched dustbins: one extra row and one extra column.
    has_unmatched_dustbins = (
        match_matrix.shape[0] == kp0.shape[0] + 1
        and match_matrix.shape[1] == kp1.shape[0] + 1
    )

    # Rescale image1 (and its keypoints) if the heights differ.
    height0 = image0.shape[0]
    height1 = image1.shape[0]
    if height0 != height1:
        scale_factor = height0 / height1
        # Area interpolation when shrinking, linear when enlarging.
        if scale_factor <= 1.0:
            interp_method = cv2.INTER_AREA
        else:
            interp_method = cv2.INTER_LINEAR
        new_dim1 = (int(image1.shape[1] * scale_factor), height0)
        image1 = cv2.resize(image1, new_dim1, interpolation=interp_method)
        kp1 *= scale_factor

    # Create side-by-side image and add lines for all matches. Image1
    # coordinates are shifted right by the width of image0.
    viz = cv2.hconcat([image0, image1])
    w0 = image0.shape[1]
    matches = np.argwhere(
        match_matrix[:-1, :-1] if has_unmatched_dustbins else match_matrix
    )
    for match in matches:
        pt0 = (int(kp0[match[0], 0]), int(kp0[match[0], 1]))
        pt1 = (int(kp1[match[1], 0] + w0), int(kp1[match[1], 1]))
        if match_labels is None:
            color = tuple(rng.integers(0, 255, size=3).tolist())
        elif match_labels[match[0], match[1]]:
            color = correct_match_color
        else:
            color = wrong_match_color
        cv2.line(viz, pt0, pt1, color, line_width)

    def _draw_keypoints(keypoints, x_offset, unmatched_flags):
        """Draws one circle per keypoint, shifted right by x_offset pixels."""
        for idx in range(np.shape(keypoints)[0]):
            # Shift on a private copy before truncating to integer pixels.
            center = np.array(keypoints[idx, :])
            center[0] += x_offset
            is_unmatched = unmatched_flags is not None and unmatched_flags[idx]
            cv2.circle(
                viz,
                tuple(center.astype(np.int32).tolist()),
                circle_radius,
                unmatched_color if is_unmatched else keypoint_color,
                circle_thickness,
            )

    # Optionally, add circles to output image to represent each keypoint.
    if show_keypoints:
        mark_unmatched = highlight_unmatched and has_unmatched_dustbins
        # Last column flags unmatched image0 keypoints, last row flags
        # unmatched image1 keypoints.
        _draw_keypoints(
            kp0, 0, match_matrix[:, -1] if mark_unmatched else None
        )
        _draw_keypoints(
            kp1, w0, match_matrix[-1, :] if mark_unmatched else None
        )

    if title is not None:
        viz = cv2.putText(
            viz,
            title,
            (5, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 0, 255),
            2,
            cv2.LINE_AA,
        )
    return viz
|
|