Last commit not found
raw
history blame
12.6 kB
# -*- coding: utf-8 -*-
# @Author : xuelun
import math
import cv2
import torch
import random
import numpy as np
from albumentations.augmentations import functional as F
from datasets.utils import get_divisible_wh
def fast_make_matching_robust_fitting_figure(data, b_id=0, transpose=False):
robust_fitting = True if 'inliers' in list(data.keys()) and data['inliers'] is not None else False
gray0 = (data['image0'][b_id][0].cpu().numpy() * 255).round().astype(np.uint8)
gray1 = (data['image1'][b_id][0].cpu().numpy() * 255).round().astype(np.uint8)
kpts0 = data['mkpts0_f']
kpts1 = data['mkpts1_f']
if 'scale0' in data:
kpts0 = kpts0 / data['scale0'][b_id].cpu().numpy()
kpts1 = kpts1 / data['scale1'][b_id].cpu().numpy()
if transpose:
gray0 = cv2.rotate(gray0, cv2.ROTATE_90_COUNTERCLOCKWISE)
gray1 = cv2.rotate(gray1, cv2.ROTATE_90_COUNTERCLOCKWISE)
h0, w0 = data['hw0_i']
h1, w1 = data['hw1_i']
kpts0_new = np.copy(kpts0)
kpts1_new = np.copy(kpts1)
kpts0_new[:, 0], kpts0_new[:, 1] = kpts0[:, 1], w0 - kpts0[:, 0]
kpts1_new[:, 0], kpts1_new[:, 1] = kpts1[:, 1], w1 - kpts1[:, 0]
kpts0, kpts1 = kpts0_new, kpts1_new
(h0, w0), (h1, w1) = (w0, h0), (w1, h1)
else:
(h0, w0), (h1, w1) = data['hw0_i'], data['hw1_i']
rows = 3
margin = 2
h, w = max(h0, h1), max(w0, w1)
H, W = margin * (rows + 1) + h * rows, margin * 3 + w * 2
# canvas
out = 255 * np.ones((H, W), np.uint8)
wx = [margin, margin + w0, margin + w + margin, margin + w + margin + w1]
hx = lambda row: margin * row + h * (row-1)
out = np.stack([out] * 3, -1)
sh = hx(row=1)
color0 = (data['color0'][b_id].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8)
color1 = (data['color1'][b_id].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8)
if transpose:
color0 = cv2.rotate(color0, cv2.ROTATE_90_COUNTERCLOCKWISE)
color1 = cv2.rotate(color1, cv2.ROTATE_90_COUNTERCLOCKWISE)
out[sh: sh + h0, wx[0]: wx[1]] = color0
out[sh: sh + h1, wx[2]: wx[3]] = color1
# only show keypoints
sh = hx(row=2)
mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
out[sh: sh + h0, wx[0]: wx[1]] = np.stack([gray0] * 3, -1)
out[sh: sh + h1, wx[2]: wx[3]] = np.stack([gray1] * 3, -1)
for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
# display line end-points as circles
c = (230, 216, 132)
cv2.circle(out, (x0, y0+sh), 1, c, -1, lineType=cv2.LINE_AA)
cv2.circle(out, (x1 + margin + w, y1+sh), 1, c, -1, lineType=cv2.LINE_AA)
# show keypoints and correspondences
sh = hx(row=3)
mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
out[sh: sh + h0, wx[0]: wx[1]] = np.stack([gray0] * 3, -1)
out[sh: sh + h1, wx[2]: wx[3]] = np.stack([gray1] * 3, -1)
for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
c = (159, 212, 252)
cv2.line(out, (x0, y0+sh), (x1 + margin + w, y1+sh), color=c, thickness=1, lineType=cv2.LINE_AA)
for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
# display line end-points as circles
c = (230, 216, 132)
cv2.circle(out, (x0, y0+sh), 2, c, -1, lineType=cv2.LINE_AA)
cv2.circle(out, (x1 + margin + w, y1+sh), 2, c, -1, lineType=cv2.LINE_AA)
# Big text.
text = [
f' ',
f'#Matches {len(kpts0)}',
f'#Matches {sum(data["inliers"][b_id])}' if robust_fitting else '',
]
sc = min(H / 640., 1.0)
Ht = int(30 * sc) # text height
txt_color_fg = (255, 255, 255) # white
txt_color_bg = (0, 0, 0) # black
for i, t in enumerate(text):
cv2.putText(out, t, (int(8 * sc), Ht * (i + 1)), cv2.FONT_HERSHEY_DUPLEX, 1.0 * sc, txt_color_bg, 2, cv2.LINE_AA)
cv2.putText(out, t, (int(8 * sc), Ht * (i + 1)), cv2.FONT_HERSHEY_DUPLEX, 1.0 * sc, txt_color_fg, 1, cv2.LINE_AA)
fingerprint = [
'Dataset: {}'.format(data['dataset_name'][b_id]),
'Scene ID: {}'.format(data['scene_id'][b_id]),
'Pair ID: {}'.format(data['pair_id'][b_id]),
'co-visible: {:.4f}/{:.4f}'.format(data['covisible0'],
data['covisible1']),
'Image sizes: {} - {}'.format(
tuple(reversed(data['imsize0'][b_id])) if transpose and isinstance(data['imsize0'][b_id], (list, tuple, np.ndarray)) and len(data['imsize0'][b_id]) >= 2 else data['imsize0'][b_id],
tuple(reversed(data['imsize1'][b_id])) if transpose and isinstance(data['imsize1'][b_id], (list, tuple, np.ndarray)) and len(data['imsize1'][b_id]) >= 2 else data['imsize1'][b_id]),
'Pair names: {}:{}'.format(data['pair_names'][0].split('/')[-1],
data['pair_names'][1].split('/')[-1]),
'Rand Scale: {} - {}'.format(data['rands0'],
data['rands1']),
'Offset: {} - {}'.format(data['offset0'].cpu().numpy(),
data['offset1'].cpu().numpy()),
'Fliped: {} - {}'.format(data['hflip0'],
data['hflip1']),
'Transposed: {}'.format(transpose)
]
sc = min(H / 1280., 1.0)
Ht = int(18 * sc) # text height
txt_color_fg = (255, 255, 255) # white
txt_color_bg = (0, 0, 0) # black
for i, t in enumerate(reversed(fingerprint)):
cv2.putText(out, t, (int(8 * sc), int(H - Ht * (i + .6))), cv2.FONT_HERSHEY_SIMPLEX, .5 * sc, txt_color_bg, 2, cv2.LINE_AA)
cv2.putText(out, t, (int(8 * sc), int(H - Ht * (i + .6))), cv2.FONT_HERSHEY_SIMPLEX, .5 * sc, txt_color_fg, 1, cv2.LINE_AA)
return out[h+margin:]
def eudist(a, b):
aa = np.sum(a ** 2, axis=-1, keepdims=True)
bb = np.sum(b ** 2, axis=-1, keepdims=True).T
cc = a @ b.T
dist = aa + bb - 2*cc
return dist
def covision(kpts, size):
return (kpts[:, 0].max() - kpts[:, 0].min()) * \
(kpts[:, 1].max() - kpts[:, 1].min()) / \
(size[0] * size[1] + 1e-8)
view = lambda x: x.view([('', x.dtype)] * x.shape[1])
def intersected(x, y):
intersected_ = np.intersect1d(view(x), view(y))
z = intersected_.view(x.dtype).reshape(-1, x.shape[1])
return z
def imread_color(path, augment_fn=None, read_size=None, source=None):
if augment_fn is None:
image = cv2.imread(str(path), cv2.IMREAD_COLOR) if source is None else source
image = cv2.resize(image, read_size) if read_size is not None else image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if source is None else image
else:
image = cv2.imread(str(path), cv2.IMREAD_COLOR) if source is None else source
image = cv2.resize(image, read_size) if read_size is not None else image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if source is None else image
image = augment_fn(image)
return image # (h, w)
def get_resized_wh(w, h, resize, aug_prob):
nh, nw = resize
sh, sw = nh / h, nw / w
# scale = min(sh, sw)
scale = random.choice([sh, sw]) if aug_prob != 1.0 else min(sh, sw)
w_new, h_new = int(round(w*scale)), int(round(h*scale))
return w_new, h_new
def pad_bottom_right(inp, pad_size, ret_mask=False):
mask = None
if inp.ndim == 2:
padded = np.zeros((pad_size[0], pad_size[1]), dtype=inp.dtype)
padded[:inp.shape[0], :inp.shape[1]] = inp
elif inp.ndim == 3:
padded = np.zeros((pad_size[0], pad_size[1], inp.shape[-1]), dtype=inp.dtype)
padded[:inp.shape[0], :inp.shape[1]] = inp
else:
raise NotImplementedError()
if ret_mask:
mask = np.zeros((pad_size[0], pad_size[1]), dtype=bool)
mask[:inp.shape[0], :inp.shape[1]] = True
return padded, mask
def read_images(path, max_resize, df=None, padding=True, augment_fn=None, aug_prob=0.0, flip_prob=1.0,
is_left=None, upper_cornor=None, read_size=None, image=None):
"""
Args:
path: string
max_resize (int): max image size after resied
df (int, optional): image size division factor.
NOTE: this will change the final image size after img_resize
padding (bool): If set to 'True', zero-pad resized images to squared size.
augment_fn (callable, optional): augments images with pre-defined visual effects
aug_prob (float, optional): probability of applying augment_fn
flip_prob (float, optional): probability of flipping images
is_left (bool, optional): if set to 'True', it is left image, otherwise is right image
upper_cornor (tuple, optional): upper left corner of the image
read_size (int, optional): read image size
image (callable, optional): input image
Returns:
image (torch.tensor): (1, h, w)
mask (torch.tensor): (h, w)
scale (torch.tensor): [w/w_new, h/h_new]
"""
# read image
assert max_resize is not None
assert isinstance(max_resize, list)
if len(max_resize) == 1: max_resize = max_resize * 2
w_new, h_new = get_divisible_wh(max_resize[0], max_resize[1], df)
max_resize = [h_new, w_new]
image = imread_color(path, augment_fn, read_size, image) # (h,w,3) image is RGB
# resize image
w, h = image.shape[1], image.shape[0]
if (h > max_resize[0]) or (w > max_resize[1]):
w_new, h_new = get_resized_wh(w, h, max_resize, aug_prob) # make max(w, h) to max_size
else:
w_new, h_new = w, h
# random resize
if random.uniform(0, 1) > aug_prob:
# random rescale
ratio = max(h / max_resize[0], w / max_resize[1])
if type(is_left) == bool:
if is_left:
low, upper = (0.6 / ratio, 1.0 / ratio) if ratio < 1.0 else (0.6, 1.0)
else:
low, upper = (1.0 / ratio, 1.4 / ratio) if ratio < 1.0 else (1.0, 1.4)
else:
low, upper = (0.6 / ratio, 1.4 / ratio) if ratio < 1.0 else (0.6, 1.4)
if not is_left and upper_cornor is not None:
corner = upper_cornor[2:]
upper = min(upper, min(max_resize[0]/corner[1], max_resize[1]/corner[0]))
rands = random.uniform(low, upper)
w_new, h_new = map(lambda x: x*rands, [w_new, h_new])
w_new, h_new = get_divisible_wh(w_new, h_new, df) # make image divided by df and must <= max_size
else:
rands = 1
w_new, h_new = get_divisible_wh(w_new, h_new, df)
# width, height = w_new, h_new
# h_start = w_start = 0
if upper_cornor is not None:
upper_cornor = upper_cornor[:2]
# random crop
if h_new > max_resize[0]:
height = max_resize[0]
h_start = int(random.uniform(0, 1) * (h_new - max_resize[0]))
if upper_cornor is not None:
h_start = min(h_start, math.floor(upper_cornor[1]*(h_new/h)))
else:
height = h_new
h_start = 0
if w_new > max_resize[1]:
width = max_resize[1]
w_start = int(random.uniform(0, 1) * (w_new - max_resize[1]))
if upper_cornor is not None:
w_start = min(w_start, math.floor(upper_cornor[0]*(w_new/w)))
else:
width = w_new
w_start = 0
w_new, h_new = map(int, [w_new, h_new])
width, height = map(int, [width, height])
image = cv2.resize(image, (w_new, h_new)) # (w',h',3)
image = image[h_start:h_start+height, w_start:w_start+width]
scale = [w / w_new, h / h_new]
offset = [w_start, h_start]
# vertical flip
if random.uniform(0, 1) > flip_prob:
hflip = F.hflip_cv2 if image.ndim == 3 and image.shape[2] > 1 and image.dtype == np.uint8 else F.hflip
image = hflip(image)
image = F.vflip(image)
hflip = True
vflip = True
else:
hflip = False
vflip = False
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# padding
mask = None
if padding:
image, _ = pad_bottom_right(image, max_resize, ret_mask=False)
gray, mask = pad_bottom_right(gray, max_resize, ret_mask=True)
mask = torch.from_numpy(mask)
gray = torch.from_numpy(gray).float()[None] / 255 # (1,h,w)
image = torch.from_numpy(image).float() / 255 # (h,w,3)
image = image.permute(2, 0, 1) # (3,h,w)
offset = torch.tensor(offset, dtype=torch.float)
scale = torch.tensor(scale, dtype=torch.float)
resize = [height, width]
return gray, image, scale, rands, offset, hflip, vflip, resize, mask