# -*- coding: utf-8 -*-
# @Author : xuelun

import math

import cv2
import torch
import random
import numpy as np

from albumentations.augmentations import functional as F

from datasets.utils import get_divisible_wh


def fast_make_matching_robust_fitting_figure(data, b_id=0, transpose=False):
    """Render a 3-row match-visualization canvas for one pair in a batch.

    Rows (top to bottom): color image pair, keypoints only, keypoints with
    correspondence lines. A text overlay reports match counts and a dataset
    "fingerprint" (scene/pair ids, sizes, augmentation parameters).

    Args:
        data (dict): batch dict; must contain 'image0'/'image1' (B,1,H,W),
            'color0'/'color1' (B,3,H,W), 'mkpts0_f'/'mkpts1_f' match
            coordinates, 'hw0_i'/'hw1_i' image sizes, plus the metadata keys
            read in the fingerprint below. NOTE(review): 'covisible0/1' and
            'pair_names' are indexed without b_id — presumably per-pair
            already; confirm against the caller.
        b_id (int): batch index to visualize.
        transpose (bool): if True, rotate both images 90 degrees CCW and
            remap keypoints accordingly.

    Returns:
        np.ndarray: uint8 BGR-ordered canvas with the top (color) row
        cropped off (`out[h + margin:]`).
    """
    # Robust fitting results (inlier mask) are optional.
    robust_fitting = 'inliers' in data and data['inliers'] is not None

    gray0 = (data['image0'][b_id][0].cpu().numpy() * 255).round().astype(np.uint8)
    gray1 = (data['image1'][b_id][0].cpu().numpy() * 255).round().astype(np.uint8)

    kpts0 = data['mkpts0_f']
    kpts1 = data['mkpts1_f']

    # Undo the resize scaling so keypoints land on the original image grid.
    if 'scale0' in data:
        kpts0 = kpts0 / data['scale0'][b_id].cpu().numpy()
        kpts1 = kpts1 / data['scale1'][b_id].cpu().numpy()

    if transpose:
        gray0 = cv2.rotate(gray0, cv2.ROTATE_90_COUNTERCLOCKWISE)
        gray1 = cv2.rotate(gray1, cv2.ROTATE_90_COUNTERCLOCKWISE)
        h0, w0 = data['hw0_i']
        h1, w1 = data['hw1_i']
        # Map (x, y) -> (y, w - x): keypoint coordinates after 90-degree CCW
        # rotation of the image.
        kpts0_new = np.copy(kpts0)
        kpts1_new = np.copy(kpts1)
        kpts0_new[:, 0], kpts0_new[:, 1] = kpts0[:, 1], w0 - kpts0[:, 0]
        kpts1_new[:, 0], kpts1_new[:, 1] = kpts1[:, 1], w1 - kpts1[:, 0]
        kpts0, kpts1 = kpts0_new, kpts1_new
        (h0, w0), (h1, w1) = (w0, h0), (w1, h1)
    else:
        (h0, w0), (h1, w1) = data['hw0_i'], data['hw1_i']

    rows = 3
    margin = 2
    h, w = max(h0, h1), max(w0, w1)
    H, W = margin * (rows + 1) + h * rows, margin * 3 + w * 2

    # canvas (white background), then expand to 3 channels
    out = 255 * np.ones((H, W), np.uint8)

    # Column extents: [left0, right0, left1, right1] for the two images.
    wx = [margin, margin + w0, margin + w + margin, margin + w + margin + w1]
    # Top y-coordinate of a given 1-indexed row.
    hx = lambda row: margin * row + h * (row - 1)
    out = np.stack([out] * 3, -1)

    # Row 1: color images side by side.
    sh = hx(row=1)
    color0 = (data['color0'][b_id].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8)
    color1 = (data['color1'][b_id].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8)
    if transpose:
        color0 = cv2.rotate(color0, cv2.ROTATE_90_COUNTERCLOCKWISE)
        color1 = cv2.rotate(color1, cv2.ROTATE_90_COUNTERCLOCKWISE)
    out[sh: sh + h0, wx[0]: wx[1]] = color0
    out[sh: sh + h1, wx[2]: wx[3]] = color1

    # Row 2: only show keypoints
    sh = hx(row=2)
    mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
    out[sh: sh + h0, wx[0]: wx[1]] = np.stack([gray0] * 3, -1)
    out[sh: sh + h1, wx[2]: wx[3]] = np.stack([gray1] * 3, -1)
    for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
        # display line end-points as circles
        c = (230, 216, 132)
        cv2.circle(out, (x0, y0 + sh), 1, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, (x1 + margin + w, y1 + sh), 1, c, -1, lineType=cv2.LINE_AA)

    # Row 3: show keypoints and correspondences
    sh = hx(row=3)
    mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
    out[sh: sh + h0, wx[0]: wx[1]] = np.stack([gray0] * 3, -1)
    out[sh: sh + h1, wx[2]: wx[3]] = np.stack([gray1] * 3, -1)
    for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
        c = (159, 212, 252)
        cv2.line(out, (x0, y0 + sh), (x1 + margin + w, y1 + sh),
                 color=c, thickness=1, lineType=cv2.LINE_AA)
    for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
        # display line end-points as circles
        c = (230, 216, 132)
        cv2.circle(out, (x0, y0 + sh), 2, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, (x1 + margin + w, y1 + sh), 2, c, -1, lineType=cv2.LINE_AA)

    # Big text: match counts (and inlier count when robust fitting ran).
    text = [
        f' ',
        f'#Matches {len(kpts0)}',
        f'#Matches {sum(data["inliers"][b_id])}' if robust_fitting else '',
    ]
    sc = min(H / 640., 1.0)
    Ht = int(30 * sc)  # text height
    txt_color_fg = (255, 255, 255)  # white
    txt_color_bg = (0, 0, 0)  # black
    # Draw each line twice (thick black, then thin white) for readability.
    for i, t in enumerate(text):
        cv2.putText(out, t, (int(8 * sc), Ht * (i + 1)), cv2.FONT_HERSHEY_DUPLEX,
                    1.0 * sc, txt_color_bg, 2, cv2.LINE_AA)
        cv2.putText(out, t, (int(8 * sc), Ht * (i + 1)), cv2.FONT_HERSHEY_DUPLEX,
                    1.0 * sc, txt_color_fg, 1, cv2.LINE_AA)

    # Small "fingerprint" text block along the bottom edge.
    fingerprint = [
        'Dataset: {}'.format(data['dataset_name'][b_id]),
        'Scene ID: {}'.format(data['scene_id'][b_id]),
        'Pair ID: {}'.format(data['pair_id'][b_id]),
        'co-visible: {:.4f}/{:.4f}'.format(data['covisible0'], data['covisible1']),
        'Image sizes: {} - {}'.format(
            tuple(reversed(data['imsize0'][b_id]))
            if transpose and isinstance(data['imsize0'][b_id], (list, tuple, np.ndarray))
            and len(data['imsize0'][b_id]) >= 2 else data['imsize0'][b_id],
            tuple(reversed(data['imsize1'][b_id]))
            if transpose and isinstance(data['imsize1'][b_id], (list, tuple, np.ndarray))
            and len(data['imsize1'][b_id]) >= 2 else data['imsize1'][b_id]),
        'Pair names: {}:{}'.format(data['pair_names'][0].split('/')[-1],
                                   data['pair_names'][1].split('/')[-1]),
        'Rand Scale: {} - {}'.format(data['rands0'], data['rands1']),
        'Offset: {} - {}'.format(data['offset0'].cpu().numpy(), data['offset1'].cpu().numpy()),
        'Fliped: {} - {}'.format(data['hflip0'], data['hflip1']),
        'Transposed: {}'.format(transpose)
    ]
    sc = min(H / 1280., 1.0)
    Ht = int(18 * sc)  # text height
    txt_color_fg = (255, 255, 255)  # white
    txt_color_bg = (0, 0, 0)  # black
    for i, t in enumerate(reversed(fingerprint)):
        cv2.putText(out, t, (int(8 * sc), int(H - Ht * (i + .6))), cv2.FONT_HERSHEY_SIMPLEX,
                    .5 * sc, txt_color_bg, 2, cv2.LINE_AA)
        cv2.putText(out, t, (int(8 * sc), int(H - Ht * (i + .6))), cv2.FONT_HERSHEY_SIMPLEX,
                    .5 * sc, txt_color_fg, 1, cv2.LINE_AA)

    # Drop the first (color) row from the returned canvas.
    return out[h + margin:]


def eudist(a, b):
    """Pairwise squared Euclidean distances between row vectors.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, so small
    negative values are possible from floating-point cancellation.

    Args:
        a (np.ndarray): (N, D) points.
        b (np.ndarray): (M, D) points.

    Returns:
        np.ndarray: (N, M) squared distances.
    """
    aa = np.sum(a ** 2, axis=-1, keepdims=True)
    bb = np.sum(b ** 2, axis=-1, keepdims=True).T
    cc = a @ b.T
    dist = aa + bb - 2 * cc
    return dist


def covision(kpts, size):
    """Fraction of the image area covered by the keypoints' bounding box.

    Args:
        kpts (np.ndarray): (N, 2) keypoints as (x, y); must be non-empty.
        size (sequence): image size; size[0] * size[1] is the total area.

    Returns:
        float: bbox area / image area (epsilon-guarded denominator).
    """
    return (kpts[:, 0].max() - kpts[:, 0].min()) * \
           (kpts[:, 1].max() - kpts[:, 1].min()) / \
           (size[0] * size[1] + 1e-8)


# Reinterpret an (N, C) array as a structured (record) view so whole rows
# can be compared/intersected as single elements.
view = lambda x: x.view([('', x.dtype)] * x.shape[1])


def intersected(x, y):
    """Row-wise intersection of two 2-D arrays.

    Args:
        x (np.ndarray): (N, C) array.
        y (np.ndarray): (M, C) array, same dtype and column count as x.

    Returns:
        np.ndarray: (K, C) rows present in both x and y.
    """
    intersected_ = np.intersect1d(view(x), view(y))
    z = intersected_.view(x.dtype).reshape(-1, x.shape[1])
    return z


def imread_color(path, augment_fn=None, read_size=None, source=None):
    """Read (or reuse) a color image as RGB, optionally resized/augmented.

    Args:
        path: image path; ignored when `source` is given.
        augment_fn (callable, optional): applied to the RGB image if set.
        read_size (tuple, optional): (w, h) passed to cv2.resize.
        source (np.ndarray, optional): pre-loaded image; when given, no disk
            read and no BGR->RGB conversion happen (assumed already RGB).

    Returns:
        np.ndarray: (h, w, 3) RGB image.
    """
    # Originally both augment branches duplicated these three lines verbatim;
    # collapsed into one path with a conditional augmentation (same behavior).
    image = cv2.imread(str(path), cv2.IMREAD_COLOR) if source is None else source
    image = cv2.resize(image, read_size) if read_size is not None else image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if source is None else image
    if augment_fn is not None:
        image = augment_fn(image)
    return image  # (h, w)


def get_resized_wh(w, h, resize, aug_prob):
    """Compute a resized (w, h) that fits `resize` = (nh, nw).

    With aug_prob == 1.0 the limiting (min) scale is used, so the result
    fits inside the target; otherwise one of the two axis scales is picked
    at random, which may overshoot the other axis (cropped later).

    Args:
        w, h (int): original width and height.
        resize (sequence): target (height, width).
        aug_prob (float): augmentation probability flag; only compared
            against 1.0 here.

    Returns:
        tuple[int, int]: new (width, height).
    """
    nh, nw = resize
    sh, sw = nh / h, nw / w
    # scale = min(sh, sw)
    scale = random.choice([sh, sw]) if aug_prob != 1.0 else min(sh, sw)
    w_new, h_new = int(round(w * scale)), int(round(h * scale))
    return w_new, h_new


def pad_bottom_right(inp, pad_size, ret_mask=False):
    """Zero-pad a 2-D or 3-D array at the bottom/right to `pad_size`.

    Args:
        inp (np.ndarray): (H, W) or (H, W, C) array; H, W must not exceed
            pad_size.
        pad_size (sequence): target (height, width).
        ret_mask (bool): if True, also build a boolean validity mask that is
            True over the original (unpadded) region.

    Returns:
        tuple: (padded array, mask or None).

    Raises:
        NotImplementedError: for arrays that are not 2-D or 3-D.
    """
    mask = None
    if inp.ndim == 2:
        padded = np.zeros((pad_size[0], pad_size[1]), dtype=inp.dtype)
        padded[:inp.shape[0], :inp.shape[1]] = inp
    elif inp.ndim == 3:
        padded = np.zeros((pad_size[0], pad_size[1], inp.shape[-1]), dtype=inp.dtype)
        padded[:inp.shape[0], :inp.shape[1]] = inp
    else:
        raise NotImplementedError()
    if ret_mask:
        mask = np.zeros((pad_size[0], pad_size[1]), dtype=bool)
        mask[:inp.shape[0], :inp.shape[1]] = True
    return padded, mask


def read_images(path, max_resize, df=None, padding=True, augment_fn=None,
                aug_prob=0.0, flip_prob=1.0, is_left=None, upper_cornor=None,
                read_size=None, image=None):
    """Read, randomly rescale/crop/flip, and tensorize an image.

    Args:
        path: string
        max_resize (int): max image size after resized
        df (int, optional): image size division factor.
                            NOTE: this will change the final image size after img_resize
        padding (bool): If set to 'True', zero-pad resized images to squared size.
        augment_fn (callable, optional): augments images with pre-defined visual effects
        aug_prob (float, optional): probability of applying augment_fn
        flip_prob (float, optional): probability of flipping images
        is_left (bool, optional): if set to 'True', it is left image, otherwise is right image
        upper_cornor (tuple, optional): upper left corner of the image
        read_size (int, optional): read image size
        image (callable, optional): input image
    Returns:
        image (torch.tensor): (1, h, w)
        mask (torch.tensor): (h, w)
        scale (torch.tensor): [w/w_new, h/h_new]
    """
    # --- resolve target size ---------------------------------------------
    assert max_resize is not None
    assert isinstance(max_resize, list)
    if len(max_resize) == 1:
        max_resize = max_resize * 2
    # Make the target itself divisible by df; stored as [h, w].
    w_new, h_new = get_divisible_wh(max_resize[0], max_resize[1], df)
    max_resize = [h_new, w_new]

    image = imread_color(path, augment_fn, read_size, image)  # (h,w,3) RGB

    # --- resize so the image fits the target (or keep as is) -------------
    w, h = image.shape[1], image.shape[0]
    if (h > max_resize[0]) or (w > max_resize[1]):
        w_new, h_new = get_resized_wh(w, h, max_resize, aug_prob)  # make max(w, h) to max_size
    else:
        w_new, h_new = w, h

    # --- random rescale ---------------------------------------------------
    # NOTE(review): the comparison is `> aug_prob`, so this branch fires
    # with probability 1 - aug_prob — confirm the intended convention.
    if random.uniform(0, 1) > aug_prob:
        ratio = max(h / max_resize[0], w / max_resize[1])
        # Left images are shrunk (0.6–1.0x), right images enlarged
        # (1.0–1.4x); when is_left is None the full 0.6–1.4x range is used.
        if isinstance(is_left, bool):
            if is_left:
                low, upper = (0.6 / ratio, 1.0 / ratio) if ratio < 1.0 else (0.6, 1.0)
            else:
                low, upper = (1.0 / ratio, 1.4 / ratio) if ratio < 1.0 else (1.0, 1.4)
        else:
            low, upper = (0.6 / ratio, 1.4 / ratio) if ratio < 1.0 else (0.6, 1.4)
        # NOTE(review): `not is_left` is also True when is_left is None, so
        # this clamp applies to None as well — confirm it should only hit
        # right images (is_left is False).
        if not is_left and upper_cornor is not None:
            corner = upper_cornor[2:]
            # Cap the upscale so the far corner stays inside the target.
            upper = min(upper, min(max_resize[0] / corner[1], max_resize[1] / corner[0]))
        rands = random.uniform(low, upper)
        w_new, h_new = map(lambda x: x * rands, [w_new, h_new])
        w_new, h_new = get_divisible_wh(w_new, h_new, df)  # make image divided by df and must <= max_size
    else:
        rands = 1
        w_new, h_new = get_divisible_wh(w_new, h_new, df)

    if upper_cornor is not None:
        upper_cornor = upper_cornor[:2]

    # --- random crop back down to the target size ------------------------
    if h_new > max_resize[0]:
        height = max_resize[0]
        h_start = int(random.uniform(0, 1) * (h_new - max_resize[0]))
        if upper_cornor is not None:
            # Keep the reference corner inside the crop window.
            h_start = min(h_start, math.floor(upper_cornor[1] * (h_new / h)))
    else:
        height = h_new
        h_start = 0
    if w_new > max_resize[1]:
        width = max_resize[1]
        w_start = int(random.uniform(0, 1) * (w_new - max_resize[1]))
        if upper_cornor is not None:
            w_start = min(w_start, math.floor(upper_cornor[0] * (w_new / w)))
    else:
        width = w_new
        w_start = 0

    w_new, h_new = map(int, [w_new, h_new])
    width, height = map(int, [width, height])
    image = cv2.resize(image, (w_new, h_new))  # (w',h',3)
    image = image[h_start:h_start + height, w_start:w_start + width]
    scale = [w / w_new, h / h_new]
    offset = [w_start, h_start]

    # --- random flip (both horizontal and vertical together) -------------
    # NOTE(review): comparison is `> flip_prob`, so the default flip_prob=1.0
    # means "never flip" — confirm the intended convention.
    if random.uniform(0, 1) > flip_prob:
        # Use the fast cv2 path for multi-channel uint8 images.
        hflip_fn = F.hflip_cv2 if image.ndim == 3 and image.shape[2] > 1 and image.dtype == np.uint8 else F.hflip
        image = hflip_fn(image)
        image = F.vflip(image)
        hflip = True
        vflip = True
    else:
        hflip = False
        vflip = False

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # --- padding to the (square-ish) target size -------------------------
    mask = None
    if padding:
        image, _ = pad_bottom_right(image, max_resize, ret_mask=False)
        gray, mask = pad_bottom_right(gray, max_resize, ret_mask=True)
        mask = torch.from_numpy(mask)

    gray = torch.from_numpy(gray).float()[None] / 255  # (1,h,w)
    image = torch.from_numpy(image).float() / 255  # (h,w,3)
    image = image.permute(2, 0, 1)  # (3,h,w)
    offset = torch.tensor(offset, dtype=torch.float)
    scale = torch.tensor(scale, dtype=torch.float)
    resize = [height, width]
    return gray, image, scale, rands, offset, hflip, vflip, resize, mask