Spaces:

Realcat
/

image-matching-webui

Running

App Files Files Community

image-matching-webui / imcui /third_party /gim /datasets /walk /utils.py

Realcat

update: major change

499e141 6 months ago

raw

history blame

12.6 kB

	# -*- coding: utf-8 -*-
	# @Author : xuelun

	import math

	import cv2
	import torch
	import random
	import numpy as np

	from albumentations.augmentations import functional as F

	from datasets.utils import get_divisible_wh


	def fast_make_matching_robust_fitting_figure(data, b_id=0, transpose=False):
	    """Draw a side-by-side matching figure for one image pair.

	    A white canvas with three stacked rows is built: the colour images, the
	    gray images with matched keypoints drawn as dots, and the gray images
	    with correspondence lines plus dots. Text overlays are then written and
	    the canvas is returned with its first ``h + margin`` pixel rows removed
	    (``h`` being the taller of the two images).

	    Args:
	        data (dict): batch dictionary. Keys read here: ``image0``/``image1``,
	            ``color0``/``color1``, ``mkpts0_f``/``mkpts1_f``,
	            ``hw0_i``/``hw1_i``, ``dataset_name``, ``scene_id``, ``pair_id``,
	            ``covisible0``/``covisible1``, ``imsize0``/``imsize1``,
	            ``pair_names``, ``rands0``/``rands1``, ``offset0``/``offset1``,
	            ``hflip0``/``hflip1``; optionally ``scale0``/``scale1`` and
	            ``inliers``.
	            NOTE(review): keypoints are presumably (N, 2) numpy arrays in
	            (x, y) order -- confirm against the caller.
	        b_id (int): index of the sample inside the batch.
	        transpose (bool): if True, rotate both images 90 degrees
	            counter-clockwise and remap the keypoints accordingly.

	    Returns:
	        np.ndarray: uint8 canvas with 3 channels.
	    """
	    robust_fitting = 'inliers' in data and data['inliers'] is not None

	    gray0 = (data['image0'][b_id][0].cpu().numpy() * 255).round().astype(np.uint8)
	    gray1 = (data['image1'][b_id][0].cpu().numpy() * 255).round().astype(np.uint8)
	    kpts0 = data['mkpts0_f']
	    kpts1 = data['mkpts1_f']

	    if 'scale0' in data:
	        kpts0 = kpts0 / data['scale0'][b_id].cpu().numpy()
	        kpts1 = kpts1 / data['scale1'][b_id].cpu().numpy()

	    if transpose:
	        gray0 = cv2.rotate(gray0, cv2.ROTATE_90_COUNTERCLOCKWISE)
	        gray1 = cv2.rotate(gray1, cv2.ROTATE_90_COUNTERCLOCKWISE)

	        h0, w0 = data['hw0_i']
	        h1, w1 = data['hw1_i']
	        # Remap keypoints into the rotated frame: (x, y) -> (y, w - x).
	        kpts0_new = np.copy(kpts0)
	        kpts1_new = np.copy(kpts1)
	        kpts0_new[:, 0], kpts0_new[:, 1] = kpts0[:, 1], w0 - kpts0[:, 0]
	        kpts1_new[:, 0], kpts1_new[:, 1] = kpts1[:, 1], w1 - kpts1[:, 0]
	        kpts0, kpts1 = kpts0_new, kpts1_new
	        # Height and width swap after the rotation.
	        (h0, w0), (h1, w1) = (w0, h0), (w1, h1)
	    else:
	        (h0, w0), (h1, w1) = data['hw0_i'], data['hw1_i']

	    rows = 3
	    margin = 2
	    h, w = max(h0, h1), max(w0, w1)
	    H, W = margin * (rows + 1) + h * rows, margin * 3 + w * 2

	    # canvas
	    out = 255 * np.ones((H, W), np.uint8)
	    out = np.stack([out] * 3, -1)

	    # Column bounds of the left and right image slots.
	    wx = [margin, margin + w0, margin + w + margin, margin + w + margin + w1]

	    def row_top(row):
	        # First pixel row of the 1-based canvas row `row`.
	        return margin * row + h * (row - 1)

	    # row 1: colour images
	    sh = row_top(1)
	    color0 = (data['color0'][b_id].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8)
	    color1 = (data['color1'][b_id].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8)
	    if transpose:
	        color0 = cv2.rotate(color0, cv2.ROTATE_90_COUNTERCLOCKWISE)
	        color1 = cv2.rotate(color1, cv2.ROTATE_90_COUNTERCLOCKWISE)
	    out[sh: sh + h0, wx[0]: wx[1]] = color0
	    out[sh: sh + h1, wx[2]: wx[3]] = color1

	    # Shared by rows 2 and 3.
	    mkpts0, mkpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
	    gray0_rgb = np.stack([gray0] * 3, -1)
	    gray1_rgb = np.stack([gray1] * 3, -1)
	    x_shift = margin + w  # horizontal offset of the right image slot
	    line_color = (159, 212, 252)
	    point_color = (230, 216, 132)

	    # row 2: only show keypoints
	    sh = row_top(2)
	    out[sh: sh + h0, wx[0]: wx[1]] = gray0_rgb
	    out[sh: sh + h1, wx[2]: wx[3]] = gray1_rgb
	    for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
	        cv2.circle(out, (x0, y0+sh), 1, point_color, -1, lineType=cv2.LINE_AA)
	        cv2.circle(out, (x1 + x_shift, y1+sh), 1, point_color, -1, lineType=cv2.LINE_AA)

	    # row 3: show keypoints and correspondences
	    sh = row_top(3)
	    out[sh: sh + h0, wx[0]: wx[1]] = gray0_rgb
	    out[sh: sh + h1, wx[2]: wx[3]] = gray1_rgb
	    # All lines first, then the end-points, so circles stay on top.
	    for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
	        cv2.line(out, (x0, y0+sh), (x1 + x_shift, y1+sh), color=line_color, thickness=1, lineType=cv2.LINE_AA)
	    for (x0, y0), (x1, y1) in zip(mkpts0, mkpts1):
	        cv2.circle(out, (x0, y0+sh), 2, point_color, -1, lineType=cv2.LINE_AA)
	        cv2.circle(out, (x1 + x_shift, y1+sh), 2, point_color, -1, lineType=cv2.LINE_AA)

	    txt_color_fg = (255, 255, 255)  # white
	    txt_color_bg = (0, 0, 0)  # black

	    # Big text, anchored at the top of the canvas.
	    text = [
	        ' ',
	        f'#Matches {len(kpts0)}',
	        f'#Matches {sum(data["inliers"][b_id])}' if robust_fitting else '',
	    ]
	    sc = min(H / 640., 1.0)
	    Ht = int(30 * sc)  # text height
	    for i, t in enumerate(text):
	        # Thick black pass first, thin white pass on top: outlined text.
	        cv2.putText(out, t, (int(8 * sc), Ht * (i + 1)), cv2.FONT_HERSHEY_DUPLEX, 1.0 * sc, txt_color_bg, 2, cv2.LINE_AA)
	        cv2.putText(out, t, (int(8 * sc), Ht * (i + 1)), cv2.FONT_HERSHEY_DUPLEX, 1.0 * sc, txt_color_fg, 1, cv2.LINE_AA)

	    def shown_size(size):
	        # Swap the reported size when the figure is transposed.
	        if transpose and isinstance(size, (list, tuple, np.ndarray)) and len(size) >= 2:
	            return tuple(reversed(size))
	        return size

	    # Small text, anchored at the bottom of the canvas.
	    fingerprint = [
	        'Dataset: {}'.format(data['dataset_name'][b_id]),
	        'Scene ID: {}'.format(data['scene_id'][b_id]),
	        'Pair ID: {}'.format(data['pair_id'][b_id]),
	        'co-visible: {:.4f}/{:.4f}'.format(data['covisible0'],
	                                           data['covisible1']),
	        'Image sizes: {} - {}'.format(shown_size(data['imsize0'][b_id]),
	                                      shown_size(data['imsize1'][b_id])),
	        'Pair names: {}:{}'.format(data['pair_names'][0].split('/')[-1],
	                                   data['pair_names'][1].split('/')[-1]),
	        'Rand Scale: {} - {}'.format(data['rands0'],
	                                     data['rands1']),
	        'Offset: {} - {}'.format(data['offset0'].cpu().numpy(),
	                                 data['offset1'].cpu().numpy()),
	        'Fliped: {} - {}'.format(data['hflip0'],
	                                 data['hflip1']),
	        'Transposed: {}'.format(transpose),
	    ]
	    sc = min(H / 1280., 1.0)
	    Ht = int(18 * sc)  # text height
	    for i, t in enumerate(reversed(fingerprint)):
	        cv2.putText(out, t, (int(8 * sc), int(H - Ht * (i + .6))), cv2.FONT_HERSHEY_SIMPLEX, .5 * sc, txt_color_bg, 2, cv2.LINE_AA)
	        cv2.putText(out, t, (int(8 * sc), int(H - Ht * (i + .6))), cv2.FONT_HERSHEY_SIMPLEX, .5 * sc, txt_color_fg, 1, cv2.LINE_AA)

	    # Drop the first `h + margin` pixel rows of the canvas.
	    return out[h+margin:]


	def eudist(a, b):
	    """Return the pairwise *squared* Euclidean distances between rows.

	    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 a.b``; no square root
	    is taken.

	    Args:
	        a: 2-D array whose rows are points, shape (N, D).
	        b: 2-D array whose rows are points, shape (M, D).

	    Returns:
	        (N, M) array where entry ``[i, j]`` is the squared distance between
	        ``a[i]`` and ``b[j]``.
	    """
	    sq_a = np.sum(a ** 2, axis=-1, keepdims=True)    # (N, 1)
	    sq_b = np.sum(b ** 2, axis=-1, keepdims=True).T  # (1, M)
	    cross = a @ b.T                                  # (N, M)
	    return sq_a + sq_b - 2 * cross


	def covision(kpts, size):
	    """Return the keypoints' bounding-box area as a fraction of ``size``.

	    Args:
	        kpts: (N, 2) collection of points; column 0 and column 1 are treated
	            as the two coordinates.
	        size: pair whose product is the reference area.

	    Returns:
	        Area of the axis-aligned bounding box of ``kpts`` divided by
	        ``size[0] * size[1]`` (plus 1e-8 to avoid division by zero).
	        Returns 0.0 when ``kpts`` is empty.
	    """
	    # max()/min() of an empty array raise; no points cover no area.
	    if len(kpts) == 0:
	        return 0.0
	    extent0 = kpts[:, 0].max() - kpts[:, 0].min()
	    extent1 = kpts[:, 1].max() - kpts[:, 1].min()
	    return extent0 * extent1 / (size[0] * size[1] + 1e-8)


	def view(x):
	    """Reinterpret each row of a 2-D array as one structured element.

	    The result has one field per column of ``x``, so whole rows can be
	    compared and sorted as single values.

	    Args:
	        x (np.ndarray): 2-D array.

	    Returns:
	        np.ndarray: structured array of shape ``(x.shape[0], 1)``.
	    """
	    # ndarray.view with a wider dtype needs a contiguous last axis.
	    x = np.ascontiguousarray(x)
	    return x.view([('', x.dtype)] * x.shape[1])


	def intersected(x, y):
	    """Return the rows that appear in both ``x`` and ``y``.

	    Args:
	        x (np.ndarray): 2-D array of shape (N, D).
	        y (np.ndarray): 2-D array of shape (M, D) with the same dtype as ``x``.

	    Returns:
	        np.ndarray: (K, D) array of the unique common rows, in the sorted
	        order produced by ``np.intersect1d``.
	    """
	    common = np.intersect1d(view(x), view(y))
	    # Unpack the structured elements back into plain rows.
	    return common.view(x.dtype).reshape(-1, x.shape[1])


	def imread_color(path, augment_fn=None, read_size=None, source=None):
	    """Load a colour image, or pass through an already-loaded one.

	    Args:
	        path: image location; only read when ``source`` is None.
	        augment_fn (callable, optional): applied to the image as the last
	            step when given.
	        read_size (tuple, optional): size passed to ``cv2.resize``.
	        source (optional): pre-loaded image used instead of reading ``path``.
	            It is not colour-converted.
	            NOTE(review): presumably already RGB -- confirm with callers.

	    Returns:
	        The image: converted BGR -> RGB when read from disk, resized when
	        ``read_size`` is given, augmented when ``augment_fn`` is given.

	    Raises:
	        OSError: if ``cv2.imread`` cannot read ``path``.
	    """
	    from_disk = source is None
	    if from_disk:
	        image = cv2.imread(str(path), cv2.IMREAD_COLOR)
	        # cv2.imread signals failure by returning None, not by raising.
	        if image is None:
	            raise OSError('failed to read image: {}'.format(path))
	    else:
	        image = source

	    if read_size is not None:
	        image = cv2.resize(image, read_size)
	    if from_disk:
	        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	    if augment_fn is not None:
	        image = augment_fn(image)
	    return image  # (h, w, 3)


	def get_resized_wh(w, h, resize, aug_prob):
	    """Scale ``(w, h)`` uniformly towards the target ``resize``.

	    Args:
	        w (int): current width.
	        h (int): current height.
	        resize (sequence): target ``(height, width)``.
	        aug_prob (float): when exactly 1.0 the smaller of the two axis ratios
	            is used, so both sides fit the target; otherwise one of the two
	            ratios is picked at random.

	    Returns:
	        tuple[int, int]: ``(w_new, h_new)``, rounded to the nearest integer.
	    """
	    nh, nw = resize
	    sh, sw = nh / h, nw / w
	    scale = random.choice([sh, sw]) if aug_prob != 1.0 else min(sh, sw)
	    # The same factor goes on both sides so the aspect ratio is preserved.
	    w_new, h_new = int(round(w * scale)), int(round(h * scale))
	    return w_new, h_new


	def pad_bottom_right(inp, pad_size, ret_mask=False):
	    """Zero-pad an array on its bottom and right edges.

	    Args:
	        inp (np.ndarray): 2-D (h, w) or 3-D (h, w, c) array.
	        pad_size (sequence): target ``(height, width)``.
	        ret_mask (bool): also build a mask marking the original region.

	    Returns:
	        tuple: ``(padded, mask)``. ``padded`` has the dtype of ``inp`` with
	        ``inp`` in its top-left corner. ``mask`` is a bool array of shape
	        ``(height, width)`` that is True over the original region, or None
	        when ``ret_mask`` is False.

	    Raises:
	        NotImplementedError: if ``inp`` is neither 2-D nor 3-D.
	    """
	    if inp.ndim not in (2, 3):
	        raise NotImplementedError()

	    src_h, src_w = inp.shape[:2]
	    out_h, out_w = pad_size[0], pad_size[1]

	    # Trailing channel axis (if any) is carried over unchanged.
	    padded = np.zeros((out_h, out_w) + tuple(inp.shape[2:]), dtype=inp.dtype)
	    padded[:src_h, :src_w] = inp

	    mask = None
	    if ret_mask:
	        mask = np.zeros((out_h, out_w), dtype=bool)
	        mask[:src_h, :src_w] = True

	    return padded, mask


	def read_images(path, max_resize, df=None, padding=True, augment_fn=None, aug_prob=0.0, flip_prob=1.0,
	                is_left=None, upper_cornor=None, read_size=None, image=None):
	    """Load an image and apply random rescale, crop, flip and padding.

	    Args:
	        path: image location, forwarded to ``imread_color``.
	        max_resize (list): ``[a]`` or ``[a, b]``; passed through
	            ``get_divisible_wh`` to obtain the maximum (height, width).
	        df (int, optional): image size division factor.
	            NOTE: this changes the final image size after resizing.
	        padding (bool): if True, zero-pad the result to the maximum size and
	            return a validity mask.
	        augment_fn (callable, optional): forwarded to ``imread_color``.
	        aug_prob (float, optional): the random rescale is applied when a
	            uniform draw in [0, 1] is *greater* than this value; also
	            forwarded to ``get_resized_wh``.
	        flip_prob (float, optional): the image is flipped (horizontally and
	            vertically) when a uniform draw in [0, 1] is *greater* than this
	            value, so the default 1.0 disables flipping.
	        is_left (bool, optional): selects the random scale range: True gives
	            the lower range (0.6-1.0), False the upper range (1.0-1.4),
	            anything else the full range (0.6-1.4).
	        upper_cornor (sequence, optional): first two entries bound the crop
	            start, the remaining entries bound the random scale.
	            NOTE(review): presumably (x0, y0, x1, y1) in source pixels --
	            confirm against the caller.
	        read_size (tuple, optional): forwarded to ``imread_color``.
	        image (optional): pre-loaded image, forwarded to ``imread_color`` as
	            ``source``.

	    Returns:
	        tuple:
	            gray (torch.Tensor): (1, H, W) float tensor, values divided by 255.
	            image (torch.Tensor): (3, H, W) float tensor, values divided by 255.
	            scale (torch.Tensor): ``[w / w_new, h / h_new]``.
	            rands (float | int): random scale factor that was applied, or 1.
	            offset (torch.Tensor): crop origin ``[w_start, h_start]``.
	            hflip (bool): whether the image was flipped horizontally.
	            vflip (bool): whether the image was flipped vertically.
	            resize (list): ``[height, width]`` of the crop before padding.
	            mask (torch.Tensor | None): bool mask of the un-padded region, or
	                None when ``padding`` is False.
	    """
	    # read image
	    assert max_resize is not None
	    assert isinstance(max_resize, list)
	    if len(max_resize) == 1:
	        max_resize = max_resize * 2

	    w_new, h_new = get_divisible_wh(max_resize[0], max_resize[1], df)
	    max_resize = [h_new, w_new]

	    image = imread_color(path, augment_fn, read_size, image)  # (h,w,3) image is RGB

	    # resize image
	    w, h = image.shape[1], image.shape[0]
	    if (h > max_resize[0]) or (w > max_resize[1]):
	        w_new, h_new = get_resized_wh(w, h, max_resize, aug_prob)  # make max(w, h) to max_size
	    else:
	        w_new, h_new = w, h

	    # random resize
	    if random.uniform(0, 1) > aug_prob:
	        # random rescale
	        ratio = max(h / max_resize[0], w / max_resize[1])
	        if isinstance(is_left, bool):
	            if is_left:
	                low, upper = (0.6 / ratio, 1.0 / ratio) if ratio < 1.0 else (0.6, 1.0)
	            else:
	                low, upper = (1.0 / ratio, 1.4 / ratio) if ratio < 1.0 else (1.0, 1.4)
	        else:
	            low, upper = (0.6 / ratio, 1.4 / ratio) if ratio < 1.0 else (0.6, 1.4)
	        if not is_left and upper_cornor is not None:
	            # Cap the upper scale using the trailing corner entries.
	            corner = upper_cornor[2:]
	            upper = min(upper, min(max_resize[0]/corner[1], max_resize[1]/corner[0]))
	        rands = random.uniform(low, upper)
	        w_new, h_new = w_new * rands, h_new * rands
	        w_new, h_new = get_divisible_wh(w_new, h_new, df)  # make image divided by df and must <= max_size
	    else:
	        rands = 1
	        w_new, h_new = get_divisible_wh(w_new, h_new, df)

	    if upper_cornor is not None:
	        upper_cornor = upper_cornor[:2]

	    # random crop
	    if h_new > max_resize[0]:
	        height = max_resize[0]
	        h_start = int(random.uniform(0, 1) * (h_new - max_resize[0]))
	        if upper_cornor is not None:
	            # Do not start the crop below the (rescaled) corner.
	            h_start = min(h_start, math.floor(upper_cornor[1]*(h_new/h)))
	    else:
	        height = h_new
	        h_start = 0

	    if w_new > max_resize[1]:
	        width = max_resize[1]
	        w_start = int(random.uniform(0, 1) * (w_new - max_resize[1]))
	        if upper_cornor is not None:
	            w_start = min(w_start, math.floor(upper_cornor[0]*(w_new/w)))
	    else:
	        width = w_new
	        w_start = 0

	    w_new, h_new = map(int, [w_new, h_new])
	    width, height = map(int, [width, height])

	    image = cv2.resize(image, (w_new, h_new))  # (h',w',3)
	    image = image[h_start:h_start+height, w_start:w_start+width]

	    scale = [w / w_new, h / h_new]
	    offset = [w_start, h_start]

	    # horizontal + vertical flip, always applied together
	    flipped = random.uniform(0, 1) > flip_prob
	    if flipped:
	        hflip_fn = F.hflip_cv2 if image.ndim == 3 and image.shape[2] > 1 and image.dtype == np.uint8 else F.hflip
	        image = hflip_fn(image)
	        image = F.vflip(image)
	    hflip = flipped
	    vflip = flipped

	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

	    # padding
	    mask = None
	    if padding:
	        image, _ = pad_bottom_right(image, max_resize, ret_mask=False)
	        gray, mask = pad_bottom_right(gray, max_resize, ret_mask=True)
	        mask = torch.from_numpy(mask)

	    gray = torch.from_numpy(gray).float()[None] / 255  # (1,h,w)
	    image = torch.from_numpy(image).float() / 255  # (h,w,3)
	    image = image.permute(2, 0, 1)  # (3,h,w)

	    offset = torch.tensor(offset, dtype=torch.float)
	    scale = torch.tensor(scale, dtype=torch.float)
	    resize = [height, width]

	    return gray, image, scale, rands, offset, hflip, vflip, resize, mask