Spaces:

Realcat
/

image-matching-webui

Running

App Files Files Community

image-matching-webui / imcui /third_party /gim /datasets /eth3d /utils.py

Realcat

update: major change

499e141 6 months ago

raw

history blame

3.71 kB

	# -*- coding: utf-8 -*-
	# @Author : xuelun

	import cv2
	import math
	import torch

	import numpy as np

	from datasets.utils import imread_color, get_resized_wh


	def World_to_Camera(image_pose):
	    """Assemble a 4x4 world-to-camera matrix from a quaternion + translation.

	    Args:
	        image_pose: indexable sequence whose first four entries are a
	            quaternion in (w, x, y, z) order and whose entries 4..6 are the
	            translation. The quaternion is normalised here, so it does not
	            need to be unit length (it must be non-zero).

	    Returns:
	        np.ndarray: (4, 4) float matrix laid out as ``[[R, t], [0, 0, 0, 1]]``.
	    """
	    quat = image_pose[:4]
	    qw, qx, qy, qz = quat / np.linalg.norm(quat)

	    # Standard quaternion -> rotation-matrix expansion, one row per line group.
	    rotation = np.array([
	        [
	            1 - 2 * qy * qy - 2 * qz * qz,
	            2 * qx * qy - 2 * qz * qw,
	            2 * qx * qz + 2 * qy * qw,
	        ],
	        [
	            2 * qx * qy + 2 * qz * qw,
	            1 - 2 * qx * qx - 2 * qz * qz,
	            2 * qy * qz - 2 * qx * qw,
	        ],
	        [
	            2 * qx * qz - 2 * qy * qw,
	            2 * qy * qz + 2 * qx * qw,
	            1 - 2 * qx * qx - 2 * qy * qy,
	        ],
	    ])

	    # Start from identity so the homogeneous bottom row is already [0, 0, 0, 1].
	    pose = np.eye(4)
	    pose[:3, :3] = rotation
	    pose[:3, 3] = image_pose[4:7]
	    return pose


	def read_depth(filename):
	    """Load a raw binary file as a flat array of 32-bit floats.

	    Args:
	        filename: path of the file to read; its whole content is interpreted
	            as consecutive float32 values.

	    Returns:
	        np.ndarray: 1-D float32 array. No reshaping to image dimensions is
	        done here.
	    """
	    with open(filename, 'rb') as stream:
	        return np.fromfile(stream, dtype=np.float32)


	def pad_bottom_right(inp, pad_size, ret_mask=False):
	    """Zero-pad an array along its bottom and right edges.

	    The requested height is rounded up to the next multiple of 8; the
	    requested width is used as given. ``inp`` is copied into the top-left
	    corner of the zero-filled canvas.

	    Args:
	        inp (np.ndarray): (H, W) or (H, W, C) array.
	        pad_size: ``(height, width)`` of the requested canvas.
	        ret_mask (bool): when true, also build a validity mask.

	    Returns:
	        tuple: ``(padded, mask)``. ``padded`` has the dtype of ``inp`` and
	        shape (H_pad, W_pad) or (H_pad, W_pad, C). ``mask`` is a boolean
	        (H_pad, W_pad) array that is True over the area occupied by ``inp``,
	        or None when ``ret_mask`` is falsy.

	    Raises:
	        NotImplementedError: if ``inp`` is neither 2-D nor 3-D.
	    """
	    out_h = math.ceil(pad_size[0] / 8) * 8
	    out_w = pad_size[1]
	    if inp.ndim not in (2, 3):
	        raise NotImplementedError()

	    in_h, in_w = inp.shape[:2]
	    # A trailing channel axis, if present, is carried over unchanged.
	    padded = np.zeros((out_h, out_w) + tuple(inp.shape[2:]), dtype=inp.dtype)
	    padded[:in_h, :in_w] = inp

	    mask = None
	    if ret_mask:
	        mask = np.zeros((out_h, out_w), dtype=bool)
	        mask[:in_h, :in_w] = True
	    return padded, mask


	def read_images(path, max_resize, df, padding, augment_fn=None, image=None):
	    """Load an RGB image and return grayscale and colour tensors capped at ``max_resize``.

	    Args:
	        path (str): image location, forwarded to ``imread_color``; not read
	            when ``image`` is supplied.
	        max_resize (int): size limit for the longer image side. When
	            ``max(w, h)`` exceeds it, the image is resized to the size
	            returned by ``get_resized_wh(w, h, max_resize)``; otherwise the
	            original size is kept. Must not be None.
	        df (int, optional): image size division factor. Not used by this
	            function; retained in the signature for callers.
	        padding (bool): if truthy, zero-pad both outputs at the bottom/right
	            via ``pad_bottom_right`` to width ``max_resize`` and height
	            ``int(max_resize / 1.5)`` rounded up to a multiple of 8.
	        augment_fn (callable, optional): forwarded to ``imread_color``.
	        image (np.ndarray, optional): already-loaded (H, W, 3) RGB image to
	            use instead of reading ``path``.

	    Returns:
	        tuple: ``(gray, image, scale, resize, mask)`` where

	        - gray (torch.Tensor): (1, h, w) float grayscale image divided by 255.
	        - image (torch.Tensor): (3, h, w) float colour image divided by 255.
	        - scale (torch.Tensor): float ``[w / w_new, h / h_new]``.
	        - resize (list): ``[h_new, w_new]``, the size before any padding.
	        - mask (torch.Tensor or None): (h, w) boolean tensor, True over real
	          image pixels; None when ``padding`` is falsy.
	    """
	    assert max_resize is not None

	    if image is None:
	        image = imread_color(path, augment_fn)
	    # Grayscale is derived from the full-resolution image, before resizing.
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

	    h, w = image.shape[:2]
	    if max(w, h) > max_resize:
	        w_new, h_new = get_resized_wh(w, h, max_resize)
	    else:
	        w_new, h_new = w, h

	    target_wh = (w_new, h_new)  # cv2.resize expects (width, height)
	    image = cv2.resize(image, target_wh)
	    gray = cv2.resize(gray, target_wh)
	    scale = torch.tensor([w / w_new, h / h_new], dtype=torch.float)

	    mask = None
	    if padding:
	        pad_hw = (int(max_resize / 1.5), max_resize)
	        image = pad_bottom_right(image, pad_hw, ret_mask=False)[0]
	        gray, valid = pad_bottom_right(gray, pad_hw, ret_mask=True)
	        mask = torch.from_numpy(valid)

	    gray = torch.from_numpy(gray).float().unsqueeze(0) / 255  # (1, h, w)
	    image = (torch.from_numpy(image).float() / 255).permute(2, 0, 1)  # (3, h, w)

	    return gray, image, scale, [h_new, w_new], mask