#
# Toyota Motor Europe NV/SA and its affiliated companies retain all intellectual
# property and proprietary rights in and to this software and related documentation.
# Any commercial use, reproduction, disclosure or distribution of this software and
# related documentation without an express license agreement from Toyota Motor Europe NV/SA
# is strictly prohibited.
#
import math
from typing import Literal, Optional, Tuple

import numpy as np
import torch
import torch.nn.functional as F
from scipy.spatial.transform import Rotation
def align_cameras_to_axes(
    R: torch.Tensor,
    T: torch.Tensor,
    target_convention: Optional[Literal["opengl", "opencv"]] = None,
):
    """Align the averaged axes of a set of cameras with the world axes.

    Args:
        R: camera-to-world rotation matrices, (N, 3, 3)
        T: camera translation vectors, (N, 3) or (N, 3, 1)
        target_convention: if "opengl", the y and z axes of the new world
            frame are flipped; "opencv" or None leaves them as computed.
    Returns:
        (R, T) re-expressed in the new world coordinate system, with the
        same shapes as the inputs.
    """
    # The column vectors of R are the basis vectors of each camera.
    # We construct new bases by taking the mean directions of axes, then use
    # the Gram-Schmidt process to make them orthonormal.
    bases_c2w = gram_schmidt_orthogonalization(R.mean(0))
    if target_convention == "opengl":
        bases_c2w[:, [1, 2]] *= -1  # flip y and z axes
    elif target_convention == "opencv":
        pass
    bases_w2c = bases_c2w.t()

    # Convert the camera poses into the new coordinate system.
    R = bases_w2c[None, ...] @ R
    if T.dim() == 2:
        # (N, 3): rotate each translation vector and restore the input shape.
        # A direct (1, 3, 3) @ (N, 3) matmul is a shape mismatch for N != 3.
        T = (bases_w2c[None, ...] @ T[..., None]).squeeze(-1)
    else:
        # (N, 3, 1): batched matrix-vector product works as-is.
        T = bases_w2c[None, ...] @ T
    return R, T
def convert_camera_convention(camera_convention_conversion: str, R: torch.Tensor, K: torch.Tensor, H: int, W: int):
    """Convert camera extrinsics and intrinsics between axis conventions (in place).

    Args:
        camera_convention_conversion: one of "opencv->opengl",
            "opencv->pytorch3d", "opengl->pytorch3d", or None for a no-op.
        R: rotation matrices (N, >=3, 3); selected columns are negated in place.
        K: intrinsic matrices (..., 3, 3); the principal point is mirrored in place.
        H: image height in pixels.
        W: image width in pixels.
    Returns:
        The (mutated) R and K.
    Raises:
        ValueError: on an unrecognized conversion string.
    """
    if camera_convention_conversion is None:
        return R, K

    # Each conversion negates a pair of rotation columns and mirrors the
    # corresponding principal-point coordinate(s).
    if camera_convention_conversion == "opencv->opengl":
        flip_cols, mirror_cx, mirror_cy = [1, 2], False, True
    elif camera_convention_conversion == "opencv->pytorch3d":
        flip_cols, mirror_cx, mirror_cy = [0, 1], True, True
    elif camera_convention_conversion == "opengl->pytorch3d":
        flip_cols, mirror_cx, mirror_cy = [0, 2], True, False
    else:
        raise ValueError(
            f"Unknown camera coordinate conversion: {camera_convention_conversion}."
        )

    R[:, :3, flip_cols] *= -1
    if mirror_cx:
        K[..., 0, 2] = W - K[..., 0, 2]  # mirror principal point in x
    if mirror_cy:
        K[..., 1, 2] = H - K[..., 1, 2]  # mirror principal point in y
    return R, K
def gram_schmidt_orthogonalization(M: torch.Tensor):
    """Apply the Gram-Schmidt process to the column vectors of a matrix.

    Args:
        M: a matrix (num_rows, num_cols) whose columns are assumed linearly
            independent.
    Return:
        A new matrix (num_rows, num_cols) with orthonormal column vectors
        spanning the same space. The input tensor is left unmodified
        (the previous version clobbered the caller's tensor in place).
    """
    # Work on a copy so the caller's tensor is not mutated as a side effect.
    M = M.clone()
    num_cols = M.shape[1]
    for c in range(1, num_cols):
        # Normalize the previous and current columns, then subtract from
        # column c its projection onto all already-orthonormalized columns.
        M[:, [c - 1, c]] = F.normalize(M[:, [c - 1, c]], p=2, dim=0)
        M[:, [c]] -= M[:, :c] @ (M[:, :c].T @ M[:, [c]])
    # The last column was de-projected but not re-normalized inside the loop.
    M[:, -1] = F.normalize(M[:, -1], p=2, dim=0)
    return M
def projection_from_intrinsics(K: np.ndarray, image_size: Tuple[int], near: float = 0.01, far: float = 10, flip_y: bool = False, z_sign=-1):
    """Build projection matrices from pinhole intrinsics.

    Transform points from camera space (x: right, y: up, z: out) to clip space
    (x: right, y: down, z: in).

    Args:
        K: Intrinsics, either full matrices (N, 3, 3):
                [[fx, 0, cx],
                 [0, fy, cy],
                 [0,  0,  1]]
            or packed vectors (N, 4) laid out as [fx, fy, cx, cy].
        image_size: (height, width)
        near: near clipping-plane distance.
        far: far clipping-plane distance.
        flip_y: if True, negate the y focal term (vertical flip).
        z_sign: sign of the clip-space z axis (-1 for the OpenGL convention).
    Output:
        proj: (N, 4, 4) matrices of the form
            [[2*fx/w, 0.0,    (w - 2*cx)/w,                 0.0                   ],
             [0.0,    2*fy/h, (h - 2*cy)/h,                 0.0                   ],
             [0.0,    0.0,    z_sign*(far+near)/(far-near), -2*far*near/(far-near)],
             [0.0,    0.0,    z_sign,                       0.0                   ]]
    Raises:
        ValueError: if K is neither (N, 3, 3) nor (N, 4).
    """
    B = K.shape[0]
    h, w = image_size
    if K.shape[-2:] == (3, 3):
        fx = K[..., 0, 0]
        fy = K[..., 1, 1]
        cx = K[..., 0, 2]
        cy = K[..., 1, 2]
    elif K.shape[-1] == 4:
        # Scalar last-dim indexing yields shape (N,). The previous
        # `K[..., [0]]` kept a trailing singleton dim, producing (N, 1)
        # values that cannot be broadcast-assigned into proj[:, i, j]
        # (shape (N,)) and raised a ValueError for N > 1.
        fx = K[..., 0]
        fy = K[..., 1]
        cx = K[..., 2]
        cy = K[..., 3]
    else:
        raise ValueError(f"Expected K to be (N, 3, 3) or (N, 4) but got: {K.shape}")

    proj = np.zeros([B, 4, 4])
    proj[:, 0, 0] = fx * 2 / w
    proj[:, 1, 1] = fy * 2 / h
    proj[:, 0, 2] = (w - 2 * cx) / w
    proj[:, 1, 2] = (h - 2 * cy) / h
    proj[:, 2, 2] = z_sign * (far + near) / (far - near)
    proj[:, 2, 3] = -2 * far * near / (far - near)
    proj[:, 3, 2] = z_sign
    if flip_y:
        proj[:, 1, 1] *= -1
    return proj
class OrbitCamera:
    """Interactive orbit camera around a look-at point.

    Internal state uses the OpenGL convention; queried properties are
    converted to the target convention.
    """

    def __init__(self, W, H, r=2, fovy=60, znear=1e-8, zfar=10, convention: Literal["opengl", "opencv"] = "opengl"):
        """
        Args:
            W, H: image width and height in pixels.
            r: default orbit radius (distance from the look-at point).
            fovy: default vertical field of view in degrees.
            znear, zfar: clipping-plane distances for the projection matrix.
            convention: axis convention exposed by `pose`/`projection_matrix`.
        """
        self.image_width = W
        self.image_height = H
        self.radius_default = r
        self.fovy_default = fovy
        self.znear = znear
        self.zfar = zfar
        self.convention = convention
        self.up = np.array([0, 1, 0], dtype=np.float32)  # world up axis
        self.reset()

    def reset(self):
        """The internal state of the camera is based on the OpenGL convention, but
        properties are converted to the target convention when queried.
        """
        self.rot = Rotation.from_matrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])  # OpenGL convention
        self.look_at = np.array([0, 0, 0], dtype=np.float32)  # look at this point
        self.radius = self.radius_default  # camera distance from center
        self.fovy = self.fovy_default
        if self.convention == "opencv":
            self.z_sign = 1
            self.y_sign = 1
        elif self.convention == "opengl":
            self.z_sign = -1
            self.y_sign = -1
        else:
            raise ValueError(f"Unknown convention: {self.convention}")

    # NOTE: the accessors below are properties — `projection_matrix` indexes
    # `self.intrinsics[None]` and `full_proj_transform` multiplies them, which
    # only works if they evaluate to arrays rather than bound methods.
    @property
    def fovx(self):
        # Horizontal FOV derived from fovy by the aspect ratio.
        return self.fovy / self.image_height * self.image_width

    @property
    def intrinsics(self):
        """[fx, fy, cx, cy] with the principal point at the image center."""
        focal = self.image_height / (2 * np.tan(np.radians(self.fovy) / 2))
        return np.array([focal, focal, self.image_width // 2, self.image_height // 2])

    @property
    def projection_matrix(self):
        """(4, 4) clip-space projection built from the current intrinsics."""
        return projection_from_intrinsics(self.intrinsics[None], (self.image_height, self.image_width), self.znear, self.zfar, z_sign=self.z_sign)[0]

    @property
    def world_view_transform(self):
        return np.linalg.inv(self.pose)  # world2cam

    @property
    def full_proj_transform(self):
        """World -> clip transform (projection composed with world2cam)."""
        return self.projection_matrix @ self.world_view_transform

    @property
    def pose(self):
        """(4, 4) cam2world matrix in the configured convention."""
        # first move camera to radius
        pose = np.eye(4, dtype=np.float32)
        pose[2, 3] += self.radius
        # rotate
        rot = np.eye(4, dtype=np.float32)
        rot[:3, :3] = self.rot.as_matrix()
        pose = rot @ pose
        # translate
        pose[:3, 3] -= self.look_at
        if self.convention == "opencv":
            pose[:, [1, 2]] *= -1  # flip camera y/z axes (OpenGL -> OpenCV)
        elif self.convention == "opengl":
            pass
        else:
            raise ValueError(f"Unknown convention: {self.convention}")
        return pose

    def orbit(self, dx, dy):
        # rotate along camera up/side axis!
        side = self.rot.as_matrix()[:3, 0]
        rotvec_x = self.up * np.radians(-0.3 * dx)
        rotvec_y = side * np.radians(-0.3 * dy)
        self.rot = Rotation.from_rotvec(rotvec_x) * Rotation.from_rotvec(rotvec_y) * self.rot

    def scale(self, delta):
        # Exponential zoom: positive delta moves the camera closer.
        self.radius *= 1.1 ** (-delta)

    def pan(self, dx, dy, dz=0):
        # pan in camera coordinate system (careful on the sensitivity!)
        d = np.array([dx, -dy, dz])  # the y axis is flipped
        self.look_at += 2 * self.rot.as_matrix()[:3, :3] @ d * self.radius / self.image_height * math.tan(np.radians(self.fovy) / 2)
