Spaces:
Runtime error
Runtime error
File size: 6,597 Bytes
19c4ddf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Tuple
import numpy as np
@dataclass
class Camera(ABC):
    """
    Abstract description of how a camera relates to the pixels of an image.

    Subclasses provide the pixel grid, the ray for each pixel, and derived
    cameras obtained by cropping, resizing, or rescaling the scene.
    """

    @abstractmethod
    def image_coords(self) -> np.ndarray:
        """
        Enumerate the image coordinates of this camera.

        :return: a [height x width x 2] grid of image coordinates, flattened
                 to shape [(height * width) x 2].
        """

    @abstractmethod
    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        Compute the ray of the pixel at each (x, y) coordinate of a rendered
        image.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 2 x 3] array holding one [2 x 3] (origin, direction)
                 pair per coordinate. Directions should always be unit length.
        """

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        Get, for each (x, y) coordinate of a rendered image, the direction
        that corresponds to "depth" in an RGBD rendering.

        This may raise an exception if there is no "D" channel in the
        corresponding ViewData. The base implementation always raises
        NotImplementedError.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 3] array of normalized depth directions.
        """
        raise NotImplementedError()

    @abstractmethod
    def center_crop(self) -> "Camera":
        """
        Build a new camera that keeps this camera's intrinsics and direction
        but is center-cropped to a square whose side is the smaller image
        dimension.
        """

    @abstractmethod
    def resize_image(self, width: int, height: int) -> "Camera":
        """
        Build a new camera that keeps this camera's intrinsics and direction
        but uses the given image dimensions.
        """

    @abstractmethod
    def scale_scene(self, factor: float) -> "Camera":
        """
        Build a new camera that keeps this camera's intrinsics and direction
        but views the scene rescaled by the given factor.
        """
@dataclass
class ProjectiveCamera(Camera):
    """
    A Camera implementation for a standard pinhole camera.

    The camera rays shoot away from the origin in the z direction, with the x
    and y directions corresponding to the positive horizontal and vertical axes
    in image space.
    """

    # Ray origin shared by every pixel (camera_rays broadcasts it per ray).
    origin: np.ndarray
    # Image-space horizontal axis, scaled by each pixel's horizontal offset.
    x: np.ndarray
    # Image-space vertical axis, scaled by each pixel's vertical offset.
    y: np.ndarray
    # Viewing axis; its normalized form is also the depth direction.
    z: np.ndarray
    # Image dimensions in pixels.
    width: int
    height: int
    # Full horizontal / vertical field of view in radians (halved, then
    # passed to np.tan in camera_rays).
    x_fov: float
    y_fov: float

    def image_coords(self) -> np.ndarray:
        """
        :return: a float32 [(height * width) x 2] array of (x, y) pixel
                 coordinates, ordered row by row (x varies fastest).
        """
        ind = np.arange(self.width * self.height)
        coords = np.stack([ind % self.width, ind // self.width], axis=1).astype(np.float32)
        return coords

    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        Compute the (origin, direction) ray for each image coordinate.

        Pixel 0 maps to offset -1 and the last pixel to offset +1 along each
        axis; offsets are scaled by tan(fov / 2). An axis that is a single
        pixel wide has no extent, so its only pixel is placed at the center
        (offset 0) instead of dividing by zero and yielding NaN rays.

        :param coords: an [N x 2] array of (x, y) image coordinates.
        :return: an [N x 2 x 3] array of (origin, unit direction) pairs.
        """
        span = np.array([self.width, self.height], dtype=np.float32) - 1
        # One-pixel axes would give 0 / 0; divide by 1 and zero them below.
        degenerate = span == 0
        span[degenerate] = 1
        fracs = (coords / span) * 2 - 1
        fracs[:, degenerate] = 0
        fracs = fracs * np.tan(np.array([self.x_fov, self.y_fov]) / 2)
        directions = self.z + self.x * fracs[:, :1] + self.y * fracs[:, 1:]
        directions = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
        return np.stack([np.broadcast_to(self.origin, directions.shape), directions], axis=1)

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        :param coords: an [N x 2] array of image coordinates; only its length
                       is used.
        :return: the normalized z axis repeated once per coordinate.
        """
        return np.tile((self.z / np.linalg.norm(self.z))[None], [len(coords), 1])

    def resize_image(self, width: int, height: int) -> "ProjectiveCamera":
        """
        Creates a new camera for the resized view assuming the aspect ratio does not change.

        :param width: new image width in pixels.
        :param height: new image height in pixels.
        :raises AssertionError: if the new size changes the aspect ratio.
        """
        # Cross-multiplied so the aspect-ratio comparison stays in exact integers.
        assert width * self.height == height * self.width, "The aspect ratio should not change."
        return ProjectiveCamera(
            origin=self.origin,
            x=self.x,
            y=self.y,
            z=self.z,
            width=width,
            height=height,
            x_fov=self.x_fov,
            y_fov=self.y_fov,
        )

    def center_crop(self) -> "ProjectiveCamera":
        """
        Creates a new camera for the center-cropped view.

        The result is square, with the smaller of the two image dimensions as
        its side and the smaller of the two fields of view on both axes.
        """
        # NOTE(review): reusing the smaller fov presumably relies on the
        # smaller fov belonging to the smaller dimension - confirm.
        size = min(self.width, self.height)
        fov = min(self.x_fov, self.y_fov)
        return ProjectiveCamera(
            origin=self.origin,
            x=self.x,
            y=self.y,
            z=self.z,
            width=size,
            height=size,
            x_fov=fov,
            y_fov=fov,
        )

    def scale_scene(self, factor: float) -> "ProjectiveCamera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with the camera frame rescaled by the given factor.

        Only the origin is multiplied by the factor; the axes, image size and
        fields of view are carried over unchanged.
        """
        return ProjectiveCamera(
            origin=self.origin * factor,
            x=self.x,
            y=self.y,
            z=self.z,
            width=self.width,
            height=self.height,
            x_fov=self.x_fov,
            y_fov=self.y_fov,
        )
class ViewData(ABC):
    """
    A set of rendered camera views of one scene or object.

    This generalizes a NeRF dataset: NeRF datasets only encode RGB or RGBA
    data, whereas this dataset supports arbitrary channels.
    """

    @property
    @abstractmethod
    def num_views(self) -> int:
        """
        How many rendered views are available.
        """

    @property
    @abstractmethod
    def channel_names(self) -> List[str]:
        """
        The names of every channel available for the views.

        Names can be arbitrary, but some are standard:
        "R", "G", "B", "A" (alpha), and "D" (depth).
        """

    @abstractmethod
    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Load the requested channels of the view at the given index.

        :param index: which view to load.
        :param channels: names of the channels to load.
        :return: a tuple (camera_view, data), where data is a float array of
                 shape [height x width x num_channels].
        """
class MemoryViewData(ViewData):
    """
    A ViewData backed entirely by in-memory arrays.
    """

    def __init__(self, channels: Dict[str, np.ndarray], cameras: List[Camera]):
        """
        :param channels: maps each channel name to an array whose first axis
                         indexes the views.
        :param cameras: one camera per view.
        """
        # Every channel must hold exactly one entry per camera.
        assert not any(data.shape[0] != len(cameras) for data in channels.values())
        self.channels = channels
        self.cameras = cameras

    @property
    def num_views(self) -> int:
        """
        The number of views, i.e. the number of stored cameras.
        """
        view_count = len(self.cameras)
        return view_count

    @property
    def channel_names(self) -> List[str]:
        """
        The names of the stored channels, in the dict's iteration order.
        """
        return [name for name in self.channels.keys()]

    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Fetch the camera of one view together with the requested channels.

        :param index: which view to load.
        :param channels: channel names, in the order they should be stacked.
        :return: a tuple (camera, data), with the selected channel slices
                 stacked along a new last axis.
        """
        planes = []
        for name in channels:
            planes.append(self.channels[name][index])
        camera = self.cameras[index]
        stacked = np.stack(planes, axis=-1)
        return camera, stacked
|