File size: 3,109 Bytes
19c4ddf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import itertools
import json
import zipfile
from typing import BinaryIO, List, Tuple

import numpy as np
from PIL import Image

from shap_e.rendering.view_data import Camera, ProjectiveCamera, ViewData


class BlenderViewData(ViewData):
    """
    Interact with a dataset zipfile exported by view_data.py.

    The archive is expected to contain an ``info.json`` entry plus per-view
    metadata entries named ``00000.json``, ``00001.json``, ... Image data for
    a view is only read from the archive when ``load_view`` is called.
    """

    def __init__(self, f_obj: BinaryIO):
        """
        Open the archive and read all of its JSON metadata.

        :param f_obj: a binary file object holding the zip archive. It is
                      wrapped in a ZipFile stored on ``self.zipfile`` and read
                      again by ``load_view``.
        :raises ValueError: if the channels listed in ``info.json`` do not
                            include all of R, G, B and A.
        """
        self.zipfile = zipfile.ZipFile(f_obj, mode="r")
        self.infos = []
        with self.zipfile.open("info.json", "r") as f:
            self.info = json.load(f)
        # When info.json has no "channels" key, fall back to R, G, B, A, D.
        self.channels = list(self.info.get("channels", "RGBAD"))
        # Raise explicitly instead of using `assert`, so that the check is not
        # stripped when Python runs with -O.
        if not set("RGBA").issubset(self.channels):
            raise ValueError("The blender output should at least have RGBA images.")
        names = set(self.zipfile.namelist())
        # Views are numbered consecutively from zero; the first missing
        # metadata entry marks the end of the dataset.
        for i in itertools.count():
            name = f"{i:05}.json"
            if name not in names:
                break
            with self.zipfile.open(name, "r") as f:
                self.infos.append(json.load(f))

    @property
    def num_views(self) -> int:
        """
        The number of per-view metadata entries found in the archive.
        """
        return len(self.infos)

    @property
    def channel_names(self) -> List[str]:
        """
        A fresh copy of the channel names available in this dataset.
        """
        return list(self.channels)

    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Load the requested channels of one view, along with its camera.

        :param index: the index of the view to load.
        :param channels: the channel names to return, in the desired order.
        :return: a tuple (camera, data), where data is a
                 [height x width x len(channels)] array whose last axis
                 follows the order of ``channels``, and camera is built for
                 that same width and height.
        :raises ValueError: if a requested channel is not available.
        """
        available = self.channels
        for ch in channels:
            if ch not in available:
                raise ValueError(f"unsupported channel: {ch}")

        # Gather (a superset of) the requested channels.
        channel_map = {}
        if any(x in channels for x in "RGBA"):
            with self.zipfile.open(f"{index:05}.png", "r") as f:
                # Rescale pixel values by 1/255 and split the last image axis
                # into one plane per color channel.
                rgba = np.array(Image.open(f)).astype(np.float32) / 255.0
                channel_map.update(zip("RGBA", rgba.transpose([2, 0, 1])))
        if "D" in channels:
            with self.zipfile.open(f"{index:05}_depth.png", "r") as f:
                # Decode a 16-bit fixed-point number.
                fp = np.array(Image.open(f))
                # The value 0xFFFF is treated as infinitely far away; all
                # other values are scaled by this view's max_depth.
                inf_dist = fp == 0xFFFF
                channel_map["D"] = np.where(
                    inf_dist,
                    np.inf,
                    self.infos[index]["max_depth"] * (fp.astype(np.float32) / 65536),
                )
        if "MatAlpha" in channels:
            with self.zipfile.open(f"{index:05}_MatAlpha.png", "r") as f:
                channel_map["MatAlpha"] = np.array(Image.open(f)).astype(np.float32) / 65536

        # The order of channels is user-specified.
        combined = np.stack([channel_map[k] for k in channels], axis=-1)

        h, w, _ = combined.shape
        return self.camera(index, w, h), combined

    def camera(self, index: int, width: int, height: int) -> ProjectiveCamera:
        """
        Build the camera for a view from its stored metadata.

        :param index: the index of the view.
        :param width: the image width to assign to the camera.
        :param height: the image height to assign to the camera.
        :return: a ProjectiveCamera using the view's origin, x, y, z, x_fov
                 and y_fov metadata entries.
        """
        info = self.infos[index]
        return ProjectiveCamera(
            origin=np.array(info["origin"], dtype=np.float32),
            x=np.array(info["x"], dtype=np.float32),
            y=np.array(info["y"], dtype=np.float32),
            z=np.array(info["z"], dtype=np.float32),
            width=width,
            height=height,
            x_fov=info["x_fov"],
            y_fov=info["y_fov"],
        )