Metric3D / training /mono /datasets /eth3d_dataset.py
zach
initial commit based on github repo
3ef1661
raw
history blame
4.13 kB
import os
import json
import torch
import torchvision.transforms as transforms
import os.path
import numpy as np
import cv2
from torch.utils.data import Dataset
import random
from .__base_dataset__ import BaseDataset
class ETH3DDataset(BaseDataset):
    """Dataset wrapper for ETH3D samples listed in ``self.annotations['files']``.

    Each item pairs an RGB image (read with OpenCV) with a depth map stored as
    a raw, header-less float32 dump. The camera intrinsics are fixed for the
    whole dataset (see ``DEFAULT_INTRINSIC``) rather than read from the
    per-sample meta data.
    """

    # (height, width) used to reshape the flat float32 depth dump.
    DEPTH_SHAPE = (4032, 6048)
    # Depth values strictly greater than this are set to 0 after loading.
    MAX_VALID_DEPTH = 100
    # Intrinsics handed to create_cam_model / img_transforms.
    # NOTE(review): presumably [fx, fy, u0, v0] -- confirm against BaseDataset.
    DEFAULT_INTRINSIC = (2000, 2000, 3024, 2016)

    def __init__(self, cfg, phase, **kwargs):
        """Forward ``cfg``/``phase``/``kwargs`` to BaseDataset and store ``cfg.metric_scale``."""
        super(ETH3DDataset, self).__init__(
            cfg=cfg,
            phase=phase,
            **kwargs)
        self.metric_scale = cfg.metric_scale

    def _load_depth(self, depth_path):
        """Read the raw float32 depth dump at ``depth_path`` as a 2-D array.

        Values above ``MAX_VALID_DEPTH`` are zeroed in the returned array.

        Raises:
            ValueError: if the file does not hold exactly
                ``DEPTH_SHAPE[0] * DEPTH_SHAPE[1]`` float32 values.
        """
        # Pass the path straight to numpy so the data is read as binary; the
        # previous code opened the file in text mode before handing it over.
        flat_depth = np.fromfile(depth_path, dtype=np.float32)
        expected_size = self.DEPTH_SHAPE[0] * self.DEPTH_SHAPE[1]
        if flat_depth.size != expected_size:
            raise ValueError(
                'Depth file {} holds {} float32 values, expected {} for shape {}.'.format(
                    depth_path, flat_depth.size, expected_size, self.DEPTH_SHAPE))
        depth = flat_depth.reshape(self.DEPTH_SHAPE)
        depth[depth > self.MAX_VALID_DEPTH] = 0
        return depth

    def __getitem__(self, idx):
        """Load, transform and package the sample at position ``idx``.

        Returns:
            dict with the keys ``input``, ``target``, ``intrinsic``,
            ``filename``, ``dataset``, ``cam_model``, ``ref_input``,
            ``tmpl_flg``, ``pad``, ``scale``, ``raw_rgb`` and ``normal``.

        Raises:
            FileNotFoundError: if the RGB image cannot be read by OpenCV or
                the depth file does not exist.
            ValueError: if the depth file has an unexpected size.
        """
        anno = self.annotations['files'][idx]
        curr_rgb_path = os.path.join(self.data_root, anno['rgb_path'])
        curr_depth_path = os.path.join(self.depth_root, anno['depth_path'])

        # The returned meta data is not used because the intrinsics are fixed;
        # the call is kept in case the base class relies on it being made.
        self.load_meta_data(anno)

        # Fresh list per item so downstream code can never alter the constant.
        ori_curr_intrinsic = list(self.DEFAULT_INTRINSIC)

        # cv2.imread yields channels in BGR order and returns None on failure.
        # NOTE(review): confirm whether img_transforms converts BGR to RGB.
        curr_rgb = cv2.imread(curr_rgb_path)
        if curr_rgb is None:
            raise FileNotFoundError(
                'Failed to read RGB image: {}'.format(curr_rgb_path))

        curr_depth = self._load_depth(curr_depth_path)

        # Camera model built from the image height/width and the intrinsics.
        curr_cam_model = self.create_cam_model(
            curr_rgb.shape[0], curr_rgb.shape[1], ori_curr_intrinsic)

        transform_paras = dict()
        rgbs, _depths, intrinsics, cam_models, _, _other_labels, transform_paras = self.img_transforms(
            images=[curr_rgb, ],
            labels=[curr_depth, ],
            intrinsics=[ori_curr_intrinsic, ],
            cam_models=[curr_cam_model, ],
            transform_paras=transform_paras)

        # The target is computed from curr_depth (the array loaded above),
        # not from the `depths` returned by img_transforms.
        depth_out = self.clip_depth(curr_depth) * self.depth_range[1]

        filename = os.path.basename(anno['rgb_path'])
        curr_intrinsic_mat = self.intrinsics_list2mat(intrinsics[0])

        # Fall back to neutral values when the transforms did not record them.
        pad = transform_paras['pad'] if 'pad' in transform_paras else [0, 0, 0, 0]
        scale_ratio = transform_paras['label_scale_factor'] if 'label_scale_factor' in transform_paras else 1.0

        # Bilinearly downsampled copies of the camera model at 1/2 ... 1/32 size.
        cam_model = cam_models[0]
        cam_models_stacks = [
            torch.nn.functional.interpolate(
                cam_model[None, :, :, :],
                size=(cam_model.shape[1] // i, cam_model.shape[2] // i),
                mode='bilinear',
                align_corners=False).squeeze()
            for i in [2, 4, 8, 16, 32]
        ]

        raw_rgb = torch.from_numpy(curr_rgb)
        data = dict(input=rgbs[0],
                    target=depth_out,
                    intrinsic=curr_intrinsic_mat,
                    filename=filename,
                    dataset=self.data_name,
                    cam_model=cam_models_stacks,
                    ref_input=rgbs[1:],
                    tmpl_flg=False,
                    pad=pad,
                    scale=scale_ratio,
                    raw_rgb=raw_rgb,
                    # All-zero placeholder with the channel-first shape of curr_rgb.
                    normal=np.zeros_like(curr_rgb.transpose((2, 0, 1))),
                    )
        return data

    def process_depth(self, depth):
        """Zero values above 65500 and divide by ``self.metric_scale``, in place.

        The input array is modified and returned. This method is not called by
        ``__getitem__`` in this file.
        NOTE(review): the in-place division presumably needs a floating-point
        array -- confirm the dtype used by callers.
        """
        depth[depth > 65500] = 0
        depth /= self.metric_scale
        return depth
if __name__ == '__main__':
    # Manual smoke test: build the dataset from a config file and print it.
    from mmcv.utils import Config
    # NOTE(review): the config path and the 'Apolloscape' key look copied from
    # another dataset's script -- confirm the entry intended for ETH3D.
    cfg = Config.fromfile('mono/configs/Apolloscape_DDAD/convnext_base.cascade.1m.sgd.mae.py')
    # The class defined in this file is ETH3DDataset; the previous code
    # referenced NYUDataset, which is neither defined nor imported here.
    dataset_i = ETH3DDataset(cfg['Apolloscape'], 'train', **cfg.data_basic)
    print(dataset_i)