import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import argparse
import glob
import json

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

from eval.video_depth.metadata import dataset_metadata
from eval.video_depth.tools import depth_evaluation, group_by_directory


def get_args_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output_dir",
        type=str,
        default="",
        help="directory containing the per-sequence depth predictions",
    )
    parser.add_argument(
        "--eval_dataset",
        type=str,
        default="nyu",
        choices=list(dataset_metadata.keys()),
    )
    parser.add_argument(
        "--align",
        type=str,
        default="scale&shift",
        choices=["scale&shift", "scale", "metric"],
    )
    return parser


def main(args):
    if args.eval_dataset == "sintel":
        TAG_FLOAT = 202021.25  # magic number at the head of Sintel .dpt files

        def depth_read(filename):
            """Read depth data from a Sintel .dpt file, return as numpy array."""
            with open(filename, "rb") as f:
                check = np.fromfile(f, dtype=np.float32, count=1)[0]
                assert (
                    check == TAG_FLOAT
                ), " depth_read:: Wrong tag in depth file (should be: {0}, is: {1}). Big-endian machine? ".format(
                    TAG_FLOAT, check
                )
                width = np.fromfile(f, dtype=np.int32, count=1)[0]
                height = np.fromfile(f, dtype=np.int32, count=1)[0]
                size = width * height
                assert (
                    width > 0 and height > 0 and size > 1 and size < 100000000
                ), " depth_read:: Wrong input size (width = {0}, height = {1}).".format(
                    width, height
                )
                depth = np.fromfile(f, dtype=np.float32, count=-1).reshape(
                    (height, width)
                )
            return depth

        pred_pathes = glob.glob(
            f"{args.output_dir}/*/frame_*.npy"
        )  # TODO: update the path to your prediction
        pred_pathes = sorted(pred_pathes)

        # More than 643 prediction files means the full Sintel training set was
        # processed; otherwise evaluate on the 14-sequence subset below.
        full = len(pred_pathes) > 643

        if full:
            depth_pathes = glob.glob("data/sintel/training/depth/*/*.dpt")
            depth_pathes = sorted(depth_pathes)
        else:
            seq_list = [
                "alley_2",
                "ambush_4",
                "ambush_5",
                "ambush_6",
                "cave_2",
                "cave_4",
                "market_2",
                "market_5",
                "market_6",
                "shaman_3",
                "sleeping_1",
                "sleeping_2",
                "temple_2",
                "temple_3",
            ]
            depth_pathes_folder = [
                f"data/sintel/training/depth/{seq}" for seq in seq_list
            ]
            depth_pathes = []
            for depth_pathes_folder_i in depth_pathes_folder:
                depth_pathes += glob.glob(depth_pathes_folder_i + "/*.dpt")
            depth_pathes = sorted(depth_pathes)

        def get_video_results():
            grouped_pred_depth = group_by_directory(pred_pathes)
            grouped_gt_depth = group_by_directory(depth_pathes)
            gathered_depth_metrics = []
            for key in tqdm(grouped_pred_depth.keys()):
                pd_pathes = grouped_pred_depth[key]
                gt_pathes = grouped_gt_depth[key.replace("_pred_depth", "")]
                gt_depth = np.stack(
                    [depth_read(gt_path) for gt_path in gt_pathes], axis=0
                )
                # Resize each predicted depth map to the ground-truth resolution.
                pr_depth = np.stack(
                    [
                        cv2.resize(
                            np.load(pd_path),
                            (gt_depth.shape[2], gt_depth.shape[1]),
                            interpolation=cv2.INTER_CUBIC,
                        )
                        for pd_path in pd_pathes
                    ],
                    axis=0,
                )
                # Alignment between prediction and ground truth:
                # align_with_lad2=True solves a per-sequence scale & shift,
                # align_with_scale=True solves a scale only, and
                # metric_scale=True evaluates the raw metric predictions.
                if args.align == "scale&shift":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=70,
                            align_with_lad2=True,
                            use_gpu=True,
                            post_clip_max=70,
                        )
                    )
                elif args.align == "scale":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=70,
                            align_with_scale=True,
                            use_gpu=True,
                            post_clip_max=70,
                        )
                    )
                elif args.align == "metric":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=70,
                            metric_scale=True,
                            use_gpu=True,
                            post_clip_max=70,
                        )
                    )
                gathered_depth_metrics.append(depth_results)

            depth_log_path = f"{args.output_dir}/result_{args.align}.json"
            # Weight each sequence's metrics by its number of valid pixels.
            average_metrics = {
                key: np.average(
                    [metrics[key] for metrics in gathered_depth_metrics],
                    weights=[
                        metrics["valid_pixels"] for metrics in gathered_depth_metrics
                    ],
                )
                for key in gathered_depth_metrics[0].keys()
                if key != "valid_pixels"
            }
            print("Average depth evaluation metrics:", average_metrics)
            with open(depth_log_path, "w") as f:
                f.write(json.dumps(average_metrics))

        get_video_results()

    elif args.eval_dataset == "bonn":

        def depth_read(filename):
            # Loads depth map D from a 16-bit png file and returns it as a
            # numpy array in meters (Bonn stores depth scaled by 5000).
            depth_png = np.asarray(Image.open(filename))
            # Make sure we have a proper 16-bit depth map here, not 8-bit!
            assert np.max(depth_png) > 255
            depth = depth_png.astype(np.float64) / 5000.0
            depth[depth_png == 0] = -1.0
            return depth

        seq_list = ["balloon2", "crowd2", "crowd3", "person_tracking2", "synchronous"]
        img_pathes_folder = [
            f"data/bonn/rgbd_bonn_dataset/rgbd_bonn_{seq}/rgb_110/*.png"
            for seq in seq_list
        ]
        img_pathes = []
        for img_pathes_folder_i in img_pathes_folder:
            img_pathes += glob.glob(img_pathes_folder_i)
        img_pathes = sorted(img_pathes)
        depth_pathes_folder = [
            f"data/bonn/rgbd_bonn_dataset/rgbd_bonn_{seq}/depth_110/*.png"
            for seq in seq_list
        ]
        depth_pathes = []
        for depth_pathes_folder_i in depth_pathes_folder:
            depth_pathes += glob.glob(depth_pathes_folder_i)
        depth_pathes = sorted(depth_pathes)
        pred_pathes = glob.glob(
            f"{args.output_dir}/*/frame*.npy"
        )  # TODO: update the path to your prediction
        pred_pathes = sorted(pred_pathes)

        def get_video_results():
            grouped_pred_depth = group_by_directory(pred_pathes)
            grouped_gt_depth = group_by_directory(depth_pathes, idx=-2)
            gathered_depth_metrics = []
            for key in tqdm(grouped_gt_depth.keys()):
                # Ground-truth keys carry the "rgbd_bonn_" prefix (10 chars);
                # strip it to look up the matching prediction directory.
                pd_pathes = grouped_pred_depth[key[10:]]
                gt_pathes = grouped_gt_depth[key]
                gt_depth = np.stack(
                    [depth_read(gt_path) for gt_path in gt_pathes], axis=0
                )
                pr_depth = np.stack(
                    [
                        cv2.resize(
                            np.load(pd_path),
                            (gt_depth.shape[2], gt_depth.shape[1]),
                            interpolation=cv2.INTER_CUBIC,
                        )
                        for pd_path in pd_pathes
                    ],
                    axis=0,
                )
                # Same alignment options as for Sintel, but without clipping
                # the aligned predictions (no post_clip_max).
                if args.align == "scale&shift":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=70,
                            align_with_lad2=True,
                            use_gpu=True,
                        )
                    )
                elif args.align == "scale":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=70,
                            align_with_scale=True,
                            use_gpu=True,
                        )
                    )
                elif args.align == "metric":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=70,
                            metric_scale=True,
                            use_gpu=True,
                        )
                    )
                gathered_depth_metrics.append(depth_results)
                # Optional error-map visualization, kept for reference:
                # seq_len = gt_depth.shape[0]
                # error_map = error_map.reshape(seq_len, -1, error_map.shape[-1]).cpu()
                # error_map_colored = colorize(error_map, range=(error_map.min(), error_map.max()), append_cbar=True)
                # ImageSequenceClip([x for x in (error_map_colored.numpy()*255).astype(np.uint8)], fps=10).write_videofile(f'{args.output_dir}/errormap_{key}_{args.align}.mp4', fps=10)

            depth_log_path = f"{args.output_dir}/result_{args.align}.json"
            average_metrics = {
                key: np.average(
                    [metrics[key] for metrics in gathered_depth_metrics],
                    weights=[
                        metrics["valid_pixels"] for metrics in gathered_depth_metrics
                    ],
                )
                for key in gathered_depth_metrics[0].keys()
                if key != "valid_pixels"
            }
            print("Average depth evaluation metrics:", average_metrics)
            with open(depth_log_path, "w") as f:
                f.write(json.dumps(average_metrics))

        get_video_results()

    elif args.eval_dataset == "kitti":

        def depth_read(filename):
            # Loads depth map D from a png file and returns it as a numpy
            # array; for details see the readme.txt of the KITTI depth devkit.
            img_pil = Image.open(filename)
            depth_png = np.array(img_pil, dtype=int)
            # Make sure we have a proper 16-bit depth map here, not 8-bit!
            assert np.max(depth_png) > 255
            depth = depth_png.astype(float) / 256.0
            depth[depth_png == 0] = -1.0
            return depth

        depth_pathes = glob.glob(
            "data/kitti/depth_selection/val_selection_cropped/groundtruth_depth_gathered/*/*.png"
        )
        depth_pathes = sorted(depth_pathes)
        pred_pathes = glob.glob(
            f"{args.output_dir}/*/frame_*.npy"
        )  # TODO: update the path to your prediction
        pred_pathes = sorted(pred_pathes)

        def get_video_results():
            grouped_pred_depth = group_by_directory(pred_pathes)
            grouped_gt_depth = group_by_directory(depth_pathes)
            gathered_depth_metrics = []
            for key in tqdm(grouped_pred_depth.keys()):
                pd_pathes = grouped_pred_depth[key]
                gt_pathes = grouped_gt_depth[key]
                gt_depth = np.stack(
                    [depth_read(gt_path) for gt_path in gt_pathes], axis=0
                )
                pr_depth = np.stack(
                    [
                        cv2.resize(
                            np.load(pd_path),
                            (gt_depth.shape[2], gt_depth.shape[1]),
                            interpolation=cv2.INTER_CUBIC,
                        )
                        for pd_path in pd_pathes
                    ],
                    axis=0,
                )
                # Same alignment options as above; KITTI is evaluated without
                # a max-depth cap (max_depth=None).
                if args.align == "scale&shift":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=None,
                            align_with_lad2=True,
                            use_gpu=True,
                        )
                    )
                elif args.align == "scale":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=None,
                            align_with_scale=True,
                            use_gpu=True,
                        )
                    )
                elif args.align == "metric":
                    depth_results, error_map, depth_predict, depth_gt = (
                        depth_evaluation(
                            pr_depth,
                            gt_depth,
                            max_depth=None,
                            metric_scale=True,
                            use_gpu=True,
                        )
                    )
                gathered_depth_metrics.append(depth_results)

            depth_log_path = f"{args.output_dir}/result_{args.align}.json"
            average_metrics = {
                key: np.average(
                    [metrics[key] for metrics in gathered_depth_metrics],
                    weights=[
                        metrics["valid_pixels"] for metrics in gathered_depth_metrics
                    ],
                )
                for key in gathered_depth_metrics[0].keys()
                if key != "valid_pixels"
            }
            print("Average depth evaluation metrics:", average_metrics)
            with open(depth_log_path, "w") as f:
                f.write(json.dumps(average_metrics))

        get_video_results()


if __name__ == "__main__":
    args = get_args_parser()
    args = args.parse_args()
    main(args)
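
# Example invocation (a sketch; the script path and results layout below are
# assumptions, and predictions are expected per sequence as
# <output_dir>/<sequence>/frame_*.npy):
#
#   python eval/video_depth/eval_depth.py \
#       --output_dir results/sintel --eval_dataset sintel --align "scale&shift"
#
# "scale&shift" must be quoted in the shell: an unquoted `&` would background
# the command.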