Spaces:
Runtime error
Runtime error
File size: 6,400 Bytes
2df809d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
#!/usr/bin/env python3
"""
HOI4D Preprocessing Script
This script processes HOI4D data by:
1. Searching specific subdirectories for RGB and depth images.
2. Reading camera intrinsics from a .npy file (one per high-level scene).
3. Rescaling the RGB images and depth maps to a fixed output resolution
(e.g., 640x480) using the 'cropping' module.
4. Saving results (RGB, .npy depth, .npz camera intrinsics) in a new directory structure.
Usage:
python preprocess_hoi4d.py \
--root_dir /path/to/HOI4D_release \
--cam_root /path/to/camera_params \
--out_dir /path/to/processed_hoi4d
"""
import os
import glob
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
import argparse
import src.dust3r.datasets.utils.cropping as cropping
def parse_arguments():
    """
    Build the command-line interface of the HOI4D preprocessing script and
    parse the process arguments.

    Returns:
        argparse.Namespace: Holds ``root_dir``, ``cam_root`` and ``out_dir``
        (all mandatory) plus ``max_workers`` (an int, or None when the
        option is not supplied).
    """
    cli = argparse.ArgumentParser(
        description="Preprocess HOI4D dataset by rescaling RGB and depth images."
    )

    # The three path options share the same shape: mandatory, plain strings.
    required_paths = (
        ("--root_dir", "Path to the HOI4D_release directory."),
        ("--cam_root", "Path to the directory containing camera intrinsics."),
        ("--out_dir", "Path to the directory where processed files will be saved."),
    )
    for flag, description in required_paths:
        cli.add_argument(flag, required=True, help=description)

    cli.add_argument(
        "--max_workers",
        type=int,
        default=None,
        help="Number of parallel workers. Default uses half of available CPU cores.",
    )
    return cli.parse_args()
def process_image(args):
    """
    Rescale one RGB frame together with its depth map and write the results.

    Steps:
        - Opens the image with PIL and reads the depth map with OpenCV
          (``cv2.IMREAD_ANYDEPTH``).
        - Converts depth from mm to meters (divides by 1000).
        - Rescales both with ``cropping.rescale_image_depthmap`` using the
          fixed target resolution ``(640, 480)``; the intrinsics are passed
          as a copy so the caller's array is never modified.
        - Saves the rescaled image (``out_img_path``), the depth array
          (``np.save`` to ``out_depth_path``) and the rescaled intrinsics
          (``np.savez`` to ``out_cam_path`` under the key ``intrinsics``).

    Args:
        args (tuple): A single tuple, so the function can be used with
            ``executor.map``:
            (img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics)

    Returns:
        None. Any exception is printed to the console and swallowed so that
        one bad frame does not stop the rest of the batch.
    """
    img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics = args
    try:
        # The context manager releases the image file handle even when a
        # later step (depth read, rescale, save) raises; everything that
        # touches the image stays inside the block.
        with Image.open(img_path) as img:
            # Load depth (in mm) and convert to meters
            depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
            if depth is None:
                # cv2.imread signals an unreadable/missing file with None
                # instead of raising, so turn it into an explicit error.
                raise ValueError(f"Could not read depth image: {depth_path}")
            depth = depth.astype(np.float32) / 1000.0

            # Rescale image and depth map
            img_rescaled, depth_rescaled, intrinsics_rescaled = cropping.rescale_image_depthmap(
                img, depth, intrinsics.copy(), (640, 480)
            )

            # Save processed data
            img_rescaled.save(out_img_path)  # format chosen by PIL from the extension
            np.save(out_depth_path, depth_rescaled)  # Depth .npy
            np.savez(out_cam_path, intrinsics=intrinsics_rescaled)
    except Exception as e:
        # Deliberate best-effort: report the failing frame and carry on.
        print(f"Error processing {img_path}: {e}")
def _resolve_worker_count(requested):
    """Return ``requested`` if given, otherwise half of the CPU cores (at least 1)."""
    if requested is not None:
        return requested
    # os.cpu_count() returns None when the core count cannot be determined;
    # fall back to a single core instead of failing on ``None // 2``.
    return max(1, (os.cpu_count() or 1) // 2)


def _collect_tasks(root, cam_root, out_dir):
    """Scan ``root`` for scenes and return the per-frame task tuples for process_image."""
    # e.g.: root/ZY2021*/H*/C*/N*/S*/s*/T*
    # Sorted because glob order depends on the filesystem; a stable order
    # keeps logs and progress comparable between runs.
    scene_dirs = sorted(
        glob.glob(os.path.join(root, "ZY2021*", "H*", "C*", "N*", "S*", "s*", "T*"))
    )
    if not scene_dirs:
        print(f"Warning: No scene directories found under {root}")

    # Intrinsics are stored once per top-level directory; load each file a
    # single time instead of once per scene.
    intrinsics_by_top_level = {}
    tasks = []
    for scene_dir in tqdm(scene_dirs, desc="Collecting scenes"):
        # Example: "ZY202101/H1/C1/N1/S1/s1/T1" -> "ZY202101_H1_C1_N1_S1_s1_T1"
        scene_relpath = os.path.relpath(scene_dir, root)
        path_parts = scene_relpath.split(os.sep)
        scene_name = "_".join(path_parts)

        # "ZY202101" -> "cam_root/ZY202101/intrin.npy"
        top_level = path_parts[0]
        if top_level not in intrinsics_by_top_level:
            cam_file = os.path.join(cam_root, top_level, "intrin.npy")
            if not os.path.isfile(cam_file):
                print(f"Warning: Camera file not found: {cam_file}. Skipping {scene_dir}")
                continue
            intrinsics_by_top_level[top_level] = np.load(cam_file)
        intrinsics = intrinsics_by_top_level[top_level]

        # Input directories for this sequence
        rgb_dir = os.path.join(scene_dir, "align_rgb")
        depth_dir = os.path.join(scene_dir, "align_depth")

        # Output directories (created up front so workers can write directly)
        out_rgb_dir = os.path.join(out_dir, scene_name, "rgb")
        out_depth_dir = os.path.join(out_dir, scene_name, "depth")
        out_cam_dir = os.path.join(out_dir, scene_name, "cam")
        for directory in (out_rgb_dir, out_depth_dir, out_cam_dir):
            os.makedirs(directory, exist_ok=True)

        for img_path in sorted(glob.glob(os.path.join(rgb_dir, "*.jpg"))):
            basename = os.path.splitext(os.path.basename(img_path))[0]
            depth_path = os.path.join(depth_dir, f"{basename}.png")
            out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
            out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
            out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

            # Skip frames whose three outputs all exist, so an interrupted
            # run can be resumed; a partially written frame is redone.
            outputs = (out_img_path, out_depth_path, out_cam_path)
            if all(os.path.exists(path) for path in outputs):
                continue

            tasks.append(
                (img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics)
            )
    return tasks


def main():
    """
    Run the HOI4D preprocessing pipeline.

    Parses the command-line arguments, collects one task per RGB frame
    found under ``<root_dir>/ZY2021*/H*/C*/N*/S*/s*/T*/align_rgb/*.jpg``
    (paired with ``align_depth/<name>.png`` and the intrinsics from
    ``<cam_root>/<top-level dir>/intrin.npy``), and processes the tasks in
    parallel with ``process_image``.

    Outputs are written to ``<out_dir>/<scene_name>/{rgb,depth,cam}``, where
    ``scene_name`` is the scene's relative path with separators replaced by
    underscores. Scenes without a camera file are skipped with a warning,
    and frames whose outputs already exist are not reprocessed.

    Returns:
        None.
    """
    args = parse_arguments()

    # exist_ok=True already covers the "directory exists" case.
    os.makedirs(args.out_dir, exist_ok=True)

    tasks = _collect_tasks(args.root_dir, args.cam_root, args.out_dir)
    if not tasks:
        # Avoid spinning up a process pool when there is nothing to do.
        print("No images to process.")
        return

    max_workers = _resolve_worker_count(args.max_workers)
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Drain the lazy map iterator so every task runs and the progress
        # bar advances; process_image returns nothing worth keeping.
        for _ in tqdm(
            executor.map(process_image, tasks),
            total=len(tasks),
            desc="Processing images",
        ):
            pass
# Script entry point. The guard keeps main() from running when this module is
# imported rather than executed, which matters here because
# ProcessPoolExecutor worker processes may import the main module themselves.
if __name__ == "__main__":
    main()
|