File size: 6,400 Bytes
2df809d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python3
"""
HOI4D Preprocessing Script

This script processes HOI4D data by:
  1. Searching specific subdirectories for RGB and depth images.
  2. Reading camera intrinsics from a .npy file (one per high-level scene).
  3. Converting depth from millimeters to meters and rescaling the RGB images
     and depth maps with a hard-coded target resolution of 640x480, using the
     'cropping' module.
  4. Saving results (RGB, .npy depth, .npz camera intrinsics) in a new directory structure.

Usage:
    python preprocess_hoi4d.py \
        --root_dir /path/to/HOI4D_release \
        --cam_root /path/to/camera_params \
        --out_dir /path/to/processed_hoi4d
"""

import os
import glob
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
import argparse

import src.dust3r.datasets.utils.cropping as cropping

def parse_arguments():
    """
    Build the command-line interface for HOI4D preprocessing and parse it.

    Three path options are mandatory (--root_dir, --cam_root, --out_dir);
    --max_workers is an optional integer that defaults to None.

    Returns:
        argparse.Namespace: Namespace with the attributes root_dir, cam_root,
        out_dir and max_workers.
    """
    cli = argparse.ArgumentParser(
        description="Preprocess HOI4D dataset by rescaling RGB and depth images."
    )

    # The mandatory path options differ only in their flag and help text.
    required_paths = (
        ("--root_dir", "Path to the HOI4D_release directory."),
        ("--cam_root", "Path to the directory containing camera intrinsics."),
        ("--out_dir", "Path to the directory where processed files will be saved."),
    )
    for flag, help_text in required_paths:
        cli.add_argument(flag, required=True, help=help_text)

    cli.add_argument(
        "--max_workers",
        type=int,
        default=None,
        help="Number of parallel workers. Default uses half of available CPU cores.",
    )

    return cli.parse_args()

def process_image(args):
    """
    Process a single RGB image and its depth map.

    Steps:
      - Opens the image with PIL and reads the depth map with OpenCV
        (cv2.IMREAD_ANYDEPTH).
      - Converts depth from mm to meters (cast to float32, divided by 1000).
      - Passes both, with a copy of the intrinsics and the target resolution
        (640, 480), to 'cropping.rescale_image_depthmap'.
      - Saves the rescaled image (.png), depth (.npy), and camera
        intrinsics (.npz, stored under the key "intrinsics").

    Args:
        args (tuple): A tuple of:
          (img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics)
          It is a single tuple so the function can be used directly with
          executor.map.

    Returns:
        None. Errors are printed to the console but do not stop the workflow.
    """
    img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics = args

    try:
        # Image.open is lazy and keeps the underlying file open; the context
        # manager releases the handle on every path, including the failure
        # paths below. All work that may touch `img` stays inside the block.
        with Image.open(img_path) as img:
            # Load depth (in mm) and convert to meters.
            # cv2.imread signals failure by returning None instead of raising.
            depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
            if depth is None:
                raise ValueError(f"Could not read depth image: {depth_path}")
            depth = depth.astype(np.float32) / 1000.0

            # Rescale image and depth map; intrinsics are copied so the array
            # shared between tasks is never modified.
            img_rescaled, depth_rescaled, intrinsics_rescaled = cropping.rescale_image_depthmap(
                img, depth, intrinsics.copy(), (640, 480)
            )

            # Save processed data
            img_rescaled.save(out_img_path)          # PNG image
            np.save(out_depth_path, depth_rescaled)  # Depth .npy
            np.savez(out_cam_path, intrinsics=intrinsics_rescaled)

    except Exception as e:
        # Best-effort by design: one bad frame must not abort the whole batch.
        print(f"Error processing {img_path}: {e}")

def main():
    """
    Collect every unprocessed HOI4D frame and rescale the frames in parallel.

    Steps:
      1. Parse the command-line arguments and create the output directory.
      2. Glob sequence directories matching
         root/ZY2021*/H*/C*/N*/S*/s*/T*.
      3. For each sequence, load the intrinsics from
         cam_root/<first path component>/intrin.npy (sequences whose camera
         file is missing are skipped with a warning) and create the rgb/,
         depth/ and cam/ output sub-directories.
      4. Queue one task per align_rgb/*.jpg frame unless its three output
         files already exist.
      5. Run process_image over the queued tasks in a process pool.

    Returns:
        None.
    """
    args = parse_arguments()

    root = args.root_dir
    cam_root = args.cam_root
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    # Collect a list of subdirectories using a glob pattern
    # e.g.: root/ZY2021*/H*/C*/N*/S*/s*/T*
    scene_dirs = glob.glob(os.path.join(root, "ZY2021*", "H*", "C*", "N*", "S*", "s*", "T*"))

    # Many sequences share one camera file (it is keyed only by the top-level
    # directory), so each intrin.npy is loaded once and reused.
    intrinsics_cache = {}

    # Build tasks
    tasks = []
    for scene_dir in tqdm(scene_dirs, desc="Collecting scenes"):
        # Build an output sub-directory name
        # Example: "ZY202101/H1/C1/N1/S1/s1/T1" -> "ZY202101_H1_C1_N1_S1_s1_T1"
        rel_parts = os.path.relpath(scene_dir, root).split(os.sep)
        scene_name = "_".join(rel_parts)

        # Camera intrinsics live in cam_root/<top-level dir>/intrin.npy,
        # e.g. "ZY202101" -> "cam_root/ZY202101/intrin.npy"
        cam_file = os.path.join(cam_root, rel_parts[0], "intrin.npy")
        if cam_file not in intrinsics_cache:
            if not os.path.isfile(cam_file):
                print(f"Warning: Camera file not found: {cam_file}. Skipping {scene_dir}")
                continue
            intrinsics_cache[cam_file] = np.load(cam_file)
        intrinsics = intrinsics_cache[cam_file]

        # Directories for this sequence
        rgb_dir = os.path.join(scene_dir, "align_rgb")
        depth_dir = os.path.join(scene_dir, "align_depth")

        # Output directories
        out_rgb_dir = os.path.join(out_dir, scene_name, "rgb")
        out_depth_dir = os.path.join(out_dir, scene_name, "depth")
        out_cam_dir = os.path.join(out_dir, scene_name, "cam")
        for directory in (out_rgb_dir, out_depth_dir, out_cam_dir):
            os.makedirs(directory, exist_ok=True)

        # Build tasks for each image; the depth map shares the frame's
        # basename but is stored as .png in align_depth.
        for img_path in sorted(glob.glob(os.path.join(rgb_dir, "*.jpg"))):
            basename = os.path.splitext(os.path.basename(img_path))[0]
            depth_path = os.path.join(depth_dir, f"{basename}.png")

            out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
            out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
            out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

            # Skip if already processed (all three outputs must exist, so a
            # frame with only some outputs written is redone).
            outputs = (out_img_path, out_depth_path, out_cam_path)
            if all(os.path.exists(path) for path in outputs):
                continue

            tasks.append((
                img_path,
                depth_path,
                out_img_path,
                out_depth_path,
                out_cam_path,
                intrinsics,
            ))

    # Process tasks in parallel
    max_workers = args.max_workers
    if max_workers is None:
        # os.cpu_count() returns None when the CPU count cannot be
        # determined; fall back to a single worker in that case.
        max_workers = max(1, (os.cpu_count() or 1) // 2)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # list() drains the lazy map iterator so tqdm can report progress
        # and the pool finishes all work before the executor is shut down.
        list(tqdm(
            executor.map(process_image, tasks),
            total=len(tasks),
            desc="Processing images"
        ))


# Run the pipeline only when this file is executed as a script. The guard is
# also what keeps ProcessPoolExecutor worker processes from re-running main()
# when they import this module (the __main__ module must be importable by
# worker subprocesses).
if __name__ == "__main__":
    main()