#!/usr/bin/env python3
"""
Preprocessing Script for the Point Odyssey Dataset

This script processes the Point Odyssey dataset by:
  - Copying RGB images.
  - Converting 16-bit depth PNGs to float32 depth maps (uint16 values rescaled
    by 1000 / 65535).
  - Inverting camera extrinsic matrices to obtain camera poses.
  - Writing per-frame outputs to 'rgb/<idx>.jpg', 'depth/<idx>.npy', and
    'cam/<idx>.npz' (containing 'intrinsics' and 'pose') for each sequence
    (an illustrative loader sketch follows the imports below).

The dataset is expected to have subdirectories for each split (e.g., train, test, val),
with each split containing multiple sequence directories. Each sequence directory must
contain the following:
  - An 'rgbs' folder with .jpg images.
  - A 'depths' folder with .png depth images.
  - An 'anno.npz' file with 'intrinsics' and 'extrinsics' arrays.

Usage:
    python preprocess_point_odyssey.py --input_dir /path/to/input_dataset --output_dir /path/to/output_dataset
"""

import os
import argparse
import shutil
import numpy as np
import cv2
from tqdm import tqdm
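

# Illustrative sketch (not called anywhere in this script): how downstream code
# might read back one frame produced by process_sequence(). The helper name and
# the example basename are assumptions for illustration, not part of the dataset.
def _example_load_processed_frame(out_seq_dir, basename):
    """Load the RGB image, depth map, intrinsics, and pose for one processed frame.

    `basename` is the zero-padded frame index string that process_sequence() derives
    from the source filenames (e.g. "00000"; the exact padding depends on the dataset).
    """
    rgb = cv2.imread(os.path.join(out_seq_dir, "rgb", basename + ".jpg"))
    depth = np.load(os.path.join(out_seq_dir, "depth", basename + ".npy"))
    cam = np.load(os.path.join(out_seq_dir, "cam", basename + ".npz"))
    return rgb, depth, cam["intrinsics"], cam["pose"]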


def process_sequence(seq_dir, out_seq_dir):
    """
    Process a single sequence:
      - Verifies that required folders/files exist.
      - Loads camera annotations.
      - Processes each frame: copies the RGB image, processes the depth map,
        computes the camera pose, and saves the results.

    Args:
        seq_dir (str): Directory of the sequence (should contain 'rgbs', 'depths', and 'anno.npz').
        out_seq_dir (str): Output directory where processed files will be saved.
    """
    # Define input subdirectories and annotation file
    img_dir = os.path.join(seq_dir, "rgbs")
    depth_dir = os.path.join(seq_dir, "depths")
    cam_file = os.path.join(seq_dir, "anno.npz")

    # Ensure all necessary files/folders exist
    if not (
        os.path.exists(img_dir)
        and os.path.exists(depth_dir)
        and os.path.exists(cam_file)
    ):
        raise FileNotFoundError(f"Missing required data in {seq_dir}")

    # Create output subdirectories for images, depth maps, and camera parameters
    out_img_dir = os.path.join(out_seq_dir, "rgb")
    out_depth_dir = os.path.join(out_seq_dir, "depth")
    out_cam_dir = os.path.join(out_seq_dir, "cam")
    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_depth_dir, exist_ok=True)
    os.makedirs(out_cam_dir, exist_ok=True)

    # Load camera annotations
    annotations = np.load(cam_file)
    cam_ints = annotations["intrinsics"].astype(np.float32)
    cam_exts = annotations["extrinsics"].astype(np.float32)

    # List and sort image and depth filenames
    rgbs = sorted([f for f in os.listdir(img_dir) if f.endswith(".jpg")])
    depths = sorted([f for f in os.listdir(depth_dir) if f.endswith(".png")])

    # Ensure that the number of intrinsics, extrinsics, RGB images, and depth images match
    if not (len(cam_ints) == len(cam_exts) == len(rgbs) == len(depths)):
        raise ValueError(
            f"Mismatch in sequence {seq_dir}: "
            f"{len(cam_ints)} intrinsics, {len(cam_exts)} extrinsics, {len(rgbs)} images, {len(depths)} depths."
        )

    # Skip the sequence if its outputs already look complete (judged by the RGB
    # count; depth and camera files are written alongside each image)
    if len(os.listdir(out_img_dir)) == len(rgbs):
        return

    # Process each frame in the sequence
    for i in tqdm(range(len(cam_exts)), desc="Processing frames", leave=False):
        # Extract frame index from filenames
        basename_img = rgbs[i].split(".")[0].split("_")[-1]
        basename_depth = depths[i].split(".")[0].split("_")[-1]
        if int(basename_img) != i or int(basename_depth) != i:
            raise ValueError(
                f"Frame index mismatch in sequence {seq_dir} for frame {i}"
            )

        img_path = os.path.join(img_dir, rgbs[i])
        depth_path = os.path.join(depth_dir, depths[i])

        # Retrieve intrinsics and invert the (world-to-camera) extrinsic matrix
        # to obtain the camera-to-world pose
        intrins = cam_ints[i]
        cam_extrinsic = cam_exts[i]
        pose = np.linalg.inv(cam_extrinsic)
        if np.any(np.isinf(pose)) or np.any(np.isnan(pose)):
            raise ValueError(
                f"Invalid pose computed from extrinsics for frame {i} in {seq_dir}"
            )

        # Read the 16-bit depth image and rescale to float32
        # (uint16 range [0, 65535] maps to [0, 1000] depth units)
        depth_16bit = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth_16bit is None:
            raise IOError(f"Failed to read depth image {depth_path}")
        depth = depth_16bit.astype(np.float32) / 65535.0 * 1000.0

        # Save processed files: copy the RGB image and save depth and camera parameters
        basename = basename_img  # or str(i)
        out_img_path = os.path.join(out_img_dir, basename + ".jpg")
        shutil.copyfile(img_path, out_img_path)
        np.save(os.path.join(out_depth_dir, basename + ".npy"), depth)
        np.savez(
            os.path.join(out_cam_dir, basename + ".npz"), intrinsics=intrins, pose=pose
        )


def process_split(split_dir, out_split_dir):
    """
    Process all sequences within a data split (e.g., train, test, or val).

    Args:
        split_dir (str): Directory for the split.
        out_split_dir (str): Output directory for the processed split.
    """
    sequences = sorted(
        [d for d in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, d))]
    )
    for seq in tqdm(
        sequences, desc=f"Processing sequences in {os.path.basename(split_dir)}"
    ):
        seq_dir = os.path.join(split_dir, seq)
        out_seq_dir = os.path.join(out_split_dir, seq)
        process_sequence(seq_dir, out_seq_dir)


def main():
    parser = argparse.ArgumentParser(
        description="Preprocess Point Odyssey dataset by processing images, depth maps, and camera parameters."
    )
    parser.add_argument(
        "--input_dir",
        type=str,
        required=True,
        help="Path to the root input dataset directory.",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        required=True,
        help="Path to the root output directory where processed data will be stored.",
    )
    args = parser.parse_args()

    # Define the expected dataset splits
    splits = ["train", "test", "val"]
    for split in splits:
        split_dir = os.path.join(args.input_dir, split)
        out_split_dir = os.path.join(args.output_dir, split)
        if not os.path.exists(split_dir):
            print(
                f"Warning: Split directory {split_dir} does not exist. Skipping this split."
            )
            continue
        os.makedirs(out_split_dir, exist_ok=True)
        process_split(split_dir, out_split_dir)


if __name__ == "__main__":
    main()