File size: 6,202 Bytes
2df809d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python3
"""
Preprocess the MVS Synth dataset.

This script processes each sequence in a given dataset directory by:
  - Reading the RGB image, EXR depth image, and JSON camera parameters.
  - Computing the camera pose from the extrinsic matrix (with a conversion matrix applied).
  - Creating a simple camera intrinsics matrix from the provided focal lengths and principal point.
  - Copying the RGB image to a .jpg-named file (the PNG bytes are copied unchanged, not re-encoded), saving the depth (as a NumPy array), and saving the camera data (as an NPZ file).

Usage:
    python preprocess_mvs_synth.py --root_dir /path/to/data_mvs_synth/GTAV_720/ \
                                   --out_dir /path/to/processed_mvs_synth \
                                   --num_workers 32
"""

import os
import shutil
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import numpy as np
import cv2
import argparse

# Opt in to OpenCV's OpenEXR codec; the depth maps handled by this script
# are .exr files.
# NOTE(review): this assignment runs after `import cv2` above. Some OpenCV
# builds/platforms may need the flag set before the import -- confirm.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

# Homogeneous 4x4 conversion matrix (example conversion, adjust if needed).
# It is a permutation matrix: left-multiplying a pose by it swaps the pose's
# first two rows and leaves the remaining rows untouched.
R_conv = np.array(
    [
        [0, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ],
    dtype=np.float32,
)


def process_basename(seq, basename, root_dir, out_dir):
    """
    Process a single frame identified by 'basename' within a given sequence.

    Reads the camera parameters (JSON) and the depth map (EXR), derives the
    camera pose and a 3x3 intrinsics matrix, and writes three outputs under
    ``<out_dir>/<seq>``:

      - ``rgb/<basename>.jpg``: a byte-for-byte copy of the source PNG. The
        image is NOT re-encoded, so the file content stays PNG despite the
        extension.
      - ``depth/<basename>.npy``: the depth map as float32, with every
        non-finite value (inf or NaN) replaced by 0.
      - ``cam/<basename>.npz``: arrays ``intrinsics`` and ``pose``.

    Parameters:
      seq (str): The sequence (subdirectory) name.
      basename (str): The basename of the file (without extension).
      root_dir (str): Root directory containing the raw data.
      out_dir (str): Output directory where processed data will be saved.

    Returns:
      None on success, or an error string on failure. Exceptions are not
      propagated, so a single bad frame cannot abort a parallel run.
    """
    try:
        # Input layout: <root_dir>/<seq>/{images,depths,poses}/<basename>.*
        seq_dir = os.path.join(root_dir, seq)
        img_path = os.path.join(seq_dir, "images", basename + ".png")
        depth_path = os.path.join(seq_dir, "depths", basename + ".exr")
        cam_path = os.path.join(seq_dir, "poses", basename + ".json")

        # Output layout: <out_dir>/<seq>/{rgb,depth,cam}/<basename>.*
        # Directories are created here as well so the function also works
        # when it is called on its own.
        out_seq_dir = os.path.join(out_dir, seq)
        out_img_dir = os.path.join(out_seq_dir, "rgb")
        out_depth_dir = os.path.join(out_seq_dir, "depth")
        out_cam_dir = os.path.join(out_seq_dir, "cam")
        for directory in (out_img_dir, out_depth_dir, out_cam_dir):
            os.makedirs(directory, exist_ok=True)

        out_img_path = os.path.join(out_img_dir, basename + ".jpg")
        out_depth_path = os.path.join(out_depth_dir, basename + ".npy")
        out_cam_path = os.path.join(out_cam_dir, basename + ".npz")

        # Read and process camera parameters.
        with open(cam_path, "r", encoding="utf-8") as f:
            cam_data = json.load(f)
        c_x = cam_data["c_x"]
        c_y = cam_data["c_y"]
        f_x = cam_data["f_x"]
        f_y = cam_data["f_y"]
        extrinsic = np.array(cam_data["extrinsic"])

        # Invert the extrinsic matrix to obtain the camera-to-world pose,
        # then apply the module-level conversion matrix.
        pose = R_conv @ np.linalg.inv(extrinsic)
        if not np.all(np.isfinite(pose)):
            raise ValueError(f"Invalid pose for {basename}")

        # Build a simple pinhole intrinsics matrix.
        intrinsics = np.array(
            [[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1]], dtype=np.float32
        )

        # cv2.imread signals failure by returning None instead of raising;
        # check explicitly so the reported error names the offending file.
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth is None:
            raise OSError(f"Failed to read depth map: {depth_path}")
        depth = depth.astype(np.float32)
        # Zero out every non-finite sample (inf and NaN alike).
        depth[~np.isfinite(depth)] = 0.0

        # Save the processed data only after all inputs have been validated.
        shutil.copyfile(img_path, out_img_path)
        np.save(out_depth_path, depth)
        np.savez(out_cam_path, intrinsics=intrinsics, pose=pose)

    except Exception as e:
        return f"Error processing {seq}/{basename}: {e}"

    return None


def main():
    """
    Command-line entry point.

    Parses ``--root_dir``, ``--out_dir`` and ``--num_workers``, collects one
    task per ``.png`` file found in each ``<root_dir>/<seq>/images`` folder,
    and runs ``process_basename`` on every task in a process pool while
    showing a progress bar. Per-frame error strings are printed as they
    arrive, followed by a failure count when anything went wrong.

    Exits through ``parser.error`` (exit status 2) when ``--root_dir`` is not
    an existing directory or ``--num_workers`` is smaller than 1. Sequence
    directories without an ``images`` subdirectory are skipped with a notice.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess MVS Synth dataset: convert images, depth, and camera data."
    )
    parser.add_argument(
        "--root_dir",
        type=str,
        default="/path/to/data_mvs_synth/GTAV_720/",
        help="Root directory of the raw MVS Synth data.",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        default="/path/to/processed_mvs_synth",
        help="Output directory for processed data.",
    )
    parser.add_argument(
        "--num_workers", type=int, default=32, help="Number of parallel workers."
    )
    args = parser.parse_args()

    root_dir = args.root_dir
    out_dir = args.out_dir
    num_workers = args.num_workers

    # Fail with a usage message rather than a traceback; the default paths
    # are placeholders and will normally not exist.
    if not os.path.isdir(root_dir):
        parser.error(f"--root_dir is not a directory: {root_dir}")
    if num_workers < 1:
        parser.error("--num_workers must be at least 1")

    # Get list of sequence directories.
    seqs = sorted(
        d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))
    )

    # Build the task list, creating the output folders of each usable sequence.
    tasks = []
    for seq in seqs:
        img_dir = os.path.join(root_dir, seq, "images")
        if not os.path.isdir(img_dir):
            # A stray folder must not abort the whole run.
            print(f"Skipping {seq}: no 'images' directory found.")
            continue

        out_seq_dir = os.path.join(out_dir, seq)
        for sub in ("rgb", "depth", "cam"):
            os.makedirs(os.path.join(out_seq_dir, sub), exist_ok=True)

        basenames = sorted(
            os.path.splitext(name)[0]
            for name in os.listdir(img_dir)
            if name.endswith(".png")
        )
        tasks.extend((seq, basename, root_dir, out_dir) for basename in basenames)

    print(f"Processing {len(tasks)} tasks using {num_workers} workers...")
    if not tasks:
        # Nothing to do; avoid spinning up a worker pool for no work.
        return

    failures = 0
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # Map each future back to its task so failures can be attributed.
        futures = {executor.submit(process_basename, *task): task for task in tasks}
        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Processing"
        ):
            seq, basename = futures[future][:2]
            try:
                error = future.result()
            except Exception as exc:
                # The worker itself died (e.g. broken pool); report and go on.
                error = f"Error processing {seq}/{basename}: {exc}"
            if error:
                failures += 1
                # tqdm.write keeps the message from garbling the progress bar.
                tqdm.write(error)

    if failures:
        print(f"Finished with {failures} failed frame(s) out of {len(tasks)}.")


# Script entry point: run the preprocessing only when this file is executed
# directly, not when it is imported (which also covers worker processes that
# re-import this module under a spawn-based multiprocessing start method).
if __name__ == "__main__":
    main()