Spaces:

liguang0115
/

vmem

Runtime error

File size: 6,202 Bytes

2df809d

#!/usr/bin/env python3
"""
Preprocess the MVS Synth dataset.

This script processes each sequence in a given dataset directory by:
  - Reading the RGB image, EXR depth image, and JSON camera parameters.
  - Computing the camera pose from the extrinsic matrix (with a conversion matrix applied).
  - Creating a simple camera intrinsics matrix from the provided focal lengths and principal point.
  - Copying the RGB image byte-for-byte to a .jpg-named file (the PNG data is not re-encoded), saving the depth (as a NumPy .npy array), and saving the camera data (as a NPZ file).

Usage:
    python preprocess_mvs_synth.py --root_dir /path/to/data_mvs_synth/GTAV_720/ \
                                   --out_dir /path/to/processed_mvs_synth \
                                   --num_workers 32
"""

import os
import shutil
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import numpy as np
import cv2
import argparse

# Request OpenEXR decoding from OpenCV (the depth maps are .exr files).
# NOTE(review): cv2 is imported before this assignment; confirm the flag is
# still honored when it is set after the import.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

# 4x4 homogeneous conversion matrix. Left-multiplying a pose by it swaps the
# pose's first two rows and leaves the last two untouched.
# (Example conversion, adjust if needed.)
R_conv = np.array(
    [
        [0, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ],
    dtype=np.float32,
)


def process_basename(seq, basename, root_dir, out_dir):
    """
    Process a single frame identified by 'basename' within a given sequence.

    Reads the camera parameters (JSON file) and the depth (EXR) file, computes
    the converted camera-to-world pose, builds the 3x3 camera intrinsics
    matrix, and writes the outputs under <out_dir>/<seq>/{rgb,depth,cam}.
    No output file is written unless the pose and the depth map were both
    obtained successfully.

    Parameters:
      seq (str): The sequence (subdirectory) name.
      basename (str): The basename of the file (without extension).
      root_dir (str): Root directory containing the raw data.
      out_dir (str): Output directory where processed data will be saved.

    Returns:
      None on success, or an error string on failure. Any Exception raised
      while processing is caught and reported through that string, so one
      bad frame does not abort the caller.
    """
    try:
        # Input file paths: <root_dir>/<seq>/{images,depths,poses}/<basename>.*
        seq_dir = os.path.join(root_dir, seq)
        img_path = os.path.join(seq_dir, "images", basename + ".png")
        depth_path = os.path.join(seq_dir, "depths", basename + ".exr")
        cam_path = os.path.join(seq_dir, "poses", basename + ".json")

        # Output directories (created here too so the function also works
        # when called without any prior setup).
        out_seq_dir = os.path.join(out_dir, seq)
        out_img_dir = os.path.join(out_seq_dir, "rgb")
        out_depth_dir = os.path.join(out_seq_dir, "depth")
        out_cam_dir = os.path.join(out_seq_dir, "cam")
        for directory in (out_img_dir, out_depth_dir, out_cam_dir):
            os.makedirs(directory, exist_ok=True)

        # Output file paths.
        out_img_path = os.path.join(out_img_dir, basename + ".jpg")
        out_depth_path = os.path.join(out_depth_dir, basename + ".npy")
        out_cam_path = os.path.join(out_cam_dir, basename + ".npz")

        # Read and process camera parameters.
        with open(cam_path, "r") as f:
            cam_data = json.load(f)
        c_x = cam_data["c_x"]
        c_y = cam_data["c_y"]
        f_x = cam_data["f_x"]
        f_y = cam_data["f_y"]
        extrinsic = np.array(cam_data["extrinsic"])

        # Invert the extrinsic matrix to obtain the camera-to-world pose, then
        # apply the conversion matrix.
        pose = R_conv @ np.linalg.inv(extrinsic)
        if not np.all(np.isfinite(pose)):
            raise ValueError(f"Invalid pose for {basename}")

        # Build a simple pinhole intrinsics matrix.
        intrinsics = np.array(
            [[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1]], dtype=np.float32
        )

        # Read depth image. cv2.imread signals failure by returning None
        # instead of raising, so check explicitly to get a useful message.
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth is None:
            raise OSError(f"cv2.imread failed to read depth image '{depth_path}'")
        depth = depth.astype(np.float32)
        depth[np.isinf(depth)] = 0.0  # Clean up any infinite values

        # Save the processed data.
        # NOTE(review): this is a byte-for-byte copy, so the '.jpg' output
        # still contains the PNG-encoded data of the source image.
        shutil.copyfile(img_path, out_img_path)
        np.save(out_depth_path, depth)
        np.savez(out_cam_path, intrinsics=intrinsics, pose=pose)

    except Exception as e:
        return f"Error processing {seq}/{basename}: {e}"

    return None


def main():
    """
    Command-line entry point: preprocess every frame of every sequence.

    Each sub-directory of --root_dir is treated as a sequence. For every
    '<basename>.png' found in a sequence's 'images' folder, one
    process_basename task is submitted to a process pool of --num_workers
    workers. Sequences without an 'images' folder are skipped with a message
    instead of aborting the whole run. Per-frame error strings returned by
    the workers are printed as they arrive, and a failure count is printed
    at the end if any frame failed.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess MVS Synth dataset: convert images, depth, and camera data."
    )
    parser.add_argument(
        "--root_dir",
        type=str,
        default="/path/to/data_mvs_synth/GTAV_720/",
        help="Root directory of the raw MVS Synth data.",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        default="/path/to/processed_mvs_synth",
        help="Output directory for processed data.",
    )
    parser.add_argument(
        "--num_workers", type=int, default=32, help="Number of parallel workers."
    )
    args = parser.parse_args()

    root_dir = args.root_dir
    out_dir = args.out_dir
    num_workers = args.num_workers

    # Get list of sequence directories.
    seqs = sorted(
        d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))
    )

    # Build the task list, pre-creating output directories per sequence.
    png_ext = ".png"
    tasks = []
    for seq in seqs:
        img_dir = os.path.join(root_dir, seq, "images")
        if not os.path.isdir(img_dir):
            # A stray directory under root_dir must not abort the whole run.
            print(f"Skipping {seq}: no 'images' directory at {img_dir}")
            continue

        out_seq_dir = os.path.join(out_dir, seq)
        for sub in ("rgb", "depth", "cam"):
            os.makedirs(os.path.join(out_seq_dir, sub), exist_ok=True)

        basenames = sorted(
            name[: -len(png_ext)]
            for name in os.listdir(img_dir)
            if name.endswith(png_ext)
        )
        tasks.extend((seq, basename, root_dir, out_dir) for basename in basenames)

    print(f"Processing {len(tasks)} tasks using {num_workers} workers...")

    failures = 0
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(process_basename, *task) for task in tasks]
        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Processing"
        ):
            error = future.result()
            if error:
                failures += 1
                # tqdm.write prints without breaking the progress bar.
                tqdm.write(error)

    if failures:
        print(f"{failures} of {len(tasks)} tasks failed.")

# Script entry point: run the preprocessing only when this file is executed
# directly, not when it is imported. Process-pool workers may re-import this
# module (e.g. under the "spawn" start method), so the guard also keeps them
# from re-running main().
if __name__ == "__main__":
    main()