Spaces:

liguang0115
/

vmem

Runtime error

File size: 7,837 Bytes

2df809d

#!/usr/bin/env python3
"""
Preprocess the Matterport3D (MP3D) dataset.

This script reads per-scan camera parameters from a `.conf` file and image-overlap
data from a companion text file, copies the RGB images, converts the depth images
to float arrays, adjusts camera poses using a conversion matrix, and then saves the
images, depth maps, and camera metadata into separate output directories.

Usage:
    python preprocess_mp3d.py --root_dir /path/to/data_mp3d/v1/scans \
                              --out_dir /path/to/processed_mp3d
"""

import os
import numpy as np
import cv2
import shutil
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import argparse


def process_image(args):
    """
    Process a single frame: convert its depth image, adjust its camera pose,
    and write the RGB image, depth map and camera metadata to the output
    directories.

    Parameters:
      args: tuple containing
         (i, paths, K, pose, img_dir, depth_dir, out_rgb_dir, out_depth_dir, out_cam_dir, R_conv)
         where:
           i             - the frame index (used for the zero-padded output name)
           paths         - tuple of (depth filename, RGB filename)
           K             - camera intrinsics matrix (3x3 NumPy array)
           pose          - camera pose (4x4 NumPy array)
           img_dir       - directory containing RGB images
           depth_dir     - directory containing depth images
           out_rgb_dir   - output directory for RGB images
           out_depth_dir - output directory for depth maps (.npy)
           out_cam_dir   - output directory for camera metadata (.npz)
           R_conv        - a 4x4 conversion matrix (NumPy array)
    Returns:
      None if successful, or an error string of the form
      "Error processing image <RGB filename>: <reason>" if processing fails.
      Errors are returned rather than raised so a single bad frame does not
      abort the caller's batch.
    """
    (
        i,
        paths,
        K,
        pose,
        img_dir,
        depth_dir,
        out_rgb_dir,
        out_depth_dir,
        out_cam_dir,
        R_conv,
    ) = args

    depth_path, img_path = paths
    img_path_full = os.path.join(img_dir, img_path)
    depth_path_full = os.path.join(depth_dir, depth_path)
    error_prefix = f"Error processing image {img_path}: "

    # Validate both inputs before touching the output directories, so a missing
    # file is reported by name instead of surfacing as an opaque decoder error.
    if not os.path.isfile(depth_path_full):
        return error_prefix + f"depth image not found: {depth_path_full}"
    if not os.path.isfile(img_path_full):
        return error_prefix + f"RGB image not found: {img_path_full}"

    try:
        # cv2.imread signals failure by returning None instead of raising.
        depth_raw = cv2.imread(depth_path_full, cv2.IMREAD_ANYDEPTH)
        if depth_raw is None:
            return error_prefix + f"failed to decode depth image: {depth_path_full}"

        # Scale the raw depth values to float32.
        # NOTE(review): 4000 presumably reflects MP3D's 0.25 mm depth units
        # (i.e. the result is in meters) - confirm against the dataset docs.
        depth = depth_raw.astype(np.float32) / 4000.0

        # Adjust the camera pose with the conversion matrix (right-multiplied).
        pose_adjusted = pose @ R_conv

        # Generate output filenames using a zero-padded frame index.
        basename = f"{i:06d}"
        out_img_path = os.path.join(out_rgb_dir, basename + ".png")
        out_depth_path = os.path.join(out_depth_dir, basename + ".npy")
        out_cam_path = os.path.join(out_cam_dir, basename + ".npz")

        # Copy the RGB image byte-for-byte (no re-encoding).
        # NOTE(review): the output is named ".png" regardless of the source
        # format - confirm downstream readers detect the format by content.
        shutil.copyfile(img_path_full, out_img_path)

        # Save the depth map.
        np.save(out_depth_path, depth)

        # Save the camera intrinsics and adjusted pose.
        np.savez(out_cam_path, intrinsics=K, pose=pose_adjusted)

    except Exception as e:
        return error_prefix + str(e)

    return None


def _read_overlap(overlap_file):
    """Parse an image-overlap text file into a NumPy array, one row per non-blank line."""
    rows = []
    with open(overlap_file, "r") as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Skip blank lines, matching how the camera file is parsed.
                continue
            # Fields 1 and 2 are read as ints and field 3 as a float
            # (presumably two image indices and their overlap score).
            rows.append([int(parts[1]), int(parts[2]), float(parts[3])])
    return np.array(rows)


def _parse_camera_file(cam_file):
    """Parse a camera .conf file into parallel lists (image_files, intrinsics, camera_poses)."""
    intrinsics = []
    camera_poses = []
    image_files = []

    current_intrinsics = None
    with open(cam_file, "r") as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            if parts[0] == "intrinsics_matrix":
                # The nine values are a row-major 3x3 matrix; pick fx, cx, fy, cy.
                fx, cx, fy, cy = (
                    float(parts[1]),
                    float(parts[3]),
                    float(parts[5]),
                    float(parts[6]),
                )
                current_intrinsics = np.array(
                    [[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32
                )
            elif parts[0] == "scan":
                # Depth filename, color filename, then 16 values of a 4x4 pose.
                image_files.append((parts[1], parts[2]))
                matrix_values = list(map(float, parts[3:]))
                camera_poses.append(np.array(matrix_values).reshape(4, 4))
                # Each scan inherits the most recent intrinsics line. A scan seen
                # before any intrinsics leaves the lists unequal in length, which
                # the caller detects.
                if current_intrinsics is not None:
                    intrinsics.append(current_intrinsics.copy())

    return image_files, intrinsics, camera_poses


def main():
    """
    Command-line entry point: preprocess every scan directory under --root_dir.

    For each scan, the image-overlap data is saved to <out_dir>/<scan>/overlap.npy,
    the camera file is parsed, and each frame is handed to `process_image` in a
    process pool, which writes to the `rgb`, `depth` and `cam` subdirectories.
    Scans whose camera file yields mismatched numbers of images, intrinsics and
    poses are reported and skipped. Per-frame errors are printed, not raised.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess MP3D scans: convert and save RGB images, depth maps, and camera metadata."
    )
    parser.add_argument(
        "--root_dir",
        type=str,
        default="/path/to/data_mp3d/v1/scans",
        help="Root directory of the raw MP3D data.",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        default="/path/to/processed_mp3d",
        help="Output directory for processed MP3D data.",
    )
    args = parser.parse_args()

    root = args.root_dir
    out_dir = args.out_dir

    # List sequence directories (each scan is stored as a separate directory).
    seqs = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))

    # Conversion matrix applied to every pose; it negates the Y and Z axes.
    R_conv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=np.float32
    )

    # Use half the CPUs, but never fewer than one worker: os.cpu_count() may
    # return None, and halving a single CPU gives 0, which ProcessPoolExecutor
    # rejects with ValueError.
    num_workers = max(1, (os.cpu_count() or 1) // 2)

    for seq in tqdm(seqs, desc="Sequences"):
        # The sequence directory structure assumes that images and depth files are stored
        # under a subdirectory with the same name as the sequence.
        seq_dir = os.path.join(root, seq, seq)

        img_dir = os.path.join(seq_dir, "undistorted_color_images")
        depth_dir = os.path.join(seq_dir, "undistorted_depth_images")
        cam_file = os.path.join(seq_dir, "undistorted_camera_parameters", f"{seq}.conf")
        overlap_file = os.path.join(seq_dir, "image_overlap_data", f"{seq}_iis.txt")

        # Read overlap data and save it alongside the per-frame outputs.
        overlap = _read_overlap(overlap_file)
        os.makedirs(os.path.join(out_dir, seq), exist_ok=True)
        np.save(os.path.join(out_dir, seq, "overlap.npy"), overlap)

        # Read camera parameters from the configuration file.
        image_files, intrinsics, camera_poses = _parse_camera_file(cam_file)

        if not (len(image_files) == len(intrinsics) == len(camera_poses)):
            print(f"Inconsistent data in sequence {seq}")
            continue

        # Prepare output directories.
        out_rgb_dir = os.path.join(out_dir, seq, "rgb")
        out_depth_dir = os.path.join(out_dir, seq, "depth")
        out_cam_dir = os.path.join(out_dir, seq, "cam")
        os.makedirs(out_rgb_dir, exist_ok=True)
        os.makedirs(out_depth_dir, exist_ok=True)
        os.makedirs(out_cam_dir, exist_ok=True)

        # One task tuple per frame, in the layout process_image unpacks.
        tasks = [
            (
                i,
                paths,
                K,
                pose,
                img_dir,
                depth_dir,
                out_rgb_dir,
                out_depth_dir,
                out_cam_dir,
                R_conv,
            )
            for i, (paths, K, pose) in enumerate(
                zip(image_files, intrinsics, camera_poses)
            )
        ]

        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            futures = [executor.submit(process_image, task) for task in tasks]
            for future in tqdm(
                as_completed(futures), total=len(futures), desc=f"Processing {seq}"
            ):
                # process_image returns an error string instead of raising.
                error = future.result()
                if error:
                    print(error)


# Run the preprocessing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()