# Spaces: liguang0115/vmem | Runtime error | File size: 7,901 Bytes | 2df809d

#!/usr/bin/env python3
"""
Preprocess Script for UrbanSyn Dataset

This script:
  1. Reads RGB, depth (EXR), and semantic segmentation (class) files from an UrbanSyn dataset directory.
  2. Retrieves camera intrinsics from a JSON metadata file.
  3. Rescales images, depth maps, and masks to a fixed resolution (e.g., 640×480).
  4. Saves processed data (RGB, .npy depth, .png sky mask, and .npz intrinsics) in an organized structure.

Usage:
    python preprocess_urbansyn.py \
        --input_dir /path/to/data_urbansyn \
        --output_dir /path/to/processed_urbansyn
"""

import os

# OpenCV's EXR codec is opt-in via this environment variable. Set it BEFORE
# cv2 is imported so the flag is already in the process environment whenever
# OpenCV initialises its image codecs (setting it afterwards is not guaranteed
# to be picked up on every OpenCV build/platform).
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

import json  # noqa: E402
import argparse  # noqa: E402
import shutil  # noqa: E402
from concurrent.futures import ProcessPoolExecutor, as_completed  # noqa: E402
import cv2  # noqa: E402
import numpy as np  # noqa: E402
from tqdm import tqdm  # noqa: E402
from PIL import Image  # noqa: E402

# Custom "cropping" module (ensure cropping.py is available/installed)
import cropping  # noqa: E402


def _read_depth_exr(depth_file):
    """
    Load a depth map from an EXR file without modifying the input dataset.

    If `depth_file` cannot be read and a sibling named '<depth_file>.1' exists,
    the sibling is copied to a temporary file carrying the '.exr' name and read
    from there. The source file is never renamed or moved, so re-running the
    script sees the dataset exactly as it was.

    Args:
        depth_file (str): Expected path of the '.exr' depth file.

    Returns:
        tuple: `(depth, error)`; exactly one element is None. `depth` is the
        array returned by `cv2.imread`, `error` is a human-readable message.
    """
    depth = cv2.imread(depth_file, cv2.IMREAD_UNCHANGED)
    if depth is not None:
        return depth, None

    alt_exr_1 = depth_file + ".1"
    if not os.path.exists(alt_exr_1):
        return None, f"Error reading depth file {depth_file}"

    import tempfile  # local import: only needed on this rarely-taken path

    # Copy (not rename) so the input directory is left untouched.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_exr = os.path.join(tmp_dir, os.path.basename(depth_file))
        shutil.copyfile(alt_exr_1, temp_exr)
        depth = cv2.imread(temp_exr, cv2.IMREAD_UNCHANGED)

    if depth is None:
        return None, f"Error reading depth file (fallback) {alt_exr_1}"
    return depth, None


def _build_intrinsics(cam_info, width, height):
    """
    Build a 3x3 float32 pinhole intrinsics matrix for a `width` x `height` image.

    Focal lengths in pixels are derived from the focal length and sensor size
    (all in millimetres) stored in `cam_info`; the principal point is placed at
    the image centre.
    """
    f_mm = cam_info["focalLength_mm"]
    w_mm = cam_info["sensorWidth_mm"]
    h_mm = cam_info["sensorHeight_mm"]
    K = np.eye(3, dtype=np.float32)
    K[0, 0] = f_mm / w_mm * width
    K[1, 1] = f_mm / h_mm * height
    K[0, 2] = width / 2
    K[1, 2] = height / 2
    return K


def process_basename(
    basename,
    rgb_dir,
    depth_dir,
    class_dir,
    cam_info,
    out_rgb_dir,
    out_depth_dir,
    out_mask_dir,
    out_cam_dir,
    output_resolution=(640, 480),
):
    """
    Process a single file triplet (RGB, depth, class) for a given basename.

    The function never raises for per-file problems; failures are reported
    through the return value so a worker pool can keep going.

    Args:
        basename (str): Base name without file extension (e.g., 'image_0001').
            Depth and class file names are derived by replacing 'rgb' in the
            basename with 'depth' and 'ss' respectively.
        rgb_dir (str): Directory containing RGB .png files.
        depth_dir (str): Directory containing .exr depth files.
        class_dir (str): Directory containing class .png files (semantic segmentation).
        cam_info (dict): Camera metadata with keys 'focalLength_mm',
            'sensorWidth_mm' and 'sensorHeight_mm'.
        out_rgb_dir (str): Output directory for rescaled RGB images.
        out_depth_dir (str): Output directory for rescaled depth files.
        out_mask_dir (str): Output directory for sky masks.
        out_cam_dir (str): Output directory for camera intrinsics.
        output_resolution (tuple): Target resolution forwarded unchanged to
            `cropping.rescale_image_depthmap` (presumably (width, height) --
            confirm against the cropping module). Defaults to (640, 480).

    Returns:
        str or None:
            - Returns None if successful (or if all outputs already exist).
            - Returns an error message if something fails.
    """

    # Construct output file paths
    out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
    out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
    out_mask_path = os.path.join(out_mask_dir, f"{basename}.png")
    out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

    # Skip only if every output exists; a partial set is regenerated.
    outputs = (out_img_path, out_depth_path, out_mask_path, out_cam_path)
    if all(os.path.exists(path) for path in outputs):
        return None

    try:
        # Build input file paths
        img_file = os.path.join(rgb_dir, f"{basename}.png")
        depth_file = os.path.join(depth_dir, f'{basename.replace("rgb", "depth")}.exr')
        class_file = os.path.join(class_dir, basename.replace("rgb", "ss") + ".png")

        # 1. Read RGB image (OpenCV loads BGR; convert for PIL)
        img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
        if img is None:
            return f"Error: Could not read image file {img_file}"
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        H, W = img.shape[:2]

        # 2. Read depth from EXR (with a non-destructive '.exr.1' fallback)
        depth, depth_error = _read_depth_exr(depth_file)
        if depth_error is not None:
            return depth_error
        # Raw EXR values are scaled by 1e5 (presumably the dataset's
        # normalisation factor to metric depth -- TODO confirm).
        depth *= 1e5

        # 3. Read class image, build sky mask
        cl = cv2.imread(class_file, cv2.IMREAD_UNCHANGED)
        if cl is None:
            return f"Error: Could not read class file {class_file}"
        # A single-channel label image has no channel axis to index into.
        class_ids = cl if cl.ndim == 2 else cl[..., 0]
        sky_mask = (class_ids == 10).astype(np.uint8)  # class ID 10 => sky

        # 4. Build camera intrinsics for the original resolution
        K = _build_intrinsics(cam_info, W, H)

        # 5. Combine depth + sky_mask so both are rescaled identically
        depth_with_mask = np.stack([depth, sky_mask], axis=-1)

        # 6. Rescale to desired size
        image_pil = Image.fromarray(img)
        image_rescaled, depth_with_mask_rescaled, K_rescaled = (
            cropping.rescale_image_depthmap(
                image_pil, depth_with_mask, K, output_resolution=output_resolution
            )
        )

        # Write outputs
        image_rescaled.save(out_img_path)
        np.save(out_depth_path, depth_with_mask_rescaled[..., 0])
        mask_u8 = (depth_with_mask_rescaled[..., 1] * 255).astype(np.uint8)
        # cv2.imwrite signals failure via its return value, not an exception.
        if not cv2.imwrite(out_mask_path, mask_u8):
            return f"Error: Could not write sky mask file {out_mask_path}"
        np.savez(out_cam_path, intrinsics=K_rescaled)

    except Exception as e:
        return f"Error processing {basename}: {e}"

    return None


def main(argv=None):
    """
    Command-line entry point: preprocess every RGB frame of an UrbanSyn dataset.

    Input problems (missing 'rgb' directory, no .png frames, missing or
    malformed 'camera_metadata.json') are reported with a message and an early
    return rather than a traceback. Inputs are validated before any output
    directory is created. Frames are processed in parallel worker processes;
    per-frame failures are printed and do not stop the run.

    Args:
        argv (list[str] | None): Argument list to parse. None (the default)
            makes argparse read `sys.argv[1:]`.

    Returns:
        None
    """
    parser = argparse.ArgumentParser(
        description="Preprocess UrbanSyn dataset by loading RGB/Depth/Seg "
        "and rescaling them with camera intrinsics."
    )
    parser.add_argument(
        "--input_dir", required=True, help="Path to the UrbanSyn dataset directory."
    )
    parser.add_argument(
        "--output_dir",
        required=True,
        help="Path to the directory where processed data will be stored.",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=None,
        help="Number of worker processes (default: number of CPUs).",
    )
    args = parser.parse_args(argv)

    input_dir = os.path.abspath(args.input_dir)
    output_dir = os.path.abspath(args.output_dir)

    # Define input locations
    rgb_dir = os.path.join(input_dir, "rgb")
    depth_dir = os.path.join(input_dir, "depth")
    class_dir = os.path.join(input_dir, "ss")
    meta_file = os.path.join(input_dir, "camera_metadata.json")

    # Validate inputs before touching the output location.
    if not os.path.isdir(rgb_dir):
        print(f"Error: RGB directory not found at {rgb_dir}. Exiting.")
        return

    # Gather basenames from RGB files
    basenames = sorted(
        os.path.splitext(fname)[0]
        for fname in os.listdir(rgb_dir)
        if fname.endswith(".png")
    )
    if not basenames:
        print(f"No RGB .png files found in {rgb_dir}. Exiting.")
        return

    # Load camera metadata
    if not os.path.isfile(meta_file):
        print(f"Error: metadata file not found at {meta_file}. Exiting.")
        return

    try:
        with open(meta_file, "r", encoding="utf-8") as f:
            cam_info_full = json.load(f)
        cam_info = cam_info_full["parameters"][0]["Camera"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers json.JSONDecodeError; the rest cover a wrong schema.
        print(f"Error: could not parse camera metadata in {meta_file}: {exc!r}. Exiting.")
        return

    # Define output subdirectories
    os.makedirs(output_dir, exist_ok=True)
    out_rgb_dir = os.path.join(output_dir, "rgb")
    out_depth_dir = os.path.join(output_dir, "depth")
    out_mask_dir = os.path.join(output_dir, "sky_mask")
    out_cam_dir = os.path.join(output_dir, "cam")
    for d in [out_rgb_dir, out_depth_dir, out_mask_dir, out_cam_dir]:
        os.makedirs(d, exist_ok=True)

    # Process in parallel; never request more workers than there are frames.
    requested = args.num_workers or os.cpu_count() or 1
    num_workers = max(1, min(requested, len(basenames)))
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = {
            executor.submit(
                process_basename,
                basename,
                rgb_dir,
                depth_dir,
                class_dir,
                cam_info,
                out_rgb_dir,
                out_depth_dir,
                out_mask_dir,
                out_cam_dir,
            ): basename
            for basename in basenames
        }

        # Use tqdm for progress
        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Processing UrbanSyn"
        ):
            try:
                error = future.result()
            except Exception as exc:
                # A crashed worker should be reported, not abort the whole run.
                error = f"Error processing {futures[future]}: {exc}"
            if error:
                # tqdm.write prints without corrupting the active progress bar.
                tqdm.write(error)

# Script entry point: run the preprocessing pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()