File size: 5,991 Bytes
2df809d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
"""
Preprocess processed_scannetpp scenes to update scene metadata.

This script reads each scene's "scene_metadata.npz", sorts images by timestamp,
updates trajectories, intrinsics, and pair indices, and builds two collections:
  - image_collection: For each image, stores pairs (other image index, score)
  - video_collection: For each image, groups subsequent images whose timestamps
                      differ by at most a given max_interval (and share the same
                      first character in the image name).

The new metadata is saved as "new_scene_metadata.npz" in each scene folder.

Usage:
    python generate_set_scannetpp.py --root /path/to/processed_scannetpp \
        --max_interval 150 --num_workers 8
"""

import os
import os.path as osp
import argparse
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


def get_timestamp(img_name):
    """
    Extract an integer timestamp from an image basename.

    Two naming schemes are supported:
      * 'DSC<number>'     -> the digits after the 'DSC' prefix.
      * '<tag>_<number>…' -> the second underscore-separated field.

    Args:
        img_name (str): Image basename without extension.

    Returns:
        int: The parsed timestamp.

    Raises:
        ValueError: If the expected field is not numeric.
        IndexError: If a non-DSC name contains no underscore-separated field.
    """
    return int(img_name[3:] if img_name.startswith("DSC") else img_name.split("_")[1])


def process_scene(root, scene, max_interval):
    """
    Re-index one scene's metadata and derive image/video collections.

    Loads "scene_metadata.npz", reorders images lexicographically by name
    (which matches timestamp order for the fixed-width names used here),
    remaps trajectories/intrinsics/pairs to the new order, and writes the
    result plus the derived collections to "new_scene_metadata.npz".

    Args:
        root (str): Root directory containing scene folders.
        scene (str): Scene folder name.
        max_interval (int): Maximum timestamp gap allowed inside a video group.
    """
    scene_dir = osp.join(root, scene)
    with np.load(osp.join(scene_dir, "scene_metadata.npz"), allow_pickle=True) as data:
        images = data["images"]
        trajectories = data["trajectories"]
        intrinsics = data["intrinsics"]
        pairs = data["pairs"]

    # Compute the name-sorted order of the original indices (stable sort,
    # key is the image name only).
    order = sorted(range(len(images)), key=lambda k: images[k])
    sorted_names = tuple(images[k] for k in order)
    indices = np.array(order)
    # Map original index -> position in the sorted order.
    index2sorted = {old: new for new, old in enumerate(order)}

    # Reorder per-image arrays to match the sorted image list.
    trajectories = trajectories[indices]
    intrinsics = intrinsics[indices]
    images = sorted_names

    # Rewrite each (id1, id2, score) pair in terms of sorted indices.
    pairs = [(index2sorted[a], index2sorted[b], score) for a, b, score in pairs]

    def _exists(name):
        # True iff the JPEG for this image name is present on disk.
        return osp.exists(osp.join(scene_dir, "images", name + ".jpg"))

    # image_collection: id1 -> [(id2, score), ...], keeping only pairs whose
    # two image files both exist.
    image_collection = {}
    for a, b, score in pairs:
        if _exists(images[a]) and _exists(images[b]):
            image_collection.setdefault(a, []).append((b, score))

    # video_collection: for each existing image i, collect subsequent images j
    # (in sorted order) while the timestamp gap stays within max_interval and
    # the first character of the name matches; stop at the first violation.
    video_collection = {}
    for i, name in enumerate(images):
        if not _exists(name):
            continue
        group = []
        video_collection[i] = group
        base_ts = get_timestamp(name)
        for j in range(i + 1, len(images)):
            if not _exists(images[j]):
                continue
            if get_timestamp(images[j]) - base_ts > max_interval or images[j][0] != name[0]:
                break
            group.append(j)

    # Persist everything alongside the original metadata.
    np.savez(
        osp.join(scene_dir, "new_scene_metadata.npz"),
        images=images,
        trajectories=trajectories,
        intrinsics=intrinsics,
        pairs=pairs,
        image_collection=image_collection,
        video_collection=video_collection,
    )
    print(f"Processed scene: {scene}")


def main(args):
    """
    Process every scene listed in <root>/all_metadata.npz in parallel.

    Args:
        args: Parsed CLI namespace with `root`, `max_interval`, and
            `num_workers` attributes.

    Raises:
        Exception: Any exception raised inside `process_scene` is re-raised
            here when its future's result is collected.
    """
    # The scene list lives in a single top-level metadata archive.
    with np.load(osp.join(args.root, "all_metadata.npz"), allow_pickle=True) as data:
        scenes = data["scenes"]

    # Fan the scenes out over a thread pool; tqdm tracks completions.
    with ThreadPoolExecutor(max_workers=args.num_workers) as executor:
        jobs = [
            executor.submit(process_scene, args.root, scene, args.max_interval)
            for scene in scenes
        ]
        for job in tqdm(as_completed(jobs), total=len(jobs), desc="Processing scenes"):
            job.result()  # propagate any worker exception


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Preprocess processed_scannetpp scenes to update scene metadata."
    )
    parser.add_argument(
        "--root",
        type=str,
        required=True,
        help="Root directory containing processed_scannetpp scene folders.",
    )
    parser.add_argument(
        "--max_interval",
        type=int,
        default=150,
        help="Maximum timestamp interval for grouping images (default: 150).",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=8,
        help="Number of worker threads for parallel processing (default: 8).",
    )
    args = parser.parse_args()
    main(args)