File size: 5,991 Bytes
2df809d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
"""
Preprocess processed_scannetpp scenes to update scene metadata.

This script reads each scene's "scene_metadata.npz", sorts images by timestamp,
updates trajectories, intrinsics, and pair indices, and builds two collections:
  - image_collection: For each image, stores pairs (other image index, score)
  - video_collection: For each image, groups subsequent images whose timestamps
                      differ by at most a given max_interval (and share the same
                      first character in the image name).

The new metadata is saved as "new_scene_metadata.npz" in each scene folder.

Usage:
    python generate_set_scannetpp.py --root /path/to/processed_scannetpp \
        --max_interval 150 --num_workers 8
"""

import os
import os.path as osp
import argparse
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


def get_timestamp(img_name):
    """
    Extract an integer timestamp from an image basename.

    Two naming schemes are supported:
      * 'DSC<number>'     -> the digits after the 'DSC' prefix.
      * '<tag>_<number>…' -> the second underscore-separated field.

    Args:
        img_name (str): Image basename without extension.

    Returns:
        int: The parsed timestamp.

    Raises:
        ValueError: If the expected field is not numeric.
        IndexError: If a non-DSC name contains no underscore-separated field.
    """
    return int(img_name[3:] if img_name.startswith("DSC") else img_name.split("_")[1])


def process_scene(root, scene, max_interval):
    """
    Re-index one scene's metadata and derive image/video collections.

    Loads "scene_metadata.npz", reorders images lexicographically by name
    (which matches timestamp order for the fixed-width names used here),
    remaps trajectories/intrinsics/pairs to the new order, and writes the
    result plus the derived collections to "new_scene_metadata.npz".

    Args:
        root (str): Root directory containing scene folders.
        scene (str): Scene folder name.
        max_interval (int): Maximum timestamp gap allowed inside a video group.
    """
    scene_dir = osp.join(root, scene)
    with np.load(osp.join(scene_dir, "scene_metadata.npz"), allow_pickle=True) as data:
        images = data["images"]
        trajectories = data["trajectories"]
        intrinsics = data["intrinsics"]
        pairs = data["pairs"]

    # Compute the name-sorted order of the original indices (stable sort,
    # key is the image name only).
    order = sorted(range(len(images)), key=lambda k: images[k])
    sorted_names = tuple(images[k] for k in order)
    indices = np.array(order)
    # Map original index -> position in the sorted order.
    index2sorted = {old: new for new, old in enumerate(order)}

    # Reorder per-image arrays to match the sorted image list.
    trajectories = trajectories[indices]
    intrinsics = intrinsics[indices]
    images = sorted_names

    # Rewrite each (id1, id2, score) pair in terms of sorted indices.
    pairs = [(index2sorted[a], index2sorted[b], score) for a, b, score in pairs]

    def _exists(name):
        # True iff the JPEG for this image name is present on disk.
        return osp.exists(osp.join(scene_dir, "images", name + ".jpg"))

    # image_collection: id1 -> [(id2, score), ...], keeping only pairs whose
    # two image files both exist.
    image_collection = {}
    for a, b, score in pairs:
        if _exists(images[a]) and _exists(images[b]):
            image_collection.setdefault(a, []).append((b, score))

    # video_collection: for each existing image i, collect subsequent images j
    # (in sorted order) while the timestamp gap stays within max_interval and
    # the first character of the name matches; stop at the first violation.
    video_collection = {}
    for i, name in enumerate(images):
        if not _exists(name):
            continue
        group = []
        video_collection[i] = group
        base_ts = get_timestamp(name)
        for j in range(i + 1, len(images)):
            if not _exists(images[j]):
                continue
            if get_timestamp(images[j]) - base_ts > max_interval or images[j][0] != name[0]:
                break
            group.append(j)

    # Persist everything alongside the original metadata.
    np.savez(
        osp.join(scene_dir, "new_scene_metadata.npz"),
        images=images,
        trajectories=trajectories,
        intrinsics=intrinsics,
        pairs=pairs,
        image_collection=image_collection,
        video_collection=video_collection,
    )
    print(f"Processed scene: {scene}")


def main(args):
    """
    Process every scene listed in <root>/all_metadata.npz in parallel.

    Args:
        args: Parsed CLI namespace with `root`, `max_interval`, and
            `num_workers` attributes.

    Raises:
        Exception: Any exception raised inside `process_scene` is re-raised
            here when its future's result is collected.
    """
    # The scene list lives in a single top-level metadata archive.
    with np.load(osp.join(args.root, "all_metadata.npz"), allow_pickle=True) as data:
        scenes = data["scenes"]

    # Fan the scenes out over a thread pool; tqdm tracks completions.
    with ThreadPoolExecutor(max_workers=args.num_workers) as executor:
        jobs = [
            executor.submit(process_scene, args.root, scene, args.max_interval)
            for scene in scenes
        ]
        for job in tqdm(as_completed(jobs), total=len(jobs), desc="Processing scenes"):
            job.result()  # propagate any worker exception


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Preprocess processed_scannetpp scenes to update scene metadata."
    )
    parser.add_argument(
        "--root",
        type=str,
        required=True,
        help="Root directory containing processed_scannetpp scene folders.",
    )
    parser.add_argument(
        "--max_interval",
        type=int,
        default=150,
        help="Maximum timestamp interval for grouping images (default: 150).",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=8,
        help="Number of worker threads for parallel processing (default: 8).",
    )
    args = parser.parse_args()
    main(args)