File size: 2,686 Bytes
d1df841
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import random
import shutil
import glob

from tqdm import tqdm

def sample_images_and_features(image_folder, feature_folder, sample_size, dest_image_folder, dest_feature_folder):
    """
    Randomly sample resized images and copy them, with their feature files, to new folders.

    Candidate images are the files directly inside ``image_folder`` whose names
    match ``resized_*`` with a ``.jpg``, ``.png`` or ``.jpeg`` extension. For
    every sampled image ``<base>.<ext>`` the files ``<base>_clip.npy`` and
    ``<base>_caption.npy`` are looked up in ``feature_folder`` and copied when
    present; a missing feature file is skipped without error.

    Args:
        image_folder (str): Path to the folder containing resized images.
        feature_folder (str): Path to the folder containing feature files.
        sample_size (int): Number of images to sample.
        dest_image_folder (str): Destination folder for sampled images.
        dest_feature_folder (str): Destination folder for sampled feature files.

    Raises:
        ValueError: If ``sample_size`` is negative, or if fewer than
            ``sample_size`` matching images exist in ``image_folder``.
    """
    if sample_size < 0:
        raise ValueError("sample_size must be non-negative.")

    # Escape the folder part so glob metacharacters in the directory name
    # ("[", "]", "*", "?") are matched literally instead of as a pattern.
    escaped_folder = glob.escape(os.fspath(image_folder))

    image_files = []
    for extension in ("jpg", "png", "jpeg"):
        pattern = os.path.join(escaped_folder, f"resized_*.{extension}")
        image_files.extend(glob.glob(pattern))

    # glob returns entries in arbitrary directory order; sorting makes the
    # sample reproducible when the caller seeds the `random` module.
    image_files.sort()

    # Validate before touching the filesystem so a failed call does not
    # leave empty destination folders behind.
    if len(image_files) < sample_size:
        raise ValueError("Not enough resized images in the source folder.")

    os.makedirs(dest_image_folder, exist_ok=True)
    os.makedirs(dest_feature_folder, exist_ok=True)

    sampled_images = random.sample(image_files, sample_size)

    for image_path in tqdm(sampled_images):
        base_name, _ = os.path.splitext(os.path.basename(image_path))

        # copy2 also attempts to preserve file metadata (e.g. timestamps).
        shutil.copy2(image_path, dest_image_folder)

        # Feature files are optional: copy whichever of the two exist.
        for suffix in ("clip", "caption"):
            feature_path = os.path.join(feature_folder, f"{base_name}_{suffix}.npy")
            if os.path.exists(feature_path):
                shutil.copy2(feature_path, dest_feature_folder)

if __name__ == "__main__":
    from pathlib import Path

    # All folders are resolved relative to the directory containing this script.
    script_dir = Path(__file__).resolve().parent

    # Copy 10 randomly chosen images (and any matching feature files)
    # from data/ into data_temp/.
    sample_images_and_features(
        image_folder=str(script_dir / "data/images"),
        feature_folder=str(script_dir / "data/features"),
        sample_size=10,
        dest_image_folder=str(script_dir / "data_temp/images"),
        dest_feature_folder=str(script_dir / "data_temp/features"),
    )