# Spaces: Sleeping | File size: 2,686 Bytes | revision d1df841
import os
import random
import shutil
import glob
from tqdm import tqdm
def sample_images_and_features(image_folder, feature_folder, sample_size, dest_image_folder, dest_feature_folder):
    """
    Randomly sample resized images and copy them, with their features, to new folders.

    Candidate images are the files in ``image_folder`` matching ``resized_*.jpg``,
    ``resized_*.png`` or ``resized_*.jpeg``. For every sampled image
    ``<base>.<ext>`` the CLIP feature file ``<base>_clip.npy`` and the caption
    feature file ``<base>_caption.npy`` are copied from ``feature_folder`` when
    they exist; a missing feature file is skipped without error.

    Args:
        image_folder (str): Path to the folder containing resized images.
        feature_folder (str): Path to the folder containing feature files.
        sample_size (int): Number of images to sample.
        dest_image_folder (str): Destination folder for sampled images.
        dest_feature_folder (str): Destination folder for sampled feature files.

    Raises:
        ValueError: If ``sample_size`` is negative or larger than the number of
            matching images in ``image_folder``.
    """
    if sample_size < 0:
        raise ValueError("sample_size must be non-negative.")

    # Get all resized image file names
    image_files = []
    for extension in ("jpg", "png", "jpeg"):
        pattern = os.path.join(image_folder, f"resized_*.{extension}")
        image_files.extend(glob.glob(pattern))

    # Validate before touching the filesystem so that a failed call does not
    # leave empty destination folders behind.
    if len(image_files) < sample_size:
        raise ValueError("Not enough resized images in the source folder.")

    # Ensure destination folders exist
    os.makedirs(dest_image_folder, exist_ok=True)
    os.makedirs(dest_feature_folder, exist_ok=True)

    # Sample a subset of image files (without replacement)
    sampled_images = random.sample(image_files, sample_size)

    # Copy images and corresponding feature files
    for image_path in tqdm(sampled_images):
        base_name, _ = os.path.splitext(os.path.basename(image_path))

        # copy2 preserves file metadata in addition to the contents
        shutil.copy2(image_path, dest_image_folder)

        # Feature files are optional: copy whichever of the two exist
        for feature_kind in ("clip", "caption"):
            feature_path = os.path.join(feature_folder, f"{base_name}_{feature_kind}.npy")
            if os.path.exists(feature_path):
                shutil.copy2(feature_path, dest_feature_folder)
if __name__ == "__main__":
    from pathlib import Path

    # Resolve every folder relative to the directory containing this script,
    # so the result does not depend on the current working directory.
    PROJECT_ROOT = Path(__file__).resolve().parent

    image_folder = str(PROJECT_ROOT / "data/images")
    feature_folder = str(PROJECT_ROOT / "data/features")
    sample_size = 10
    dest_image_folder = str(PROJECT_ROOT / "data_temp/images")
    dest_feature_folder = str(PROJECT_ROOT / "data_temp/features")

    # Copy a random sample of 10 images plus their feature files into data_temp/
    sample_images_and_features(
        image_folder,
        feature_folder,
        sample_size,
        dest_image_folder,
        dest_feature_folder,
    )