# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. """Get image metas on a specific dataset. Here is an example to run this script. Example: python tools/misc/get_image_metas.py ${CONFIG} \ --out ${OUTPUT FILE NAME} """ import argparse import csv import os.path as osp from multiprocessing import Pool import mmcv from mmengine.config import Config from mmengine.fileio import dump, get def parse_args(): parser = argparse.ArgumentParser(description='Collect image metas') parser.add_argument('config', help='Config file path') parser.add_argument( '--dataset', default='val', choices=['train', 'val', 'test'], help='Collect image metas from which dataset') parser.add_argument( '--out', default='validation-image-metas.pkl', help='The output image metas file name. The save dir is in the ' 'same directory as `dataset.ann_file` path') parser.add_argument( '--nproc', default=4, type=int, help='Processes used for get image metas') args = parser.parse_args() return args def get_metas_from_csv_style_ann_file(ann_file): data_infos = [] cp_filename = None with open(ann_file, 'r') as f: reader = csv.reader(f) for i, line in enumerate(reader): if i == 0: continue img_id = line[0] filename = f'{img_id}.jpg' if filename != cp_filename: data_infos.append(dict(filename=filename)) cp_filename = filename return data_infos def get_metas_from_txt_style_ann_file(ann_file): with open(ann_file) as f: lines = f.readlines() i = 0 data_infos = [] while i < len(lines): filename = lines[i].rstrip() data_infos.append(dict(filename=filename)) skip_lines = int(lines[i + 2]) + 3 i += skip_lines return data_infos def get_image_metas(data_info, img_prefix): filename = data_info.get('filename', None) if filename is not None: if img_prefix is not None: filename = osp.join(img_prefix, filename) img_bytes = get(filename) img = mmcv.imfrombytes(img_bytes, flag='color') shape = img.shape meta = dict(filename=filename, ori_shape=shape) else: raise NotImplementedError('Missing `filename` in data_info') return meta def main(): args = parse_args() assert args.out.endswith('pkl'), 'The output file name must be pkl suffix' # load config files cfg = Config.fromfile(args.config) dataloader_cfg = cfg.get(f'{args.dataset}_dataloader') ann_file = osp.join(dataloader_cfg.dataset.data_root, dataloader_cfg.dataset.ann_file) img_prefix = osp.join(dataloader_cfg.dataset.data_root, dataloader_cfg.dataset.data_prefix['img']) print(f'{"-" * 5} Start Processing {"-" * 5}') if ann_file.endswith('csv'): data_infos = get_metas_from_csv_style_ann_file(ann_file) elif ann_file.endswith('txt'): data_infos = get_metas_from_txt_style_ann_file(ann_file) else: shuffix = ann_file.split('.')[-1] raise NotImplementedError('File name must be csv or txt suffix but ' f'get {shuffix}') print(f'Successfully load annotation file from {ann_file}') print(f'Processing {len(data_infos)} images...') pool = Pool(args.nproc) # get image metas with multiple processes image_metas = pool.starmap( get_image_metas, zip(data_infos, [img_prefix for _ in range(len(data_infos))]), ) pool.close() # save image metas root_path = dataloader_cfg.dataset.ann_file.rsplit('/', 1)[0] save_path = osp.join(root_path, args.out) dump(image_metas, save_path, protocol=4) print(f'Image meta file save to: {save_path}') if __name__ == '__main__': main()