import os
import glob
from pathlib import Path
from datetime import date
from collections import defaultdict
from warnings import warn

from path_utils import *


def merge_cats_get_id(cats, this_cat):
    """Return the merged category id for ``this_cat``, registering it if new.

    Categories are matched by their ``'name'`` field.

    Args:
        cats: List of category dicts already merged; appended to in place
            when ``this_cat`` has a name not seen before.
        this_cat: Category dict with at least ``'name'``. When it is new,
            its ``'id'`` is overwritten with the next merged id.

    Returns:
        The id of the category inside ``cats`` that carries this name.
    """
    for known_cat in cats:
        if known_cat['name'] == this_cat['name']:
            # Return the id used in the merged list, not the id the incoming
            # dataset happened to use: two datasets may number the same
            # category names differently.
            return known_cat['id']

    # New name: ids are handed out sequentially, starting at 1.
    this_cat['id'] = len(cats) + 1
    cats.append(this_cat)
    return this_cat['id']


def filter_images(images, annotations):
    """Keep only the image entries referenced by at least one annotation.

    Args:
        images: List of image dicts, each with an ``'id'`` key.
        annotations: List of annotation dicts, each with an ``'image_id'``.

    Returns:
        A new list with the images whose id appears among the annotations'
        ``'image_id'`` values, in their original order.
    """
    # A set makes each membership test O(1) instead of scanning a list.
    annotated_img_ids = {ann['image_id'] for ann in annotations}
    return [
        img_info for img_info in images
        if img_info['id'] in annotated_img_ids
    ]


def merge(jsons, img_roots, output_dir, output_nm="merged", verbose=True):
    """Merge several COCO-style datasets into one annotation file.

    For every (json, image root) pair the categories are unified by name,
    images and annotations are renumbered with running ids starting at 1,
    and every kept image is passed to ``assure_copy`` with a destination of
    ``<output_dir>/images/<dataset_name>_<file_name>``.

    ``read_coco_json``, ``get_setname``, ``assure_copy`` and ``write_json``
    are not defined in this file; presumably they come from the
    ``path_utils`` star import -- confirm their exact semantics there.

    Args:
        jsons: Paths of the annotation files to merge.
        img_roots: Image directories, one per entry of ``jsons``.
        output_dir: Directory receiving the merged json and ``images/``.
        output_nm: Stem of the merged json file name.
        verbose: When true, print image and annotation totals at the end.
    """
    assert len(jsons) == len(img_roots), \
        "jsons and img_roots must have the same length"

    out_dir_path = Path(output_dir)
    out_imgs_dir_path = out_dir_path / "images"

    # Running counters that hand out ids unique across all datasets.
    merged_img_id_state = 1
    merged_ann_id_state = 1
    merged_names = []

    merged_dict = {
        # NOTE(review): the key is spelled "data_created"; "date_created"
        # may have been intended -- kept as is so existing readers of the
        # output are unaffected.
        "info": {
            "description": "",
            "data_created": f"{date.today():%Y/%m/%d}",
        },
        "annotations": [],
        "categories": [],
        "images": [],
    }

    for json_path, imgs_dir_path in zip(jsons, img_roots):
        coco_dict = read_coco_json(json_path)
        dataset_name = get_setname(json_path)
        merged_names.append(dataset_name)

        # categories
        cat_id_old2new = {}
        for cat in coco_dict['categories']:
            # Read the old id first: merge_cats_get_id may overwrite it.
            old_cat_id = cat['id']
            new_cat_id = merge_cats_get_id(merged_dict['categories'], cat)
            cat_id_old2new[old_cat_id] = new_cat_id

        # images
        coco_dict['images'] = filter_images(
            coco_dict['images'], coco_dict['annotations']
        )
        img_id_old2new = {}
        for img in coco_dict['images']:
            img_id_old2new[img["id"]] = merged_img_id_state
            img["id"] = merged_img_id_state

            old_img_path = Path(imgs_dir_path) / img['file_name']
            # Prefix with the dataset name so equal file names coming from
            # different datasets get distinct destinations.
            img['file_name'] = dataset_name + "_" + img['file_name']
            new_img_path = out_imgs_dir_path / img['file_name']
            assure_copy(old_img_path, new_img_path)

            merged_img_id_state += 1
            merged_dict['images'].append(img)

        # annotations
        for ann in coco_dict['annotations']:
            ann['id'] = merged_ann_id_state
            ann['image_id'] = img_id_old2new[ann['image_id']]
            ann['category_id'] = cat_id_old2new[ann['category_id']]
            merged_ann_id_state += 1
            merged_dict['annotations'].append(ann)

    merged_dict["info"]["description"] = "+".join(merged_names)
    out_json = out_dir_path / f"{output_nm}.json"
    write_json(out_json, merged_dict)

    if verbose:
        print(f"Number of images: {len(merged_dict['images'])}")
        print(f"Number of annotations: {len(merged_dict['annotations'])}")


if __name__ == '__main__':
    # Collect every "dataset_*" directory in the current working directory
    # and merge their default instance annotations.
    paths2images = []
    paths2json = []
    for dataset in glob.glob("dataset_*"):
        paths2images.append(os.path.join(dataset, "images"))
        paths2json.append(os.path.join(dataset, "annotations/instances_default.json"))
    merge(paths2json, paths2images, './merged_cocos', 'merged', verbose=True)