AIEM / trainer /utils /merge_cocos.py
lhhj
initial ppush
463b952
raw
history blame
3.23 kB
import os
import glob
from pathlib import Path
from datetime import date
from collections import defaultdict
from warnings import warn
from path_utils import *
def merge_cats_get_id(cats, this_cat):
    """Return the merged id for ``this_cat``, registering it if it is new.

    Categories are matched by their ``'name'`` entry.

    Args:
        cats: List of category dicts merged so far. A previously unseen
            category is appended to this list in place.
        this_cat: Category dict with at least a ``'name'`` key. When it is
            new, its ``'id'`` is overwritten with the next free merged id.

    Returns:
        The ``'id'`` of the category inside ``cats`` that carries the same
        name as ``this_cat``.
    """
    name = this_cat['name']

    # A category with this name is already registered: hand back the id it
    # got in the merged list, not the id it carries in the incoming dataset
    # (the two generally differ between datasets).
    for known_cat in cats:
        if known_cat['name'] == name:
            return known_cat['id']

    # First time this name is seen: merged ids are 1-based and sequential.
    this_cat['id'] = len(cats) + 1
    cats.append(this_cat)
    return this_cat['id']
def filter_images(images, annotations):
    """Keep only the images that are referenced by at least one annotation.

    Args:
        images: Iterable of image dicts, each with an ``'id'`` key.
        annotations: Iterable of annotation dicts, each with an
            ``'image_id'`` key.

    Returns:
        A new list with the image dicts whose ``'id'`` appears as the
        ``'image_id'`` of some annotation, in their original order.
    """
    # Build the lookup once as a set so each membership test is O(1) instead
    # of scanning the whole annotation list per image (ids must be hashable).
    annotated_ids = {ann['image_id'] for ann in annotations}
    return [img_info for img_info in images if img_info['id'] in annotated_ids]
def merge(jsons, img_roots, output_dir, output_nm="merged", verbose=True):
    """Merge several COCO-style annotation files into one dataset.

    For every (json, image root) pair the categories are folded into a shared
    category list, images that have annotations are copied into
    ``<output_dir>/images`` under a dataset-prefixed file name, and images and
    annotations receive fresh sequential ids starting at 1. The combined
    dictionary is written to ``<output_dir>/<output_nm>.json``.

    Args:
        jsons: Paths of the annotation files to merge.
        img_roots: Image directories, one per entry of ``jsons`` (same order).
        output_dir: Directory receiving the merged json and ``images`` folder.
        output_nm: Stem of the merged json file name.
        verbose: When true, print the final image and annotation counts.
    """
    assert len(jsons) == len(img_roots)

    target_dir = Path(output_dir)
    target_img_dir = target_dir / "images"

    # Next free ids in the merged dataset (1-based).
    next_img_id = 1
    next_ann_id = 1
    set_names = []

    # NOTE(review): the "data_created" key looks like a typo of COCO's
    # "date_created" -- left untouched because it is part of the output.
    merged_dict = {
        "info": {"description": "", "data_created": f"{date.today():%Y/%m/%d}"},
        "annotations": [],
        "categories": [],
        "images": [],
    }
    all_annotations = merged_dict['annotations']
    all_categories = merged_dict['categories']
    all_images = merged_dict['images']

    for json_path, imgs_dir_path in zip(jsons, img_roots):
        coco = read_coco_json(json_path)
        # presumably a short dataset identifier derived from the path;
        # see path_utils.get_setname to confirm
        set_name = get_setname(json_path)
        set_names.append(set_name)

        # categories: dataset-local id -> id in the merged category list
        cat_remap = {}
        for cat in coco['categories']:
            # Read the local id first: merge_cats_get_id may overwrite it.
            local_cat_id = cat['id']
            cat_remap[local_cat_id] = merge_cats_get_id(all_categories, cat)

        # images: drop the ones without annotations, renumber and copy the rest
        coco['images'] = filter_images(coco['images'], coco['annotations'])
        img_remap = {}
        for img in coco['images']:
            img_remap[img["id"]] = next_img_id
            img["id"] = next_img_id
            source_path = Path(imgs_dir_path) / img['file_name']
            # Prefix with the dataset name so files from different sets
            # do not overwrite each other in the shared output folder.
            img['file_name'] = set_name + "_" + img['file_name']
            assure_copy(source_path, target_img_dir / img['file_name'])
            next_img_id += 1
            all_images.append(img)

        # annotations: renumber and point at the remapped image/category ids
        for ann in coco['annotations']:
            ann.update(
                id=next_ann_id,
                image_id=img_remap[ann['image_id']],
                category_id=cat_remap[ann['category_id']],
            )
            next_ann_id += 1
            all_annotations.append(ann)

    merged_dict["info"]["description"] = "+".join(set_names)
    write_json(target_dir / f"{output_nm}.json", merged_dict)

    if not verbose:
        return
    print(f"Number of images: {len(merged_dict['images'])}")
    print(f"Number of annotations: {len(merged_dict['annotations'])}")
if __name__ == '__main__':
    # Merge every "dataset_*" entry found in the current working directory.
    # Each one is expected to hold an "images" folder and an
    # "annotations/instances_default.json" file.
    paths2images = []
    paths2json = []
    # glob.glob returns matches in arbitrary order; sort them so the ids
    # assigned during merging are reproducible from run to run.
    for dataset in sorted(glob.glob("dataset_*")):
        paths2images.append(os.path.join(dataset, "images"))
        paths2json.append(os.path.join(dataset, "annotations/instances_default.json"))
    merge(paths2json, paths2images, './merged_cocos', 'merged', verbose=True)