Spaces:
Build error
Build error
File size: 3,228 Bytes
463b952 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import os
import glob
from pathlib import Path
from datetime import date
from collections import defaultdict
from warnings import warn
from path_utils import *
def merge_cats_get_id(cats, this_cat):
    """Register ``this_cat`` in ``cats`` by name and return its merged id.

    Categories are matched on their ``'name'`` value. A name that is not yet
    present is appended to ``cats``; its ``'id'`` is overwritten with the next
    1-based position in the list (so ``this_cat`` itself is mutated). A name
    that is already present is not appended again, and the id of the entry
    already stored in ``cats`` is returned.

    Args:
        cats: List of category dicts merged so far, each with ``'name'`` and
            ``'id'`` keys. Modified in place when a new name is added.
        this_cat: Category dict to merge in; must have a ``'name'`` key.

    Returns:
        The ``'id'`` of the entry in ``cats`` that carries this name.
    """
    for known in cats:
        if known['name'] == this_cat['name']:
            # Return the id assigned in the merged list, not the id the
            # category happened to have in its source dataset.
            return known['id']

    this_cat['id'] = len(cats) + 1
    cats.append(this_cat)
    return this_cat['id']
def filter_images(images, annotations):
    """Return the image records referenced by at least one annotation.

    Args:
        images: Iterable of image dicts, each with an ``'id'`` key.
        annotations: Iterable of annotation dicts, each with an
            ``'image_id'`` key.

    Returns:
        A new list holding the entries of ``images`` (the same dict objects,
        in their original order) whose ``'id'`` equals the ``'image_id'`` of
        some annotation.
    """
    # Build the lookup once as a set so each membership test is O(1) instead
    # of scanning the whole annotation list for every image.
    annotated_ids = {ann['image_id'] for ann in annotations}
    return [img_info for img_info in images if img_info['id'] in annotated_ids]
def merge(jsons, img_roots, output_dir, output_nm="merged", verbose=True):
    """Merge several COCO-style annotation files into a single dataset.

    The inputs are processed pairwise, in order. For every pair the
    annotation file is loaded, its categories are folded into the merged
    category list by name, images without any annotation are dropped, and the
    remaining images and all annotations are renumbered with ids that keep
    counting up (from 1) across datasets. Each kept image is copied into
    ``<output_dir>/images`` under ``<dataset name>_<original file name>``.
    The combined dict is finally written to ``<output_dir>/<output_nm>.json``.

    ``read_coco_json``, ``get_setname``, ``assure_copy`` and ``write_json``
    are not defined in this file (presumably supplied by ``path_utils``);
    their exact behaviour is assumed from how they are called here.

    Args:
        jsons: Sequence of annotation file paths.
        img_roots: Sequence of image directories, one per entry in ``jsons``.
        output_dir: Directory receiving the merged JSON and ``images`` folder.
        output_nm: Stem of the merged JSON file name.
        verbose: When true, print the merged image and annotation counts.

    Raises:
        AssertionError: If ``jsons`` and ``img_roots`` differ in length.
    """
    assert len(jsons) == len(img_roots)

    out_root = Path(output_dir)
    images_out = out_root / "images"
    set_names = []

    # NOTE(review): the COCO format names this info field "date_created";
    # the key below is spelled "data_created" - confirm whether any consumer
    # relies on that spelling before changing it.
    merged = {
        "info": {"description": "", "data_created": f"{date.today():%Y/%m/%d}"},
        "annotations": [],
        "categories": [],
        "images": [],
    }

    for json_path, imgs_dir_path in zip(jsons, img_roots):
        coco = read_coco_json(json_path)
        set_name = get_setname(json_path)
        set_names.append(set_name)

        # Categories: remember how each source id maps into the merged list.
        new_cat_ids = {}
        for category in coco['categories']:
            # Read the source id first; merging may overwrite category['id'].
            source_cat_id = category['id']
            new_cat_ids[source_cat_id] = merge_cats_get_id(
                merged['categories'], category
            )

        # Images: keep only annotated ones, renumber, rename and copy them.
        coco['images'] = filter_images(coco['images'], coco['annotations'])
        new_img_ids = {}
        for image in coco['images']:
            # Ids are 1-based and continue across datasets, so the next id is
            # always one more than the number of images merged so far.
            next_img_id = len(merged['images']) + 1
            new_img_ids[image["id"]] = next_img_id
            image["id"] = next_img_id

            # Resolve the source path before the file name gets its prefix.
            source_path = Path(imgs_dir_path) / image['file_name']
            image['file_name'] = set_name + "_" + image['file_name']
            assure_copy(source_path, images_out / image['file_name'])

            merged['images'].append(image)

        # Annotations: renumber and re-point at the new image/category ids.
        for annotation in coco['annotations']:
            annotation['id'] = len(merged['annotations']) + 1
            annotation['image_id'] = new_img_ids[annotation['image_id']]
            annotation['category_id'] = new_cat_ids[annotation['category_id']]
            merged['annotations'].append(annotation)

    merged["info"]["description"] = "+".join(set_names)
    write_json(out_root / f"{output_nm}.json", merged)

    if verbose:
        print(f"Number of images: {len(merged['images'])}")
        print(f"Number of annotations: {len(merged['annotations'])}")
if __name__ == '__main__':
    # Merge every ``dataset_*`` entry found in the current working directory.
    # Each one is expected to hold an ``images`` folder and an
    # ``annotations/instances_default.json`` file.
    paths2images = []
    paths2json = []
    # glob.glob yields matches in arbitrary, filesystem-dependent order.
    # Sorting keeps the merge order - and therefore the ids assigned in the
    # merged file - reproducible between runs and machines.
    for dataset in sorted(glob.glob("dataset_*")):
        paths2images.append(os.path.join(dataset, "images"))
        paths2json.append(os.path.join(dataset, "annotations/instances_default.json"))
    merge(paths2json, paths2images, './merged_cocos', 'merged', verbose=True)
|