Spaces:
Build error
Build error
import os | |
import glob | |
from pathlib import Path | |
from datetime import date | |
from collections import defaultdict | |
from warnings import warn | |
from path_utils import * | |
def merge_cats_get_id(cats, this_cat):
    """Register ``this_cat`` in ``cats`` by name and return its merged id.

    Categories are matched on their ``'name'`` value.

    * If no entry in ``cats`` has the same name, ``this_cat`` gets the next
      free id (``len(cats) + 1``), is appended to ``cats`` and that new id
      is returned.
    * If an entry with the same name already exists, nothing is appended and
      the id of the *stored* entry is returned, so one name always maps to
      one merged id regardless of the id the source dataset used for it.

    Args:
        cats: List of category dicts collected so far. Mutated in place when
            a new category is added.
        this_cat: Category dict with at least a ``'name'`` key. Its ``'id'``
            is overwritten when the category is newly added.

    Returns:
        The id of the category inside ``cats``.
    """
    wanted_name = this_cat['name']
    for known_cat in cats:
        if known_cat['name'] == wanted_name:
            # Return the id already assigned in the merged list, not the
            # incoming dataset's own id, which may differ between datasets.
            return known_cat['id']

    this_cat['id'] = len(cats) + 1
    cats.append(this_cat)
    return this_cat['id']
def filter_images(images, annotations):
    """Return the image records that are referenced by an annotation.

    Args:
        images: Iterable of image dicts, each with an ``'id'`` key.
        annotations: Iterable of annotation dicts, each with an
            ``'image_id'`` key.

    Returns:
        A new list with the entries of ``images`` whose ``'id'`` occurs as
        the ``'image_id'`` of at least one annotation, in their original
        order.
    """
    # Collect the referenced ids once in a set so each membership test is
    # O(1) instead of scanning the whole annotation list per image.
    annotated_ids = {ann['image_id'] for ann in annotations}
    return [img_info for img_info in images if img_info['id'] in annotated_ids]
def merge(jsons, img_roots, output_dir, output_nm="merged", verbose=True):
    """Combine several COCO-style annotation files into a single dataset.

    For every ``(json, image root)`` pair the function:

    1. loads the annotation dict with ``read_coco_json`` and derives a
       dataset name with ``get_setname`` (both come from outside this
       block, presumably ``path_utils``),
    2. maps the dataset's category ids onto the merged category list via
       ``merge_cats_get_id``,
    3. keeps only the images returned by ``filter_images``, renumbers them
       with a running id, prefixes each ``file_name`` with
       ``"<dataset name>_"`` and hands the old and new paths to
       ``assure_copy``,
    4. renumbers the annotations with a running id and rewrites their
       ``image_id`` / ``category_id`` to the merged ids.

    The merged dict is then passed to ``write_json`` with the target path
    ``<output_dir>/<output_nm>.json``.

    Args:
        jsons: Sequence of annotation file paths.
        img_roots: Sequence of image directories, parallel to ``jsons``.
        output_dir: Directory for the merged json; images are targeted at
            its ``images`` sub-directory.
        output_nm: Stem of the merged json file name.
        verbose: When true, print the merged image and annotation counts.

    Raises:
        AssertionError: If ``jsons`` and ``img_roots`` differ in length.
        KeyError: If an annotation refers to an image id or category id
            that has no entry in the corresponding mapping.
    """
    assert len(jsons) == len(img_roots)

    dest_root = Path(output_dir)
    dest_images = dest_root / "images"

    # Running counters: ids in the merged dataset start at 1 and keep
    # increasing across all input datasets.
    next_img_id = 1
    next_ann_id = 1
    set_names = []

    # NOTE(review): the key is spelled "data_created"; COCO's info section
    # normally uses "date_created" -- confirm with consumers before renaming.
    combined = {
        "info": {"description": "", "data_created": f"{date.today():%Y/%m/%d}"},
        "annotations": [],
        "categories": [],
        "images": [],
    }

    for json_path, src_images in zip(jsons, img_roots):
        coco = read_coco_json(json_path)
        set_name = get_setname(json_path)
        set_names.append(set_name)

        # Categories: source id -> merged id. The source id must be read
        # before the call because merge_cats_get_id may overwrite it.
        cat_remap = {}
        for category in coco['categories']:
            source_cat_id = category['id']
            cat_remap[source_cat_id] = merge_cats_get_id(
                combined['categories'], category
            )

        # Images: drop those without annotations, then renumber and copy.
        kept_images = filter_images(coco['images'], coco['annotations'])
        coco['images'] = kept_images
        img_remap = {}
        for new_img_id, image in enumerate(kept_images, start=next_img_id):
            img_remap[image["id"]] = new_img_id
            image["id"] = new_img_id
            source_file = Path(src_images) / image['file_name']
            # Prefix with the dataset name in the merged image folder.
            image['file_name'] = set_name + "_" + image['file_name']
            assure_copy(source_file, dest_images / image['file_name'])
        next_img_id += len(kept_images)
        combined['images'].extend(kept_images)

        # Annotations: renumber and point at the merged image/category ids.
        set_annotations = coco['annotations']
        for new_ann_id, ann in enumerate(set_annotations, start=next_ann_id):
            ann['id'] = new_ann_id
            ann['image_id'] = img_remap[ann['image_id']]
            ann['category_id'] = cat_remap[ann['category_id']]
        next_ann_id += len(set_annotations)
        combined['annotations'].extend(set_annotations)

    combined["info"]["description"] = "+".join(set_names)
    write_json(dest_root / f"{output_nm}.json", combined)

    if verbose:
        print(f"Number of images: {len(combined['images'])}")
        print(f"Number of annotations: {len(combined['annotations'])}")
if __name__ == '__main__':
    # Merge every "dataset_*" directory found in the current working
    # directory. Each one is expected to hold an "images" folder and an
    # "annotations/instances_default.json" file.
    #
    # glob.glob() returns matches in arbitrary order, so sort them: the
    # merged ids and the "+"-joined description then come out the same on
    # every run and every machine.
    dataset_dirs = sorted(glob.glob("dataset_*"))
    paths2images = [os.path.join(dataset, "images") for dataset in dataset_dirs]
    paths2json = [
        os.path.join(dataset, "annotations/instances_default.json")
        for dataset in dataset_dirs
    ]
    merge(paths2json, paths2images, './merged_cocos', 'merged', verbose=True)