File size: 3,228 Bytes
463b952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import glob
from pathlib import Path
from datetime import date
from collections import defaultdict 
from warnings import warn 

from path_utils import *

def merge_cats_get_id(cats, this_cat):
    """Register *this_cat* in the merged category list and return its merged id.

    If a category with the same ``name`` is already present in ``cats``, the
    id of that existing entry is returned (BUGFIX: the previous version
    returned ``this_cat``'s *source-dataset* id instead, so annotations from
    later datasets were remapped to wrong category ids). Otherwise
    ``this_cat`` is appended with a fresh sequential id.

    Args:
        cats: list of merged category dicts (mutated in place on insert).
        this_cat: category dict from the source dataset; its ``id`` field is
            overwritten with the new merged id when it is inserted.

    Returns:
        The category id valid within the merged dataset.
    """
    for existing in cats:
        if existing['name'] == this_cat['name']:
            # Duplicate name: reuse the id already assigned in the merged list.
            return existing['id']
    # New category: ids are 1-based and sequential in insertion order.
    this_cat['id'] = len(cats) + 1
    cats.append(this_cat)
    return this_cat['id']
    

def filter_images(images, annotations):
    """Return only the image records that are referenced by at least one annotation.

    Args:
        images: list of COCO image dicts (each with an ``id`` key).
        annotations: list of COCO annotation dicts (each with an ``image_id`` key).

    Returns:
        New list containing the subset of ``images`` whose ``id`` appears in
        some annotation's ``image_id``; original order is preserved.
    """
    # Build the id set once: O(m) setup + O(1) membership per image,
    # instead of the previous O(n*m) list scans.
    referenced_ids = {ann['image_id'] for ann in annotations}
    return [img_info for img_info in images if img_info['id'] in referenced_ids]


def merge(jsons, img_roots, output_dir, output_nm="merged", verbose=True):
    """Merge several COCO-format datasets into a single one.

    For each (json, image-root) pair: categories are deduplicated by name,
    images and annotations are renumbered with globally unique sequential
    ids, image files are copied into ``<output_dir>/images`` with the
    dataset name prefixed to each file name, and the combined annotation
    file is written to ``<output_dir>/<output_nm>.json``.

    Args:
        jsons: paths to the COCO annotation JSON files, one per dataset.
        img_roots: image directories, positionally matching ``jsons``.
        output_dir: destination directory for the merged dataset.
        output_nm: stem of the merged annotation file (default ``"merged"``).
        verbose: when True, print merged image/annotation counts.
    """
    assert len(jsons) == len(img_roots)

    out_dir = Path(output_dir)
    out_imgs_dir = out_dir / "images"

    # Global 1-based counters so every image/annotation id is unique
    # across all source datasets.
    next_img_id = 1
    next_ann_id = 1
    dataset_names = []
    merged = {
        # NOTE(review): "data_created" looks like a typo for "date_created",
        # but it is an output JSON key — renaming it would break consumers.
        "info"       : {"description": "", "data_created": f"{date.today():%Y/%m/%d}"},
        "annotations": [],
        "categories" : [],
        "images"     : [] 
    }

    for json_path, imgs_root in zip(jsons, img_roots):
        coco = read_coco_json(json_path)
        set_name = get_setname(json_path)
        dataset_names.append(set_name)

        # Categories: map each source category id onto the merged id space.
        cat_map = {}
        for cat in coco['categories']:
            source_cat_id = cat['id']
            cat_map[source_cat_id] = merge_cats_get_id(merged['categories'], cat)

        # Images: drop unannotated ones, renumber, and copy the files
        # (prefixing the dataset name avoids file-name collisions).
        coco['images'] = filter_images(coco['images'], coco['annotations'])
        img_map = {}
        for img in coco['images']:
            img_map[img['id']] = next_img_id
            img['id'] = next_img_id

            src_path = Path(imgs_root) / img['file_name']
            img['file_name'] = set_name + "_" + img['file_name']
            assure_copy(src_path, out_imgs_dir / img['file_name'])

            next_img_id += 1
            merged['images'].append(img)

        # Annotations: renumber and point at the remapped image/category ids.
        for ann in coco['annotations']:
            ann['id'] = next_ann_id
            ann['image_id'] = img_map[ann['image_id']]
            ann['category_id'] = cat_map[ann['category_id']]

            next_ann_id += 1
            merged['annotations'].append(ann)

    merged["info"]["description"] = "+".join(dataset_names)

    write_json(out_dir / f"{output_nm}.json", merged)

    if verbose:
        print(f"Number of images: {len(merged['images'])}")
        print(f"Number of annotations: {len(merged['annotations'])}")


if __name__ == '__main__':
    # Discover every sibling directory named "dataset_*" and merge them.
    # glob order is kept as-is to match the original run order.
    datasets = glob.glob("dataset_*")
    paths2images = [os.path.join(d, "images") for d in datasets]
    paths2json = [
        os.path.join(d, "annotations/instances_default.json") for d in datasets
    ]

    merge(paths2json, paths2images, './merged_cocos', 'merged', verbose=True)