File size: 4,086 Bytes
28c256d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

"""Get image metas on a specific dataset.

Here is an example of how to run this script.

Example:
    python tools/misc/get_image_metas.py ${CONFIG} \
    --out ${OUTPUT FILE NAME}
"""
import argparse
import csv
import os.path as osp
from multiprocessing import Pool

import mmcv
from mmengine.config import Config
from mmengine.fileio import dump, get


def parse_args():
    """Build the command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``config``
        (positional), ``dataset``, ``out`` and ``nproc``.
    """
    cli = argparse.ArgumentParser(description='Collect image metas')

    # Positional argument: the config file to load.
    cli.add_argument('config', help='Config file path')

    # Which ``<split>_dataloader`` entry of the config to inspect.
    dataset_splits = ['train', 'val', 'test']
    cli.add_argument(
        '--dataset',
        choices=dataset_splits,
        default='val',
        help='Collect image metas from which dataset')

    out_help = ('The output image metas file name. The save dir is in the '
                'same directory as `dataset.ann_file` path')
    cli.add_argument(
        '--out', default='validation-image-metas.pkl', help=out_help)

    # Size of the worker pool used when reading images.
    cli.add_argument(
        '--nproc',
        type=int,
        default=4,
        help='Processes used for get image metas')

    return cli.parse_args()


def get_metas_from_csv_style_ann_file(ann_file):
    """Collect image filenames from a CSV-style annotation file.

    The first row is treated as a header and skipped. For every other row
    the first column is taken as the image id and mapped to the filename
    ``'<img_id>.jpg'``. Consecutive rows that yield the same filename are
    collapsed into a single entry; equal filenames that are not adjacent
    are kept as separate entries. Blank rows are ignored.

    Args:
        ann_file (str): Path to the CSV annotation file.

    Returns:
        list[dict]: One ``dict(filename=...)`` per run of identical image
        ids, in file order. Empty if the file has no data rows.
    """
    data_infos = []
    prev_filename = None
    # ``newline=''`` is what the csv module expects so that it can handle
    # line endings (and newlines embedded in quoted fields) itself.
    with open(ann_file, 'r', newline='') as f:
        reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError, so skip such rows.
            if not row:
                continue
            filename = f'{row[0]}.jpg'
            if filename != prev_filename:
                data_infos.append(dict(filename=filename))
                prev_filename = filename
    return data_infos


def get_metas_from_txt_style_ann_file(ann_file):
    """Collect image filenames from a txt-style annotation file.

    The file is read as a sequence of records. Within a record, the first
    line is the image filename and the third line is an integer ``n``; the
    record spans ``n + 3`` lines in total, so the next record starts
    ``n + 3`` lines further down.

    Args:
        ann_file (str): Path to the txt annotation file.

    Returns:
        list[dict]: One ``dict(filename=...)`` per record, in file order.
    """
    with open(ann_file) as ann:
        records = ann.readlines()

    metas = []
    total = len(records)
    cursor = 0
    while cursor < total:
        # First line of the record: the filename (trailing whitespace cut).
        metas.append({'filename': records[cursor].rstrip()})
        # Third line of the record: how many extra lines follow the header.
        num_entries = int(records[cursor + 2])
        cursor += num_entries + 3
    return metas


def get_image_metas(data_info, img_prefix):
    """Load one image and report its path and shape.

    Args:
        data_info (dict): Image record; its ``'filename'`` entry is used.
        img_prefix (str | None): Directory joined in front of the filename.
            When ``None`` the filename is used as is.

    Returns:
        dict: ``filename`` (the path that was read) and ``ori_shape`` (the
        ``.shape`` of the image decoded by ``mmcv.imfrombytes`` with
        ``flag='color'``).

    Raises:
        NotImplementedError: If ``data_info`` has no ``'filename'`` entry
            or the entry is ``None``.
    """
    rel_name = data_info.get('filename')
    if rel_name is None:
        raise NotImplementedError('Missing `filename` in data_info')

    if img_prefix is None:
        img_path = rel_name
    else:
        img_path = osp.join(img_prefix, rel_name)

    # Fetch the raw bytes first, then decode them into an image array.
    raw = get(img_path)
    decoded = mmcv.imfrombytes(raw, flag='color')
    return dict(filename=img_path, ori_shape=decoded.shape)


def main():
    """Collect image metas for one dataset split and dump them to a pkl.

    Steps:
        1. Parse the command line and load the config file.
        2. Resolve the annotation file and image prefix from
           ``<dataset>_dataloader.dataset`` (both joined with ``data_root``).
        3. List the images from the csv- or txt-style annotation file.
        4. Read every image in a process pool to obtain its shape.
        5. Dump the list of metas into the directory that contains the
           resolved annotation file.

    Raises:
        ValueError: If ``--out`` does not end with ``pkl``.
        NotImplementedError: If the annotation file is neither csv nor txt.
    """
    args = parse_args()
    # Explicit raise rather than ``assert`` so the check is not stripped
    # when Python runs with ``-O``.
    if not args.out.endswith('pkl'):
        raise ValueError('The output file name must be pkl suffix')

    # load config files
    cfg = Config.fromfile(args.config)
    dataloader_cfg = cfg.get(f'{args.dataset}_dataloader')
    dataset_cfg = dataloader_cfg.dataset
    ann_file = osp.join(dataset_cfg.data_root, dataset_cfg.ann_file)
    img_prefix = osp.join(dataset_cfg.data_root,
                          dataset_cfg.data_prefix['img'])

    print(f'{"-" * 5} Start Processing {"-" * 5}')
    if ann_file.endswith('csv'):
        data_infos = get_metas_from_csv_style_ann_file(ann_file)
    elif ann_file.endswith('txt'):
        data_infos = get_metas_from_txt_style_ann_file(ann_file)
    else:
        suffix = ann_file.split('.')[-1]
        raise NotImplementedError('File name must be csv or txt suffix but '
                                  f'get {suffix}')

    print(f'Successfully load annotation file from {ann_file}')
    print(f'Processing {len(data_infos)} images...')
    # get image metas with multiple processes; the context manager makes
    # sure the workers are cleaned up even if reading an image raises.
    # ``starmap`` blocks until every result is available, so leaving the
    # ``with`` block afterwards cannot drop pending work.
    with Pool(args.nproc) as pool:
        image_metas = pool.starmap(
            get_image_metas,
            [(data_info, img_prefix) for data_info in data_infos],
        )

    # save image metas next to the resolved annotation file.
    # ``osp.dirname`` returns '' for a bare file name, whereas
    # ``rsplit('/', 1)[0]`` returned the file name itself and produced a
    # bogus ``<ann_file>/<out>`` path. Using the resolved path (including
    # ``data_root``) matches the documented "same directory as
    # `dataset.ann_file`" behaviour of ``--out``.
    save_path = osp.join(osp.dirname(ann_file), args.out)
    dump(image_metas, save_path, protocol=4)
    print(f'Image meta file save to: {save_path}')


# Run the collection pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()