get_image_metas.py
3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Copyright (c) OpenMMLab. All rights reserved.
"""Get test image metas on a specific dataset.
Here is an example to run this script.
Example:
python tools/misc/get_image_metas.py ${CONFIG} \
--out ${OUTPUT FILE NAME}
"""
import argparse
import csv
import os.path as osp
from multiprocessing import Pool
import mmcv
from mmcv import Config
def parse_args():
parser = argparse.ArgumentParser(description='Collect image metas')
parser.add_argument('config', help='Config file path')
parser.add_argument(
'--out',
default='validation-image-metas.pkl',
help='The output image metas file name. The save dir is in the '
'same directory as `dataset.ann_file` path')
parser.add_argument(
'--nproc',
default=4,
type=int,
help='Processes used for get image metas')
args = parser.parse_args()
return args
def get_metas_from_csv_style_ann_file(ann_file):
data_infos = []
cp_filename = None
with open(ann_file, 'r') as f:
reader = csv.reader(f)
for i, line in enumerate(reader):
if i == 0:
continue
img_id = line[0]
filename = f'{img_id}.jpg'
if filename != cp_filename:
data_infos.append(dict(filename=filename))
cp_filename = filename
return data_infos
def get_metas_from_txt_style_ann_file(ann_file):
with open(ann_file) as f:
lines = f.readlines()
i = 0
data_infos = []
while i < len(lines):
filename = lines[i].rstrip()
data_infos.append(dict(filename=filename))
skip_lines = int(lines[i + 2]) + 3
i += skip_lines
return data_infos
def get_image_metas(data_info, img_prefix):
file_client = mmcv.FileClient(backend='disk')
filename = data_info.get('filename', None)
if filename is not None:
if img_prefix is not None:
filename = osp.join(img_prefix, filename)
img_bytes = file_client.get(filename)
img = mmcv.imfrombytes(img_bytes, flag='color')
meta = dict(filename=filename, ori_shape=img.shape)
else:
raise NotImplementedError('Missing `filename` in data_info')
return meta
def main():
args = parse_args()
assert args.out.endswith('pkl'), 'The output file name must be pkl suffix'
# load config files
cfg = Config.fromfile(args.config)
ann_file = cfg.data.test.ann_file
img_prefix = cfg.data.test.img_prefix
print(f'{"-" * 5} Start Processing {"-" * 5}')
if ann_file.endswith('csv'):
data_infos = get_metas_from_csv_style_ann_file(ann_file)
elif ann_file.endswith('txt'):
data_infos = get_metas_from_txt_style_ann_file(ann_file)
else:
shuffix = ann_file.split('.')[-1]
raise NotImplementedError('File name must be csv or txt suffix but '
f'get {shuffix}')
print(f'Successfully load annotation file from {ann_file}')
print(f'Processing {len(data_infos)} images...')
pool = Pool(args.nproc)
# get image metas with multiple processes
image_metas = pool.starmap(
get_image_metas,
zip(data_infos, [img_prefix for _ in range(len(data_infos))]),
)
pool.close()
# save image metas
root_path = cfg.data.test.ann_file.rsplit('/', 1)[0]
save_path = osp.join(root_path, args.out)
mmcv.dump(image_metas, save_path)
print(f'Image meta file save to: {save_path}')
if __name__ == '__main__':
main()