8cec0024 by yy_think

init

0 parents
Showing 1000 changed files with 4718 additions and 0 deletions


1 ### Example user template
3
4 # IntelliJ project files
5 .idea
6 *.iml
7 out
8 gen
9 map
1 # detection config file
2 config = dict(
3
4 data=dict(
5 image_files='test.txt', # images to detect: a .txt list, a directory, or a single .jpg/.png
6 ),
7 model=dict(
8 cuda='cpu', # device string passed to init_detector, e.g. 'cpu' or 'cuda:0'
9 class_txt='config/id/id_classes.txt',
10 config_files=['config/id/cascade_rcnn_r101_fpn_1x_coco_multiscale.py', 'config/id/faster_rcnn_r101_fpn_2x_coco_multiscale.py'],
11 checkpoint_files=['model/id/cascade_rcnn_r101_fpn_1x_multiscale_epoch_20.pth', 'model/id/faster_rcnn_r101_fpn_2x_multiscale_epoch_24.pth'],
12 ),
13 fusion=dict(
14 type=['weighted_boxes_fusion'],
15 class_list=[[1, 5], [0, 2, 3, 4]],
16 weight_list=[1, 1], # relative weight of each model in the fusion
17 iou=0.5, # IoU threshold used when fusing boxes
18 score=0.4, # keep fused boxes only above this score
19 skip_box_thr=0.0001 # drop input boxes whose score is below this threshold
20 ),
21 show=False,
22 save_txt=True,
23 save_image=True,
24 out_dir='../out/test_20220516'
25 )
26
27
28 def load_config():
29 return config
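
For reference, the fusion block above maps onto the ensemble_boxes API roughly as follows. This is a minimal standalone sketch, not code from this repo; the boxes, scores and labels are made-up, and weighted_boxes_fusion expects [x1, y1, x2, y2] coordinates normalized to [0, 1]:

from ensemble_boxes import weighted_boxes_fusion

boxes_list = [[[0.10, 0.10, 0.50, 0.50]], # model 1: one box
[[0.12, 0.11, 0.51, 0.49]]] # model 2: a near-duplicate of the same box
scores_list = [[0.9], [0.8]]
labels_list = [[1], [1]]
boxes, scores, labels = weighted_boxes_fusion(
boxes_list, scores_list, labels_list,
weights=[1, 1], iou_thr=0.5, skip_box_thr=0.0001) # iou/skip_box_thr values from the config above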
1 model = dict(
2 type='FasterRCNN',
3 backbone=dict(
4 type='ResNet',
5 depth=101,
6 num_stages=4,
7 out_indices=(0, 1, 2, 3),
8 frozen_stages=1,
9 norm_cfg=dict(type='BN', requires_grad=True),
10 norm_eval=True,
11 style='pytorch',
12 init_cfg=dict(type='Pretrained',
13 checkpoint='torchvision://resnet101')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[0.0, 0.0, 0.0, 0.0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 bbox_roi_extractor=dict(
38 type='SingleRoIExtractor',
39 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 out_channels=256,
41 featmap_strides=[4, 8, 16, 32]),
42 bbox_head=dict(
43 type='Shared2FCBBoxHead',
44 in_channels=256,
45 fc_out_channels=1024,
46 roi_feat_size=7,
47 num_classes=2,
48 bbox_coder=dict(
49 type='DeltaXYWHBBoxCoder',
50 target_means=[0.0, 0.0, 0.0, 0.0],
51 target_stds=[0.1, 0.1, 0.2, 0.2]),
52 reg_class_agnostic=False,
53 loss_cls=dict(
54 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 train_cfg=dict(
57 rpn=dict(
58 assigner=dict(
59 type='MaxIoUAssigner',
60 pos_iou_thr=0.7,
61 neg_iou_thr=0.3,
62 min_pos_iou=0.3,
63 match_low_quality=True,
64 ignore_iof_thr=-1),
65 sampler=dict(
66 type='RandomSampler',
67 num=256,
68 pos_fraction=0.5,
69 neg_pos_ub=-1,
70 add_gt_as_proposals=False),
71 allowed_border=-1,
72 pos_weight=-1,
73 debug=False),
74 rpn_proposal=dict(
75 nms_pre=2000,
76 max_per_img=1000,
77 nms=dict(type='nms', iou_threshold=0.7),
78 min_bbox_size=0),
79 rcnn=dict(
80 assigner=dict(
81 type='MaxIoUAssigner',
82 pos_iou_thr=0.5,
83 neg_iou_thr=0.5,
84 min_pos_iou=0.5,
85 match_low_quality=False,
86 ignore_iof_thr=-1),
87 sampler=dict(
88 type='RandomSampler',
89 num=512,
90 pos_fraction=0.25,
91 neg_pos_ub=-1,
92 add_gt_as_proposals=True),
93 pos_weight=-1,
94 debug=False)),
95 test_cfg=dict(
96 rpn=dict(
97 nms_pre=1000,
98 max_per_img=1000,
99 nms=dict(type='nms', iou_threshold=0.7),
100 min_bbox_size=0),
101 rcnn=dict(
102 score_thr=0.05,
103 nms=dict(type='nms', iou_threshold=0.5),
104 max_per_img=100)))
105 dataset_type = 'CocoDataset'
106 data_root = '/home/dyy/annotations/id'
107 img_norm_cfg = dict(
108 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
109 train_pipeline = [
110 dict(type='LoadImageFromFile'),
111 dict(type='LoadAnnotations', with_bbox=True),
112 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
113 dict(type='RandomFlip', flip_ratio=0.5),
114 dict(
115 type='Normalize',
116 mean=[123.675, 116.28, 103.53],
117 std=[58.395, 57.12, 57.375],
118 to_rgb=True),
119 dict(type='Pad', size_divisor=32),
120 dict(type='DefaultFormatBundle'),
121 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
122 ]
123 test_pipeline = [
124 dict(type='LoadImageFromFile'),
125 dict(
126 type='MultiScaleFlipAug',
127 img_scale=(1333, 800),
128 flip=False,
129 transforms=[
130 dict(type='Resize', keep_ratio=True),
131 dict(type='RandomFlip'),
132 dict(
133 type='Normalize',
134 mean=[123.675, 116.28, 103.53],
135 std=[58.395, 57.12, 57.375],
136 to_rgb=True),
137 dict(type='Pad', size_divisor=32),
138 dict(type='ImageToTensor', keys=['img']),
139 dict(type='Collect', keys=['img'])
140 ])
141 ]
142 data = dict(
143 samples_per_gpu=2,
144 workers_per_gpu=2,
145 train=dict(
146 type='CocoDataset',
147 ann_file=
148 '/home/dyy/annotations/head/coco/annotations/instances_train2017.json',
149 img_prefix='/home/dyy/annotations/head/coco/train2017/',
150 pipeline=[
151 dict(type='LoadImageFromFile'),
152 dict(type='LoadAnnotations', with_bbox=True),
153 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
154 dict(type='RandomFlip', flip_ratio=0.5),
155 dict(
156 type='Normalize',
157 mean=[123.675, 116.28, 103.53],
158 std=[58.395, 57.12, 57.375],
159 to_rgb=True),
160 dict(type='Pad', size_divisor=32),
161 dict(type='DefaultFormatBundle'),
162 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
163 ]),
164 val=dict(
165 type='CocoDataset',
166 ann_file=
167 '/home/dyy/annotations/head/coco/annotations/instances_val2017.json',
168 img_prefix='/home/dyy/annotations/head/coco/val2017/',
169 pipeline=[
170 dict(type='LoadImageFromFile'),
171 dict(
172 type='MultiScaleFlipAug',
173 img_scale=(1333, 800),
174 flip=False,
175 transforms=[
176 dict(type='Resize', keep_ratio=True),
177 dict(type='RandomFlip'),
178 dict(
179 type='Normalize',
180 mean=[123.675, 116.28, 103.53],
181 std=[58.395, 57.12, 57.375],
182 to_rgb=True),
183 dict(type='Pad', size_divisor=32),
184 dict(type='ImageToTensor', keys=['img']),
185 dict(type='Collect', keys=['img'])
186 ])
187 ]),
188 test=dict(
189 type='CocoDataset',
190 ann_file=
191 '/home/dyy/annotations/head/coco/annotations/instances_val2017.json',
192 img_prefix='/home/dyy/annotations/head/coco/val2017/',
193 pipeline=[
194 dict(type='LoadImageFromFile'),
195 dict(
196 type='MultiScaleFlipAug',
197 img_scale=(1333, 800),
198 flip=False,
199 transforms=[
200 dict(type='Resize', keep_ratio=True),
201 dict(type='RandomFlip'),
202 dict(
203 type='Normalize',
204 mean=[123.675, 116.28, 103.53],
205 std=[58.395, 57.12, 57.375],
206 to_rgb=True),
207 dict(type='Pad', size_divisor=32),
208 dict(type='ImageToTensor', keys=['img']),
209 dict(type='Collect', keys=['img'])
210 ])
211 ]))
212 evaluation = dict(interval=1, metric='bbox')
213 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
214 optimizer_config = dict(grad_clip=None)
215 lr_config = dict(
216 policy='step',
217 warmup='linear',
218 warmup_iters=500,
219 warmup_ratio=0.001,
220 step=[16, 22])
221 runner = dict(type='EpochBasedRunner', max_epochs=24)
222 checkpoint_config = dict(interval=1)
223 log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
224 custom_hooks = [dict(type='NumClassCheckHook')]
225 dist_params = dict(backend='nccl')
226 log_level = 'INFO'
227 load_from = None
228 resume_from = None
229 workflow = [('train', 1)]
230 opencv_num_threads = 0
231 mp_start_method = 'fork'
232 work_dir = '/home/dyy/project/mmdetection/work_dirs/head/faster_rcnn_r101_fpn_2x_coco.py'
233 auto_resume = False
234 gpu_ids = [0]
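
This file has the shape of the full config that mmdetection dumps into work_dir when a run starts. Assuming a stock mmdetection 2.x checkout, training from such a config would look roughly like the following (the path is a placeholder, not taken from this repo):

python tools/train.py path/to/faster_rcnn_r101_fpn_2x_coco.py --work-dir work_dirs/head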
1 head
2 hand
1 model = dict(
2 type='FasterRCNN',
3 backbone=dict(
4 type='ResNet',
5 depth=101,
6 num_stages=4,
7 out_indices=(0, 1, 2, 3),
8 frozen_stages=1,
9 norm_cfg=dict(type='BN', requires_grad=True),
10 norm_eval=True,
11 style='pytorch',
12 init_cfg=dict(type='Pretrained',
13 checkpoint='torchvision://resnet101')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[0.0, 0.0, 0.0, 0.0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 bbox_roi_extractor=dict(
38 type='SingleRoIExtractor',
39 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 out_channels=256,
41 featmap_strides=[4, 8, 16, 32]),
42 bbox_head=dict(
43 type='Shared2FCBBoxHead',
44 in_channels=256,
45 fc_out_channels=1024,
46 roi_feat_size=7,
47 num_classes=6,
48 bbox_coder=dict(
49 type='DeltaXYWHBBoxCoder',
50 target_means=[0.0, 0.0, 0.0, 0.0],
51 target_stds=[0.1, 0.1, 0.2, 0.2]),
52 reg_class_agnostic=False,
53 loss_cls=dict(
54 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 train_cfg=dict(
57 rpn=dict(
58 assigner=dict(
59 type='MaxIoUAssigner',
60 pos_iou_thr=0.7,
61 neg_iou_thr=0.3,
62 min_pos_iou=0.3,
63 match_low_quality=True,
64 ignore_iof_thr=-1),
65 sampler=dict(
66 type='RandomSampler',
67 num=256,
68 pos_fraction=0.5,
69 neg_pos_ub=-1,
70 add_gt_as_proposals=False),
71 allowed_border=-1,
72 pos_weight=-1,
73 debug=False),
74 rpn_proposal=dict(
75 nms_pre=2000,
76 max_per_img=1000,
77 nms=dict(type='nms', iou_threshold=0.7),
78 min_bbox_size=0),
79 rcnn=dict(
80 assigner=dict(
81 type='MaxIoUAssigner',
82 pos_iou_thr=0.5,
83 neg_iou_thr=0.5,
84 min_pos_iou=0.5,
85 match_low_quality=False,
86 ignore_iof_thr=-1),
87 sampler=dict(
88 type='RandomSampler',
89 num=512,
90 pos_fraction=0.25,
91 neg_pos_ub=-1,
92 add_gt_as_proposals=True),
93 pos_weight=-1,
94 debug=False)),
95 test_cfg=dict(
96 rpn=dict(
97 nms_pre=1000,
98 max_per_img=1000,
99 nms=dict(type='nms', iou_threshold=0.7),
100 min_bbox_size=0),
101 rcnn=dict(
102 score_thr=0.05,
103 nms=dict(type='nms', iou_threshold=0.5),
104 max_per_img=100)))
105
106 dataset_type = 'CocoDataset'
107 data_root = '/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian'
108 img_norm_cfg = dict(
109 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
110 train_pipeline = [
111 dict(type='LoadImageFromFile'),
112 dict(type='LoadAnnotations', with_bbox=True),
113 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
114 dict(type='RandomFlip', flip_ratio=0.5),
115 dict(
116 type='Normalize',
117 mean=[123.675, 116.28, 103.53],
118 std=[58.395, 57.12, 57.375],
119 to_rgb=True),
120 dict(type='Pad', size_divisor=32),
121 dict(type='DefaultFormatBundle'),
122 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
123 ]
124 test_pipeline = [
125 dict(type='LoadImageFromFile'),
126 dict(
127 type='MultiScaleFlipAug',
128 img_scale=(1333, 800),
129 flip=False,
130 transforms=[
131 dict(type='Resize', keep_ratio=True),
132 dict(type='RandomFlip'),
133 dict(
134 type='Normalize',
135 mean=[123.675, 116.28, 103.53],
136 std=[58.395, 57.12, 57.375],
137 to_rgb=True),
138 dict(type='Pad', size_divisor=32),
139 dict(type='ImageToTensor', keys=['img']),
140 dict(type='Collect', keys=['img'])
141 ])
142 ]
143 data = dict(
144 samples_per_gpu=2,
145 workers_per_gpu=2,
146 train=dict(
147 type='CocoDataset',
148 ann_file='/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian/annotations/instances_train2017.json',
149 img_prefix='/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian/images/',
150 pipeline=[
151 dict(type='LoadImageFromFile'),
152 dict(type='LoadAnnotations', with_bbox=True),
153 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
154 dict(type='RandomFlip', flip_ratio=0.5),
155 dict(
156 type='Normalize',
157 mean=[123.675, 116.28, 103.53],
158 std=[58.395, 57.12, 57.375],
159 to_rgb=True),
160 dict(type='Pad', size_divisor=32),
161 dict(type='DefaultFormatBundle'),
162 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
163 ]),
164 val=dict(
165 type='CocoDataset',
166 ann_file='/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian/annotations/instances_val2017.json',
167 img_prefix='/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian/images/',
168 pipeline=[
169 dict(type='LoadImageFromFile'),
170 dict(
171 type='MultiScaleFlipAug',
172 img_scale=(1333, 800),
173 flip=False,
174 transforms=[
175 dict(type='Resize', keep_ratio=True),
176 dict(type='RandomFlip'),
177 dict(
178 type='Normalize',
179 mean=[123.675, 116.28, 103.53],
180 std=[58.395, 57.12, 57.375],
181 to_rgb=True),
182 dict(type='Pad', size_divisor=32),
183 dict(type='ImageToTensor', keys=['img']),
184 dict(type='Collect', keys=['img'])
185 ])
186 ]),
187 test=dict(
188 type='CocoDataset',
189 ann_file='/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian/annotations/instances_val2017.json',
190 img_prefix='/home/situ/Disk6T/dyy/work/mmdetection/data/yuanjian/images/',
191 pipeline=[
192 dict(type='LoadImageFromFile'),
193 dict(
194 type='MultiScaleFlipAug',
195 img_scale=(1333, 800),
196 flip=False,
197 transforms=[
198 dict(type='Resize', keep_ratio=True),
199 dict(type='RandomFlip'),
200 dict(
201 type='Normalize',
202 mean=[123.675, 116.28, 103.53],
203 std=[58.395, 57.12, 57.375],
204 to_rgb=True),
205 dict(type='Pad', size_divisor=32),
206 dict(type='ImageToTensor', keys=['img']),
207 dict(type='Collect', keys=['img'])
208 ])
209 ]))
210 evaluation = dict(interval=1, metric='bbox')
211 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
212 optimizer_config = dict(grad_clip=None)
213 lr_config = dict(
214 policy='step',
215 warmup='linear',
216 warmup_iters=500,
217 warmup_ratio=0.001,
218 step=[16, 22])
219 runner = dict(type='EpochBasedRunner', max_epochs=24)
220 checkpoint_config = dict(interval=1)
221 log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
222 custom_hooks = [dict(type='NumClassCheckHook')]
223 dist_params = dict(backend='nccl')
224 log_level = 'INFO'
225 load_from = None
226 resume_from = None
227 workflow = [('train', 1)]
228 opencv_num_threads = 0
229 mp_start_method = 'fork'
1 model = dict(
2 type='FasterRCNN',
3 backbone=dict(
4 type='ResNet',
5 depth=101,
6 num_stages=4,
7 out_indices=(0, 1, 2, 3),
8 frozen_stages=1,
9 norm_cfg=dict(type='BN', requires_grad=True),
10 norm_eval=True,
11 style='pytorch',
12 init_cfg=dict(type='Pretrained',
13 checkpoint='torchvision://resnet101')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[0.0, 0.0, 0.0, 0.0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 bbox_roi_extractor=dict(
38 type='SingleRoIExtractor',
39 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 out_channels=256,
41 featmap_strides=[4, 8, 16, 32]),
42 bbox_head=dict(
43 type='Shared2FCBBoxHead',
44 in_channels=256,
45 fc_out_channels=1024,
46 roi_feat_size=7,
47 num_classes=6,
48 bbox_coder=dict(
49 type='DeltaXYWHBBoxCoder',
50 target_means=[0.0, 0.0, 0.0, 0.0],
51 target_stds=[0.1, 0.1, 0.2, 0.2]),
52 reg_class_agnostic=False,
53 loss_cls=dict(
54 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 train_cfg=dict(
57 rpn=dict(
58 assigner=dict(
59 type='MaxIoUAssigner',
60 pos_iou_thr=0.7,
61 neg_iou_thr=0.3,
62 min_pos_iou=0.3,
63 match_low_quality=True,
64 ignore_iof_thr=-1),
65 sampler=dict(
66 type='RandomSampler',
67 num=256,
68 pos_fraction=0.5,
69 neg_pos_ub=-1,
70 add_gt_as_proposals=False),
71 allowed_border=-1,
72 pos_weight=-1,
73 debug=False),
74 rpn_proposal=dict(
75 nms_pre=2000,
76 max_per_img=1000,
77 nms=dict(type='nms', iou_threshold=0.7),
78 min_bbox_size=0),
79 rcnn=dict(
80 assigner=dict(
81 type='MaxIoUAssigner',
82 pos_iou_thr=0.5,
83 neg_iou_thr=0.5,
84 min_pos_iou=0.5,
85 match_low_quality=False,
86 ignore_iof_thr=-1),
87 sampler=dict(
88 type='RandomSampler',
89 num=512,
90 pos_fraction=0.25,
91 neg_pos_ub=-1,
92 add_gt_as_proposals=True),
93 pos_weight=-1,
94 debug=False)),
95 test_cfg=dict(
96 rpn=dict(
97 nms_pre=1000,
98 max_per_img=1000,
99 nms=dict(type='nms', iou_threshold=0.7),
100 min_bbox_size=0),
101 rcnn=dict(
102 score_thr=0.05,
103 nms=dict(type='nms', iou_threshold=0.5),
104 max_per_img=100)))
105 dataset_type = 'CocoDataset'
106 data_root = '/home/dyy/annotations/id/coco'
107 img_norm_cfg = dict(
108 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
109 train_pipeline = [
110 dict(type='LoadImageFromFile'),
111 dict(type='LoadAnnotations', with_bbox=True),
112 dict(
113 type='Resize',
114 img_scale=[(416, 416), (512, 512), (618, 618), (1333, 800)],
115 keep_ratio=True,
116 multiscale_mode='value'),
117 dict(type='RandomFlip', flip_ratio=0.5),
118 dict(
119 type='Normalize',
120 mean=[123.675, 116.28, 103.53],
121 std=[58.395, 57.12, 57.375],
122 to_rgb=True),
123 dict(type='Pad', size_divisor=32),
124 dict(type='DefaultFormatBundle'),
125 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
126 ]
127 test_pipeline = [
128 dict(type='LoadImageFromFile'),
129 dict(
130 type='MultiScaleFlipAug',
131 img_scale=(1333, 800),
132 flip=False,
133 transforms=[
134 dict(
135 type='Resize',
136 img_scale=[(416, 416), (512, 512), (618, 618), (1333, 800)],
137 keep_ratio=True,
138 multiscale_mode='value'),
139 dict(type='RandomFlip'),
140 dict(
141 type='Normalize',
142 mean=[123.675, 116.28, 103.53],
143 std=[58.395, 57.12, 57.375],
144 to_rgb=True),
145 dict(type='Pad', size_divisor=32),
146 dict(type='ImageToTensor', keys=['img']),
147 dict(type='Collect', keys=['img'])
148 ])
149 ]
150 data = dict(
151 samples_per_gpu=2,
152 workers_per_gpu=2,
153 train=dict(
154 type='CocoDataset',
155 ann_file=
156 '/home/dyy/annotations/id/coco/annotations/instances_train2017.json',
157 img_prefix='/home/dyy/annotations/id/coco/train2017',
158 pipeline=[
159 dict(type='LoadImageFromFile'),
160 dict(type='LoadAnnotations', with_bbox=True),
161 dict(
162 type='Resize',
163 img_scale=[(416, 416), (512, 512), (618, 618), (1333, 800)],
164 keep_ratio=True,
165 multiscale_mode='value'),
166 dict(type='RandomFlip', flip_ratio=0.5),
167 dict(
168 type='Normalize',
169 mean=[123.675, 116.28, 103.53],
170 std=[58.395, 57.12, 57.375],
171 to_rgb=True),
172 dict(type='Pad', size_divisor=32),
173 dict(type='DefaultFormatBundle'),
174 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
175 ]),
176 val=dict(
177 type='CocoDataset',
178 ann_file=
179 '/home/dyy/annotations/id/coco/annotations/instances_val2017.json',
180 img_prefix='/home/dyy/annotations/id/coco/val2017',
181 pipeline=[
182 dict(type='LoadImageFromFile'),
183 dict(
184 type='MultiScaleFlipAug',
185 img_scale=(1333, 800),
186 flip=False,
187 transforms=[
188 dict(
189 type='Resize',
190 img_scale=[(416, 416), (512, 512), (618, 618),
191 (1333, 800)],
192 keep_ratio=True,
193 multiscale_mode='value'),
194 dict(type='RandomFlip'),
195 dict(
196 type='Normalize',
197 mean=[123.675, 116.28, 103.53],
198 std=[58.395, 57.12, 57.375],
199 to_rgb=True),
200 dict(type='Pad', size_divisor=32),
201 dict(type='ImageToTensor', keys=['img']),
202 dict(type='Collect', keys=['img'])
203 ])
204 ]),
205 test=dict(
206 type='CocoDataset',
207 ann_file=
208 '/home/dyy/annotations/id/coco/annotations/instances_val2017.json',
209 img_prefix='/home/dyy/annotations/id/coco/val2017',
210 pipeline=[
211 dict(type='LoadImageFromFile'),
212 dict(
213 type='MultiScaleFlipAug',
214 img_scale=(1333, 800),
215 flip=False,
216 transforms=[
217 dict(type='Resize', keep_ratio=True),
218 dict(type='RandomFlip'),
219 dict(
220 type='Normalize',
221 mean=[123.675, 116.28, 103.53],
222 std=[58.395, 57.12, 57.375],
223 to_rgb=True),
224 dict(type='Pad', size_divisor=32),
225 dict(type='ImageToTensor', keys=['img']),
226 dict(type='Collect', keys=['img'])
227 ])
228 ]))
229 evaluation = dict(interval=1, metric='bbox')
230 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
231 optimizer_config = dict(grad_clip=None)
232 lr_config = dict(
233 policy='step',
234 warmup='linear',
235 warmup_iters=500,
236 warmup_ratio=0.001,
237 step=[16, 22])
238 runner = dict(type='EpochBasedRunner', max_epochs=24)
239 checkpoint_config = dict(interval=1)
240 log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
241 custom_hooks = [dict(type='NumClassCheckHook')]
242 dist_params = dict(backend='nccl')
243 log_level = 'INFO'
244 load_from = None
245 resume_from = None
246 workflow = [('train', 1)]
247 opencv_num_threads = 0
248 mp_start_method = 'fork'
249 fp16 = dict(loss_scale=512.0)
250 work_dir = '/home/dyy/project/mmdetection/work_dirs/id/faster_rcnn_r101_fpn_2x_coco_multiscale'
251 auto_resume = False
252 gpu_ids = [0]
1 id_front
2 id_back
3 zhiye_front
4 zhiye_back
5 file
6 phone
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # YuanJian2022 dataset config, adapted from the Ultralytics COCO128 template (https://www.kaggle.com/ultralytics/coco128)
3 # Example usage: python train.py --data coco128.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── YuanJian2022 ← dataset root ('path' below)
8
9
10 # Download script/URL (optional)
11 # download: bash data/scripts/get_coco.sh
12
13 # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
14 path: /home/situ/Disk6T/dyy/work/Yolov5/yolov5/datasets/YuanJian2022
15 train: /home/situ/Disk6T/dyy/work/Yolov5/yolov5/input/yuanjian_0112/train_0112.txt # training image list
16 val: /home/situ/Disk6T/dyy/work/Yolov5/yolov5/input/yuanjian_0112/valid_0112.txt # validation image list
17
18
19 # Classes
20 nc: 6 # number of classes
21 names: ['id_front', 'id_back', 'zhiye_front', 'zhiye_back', 'file', 'phone'] # class names
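
Assuming a stock YOLOv5 checkout, training against this dataset file would look something like the line below; the yaml path, weights and image size are illustrative, not values taken from this repo:

python train.py --data path/to/this_yaml.yaml --weights yolov5s.pt --img 640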
1 MODEL:
2 META_ARCHITECTURE: "RetinaNet"
3 BACKBONE:
4 NAME: "build_retinanet_resnet_fpn_backbone"
5 RESNETS:
6 DEPTH: 101
7 OUT_FEATURES: ["res3", "res4", "res5"]
8 ANCHOR_GENERATOR:
9 SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
10 FPN:
11 IN_FEATURES: ["res3", "res4", "res5"]
12 RETINANET:
13 IOU_THRESHOLDS: [0.4, 0.5]
14 IOU_LABELS: [0, -1, 1]
15 SMOOTH_L1_LOSS_BETA: 0.0
16 WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
17
18 DATASETS:
19 TRAIN: ("id_2017_train",)
20 TEST: ("id_2017_train",)
21 SOLVER:
22 IMS_PER_BATCH: 4
23 BASE_LR: 0.001 # Note that RetinaNet uses a different default learning rate
24 STEPS: (210000, 250000)
25 MAX_ITER: 270000
26 INPUT:
27 MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
28 VERSION: 2
29
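The DATASETS block above names "id_2017_train", which is not a detectron2 builtin, so (assuming the annotations are in COCO format) it would have to be registered before training. A minimal sketch with placeholder paths:

from detectron2.data.datasets import register_coco_instances

# hypothetical paths; the name must match the one used in the YAML above
register_coco_instances('id_2017_train', {},
'path/to/instances_train.json', 'path/to/images/')

After registration, a run such as python tools/train_net.py --config-file <this yaml> can resolve the dataset.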
1 id_front
2 id_back
3 zhiye_front
4 zhiye_back
5 file
6 phone
1 import glob
2 import shutil
3 import os
4 from config.config import load_config
5 import mmcv
6 from ensemble_boxes import weighted_boxes_fusion
7 import cv2
8
9
10 # create the directory (and any missing parents) if it does not exist
11 def init_dir(file_dir):
12 if not os.path.exists(file_dir):
13 os.makedirs(file_dir)
14
15
16 # normalize the input spec into a list of image paths
17 def init_input(cfg):
18 image_files = cfg['image_files']
19 if isinstance(image_files, list) and os.path.splitext(image_files[0])[-1] in ['.jpg', '.png']: # lists must be handled before the str-only os.path checks
20 pass # already a list of image paths
21 elif os.path.isdir(image_files):
22 image_files = [os.path.join(image_files, image_file) for image_file in os.listdir(image_files)]
23 elif os.path.exists(image_files):
24 if os.path.splitext(image_files)[-1] in ['.jpg', '.png']:
25 image_files = [image_files]
26 elif os.path.splitext(image_files)[-1] in ['.txt']:
27 image_files = [os.path.abspath(line.strip('\n').split(' ')[0]).replace('\\', '/') for line in open(image_files).readlines()]
28 elif '*.jpg' in image_files or '*.png' in image_files:
29 image_files = glob.glob(image_files)
30 else:
31 print('error input: ', image_files)
32 return
33 print(image_files)
34 return image_files
35
36
37 # flatten mmdet output into [class, score, x1, y1, x2, y2] rows, filtering by confidence score
38 def mmdet_out(out, score_thr=0.5):
39 out_list = []
40 for i, label_list in enumerate(out):
41 for label in label_list:
42 if float(label[4]) < score_thr: # label[4] is the confidence score, not an IoU
43 continue
44 out_list.append([i, label[4], label[0], label[1], label[2], label[3]])
45 return out_list
46
47
48 # normalize box coordinates to [0, 1] and clip out-of-range values
49 def box_normalize(box, size):
50 box[0] = box[0] / size[0]
51 box[1] = box[1] / size[1]
52 box[2] = box[2] / size[0]
53 box[3] = box[3] / size[1]
54 for i, s in enumerate(box):
55 if s > 1:
56 box[i] = 1
57 elif s < 0:
58 box[i] = 0
59 return box
60
61
62 # map a normalized box back to pixel coordinates
63 def box_re_std(box, size):
64 box[0] = box[0] * size[0]
65 box[1] = box[1] * size[1]
66 box[2] = box[2] * size[0]
67 box[3] = box[3] * size[1]
68 return box.tolist()
69
70
71 # fuse the boxes produced by the different models
72 def boxes_fusion(cfg, boxes_list):
73 rs = []
74 if 'class_fusion' in cfg['type']:
75 assert len(cfg['class_list']) == len(boxes_list)
76 for i in range(len(boxes_list)):
77 for box in boxes_list[i]:
78 if box[0] in cfg['class_list'][i]: # keep only the classes assigned to model i
79 rs.append(box)
80
81 if 'weighted_boxes_fusion' in cfg['type']:
82 if rs:
83 boxes_list.append(rs)
84 cfg['weight_list'].append(1)
85 rs = [] # the class-fused boxes now enter WBF as an extra model, so start the result list fresh
86 scores_list = [[box[1] for box in boxes] for boxes in boxes_list]
87 labels_list = [[int(box[0]) for box in boxes] for boxes in boxes_list]
88 boxes_list = [[box_normalize(box[2:], cfg['size']) for box in boxes] for boxes in boxes_list]
89 boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=cfg['weight_list'],
90 iou_thr=cfg['iou'], skip_box_thr=cfg['skip_box_thr'])
91 for i, box in enumerate(boxes):
92 if scores[i] > cfg['score']:
93 rs.append([labels[i], scores[i]] + box_re_std(box, cfg['size']))
94 return rs
95
96
97 # run every configured mmdet model on one image and optionally fuse the results
98 def mmdetect(models, fusion, img):
99 if os.path.exists(models['class_txt']):
100 shutil.copy(models['class_txt'], 'data/mmdet_classes.txt')
101
102 from mmdet.apis import init_detector, inference_detector
103 boxes_list = []
104 if isinstance(img, str):
105 img = mmcv.imread(img)
106 fusion['size'] = img.shape[:-1][::-1] # (width, height)
107 for i, config in enumerate(models['config_files']):
108 model = init_detector(config, os.path.abspath(models['checkpoint_files'][i]), device=models['cuda'])
109 out = inference_detector(model, img)
110 out = mmdet_out(out)
111 boxes_list.append(out)
112
113 if fusion:
114 boxes_list = boxes_fusion(fusion, boxes_list)
115 print(boxes_list)
116 return boxes_list
117
118
119 def run():
120 cfg = load_config() # load the config
121
122 # set up the output directories
123 init_dir(cfg['out_dir'])
124 if cfg['save_image']:
125 image_dir = os.path.join(cfg['out_dir'], 'image')
126 init_dir(image_dir)
127 if cfg['save_txt']:
128 txt_dir = os.path.join(cfg['out_dir'], 'label')
129 init_dir(txt_dir)
130
131 image_files = init_input(cfg['data'])
132
133 # detect
134 for image_file in image_files:
135 image = cv2.imread(image_file)
136 boxes = mmdetect(cfg['model'], cfg['fusion'], image_file)
137
138 if cfg['save_txt']:
139 out_txt = os.path.join(txt_dir, os.path.splitext(image_file.split('/')[-1])[0] + '.txt')
140 with open(out_txt, 'w') as f: # context manager so the file is always closed
141 for box in boxes:
142 f.write('{} {} {} {} {} {}\n'.format(box[0], box[1], box[2], box[3], box[4], box[5]))
144
145 for box in boxes:
146 cv2.rectangle(image, (int(box[2]), int(box[3])), (int(box[4]), int(box[5])), (255, 0, 255))
147 cv2.putText(image, '{} {:.2f}'.format(int(box[0]), box[1]), (int(box[2]), int(box[3]) + 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), thickness=2)
148
149 if cfg['show']:
150 cv2.imshow('image', image)
151 cv2.waitKey(0) # imshow needs a waitKey call to actually render the window
152 if cfg['save_image']:
153 image_path = os.path.join(image_dir, image_file.split('/')[-1])
154 cv2.imwrite(image_path, image)
155
156
157 if __name__ == '__main__':
158 run()
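
For reference, mmdet 2.x's inference_detector returns one (N, 5) array per class, with rows [x1, y1, x2, y2, score]; that is the structure mmdet_out flattens above. A tiny standalone sketch with made-up numbers:

import numpy as np

dummy = [np.array([[10., 20., 110., 220., 0.9]]), # class 0: one box
np.zeros((0, 5))] # class 1: no boxes
print(mmdet_out(dummy)) # -> [[0, 0.9, 10.0, 20.0, 110.0, 220.0]]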
1 # dataset settings
2 dataset_type = 'CityscapesDataset'
3 data_root = 'data/cityscapes/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True),
9 dict(
10 type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11 dict(type='RandomFlip', flip_ratio=0.5),
12 dict(type='Normalize', **img_norm_cfg),
13 dict(type='Pad', size_divisor=32),
14 dict(type='DefaultFormatBundle'),
15 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 ]
17 test_pipeline = [
18 dict(type='LoadImageFromFile'),
19 dict(
20 type='MultiScaleFlipAug',
21 img_scale=(2048, 1024),
22 flip=False,
23 transforms=[
24 dict(type='Resize', keep_ratio=True),
25 dict(type='RandomFlip'),
26 dict(type='Normalize', **img_norm_cfg),
27 dict(type='Pad', size_divisor=32),
28 dict(type='ImageToTensor', keys=['img']),
29 dict(type='Collect', keys=['img']),
30 ])
31 ]
32 data = dict(
33 samples_per_gpu=1,
34 workers_per_gpu=2,
35 train=dict(
36 type='RepeatDataset',
37 times=8,
38 dataset=dict(
39 type=dataset_type,
40 ann_file=data_root +
41 'annotations/instancesonly_filtered_gtFine_train.json',
42 img_prefix=data_root + 'leftImg8bit/train/',
43 pipeline=train_pipeline)),
44 val=dict(
45 type=dataset_type,
46 ann_file=data_root +
47 'annotations/instancesonly_filtered_gtFine_val.json',
48 img_prefix=data_root + 'leftImg8bit/val/',
49 pipeline=test_pipeline),
50 test=dict(
51 type=dataset_type,
52 ann_file=data_root +
53 'annotations/instancesonly_filtered_gtFine_test.json',
54 img_prefix=data_root + 'leftImg8bit/test/',
55 pipeline=test_pipeline))
56 evaluation = dict(interval=1, metric='bbox')
1 # dataset settings
2 dataset_type = 'CityscapesDataset'
3 data_root = 'data/cityscapes/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 dict(
10 type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11 dict(type='RandomFlip', flip_ratio=0.5),
12 dict(type='Normalize', **img_norm_cfg),
13 dict(type='Pad', size_divisor=32),
14 dict(type='DefaultFormatBundle'),
15 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
16 ]
17 test_pipeline = [
18 dict(type='LoadImageFromFile'),
19 dict(
20 type='MultiScaleFlipAug',
21 img_scale=(2048, 1024),
22 flip=False,
23 transforms=[
24 dict(type='Resize', keep_ratio=True),
25 dict(type='RandomFlip'),
26 dict(type='Normalize', **img_norm_cfg),
27 dict(type='Pad', size_divisor=32),
28 dict(type='ImageToTensor', keys=['img']),
29 dict(type='Collect', keys=['img']),
30 ])
31 ]
32 data = dict(
33 samples_per_gpu=1,
34 workers_per_gpu=2,
35 train=dict(
36 type='RepeatDataset',
37 times=8,
38 dataset=dict(
39 type=dataset_type,
40 ann_file=data_root +
41 'annotations/instancesonly_filtered_gtFine_train.json',
42 img_prefix=data_root + 'leftImg8bit/train/',
43 pipeline=train_pipeline)),
44 val=dict(
45 type=dataset_type,
46 ann_file=data_root +
47 'annotations/instancesonly_filtered_gtFine_val.json',
48 img_prefix=data_root + 'leftImg8bit/val/',
49 pipeline=test_pipeline),
50 test=dict(
51 type=dataset_type,
52 ann_file=data_root +
53 'annotations/instancesonly_filtered_gtFine_test.json',
54 img_prefix=data_root + 'leftImg8bit/test/',
55 pipeline=test_pipeline))
56 evaluation = dict(metric=['bbox', 'segm'])
1 # dataset settings
2 dataset_type = 'CocoDataset'
3 data_root = 'data/coco/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True),
9 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 dict(type='RandomFlip', flip_ratio=0.5),
11 dict(type='Normalize', **img_norm_cfg),
12 dict(type='Pad', size_divisor=32),
13 dict(type='DefaultFormatBundle'),
14 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 ]
16 test_pipeline = [
17 dict(type='LoadImageFromFile'),
18 dict(
19 type='MultiScaleFlipAug',
20 img_scale=(1333, 800),
21 flip=False,
22 transforms=[
23 dict(type='Resize', keep_ratio=True),
24 dict(type='RandomFlip'),
25 dict(type='Normalize', **img_norm_cfg),
26 dict(type='Pad', size_divisor=32),
27 dict(type='ImageToTensor', keys=['img']),
28 dict(type='Collect', keys=['img']),
29 ])
30 ]
31 data = dict(
32 samples_per_gpu=2,
33 workers_per_gpu=2,
34 train=dict(
35 type=dataset_type,
36 ann_file=data_root + 'annotations/instances_train2017.json',
37 img_prefix=data_root + 'train2017/',
38 pipeline=train_pipeline),
39 val=dict(
40 type=dataset_type,
41 ann_file=data_root + 'annotations/instances_val2017.json',
42 img_prefix=data_root + 'val2017/',
43 pipeline=test_pipeline),
44 test=dict(
45 type=dataset_type,
46 ann_file=data_root + 'annotations/instances_val2017.json',
47 img_prefix=data_root + 'val2017/',
48 pipeline=test_pipeline))
49 evaluation = dict(interval=1, metric='bbox')
1 # dataset settings
2 dataset_type = 'CocoDataset'
3 data_root = 'data/coco/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 dict(type='RandomFlip', flip_ratio=0.5),
11 dict(type='Normalize', **img_norm_cfg),
12 dict(type='Pad', size_divisor=32),
13 dict(type='DefaultFormatBundle'),
14 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 ]
16 test_pipeline = [
17 dict(type='LoadImageFromFile'),
18 dict(
19 type='MultiScaleFlipAug',
20 img_scale=(1333, 800),
21 flip=False,
22 transforms=[
23 dict(type='Resize', keep_ratio=True),
24 dict(type='RandomFlip'),
25 dict(type='Normalize', **img_norm_cfg),
26 dict(type='Pad', size_divisor=32),
27 dict(type='ImageToTensor', keys=['img']),
28 dict(type='Collect', keys=['img']),
29 ])
30 ]
31 data = dict(
32 samples_per_gpu=2,
33 workers_per_gpu=2,
34 train=dict(
35 type=dataset_type,
36 ann_file=data_root + 'annotations/instances_train2017.json',
37 img_prefix=data_root + 'train2017/',
38 pipeline=train_pipeline),
39 val=dict(
40 type=dataset_type,
41 ann_file=data_root + 'annotations/instances_val2017.json',
42 img_prefix=data_root + 'val2017/',
43 pipeline=test_pipeline),
44 test=dict(
45 type=dataset_type,
46 ann_file=data_root + 'annotations/instances_val2017.json',
47 img_prefix=data_root + 'val2017/',
48 pipeline=test_pipeline))
49 evaluation = dict(metric=['bbox', 'segm'])
1 # dataset settings
2 dataset_type = 'CocoDataset'
3 data_root = 'data/coco/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(
9 type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
10 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
11 dict(type='RandomFlip', flip_ratio=0.5),
12 dict(type='Normalize', **img_norm_cfg),
13 dict(type='Pad', size_divisor=32),
14 dict(type='SegRescale', scale_factor=1 / 8),
15 dict(type='DefaultFormatBundle'),
16 dict(
17 type='Collect',
18 keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
19 ]
20 test_pipeline = [
21 dict(type='LoadImageFromFile'),
22 dict(
23 type='MultiScaleFlipAug',
24 img_scale=(1333, 800),
25 flip=False,
26 transforms=[
27 dict(type='Resize', keep_ratio=True),
28 dict(type='RandomFlip', flip_ratio=0.5),
29 dict(type='Normalize', **img_norm_cfg),
30 dict(type='Pad', size_divisor=32),
31 dict(type='ImageToTensor', keys=['img']),
32 dict(type='Collect', keys=['img']),
33 ])
34 ]
35 data = dict(
36 samples_per_gpu=2,
37 workers_per_gpu=2,
38 train=dict(
39 type=dataset_type,
40 ann_file=data_root + 'annotations/instances_train2017.json',
41 img_prefix=data_root + 'train2017/',
42 seg_prefix=data_root + 'stuffthingmaps/train2017/',
43 pipeline=train_pipeline),
44 val=dict(
45 type=dataset_type,
46 ann_file=data_root + 'annotations/instances_val2017.json',
47 img_prefix=data_root + 'val2017/',
48 pipeline=test_pipeline),
49 test=dict(
50 type=dataset_type,
51 ann_file=data_root + 'annotations/instances_val2017.json',
52 img_prefix=data_root + 'val2017/',
53 pipeline=test_pipeline))
54 evaluation = dict(metric=['bbox', 'segm'])
1 # dataset settings
2 dataset_type = 'CocoPanopticDataset'
3 data_root = 'data/coco/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(
9 type='LoadPanopticAnnotations',
10 with_bbox=True,
11 with_mask=True,
12 with_seg=True),
13 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
14 dict(type='RandomFlip', flip_ratio=0.5),
15 dict(type='Normalize', **img_norm_cfg),
16 dict(type='Pad', size_divisor=32),
17 dict(type='SegRescale', scale_factor=1 / 4),
18 dict(type='DefaultFormatBundle'),
19 dict(
20 type='Collect',
21 keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
22 ]
23 test_pipeline = [
24 dict(type='LoadImageFromFile'),
25 dict(
26 type='MultiScaleFlipAug',
27 img_scale=(1333, 800),
28 flip=False,
29 transforms=[
30 dict(type='Resize', keep_ratio=True),
31 dict(type='RandomFlip'),
32 dict(type='Normalize', **img_norm_cfg),
33 dict(type='Pad', size_divisor=32),
34 dict(type='ImageToTensor', keys=['img']),
35 dict(type='Collect', keys=['img']),
36 ])
37 ]
38 data = dict(
39 samples_per_gpu=2,
40 workers_per_gpu=2,
41 train=dict(
42 type=dataset_type,
43 ann_file=data_root + 'annotations/panoptic_train2017.json',
44 img_prefix=data_root + 'train2017/',
45 seg_prefix=data_root + 'annotations/panoptic_train2017/',
46 pipeline=train_pipeline),
47 val=dict(
48 type=dataset_type,
49 ann_file=data_root + 'annotations/panoptic_val2017.json',
50 img_prefix=data_root + 'val2017/',
51 seg_prefix=data_root + 'annotations/panoptic_val2017/',
52 pipeline=test_pipeline),
53 test=dict(
54 type=dataset_type,
55 ann_file=data_root + 'annotations/panoptic_val2017.json',
56 img_prefix=data_root + 'val2017/',
57 seg_prefix=data_root + 'annotations/panoptic_val2017/',
58 pipeline=test_pipeline))
59 evaluation = dict(interval=1, metric=['PQ'])
1 # dataset settings
2 dataset_type = 'DeepFashionDataset'
3 data_root = 'data/DeepFashion/In-shop/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 dict(type='Resize', img_scale=(750, 1101), keep_ratio=True),
10 dict(type='RandomFlip', flip_ratio=0.5),
11 dict(type='Normalize', **img_norm_cfg),
12 dict(type='Pad', size_divisor=32),
13 dict(type='DefaultFormatBundle'),
14 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 ]
16 test_pipeline = [
17 dict(type='LoadImageFromFile'),
18 dict(
19 type='MultiScaleFlipAug',
20 img_scale=(750, 1101),
21 flip=False,
22 transforms=[
23 dict(type='Resize', keep_ratio=True),
24 dict(type='RandomFlip'),
25 dict(type='Normalize', **img_norm_cfg),
26 dict(type='Pad', size_divisor=32),
27 dict(type='ImageToTensor', keys=['img']),
28 dict(type='Collect', keys=['img']),
29 ])
30 ]
31 data = dict(
32 imgs_per_gpu=2,
33 workers_per_gpu=1,
34 train=dict(
35 type=dataset_type,
36 ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
37 img_prefix=data_root + 'Img/',
38 pipeline=train_pipeline,
39 data_root=data_root),
40 val=dict(
41 type=dataset_type,
42 ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
43 img_prefix=data_root + 'Img/',
44 pipeline=test_pipeline,
45 data_root=data_root),
46 test=dict(
47 type=dataset_type,
48 ann_file=data_root +
49 'annotations/DeepFashion_segmentation_gallery.json',
50 img_prefix=data_root + 'Img/',
51 pipeline=test_pipeline,
52 data_root=data_root))
53 evaluation = dict(interval=5, metric=['bbox', 'segm'])
1 # dataset settings
2 _base_ = 'coco_instance.py'
3 dataset_type = 'LVISV05Dataset'
4 data_root = 'data/lvis_v0.5/'
5 data = dict(
6 samples_per_gpu=2,
7 workers_per_gpu=2,
8 train=dict(
9 _delete_=True,
10 type='ClassBalancedDataset',
11 oversample_thr=1e-3,
12 dataset=dict(
13 type=dataset_type,
14 ann_file=data_root + 'annotations/lvis_v0.5_train.json',
15 img_prefix=data_root + 'train2017/')),
16 val=dict(
17 type=dataset_type,
18 ann_file=data_root + 'annotations/lvis_v0.5_val.json',
19 img_prefix=data_root + 'val2017/'),
20 test=dict(
21 type=dataset_type,
22 ann_file=data_root + 'annotations/lvis_v0.5_val.json',
23 img_prefix=data_root + 'val2017/'))
24 evaluation = dict(metric=['bbox', 'segm'])
1 # dataset settings
2 _base_ = 'coco_instance.py'
3 dataset_type = 'LVISV1Dataset'
4 data_root = 'data/lvis_v1/'
5 data = dict(
6 samples_per_gpu=2,
7 workers_per_gpu=2,
8 train=dict(
9 _delete_=True,
10 type='ClassBalancedDataset',
11 oversample_thr=1e-3,
12 dataset=dict(
13 type=dataset_type,
14 ann_file=data_root + 'annotations/lvis_v1_train.json',
15 img_prefix=data_root)),
16 val=dict(
17 type=dataset_type,
18 ann_file=data_root + 'annotations/lvis_v1_val.json',
19 img_prefix=data_root),
20 test=dict(
21 type=dataset_type,
22 ann_file=data_root + 'annotations/lvis_v1_val.json',
23 img_prefix=data_root))
24 evaluation = dict(metric=['bbox', 'segm'])
1 # dataset settings
2 dataset_type = 'OpenImagesDataset'
3 data_root = 'data/OpenImages/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True, denorm_bbox=True),
9 dict(type='Resize', img_scale=(1024, 800), keep_ratio=True),
10 dict(type='RandomFlip', flip_ratio=0.5),
11 dict(type='Normalize', **img_norm_cfg),
12 dict(type='Pad', size_divisor=32),
13 dict(type='DefaultFormatBundle'),
14 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 ]
16 test_pipeline = [
17 dict(type='LoadImageFromFile'),
18 dict(
19 type='MultiScaleFlipAug',
20 img_scale=(1024, 800),
21 flip=False,
22 transforms=[
23 dict(type='Resize', keep_ratio=True),
24 dict(type='RandomFlip'),
25 dict(type='Normalize', **img_norm_cfg),
26 dict(type='Pad', size_divisor=32),
27 dict(type='ImageToTensor', keys=['img']),
28 dict(type='Collect', keys=['img']),
29 ],
30 ),
31 ]
32 data = dict(
33 samples_per_gpu=2,
34 workers_per_gpu=0, # workers_per_gpu > 0 may cause out-of-memory errors
35 train=dict(
36 type=dataset_type,
37 ann_file=data_root + 'annotations/oidv6-train-annotations-bbox.csv',
38 img_prefix=data_root + 'OpenImages/train/',
39 label_file=data_root + 'annotations/class-descriptions-boxable.csv',
40 hierarchy_file=data_root +
41 'annotations/bbox_labels_600_hierarchy.json',
42 pipeline=train_pipeline),
43 val=dict(
44 type=dataset_type,
45 ann_file=data_root + 'annotations/validation-annotations-bbox.csv',
46 img_prefix=data_root + 'OpenImages/validation/',
47 label_file=data_root + 'annotations/class-descriptions-boxable.csv',
48 hierarchy_file=data_root +
49 'annotations/bbox_labels_600_hierarchy.json',
50 meta_file=data_root + 'annotations/validation-image-metas.pkl',
51 image_level_ann_file=data_root +
52 'annotations/validation-annotations-human-imagelabels-boxable.csv',
53 pipeline=test_pipeline),
54 test=dict(
55 type=dataset_type,
56 ann_file=data_root + 'annotations/validation-annotations-bbox.csv',
57 img_prefix=data_root + 'OpenImages/validation/',
58 label_file=data_root + 'annotations/class-descriptions-boxable.csv',
59 hierarchy_file=data_root +
60 'annotations/bbox_labels_600_hierarchy.json',
61 meta_file=data_root + 'annotations/validation-image-metas.pkl',
62 image_level_ann_file=data_root +
63 'annotations/validation-annotations-human-imagelabels-boxable.csv',
64 pipeline=test_pipeline))
65 evaluation = dict(interval=1, metric='mAP')
1 # dataset settings
2 dataset_type = 'VOCDataset'
3 data_root = 'data/VOCdevkit/'
4 img_norm_cfg = dict(
5 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 train_pipeline = [
7 dict(type='LoadImageFromFile'),
8 dict(type='LoadAnnotations', with_bbox=True),
9 dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
10 dict(type='RandomFlip', flip_ratio=0.5),
11 dict(type='Normalize', **img_norm_cfg),
12 dict(type='Pad', size_divisor=32),
13 dict(type='DefaultFormatBundle'),
14 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 ]
16 test_pipeline = [
17 dict(type='LoadImageFromFile'),
18 dict(
19 type='MultiScaleFlipAug',
20 img_scale=(1000, 600),
21 flip=False,
22 transforms=[
23 dict(type='Resize', keep_ratio=True),
24 dict(type='RandomFlip'),
25 dict(type='Normalize', **img_norm_cfg),
26 dict(type='Pad', size_divisor=32),
27 dict(type='ImageToTensor', keys=['img']),
28 dict(type='Collect', keys=['img']),
29 ])
30 ]
31 data = dict(
32 samples_per_gpu=2,
33 workers_per_gpu=2,
34 train=dict(
35 type='RepeatDataset',
36 times=3,
37 dataset=dict(
38 type=dataset_type,
39 ann_file=[
40 data_root + 'VOC2007/ImageSets/Main/trainval.txt',
41 data_root + 'VOC2012/ImageSets/Main/trainval.txt'
42 ],
43 img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
44 pipeline=train_pipeline)),
45 val=dict(
46 type=dataset_type,
47 ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
48 img_prefix=data_root + 'VOC2007/',
49 pipeline=test_pipeline),
50 test=dict(
51 type=dataset_type,
52 ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
53 img_prefix=data_root + 'VOC2007/',
54 pipeline=test_pipeline))
55 evaluation = dict(interval=1, metric='mAP')
1 # dataset settings
2 dataset_type = 'WIDERFaceDataset'
3 data_root = 'data/WIDERFace/'
4 img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
5 train_pipeline = [
6 dict(type='LoadImageFromFile', to_float32=True),
7 dict(type='LoadAnnotations', with_bbox=True),
8 dict(
9 type='PhotoMetricDistortion',
10 brightness_delta=32,
11 contrast_range=(0.5, 1.5),
12 saturation_range=(0.5, 1.5),
13 hue_delta=18),
14 dict(
15 type='Expand',
16 mean=img_norm_cfg['mean'],
17 to_rgb=img_norm_cfg['to_rgb'],
18 ratio_range=(1, 4)),
19 dict(
20 type='MinIoURandomCrop',
21 min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
22 min_crop_size=0.3),
23 dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
24 dict(type='Normalize', **img_norm_cfg),
25 dict(type='RandomFlip', flip_ratio=0.5),
26 dict(type='DefaultFormatBundle'),
27 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
28 ]
29 test_pipeline = [
30 dict(type='LoadImageFromFile'),
31 dict(
32 type='MultiScaleFlipAug',
33 img_scale=(300, 300),
34 flip=False,
35 transforms=[
36 dict(type='Resize', keep_ratio=False),
37 dict(type='Normalize', **img_norm_cfg),
38 dict(type='ImageToTensor', keys=['img']),
39 dict(type='Collect', keys=['img']),
40 ])
41 ]
42 data = dict(
43 samples_per_gpu=60,
44 workers_per_gpu=2,
45 train=dict(
46 type='RepeatDataset',
47 times=2,
48 dataset=dict(
49 type=dataset_type,
50 ann_file=data_root + 'train.txt',
51 img_prefix=data_root + 'WIDER_train/',
52 min_size=17,
53 pipeline=train_pipeline)),
54 val=dict(
55 type=dataset_type,
56 ann_file=data_root + 'val.txt',
57 img_prefix=data_root + 'WIDER_val/',
58 pipeline=test_pipeline),
59 test=dict(
60 type=dataset_type,
61 ann_file=data_root + 'val.txt',
62 img_prefix=data_root + 'WIDER_val/',
63 pipeline=test_pipeline))
1 checkpoint_config = dict(interval=1)
2 # yapf:disable
3 log_config = dict(
4 interval=50,
5 hooks=[
6 dict(type='TextLoggerHook'),
7 # dict(type='TensorboardLoggerHook')
8 ])
9 # yapf:enable
10 custom_hooks = [dict(type='NumClassCheckHook')]
11
12 dist_params = dict(backend='nccl')
13 log_level = 'INFO'
14 load_from = None
15 resume_from = None
16 workflow = [('train', 1)]
17
18 # disable opencv multithreading to avoid system being overloaded
19 opencv_num_threads = 0
20 # set multi-process start method as `fork` to speed up the training
21 mp_start_method = 'fork'
1 # model settings
2 model = dict(
3 type='CascadeRCNN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
35 roi_head=dict(
36 type='CascadeRoIHead',
37 num_stages=3,
38 stage_loss_weights=[1, 0.5, 0.25],
39 bbox_roi_extractor=dict(
40 type='SingleRoIExtractor',
41 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
42 out_channels=256,
43 featmap_strides=[4, 8, 16, 32]),
44 bbox_head=[
45 dict(
46 type='Shared2FCBBoxHead',
47 in_channels=256,
48 fc_out_channels=1024,
49 roi_feat_size=7,
50 num_classes=80,
51 bbox_coder=dict(
52 type='DeltaXYWHBBoxCoder',
53 target_means=[0., 0., 0., 0.],
54 target_stds=[0.1, 0.1, 0.2, 0.2]),
55 reg_class_agnostic=True,
56 loss_cls=dict(
57 type='CrossEntropyLoss',
58 use_sigmoid=False,
59 loss_weight=1.0),
60 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
61 loss_weight=1.0)),
62 dict(
63 type='Shared2FCBBoxHead',
64 in_channels=256,
65 fc_out_channels=1024,
66 roi_feat_size=7,
67 num_classes=80,
68 bbox_coder=dict(
69 type='DeltaXYWHBBoxCoder',
70 target_means=[0., 0., 0., 0.],
71 target_stds=[0.05, 0.05, 0.1, 0.1]),
72 reg_class_agnostic=True,
73 loss_cls=dict(
74 type='CrossEntropyLoss',
75 use_sigmoid=False,
76 loss_weight=1.0),
77 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
78 loss_weight=1.0)),
79 dict(
80 type='Shared2FCBBoxHead',
81 in_channels=256,
82 fc_out_channels=1024,
83 roi_feat_size=7,
84 num_classes=80,
85 bbox_coder=dict(
86 type='DeltaXYWHBBoxCoder',
87 target_means=[0., 0., 0., 0.],
88 target_stds=[0.033, 0.033, 0.067, 0.067]),
89 reg_class_agnostic=True,
90 loss_cls=dict(
91 type='CrossEntropyLoss',
92 use_sigmoid=False,
93 loss_weight=1.0),
94 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
95 ],
96 mask_roi_extractor=dict(
97 type='SingleRoIExtractor',
98 roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
99 out_channels=256,
100 featmap_strides=[4, 8, 16, 32]),
101 mask_head=dict(
102 type='FCNMaskHead',
103 num_convs=4,
104 in_channels=256,
105 conv_out_channels=256,
106 num_classes=80,
107 loss_mask=dict(
108 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
109 # model training and testing settings
110 train_cfg=dict(
111 rpn=dict(
112 assigner=dict(
113 type='MaxIoUAssigner',
114 pos_iou_thr=0.7,
115 neg_iou_thr=0.3,
116 min_pos_iou=0.3,
117 match_low_quality=True,
118 ignore_iof_thr=-1),
119 sampler=dict(
120 type='RandomSampler',
121 num=256,
122 pos_fraction=0.5,
123 neg_pos_ub=-1,
124 add_gt_as_proposals=False),
125 allowed_border=0,
126 pos_weight=-1,
127 debug=False),
128 rpn_proposal=dict(
129 nms_pre=2000,
130 max_per_img=2000,
131 nms=dict(type='nms', iou_threshold=0.7),
132 min_bbox_size=0),
133 rcnn=[
134 dict(
135 assigner=dict(
136 type='MaxIoUAssigner',
137 pos_iou_thr=0.5,
138 neg_iou_thr=0.5,
139 min_pos_iou=0.5,
140 match_low_quality=False,
141 ignore_iof_thr=-1),
142 sampler=dict(
143 type='RandomSampler',
144 num=512,
145 pos_fraction=0.25,
146 neg_pos_ub=-1,
147 add_gt_as_proposals=True),
148 mask_size=28,
149 pos_weight=-1,
150 debug=False),
151 dict(
152 assigner=dict(
153 type='MaxIoUAssigner',
154 pos_iou_thr=0.6,
155 neg_iou_thr=0.6,
156 min_pos_iou=0.6,
157 match_low_quality=False,
158 ignore_iof_thr=-1),
159 sampler=dict(
160 type='RandomSampler',
161 num=512,
162 pos_fraction=0.25,
163 neg_pos_ub=-1,
164 add_gt_as_proposals=True),
165 mask_size=28,
166 pos_weight=-1,
167 debug=False),
168 dict(
169 assigner=dict(
170 type='MaxIoUAssigner',
171 pos_iou_thr=0.7,
172 neg_iou_thr=0.7,
173 min_pos_iou=0.7,
174 match_low_quality=False,
175 ignore_iof_thr=-1),
176 sampler=dict(
177 type='RandomSampler',
178 num=512,
179 pos_fraction=0.25,
180 neg_pos_ub=-1,
181 add_gt_as_proposals=True),
182 mask_size=28,
183 pos_weight=-1,
184 debug=False)
185 ]),
186 test_cfg=dict(
187 rpn=dict(
188 nms_pre=1000,
189 max_per_img=1000,
190 nms=dict(type='nms', iou_threshold=0.7),
191 min_bbox_size=0),
192 rcnn=dict(
193 score_thr=0.05,
194 nms=dict(type='nms', iou_threshold=0.5),
195 max_per_img=100,
196 mask_thr_binary=0.5)))
1 # model settings
2 model = dict(
3 type='CascadeRCNN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
35 roi_head=dict(
36 type='CascadeRoIHead',
37 num_stages=3,
38 stage_loss_weights=[1, 0.5, 0.25],
39 bbox_roi_extractor=dict(
40 type='SingleRoIExtractor',
41 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
42 out_channels=256,
43 featmap_strides=[4, 8, 16, 32]),
44 bbox_head=[
45 dict(
46 type='Shared2FCBBoxHead',
47 in_channels=256,
48 fc_out_channels=1024,
49 roi_feat_size=7,
50 num_classes=80,
51 bbox_coder=dict(
52 type='DeltaXYWHBBoxCoder',
53 target_means=[0., 0., 0., 0.],
54 target_stds=[0.1, 0.1, 0.2, 0.2]),
55 reg_class_agnostic=True,
56 loss_cls=dict(
57 type='CrossEntropyLoss',
58 use_sigmoid=False,
59 loss_weight=1.0),
60 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
61 loss_weight=1.0)),
62 dict(
63 type='Shared2FCBBoxHead',
64 in_channels=256,
65 fc_out_channels=1024,
66 roi_feat_size=7,
67 num_classes=80,
68 bbox_coder=dict(
69 type='DeltaXYWHBBoxCoder',
70 target_means=[0., 0., 0., 0.],
71 target_stds=[0.05, 0.05, 0.1, 0.1]),
72 reg_class_agnostic=True,
73 loss_cls=dict(
74 type='CrossEntropyLoss',
75 use_sigmoid=False,
76 loss_weight=1.0),
77 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
78 loss_weight=1.0)),
79 dict(
80 type='Shared2FCBBoxHead',
81 in_channels=256,
82 fc_out_channels=1024,
83 roi_feat_size=7,
84 num_classes=80,
85 bbox_coder=dict(
86 type='DeltaXYWHBBoxCoder',
87 target_means=[0., 0., 0., 0.],
88 target_stds=[0.033, 0.033, 0.067, 0.067]),
89 reg_class_agnostic=True,
90 loss_cls=dict(
91 type='CrossEntropyLoss',
92 use_sigmoid=False,
93 loss_weight=1.0),
94 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
95 ]),
96 # model training and testing settings
97 train_cfg=dict(
98 rpn=dict(
99 assigner=dict(
100 type='MaxIoUAssigner',
101 pos_iou_thr=0.7,
102 neg_iou_thr=0.3,
103 min_pos_iou=0.3,
104 match_low_quality=True,
105 ignore_iof_thr=-1),
106 sampler=dict(
107 type='RandomSampler',
108 num=256,
109 pos_fraction=0.5,
110 neg_pos_ub=-1,
111 add_gt_as_proposals=False),
112 allowed_border=0,
113 pos_weight=-1,
114 debug=False),
115 rpn_proposal=dict(
116 nms_pre=2000,
117 max_per_img=2000,
118 nms=dict(type='nms', iou_threshold=0.7),
119 min_bbox_size=0),
120 rcnn=[
121 dict(
122 assigner=dict(
123 type='MaxIoUAssigner',
124 pos_iou_thr=0.5,
125 neg_iou_thr=0.5,
126 min_pos_iou=0.5,
127 match_low_quality=False,
128 ignore_iof_thr=-1),
129 sampler=dict(
130 type='RandomSampler',
131 num=512,
132 pos_fraction=0.25,
133 neg_pos_ub=-1,
134 add_gt_as_proposals=True),
135 pos_weight=-1,
136 debug=False),
137 dict(
138 assigner=dict(
139 type='MaxIoUAssigner',
140 pos_iou_thr=0.6,
141 neg_iou_thr=0.6,
142 min_pos_iou=0.6,
143 match_low_quality=False,
144 ignore_iof_thr=-1),
145 sampler=dict(
146 type='RandomSampler',
147 num=512,
148 pos_fraction=0.25,
149 neg_pos_ub=-1,
150 add_gt_as_proposals=True),
151 pos_weight=-1,
152 debug=False),
153 dict(
154 assigner=dict(
155 type='MaxIoUAssigner',
156 pos_iou_thr=0.7,
157 neg_iou_thr=0.7,
158 min_pos_iou=0.7,
159 match_low_quality=False,
160 ignore_iof_thr=-1),
161 sampler=dict(
162 type='RandomSampler',
163 num=512,
164 pos_fraction=0.25,
165 neg_pos_ub=-1,
166 add_gt_as_proposals=True),
167 pos_weight=-1,
168 debug=False)
169 ]),
170 test_cfg=dict(
171 rpn=dict(
172 nms_pre=1000,
173 max_per_img=1000,
174 nms=dict(type='nms', iou_threshold=0.7),
175 min_bbox_size=0),
176 rcnn=dict(
177 score_thr=0.05,
178 nms=dict(type='nms', iou_threshold=0.5),
179 max_per_img=100)))
1 # model settings
2 model = dict(
3 type='FastRCNN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 roi_head=dict(
20 type='StandardRoIHead',
21 bbox_roi_extractor=dict(
22 type='SingleRoIExtractor',
23 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
24 out_channels=256,
25 featmap_strides=[4, 8, 16, 32]),
26 bbox_head=dict(
27 type='Shared2FCBBoxHead',
28 in_channels=256,
29 fc_out_channels=1024,
30 roi_feat_size=7,
31 num_classes=80,
32 bbox_coder=dict(
33 type='DeltaXYWHBBoxCoder',
34 target_means=[0., 0., 0., 0.],
35 target_stds=[0.1, 0.1, 0.2, 0.2]),
36 reg_class_agnostic=False,
37 loss_cls=dict(
38 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
39 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
40 # model training and testing settings
41 train_cfg=dict(
42 rcnn=dict(
43 assigner=dict(
44 type='MaxIoUAssigner',
45 pos_iou_thr=0.5,
46 neg_iou_thr=0.5,
47 min_pos_iou=0.5,
48 match_low_quality=False,
49 ignore_iof_thr=-1),
50 sampler=dict(
51 type='RandomSampler',
52 num=512,
53 pos_fraction=0.25,
54 neg_pos_ub=-1,
55 add_gt_as_proposals=True),
56 pos_weight=-1,
57 debug=False)),
58 test_cfg=dict(
59 rcnn=dict(
60 score_thr=0.05,
61 nms=dict(type='nms', iou_threshold=0.5),
62 max_per_img=100)))
1 # model settings
2 norm_cfg = dict(type='BN', requires_grad=False)
3 model = dict(
4 type='FasterRCNN',
5 backbone=dict(
6 type='ResNet',
7 depth=50,
8 num_stages=3,
9 strides=(1, 2, 2),
10 dilations=(1, 1, 1),
11 out_indices=(2, ),
12 frozen_stages=1,
13 norm_cfg=norm_cfg,
14 norm_eval=True,
15 style='caffe',
16 init_cfg=dict(
17 type='Pretrained',
18 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=1024,
22 feat_channels=1024,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[2, 4, 8, 16, 32],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[16]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 shared_head=dict(
38 type='ResLayer',
39 depth=50,
40 stage=3,
41 stride=2,
42 dilation=1,
43 style='caffe',
44 norm_cfg=norm_cfg,
45 norm_eval=True),
46 bbox_roi_extractor=dict(
47 type='SingleRoIExtractor',
48 roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
49 out_channels=1024,
50 featmap_strides=[16]),
51 bbox_head=dict(
52 type='BBoxHead',
53 with_avg_pool=True,
54 roi_feat_size=7,
55 in_channels=2048,
56 num_classes=80,
57 bbox_coder=dict(
58 type='DeltaXYWHBBoxCoder',
59 target_means=[0., 0., 0., 0.],
60 target_stds=[0.1, 0.1, 0.2, 0.2]),
61 reg_class_agnostic=False,
62 loss_cls=dict(
63 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
64 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
65 # model training and testing settings
66 train_cfg=dict(
67 rpn=dict(
68 assigner=dict(
69 type='MaxIoUAssigner',
70 pos_iou_thr=0.7,
71 neg_iou_thr=0.3,
72 min_pos_iou=0.3,
73 match_low_quality=True,
74 ignore_iof_thr=-1),
75 sampler=dict(
76 type='RandomSampler',
77 num=256,
78 pos_fraction=0.5,
79 neg_pos_ub=-1,
80 add_gt_as_proposals=False),
81 allowed_border=0,
82 pos_weight=-1,
83 debug=False),
84 rpn_proposal=dict(
85 nms_pre=12000,
86 max_per_img=2000,
87 nms=dict(type='nms', iou_threshold=0.7),
88 min_bbox_size=0),
89 rcnn=dict(
90 assigner=dict(
91 type='MaxIoUAssigner',
92 pos_iou_thr=0.5,
93 neg_iou_thr=0.5,
94 min_pos_iou=0.5,
95 match_low_quality=False,
96 ignore_iof_thr=-1),
97 sampler=dict(
98 type='RandomSampler',
99 num=512,
100 pos_fraction=0.25,
101 neg_pos_ub=-1,
102 add_gt_as_proposals=True),
103 pos_weight=-1,
104 debug=False)),
105 test_cfg=dict(
106 rpn=dict(
107 nms_pre=6000,
108 max_per_img=1000,
109 nms=dict(type='nms', iou_threshold=0.7),
110 min_bbox_size=0),
111 rcnn=dict(
112 score_thr=0.05,
113 nms=dict(type='nms', iou_threshold=0.5),
114 max_per_img=100)))
1 # model settings
2 norm_cfg = dict(type='BN', requires_grad=False)
3 model = dict(
4 type='FasterRCNN',
5 backbone=dict(
6 type='ResNet',
7 depth=50,
8 num_stages=4,
9 strides=(1, 2, 2, 1),
10 dilations=(1, 1, 1, 2),
11 out_indices=(3, ),
12 frozen_stages=1,
13 norm_cfg=norm_cfg,
14 norm_eval=True,
15 style='caffe',
16 init_cfg=dict(
17 type='Pretrained',
18 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=2048,
22 feat_channels=2048,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[2, 4, 8, 16, 32],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[16]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 bbox_roi_extractor=dict(
38 type='SingleRoIExtractor',
39 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 out_channels=2048,
41 featmap_strides=[16]),
42 bbox_head=dict(
43 type='Shared2FCBBoxHead',
44 in_channels=2048,
45 fc_out_channels=1024,
46 roi_feat_size=7,
47 num_classes=80,
48 bbox_coder=dict(
49 type='DeltaXYWHBBoxCoder',
50 target_means=[0., 0., 0., 0.],
51 target_stds=[0.1, 0.1, 0.2, 0.2]),
52 reg_class_agnostic=False,
53 loss_cls=dict(
54 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 # model training and testing settings
57 train_cfg=dict(
58 rpn=dict(
59 assigner=dict(
60 type='MaxIoUAssigner',
61 pos_iou_thr=0.7,
62 neg_iou_thr=0.3,
63 min_pos_iou=0.3,
64 match_low_quality=True,
65 ignore_iof_thr=-1),
66 sampler=dict(
67 type='RandomSampler',
68 num=256,
69 pos_fraction=0.5,
70 neg_pos_ub=-1,
71 add_gt_as_proposals=False),
72 allowed_border=0,
73 pos_weight=-1,
74 debug=False),
75 rpn_proposal=dict(
76 nms_pre=12000,
77 max_per_img=2000,
78 nms=dict(type='nms', iou_threshold=0.7),
79 min_bbox_size=0),
80 rcnn=dict(
81 assigner=dict(
82 type='MaxIoUAssigner',
83 pos_iou_thr=0.5,
84 neg_iou_thr=0.5,
85 min_pos_iou=0.5,
86 match_low_quality=False,
87 ignore_iof_thr=-1),
88 sampler=dict(
89 type='RandomSampler',
90 num=512,
91 pos_fraction=0.25,
92 neg_pos_ub=-1,
93 add_gt_as_proposals=True),
94 pos_weight=-1,
95 debug=False)),
96 test_cfg=dict(
97 rpn=dict(
98 nms=dict(type='nms', iou_threshold=0.7),
99 nms_pre=6000,
100 max_per_img=1000,
101 min_bbox_size=0),
102 rcnn=dict(
103 score_thr=0.05,
104 nms=dict(type='nms', iou_threshold=0.5),
105 max_per_img=100)))
1 # model settings
2 model = dict(
3 type='FasterRCNN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 bbox_roi_extractor=dict(
38 type='SingleRoIExtractor',
39 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 out_channels=256,
41 featmap_strides=[4, 8, 16, 32]),
42 bbox_head=dict(
43 type='Shared2FCBBoxHead',
44 in_channels=256,
45 fc_out_channels=1024,
46 roi_feat_size=7,
47 num_classes=80,
48 bbox_coder=dict(
49 type='DeltaXYWHBBoxCoder',
50 target_means=[0., 0., 0., 0.],
51 target_stds=[0.1, 0.1, 0.2, 0.2]),
52 reg_class_agnostic=False,
53 loss_cls=dict(
54 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 # model training and testing settings
57 train_cfg=dict(
58 rpn=dict(
59 assigner=dict(
60 type='MaxIoUAssigner',
61 pos_iou_thr=0.7,
62 neg_iou_thr=0.3,
63 min_pos_iou=0.3,
64 match_low_quality=True,
65 ignore_iof_thr=-1),
66 sampler=dict(
67 type='RandomSampler',
68 num=256,
69 pos_fraction=0.5,
70 neg_pos_ub=-1,
71 add_gt_as_proposals=False),
72 allowed_border=-1,
73 pos_weight=-1,
74 debug=False),
75 rpn_proposal=dict(
76 nms_pre=2000,
77 max_per_img=1000,
78 nms=dict(type='nms', iou_threshold=0.7),
79 min_bbox_size=0),
80 rcnn=dict(
81 assigner=dict(
82 type='MaxIoUAssigner',
83 pos_iou_thr=0.5,
84 neg_iou_thr=0.5,
85 min_pos_iou=0.5,
86 match_low_quality=False,
87 ignore_iof_thr=-1),
88 sampler=dict(
89 type='RandomSampler',
90 num=512,
91 pos_fraction=0.25,
92 neg_pos_ub=-1,
93 add_gt_as_proposals=True),
94 pos_weight=-1,
95 debug=False)),
96 test_cfg=dict(
97 rpn=dict(
98 nms_pre=1000,
99 max_per_img=1000,
100 nms=dict(type='nms', iou_threshold=0.7),
101 min_bbox_size=0),
102 rcnn=dict(
103 score_thr=0.05,
104 nms=dict(type='nms', iou_threshold=0.5),
105 max_per_img=100)
106 # soft-nms is also supported for rcnn testing
107 # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
108 ))
1 # model settings
2 norm_cfg = dict(type='BN', requires_grad=False)
3 model = dict(
4 type='MaskRCNN',
5 backbone=dict(
6 type='ResNet',
7 depth=50,
8 num_stages=3,
9 strides=(1, 2, 2),
10 dilations=(1, 1, 1),
11 out_indices=(2, ),
12 frozen_stages=1,
13 norm_cfg=norm_cfg,
14 norm_eval=True,
15 style='caffe',
16 init_cfg=dict(
17 type='Pretrained',
18 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=1024,
22 feat_channels=1024,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[2, 4, 8, 16, 32],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[16]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 shared_head=dict(
38 type='ResLayer',
39 depth=50,
40 stage=3,
41 stride=2,
42 dilation=1,
43 style='caffe',
44 norm_cfg=norm_cfg,
45 norm_eval=True),
46 bbox_roi_extractor=dict(
47 type='SingleRoIExtractor',
48 roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
49 out_channels=1024,
50 featmap_strides=[16]),
51 bbox_head=dict(
52 type='BBoxHead',
53 with_avg_pool=True,
54 roi_feat_size=7,
55 in_channels=2048,
56 num_classes=80,
57 bbox_coder=dict(
58 type='DeltaXYWHBBoxCoder',
59 target_means=[0., 0., 0., 0.],
60 target_stds=[0.1, 0.1, 0.2, 0.2]),
61 reg_class_agnostic=False,
62 loss_cls=dict(
63 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
64 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
65 mask_roi_extractor=None,
66 mask_head=dict(
67 type='FCNMaskHead',
68 num_convs=0,
69 in_channels=2048,
70 conv_out_channels=256,
71 num_classes=80,
72 loss_mask=dict(
73 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
74 # model training and testing settings
75 train_cfg=dict(
76 rpn=dict(
77 assigner=dict(
78 type='MaxIoUAssigner',
79 pos_iou_thr=0.7,
80 neg_iou_thr=0.3,
81 min_pos_iou=0.3,
82 match_low_quality=True,
83 ignore_iof_thr=-1),
84 sampler=dict(
85 type='RandomSampler',
86 num=256,
87 pos_fraction=0.5,
88 neg_pos_ub=-1,
89 add_gt_as_proposals=False),
90 allowed_border=0,
91 pos_weight=-1,
92 debug=False),
93 rpn_proposal=dict(
94 nms_pre=12000,
95 max_per_img=2000,
96 nms=dict(type='nms', iou_threshold=0.7),
97 min_bbox_size=0),
98 rcnn=dict(
99 assigner=dict(
100 type='MaxIoUAssigner',
101 pos_iou_thr=0.5,
102 neg_iou_thr=0.5,
103 min_pos_iou=0.5,
104 match_low_quality=False,
105 ignore_iof_thr=-1),
106 sampler=dict(
107 type='RandomSampler',
108 num=512,
109 pos_fraction=0.25,
110 neg_pos_ub=-1,
111 add_gt_as_proposals=True),
112 mask_size=14,
113 pos_weight=-1,
114 debug=False)),
115 test_cfg=dict(
116 rpn=dict(
117 nms_pre=6000,
118 nms=dict(type='nms', iou_threshold=0.7),
119 max_per_img=1000,
120 min_bbox_size=0),
121 rcnn=dict(
122 score_thr=0.05,
123 nms=dict(type='nms', iou_threshold=0.5),
124 max_per_img=100,
125 mask_thr_binary=0.5)))
1 # model settings
2 model = dict(
3 type='MaskRCNN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 roi_head=dict(
36 type='StandardRoIHead',
37 bbox_roi_extractor=dict(
38 type='SingleRoIExtractor',
39 roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 out_channels=256,
41 featmap_strides=[4, 8, 16, 32]),
42 bbox_head=dict(
43 type='Shared2FCBBoxHead',
44 in_channels=256,
45 fc_out_channels=1024,
46 roi_feat_size=7,
47 num_classes=80,
48 bbox_coder=dict(
49 type='DeltaXYWHBBoxCoder',
50 target_means=[0., 0., 0., 0.],
51 target_stds=[0.1, 0.1, 0.2, 0.2]),
52 reg_class_agnostic=False,
53 loss_cls=dict(
54 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
56 mask_roi_extractor=dict(
57 type='SingleRoIExtractor',
58 roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
59 out_channels=256,
60 featmap_strides=[4, 8, 16, 32]),
61 mask_head=dict(
62 type='FCNMaskHead',
63 num_convs=4,
64 in_channels=256,
65 conv_out_channels=256,
66 num_classes=80,
67 loss_mask=dict(
68 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
69 # model training and testing settings
70 train_cfg=dict(
71 rpn=dict(
72 assigner=dict(
73 type='MaxIoUAssigner',
74 pos_iou_thr=0.7,
75 neg_iou_thr=0.3,
76 min_pos_iou=0.3,
77 match_low_quality=True,
78 ignore_iof_thr=-1),
79 sampler=dict(
80 type='RandomSampler',
81 num=256,
82 pos_fraction=0.5,
83 neg_pos_ub=-1,
84 add_gt_as_proposals=False),
85 allowed_border=-1,
86 pos_weight=-1,
87 debug=False),
88 rpn_proposal=dict(
89 nms_pre=2000,
90 max_per_img=1000,
91 nms=dict(type='nms', iou_threshold=0.7),
92 min_bbox_size=0),
93 rcnn=dict(
94 assigner=dict(
95 type='MaxIoUAssigner',
96 pos_iou_thr=0.5,
97 neg_iou_thr=0.5,
98 min_pos_iou=0.5,
99 match_low_quality=True,
100 ignore_iof_thr=-1),
101 sampler=dict(
102 type='RandomSampler',
103 num=512,
104 pos_fraction=0.25,
105 neg_pos_ub=-1,
106 add_gt_as_proposals=True),
107 mask_size=28,
108 pos_weight=-1,
109 debug=False)),
110 test_cfg=dict(
111 rpn=dict(
112 nms_pre=1000,
113 max_per_img=1000,
114 nms=dict(type='nms', iou_threshold=0.7),
115 min_bbox_size=0),
116 rcnn=dict(
117 score_thr=0.05,
118 nms=dict(type='nms', iou_threshold=0.5),
119 max_per_img=100,
120 mask_thr_binary=0.5)))
1 # model settings
2 model = dict(
3 type='RetinaNet',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 start_level=1,
19 add_extra_convs='on_input',
20 num_outs=5),
21 bbox_head=dict(
22 type='RetinaHead',
23 num_classes=80,
24 in_channels=256,
25 stacked_convs=4,
26 feat_channels=256,
27 anchor_generator=dict(
28 type='AnchorGenerator',
29 octave_base_scale=4,
30 scales_per_octave=3,
31 ratios=[0.5, 1.0, 2.0],
32 strides=[8, 16, 32, 64, 128]),
33 bbox_coder=dict(
34 type='DeltaXYWHBBoxCoder',
35 target_means=[.0, .0, .0, .0],
36 target_stds=[1.0, 1.0, 1.0, 1.0]),
37 loss_cls=dict(
38 type='FocalLoss',
39 use_sigmoid=True,
40 gamma=2.0,
41 alpha=0.25,
42 loss_weight=1.0),
43 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
44 # model training and testing settings
45 train_cfg=dict(
46 assigner=dict(
47 type='MaxIoUAssigner',
48 pos_iou_thr=0.5,
49 neg_iou_thr=0.4,
50 min_pos_iou=0,
51 ignore_iof_thr=-1),
52 allowed_border=-1,
53 pos_weight=-1,
54 debug=False),
55 test_cfg=dict(
56 nms_pre=1000,
57 min_bbox_size=0,
58 score_thr=0.05,
59 nms=dict(type='nms', iou_threshold=0.5),
60 max_per_img=100))
1 # model settings
2 model = dict(
3 type='RPN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=3,
8 strides=(1, 2, 2),
9 dilations=(1, 1, 1),
10 out_indices=(2, ),
11 frozen_stages=1,
12 norm_cfg=dict(type='BN', requires_grad=False),
13 norm_eval=True,
14 style='caffe',
15 init_cfg=dict(
16 type='Pretrained',
17 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
18 neck=None,
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=1024,
22 feat_channels=1024,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[2, 4, 8, 16, 32],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[16]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 # model training and testing settings
36 train_cfg=dict(
37 rpn=dict(
38 assigner=dict(
39 type='MaxIoUAssigner',
40 pos_iou_thr=0.7,
41 neg_iou_thr=0.3,
42 min_pos_iou=0.3,
43 ignore_iof_thr=-1),
44 sampler=dict(
45 type='RandomSampler',
46 num=256,
47 pos_fraction=0.5,
48 neg_pos_ub=-1,
49 add_gt_as_proposals=False),
50 allowed_border=0,
51 pos_weight=-1,
52 debug=False)),
53 test_cfg=dict(
54 rpn=dict(
55 nms_pre=12000,
56 max_per_img=2000,
57 nms=dict(type='nms', iou_threshold=0.7),
58 min_bbox_size=0)))
1 # model settings
2 model = dict(
3 type='RPN',
4 backbone=dict(
5 type='ResNet',
6 depth=50,
7 num_stages=4,
8 out_indices=(0, 1, 2, 3),
9 frozen_stages=1,
10 norm_cfg=dict(type='BN', requires_grad=True),
11 norm_eval=True,
12 style='pytorch',
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 neck=dict(
15 type='FPN',
16 in_channels=[256, 512, 1024, 2048],
17 out_channels=256,
18 num_outs=5),
19 rpn_head=dict(
20 type='RPNHead',
21 in_channels=256,
22 feat_channels=256,
23 anchor_generator=dict(
24 type='AnchorGenerator',
25 scales=[8],
26 ratios=[0.5, 1.0, 2.0],
27 strides=[4, 8, 16, 32, 64]),
28 bbox_coder=dict(
29 type='DeltaXYWHBBoxCoder',
30 target_means=[.0, .0, .0, .0],
31 target_stds=[1.0, 1.0, 1.0, 1.0]),
32 loss_cls=dict(
33 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 # model training and testing settings
36 train_cfg=dict(
37 rpn=dict(
38 assigner=dict(
39 type='MaxIoUAssigner',
40 pos_iou_thr=0.7,
41 neg_iou_thr=0.3,
42 min_pos_iou=0.3,
43 ignore_iof_thr=-1),
44 sampler=dict(
45 type='RandomSampler',
46 num=256,
47 pos_fraction=0.5,
48 neg_pos_ub=-1,
49 add_gt_as_proposals=False),
50 allowed_border=0,
51 pos_weight=-1,
52 debug=False)),
53 test_cfg=dict(
54 rpn=dict(
55 nms_pre=2000,
56 max_per_img=1000,
57 nms=dict(type='nms', iou_threshold=0.7),
58 min_bbox_size=0)))
1 # model settings
2 input_size = 300
3 model = dict(
4 type='SingleStageDetector',
5 backbone=dict(
6 type='SSDVGG',
7 depth=16,
8 with_last_pool=False,
9 ceil_mode=True,
10 out_indices=(3, 4),
11 out_feature_indices=(22, 34),
12 init_cfg=dict(
13 type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
14 neck=dict(
15 type='SSDNeck',
16 in_channels=(512, 1024),
17 out_channels=(512, 1024, 512, 256, 256, 256),
18 level_strides=(2, 2, 1, 1),
19 level_paddings=(1, 1, 0, 0),
20 l2_norm_scale=20),
21 bbox_head=dict(
22 type='SSDHead',
23 in_channels=(512, 1024, 512, 256, 256, 256),
24 num_classes=80,
25 anchor_generator=dict(
26 type='SSDAnchorGenerator',
27 scale_major=False,
28 input_size=input_size,
29 basesize_ratio_range=(0.15, 0.9),
30 strides=[8, 16, 32, 64, 100, 300],
31 ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
32 bbox_coder=dict(
33 type='DeltaXYWHBBoxCoder',
34 target_means=[.0, .0, .0, .0],
35 target_stds=[0.1, 0.1, 0.2, 0.2])),
36 # model training and testing settings
37 train_cfg=dict(
38 assigner=dict(
39 type='MaxIoUAssigner',
40 pos_iou_thr=0.5,
41 neg_iou_thr=0.5,
42 min_pos_iou=0.,
43 ignore_iof_thr=-1,
44 gt_max_assign_all=False),
45 smoothl1_beta=1.,
46 allowed_border=-1,
47 pos_weight=-1,
48 neg_pos_ratio=3,
49 debug=False),
50 test_cfg=dict(
51 nms_pre=1000,
52 nms=dict(type='nms', iou_threshold=0.45),
53 min_bbox_size=0,
54 score_thr=0.02,
55 max_per_img=200))
56 cudnn_benchmark = True
1 # optimizer
2 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 optimizer_config = dict(grad_clip=None)
4 # learning policy
5 lr_config = dict(
6 policy='step',
7 warmup='linear',
8 warmup_iters=500,
9 warmup_ratio=0.001,
10 step=[8, 11])
11 runner = dict(type='EpochBasedRunner', max_epochs=12)
1 # optimizer
2 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 optimizer_config = dict(grad_clip=None)
4 # learning policy
5 lr_config = dict(
6 policy='step',
7 warmup='linear',
8 warmup_iters=500,
9 warmup_ratio=0.001,
10 step=[16, 19])
11 runner = dict(type='EpochBasedRunner', max_epochs=20)
1 # optimizer
2 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 optimizer_config = dict(grad_clip=None)
4 # learning policy
5 lr_config = dict(
6 policy='step',
7 warmup='linear',
8 warmup_iters=500,
9 warmup_ratio=0.001,
10 step=[16, 22])
11 runner = dict(type='EpochBasedRunner', max_epochs=24)
1 # Albu Example
2
3 > [Albumentations: fast and flexible image augmentations](https://arxiv.org/abs/1809.06839)
4
5 <!-- [OTHERS] -->
6
7 ## Abstract
8
9 Data augmentation is a commonly used technique for increasing both the size and the diversity of labeled training sets by leveraging input transformations that preserve output labels. In the computer vision domain, image augmentations have become a common implicit regularization technique to combat overfitting in deep convolutional neural networks and are ubiquitously used to improve performance. While most deep learning frameworks implement basic image transformations, the list is typically limited to some variations and combinations of flipping, rotating, scaling, and cropping. Moreover, the image processing speed varies across existing image augmentation tools. We present Albumentations, a fast and flexible library for image augmentations with a wide variety of image transform operations available, which is also an easy-to-use wrapper around other augmentation libraries. We provide examples of image augmentations for different computer vision tasks and show that Albumentations is faster than other commonly used image augmentation tools on most of the commonly used image transformations.
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143870703-74f3ea3f-ae23-4035-9856-746bc3f88464.png" height="400" />
13 </div>
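
For orientation, here is a minimal standalone sketch of composing two of the transforms from the config below with the library's own API. It is a sketch only: it assumes the `albumentations` package is installed, and the dummy image is a placeholder.

```python
import albumentations as A
import numpy as np

# Two of the transforms used in the mmdet config further down, composed
# directly with the library's own API instead of the `Albu` wrapper.
transform = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.0,
                       rotate_limit=0, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=[0.1, 0.3],
                               contrast_limit=[0.1, 0.3], p=0.2),
])

image = np.zeros((800, 1333, 3), dtype=np.uint8)  # placeholder image
augmented = transform(image=image)['image']
```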
14
15 ## Results and Models
16
17 | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download |
18 |:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:|
19 | R-50 | pytorch | 1x | 4.4 | 16.6 | 38.0 | 34.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208_225520.log.json) |
20
21 ## Citation
22
23 ```latex
24 @article{2018arXiv180906839B,
25 author = {A. Buslaev, A. Parinov, E. Khvedchenya, V.~I. Iglovikov and A.~A. Kalinin},
26 title = "{Albumentations: fast and flexible image augmentations}",
27 journal = {ArXiv e-prints},
28 eprint = {1809.06839},
29 year = 2018
30 }
31 ```
1 _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'
2 img_norm_cfg = dict(
3 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
4 albu_train_transforms = [
5 dict(
6 type='ShiftScaleRotate',
7 shift_limit=0.0625,
8 scale_limit=0.0,
9 rotate_limit=0,
10 interpolation=1,
11 p=0.5),
12 dict(
13 type='RandomBrightnessContrast',
14 brightness_limit=[0.1, 0.3],
15 contrast_limit=[0.1, 0.3],
16 p=0.2),
17 dict(
18 type='OneOf',
19 transforms=[
20 dict(
21 type='RGBShift',
22 r_shift_limit=10,
23 g_shift_limit=10,
24 b_shift_limit=10,
25 p=1.0),
26 dict(
27 type='HueSaturationValue',
28 hue_shift_limit=20,
29 sat_shift_limit=30,
30 val_shift_limit=20,
31 p=1.0)
32 ],
33 p=0.1),
34 dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),
35 dict(type='ChannelShuffle', p=0.1),
36 dict(
37 type='OneOf',
38 transforms=[
39 dict(type='Blur', blur_limit=3, p=1.0),
40 dict(type='MedianBlur', blur_limit=3, p=1.0)
41 ],
42 p=0.1),
43 ]
44 train_pipeline = [
45 dict(type='LoadImageFromFile'),
46 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
47 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
48 dict(type='Pad', size_divisor=32),
49 dict(
50 type='Albu',
51 transforms=albu_train_transforms,
52 bbox_params=dict(
53 type='BboxParams',
54 format='pascal_voc',
55 label_fields=['gt_labels'],
56 min_visibility=0.0,
57 filter_lost_elements=True),
58 keymap={
59 'img': 'image',
60 'gt_masks': 'masks',
61 'gt_bboxes': 'bboxes'
62 },
63 update_pad_shape=False,
64 skip_img_without_anno=True),
65 dict(type='Normalize', **img_norm_cfg),
66 dict(type='DefaultFormatBundle'),
67 dict(
68 type='Collect',
69 keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
70 meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
71 'pad_shape', 'scale_factor'))
72 ]
73 data = dict(train=dict(pipeline=train_pipeline))
1 # ATSS
2
3 > [Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection](https://arxiv.org/abs/1912.02424)
4
5 <!-- [ALGORITHM] -->
6
7 ## Abstract
8
9 Object detection has been dominated by anchor-based detectors for several years. Recently, anchor-free detectors have become popular due to the proposal of FPN and Focal Loss. In this paper, we first point out that the essential difference between anchor-based and anchor-free detection is actually how to define positive and negative training samples, which leads to the performance gap between them. If they adopt the same definition of positive and negative samples during training, there is no obvious difference in the final performance, no matter regressing from a box or a point. This shows that how to select positive and negative training samples is important for current object detectors. Then, we propose an Adaptive Training Sample Selection (ATSS) to automatically select positive and negative samples according to the statistical characteristics of objects. It significantly improves the performance of anchor-based and anchor-free detectors and bridges the gap between them. Finally, we discuss the necessity of tiling multiple anchors per location on the image to detect objects. Extensive experiments conducted on MS COCO support our aforementioned analysis and conclusions. With the newly introduced ATSS, we improve state-of-the-art detectors by a large margin to 50.7% AP without introducing any overhead.
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143870776-c81168f5-e8b2-44ee-978b-509e4372c5c9.png"/>
13 </div>
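
A minimal NumPy sketch of the adaptive selection rule described above, under simplifying assumptions (one ground-truth box, candidates already chosen as the top-k center-closest anchors per level; the function name and shapes are illustrative, not mmdet's API):

```python
import numpy as np

def atss_positives(ious, candidate_idxs):
    """ious: (num_anchors,) IoU of every anchor with one GT box.
    candidate_idxs: indices of the top-k center-closest anchors per level."""
    cand = ious[candidate_idxs]
    thr = cand.mean() + cand.std()  # adaptive per-GT IoU threshold
    # The full rule additionally keeps only anchors whose centers
    # fall inside the GT box.
    return candidate_idxs[cand >= thr]
```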
14
15 ## Results and Models
16
17 | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download |
18 |:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:|
19 | R-50 | pytorch | 1x | 3.7 | 19.7 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209_102539.log.json) |
20 | R-101 | pytorch | 1x | 5.6 | 12.3 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.log.json) |
21
22 ## Citation
23
24 ```latex
25 @article{zhang2019bridging,
26 title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection},
27 author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.},
28 journal = {arXiv preprint arXiv:1912.02424},
29 year = {2019}
30 }
31 ```
1 _base_ = './atss_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(type='Pretrained',
6 checkpoint='torchvision://resnet101')))
1 _base_ = [
2 '../_base_/datasets/coco_detection.py',
3 '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
4 ]
5 model = dict(
6 type='ATSS',
7 backbone=dict(
8 type='ResNet',
9 depth=50,
10 num_stages=4,
11 out_indices=(0, 1, 2, 3),
12 frozen_stages=1,
13 norm_cfg=dict(type='BN', requires_grad=True),
14 norm_eval=True,
15 style='pytorch',
16 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
17 neck=dict(
18 type='FPN',
19 in_channels=[256, 512, 1024, 2048],
20 out_channels=256,
21 start_level=1,
22 add_extra_convs='on_output',
23 num_outs=5),
24 bbox_head=dict(
25 type='ATSSHead',
26 num_classes=80,
27 in_channels=256,
28 stacked_convs=4,
29 feat_channels=256,
30 anchor_generator=dict(
31 type='AnchorGenerator',
32 ratios=[1.0],
33 octave_base_scale=8,
34 scales_per_octave=1,
35 strides=[8, 16, 32, 64, 128]),
36 bbox_coder=dict(
37 type='DeltaXYWHBBoxCoder',
38 target_means=[.0, .0, .0, .0],
39 target_stds=[0.1, 0.1, 0.2, 0.2]),
40 loss_cls=dict(
41 type='FocalLoss',
42 use_sigmoid=True,
43 gamma=2.0,
44 alpha=0.25,
45 loss_weight=1.0),
46 loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
47 loss_centerness=dict(
48 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
49 # training and testing settings
50 train_cfg=dict(
51 assigner=dict(type='ATSSAssigner', topk=9),
52 allowed_border=-1,
53 pos_weight=-1,
54 debug=False),
55 test_cfg=dict(
56 nms_pre=1000,
57 min_bbox_size=0,
58 score_thr=0.05,
59 nms=dict(type='nms', iou_threshold=0.6),
60 max_per_img=100))
61 # optimizer
62 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
1 Collections:
2 - Name: ATSS
3 Metadata:
4 Training Data: COCO
5 Training Techniques:
6 - SGD with Momentum
7 - Weight Decay
8 Training Resources: 8x V100 GPUs
9 Architecture:
10 - ATSS
11 - FPN
12 - ResNet
13 Paper:
14 URL: https://arxiv.org/abs/1912.02424
15 Title: 'Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection'
16 README: configs/atss/README.md
17 Code:
18 URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/atss.py#L6
19 Version: v2.0.0
20
21 Models:
22 - Name: atss_r50_fpn_1x_coco
23 In Collection: ATSS
24 Config: configs/atss/atss_r50_fpn_1x_coco.py
25 Metadata:
26 Training Memory (GB): 3.7
27 inference time (ms/im):
28 - value: 50.76
29 hardware: V100
30 backend: PyTorch
31 batch size: 1
32 mode: FP32
33 resolution: (800, 1333)
34 Epochs: 12
35 Results:
36 - Task: Object Detection
37 Dataset: COCO
38 Metrics:
39 box AP: 39.4
40 Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth
41
42 - Name: atss_r101_fpn_1x_coco
43 In Collection: ATSS
44 Config: configs/atss/atss_r101_fpn_1x_coco.py
45 Metadata:
46 Training Memory (GB): 5.6
47 inference time (ms/im):
48 - value: 81.3
49 hardware: V100
50 backend: PyTorch
51 batch size: 1
52 mode: FP32
53 resolution: (800, 1333)
54 Epochs: 12
55 Results:
56 - Task: Object Detection
57 Dataset: COCO
58 Metrics:
59 box AP: 41.5
60 Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth
1 # AutoAssign
2
3 > [AutoAssign: Differentiable Label Assignment for Dense Object Detection](https://arxiv.org/abs/2007.03496)
4
5 <!-- [ALGORITHM] -->
6
7 ## Abstract
8
9 Determining positive/negative samples for object detection is known as label assignment. Here we present an anchor-free detector named AutoAssign. It requires little human knowledge and achieves appearance-aware label assignment through a fully differentiable weighting mechanism. During training, to both satisfy the prior distribution of the data and adapt to category characteristics, we present Center Weighting to adjust the category-specific prior distributions. To adapt to object appearances, Confidence Weighting is proposed to adjust the specific assignment strategy of each instance. The two weighting modules are then combined to generate positive and negative weights to adjust each location's confidence. Extensive experiments on MS COCO show that our method steadily surpasses other leading sampling strategies by large margins with various backbones. Moreover, our best model achieves 52.1% AP, outperforming all existing one-stage detectors. Besides, experiments on other datasets, e.g., PASCAL VOC, Objects365, and WiderFace, demonstrate the broad applicability of AutoAssign.
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143870875-33567e44-0584-4470-9a90-0df0fb6c1fe2.png"/>
13 </div>
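
As a rough illustration of the Center Weighting described above, here is a sketch of a Gaussian-shaped category prior with a learnable offset and spread; the names, shapes, and exact parameterization are assumptions for illustration, not the paper's code:

```python
import torch

# Learnable per-category shift (mu) and spread (sigma) of the prior.
mu = torch.zeros(2, requires_grad=True)
sigma = torch.ones(2, requires_grad=True)

def center_prior(offsets):
    """offsets: (num_locations, 2) normalized (dx, dy) from each feature
    location to a GT box center. Returns a weight in (0, 1] per location,
    highest near the (shifted) box center."""
    return torch.exp(-((offsets - mu) ** 2 / (2 * sigma ** 2)).sum(dim=-1))
```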
14
15 ## Results and Models
16
17 | Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download |
18 |:---------:|:-------:|:-------:|:--------:|:------:|:------:|:--------:|
19 | R-50 | caffe | 1x | 4.08 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.log.json) |
20
21 **Note**:
22
23 1. We find that the performance is unstable under the 1x setting and may fluctuate by about 0.3 mAP; results in the range 40.3–40.6 mAP are acceptable. The same fluctuation can be found in the original implementation.
24 2. You can get more stable results (~40.6 mAP) by training for 13 epochs in total and dividing the learning rate by 10 at the 10th and 13th epochs; see the config sketch below.
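
A sketch of that 13-epoch schedule in this repo's config style; the step epochs come straight from note 2, while the warmup values are copied from the accompanying 1x config and are an assumption:

```python
# learning policy for the more stable ~40.6 mAP run from note 2
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[10, 13])  # divide lr by 10 at the 10th and 13th epochs
total_epochs = 13
```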
25
26 ## Citation
27
28 ```latex
29 @article{zhu2020autoassign,
30 title={AutoAssign: Differentiable Label Assignment for Dense Object Detection},
31 author={Zhu, Benjin and Wang, Jianfeng and Jiang, Zhengkai and Zong, Fuhang and Liu, Songtao and Li, Zeming and Sun, Jian},
32 journal={arXiv preprint arXiv:2007.03496},
33 year={2020}
34 }
35 ```
1 # We follow the original implementation which
2 # adopts the Caffe pre-trained backbone.
3 _base_ = [
4 '../_base_/datasets/coco_detection.py',
5 '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
6 ]
7 model = dict(
8 type='AutoAssign',
9 backbone=dict(
10 type='ResNet',
11 depth=50,
12 num_stages=4,
13 out_indices=(0, 1, 2, 3),
14 frozen_stages=1,
15 norm_cfg=dict(type='BN', requires_grad=False),
16 norm_eval=True,
17 style='caffe',
18 init_cfg=dict(
19 type='Pretrained',
20 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
21 neck=dict(
22 type='FPN',
23 in_channels=[256, 512, 1024, 2048],
24 out_channels=256,
25 start_level=1,
26 add_extra_convs=True,
27 num_outs=5,
28 relu_before_extra_convs=True,
29 init_cfg=dict(type='Caffe2Xavier', layer='Conv2d')),
30 bbox_head=dict(
31 type='AutoAssignHead',
32 num_classes=80,
33 in_channels=256,
34 stacked_convs=4,
35 feat_channels=256,
36 strides=[8, 16, 32, 64, 128],
37 loss_bbox=dict(type='GIoULoss', loss_weight=5.0)),
38 train_cfg=None,
39 test_cfg=dict(
40 nms_pre=1000,
41 min_bbox_size=0,
42 score_thr=0.05,
43 nms=dict(type='nms', iou_threshold=0.6),
44 max_per_img=100))
45 img_norm_cfg = dict(
46 mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
47 train_pipeline = [
48 dict(type='LoadImageFromFile'),
49 dict(type='LoadAnnotations', with_bbox=True),
50 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
51 dict(type='RandomFlip', flip_ratio=0.5),
52 dict(type='Normalize', **img_norm_cfg),
53 dict(type='Pad', size_divisor=32),
54 dict(type='DefaultFormatBundle'),
55 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
56 ]
57 test_pipeline = [
58 dict(type='LoadImageFromFile'),
59 dict(
60 type='MultiScaleFlipAug',
61 img_scale=(1333, 800),
62 flip=False,
63 transforms=[
64 dict(type='Resize', keep_ratio=True),
65 dict(type='RandomFlip'),
66 dict(type='Normalize', **img_norm_cfg),
67 dict(type='Pad', size_divisor=32),
68 dict(type='ImageToTensor', keys=['img']),
69 dict(type='Collect', keys=['img'])
70 ])
71 ]
72 data = dict(
73 train=dict(pipeline=train_pipeline),
74 val=dict(pipeline=test_pipeline),
75 test=dict(pipeline=test_pipeline))
76 # optimizer
77 optimizer = dict(lr=0.01, paramwise_cfg=dict(norm_decay_mult=0.))
78 # learning policy
79 lr_config = dict(
80 policy='step',
81 warmup='linear',
82 warmup_iters=1000,
83 warmup_ratio=1.0 / 1000,
84 step=[8, 11])
85 total_epochs = 12
1 Collections:
2 - Name: AutoAssign
3 Metadata:
4 Training Data: COCO
5 Training Techniques:
6 - SGD with Momentum
7 - Weight Decay
8 Training Resources: 8x V100 GPUs
9 Architecture:
10 - AutoAssign
11 - FPN
12 - ResNet
13 Paper:
14 URL: https://arxiv.org/abs/2007.03496
15 Title: 'AutoAssign: Differentiable Label Assignment for Dense Object Detection'
16 README: configs/autoassign/README.md
17 Code:
18 URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/detectors/autoassign.py#L6
19 Version: v2.12.0
20
21 Models:
22 - Name: autoassign_r50_fpn_8x2_1x_coco
23 In Collection: AutoAssign
24 Config: configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py
25 Metadata:
26 Training Memory (GB): 4.08
27 Epochs: 12
28 Results:
29 - Task: Object Detection
30 Dataset: COCO
31 Metrics:
32 box AP: 40.4
33 Weights: https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth
1 # CARAFE
2
3 > [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188)
4
5 <!-- [ALGORITHM] -->
6
7 ## Abstract
8
9 Feature upsampling is a key operation in a number of modern convolutional network architectures, e.g. feature pyramids. Its design is critical for dense prediction tasks such as object detection and semantic/instance segmentation. In this work, we propose Content-Aware ReAssembly of FEatures (CARAFE), a universal, lightweight and highly effective operator to fulfill this goal. CARAFE has several appealing properties: (1) Large field of view. Unlike previous works (e.g. bilinear interpolation) that only exploit sub-pixel neighborhoods, CARAFE can aggregate contextual information within a large receptive field. (2) Content-aware handling. Instead of using a fixed kernel for all samples (e.g. deconvolution), CARAFE enables instance-specific content-aware handling, which generates adaptive kernels on-the-fly. (3) Lightweight and fast to compute. CARAFE introduces little computational overhead and can be readily integrated into modern network architectures. We conduct comprehensive evaluations on standard benchmarks in object detection, instance/semantic segmentation and inpainting. CARAFE shows consistent and substantial gains across all the tasks (1.2%, 1.3%, 1.8%, and 1.1 dB respectively) with negligible computational overhead. It has great potential to serve as a strong building block for future research.
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143872016-48225685-0e59-49cf-bd65-a50ee04ca8a2.png"/>
13 </div>
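
A naive PyTorch sketch of the reassembly step described above: the predicted per-location kernels are softmax-normalized and used to reweight each k*k input neighborhood for every upsampled sub-position. The kernel-prediction encoder and the optimized CUDA kernel are omitted; names and shapes are illustrative:

```python
import torch
import torch.nn.functional as F

def carafe_naive(x, kernels, up=2, k=5):
    """x: (B, C, H, W) features; kernels: (B, up*up*k*k, H, W) predicted
    content-aware kernels. Returns (B, C, up*H, up*W)."""
    B, C, H, W = x.shape
    kernels = kernels.view(B, up * up, k * k, H, W)
    kernels = F.softmax(kernels, dim=2)          # normalize each k*k kernel
    patches = F.unfold(x, k, padding=k // 2)     # (B, C*k*k, H*W)
    patches = patches.view(B, C, k * k, H, W)
    # reassemble: weight each neighborhood with its up*up predicted kernels
    out = torch.einsum('bckhw,bukhw->bcuhw', patches, kernels)
    out = out.view(B, C, up, up, H, W).permute(0, 1, 4, 2, 5, 3)
    return out.reshape(B, C, H * up, W * up)

x = torch.randn(1, 16, 8, 8)
kernels = torch.randn(1, 2 * 2 * 5 * 5, 8, 8)
print(carafe_naive(x, kernels).shape)  # torch.Size([1, 16, 16, 16])
```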
14
15 ## Results and Models
16
17 The results on COCO 2017 val are shown in the table below.
18
19 | Method | Backbone | Style | Lr schd | Test Proposal Num | Inf time (fps) | Box AP | Mask AP | Config | Download |
20 |:--------------------:|:--------:|:-------:|:-------:|:-----------------:|:--------------:|:------:|:-------:|:------:|:--------:|
21 | Faster R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 16.5 | 38.6 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_20200504_175733.log.json) |
22 | - | - | - | - | 2000 | | | | | |
23 | Mask R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 14.0 | 39.3 | 35.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_20200503_135957.log.json) |
24 | - | - | - | - | 2000 | | | | | |
25
26 ## Implementation
27
28 The CUDA implementation of CARAFE can be found at https://github.com/myownskyW7/CARAFE.
29
30 ## Citation
31
32 We provide config files to reproduce the object detection & instance segmentation results in the ICCV 2019 Oral paper for [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188).
33
34 ```latex
35 @inproceedings{Wang_2019_ICCV,
36 title = {CARAFE: Content-Aware ReAssembly of FEatures},
37 author = {Wang, Jiaqi and Chen, Kai and Xu, Rui and Liu, Ziwei and Loy, Chen Change and Lin, Dahua},
38 booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
39 month = {October},
40 year = {2019}
41 }
42 ```
1 _base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 neck=dict(
4 type='FPN_CARAFE',
5 in_channels=[256, 512, 1024, 2048],
6 out_channels=256,
7 num_outs=5,
8 start_level=0,
9 end_level=-1,
10 norm_cfg=None,
11 act_cfg=None,
12 order=('conv', 'norm', 'act'),
13 upsample_cfg=dict(
14 type='carafe',
15 up_kernel=5,
16 up_group=1,
17 encoder_kernel=3,
18 encoder_dilation=1,
19 compressed_channels=64)))
20 img_norm_cfg = dict(
21 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
22 train_pipeline = [
23 dict(type='LoadImageFromFile'),
24 dict(type='LoadAnnotations', with_bbox=True),
25 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
26 dict(type='RandomFlip', flip_ratio=0.5),
27 dict(type='Normalize', **img_norm_cfg),
28 dict(type='Pad', size_divisor=64),
29 dict(type='DefaultFormatBundle'),
30 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
31 ]
32 test_pipeline = [
33 dict(type='LoadImageFromFile'),
34 dict(
35 type='MultiScaleFlipAug',
36 img_scale=(1333, 800),
37 flip=False,
38 transforms=[
39 dict(type='Resize', keep_ratio=True),
40 dict(type='RandomFlip'),
41 dict(type='Normalize', **img_norm_cfg),
42 dict(type='Pad', size_divisor=64),
43 dict(type='ImageToTensor', keys=['img']),
44 dict(type='Collect', keys=['img']),
45 ])
46 ]
47 data = dict(
48 train=dict(pipeline=train_pipeline),
49 val=dict(pipeline=test_pipeline),
50 test=dict(pipeline=test_pipeline))
1 _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 neck=dict(
4 type='FPN_CARAFE',
5 in_channels=[256, 512, 1024, 2048],
6 out_channels=256,
7 num_outs=5,
8 start_level=0,
9 end_level=-1,
10 norm_cfg=None,
11 act_cfg=None,
12 order=('conv', 'norm', 'act'),
13 upsample_cfg=dict(
14 type='carafe',
15 up_kernel=5,
16 up_group=1,
17 encoder_kernel=3,
18 encoder_dilation=1,
19 compressed_channels=64)),
20 roi_head=dict(
21 mask_head=dict(
22 upsample_cfg=dict(
23 type='carafe',
24 scale_factor=2,
25 up_kernel=5,
26 up_group=1,
27 encoder_kernel=3,
28 encoder_dilation=1,
29 compressed_channels=64))))
30 img_norm_cfg = dict(
31 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
32 train_pipeline = [
33 dict(type='LoadImageFromFile'),
34 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
35 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
36 dict(type='RandomFlip', flip_ratio=0.5),
37 dict(type='Normalize', **img_norm_cfg),
38 dict(type='Pad', size_divisor=64),
39 dict(type='DefaultFormatBundle'),
40 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
41 ]
42 test_pipeline = [
43 dict(type='LoadImageFromFile'),
44 dict(
45 type='MultiScaleFlipAug',
46 img_scale=(1333, 800),
47 flip=False,
48 transforms=[
49 dict(type='Resize', keep_ratio=True),
50 dict(type='RandomFlip'),
51 dict(type='Normalize', **img_norm_cfg),
52 dict(type='Pad', size_divisor=64),
53 dict(type='ImageToTensor', keys=['img']),
54 dict(type='Collect', keys=['img']),
55 ])
56 ]
57 data = dict(
58 train=dict(pipeline=train_pipeline),
59 val=dict(pipeline=test_pipeline),
60 test=dict(pipeline=test_pipeline))
1 Collections:
2 - Name: CARAFE
3 Metadata:
4 Training Data: COCO
5 Training Techniques:
6 - SGD with Momentum
7 - Weight Decay
8 Training Resources: 8x V100 GPUs
9 Architecture:
10 - RPN
11 - FPN_CARAFE
12 - ResNet
13 - RoIPool
14 Paper:
15 URL: https://arxiv.org/abs/1905.02188
16 Title: 'CARAFE: Content-Aware ReAssembly of FEatures'
17 README: configs/carafe/README.md
18 Code:
19 URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/necks/fpn_carafe.py#L11
20 Version: v2.12.0
21
22 Models:
23 - Name: faster_rcnn_r50_fpn_carafe_1x_coco
24 In Collection: CARAFE
25 Config: configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py
26 Metadata:
27 Training Memory (GB): 4.26
28 Epochs: 12
29 Results:
30 - Task: Object Detection
31 Dataset: COCO
32 Metrics:
33 box AP: 38.6
34 - Task: Instance Segmentation
35 Dataset: COCO
36 Metrics:
37 mask AP: 38.6
38 Weights: https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth
39
40 - Name: mask_rcnn_r50_fpn_carafe_1x_coco
41 In Collection: CARAFE
42 Config: configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py
43 Metadata:
44 Training Memory (GB): 4.31
45 Epochs: 12
46 Results:
47 - Task: Object Detection
48 Dataset: COCO
49 Metrics:
50 box AP: 39.3
51 - Task: Instance Segmentation
52 Dataset: COCO
53 Metrics:
54 mask AP: 35.8
55 Weights: https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth
1 _base_ = './cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(
6 type='Pretrained',
7 checkpoint='open-mmlab://detectron2/resnet101_caffe')))
1 _base_ = './cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(
6 type='Pretrained',
7 checkpoint='open-mmlab://detectron2/resnet101_caffe')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(type='Pretrained',
6 checkpoint='torchvision://resnet101')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(type='Pretrained',
6 checkpoint='torchvision://resnet101')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(type='Pretrained',
6 checkpoint='torchvision://resnet101')))
1 _base_ = ['./cascade_mask_rcnn_r50_fpn_1x_coco.py']
2
3 model = dict(
4 backbone=dict(
5 norm_cfg=dict(requires_grad=False),
6 norm_eval=True,
7 style='caffe',
8 init_cfg=dict(
9 type='Pretrained',
10 checkpoint='open-mmlab://detectron2/resnet50_caffe')))
11 img_norm_cfg = dict(
12 mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
13 train_pipeline = [
14 dict(type='LoadImageFromFile'),
15 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
16 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
17 dict(type='RandomFlip', flip_ratio=0.5),
18 dict(type='Normalize', **img_norm_cfg),
19 dict(type='Pad', size_divisor=32),
20 dict(type='DefaultFormatBundle'),
21 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
22 ]
23 test_pipeline = [
24 dict(type='LoadImageFromFile'),
25 dict(
26 type='MultiScaleFlipAug',
27 img_scale=(1333, 800),
28 flip=False,
29 transforms=[
30 dict(type='Resize', keep_ratio=True),
31 dict(type='RandomFlip'),
32 dict(type='Normalize', **img_norm_cfg),
33 dict(type='Pad', size_divisor=32),
34 dict(type='ImageToTensor', keys=['img']),
35 dict(type='Collect', keys=['img']),
36 ])
37 ]
38 data = dict(
39 train=dict(pipeline=train_pipeline),
40 val=dict(pipeline=test_pipeline),
41 test=dict(pipeline=test_pipeline))
1 _base_ = ['./cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py']
2 model = dict(
3 backbone=dict(
4 norm_cfg=dict(requires_grad=False),
5 norm_eval=True,
6 style='caffe',
7 init_cfg=dict(
8 type='Pretrained',
9 checkpoint='open-mmlab://detectron2/resnet50_caffe')))
10
11 # use caffe img_norm
12 img_norm_cfg = dict(
13 mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
14 # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
15 # multiscale_mode='range'
16 train_pipeline = [
17 dict(type='LoadImageFromFile'),
18 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
19 dict(
20 type='Resize',
21 img_scale=[(1333, 640), (1333, 800)],
22 multiscale_mode='range',
23 keep_ratio=True),
24 dict(type='RandomFlip', flip_ratio=0.5),
25 dict(type='Normalize', **img_norm_cfg),
26 dict(type='Pad', size_divisor=32),
27 dict(type='DefaultFormatBundle'),
28 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
29 ]
30 test_pipeline = [
31 dict(type='LoadImageFromFile'),
32 dict(
33 type='MultiScaleFlipAug',
34 img_scale=(1333, 800),
35 flip=False,
36 transforms=[
37 dict(type='Resize', keep_ratio=True),
38 dict(type='RandomFlip'),
39 dict(type='Normalize', **img_norm_cfg),
40 dict(type='Pad', size_divisor=32),
41 dict(type='ImageToTensor', keys=['img']),
42 dict(type='Collect', keys=['img']),
43 ])
44 ]
45
46 data = dict(
47 train=dict(dataset=dict(pipeline=train_pipeline)),
48 val=dict(pipeline=test_pipeline),
49 test=dict(pipeline=test_pipeline))
1 _base_ = [
2 '../_base_/models/cascade_mask_rcnn_r50_fpn.py',
3 '../_base_/datasets/coco_instance.py',
4 '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
5 ]
1 _base_ = [
2 '../_base_/models/cascade_mask_rcnn_r50_fpn.py',
3 '../_base_/datasets/coco_instance.py',
4 '../_base_/schedules/schedule_20e.py', '../_base_/default_runtime.py'
5 ]
1 _base_ = [
2 '../common/mstrain_3x_coco_instance.py',
3 '../_base_/models/cascade_mask_rcnn_r50_fpn.py'
4 ]
1 _base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=32,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=32,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=32,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'
2
3 model = dict(
4 backbone=dict(
5 type='ResNeXt',
6 depth=101,
7 groups=32,
8 base_width=8,
9 num_stages=4,
10 out_indices=(0, 1, 2, 3),
11 frozen_stages=1,
12 norm_cfg=dict(type='BN', requires_grad=False),
13 style='pytorch',
14 init_cfg=dict(
15 type='Pretrained',
16 checkpoint='open-mmlab://detectron2/resnext101_32x8d')))
17
18 # The ResNeXt-101-32x8d model was trained with Caffe2 at FB,
19 # so the mean and std need to be changed.
20 img_norm_cfg = dict(
21 mean=[103.530, 116.280, 123.675],
22 std=[57.375, 57.120, 58.395],
23 to_rgb=False)
24
25 # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
26 # multiscale_mode='range'
27 train_pipeline = [
28 dict(type='LoadImageFromFile'),
29 dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
30 dict(
31 type='Resize',
32 img_scale=[(1333, 640), (1333, 800)],
33 multiscale_mode='range',
34 keep_ratio=True),
35 dict(type='RandomFlip', flip_ratio=0.5),
36 dict(type='Normalize', **img_norm_cfg),
37 dict(type='Pad', size_divisor=32),
38 dict(type='DefaultFormatBundle'),
39 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
40 ]
41 test_pipeline = [
42 dict(type='LoadImageFromFile'),
43 dict(
44 type='MultiScaleFlipAug',
45 img_scale=(1333, 800),
46 flip=False,
47 transforms=[
48 dict(type='Resize', keep_ratio=True),
49 dict(type='RandomFlip'),
50 dict(type='Normalize', **img_norm_cfg),
51 dict(type='Pad', size_divisor=32),
52 dict(type='ImageToTensor', keys=['img']),
53 dict(type='Collect', keys=['img']),
54 ])
55 ]
56
57 data = dict(
58 train=dict(dataset=dict(pipeline=train_pipeline)),
59 val=dict(pipeline=test_pipeline),
60 test=dict(pipeline=test_pipeline))
1 _base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=64,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=64,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
1 _base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=64,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
1 _base_ = './cascade_rcnn_r50_caffe_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(
6 type='Pretrained',
7 checkpoint='open-mmlab://detectron2/resnet101_caffe')))
1 _base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(type='Pretrained',
6 checkpoint='torchvision://resnet101')))
1 _base_ = './cascade_rcnn_r50_fpn_20e_coco.py'
2 model = dict(
3 backbone=dict(
4 depth=101,
5 init_cfg=dict(type='Pretrained',
6 checkpoint='torchvision://resnet101')))
1 _base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
2
3 model = dict(
4 backbone=dict(
5 norm_cfg=dict(requires_grad=False),
6 style='caffe',
7 init_cfg=dict(
8 type='Pretrained',
9 checkpoint='open-mmlab://detectron2/resnet50_caffe')))
10
11 # use Caffe-style image normalization (BGR order, mean subtraction only)
12 img_norm_cfg = dict(
13 mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
14 train_pipeline = [
15 dict(type='LoadImageFromFile'),
16 dict(type='LoadAnnotations', with_bbox=True),
17 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
18 dict(type='RandomFlip', flip_ratio=0.5),
19 dict(type='Normalize', **img_norm_cfg),
20 dict(type='Pad', size_divisor=32),
21 dict(type='DefaultFormatBundle'),
22 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
23 ]
24 test_pipeline = [
25 dict(type='LoadImageFromFile'),
26 dict(
27 type='MultiScaleFlipAug',
28 img_scale=(1333, 800),
29 flip=False,
30 transforms=[
31 dict(type='Resize', keep_ratio=True),
32 dict(type='RandomFlip'),
33 dict(type='Normalize', **img_norm_cfg),
34 dict(type='Pad', size_divisor=32),
35 dict(type='ImageToTensor', keys=['img']),
36 dict(type='Collect', keys=['img']),
37 ])
38 ]
39 data = dict(
40 train=dict(pipeline=train_pipeline),
41 val=dict(pipeline=test_pipeline),
42 test=dict(pipeline=test_pipeline))
1 _base_ = [
2 '../_base_/models/cascade_rcnn_r50_fpn.py',
3 '../_base_/datasets/coco_detection.py',
4 '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
5 ]
1 _base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
2 # learning policy
3 lr_config = dict(step=[16, 19])
4 runner = dict(type='EpochBasedRunner', max_epochs=20)
1 _base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=32,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
1 _base_ = './cascade_rcnn_r50_fpn_20e_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNeXt',
5 depth=101,
6 groups=32,
7 base_width=4,
8 num_stages=4,
9 out_indices=(0, 1, 2, 3),
10 frozen_stages=1,
11 norm_cfg=dict(type='BN', requires_grad=True),
12 style='pytorch',
13 init_cfg=dict(
14 type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
1 _base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 type='CascadeRCNN',
4 backbone=dict(
5 type='ResNeXt',
6 depth=101,
7 groups=64,
8 base_width=4,
9 num_stages=4,
10 out_indices=(0, 1, 2, 3),
11 frozen_stages=1,
12 norm_cfg=dict(type='BN', requires_grad=True),
13 style='pytorch',
14 init_cfg=dict(
15 type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
1 _base_ = './cascade_rcnn_r50_fpn_20e_coco.py'
2 model = dict(
3 type='CascadeRCNN',
4 backbone=dict(
5 type='ResNeXt',
6 depth=101,
7 groups=64,
8 base_width=4,
9 num_stages=4,
10 out_indices=(0, 1, 2, 3),
11 frozen_stages=1,
12 norm_cfg=dict(type='BN', requires_grad=True),
13 style='pytorch',
14 init_cfg=dict(
15 type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
1 # Cascade RPN
2
3 > [Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution](https://arxiv.org/abs/1909.06720)
4
5 <!-- [ALGORITHM] -->
6
7 ## Abstract
8
9 This paper considers an architecture referred to as Cascade Region Proposal Network (Cascade RPN) for improving region-proposal quality and detection performance by systematically addressing the limitations of the conventional RPN, which heuristically defines the anchors and aligns the features to the anchors. First, instead of using multiple anchors with predefined scales and aspect ratios, Cascade RPN relies on a single anchor per location and performs multi-stage refinement. Each stage is progressively more stringent in defining positive samples, starting out with an anchor-free metric followed by anchor-based metrics in the ensuing stages. Second, to attain alignment between the features and the anchors throughout the stages, adaptive convolution is proposed, which takes the anchors in addition to the image features as its input and learns the sampled features guided by the anchors. A simple implementation of a two-stage Cascade RPN achieves an AR 13.4 points higher than that of the conventional RPN, surpassing all existing region proposal methods. When adopted in Fast R-CNN and Faster R-CNN, Cascade RPN can improve the detection mAP by 3.1 and 3.5 points, respectively.
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143872368-1580193a-d19c-4723-a579-c7ed2d5da4d1.png"/>
13 </div>
14
15 ## Results and Models
16
17 ### Region proposal performance
18
19 | Method | Backbone | Style | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR 1000 | Config | Download |
20 |:------:|:--------:|:-----:|:--------:|:-------------------:|:--------------:|:-------:|:-------:|:--------------------------------------:|
21 | CRPN | R-50-FPN | caffe | - | - | - | 72.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_r50_caffe_fpn_1x_coco/cascade_rpn_r50_caffe_fpn_1x_coco-7aa93cef.pth) |
22
23 ### Detection performance
24
25 | Method | Proposal | Backbone | Style | Schedule | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Config | Download |
26 |:-------------:|:-----------:|:--------:|:-------:|:--------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------------------------------------------:|
27 | Fast R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth) |
28 | Faster R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth) |
29
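For reference, the checkpoints above can be run with MMDetection's high-level Python API. A minimal inference sketch, assuming the Faster R-CNN checkpoint from the table has been downloaded locally (the local file name is an assumption):

```python
from mmdet.apis import init_detector, inference_detector

# Config from this folder; checkpoint name assumed from the table above.
config_file = 'configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py'
checkpoint_file = 'crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo.jpg')  # per-class list of (N, 5) boxes
```
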
30 ## Citation
31
32 We provide code to reproduce the experimental results of [Cascade RPN](https://arxiv.org/abs/1909.06720).
33
34 ```latex
35 @inproceedings{vu2019cascade,
36 title={Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution},
37 author={Vu, Thang and Jang, Hyunjun and Pham, Trung X and Yoo, Chang D},
38 booktitle={Conference on Neural Information Processing Systems (NeurIPS)},
39 year={2019}
40 }
41 ```
1 _base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py'
2 model = dict(
3 backbone=dict(
4 type='ResNet',
5 depth=50,
6 num_stages=4,
7 out_indices=(0, 1, 2, 3),
8 frozen_stages=1,
9 norm_cfg=dict(type='BN', requires_grad=False),
10 norm_eval=True,
11 style='caffe',
12 init_cfg=dict(
13 type='Pretrained',
14 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
15 roi_head=dict(
16 bbox_head=dict(
17 bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),
18 loss_cls=dict(
19 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5),
20 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
21 # model training and testing settings
22 train_cfg=dict(
23 rcnn=dict(
24 assigner=dict(
25 pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),
26 sampler=dict(num=256))),
27 test_cfg=dict(rcnn=dict(score_thr=1e-3)))
28 dataset_type = 'CocoDataset'
29 data_root = 'data/coco/'
30 img_norm_cfg = dict(
31 mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
32 train_pipeline = [
33 dict(type='LoadImageFromFile'),
34 dict(type='LoadProposals', num_max_proposals=300),
35 dict(type='LoadAnnotations', with_bbox=True),
36 dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
37 dict(type='RandomFlip', flip_ratio=0.5),
38 dict(type='Normalize', **img_norm_cfg),
39 dict(type='Pad', size_divisor=32),
40 dict(type='DefaultFormatBundle'),
41 dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
42 ]
43 test_pipeline = [
44 dict(type='LoadImageFromFile'),
45 dict(type='LoadProposals', num_max_proposals=300),
46 dict(
47 type='MultiScaleFlipAug',
48 img_scale=(1333, 800),
49 flip=False,
50 transforms=[
51 dict(type='Resize', keep_ratio=True),
52 dict(type='RandomFlip'),
53 dict(type='Normalize', **img_norm_cfg),
54 dict(type='Pad', size_divisor=32),
55 dict(type='ImageToTensor', keys=['img']),
56 dict(type='ToTensor', keys=['proposals']),
57 dict(
58 type='ToDataContainer',
59 fields=[dict(key='proposals', stack=False)]),
60 dict(type='Collect', keys=['img', 'proposals']),
61 ])
62 ]
63 data = dict(
64 train=dict(
65 proposal_file=data_root +
66 'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl',
67 pipeline=train_pipeline),
68 val=dict(
69 proposal_file=data_root +
70 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',
71 pipeline=test_pipeline),
72 test=dict(
73 proposal_file=data_root +
74 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',
75 pipeline=test_pipeline))
76 optimizer_config = dict(
77 _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
1 _base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py'
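# rpn_weight scales every RPN loss below, balancing it against the R-CNN head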
2 rpn_weight = 0.7
3 model = dict(
4 rpn_head=dict(
5 _delete_=True,
6 type='CascadeRPNHead',
7 num_stages=2,
8 stages=[
9 dict(
10 type='StageCascadeRPNHead',
11 in_channels=256,
12 feat_channels=256,
13 anchor_generator=dict(
14 type='AnchorGenerator',
15 scales=[8],
16 ratios=[1.0],
17 strides=[4, 8, 16, 32, 64]),
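# feature alignment via adaptive convolution: this stage approximates it
# with dilation; the second stage below uses anchor-guided offsets ('offset')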
18 adapt_cfg=dict(type='dilation', dilation=3),
19 bridged_feature=True,
20 sampling=False,
21 with_cls=False,
22 reg_decoded_bbox=True,
23 bbox_coder=dict(
24 type='DeltaXYWHBBoxCoder',
25 target_means=(.0, .0, .0, .0),
26 target_stds=(0.1, 0.1, 0.5, 0.5)),
27 loss_bbox=dict(
28 type='IoULoss', linear=True,
29 loss_weight=10.0 * rpn_weight)),
30 dict(
31 type='StageCascadeRPNHead',
32 in_channels=256,
33 feat_channels=256,
34 adapt_cfg=dict(type='offset'),
35 bridged_feature=False,
36 sampling=True,
37 with_cls=True,
38 reg_decoded_bbox=True,
39 bbox_coder=dict(
40 type='DeltaXYWHBBoxCoder',
41 target_means=(.0, .0, .0, .0),
42 target_stds=(0.05, 0.05, 0.1, 0.1)),
43 loss_cls=dict(
44 type='CrossEntropyLoss',
45 use_sigmoid=True,
46 loss_weight=1.0 * rpn_weight),
47 loss_bbox=dict(
48 type='IoULoss', linear=True,
49 loss_weight=10.0 * rpn_weight))
50 ]),
51 roi_head=dict(
52 bbox_head=dict(
53 bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),
54 loss_cls=dict(
55 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5),
56 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
57 # model training and testing settings
58 train_cfg=dict(
59 rpn=[
60 dict(
61 assigner=dict(
62 type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5),
63 allowed_border=-1,
64 pos_weight=-1,
65 debug=False),
66 dict(
67 assigner=dict(
68 type='MaxIoUAssigner',
69 pos_iou_thr=0.7,
70 neg_iou_thr=0.7,
71 min_pos_iou=0.3,
72 ignore_iof_thr=-1),
73 sampler=dict(
74 type='RandomSampler',
75 num=256,
76 pos_fraction=0.5,
77 neg_pos_ub=-1,
78 add_gt_as_proposals=False),
79 allowed_border=-1,
80 pos_weight=-1,
81 debug=False)
82 ],
83 rpn_proposal=dict(max_per_img=300, nms=dict(iou_threshold=0.8)),
84 rcnn=dict(
85 assigner=dict(
86 pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),
87 sampler=dict(type='RandomSampler', num=256))),
88 test_cfg=dict(
89 rpn=dict(max_per_img=300, nms=dict(iou_threshold=0.8)),
90 rcnn=dict(score_thr=1e-3)))
91 optimizer_config = dict(
92 _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
1 _base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py'
2 model = dict(
3 rpn_head=dict(
4 _delete_=True,
5 type='CascadeRPNHead',
6 num_stages=2,
7 stages=[
8 dict(
9 type='StageCascadeRPNHead',
10 in_channels=256,
11 feat_channels=256,
12 anchor_generator=dict(
13 type='AnchorGenerator',
14 scales=[8],
15 ratios=[1.0],
16 strides=[4, 8, 16, 32, 64]),
17 adapt_cfg=dict(type='dilation', dilation=3),
18 bridged_feature=True,
19 sampling=False,
20 with_cls=False,
21 reg_decoded_bbox=True,
22 bbox_coder=dict(
23 type='DeltaXYWHBBoxCoder',
24 target_means=(.0, .0, .0, .0),
25 target_stds=(0.1, 0.1, 0.5, 0.5)),
26 loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)),
27 dict(
28 type='StageCascadeRPNHead',
29 in_channels=256,
30 feat_channels=256,
31 adapt_cfg=dict(type='offset'),
32 bridged_feature=False,
33 sampling=True,
34 with_cls=True,
35 reg_decoded_bbox=True,
36 bbox_coder=dict(
37 type='DeltaXYWHBBoxCoder',
38 target_means=(.0, .0, .0, .0),
39 target_stds=(0.05, 0.05, 0.1, 0.1)),
40 loss_cls=dict(
41 type='CrossEntropyLoss', use_sigmoid=True,
42 loss_weight=1.0),
43 loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0))
44 ]),
45 train_cfg=dict(rpn=[
46 dict(
47 assigner=dict(
48 type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5),
49 allowed_border=-1,
50 pos_weight=-1,
51 debug=False),
52 dict(
53 assigner=dict(
54 type='MaxIoUAssigner',
55 pos_iou_thr=0.7,
56 neg_iou_thr=0.7,
57 min_pos_iou=0.3,
58 ignore_iof_thr=-1,
59 iou_calculator=dict(type='BboxOverlaps2D')),
60 sampler=dict(
61 type='RandomSampler',
62 num=256,
63 pos_fraction=0.5,
64 neg_pos_ub=-1,
65 add_gt_as_proposals=False),
66 allowed_border=-1,
67 pos_weight=-1,
68 debug=False)
69 ]),
70 test_cfg=dict(
71 rpn=dict(
72 nms_pre=2000,
73 max_per_img=2000,
74 nms=dict(type='nms', iou_threshold=0.8),
75 min_bbox_size=0)))
76 optimizer_config = dict(
77 _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
1 Collections:
2 - Name: Cascade RPN
3 Metadata:
4 Training Data: COCO
5 Training Techniques:
6 - SGD with Momentum
7 - Weight Decay
8 Training Resources: 8x V100 GPUs
9 Architecture:
10 - Cascade RPN
11 - FPN
12 - ResNet
13 Paper:
14 URL: https://arxiv.org/abs/1909.06720
15 Title: 'Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution'
16 README: configs/cascade_rpn/README.md
17 Code:
18 URL: https://github.com/open-mmlab/mmdetection/blob/v2.8.0/mmdet/models/dense_heads/cascade_rpn_head.py#L538
19 Version: v2.8.0
20
21 Models:
22 - Name: crpn_fast_rcnn_r50_caffe_fpn_1x_coco
23 In Collection: Cascade RPN
24 Config: configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py
25 Metadata:
26 Epochs: 12
27 Results:
28 - Task: Object Detection
29 Dataset: COCO
30 Metrics:
31 box AP: 39.9
32 Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth
33
34 - Name: crpn_faster_rcnn_r50_caffe_fpn_1x_coco
35 In Collection: Cascade RPN
36 Config: configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py
37 Metadata:
38 Epochs: 12
39 Results:
40 - Task: Object Detection
41 Dataset: COCO
42 Metrics:
43 box AP: 40.4
44 Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth
1 # CenterNet
2
3 > [Objects as Points](https://arxiv.org/abs/1904.07850)
4
5 <!-- [ALGORITHM] -->
6
7 ## Abstract
8
9 Detection identifies objects as axis-aligned boxes in an image. Most successful object detectors enumerate a nearly exhaustive list of potential object locations and classify each. This is wasteful, inefficient, and requires additional post-processing. In this paper, we take a different approach. We model an object as a single point --- the center point of its bounding box. Our detector uses keypoint estimation to find center points and regresses to all other object properties, such as size, 3D location, orientation, and even pose. Our center point based approach, CenterNet, is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding box based detectors. CenterNet achieves the best speed-accuracy trade-off on the MS COCO dataset, with 28.1% AP at 142 FPS, 37.4% AP at 52 FPS, and 45.1% AP with multi-scale testing at 1.4 FPS. We use the same approach to estimate 3D bounding box in the KITTI benchmark and human pose on the COCO keypoint dataset. Our method performs competitively with sophisticated multi-stage methods and runs in real-time.
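
The pipeline the abstract describes — find peaks on the center heatmap, then read off size and sub-pixel offset at each peak — can be sketched in a few lines of PyTorch. This is a simplified illustration of the decoding idea, not MMDetection's actual `CenterNetHead` code, and all names are hypothetical:

```python
import torch
import torch.nn.functional as F

def decode_centers(heatmap, wh, offset, k=100):
    """Decode top-k boxes from CenterNet-style outputs (simplified sketch).

    heatmap: (B, C, H, W) per-class center scores after sigmoid
    wh:      (B, 2, H, W) box width/height predicted at each location
    offset:  (B, 2, H, W) sub-pixel offset of each center
    Returns boxes (B, k, 4) in feature-map coordinates, plus scores, classes.
    """
    B, C, H, W = heatmap.shape
    # A 3x3 max-pool acts as cheap NMS: keep local maxima only.
    hmax = F.max_pool2d(heatmap, kernel_size=3, stride=1, padding=1)
    heatmap = heatmap * (hmax == heatmap).float()
    scores, inds = heatmap.view(B, -1).topk(k)
    classes = torch.div(inds, H * W, rounding_mode='floor')
    spatial = inds % (H * W)
    ys = torch.div(spatial, W, rounding_mode='floor').float()
    xs = (spatial % W).float()
    # Read wh/offset at the selected spatial locations.
    idx = spatial.unsqueeze(1).expand(B, 2, k)
    wh_k = wh.view(B, 2, H * W).gather(2, idx)
    off_k = offset.view(B, 2, H * W).gather(2, idx)
    cx, cy = xs + off_k[:, 0], ys + off_k[:, 1]
    w, h = wh_k[:, 0], wh_k[:, 1]
    return torch.stack([cx - w / 2, cy - h / 2,
                        cx + w / 2, cy + h / 2], dim=2), scores, classes
```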
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143873810-85ffa6e7-915b-46a4-9b8f-709e5d7700bb.png"/>
13 </div>
14
15 ## Results and Models
16
17 | Backbone | DCN | Mem (GB) | Box AP | Flip box AP | Config | Download |
18 | :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :----: |
19 | ResNet-18 | N | 3.45 | 25.9 | 27.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630.log.json) |
20 | ResNet-18 | Y | 3.47 | 29.5 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131.log.json) |
21
22 Note:
23
24 - The Flip box AP setting is single-scale with `flip=True`.
25 - Due to the complex data augmentation, we find that the performance is unstable and may fluctuate by about 0.4 mAP; an mAP of 29.4 ~ 29.8 is acceptable for ResNet-18-DCNv2.
26 - Compared to the source code, we refer to [CenterNet-Better](https://github.com/FateScript/CenterNet-better) and make the following changes:
27    - Fix the incorrect image mean and variance in image normalization to be compatible with the pre-trained backbone.
28    - Use SGD rather than the Adam optimizer, and add warmup and gradient clipping.
29    - Use DistributedDataParallel rather than DataParallel, as with other models in MMDetection.
30
31 ## Citation
32
33 ```latex
34 @article{zhou2019objects,
35 title={Objects as Points},
36 author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
37 journal={arXiv preprint arXiv:1904.07850},
38 year={2019}
39 }
40 ```
1 _base_ = './centernet_resnet18_dcnv2_140e_coco.py'
2
3 model = dict(neck=dict(use_dcn=False))
1 _base_ = [
2 '../_base_/datasets/coco_detection.py',
3 '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
4 ]
5
6 model = dict(
7 type='CenterNet',
8 backbone=dict(
9 type='ResNet',
10 depth=18,
11 norm_eval=False,
12 norm_cfg=dict(type='BN'),
13 init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),
14 neck=dict(
15 type='CTResNetNeck',
16 in_channel=512,
17 num_deconv_filters=(256, 128, 64),
18 num_deconv_kernels=(4, 4, 4),
19 use_dcn=True),
20 bbox_head=dict(
21 type='CenterNetHead',
22 num_classes=80,
23 in_channel=64,
24 feat_channel=64,
25 loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0),
26 loss_wh=dict(type='L1Loss', loss_weight=0.1),
27 loss_offset=dict(type='L1Loss', loss_weight=1.0)),
28 train_cfg=None,
29 test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100))
30
31 # We fixed the incorrect img_norm_cfg problem in the source code.
32 img_norm_cfg = dict(
33 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34
35 train_pipeline = [
36 dict(type='LoadImageFromFile', to_float32=True, color_type='color'),
37 dict(type='LoadAnnotations', with_bbox=True),
38 dict(
39 type='PhotoMetricDistortion',
40 brightness_delta=32,
41 contrast_range=(0.5, 1.5),
42 saturation_range=(0.5, 1.5),
43 hue_delta=18),
44 dict(
45 type='RandomCenterCropPad',
46 crop_size=(512, 512),
47 ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
48 mean=[0, 0, 0],
49 std=[1, 1, 1],
50 to_rgb=True,
51 test_pad_mode=None),
52 dict(type='Resize', img_scale=(512, 512), keep_ratio=True),
53 dict(type='RandomFlip', flip_ratio=0.5),
54 dict(type='Normalize', **img_norm_cfg),
55 dict(type='DefaultFormatBundle'),
56 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
57 ]
58 test_pipeline = [
59 dict(type='LoadImageFromFile', to_float32=True),
60 dict(
61 type='MultiScaleFlipAug',
62 scale_factor=1.0,
63 flip=False,
64 transforms=[
65 dict(type='Resize', keep_ratio=True),
66 dict(
67 type='RandomCenterCropPad',
68 ratios=None,
69 border=None,
70 mean=[0, 0, 0],
71 std=[1, 1, 1],
72 to_rgb=True,
73 test_mode=True,
74 test_pad_mode=['logical_or', 31],
75 test_pad_add_pix=1),
76 dict(type='RandomFlip'),
77 dict(type='Normalize', **img_norm_cfg),
78 dict(type='DefaultFormatBundle'),
79 dict(
80 type='Collect',
81 meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',
82 'scale_factor', 'flip', 'flip_direction',
83 'img_norm_cfg', 'border'),
84 keys=['img'])
85 ])
86 ]
87
88 dataset_type = 'CocoDataset'
89 data_root = 'data/coco/'
90
91 # Use RepeatDataset to speed up training
92 data = dict(
93 samples_per_gpu=16,
94 workers_per_gpu=4,
95 train=dict(
96 _delete_=True,
97 type='RepeatDataset',
98 times=5,
99 dataset=dict(
100 type=dataset_type,
101 ann_file=data_root + 'annotations/instances_train2017.json',
102 img_prefix=data_root + 'train2017/',
103 pipeline=train_pipeline)),
104 val=dict(pipeline=test_pipeline),
105 test=dict(pipeline=test_pipeline))
106
107 # optimizer
108 # Following the default settings of modern detectors, SGD works better than
109 # the Adam optimizer used in the source code, so we use the default SGD
110 # settings; with Adam and lr=5e-4, the mAP is 29.1.
111 optimizer_config = dict(
112 _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
113
114 # learning policy
115 # Following the default settings of modern detectors, we add a warmup phase.
116 lr_config = dict(
117 policy='step',
118 warmup='linear',
119 warmup_iters=1000,
120 warmup_ratio=1.0 / 1000,
121 step=[18, 24])  # with RepeatDataset(times=5), the effective step is [90, 120]
122 runner = dict(max_epochs=28)  # effective epochs: 28 * 5 = 140
1 Collections:
2 - Name: CenterNet
3 Metadata:
4 Training Data: COCO
5 Training Techniques:
6 - SGD with Momentum
7 - Weight Decay
8 Training Resources: 8x TITANXP GPUs
9 Architecture:
10 - ResNet
11 Paper:
12 URL: https://arxiv.org/abs/1904.07850
13 Title: 'Objects as Points'
14 README: configs/centernet/README.md
15 Code:
16 URL: https://github.com/open-mmlab/mmdetection/blob/v2.13.0/mmdet/models/detectors/centernet.py#L10
17 Version: v2.13.0
18
19 Models:
20 - Name: centernet_resnet18_dcnv2_140e_coco
21 In Collection: CenterNet
22 Config: configs/centernet/centernet_resnet18_dcnv2_140e_coco.py
23 Metadata:
24 Batch Size: 128
25 Training Memory (GB): 3.47
26 Epochs: 140
27 Results:
28 - Task: Object Detection
29 Dataset: COCO
30 Metrics:
31 box AP: 29.5
32 Weights: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth
33
34 - Name: centernet_resnet18_140e_coco
35 In Collection: CenterNet
36 Config: configs/centernet/centernet_resnet18_140e_coco.py
37 Metadata:
38 Batch Size: 128
39 Training Memory (GB): 3.45
40 Epochs: 140
41 Results:
42 - Task: Object Detection
43 Dataset: COCO
44 Metrics:
45 box AP: 25.9
46 Weights: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth
1 # CentripetalNet
2
3 > [CentripetalNet: Pursuing High-quality Keypoint Pairs for Object Detection](https://arxiv.org/abs/2003.09119)
4
5 <!-- [ALGORITHM] -->
6
7 ## Abstract
8
9 Keypoint-based detectors have achieved fairly good performance. However, incorrect keypoint matching is still widespread and greatly affects the performance of the detector. In this paper, we propose CentripetalNet, which uses centripetal shift to pair corner keypoints from the same instance. CentripetalNet predicts the position and the centripetal shift of the corner points and matches corners whose shifted results are aligned. Combining position information, our approach matches corner points more accurately than the conventional embedding approaches do. Corner pooling extracts information inside the bounding boxes onto the border. To make this information more accessible at the corners, we design a cross-star deformable convolution network to conduct feature adaptation. Furthermore, we explore instance segmentation on anchor-free detectors by equipping our CentripetalNet with a mask prediction module. On MS-COCO test-dev, our CentripetalNet not only outperforms all existing anchor-free detectors with an AP of 48.0% but also achieves performance comparable to the state-of-the-art instance segmentation approaches with 40.2% mask AP.
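
The pairing rule described in the abstract — keep a corner pair only when the two shifted corners agree on the box center — can be illustrated in a few lines. A conceptual sketch with hypothetical names, not the actual `CentripetalHead` implementation:

```python
import numpy as np

def match_corners(tl_pts, tl_shifts, br_pts, br_shifts, dist_thr=0.5):
    """Pair top-left / bottom-right corners via centripetal shifts.

    tl_pts, br_pts:       (N, 2), (M, 2) corner coordinates (x, y)
    tl_shifts, br_shifts: predicted shifts pointing toward the box center
    A pair is kept when both shifted corners land near the same point,
    i.e. they agree on where the box center is.
    """
    boxes = []
    for tl, ts in zip(tl_pts, tl_shifts):
        for br, bs in zip(br_pts, br_shifts):
            if not (tl[0] < br[0] and tl[1] < br[1]):
                continue  # corners must enclose a valid box
            center_tl = tl + ts  # center implied by the top-left corner
            center_br = br + bs  # center implied by the bottom-right corner
            # normalize the disagreement by box size so dist_thr is scale-free
            scale = np.array([br[0] - tl[0], br[1] - tl[1]])
            if np.all(np.abs(center_tl - center_br) / scale < dist_thr):
                boxes.append([tl[0], tl[1], br[0], br[1]])
    return np.array(boxes)
```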
10
11 <div align=center>
12 <img src="https://user-images.githubusercontent.com/40661020/143873955-42804e0e-3638-4c5b-8bf4-ac8133bbcdc8.png"/>
13 </div>
14
15 ## Results and Models
16
17 | Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download |
18 | :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: |
19 | HourglassNet-104 | [16 x 6](./centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | 190/210 | 16.7 | 3.7 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804.log.json) |
20
21 Note:
22
23 - The TTA setting is single-scale with `flip=True`.
24 - The released model is the best checkpoint rather than the latest one (box AP 44.8 vs. 44.6 in our experiment).
25
26 ## Citation
27
28 ```latex
29 @InProceedings{Dong_2020_CVPR,
30 author = {Dong, Zhiwei and Li, Guoxuan and Liao, Yue and Wang, Fei and Ren, Pengju and Qian, Chen},
31 title = {CentripetalNet: Pursuing High-Quality Keypoint Pairs for Object Detection},
32 booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
33 month = {June},
34 year = {2020}
35 }
36 ```
1 _base_ = [
2 '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'
3 ]
4
5 # model settings
6 model = dict(
7 type='CornerNet',
8 backbone=dict(
9 type='HourglassNet',
10 downsample_times=5,
11 num_stacks=2,
12 stage_channels=[256, 256, 384, 384, 384, 512],
13 stage_blocks=[2, 2, 2, 2, 2, 4],
14 norm_cfg=dict(type='BN', requires_grad=True)),
15 neck=None,
16 bbox_head=dict(
17 type='CentripetalHead',
18 num_classes=80,
19 in_channels=256,
20 num_feat_levels=2,
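# corner embeddings are disabled: pairing uses centripetal shifts instead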
21 corner_emb_channels=0,
22 loss_heatmap=dict(
23 type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),
24 loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1),
25 loss_guiding_shift=dict(
26 type='SmoothL1Loss', beta=1.0, loss_weight=0.05),
27 loss_centripetal_shift=dict(
28 type='SmoothL1Loss', beta=1.0, loss_weight=1)),
29 # training and testing settings
30 train_cfg=None,
31 test_cfg=dict(
32 corner_topk=100,
33 local_maximum_kernel=3,
34 distance_threshold=0.5,
35 score_thr=0.05,
36 max_per_img=100,
37 nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))
38 # data settings
39 img_norm_cfg = dict(
40 mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
41 train_pipeline = [
42 dict(type='LoadImageFromFile', to_float32=True),
43 dict(type='LoadAnnotations', with_bbox=True),
44 dict(
45 type='PhotoMetricDistortion',
46 brightness_delta=32,
47 contrast_range=(0.5, 1.5),
48 saturation_range=(0.5, 1.5),
49 hue_delta=18),
50 dict(
51 type='RandomCenterCropPad',
52 crop_size=(511, 511),
53 ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
54 test_mode=False,
55 test_pad_mode=None,
56 **img_norm_cfg),
57 dict(type='Resize', img_scale=(511, 511), keep_ratio=False),
58 dict(type='RandomFlip', flip_ratio=0.5),
59 dict(type='Normalize', **img_norm_cfg),
60 dict(type='DefaultFormatBundle'),
61 dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
62 ]
63 test_pipeline = [
64 dict(type='LoadImageFromFile', to_float32=True),
65 dict(
66 type='MultiScaleFlipAug',
67 scale_factor=1.0,
68 flip=True,
69 transforms=[
70 dict(type='Resize'),
71 dict(
72 type='RandomCenterCropPad',
73 crop_size=None,
74 ratios=None,
75 border=None,
76 test_mode=True,
77 test_pad_mode=['logical_or', 127],
78 **img_norm_cfg),
79 dict(type='RandomFlip'),
80 dict(type='Normalize', **img_norm_cfg),
81 dict(type='ImageToTensor', keys=['img']),
82 dict(
83 type='Collect',
84 keys=['img'],
85 meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',
86 'scale_factor', 'flip', 'img_norm_cfg', 'border')),
87 ])
88 ]
89 data = dict(
90 samples_per_gpu=6,
91 workers_per_gpu=3,
92 train=dict(pipeline=train_pipeline),
93 val=dict(pipeline=test_pipeline),
94 test=dict(pipeline=test_pipeline))
95 # optimizer
96 optimizer = dict(type='Adam', lr=0.0005)
97 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
98 # learning policy
99 lr_config = dict(
100 policy='step',
101 warmup='linear',
102 warmup_iters=500,
103 warmup_ratio=1.0 / 3,
104 step=[190])
105 runner = dict(type='EpochBasedRunner', max_epochs=210)
1 Collections:
2 - Name: CentripetalNet
3 Metadata:
4 Training Data: COCO
5 Training Techniques:
6 - Adam
7 Training Resources: 16x V100 GPUs
8 Architecture:
9 - Corner Pooling
10 - Stacked Hourglass Network
11 Paper:
12 URL: https://arxiv.org/abs/2003.09119
13 Title: 'CentripetalNet: Pursuing High-quality Keypoint Pairs for Object Detection'
14 README: configs/centripetalnet/README.md
15 Code:
16 URL: https://github.com/open-mmlab/mmdetection/blob/v2.5.0/mmdet/models/detectors/cornernet.py#L9
17 Version: v2.5.0
18
19 Models:
20 - Name: centripetalnet_hourglass104_mstest_16x6_210e_coco
21 In Collection: CentripetalNet
22 Config: configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py
23 Metadata:
24 Batch Size: 96
25 Training Memory (GB): 16.7
26 inference time (ms/im):
27 - value: 270.27
28 hardware: V100
29 backend: PyTorch
30 batch size: 1
31 mode: FP32
32 resolution: (800, 1333)
33 Epochs: 210
34 Results:
35 - Task: Object Detection
36 Dataset: COCO
37 Metrics:
38 box AP: 44.8
39 Weights: https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth
1 # Cityscapes
2
3 > [The Cityscapes Dataset for Semantic Urban Scene Understanding](https://arxiv.org/abs/1604.01685)
4
5 <!-- [DATASET] -->
6
7 ## Abstract
8
9 Visual understanding of complex urban street scenes is an enabling factor for a wide range of applications. Object detection has benefited enormously from large-scale datasets, especially in the context of deep learning. For semantic urban scene understanding, however, no current dataset adequately captures the complexity of real-world urban scenes.
10 To address this, we introduce Cityscapes, a benchmark suite and large-scale dataset to train and test approaches for pixel-level and instance-level semantic labeling. Cityscapes comprises a large, diverse set of stereo video sequences recorded in streets from 50 different cities. 5000 of these images have high-quality pixel-level annotations; 20000 additional images have coarse annotations to enable methods that leverage large volumes of weakly-labeled data. Crucially, our effort exceeds previous attempts in terms of dataset size, annotation richness, scene variability, and complexity. Our accompanying empirical study provides an in-depth analysis of the dataset characteristics, as well as a performance evaluation of several state-of-the-art approaches based on our benchmark.
11
12 <div align=center>
13 <img src="https://user-images.githubusercontent.com/40661020/143874154-db4484a5-9211-41f6-852a-b7f0a8c9ec26.png"/>
14 </div>
15
16 ## Common settings
17
18 - All baselines were trained using 8 GPUs with a batch size of 8 (1 image per GPU), using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate (see the sketch after this list).
19 - All models were trained on `cityscapes_train` and tested on `cityscapes_val`.
20 - The 1x training schedule indicates 64 epochs, which corresponds to slightly fewer iterations than the 24k reported in the original schedule of the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870).
21 - COCO pre-trained weights are used for initialization.
22 - A conversion [script](../../tools/dataset_converters/cityscapes.py) is provided to convert Cityscapes into COCO format. Please refer to [install.md](../../docs/1_exist_data_model.md#prepare-datasets) for details.
23 - `CityscapesDataset` implements three evaluation methods: `bbox` and `segm` are the standard COCO bbox/mask AP, while `cityscapes` is the official Cityscapes evaluation, which may be slightly higher than COCO.
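
A minimal sketch of that scaling rule, assuming MMDetection's usual 0.02 learning rate at a total batch size of 16 as the baseline:

```python
# Linear scaling rule: scale the learning rate with the total batch size.
base_lr, base_batch = 0.02, 16          # common MMDetection baseline (assumption)
batch_size = 8 * 1                      # 8 GPUs x 1 image per GPU
lr = base_lr * batch_size / base_batch  # = 0.01, matching the config below
```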
24
25 ### Faster R-CNN
26
27 | Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | Config | Download |
28 | :-------------: | :-----: | :-----: | :---: | :------: | :------------: | :----: | :------: | :--------: |
29 | R-50-FPN | pytorch | 1x | 800-1024 | 5.2 | - | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502-829424c0.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502_114915.log.json) |
30
31 ### Mask R-CNN
32
33 | Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download |
34 | :-------------: | :-----: | :-----: | :------: | :------: | :------------: | :----: | :-----: | :------: | :------: |
35 | R-50-FPN | pytorch | 1x | 800-1024 | 5.3 | - | 40.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733-d2858245.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733.log.json) |
36
37 ## Citation
38
39 ```latex
40 @inproceedings{Cordts2016Cityscapes,
41 title={The Cityscapes Dataset for Semantic Urban Scene Understanding},
42 author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt},
43 booktitle={Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
44 year={2016}
45 }
46 ```
1 _base_ = [
2 '../_base_/models/faster_rcnn_r50_fpn.py',
3 '../_base_/datasets/cityscapes_detection.py',
4 '../_base_/default_runtime.py'
5 ]
6 model = dict(
7 backbone=dict(init_cfg=None),
8 roi_head=dict(
9 bbox_head=dict(
10 type='Shared2FCBBoxHead',
11 in_channels=256,
12 fc_out_channels=1024,
13 roi_feat_size=7,
14 num_classes=8,
15 bbox_coder=dict(
16 type='DeltaXYWHBBoxCoder',
17 target_means=[0., 0., 0., 0.],
18 target_stds=[0.1, 0.1, 0.2, 0.2]),
19 reg_class_agnostic=False,
20 loss_cls=dict(
21 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
22 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))))
23 # optimizer
24 # lr is set for a batch size of 8
25 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
26 optimizer_config = dict(grad_clip=None)
27 # learning policy
28 lr_config = dict(
29 policy='step',
30 warmup='linear',
31 warmup_iters=500,
32 warmup_ratio=0.001,
33 # [7] yields higher performance than [6]
34 step=[7])
35 runner = dict(
36 type='EpochBasedRunner', max_epochs=8) # actual epoch = 8 * 8 = 64
37 log_config = dict(interval=100)
38 # For better and more stable performance, initialize from COCO pre-trained weights
39 load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' # noqa