darknet.py
8.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) 2019 Western Digital Corporation or its affiliates.
import warnings
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch.nn.modules.batchnorm import _BatchNorm
from ..builder import BACKBONES
class ResBlock(BaseModule):
"""The basic residual block used in Darknet. Each ResBlock consists of two
ConvModules and the input is added to the final output. Each ConvModule is
composed of Conv, BN, and LeakyReLU. In YoloV3 paper, the first convLayer
has half of the number of the filters as much as the second convLayer. The
first convLayer has filter size of 1x1 and the second one has the filter
size of 3x3.
Args:
in_channels (int): The input channels. Must be even.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: dict(type='BN', requires_grad=True)
act_cfg (dict): Config dict for activation layer.
Default: dict(type='LeakyReLU', negative_slope=0.1).
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None
"""
def __init__(self,
in_channels,
conv_cfg=None,
norm_cfg=dict(type='BN', requires_grad=True),
act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
init_cfg=None):
super(ResBlock, self).__init__(init_cfg)
assert in_channels % 2 == 0 # ensure the in_channels is even
half_in_channels = in_channels // 2
# shortcut
cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
self.conv1 = ConvModule(in_channels, half_in_channels, 1, **cfg)
self.conv2 = ConvModule(
half_in_channels, in_channels, 3, padding=1, **cfg)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.conv2(out)
out = out + residual
return out
@BACKBONES.register_module()
class Darknet(BaseModule):
"""Darknet backbone.
Args:
depth (int): Depth of Darknet. Currently only support 53.
out_indices (Sequence[int]): Output from which stages.
frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
-1 means not freezing any parameters. Default: -1.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: dict(type='BN', requires_grad=True)
act_cfg (dict): Config dict for activation layer.
Default: dict(type='LeakyReLU', negative_slope=0.1).
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only.
pretrained (str, optional): model pretrained path. Default: None
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None
Example:
>>> from mmdet.models import Darknet
>>> import torch
>>> self = Darknet(depth=53)
>>> self.eval()
>>> inputs = torch.rand(1, 3, 416, 416)
>>> level_outputs = self.forward(inputs)
>>> for level_out in level_outputs:
... print(tuple(level_out.shape))
...
(1, 256, 52, 52)
(1, 512, 26, 26)
(1, 1024, 13, 13)
"""
# Dict(depth: (layers, channels))
arch_settings = {
53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
(512, 1024)))
}
def __init__(self,
depth=53,
out_indices=(3, 4, 5),
frozen_stages=-1,
conv_cfg=None,
norm_cfg=dict(type='BN', requires_grad=True),
act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
norm_eval=True,
pretrained=None,
init_cfg=None):
super(Darknet, self).__init__(init_cfg)
if depth not in self.arch_settings:
raise KeyError(f'invalid depth {depth} for darknet')
self.depth = depth
self.out_indices = out_indices
self.frozen_stages = frozen_stages
self.layers, self.channels = self.arch_settings[depth]
cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)
self.cr_blocks = ['conv1']
for i, n_layers in enumerate(self.layers):
layer_name = f'conv_res_block{i + 1}'
in_c, out_c = self.channels[i]
self.add_module(
layer_name,
self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
self.cr_blocks.append(layer_name)
self.norm_eval = norm_eval
assert not (init_cfg and pretrained), \
'init_cfg and pretrained cannot be specified at the same time'
if isinstance(pretrained, str):
warnings.warn('DeprecationWarning: pretrained is deprecated, '
'please use "init_cfg" instead')
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
elif pretrained is None:
if init_cfg is None:
self.init_cfg = [
dict(type='Kaiming', layer='Conv2d'),
dict(
type='Constant',
val=1,
layer=['_BatchNorm', 'GroupNorm'])
]
else:
raise TypeError('pretrained must be a str or None')
def forward(self, x):
outs = []
for i, layer_name in enumerate(self.cr_blocks):
cr_block = getattr(self, layer_name)
x = cr_block(x)
if i in self.out_indices:
outs.append(x)
return tuple(outs)
def _freeze_stages(self):
if self.frozen_stages >= 0:
for i in range(self.frozen_stages):
m = getattr(self, self.cr_blocks[i])
m.eval()
for param in m.parameters():
param.requires_grad = False
def train(self, mode=True):
super(Darknet, self).train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, _BatchNorm):
m.eval()
@staticmethod
def make_conv_res_block(in_channels,
out_channels,
res_repeat,
conv_cfg=None,
norm_cfg=dict(type='BN', requires_grad=True),
act_cfg=dict(type='LeakyReLU',
negative_slope=0.1)):
"""In Darknet backbone, ConvLayer is usually followed by ResBlock. This
function will make that. The Conv layers always have 3x3 filters with
stride=2. The number of the filters in Conv layer is the same as the
out channels of the ResBlock.
Args:
in_channels (int): The number of input channels.
out_channels (int): The number of output channels.
res_repeat (int): The number of ResBlocks.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: dict(type='BN', requires_grad=True)
act_cfg (dict): Config dict for activation layer.
Default: dict(type='LeakyReLU', negative_slope=0.1).
"""
cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
model = nn.Sequential()
model.add_module(
'conv',
ConvModule(
in_channels, out_channels, 3, stride=2, padding=1, **cfg))
for idx in range(res_repeat):
model.add_module('res{}'.format(idx),
ResBlock(out_channels, **cfg))
return model