mirror of https://github.com/hero-y/BHRL
# Copyright (c) 2019 Western Digital Corporation or its affiliates.
import warnings

import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch.nn.modules.batchnorm import _BatchNorm

from ..builder import BACKBONES


class ResBlock(BaseModule):
    """The basic residual block used in Darknet. Each ResBlock consists of two
    ConvModules, and the input is added to the final output. Each ConvModule
    is composed of Conv, BN, and LeakyReLU. Following the YOLOv3 paper, the
    first ConvModule has half as many filters as the second one; the first
    uses a 1x1 kernel and the second a 3x3 kernel.

    Args:
        in_channels (int): The input channels. Must be even.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    def __init__(self,
                 in_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 init_cfg=None):
        super(ResBlock, self).__init__(init_cfg)
        assert in_channels % 2 == 0  # ensure the in_channels is even
        half_in_channels = in_channels // 2

        # shared conv/norm/act settings for both ConvModules
        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # the 1x1 conv halves the channels; the 3x3 conv restores them so the
        # residual addition in forward() is shape-compatible
        self.conv1 = ConvModule(in_channels, half_in_channels, 1, **cfg)
        self.conv2 = ConvModule(
            half_in_channels, in_channels, 3, padding=1, **cfg)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = out + residual

        return out
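

# A minimal usage sketch for ResBlock (added here for illustration, not part
# of the upstream file); it assumes torch and mmcv are available as elsewhere
# in this repo. The block preserves the input shape, so it can be stacked
# repeatedly inside a stage:
#
#   >>> import torch
#   >>> block = ResBlock(64)
#   >>> block.eval()
#   >>> out = block(torch.rand(1, 64, 52, 52))
#   >>> tuple(out.shape)
#   (1, 64, 52, 52)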


@BACKBONES.register_module()
class Darknet(BaseModule):
    """Darknet backbone.

    Args:
        depth (int): Depth of Darknet. Currently only depth 53 is supported.
        out_indices (Sequence[int]): Output from which stages.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        pretrained (str, optional): Model pretrained path. Default: None
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None

    Example:
        >>> from mmdet.models import Darknet
        >>> import torch
        >>> self = Darknet(depth=53)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 416, 416)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        ...
        (1, 256, 52, 52)
        (1, 512, 26, 26)
        (1, 1024, 13, 13)
    """

    # Dict(depth: (layers, channels))
    arch_settings = {
        53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                               (512, 1024)))
    }
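
    # For reference (comment added for illustration, not in the upstream
    # file): with depth=53 the five stages repeat (1, 2, 8, 8, 4) ResBlocks
    # and, together with their stride-2 lead convs, downsample a 416x416
    # input to 208, 104, 52, 26 and 13 pixels while growing the channels to
    # 64, 128, 256, 512 and 1024, matching the docstring example above.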

    def __init__(self,
                 depth=53,
                 out_indices=(3, 4, 5),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 norm_eval=True,
                 pretrained=None,
                 init_cfg=None):
        super(Darknet, self).__init__(init_cfg)
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for darknet')

        self.depth = depth
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.layers, self.channels = self.arch_settings[depth]

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # stem: a single 3x3 conv that keeps the spatial size
        self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)

        # build the conv+residual stages defined in arch_settings
        self.cr_blocks = ['conv1']
        for i, n_layers in enumerate(self.layers):
            layer_name = f'conv_res_block{i + 1}'
            in_c, out_c = self.channels[i]
            self.add_module(
                layer_name,
                self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
            self.cr_blocks.append(layer_name)

        self.norm_eval = norm_eval

        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be set at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
        elif pretrained is None:
            if init_cfg is None:
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(
                        type='Constant',
                        val=1,
                        layer=['_BatchNorm', 'GroupNorm'])
                ]
        else:
            raise TypeError('pretrained must be a str or None')
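
    # Because the class is registered in BACKBONES, it is normally built from
    # a config dict rather than instantiated directly. A sketch of such a
    # config (illustrative only; the keys and the checkpoint URI follow the
    # usual mmdet convention and are not taken from this repo):
    #
    #   backbone=dict(
    #       type='Darknet',
    #       depth=53,
    #       out_indices=(3, 4, 5),
    #       init_cfg=dict(type='Pretrained',
    #                     checkpoint='open-mmlab://darknet53'))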

    def forward(self, x):
        outs = []
        for i, layer_name in enumerate(self.cr_blocks):
            cr_block = getattr(self, layer_name)
            x = cr_block(x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                m = getattr(self, self.cr_blocks[i])
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        super(Darknet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            # keep BatchNorm layers in eval mode so running stats stay frozen
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
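
    # Illustrative note (comment added here, not from the upstream file):
    # with the defaults above, calling .train() keeps every BatchNorm in eval
    # mode because norm_eval=True, and e.g. Darknet(depth=53, frozen_stages=1)
    # would additionally freeze the stem conv1 (the first entry of cr_blocks)
    # via _freeze_stages.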

    @staticmethod
    def make_conv_res_block(in_channels,
                            out_channels,
                            res_repeat,
                            conv_cfg=None,
                            norm_cfg=dict(type='BN', requires_grad=True),
                            act_cfg=dict(type='LeakyReLU',
                                         negative_slope=0.1)):
        """In the Darknet backbone, a conv layer is usually followed by a
        stack of ResBlocks; this function builds such a stage. The leading
        conv layer always has a 3x3 kernel with stride=2, so it downsamples
        the feature map, and its number of filters equals the output channels
        of the ResBlocks that follow.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of output channels.
            res_repeat (int): The number of ResBlocks.
            conv_cfg (dict): Config dict for convolution layer. Default: None.
            norm_cfg (dict): Dictionary to construct and config norm layer.
                Default: dict(type='BN', requires_grad=True)
            act_cfg (dict): Config dict for activation layer.
                Default: dict(type='LeakyReLU', negative_slope=0.1).
        """

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        model = nn.Sequential()
        model.add_module(
            'conv',
            ConvModule(
                in_channels, out_channels, 3, stride=2, padding=1, **cfg))
        for idx in range(res_repeat):
            model.add_module('res{}'.format(idx),
                             ResBlock(out_channels, **cfg))
        return model
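

# A quick shape check for make_conv_res_block (illustrative sketch, not part
# of the upstream file): the stride-2 lead conv halves the resolution and the
# ResBlocks keep it.
#
#   >>> import torch
#   >>> stage = Darknet.make_conv_res_block(32, 64, res_repeat=1)
#   >>> tuple(stage(torch.rand(1, 32, 416, 416)).shape)
#   (1, 64, 208, 208)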