diff --git a/mmcls/models/backbones/__init__.py b/mmcls/models/backbones/__init__.py index 59a07cbc..8ad4daf9 100644 --- a/mmcls/models/backbones/__init__.py +++ b/mmcls/models/backbones/__init__.py @@ -1,4 +1,6 @@ from .resnet import ResNet, ResNetV1d +from .resnext import ResNeXt from .shufflenet_v1 import ShuffleNetv1 -__all__ = ['ResNet', 'ResNetV1d', 'ShuffleNetv1'] + +__all__ = ['ResNet', 'ResNeXt', 'ResNetV1d', 'ShuffleNetv1'] diff --git a/mmcls/models/backbones/resnet.py b/mmcls/models/backbones/resnet.py index d9b62156..6830379e 100644 --- a/mmcls/models/backbones/resnet.py +++ b/mmcls/models/backbones/resnet.py @@ -9,6 +9,26 @@ from .base_backbone import BaseBackbone class BasicBlock(nn.Module): + """BasicBlock for ResNet. + + Args: + inplanes (int): inplanes of block. + planes (int): planes of block. + stride (int): stride of the block. Default: 1 + dilation (int): dilation of convolution. Default: 1 + downsample (nn.Module): downsample operation on identity branch. + Default: None + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + """ + expansion = 1 def __init__(self, @@ -84,6 +104,26 @@ class BasicBlock(nn.Module): class Bottleneck(nn.Module): + """Bottleneck block for ResNet. + + Args: + inplanes (int): inplanes of block. + planes (int): planes of block. + stride (int): stride of the block. Default: 1 + dilation (int): dilation of convolution. Default: 1 + downsample (nn.Module): downsample operation on identity branch. + Default: None + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + """ + expansion = 4 def __init__(self, @@ -96,10 +136,6 @@ class Bottleneck(nn.Module): with_cp=False, conv_cfg=None, norm_cfg=dict(type='BN')): - """Bottleneck block for ResNet. - If style is "pytorch", the stride-two layer is the 3x3 conv layer, - if it is "caffe", the stride-two layer is the first 1x1 conv layer. - """ super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] @@ -341,9 +377,9 @@ class ResNet(BaseBackbone): frozen_stages=-1, conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, + norm_eval=False, with_cp=False, - zero_init_residual=True): + zero_init_residual=False): super(ResNet, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') diff --git a/mmcls/models/backbones/resnext.py b/mmcls/models/backbones/resnext.py new file mode 100644 index 00000000..521e4be8 --- /dev/null +++ b/mmcls/models/backbones/resnext.py @@ -0,0 +1,132 @@ +import math + +from mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResLayer, ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. + + Args: + inplanes (int): inplanes of block. + planes (int): planes of block. + groups (int): group of convolution. + base_width (int): Base width of resnext. + base_channels (int): Number of base channels of hidden layer. + stride (int): stride of the block. Default: 1 + dilation (int): dilation of convolution. Default: 1 + downsample (nn.Module): downsample operation on identity branch. + Default: None + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + """ + + expansion = 4 + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + **kwargs): + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer( + self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + +@BACKBONES.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + Args: + groups (int): Group of resnext. + base_width (int): Base width of resnext. + depth (int): Depth of resnext, from {50, 101, 152}. + in_channels (int): Number of input image channels. Default: 3. + base_channels (int): Number of base channels of hidden layer. + num_stages (int): Resnet stages. Default: 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super(ResNeXt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + **kwargs) diff --git a/mmcls/models/backbones/shufflenet_v1.py b/mmcls/models/backbones/shufflenet_v1.py index d749263e..09cf5b23 100644 --- a/mmcls/models/backbones/shufflenet_v1.py +++ b/mmcls/models/backbones/shufflenet_v1.py @@ -193,12 +193,13 @@ class ShuffleNetv1(BaseBackbone): Args: groups (int, optional): The number of groups to be used in grouped 1x1 convolutions in each ShuffleUnit. Default is 3 for best performance - according to original paper. + according to original paper. Default: 3. widen_factor (float, optional): Width multiplier - adjusts number of - channels in each layer by this amount. Default is 1.0. + channels in each layer by this amount. Default: 1.0. out_indices (Sequence[int]): Output from which stages. - frozen_stages (int): Stages to be frozen (all param fixed). -1 means - not freezing any parameters. + Default: (0, 1, 2, 3) + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. conv_cfg (dict): Config dict for convolution layer. Default: None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. @@ -207,9 +208,9 @@ class ShuffleNetv1(BaseBackbone): Default: dict(type='ReLU'). norm_eval (bool): Whether to set norm layers to eval mode, namely, freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. + and its variants only. Default: False. with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. + memory while slowing down the training speed. Default: False. """ def __init__(self, @@ -220,7 +221,7 @@ class ShuffleNetv1(BaseBackbone): conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), - norm_eval=True, + norm_eval=False, with_cp=False): super(ShuffleNetv1, self).__init__() self.stage_blocks = [3, 7, 3] diff --git a/tests/test_backbones/test_resnext.py b/tests/test_backbones/test_resnext.py new file mode 100644 index 00000000..65f09bce --- /dev/null +++ b/tests/test_backbones/test_resnext.py @@ -0,0 +1,66 @@ +import pytest +import torch + +from mmcls.models.backbones import ResNeXt +from mmcls.models.backbones.resnext import Bottleneck as BottleneckX + + +def is_block(modules): + """Check if is ResNeXt building block.""" + if isinstance(modules, (BottleneckX)): + return True + return False + + +def test_resnext_bottleneck(): + with pytest.raises(AssertionError): + # Style must be in ['pytorch', 'caffe'] + BottleneckX(64, 64, groups=32, base_width=4, style='tensorflow') + + # Test ResNeXt Bottleneck structure + block = BottleneckX( + 64, 64, groups=32, base_width=4, stride=2, style='pytorch') + assert block.conv2.stride == (2, 2) + assert block.conv2.groups == 32 + assert block.conv2.out_channels == 128 + + # Test ResNeXt Bottleneck forward + block = BottleneckX(64, 16, groups=32, base_width=4) + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + +def test_resnext_backbone(): + with pytest.raises(KeyError): + # ResNeXt depth should be in [50, 101, 152] + ResNeXt(depth=18) + + # Test ResNeXt with group 32, base_width 4 + model = ResNeXt( + depth=50, groups=32, base_width=4, out_indices=(0, 1, 2, 3)) + for m in model.modules(): + if is_block(m): + assert m.conv2.groups == 32 + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 256, 56, 56]) + assert feat[1].shape == torch.Size([1, 512, 28, 28]) + assert feat[2].shape == torch.Size([1, 1024, 14, 14]) + assert feat[3].shape == torch.Size([1, 2048, 7, 7]) + + # Test ResNeXt with group 32, base_width 4 and layers 3 out forward + model = ResNeXt(depth=50, groups=32, base_width=4, out_indices=(3, )) + for m in model.modules(): + if is_block(m): + assert m.conv2.groups == 32 + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat.shape == torch.Size([1, 2048, 7, 7]) diff --git a/tests/test_backbones/test_shufflenet_v1.py b/tests/test_backbones/test_shufflenet_v1.py index 59d19089..b3a854ff 100644 --- a/tests/test_backbones/test_shufflenet_v1.py +++ b/tests/test_backbones/test_shufflenet_v1.py @@ -97,7 +97,7 @@ def test_shufflenetv1_backbone(): model = ShuffleNetv1() model.init_weights() model.train() - assert check_norm_state(model.modules(), False) + assert check_norm_state(model.modules(), True) # Test ShuffleNetv1 with first stage frozen frozen_stages = 1