diff --git a/.gitignore b/.gitignore
index 77824a97a..6b4e92ae7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -113,6 +113,8 @@ data
 *.pkl.json
 *.log.json
 work_dirs/
+workdirs/
+configs_unify/
 
 # Pytorch
 *.pth
diff --git a/configs/_base_/models/fast_scnn.py b/configs/_base_/models/fast_scnn.py
new file mode 100644
index 000000000..cf68d9f18
--- /dev/null
+++ b/configs/_base_/models/fast_scnn.py
@@ -0,0 +1,55 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='FastSCNN',
+        downsample_dw_channels1=32,
+        downsample_dw_channels2=48,
+        global_in_channels=64,
+        global_block_channels=(64, 96, 128),
+        global_out_channels=128,
+        higher_in_channels=64,
+        lower_in_channels=128,
+        fusion_out_channels=128,
+        scale_factor=4,
+        out_indices=(0, 1, 2),
+        norm_cfg=norm_cfg,
+        align_corners=False),
+    decode_head=dict(
+        type='SepFCNHead',
+        in_channels=128,
+        channels=128,
+        concat_input=False,
+        num_classes=19,
+        in_index=-1,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.)),
+    auxiliary_head=[
+        dict(
+            type='FCNHead',
+            in_channels=128,
+            channels=32,
+            num_convs=1,
+            num_classes=19,
+            in_index=-2,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+        dict(
+            type='FCNHead',
+            in_channels=64,
+            channels=32,
+            num_convs=1,
+            num_classes=19,
+            in_index=-3,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    ])
diff --git a/configs/fastscnn/fast_scnn_4x3_1000e_cityscapes.py b/configs/fastscnn/fast_scnn_4x3_1000e_cityscapes.py
new file mode 100644
index 000000000..dbc5ee410
--- /dev/null
+++ b/configs/fastscnn/fast_scnn_4x3_1000e_cityscapes.py
@@ -0,0 +1,60 @@
+_base_ = [
+    '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py'
+]
+crop_size = (512, 1024)
+cudnn_benchmark = True
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
+
+# per-channel normalization statistics; images are converted to RGB
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomCrop', crop_size=crop_size),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=3,
+    workers_per_gpu=3,
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=4e-5)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(
+    policy='poly',
+    power=0.9,
+    by_epoch=False,
+)
+# runtime settings
+# total_epochs = 1000
+total_iters = 10000
+evaluation = dict(interval=100, metric='mIoU')
+checkpoint_config = dict(interval=100)
diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py
index 367b398ce..1cb52384d 100644
--- a/mmseg/models/backbones/__init__.py
+++ b/mmseg/models/backbones/__init__.py
@@ -1,5 +1,6 @@
+from .fast_scnn import FastSCNN
 from .hrnet import HRNet
 from .resnet import ResNet, ResNetV1c, ResNetV1d
 from .resnext import ResNeXt
 
-__all__ = ['ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet']
+__all__ = ['ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN']
diff --git a/mmseg/models/backbones/fast_scnn.py b/mmseg/models/backbones/fast_scnn.py
new file mode 100644
index 000000000..3acf541c8
--- /dev/null
+++ b/mmseg/models/backbones/fast_scnn.py
@@ -0,0 +1,330 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, kaiming_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmseg.models.backbones.mobile_net_v2 import InvertedResidual
+from mmseg.models.decode_heads.psp_head import PPM
+from mmseg.ops import DepthwiseSeparableConvModule, resize
+from ..builder import BACKBONES
+
+
+class LearningToDownsample(nn.Module):
+    """Learning to downsample module.
+
+    Args:
+        in_channels (int): Number of input channels.
+        dw_channels1 (int): Output channels of the stride-2 3x3 conv.
+        dw_channels2 (int): Output channels of the first separable conv.
+        out_channels (int): Output channels of the second separable conv.
+        conv_cfg (dict | None): Conv config of the first conv.
+        norm_cfg (dict): Norm config shared by all three layers.
+        act_cfg (dict): Activation config of the first conv.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 dw_channels1,
+                 dw_channels2,
+                 out_channels,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU')):
+        super(LearningToDownsample, self).__init__()
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.conv = ConvModule(
+            in_channels,
+            dw_channels1,
+            3,
+            stride=2,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.dsconv1 = DepthwiseSeparableConvModule(
+            dw_channels1,
+            dw_channels2,
+            stride=2,
+            relu_first=False,
+            norm_cfg=self.norm_cfg)
+        self.dsconv2 = DepthwiseSeparableConvModule(
+            dw_channels2,
+            out_channels,
+            stride=2,
+            relu_first=False,
+            norm_cfg=self.norm_cfg)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.dsconv1(x)
+        x = self.dsconv2(x)
+        return x
+
+
+class GlobalFeatureExtractor(nn.Module):
+    """Global feature extractor module.
+
+    Three stages of ``InvertedResidual`` blocks (strides 2, 2, 1) followed
+    by ``PPM``, channel concatenation and a 1x1 output conv.
+
+    Args:
+        in_channels (int): Number of input channels. Default: 64.
+        block_channels (tuple[int]): Output channels of the three stages.
+            Default: (64, 96, 128).
+        out_channels (int): Output channels of the module. Default: 128.
+        t (int): Expansion ratio of the inverted residual blocks.
+            Default: 6.
+        num_blocks (tuple[int]): Blocks per stage. Default: (3, 3, 3).
+        pool_scales (tuple[int]): Pooling scales passed to ``PPM``.
+            Default: (1, 2, 3, 6).
+        conv_cfg (dict | None): Conv config for ``PPM`` and the output conv.
+        norm_cfg (dict): Norm config for all layers.
+        act_cfg (dict): Activation config for ``PPM`` and the output conv.
+        align_corners (bool): Passed to ``PPM``. Default: True.
+    """
+
+    def __init__(self,
+                 in_channels=64,
+                 block_channels=(64, 96, 128),
+                 out_channels=128,
+                 t=6,
+                 num_blocks=(3, 3, 3),
+                 pool_scales=(1, 2, 3, 6),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=True):
+        super(GlobalFeatureExtractor, self).__init__()
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        assert len(block_channels) == len(num_blocks) == 3
+        self.bottleneck1 = self._make_layer(in_channels, block_channels[0],
+                                            num_blocks[0], t, 2)
+        self.bottleneck2 = self._make_layer(block_channels[0],
+                                            block_channels[1], num_blocks[1],
+                                            t, 2)
+        self.bottleneck3 = self._make_layer(block_channels[1],
+                                            block_channels[2], num_blocks[2],
+                                            t, 1)
+        self.ppm = PPM(
+            pool_scales,
+            block_channels[2],
+            block_channels[2] // 4,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=align_corners)
+        self.out = ConvModule(
+            block_channels[2] * 2,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+    def _make_layer(self, inplanes, planes, blocks, t=6, stride=1):
+        layers = []
+        layers.append(
+            InvertedResidual(
+                inplanes, planes, stride, t, norm_cfg=self.norm_cfg))
+        for _ in range(1, blocks):
+            layers.append(
+                InvertedResidual(planes, planes, 1, t, norm_cfg=self.norm_cfg))
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.bottleneck1(x)
+        x = self.bottleneck2(x)
+        x = self.bottleneck3(x)
+        x = torch.cat([x, *self.ppm(x)], dim=1)
+        x = self.out(x)
+        return x
+
+
+class FeatureFusionModule(nn.Module):
+    """Feature fusion module.
+
+    Args:
+        higher_in_channels (int): Channels of the higher-resolution input.
+        lower_in_channels (int): Channels of the lower-resolution input.
+        out_channels (int): Output channels of the module.
+        scale_factor (int): Nominal resolution ratio between the inputs.
+            Only stored; resizing targets the higher-resolution size.
+        conv_cfg (dict | None): Conv config of all convs. Default: None.
+        norm_cfg (dict): Norm config of all convs.
+        act_cfg (dict): Activation config of the first lower-branch conv.
+        align_corners (bool): ``align_corners`` used when resizing the
+            lower-resolution input. Default: True.
+    """
+
+    def __init__(self,
+                 higher_in_channels,
+                 lower_in_channels,
+                 out_channels,
+                 scale_factor,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=True):
+        super(FeatureFusionModule, self).__init__()
+        self.scale_factor = scale_factor
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+        self.dwconv = ConvModule(
+            lower_in_channels,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.conv_lower_res = ConvModule(
+            out_channels,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.conv_higher_res = ConvModule(
+            higher_in_channels,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.relu = nn.ReLU(True)
+
+    def forward(self, higher_res_feature, lower_res_feature):
+        # Resize to the exact spatial size of the higher-resolution branch
+        # so the element-wise sum below is valid for any input size.
+        lower_res_feature = resize(
+            lower_res_feature,
+            size=higher_res_feature.shape[2:],
+            mode='bilinear',
+            align_corners=self.align_corners)
+        lower_res_feature = self.dwconv(lower_res_feature)
+        lower_res_feature = self.conv_lower_res(lower_res_feature)
+
+        higher_res_feature = self.conv_higher_res(higher_res_feature)
+        out = higher_res_feature + lower_res_feature
+        return self.relu(out)
+
+
+@BACKBONES.register_module()
+class FastSCNN(nn.Module):
+    """Fast-SCNN backbone.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        downsample_dw_channels1 (int): Output channels of the first conv
+            of the learning-to-downsample module. Default: 32.
+        downsample_dw_channels2 (int): Output channels of the first
+            separable conv of that module. Default: 48.
+        global_in_channels (int): Output channels of the downsample module
+            and input channels of the global feature extractor.
+            Default: 64.
+        global_block_channels (tuple[int]): Stage output channels of the
+            global feature extractor. Default: (64, 96, 128).
+        global_out_channels (int): Output channels of the global feature
+            extractor. Default: 128.
+        higher_in_channels (int): Higher-resolution input channels of the
+            fusion module. Default: 64.
+        lower_in_channels (int): Lower-resolution input channels of the
+            fusion module. Default: 128.
+        fusion_out_channels (int): Output channels of the fusion module.
+            Default: 128.
+        scale_factor (int): Passed to the fusion module. Default: 4.
+        out_indices (tuple[int]): Which of (downsample output, global
+            feature output, fusion output) ``forward`` returns.
+            Default: (0, 1, 2).
+        conv_cfg (dict | None): Conv config. Default: None.
+        norm_cfg (dict): Norm config. Default: dict(type='BN').
+        act_cfg (dict): Activation config. Default: dict(type='ReLU').
+        align_corners (bool): Passed to the global feature extractor and
+            the fusion module. Default: False.
+    """
+
+    def __init__(self,
+                 in_channels=3,
+                 downsample_dw_channels1=32,
+                 downsample_dw_channels2=48,
+                 global_in_channels=64,
+                 global_block_channels=(64, 96, 128),
+                 global_out_channels=128,
+                 higher_in_channels=64,
+                 lower_in_channels=128,
+                 fusion_out_channels=128,
+                 scale_factor=4,
+                 out_indices=(0, 1, 2),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False):
+        super(FastSCNN, self).__init__()
+        self.in_channels = in_channels
+        self.downsample_dw_channels1 = downsample_dw_channels1
+        self.downsample_dw_channels2 = downsample_dw_channels2
+        self.global_in_channels = global_in_channels
+        self.global_block_channels = global_block_channels
+        self.global_out_channels = global_out_channels
+        self.higher_in_channels = higher_in_channels
+        self.lower_in_channels = lower_in_channels
+        self.fusion_out_channels = fusion_out_channels
+        self.scale_factor = scale_factor
+        self.out_indices = out_indices
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+        self.learning_to_downsample = LearningToDownsample(
+            in_channels,
+            downsample_dw_channels1,
+            downsample_dw_channels2,
+            global_in_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.global_feature_extractor = GlobalFeatureExtractor(
+            global_in_channels,
+            global_block_channels,
+            global_out_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=self.align_corners)
+        self.feature_fusion = FeatureFusionModule(
+            higher_in_channels,
+            lower_in_channels,
+            fusion_out_channels,
+            scale_factor=self.scale_factor,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=self.align_corners)
+
+    def init_weights(self, pretrained=None):
+        """Initialize conv and norm layers.
+
+        Args:
+            pretrained (str | None): Currently ignored; no checkpoint is
+                loaded by this method.
+        """
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                kaiming_init(m)
+            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+                constant_init(m, 1)
+
+    def forward(self, x):
+        """Return the feature maps selected by ``out_indices`` as a tuple."""
+        higher_res_features = self.learning_to_downsample(x)
+        lower_res_features = self.global_feature_extractor(higher_res_features)
+        fusion_output = self.feature_fusion(higher_res_features,
+                                            lower_res_features)
+
+        outs = [higher_res_features, lower_res_features, fusion_output]
+        outs = [outs[i] for i in self.out_indices]
+        return tuple(outs)
diff --git a/mmseg/models/backbones/mobile_net_v2.py b/mmseg/models/backbones/mobile_net_v2.py
new file mode 100644
index 000000000..718a4b1b5
--- /dev/null
+++ b/mmseg/models/backbones/mobile_net_v2.py
@@ -0,0 +1,236 @@
+from mmcv.cnn import (ConvModule, build_norm_layer, constant_init,
+                      kaiming_init, normal_init)
+from mmcv.runner import load_checkpoint
+from torch import nn
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmseg.utils import get_root_logger
+from ..builder import BACKBONES
+
+
+class InvertedResidual(nn.Module):
+    """Inverted residual block.
+
+    An optional 1x1 expansion conv, a 3x3 depthwise conv and a linear 1x1
+    projection with norm; the input is added back when ``stride == 1`` and
+    ``inp == oup``.
+
+    Args:
+        inp (int): Number of input channels.
+        oup (int): Number of output channels.
+        stride (int): Stride of the depthwise conv, 1 or 2.
+        expand_ratio (int | float): Ratio of hidden to input channels; the
+            expansion conv is skipped when it equals 1.
+        dilation (int): Dilation (and padding) of the depthwise conv.
+            Default: 1.
+        conv_cfg (dict | None): Conv config of the ``ConvModule`` layers.
+        norm_cfg (dict): Norm config of all layers.
+        act_cfg (dict): Activation config of the ``ConvModule`` layers.
+    """
+
+    def __init__(self,
+                 inp,
+                 oup,
+                 stride,
+                 expand_ratio,
+                 dilation=1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU6')):
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        hidden_dim = int(round(inp * expand_ratio))
+        self.use_res_connect = self.stride == 1 and inp == oup
+
+        layers = []
+        if expand_ratio != 1:
+            # pw
+            layers.append(
+                ConvModule(
+                    inp,
+                    hidden_dim,
+                    kernel_size=1,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+        layers.extend([
+            # dw
+            ConvModule(
+                hidden_dim,
+                hidden_dim,
+                kernel_size=3,
+                padding=dilation,
+                stride=stride,
+                dilation=dilation,
+                groups=hidden_dim,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg),
+            # pw-linear
+            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+            build_norm_layer(norm_cfg, oup)[1],
+        ])
+        self.conv = nn.Sequential(*layers)
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
+
+
+@BACKBONES.register_module()
+class MobileNetV2(nn.Module):
+    arch_settings = (
+        InvertedResidual,
+        [
+            # t, c, n, s
+            [1, 16, 1, 1],
+            [6, 24, 2, 2],
+            [6, 32, 3, 2],
+            [6, 64, 4, 2],
+            [6, 96, 3, 1],
+            [6, 160, 3, 2],
+            [6, 320, 1, 1]
+        ])
+
+    def __init__(self,
+                 in_channels=3,
+                 dilations=(1, 1, 1, 1, 1),
+                 out_indices=(0, 1, 2, 3),
+                 input_channels=32,
+                 width_mult=1.0,
+                 round_nearest=8,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU6')):
+        """MobileNet V2 backbone.
+
+        Args:
+            in_channels (int): Number of input image channels. Default: 3.
+            dilations (tuple[int]): Dilation for each of the five stages,
+                given to the first block of every block group; a dilated
+                group uses stride 1. Default: (1, 1, 1, 1, 1).
+            out_indices (tuple[int]): Indices of the last four stage
+                outputs to return. Default: (0, 1, 2, 3).
+            input_channels (int): Output channels of the stem conv; scaled
+                by ``width_mult`` only when ``width_mult > 1``. Default: 32.
+            width_mult (float): Width multiplier applied to the channels of
+                every stage. Default: 1.0.
+            round_nearest (int): Currently unused. Default: 8.
+            conv_cfg (dict | None): Conv config. Default: None.
+            norm_cfg (dict): Norm config. Default: dict(type='BN').
+            act_cfg (dict): Activation config. Default: dict(type='ReLU6').
+        """
+        super(MobileNetV2, self).__init__()
+        self.in_channels = in_channels
+        self.width_mult = width_mult
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+
+        block, inverted_residual_setting = self.arch_settings
+        self.dilations = dilations
+        self.out_indices = out_indices
+
+        # building first layer
+        input_channels = int(
+            input_channels *
+            self.width_mult) if self.width_mult > 1.0 else input_channels
+        self.conv1 = ConvModule(
+            in_channels,
+            input_channels,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+        # building inverted residual blocks
+        self.planes = input_channels
+        self.block1 = self._make_layer(block, self.planes,
+                                       inverted_residual_setting[0:1],
+                                       dilations[0])
+        self.block2 = self._make_layer(block, self.planes,
+                                       inverted_residual_setting[1:2],
+                                       dilations[1])
+        self.block3 = self._make_layer(block, self.planes,
+                                       inverted_residual_setting[2:3],
+                                       dilations[2])
+        self.block4 = self._make_layer(block, self.planes,
+                                       inverted_residual_setting[3:5],
+                                       dilations[3])
+        self.block5 = self._make_layer(block, self.planes,
+                                       inverted_residual_setting[5:],
+                                       dilations[4])
+
+    def _make_layer(self,
+                    block,
+                    planes,
+                    inverted_residual_setting,
+                    dilation=1):
+        """Build one stage from rows of (t, c, n, s) settings."""
+        features = []
+        for t, c, n, s in inverted_residual_setting:
+            out_channels = int(c * self.width_mult)
+            stride = s if dilation == 1 else 1
+            features.append(
+                block(
+                    planes,
+                    out_channels,
+                    stride,
+                    t,
+                    dilation,
+                    conv_cfg=self.conv_cfg,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg=self.act_cfg))
+            planes = out_channels
+            for _ in range(n - 1):
+                features.append(
+                    block(
+                        planes,
+                        out_channels,
+                        1,
+                        t,
+                        conv_cfg=self.conv_cfg,
+                        norm_cfg=self.norm_cfg,
+                        act_cfg=self.act_cfg))
+                planes = out_channels
+        self.planes = planes
+        return nn.Sequential(*features)
+
+    def init_weights(self, pretrained=None):
+        """Load a checkpoint or initialize the weights.
+
+        Args:
+            pretrained (str | None): Path of a checkpoint to load with
+                ``strict=False``. For any non-str value the conv, norm
+                and linear layers are initialized instead.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        else:
+            for m in self.modules():
+                if isinstance(m, nn.Conv2d):
+                    kaiming_init(m, mode='fan_out')
+                elif isinstance(m, _BatchNorm):
+                    constant_init(m, 1)
+                elif isinstance(m, nn.Linear):
+                    normal_init(m, 0, 0.01)
+
+    def forward(self, x):
+        """Return the stage outputs selected by ``out_indices`` as a tuple."""
+        x = self.conv1(x)
+        x = self.block1(x)
+        c1 = self.block2(x)
+        c2 = self.block3(c1)
+        c3 = self.block4(c2)
+        c4 = self.block5(c3)
+
+        outs = [c1, c2, c3, c4]
+        outs = [outs[i] for i in self.out_indices]
+        return tuple(outs)
diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py
index fda430943..05df8c2ab 100644
--- a/mmseg/models/decode_heads/__init__.py
+++ b/mmseg/models/decode_heads/__init__.py
@@ -11,9 +11,10 @@ from .psa_head import PSAHead
 from .psp_head import PSPHead
 from .sep_aspp_head import DepthwiseSeparableASPPHead
+from .sep_fcn_head import SepFCNHead
 from .uper_head import UPerHead
 
 __all__ = [
     'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
     'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
-    'EncHead'
+    'EncHead', 'SepFCNHead'
 ]
diff --git a/mmseg/models/decode_heads/sep_fcn_head.py b/mmseg/models/decode_heads/sep_fcn_head.py
new file mode 100644
index 000000000..e93dfab9b
--- /dev/null
+++ b/mmseg/models/decode_heads/sep_fcn_head.py
@@ -0,0 +1,37 @@
+from mmseg.ops import DepthwiseSeparableConvModule
+from ..builder import HEADS
+from .fcn_head import FCNHead
+
+
+@HEADS.register_module()
+class SepFCNHead(FCNHead):
+    """FCN head whose convs are depthwise separable.
+
+    Every conv built by ``FCNHead`` (and ``conv_cat`` when
+    ``concat_input`` is set) is replaced by a 3x3
+    ``DepthwiseSeparableConvModule``.
+
+    Args:
+        **kwargs: Forwarded unchanged to ``FCNHead``.
+    """
+
+    def __init__(self, **kwargs):
+        super(SepFCNHead, self).__init__(**kwargs)
+        self.convs[0] = DepthwiseSeparableConvModule(
+            self.in_channels,
+            self.channels,
+            norm_cfg=self.norm_cfg,
+            relu_first=False)
+        for i in range(1, self.num_convs):
+            self.convs[i] = DepthwiseSeparableConvModule(
+                self.channels,
+                self.channels,
+                norm_cfg=self.norm_cfg,
+                relu_first=False)
+
+        if self.concat_input:
+            self.conv_cat = DepthwiseSeparableConvModule(
+                self.in_channels + self.channels,
+                self.channels,
+                norm_cfg=self.norm_cfg,
+                relu_first=False)
diff --git a/mmseg/ops/separable_conv_module.py b/mmseg/ops/separable_conv_module.py
index 4e5922cc4..e11365400 100644
--- a/mmseg/ops/separable_conv_module.py
+++ b/mmseg/ops/separable_conv_module.py
@@ -1,88 +1,79 @@
-import torch.nn as nn
-from mmcv.cnn import ConvModule
+from mmcv.cnn import build_norm_layer
+from torch import nn
 
 
 class DepthwiseSeparableConvModule(nn.Module):
-    """Depthwise separable convolution module.
-
-    See https://arxiv.org/pdf/1704.04861.pdf for details.
-
-    This module can replace a ConvModule with the conv block replaced by two
-    conv block: depthwise conv block and pointwise conv block. The depthwise
-    conv block contains depthwise-conv/norm/activation layers. The pointwise
-    conv block contains pointwise-conv/norm/activation layers. It should be
-    noted that there will be norm/activation layer in the depthwise conv block
-    if `norm_cfg` and `act_cfg` are specified.
-
-    Args:
-        in_channels (int): Same as nn.Conv2d.
-        out_channels (int): Same as nn.Conv2d.
-        kernel_size (int or tuple[int]): Same as nn.Conv2d.
-        stride (int or tuple[int]): Same as nn.Conv2d. Default: 1.
-        padding (int or tuple[int]): Same as nn.Conv2d. Default: 0.
-        dilation (int or tuple[int]): Same as nn.Conv2d. Default: 1.
-        norm_cfg (dict): Default norm config for both depthwise ConvModule and
-            pointwise ConvModule. Default: None.
-        act_cfg (dict): Default activation config for both depthwise ConvModule
-            and pointwise ConvModule. Default: dict(type='ReLU').
-        dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
-            'default', it will be the same as `norm_cfg`. Default: 'default'.
-        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
-            'default', it will be the same as `act_cfg`. Default: 'default'.
-        pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
-            'default', it will be the same as `norm_cfg`. Default: 'default'.
-        pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
-            'default', it will be the same as `act_cfg`. Default: 'default'.
-        kwargs (optional): Other shared arguments for depthwise and pointwise
-            ConvModule. See ConvModule for ref.
-    """
+    """Depthwise separable convolution module.
+
+    A depthwise conv and a pointwise (1x1) conv, each followed by a norm
+    layer. With ``relu_first=True`` one ReLU is applied to the input only;
+    otherwise a ReLU follows each of the two norm layers.
+
+    Args:
+        in_channels (int): Number of input channels.
+        out_channels (int): Number of output channels.
+        kernel_size (int): Kernel size of the depthwise conv. Default: 3.
+        stride (int): Stride of the depthwise conv. Default: 1.
+        dilation (int): Dilation of the depthwise conv. Default: 1.
+        relu_first (bool): Apply ReLU before the convs instead of after
+            each norm layer. Default: True.
+        bias (bool): Whether the two convs use a bias. Default: False.
+        norm_cfg (dict): Config of both norm layers.
+            Default: dict(type='BN').
+    """
 
     def __init__(self,
                  in_channels,
                  out_channels,
-                 kernel_size,
+                 kernel_size=3,
                  stride=1,
-                 padding=0,
                  dilation=1,
-                 norm_cfg=None,
-                 act_cfg=dict(type='ReLU'),
-                 dw_norm_cfg='default',
-                 dw_act_cfg='default',
-                 pw_norm_cfg='default',
-                 pw_act_cfg='default',
-                 **kwargs):
+                 relu_first=True,
+                 bias=False,
+                 norm_cfg=dict(type='BN')):
         super(DepthwiseSeparableConvModule, self).__init__()
-        assert 'groups' not in kwargs, 'groups should not be specified'
-
-        # if norm/activation config of depthwise/pointwise ConvModule is not
-        # specified, use default config.
-        dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg
-        dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg
-        pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg
-        pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg
-
-        # depthwise convolution
-        self.depthwise_conv = ConvModule(
+        self.depthwise = nn.Conv2d(
             in_channels,
             in_channels,
             kernel_size,
             stride=stride,
-            padding=padding,
+            # 'same'-style padding for odd kernels; equals dilation for k=3
+            padding=dilation * (kernel_size - 1) // 2,
             dilation=dilation,
             groups=in_channels,
-            norm_cfg=dw_norm_cfg,
-            act_cfg=dw_act_cfg,
-            **kwargs)
+            bias=bias)
+        self.norm_depth_name, norm_depth = build_norm_layer(
+            norm_cfg, in_channels, postfix='_depth')
+        self.add_module(self.norm_depth_name, norm_depth)
 
-        self.pointwise_conv = ConvModule(
-            in_channels,
-            out_channels,
-            1,
-            norm_cfg=pw_norm_cfg,
-            act_cfg=pw_act_cfg,
-            **kwargs)
+        self.pointwise = nn.Conv2d(in_channels, out_channels, 1, bias=bias)
+        self.norm_point_name, norm_point = build_norm_layer(
+            norm_cfg, out_channels, postfix='_point')
+        self.add_module(self.norm_point_name, norm_point)
+
+        self.relu_first = relu_first
+        self.relu = nn.ReLU(inplace=not relu_first)
+
+    @property
+    def norm_depth(self):
+        return getattr(self, self.norm_depth_name)
+
+    @property
+    def norm_point(self):
+        return getattr(self, self.norm_point_name)
 
     def forward(self, x):
-        x = self.depthwise_conv(x)
-        x = self.pointwise_conv(x)
-        return x
+        if self.relu_first:
+            out = self.relu(x)
+            out = self.depthwise(out)
+            out = self.norm_depth(out)
+            out = self.pointwise(out)
+            out = self.norm_point(out)
+        else:
+            out = self.depthwise(x)
+            out = self.norm_depth(out)
+            out = self.relu(out)
+            out = self.pointwise(out)
+            out = self.norm_point(out)
+            out = self.relu(out)
+        return out