relevant files modified according to Jerry's instructions

2025-06-03 22:03:48 +08:00 · 2020-08-11 13:50:10 +08:00 · 2020-08-11 13:50:10 +08:00 · e1986a5e5e
commit e1986a5e5e
parent d8cba3d6a9
6 changed files with 60 additions and 219 deletions
--- a/configs/_base_/models/fast_scnn.py
+++ b/configs/_base_/models/fast_scnn.py
@@ -17,7 +17,7 @@ model = dict(
        norm_cfg=norm_cfg,
        align_corners=False),
    decode_head=dict(
-        type='SepFCNHead',
+        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
@@ -53,3 +53,7 @@ model = dict(
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    ])
+
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
--- a/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py
+++ b/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py
@@ -1,19 +1,16 @@
 _base_ = [
    '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
-    '../_base_/default_runtime.py'
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
 ]
 crop_size = (512, 1024)
 cudnn_benchmark = True
-# model training and testing settings
-train_cfg = dict()
-test_cfg = dict(mode='whole')

-# Here: What is parameter 'with_seg'?
+# dataset settings
 img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
-    dict(type='LoadAnnotations'),   # with_seg=True
+    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PhotoMetricDistortion'),
@@ -37,6 +34,8 @@ test_pipeline = [
            dict(type='Collect', keys=['img']),
        ])
 ]
+
+# Re-config the data sampler.
 data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
@@ -44,21 +43,5 @@ data = dict(
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))

-# optimizer
+# Re-config the optimizer.
 optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
-optimizer_config = dict()
-# learning policy
-lr_config = dict(
-    policy='poly',
-    power=0.9,
-    by_epoch=False,
-)
-# runtime settings
-# total_epochs = 1000
-total_iters = 80000
-evaluation = dict(interval=2000, metric='mIoU')
-checkpoint_config = dict(interval=2000)
-
-# log config: log by iter.
-log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
-
--- a/mmseg/models/backbones/fast_scnn.py
+++ b/mmseg/models/backbones/fast_scnn.py
@@ -228,6 +228,49 @@ class FeatureFusionModule(nn.Module):

@BACKBONES.register_module()
 class FastSCNN(nn.Module):
+    """Fast-SCNN Backbone.
+    Args:
+        in_channels (int): Number of input image channels. Default=3 (RGB)
+
+        downsample_dw_channels1 (int): Number of output channels after
+            the first conv layer in Learning-To-Downsample (LTD) module.
+
+        downsample_dw_channels2 (int): Number of output channels
+            after the second conv layer in LTD.
+
+        global_in_channels (int): Number of input channels of
+            Global Feature Extractor(GFE).
+            Equal to number of output channels of LTD.
+
+        global_block_channels (tuple): Tuple of integers that describe
+            the output channels for each of the MobileNet-v2 bottleneck
+            residual blocks in GFE.
+
+        global_out_channels (int): Number of output channels of GFE.
+
+        higher_in_channels (int): Number of input channels of the higher
+            resolution branch in FFM.
+            Equal to global_in_channels.
+
+        lower_in_channels (int): Number of input channels of  the lower
+            resolution branch in FFM.
+            Equal to global_out_channels.
+
+        fusion_out_channels (int): Number of output channels of FFM.
+
+        scale_factor (int): The upsampling factor of the higher resolution
+            branch in FFM.
+            Equal to the downsampling factor in GFE.
+
+        out_indices (tuple): Tuple of indices of list
+            [higher_res_features, lower_res_features, fusion_output].
+            Often set to (0,1,2) to enable aux. heads.
+
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict): Config of activation layers.
+        align_corners (bool): align_corners argument of F.interpolate.
+    """

    def __init__(self,
                 in_channels=3,
@@ -245,49 +288,6 @@ class FastSCNN(nn.Module):
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 align_corners=False):
-        """Fast-SCNN Backbone.
-        Args:
-            in_channels (int): Number of input image channels. Default=3 (RGB)
-
-            downsample_dw_channels1 (int): Number of output channels after
-                the first conv layer in Learning-To-Downsample (LTD) module.
-
-            downsample_dw_channels2 (int): Number of output channels
-                after the second conv layer in LTD.
-
-            global_in_channels (int): Number of input channels of
-                Global Feature Extractor(GFE).
-                Equal to number of output channels of LTD.
-
-            global_block_channels (tuple): Tuple of integers that describe
-                the output channels for each of the MobileNet-v2 bottleneck
-                residual blocks in GFE.
-
-            global_out_channels (int): Number of output channels of GFE.
-
-            higher_in_channels (int): Number of input channels of the higher
-                resolution branch in FFM.
-                Equal to global_in_channels.
-
-            lower_in_channels (int): Number of input channels of  the lower
-                resolution branch in FFM.
-                Equal to global_out_channels.
-
-            fusion_out_channels (int): Number of output channels of FFM.
-
-            scale_factor (int): The upsampling factor of the higher resolution
-                branch in FFM.
-                Equal to the downsampling factor in GFE.
-
-            out_indices (tuple): Tuple of indices of list
-                [higher_res_features, lower_res_features, fusion_output].
-                Often set to (0,1,2) to enable aux. heads.
-
-            conv_cfg (dict|None): Config of conv layers.
-            norm_cfg (dict|None): Config of norm layers.
-            act_cfg (dict): Config of activation layers.
-            align_corners (bool): align_corners argument of F.interpolate.
-        """

        super(FastSCNN, self).__init__()
        if global_in_channels != higher_in_channels:
--- a/mmseg/models/backbones/mobile_net_v2.py
+++ b/mmseg/models/backbones/mobile_net_v2.py
@@ -1,11 +1,5 @@
-from mmcv.cnn import (ConvModule, build_norm_layer, constant_init,
-                      kaiming_init, normal_init)
-from mmcv.runner import load_checkpoint
+from mmcv.cnn import ConvModule, build_norm_layer
 from torch import nn
-from torch.nn.modules.batchnorm import _BatchNorm
-
-from mmseg.utils import get_root_logger
-from ..builder import BACKBONES


 class InvertedResidual(nn.Module):
@@ -61,143 +55,3 @@ class InvertedResidual(nn.Module):
            return x + self.conv(x)
        else:
            return self.conv(x)
-
-
-@BACKBONES.register_module()
-class MobileNetV2(nn.Module):
-    arch_settings = (
-        InvertedResidual,
-        [
-            # t, c, n, s
-            [1, 16, 1, 1],
-            [6, 24, 2, 2],
-            [6, 32, 3, 2],
-            [6, 64, 4, 2],
-            [6, 96, 3, 1],
-            [6, 160, 3, 2],
-            [6, 320, 1, 1]
-        ])
-
-    def __init__(self,
-                 in_channels=3,
-                 dilations=(1, 1, 1, 1, 1),
-                 out_indices=(0, 1, 2, 3),
-                 input_channels=32,
-                 width_mult=1.0,
-                 round_nearest=8,
-                 conv_cfg=None,
-                 norm_cfg=dict(type='BN'),
-                 act_cfg=dict(type='ReLU6')):
-        """
-        MobileNet V2 main class
-        Args:
-            width_mult (float): Width multiplier - adjusts number of channels
-                in each layer by this amount
-            round_nearest (int): Round the number of channels in each layer to
-                be a multiple of this number
-            Set to 1 to turn off rounding
-            block: Module specifying inverted residual building block for
-                mobilenet
-        """
-        super(MobileNetV2, self).__init__()
-        self.in_channels = in_channels
-        self.width_mult = width_mult
-        self.conv_cfg = conv_cfg
-        self.norm_cfg = norm_cfg
-        self.act_cfg = act_cfg
-
-        block, inverted_residual_setting = self.arch_settings
-        self.dilations = dilations
-        self.out_indices = out_indices
-
-        # building first layer
-        input_channels = int(
-            input_channels *
-            self.width_mult) if self.width_mult > 1.0 else input_channels
-        # last_channels = int(1280 * multiplier) if multiplier > 1.0 else 1280
-        self.conv1 = ConvModule(
-            3,
-            input_channels,
-            kernel_size=3,
-            stride=2,
-            padding=1,
-            conv_cfg=self.conv_cfg,
-            norm_cfg=self.norm_cfg,
-            act_cfg=self.act_cfg)
-
-        # building inverted residual blocks
-        self.planes = input_channels
-        self.block1 = self._make_layer(block, self.planes,
-                                       inverted_residual_setting[0:1],
-                                       dilations[0])
-        self.block2 = self._make_layer(block, self.planes,
-                                       inverted_residual_setting[1:2],
-                                       dilations[1])
-        self.block3 = self._make_layer(block, self.planes,
-                                       inverted_residual_setting[2:3],
-                                       dilations[2])
-        self.block4 = self._make_layer(block, self.planes,
-                                       inverted_residual_setting[3:5],
-                                       dilations[3])
-        self.block5 = self._make_layer(block, self.planes,
-                                       inverted_residual_setting[5:],
-                                       dilations[4])
-
-    def _make_layer(self,
-                    block,
-                    planes,
-                    inverted_residual_setting,
-                    dilation=1):
-        features = list()
-        for t, c, n, s in inverted_residual_setting:
-            out_channels = int(c * self.width_mult)
-            stride = s if dilation == 1 else 1
-            features.append(
-                block(
-                    planes,
-                    out_channels,
-                    stride,
-                    t,
-                    dilation,
-                    conv_cfg=self.conv_cfg,
-                    norm_cfg=self.norm_cfg,
-                    act_cfg=self.act_cfg))
-            planes = out_channels
-            for i in range(n - 1):
-                features.append(
-                    block(
-                        planes,
-                        out_channels,
-                        1,
-                        t,
-                        conv_cfg=self.conv_cfg,
-                        norm_cfg=self.norm_cfg,
-                        act_cfg=self.act_cfg))
-                planes = out_channels
-        self.planes = planes
-        return nn.Sequential(*features)
-
-    def init_weights(self, pretrained=None):
-        if isinstance(pretrained, str):
-            logger = get_root_logger()
-            load_checkpoint(self, pretrained, strict=False, logger=logger)
-        else:
-            for m in self.modules():
-                if isinstance(m, nn.Conv2d):
-                    kaiming_init(m, mode='fan_out')
-                elif isinstance(m, _BatchNorm):
-                    constant_init(m, 1)
-                elif isinstance(m, nn.Linear):
-                    normal_init(m, 0, 0.01)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x = self.block1(x)
-        c1 = self.block2(x)
-        c2 = self.block3(c1)
-        c3 = self.block4(c2)
-        c4 = self.block5(c3)
-
-        outs = [c1, c2, c3, c4]
-        outs = [outs[i] for i in self.out_indices]
-        return tuple(outs)
--- a/mmseg/models/decode_heads/__init__.py
+++ b/mmseg/models/decode_heads/__init__.py
@@ -10,11 +10,11 @@ from .ocr_head import OCRHead
 from .psa_head import PSAHead
 from .psp_head import PSPHead
 from .sep_aspp_head import DepthwiseSeparableASPPHead
+from .sep_fcn_head import DepthwiseSeparableFCNHead
 from .uper_head import UPerHead
-from .sep_fcn_head import SepFCNHead

 __all__ = [
    'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
    'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
-    'EncHead', 'SepFCNHead'
+    'EncHead', 'DepthwiseSeparableFCNHead'
 ]
--- a/mmseg/models/decode_heads/sep_fcn_head.py
+++ b/mmseg/models/decode_heads/sep_fcn_head.py
@@ -4,7 +4,7 @@ from .fcn_head import FCNHead


@HEADS.register_module()
-class SepFCNHead(FCNHead):
+class DepthwiseSeparableFCNHead(FCNHead):
    """Depthwise-Separable Fully Convolutional Network for Semantic
    Segmentation.

@@ -15,7 +15,7 @@ class SepFCNHead(FCNHead):
        channels(int): Number of middle-stage channels in the decode head.

        concat_input(bool): Whether to concatenate original decode input into
-            the result of consecutive convolution layers.
+            the result of several consecutive convolution layers.

        num_classes(int): Used to determine the dimension of
            final prediction tensor.
@@ -31,7 +31,7 @@ class SepFCNHead(FCNHead):
    """

    def __init__(self, **kwargs):
-        super(SepFCNHead, self).__init__(**kwargs)
+        super(DepthwiseSeparableFCNHead, self).__init__(**kwargs)
        self.convs[0] = DepthwiseSeparableConvModule(
            self.in_channels,
            self.channels,