relevant files modified according to Jerry's instructions

pull/58/head
johnzja 2020-08-11 13:50:10 +08:00
parent d8cba3d6a9
commit e1986a5e5e
6 changed files with 60 additions and 219 deletions

View File

@ -17,7 +17,7 @@ model = dict(
norm_cfg=norm_cfg,
align_corners=False),
decode_head=dict(
type='SepFCNHead',
type='DepthwiseSeparableFCNHead',
in_channels=128,
channels=128,
concat_input=False,
@ -53,3 +53,7 @@ model = dict(
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
])
# model training and testing settings
train_cfg = dict()
test_cfg = dict(mode='whole')

View File

@ -1,19 +1,16 @@
_base_ = [
'../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py'
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
crop_size = (512, 1024)
cudnn_benchmark = True
# model training and testing settings
train_cfg = dict()
test_cfg = dict(mode='whole')
# Here: What is parameter 'with_seg'?
# dataset settings
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(type='LoadAnnotations'), # with_seg=True
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='PhotoMetricDistortion'),
@ -37,6 +34,8 @@ test_pipeline = [
dict(type='Collect', keys=['img']),
])
]
# Override the data loading settings (samples and workers per GPU).
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
@ -44,21 +43,5 @@ data = dict(
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
# optimizer
# Override the optimizer inherited from the base schedule.
optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
optimizer_config = dict()
# learning policy
lr_config = dict(
policy='poly',
power=0.9,
by_epoch=False,
)
# runtime settings
# total_epochs = 1000
total_iters = 80000
evaluation = dict(interval=2000, metric='mIoU')
checkpoint_config = dict(interval=2000)
# log config: log by iter.
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])

View File

@ -228,6 +228,49 @@ class FeatureFusionModule(nn.Module):
@BACKBONES.register_module()
class FastSCNN(nn.Module):
"""Fast-SCNN Backbone.
Args:
in_channels (int): Number of input image channels. Default=3 (RGB)
downsample_dw_channels1 (int): Number of output channels after
the first conv layer in Learning-To-Downsample (LTD) module.
downsample_dw_channels2 (int): Number of output channels
after the second conv layer in LTD.
global_in_channels (int): Number of input channels of
Global Feature Extractor(GFE).
Equal to number of output channels of LTD.
global_block_channels (tuple): Tuple of integers that describe
the output channels for each of the MobileNet-v2 bottleneck
residual blocks in GFE.
global_out_channels (int): Number of output channels of GFE.
higher_in_channels (int): Number of input channels of the higher
resolution branch in FFM.
Equal to global_in_channels.
lower_in_channels (int): Number of input channels of the lower
resolution branch in FFM.
Equal to global_out_channels.
fusion_out_channels (int): Number of output channels of FFM.
scale_factor (int): The upsampling factor of the higher resolution
branch in FFM.
Equal to the downsampling factor in GFE.
out_indices (tuple): Tuple of indices of list
[higher_res_features, lower_res_features, fusion_output].
Often set to (0,1,2) to enable aux. heads.
conv_cfg (dict|None): Config of conv layers.
norm_cfg (dict|None): Config of norm layers.
act_cfg (dict): Config of activation layers.
align_corners (bool): align_corners argument of F.interpolate.
"""
def __init__(self,
in_channels=3,
@ -245,49 +288,6 @@ class FastSCNN(nn.Module):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
align_corners=False):
"""Fast-SCNN Backbone.
Args:
in_channels (int): Number of input image channels. Default=3 (RGB)
downsample_dw_channels1 (int): Number of output channels after
the first conv layer in Learning-To-Downsample (LTD) module.
downsample_dw_channels2 (int): Number of output channels
after the second conv layer in LTD.
global_in_channels (int): Number of input channels of
Global Feature Extractor(GFE).
Equal to number of output channels of LTD.
global_block_channels (tuple): Tuple of integers that describe
the output channels for each of the MobileNet-v2 bottleneck
residual blocks in GFE.
global_out_channels (int): Number of output channels of GFE.
higher_in_channels (int): Number of input channels of the higher
resolution branch in FFM.
Equal to global_in_channels.
lower_in_channels (int): Number of input channels of the lower
resolution branch in FFM.
Equal to global_out_channels.
fusion_out_channels (int): Number of output channels of FFM.
scale_factor (int): The upsampling factor of the higher resolution
branch in FFM.
Equal to the downsampling factor in GFE.
out_indices (tuple): Tuple of indices of list
[higher_res_features, lower_res_features, fusion_output].
Often set to (0,1,2) to enable aux. heads.
conv_cfg (dict|None): Config of conv layers.
norm_cfg (dict|None): Config of norm layers.
act_cfg (dict): Config of activation layers.
align_corners (bool): align_corners argument of F.interpolate.
"""
super(FastSCNN, self).__init__()
if global_in_channels != higher_in_channels:

View File

@ -1,11 +1,5 @@
from mmcv.cnn import (ConvModule, build_norm_layer, constant_init,
kaiming_init, normal_init)
from mmcv.runner import load_checkpoint
from mmcv.cnn import ConvModule, build_norm_layer
from torch import nn
from torch.nn.modules.batchnorm import _BatchNorm
from mmseg.utils import get_root_logger
from ..builder import BACKBONES
class InvertedResidual(nn.Module):
@ -61,143 +55,3 @@ class InvertedResidual(nn.Module):
return x + self.conv(x)
else:
return self.conv(x)
@BACKBONES.register_module()
class MobileNetV2(nn.Module):
    """MobileNetV2 backbone.

    The network is a stem convolution (``conv1``) followed by five stages
    (``block1`` .. ``block5``) assembled from the rows of ``arch_settings``.
    ``forward`` collects the outputs of ``block2`` .. ``block5`` and returns
    the ones selected by ``out_indices``.

    Args:
        in_channels (int): Number of input image channels. Default: 3.
        dilations (tuple[int]): One dilation value per stage (five values).
            The value is forwarded to the first block built for each
            settings row of the stage; when it is not 1, that block uses
            stride 1 instead of the configured stride.
            Default: (1, 1, 1, 1, 1).
        out_indices (tuple[int]): Indices into the list
            [block2_out, block3_out, block4_out, block5_out] selecting the
            feature maps to return. Default: (0, 1, 2, 3).
        input_channels (int): Number of output channels of the stem conv.
            Scaled by ``width_mult`` only when ``width_mult`` > 1.0.
            Default: 32.
        width_mult (float): Width multiplier applied to the output channels
            of every settings row. Default: 1.0.
        round_nearest (int): Accepted for interface compatibility; this
            implementation does not currently use it. Default: 8.
        conv_cfg (dict|None): Config of conv layers.
        norm_cfg (dict|None): Config of norm layers.
        act_cfg (dict): Config of activation layers.
    """

    arch_settings = (
        InvertedResidual,
        [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1]
        ])

    def __init__(self,
                 in_channels=3,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 input_channels=32,
                 width_mult=1.0,
                 round_nearest=8,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU6')):
        super(MobileNetV2, self).__init__()
        self.in_channels = in_channels
        self.width_mult = width_mult
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        block, inverted_residual_setting = self.arch_settings
        self.dilations = dilations
        self.out_indices = out_indices

        # Stem width is only widened, never narrowed, by width_mult.
        if self.width_mult > 1.0:
            input_channels = int(input_channels * self.width_mult)

        # Stem conv. Uses the configured number of image channels rather
        # than a hard-coded 3 so non-RGB inputs work.
        self.conv1 = ConvModule(
            self.in_channels,
            input_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

        # Build the inverted residual stages. ``_make_layer`` updates
        # ``self.planes`` to the stage's output width, so each call below
        # starts from the previous stage's output channels.
        self.planes = input_channels
        self.block1 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[0:1],
                                       dilations[0])
        self.block2 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[1:2],
                                       dilations[1])
        self.block3 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[2:3],
                                       dilations[2])
        self.block4 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[3:5],
                                       dilations[3])
        self.block5 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[5:],
                                       dilations[4])

    def _make_layer(self,
                    block,
                    planes,
                    inverted_residual_setting,
                    dilation=1):
        """Build one stage as an ``nn.Sequential`` of ``block`` modules.

        Args:
            block: Callable used to construct each building block.
            planes (int): Number of channels entering the stage.
            inverted_residual_setting (list[list[int]]): Rows of
                ``[t, c, n, s]`` as listed in ``arch_settings``.
            dilation (int): Dilation forwarded to the first block of each
                row. Default: 1.

        Returns:
            nn.Sequential: The blocks of this stage, in order.

        Side effect: ``self.planes`` is set to the stage's output channels.
        """
        features = list()
        for t, c, n, s in inverted_residual_setting:
            out_channels = int(c * self.width_mult)
            # A dilated stage keeps its resolution: drop the stride.
            stride = s if dilation == 1 else 1
            features.append(
                block(
                    planes,
                    out_channels,
                    stride,
                    t,
                    dilation,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg))
            planes = out_channels
            # Remaining n - 1 repeats use stride 1 and do not receive the
            # dilation argument.
            for _ in range(n - 1):
                features.append(
                    block(
                        planes,
                        out_channels,
                        1,
                        t,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg,
                        act_cfg=self.act_cfg))
                planes = out_channels
        self.planes = planes
        return nn.Sequential(*features)

    def init_weights(self, pretrained=None):
        """Initialize the weights.

        Args:
            pretrained (str|None): When a string, it is passed to
                ``load_checkpoint`` (non-strict). Otherwise conv, batch-norm
                and linear modules are initialized in place.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        else:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m, mode='fan_out')
                elif isinstance(m, _BatchNorm):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, 0, 0.01)

    def forward(self, x):
        """Run the backbone and return the selected stage outputs.

        Returns:
            tuple: Outputs of ``block2`` .. ``block5`` picked by
            ``self.out_indices``, in the order given there.
        """
        x = self.conv1(x)
        x = self.block1(x)
        c1 = self.block2(x)
        c2 = self.block3(c1)
        c3 = self.block4(c2)
        c4 = self.block5(c3)
        outs = [c1, c2, c3, c4]
        outs = [outs[i] for i in self.out_indices]
        return tuple(outs)

View File

@ -10,11 +10,11 @@ from .ocr_head import OCRHead
from .psa_head import PSAHead
from .psp_head import PSPHead
from .sep_aspp_head import DepthwiseSeparableASPPHead
from .sep_fcn_head import DepthwiseSeparableFCNHead
from .uper_head import UPerHead
from .sep_fcn_head import SepFCNHead
__all__ = [
'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
'EncHead', 'SepFCNHead'
'EncHead', 'DepthwiseSeparableFCNHead'
]

View File

@ -4,7 +4,7 @@ from .fcn_head import FCNHead
@HEADS.register_module()
class SepFCNHead(FCNHead):
class DepthwiseSeparableFCNHead(FCNHead):
"""Depthwise-Separable Fully Convolutional Network for Semantic
Segmentation.
@ -15,7 +15,7 @@ class SepFCNHead(FCNHead):
channels(int): Number of middle-stage channels in the decode head.
concat_input(bool): Whether to concatenate original decode input into
the result of consecutive convolution layers.
the result of several consecutive convolution layers.
num_classes(int): Used to determine the dimension of
final prediction tensor.
@ -31,7 +31,7 @@ class SepFCNHead(FCNHead):
"""
def __init__(self, **kwargs):
super(SepFCNHead, self).__init__(**kwargs)
super(DepthwiseSeparableFCNHead, self).__init__(**kwargs)
self.convs[0] = DepthwiseSeparableConvModule(
self.in_channels,
self.channels,