mirror of https://github.com/open-mmlab/mmsegmentation.git
synced 2025-06-03 22:03:48 +08:00

init commit: fast_scnn

This commit is contained in:
parent 2b801dedfc
commit 6435e3e162
.gitignore (vendored): 4 additions

@@ -113,6 +113,10 @@ data
 *.pkl.json
 *.log.json
 work_dirs/
+workdirs/
+configs_unify/

 # Pytorch
 *.pth
configs/_base_/models/fast_scnn.py (new file): 55 additions

@@ -0,0 +1,55 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels1=32,
        downsample_dw_channels2=48,
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        scale_factor=4,
        out_indices=(0, 1, 2),
        norm_cfg=norm_cfg,
        align_corners=False),
    decode_head=dict(
        type='SepFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=19,
        in_index=-1,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    ])
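Note (not part of the commit): a minimal sketch of how a model config like this is typically consumed, assuming the mmseg 0.x builder API (build_segmentor with separate train_cfg/test_cfg arguments) and that this file is on disk.

# A hedged sketch, not part of the commit; assumes mmcv and mmseg 0.x.
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile('configs/_base_/models/fast_scnn.py')
model = build_segmentor(
    cfg.model, train_cfg=dict(), test_cfg=dict(mode='whole'))
print(type(model).__name__)  # EncoderDecoder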
configs/fastscnn/fast_scnn_4x3_1000e_cityscapes.py (new file): 61 additions

@@ -0,0 +1,61 @@
_base_ = [
    '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py'
]
crop_size = (512, 1024)
cudnn_benchmark = True
# model training and testing settings
train_cfg = dict()
test_cfg = dict(mode='whole')

# Here: What is parameter 'with_seg'?
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations'),  # with_seg=True
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomCrop', crop_size=crop_size),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=3,
    workers_per_gpu=3,
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))

# optimizer
optimizer = dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=4e-5)
optimizer_config = dict()
# learning policy
lr_config = dict(
    policy='poly',
    power=0.9,
    by_epoch=False,
)
# runtime settings
# total_epochs = 1000
total_iters = 10000
evaluation = dict(interval=100, metric='mIoU')
checkpoint_config = dict(interval=100)
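Note (not part of the commit): the file name presumably encodes the intended launch setup, 4 GPUs with samples_per_gpu=3, i.e. an effective batch size of 12. A small sketch of inspecting the merged config, assuming mmcv is installed and the _base_ files above resolve:

from mmcv import Config

cfg = Config.fromfile('configs/fastscnn/fast_scnn_4x3_1000e_cityscapes.py')
print(cfg.model.backbone.type)  # 'FastSCNN'
print(cfg.optimizer.lr)         # 0.045
print(cfg.total_iters)          # 10000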
mmseg/models/backbones/__init__.py

@@ -1,5 +1,6 @@
 from .hrnet import HRNet
 from .resnet import ResNet, ResNetV1c, ResNetV1d
 from .resnext import ResNeXt
+from .fast_scnn import FastSCNN

-__all__ = ['ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet']
+__all__ = ['ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN']
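Note (not part of the commit): importing the class here is what actually runs the @BACKBONES.register_module() decorator in the new file below, so the string 'FastSCNN' in the config can resolve. A quick registry check, assuming mmseg is importable:

from mmseg.models.builder import BACKBONES

print(BACKBONES.get('FastSCNN'))  # the FastSCNN class, once mmseg.models has been imported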
mmseg/models/backbones/fast_scnn.py (new file): 248 additions

@@ -0,0 +1,248 @@
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, constant_init, kaiming_init
from torch.nn.modules.batchnorm import _BatchNorm

from mmseg.models.backbones.mobile_net_v2 import InvertedResidual
from mmseg.models.decode_heads.psp_head import PPM
from mmseg.ops import DepthwiseSeparableConvModule, resize
from ..builder import BACKBONES


class LearningToDownsample(nn.Module):
    """Learning to downsample module"""

    def __init__(self,
                 in_channels,
                 dw_channels1,
                 dw_channels2,
                 out_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU')):
        super(LearningToDownsample, self).__init__()
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.conv = ConvModule(
            in_channels,
            dw_channels1,
            3,
            stride=2,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.dsconv1 = DepthwiseSeparableConvModule(
            dw_channels1,
            dw_channels2,
            stride=2,
            relu_first=False,
            norm_cfg=self.norm_cfg)
        self.dsconv2 = DepthwiseSeparableConvModule(
            dw_channels2,
            out_channels,
            stride=2,
            relu_first=False,
            norm_cfg=self.norm_cfg)

    def forward(self, x):
        x = self.conv(x)
        x = self.dsconv1(x)
        x = self.dsconv2(x)
        return x


class GlobalFeatureExtractor(nn.Module):
    """Global feature extractor module"""

    def __init__(self,
                 in_channels=64,
                 block_channels=(64, 96, 128),
                 out_channels=128,
                 t=6,
                 num_blocks=(3, 3, 3),
                 pool_scales=(1, 2, 3, 6),
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 align_corners=True):
        super(GlobalFeatureExtractor, self).__init__()
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        assert len(block_channels) == len(num_blocks) == 3
        self.bottleneck1 = self._make_layer(in_channels, block_channels[0],
                                            num_blocks[0], t, 2)
        self.bottleneck2 = self._make_layer(block_channels[0],
                                            block_channels[1], num_blocks[1],
                                            t, 2)
        self.bottleneck3 = self._make_layer(block_channels[1],
                                            block_channels[2], num_blocks[2],
                                            t, 1)
        self.ppm = PPM(
            pool_scales,
            block_channels[2],
            block_channels[2] // 4,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
            align_corners=align_corners)
        self.out = ConvModule(
            block_channels[2] * 2,
            out_channels,
            1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def _make_layer(self, inplanes, planes, blocks, t=6, stride=1):
        layers = []
        layers.append(
            InvertedResidual(
                inplanes, planes, stride, t, norm_cfg=self.norm_cfg))
        for i in range(1, blocks):
            layers.append(
                InvertedResidual(planes, planes, 1, t, norm_cfg=self.norm_cfg))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.bottleneck1(x)
        x = self.bottleneck2(x)
        x = self.bottleneck3(x)
        x = torch.cat([x, *self.ppm(x)], dim=1)
        x = self.out(x)
        return x


class FeatureFusionModule(nn.Module):
    """Feature fusion module"""

    def __init__(self,
                 higher_in_channels,
                 lower_in_channels,
                 out_channels,
                 scale_factor,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 align_corners=True):
        super(FeatureFusionModule, self).__init__()
        self.scale_factor = scale_factor
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.align_corners = align_corners
        self.dwconv = ConvModule(
            lower_in_channels,
            out_channels,
            1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.conv_lower_res = ConvModule(
            out_channels,
            out_channels,
            1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=None)
        self.conv_higher_res = ConvModule(
            higher_in_channels,
            out_channels,
            1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=None)
        self.relu = nn.ReLU(True)

    def forward(self, higher_res_feature, lower_res_feature):
        lower_res_feature = resize(
            lower_res_feature,
            scale_factor=self.scale_factor,
            mode='bilinear',
            align_corners=self.align_corners)
        lower_res_feature = self.dwconv(lower_res_feature)
        lower_res_feature = self.conv_lower_res(lower_res_feature)

        higher_res_feature = self.conv_higher_res(higher_res_feature)
        out = higher_res_feature + lower_res_feature
        return self.relu(out)


@BACKBONES.register_module()
class FastSCNN(nn.Module):

    def __init__(self,
                 in_channels=3,
                 downsample_dw_channels1=32,
                 downsample_dw_channels2=48,
                 global_in_channels=64,
                 global_block_channels=(64, 96, 128),
                 global_out_channels=128,
                 higher_in_channels=64,
                 lower_in_channels=128,
                 fusion_out_channels=128,
                 scale_factor=4,
                 out_indices=(0, 1, 2),
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 align_corners=False):
        super(FastSCNN, self).__init__()
        self.in_channels = in_channels
        self.downsample_dw_channels1 = downsample_dw_channels1
        self.downsample_dw_channels2 = downsample_dw_channels2
        self.global_in_channels = global_in_channels
        self.global_block_channels = global_block_channels
        self.global_out_channels = global_out_channels
        self.higher_in_channels = higher_in_channels
        self.lower_in_channels = lower_in_channels
        self.fusion_out_channels = fusion_out_channels
        self.scale_factor = scale_factor
        self.out_indices = out_indices
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.align_corners = align_corners
        self.learning_to_downsample = LearningToDownsample(
            in_channels,
            downsample_dw_channels1,
            downsample_dw_channels2,
            global_in_channels,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.global_feature_extractor = GlobalFeatureExtractor(
            global_in_channels,
            global_block_channels,
            global_out_channels,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
            align_corners=self.align_corners)
        self.feature_fusion = FeatureFusionModule(
            higher_in_channels,
            lower_in_channels,
            fusion_out_channels,
            scale_factor=self.scale_factor,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
            align_corners=self.align_corners)

    def init_weights(self, pretrained=None):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                kaiming_init(m)
            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                constant_init(m, 1)

    def forward(self, x):
        higher_res_features = self.learning_to_downsample(x)
        lower_res_features = self.global_feature_extractor(higher_res_features)
        fusion_output = self.feature_fusion(higher_res_features,
                                            lower_res_features)

        outs = [higher_res_features, lower_res_features, fusion_output]
        outs = [outs[i] for i in self.out_indices]
        return tuple(outs)
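Note (not part of the commit): the backbone returns a tuple indexed by out_indices, so a quick shape check makes the three output resolutions concrete. A sketch, assuming the class above is importable and torch is available:

import torch

model = FastSCNN()  # defaults mirror configs/_base_/models/fast_scnn.py
model.init_weights()
model.eval()
with torch.no_grad():
    outs = model(torch.randn(1, 3, 512, 1024))
for out in outs:
    print(tuple(out.shape))
# Expected with the defaults:
# (1, 64, 64, 128)   higher-res features at 1/8 resolution
# (1, 128, 16, 32)   global features at 1/32 resolution
# (1, 128, 64, 128)  fused features at 1/8 resolution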
mmseg/models/backbones/mobile_net_v2.py (new file): 203 additions

@@ -0,0 +1,203 @@
from mmcv.cnn import (ConvModule, build_norm_layer, constant_init,
                      kaiming_init, normal_init)
from mmcv.runner import load_checkpoint
from torch import nn
from torch.nn.modules.batchnorm import _BatchNorm

from mmseg.utils import get_root_logger
from ..builder import BACKBONES


class InvertedResidual(nn.Module):

    def __init__(self,
                 inp,
                 oup,
                 stride,
                 expand_ratio,
                 dilation=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU6')):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(
                ConvModule(
                    inp,
                    hidden_dim,
                    kernel_size=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
        layers.extend([
            # dw
            ConvModule(
                hidden_dim,
                hidden_dim,
                kernel_size=3,
                padding=dilation,
                stride=stride,
                dilation=dilation,
                groups=hidden_dim,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            build_norm_layer(norm_cfg, oup)[1],
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


@BACKBONES.register_module()
class MobileNetV2(nn.Module):
    arch_settings = (
        InvertedResidual,
        [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1]
        ])

    def __init__(self,
                 in_channels=3,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 input_channels=32,
                 width_mult=1.0,
                 round_nearest=8,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU6')):
        """
        MobileNet V2 main class

        Args:
            width_mult (float): Width multiplier - adjusts number of channels
                in each layer by this amount
            round_nearest (int): Round the number of channels in each layer to
                be a multiple of this number. Set to 1 to turn off rounding.
            block: Module specifying inverted residual building block for
                mobilenet
        """
        super(MobileNetV2, self).__init__()
        self.in_channels = in_channels
        self.width_mult = width_mult
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        block, inverted_residual_setting = self.arch_settings
        self.dilations = dilations
        self.out_indices = out_indices

        # building first layer
        input_channels = int(
            input_channels *
            self.width_mult) if self.width_mult > 1.0 else input_channels
        # last_channels = int(1280 * multiplier) if multiplier > 1.0 else 1280
        self.conv1 = ConvModule(
            3,
            input_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

        # building inverted residual blocks
        self.planes = input_channels
        self.block1 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[0:1],
                                       dilations[0])
        self.block2 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[1:2],
                                       dilations[1])
        self.block3 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[2:3],
                                       dilations[2])
        self.block4 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[3:5],
                                       dilations[3])
        self.block5 = self._make_layer(block, self.planes,
                                       inverted_residual_setting[5:],
                                       dilations[4])

    def _make_layer(self,
                    block,
                    planes,
                    inverted_residual_setting,
                    dilation=1):
        features = list()
        for t, c, n, s in inverted_residual_setting:
            out_channels = int(c * self.width_mult)
            stride = s if dilation == 1 else 1
            features.append(
                block(
                    planes,
                    out_channels,
                    stride,
                    t,
                    dilation,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg))
            planes = out_channels
            for i in range(n - 1):
                features.append(
                    block(
                        planes,
                        out_channels,
                        1,
                        t,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg,
                        act_cfg=self.act_cfg))
                planes = out_channels
        self.planes = planes
        return nn.Sequential(*features)

    def init_weights(self, pretrained=None):
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        else:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m, mode='fan_out')
                elif isinstance(m, _BatchNorm):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, 0, 0.01)

    def forward(self, x):
        x = self.conv1(x)
        x = self.block1(x)
        c1 = self.block2(x)
        c2 = self.block3(c1)
        c3 = self.block4(c2)
        c4 = self.block5(c3)

        outs = [c1, c2, c3, c4]
        outs = [outs[i] for i in self.out_indices]
        return tuple(outs)
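Note (not part of the commit): InvertedResidual takes the residual shortcut only when stride == 1 and the input and output channel counts match, which the use_res_connect flag encodes. A sketch, assuming the class above is importable:

import torch

x = torch.randn(1, 16, 32, 32)
same = InvertedResidual(16, 16, stride=1, expand_ratio=6)
down = InvertedResidual(16, 24, stride=2, expand_ratio=6)
print(same.use_res_connect, same(x).shape)  # True  torch.Size([1, 16, 32, 32])
print(down.use_res_connect, down(x).shape)  # False torch.Size([1, 24, 16, 16])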
mmseg/models/decode_heads/__init__.py

@@ -11,9 +11,10 @@ from .psa_head import PSAHead
 from .psp_head import PSPHead
 from .sep_aspp_head import DepthwiseSeparableASPPHead
 from .uper_head import UPerHead
+from .sep_fcn_head import SepFCNHead

 __all__ = [
     'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
     'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
-    'EncHead'
+    'EncHead', 'SepFCNHead'
 ]
mmseg/models/decode_heads/sep_fcn_head.py (new file): 29 additions

@@ -0,0 +1,29 @@
from mmseg.ops import DepthwiseSeparableConvModule
from ..builder import HEADS
from .fcn_head import FCNHead


@HEADS.register_module()
class SepFCNHead(FCNHead):

    def __init__(self, **kwargs):
        super(SepFCNHead, self).__init__(**kwargs)
        self.convs[0] = DepthwiseSeparableConvModule(
            self.in_channels,
            self.channels,
            norm_cfg=self.norm_cfg,
            relu_first=False)
        for i in range(1, self.num_convs):
            self.convs[i] = DepthwiseSeparableConvModule(
                self.channels,
                self.channels,
                norm_cfg=self.norm_cfg,
                relu_first=False)

        if self.concat_input:
            self.conv_cat = DepthwiseSeparableConvModule(
                self.in_channels + self.channels,
                self.channels,
                norm_cfg=self.norm_cfg,
                relu_first=False)
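Note (not part of the commit): a hedged construction sketch. The keyword arguments follow the fast_scnn config above and the FCNHead/BaseDecodeHead signatures assumed by it; they may differ in later releases.

head = SepFCNHead(
    in_channels=128,
    channels=128,
    num_convs=1,
    concat_input=False,
    num_classes=19,
    in_index=-1,
    norm_cfg=dict(type='BN'))
print(type(head.convs[0]).__name__)  # DepthwiseSeparableConvModule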
mmseg/ops/separable_conv_module.py

@@ -1,88 +1,60 @@
-import torch.nn as nn
-from mmcv.cnn import ConvModule
+from mmcv.cnn import build_norm_layer
+from torch import nn


 class DepthwiseSeparableConvModule(nn.Module):
-    """Depthwise separable convolution module.
-
-    See https://arxiv.org/pdf/1704.04861.pdf for details.
-
-    This module can replace a ConvModule with the conv block replaced by two
-    conv block: depthwise conv block and pointwise conv block. The depthwise
-    conv block contains depthwise-conv/norm/activation layers. The pointwise
-    conv block contains pointwise-conv/norm/activation layers. It should be
-    noted that there will be norm/activation layer in the depthwise conv block
-    if `norm_cfg` and `act_cfg` are specified.
-
-    Args:
-        in_channels (int): Same as nn.Conv2d.
-        out_channels (int): Same as nn.Conv2d.
-        kernel_size (int or tuple[int]): Same as nn.Conv2d.
-        stride (int or tuple[int]): Same as nn.Conv2d. Default: 1.
-        padding (int or tuple[int]): Same as nn.Conv2d. Default: 0.
-        dilation (int or tuple[int]): Same as nn.Conv2d. Default: 1.
-        norm_cfg (dict): Default norm config for both depthwise ConvModule and
-            pointwise ConvModule. Default: None.
-        act_cfg (dict): Default activation config for both depthwise ConvModule
-            and pointwise ConvModule. Default: dict(type='ReLU').
-        dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
-            'default', it will be the same as `norm_cfg`. Default: 'default'.
-        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
-            'default', it will be the same as `act_cfg`. Default: 'default'.
-        pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
-            'default', it will be the same as `norm_cfg`. Default: 'default'.
-        pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
-            'default', it will be the same as `act_cfg`. Default: 'default'.
-        kwargs (optional): Other shared arguments for depthwise and pointwise
-            ConvModule. See ConvModule for ref.
-    """

     def __init__(self,
                  in_channels,
                  out_channels,
-                 kernel_size,
+                 kernel_size=3,
                  stride=1,
-                 padding=0,
                  dilation=1,
-                 norm_cfg=None,
-                 act_cfg=dict(type='ReLU'),
-                 dw_norm_cfg='default',
-                 dw_act_cfg='default',
-                 pw_norm_cfg='default',
-                 pw_act_cfg='default',
-                 **kwargs):
+                 relu_first=True,
+                 bias=False,
+                 norm_cfg=dict(type='BN')):
         super(DepthwiseSeparableConvModule, self).__init__()
-        assert 'groups' not in kwargs, 'groups should not be specified'
-
-        # if norm/activation config of depthwise/pointwise ConvModule is not
-        # specified, use default config.
-        dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg
-        dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg
-        pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg
-        pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg
-
-        # depthwise convolution
-        self.depthwise_conv = ConvModule(
+        self.depthwise = nn.Conv2d(
             in_channels,
             in_channels,
             kernel_size,
             stride=stride,
-            padding=padding,
+            padding=dilation,
             dilation=dilation,
             groups=in_channels,
-            norm_cfg=dw_norm_cfg,
-            act_cfg=dw_act_cfg,
-            **kwargs)
+            bias=bias)
+        self.norm_depth_name, norm_depth = build_norm_layer(
+            norm_cfg, in_channels, postfix='_depth')
+        self.add_module(self.norm_depth_name, norm_depth)

-        self.pointwise_conv = ConvModule(
-            in_channels,
-            out_channels,
-            1,
-            norm_cfg=pw_norm_cfg,
-            act_cfg=pw_act_cfg,
-            **kwargs)
+        self.pointwise = nn.Conv2d(in_channels, out_channels, 1, bias=bias)
+        self.norm_point_name, norm_point = build_norm_layer(
+            norm_cfg, out_channels, postfix='_point')
+        self.add_module(self.norm_point_name, norm_point)
+
+        self.relu_first = relu_first
+        self.relu = nn.ReLU(inplace=not relu_first)
+
+    @property
+    def norm_depth(self):
+        return getattr(self, self.norm_depth_name)
+
+    @property
+    def norm_point(self):
+        return getattr(self, self.norm_point_name)

     def forward(self, x):
-        x = self.depthwise_conv(x)
-        x = self.pointwise_conv(x)
-        return x
+        if self.relu_first:
+            out = self.relu(x)
+            out = self.depthwise(out)
+            out = self.norm_depth(out)
+            out = self.pointwise(out)
+            out = self.norm_point(out)
+        else:
+            out = self.depthwise(x)
+            out = self.norm_depth(out)
+            out = self.relu(out)
+            out = self.pointwise(out)
+            out = self.norm_point(out)
+            out = self.relu(out)
+        return out
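Note (not part of the commit): the rewritten module follows an Xception-style ordering. With relu_first=True the ReLU precedes the depthwise conv and there is no activation after the final norm; with relu_first=False (as the Fast-SCNN modules above use it) a ReLU follows each norm. Since padding is tied to dilation, a 3x3 kernel preserves spatial size. A shape sanity check, assuming the new module above:

import torch

m = DepthwiseSeparableConvModule(32, 64, kernel_size=3, relu_first=False)
y = m(torch.randn(1, 32, 16, 16))
print(y.shape)  # torch.Size([1, 64, 16, 16])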