Mirror of https://github.com/open-mmlab/mmsegmentation.git, synced 2025-06-03 22:03:48 +08:00
[Enhance] Refactor inverted residual (#164)
* [Enhance] Unified InvertedResidual in MobileNetV2 and FastSCNN
This commit is contained in:
parent 924571eced
commit f86c24d806
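This commit leaves a single InvertedResidual implementation in mmseg.models.utils, shared by the MobileNetV2 and Fast-SCNN backbones. A minimal usage sketch, assuming only the constructor signature and forward behavior visible in the diffs below (tensor shapes are illustrative):

import torch

from mmseg.models.utils import InvertedResidual

# stride == 1 and in_channels == out_channels, so the residual
# connection (x + self.conv(x)) is taken in forward().
block = InvertedResidual(
    in_channels=32,
    out_channels=32,
    stride=1,
    expand_ratio=6)  # hidden layer width: int(round(32 * 6)) = 192
x = torch.randn(1, 32, 64, 64)
assert block(x).shape == x.shape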
@@ -15,4 +15,4 @@
 ### Cityscapes

 | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
 |------------|-----------|-----------|--------:|----------|----------------|------:|---------------|----------|
-| Fast-SCNN | Fast-SCNN | 512x1024 | 80000 | 8.4 | 63.61 | 69.06 | - | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-cae6c46a.pth) &#124; [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-20200807_165744.log.json) |
+| Fast-SCNN | Fast-SCNN | 512x1024 | 80000 | 8.4 | 63.61 | 69.06 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-f5096c79.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-20200807_165744.log.json) |
@@ -1,70 +0,0 @@
-_base_ = [
-    '../_base_/models/fast_scnn.py', '../_base_/datasets/pascal_voc12.py',
-    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
-]
-
-# Re-config the data sampler.
-data = dict(samples_per_gpu=8, workers_per_gpu=4)
-
-# Re-config the optimizer.
-optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
-
-# update num_classes of the segmentor.
-# model settings
-norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
-model = dict(
-    type='EncoderDecoder',
-    backbone=dict(
-        type='FastSCNN',
-        downsample_dw_channels=(32, 48),
-        global_in_channels=64,
-        global_block_channels=(64, 96, 128),
-        global_block_strides=(2, 2, 1),
-        global_out_channels=128,
-        higher_in_channels=64,
-        lower_in_channels=128,
-        fusion_out_channels=128,
-        out_indices=(0, 1, 2),
-        norm_cfg=norm_cfg,
-        align_corners=False),
-    decode_head=dict(
-        type='DepthwiseSeparableFCNHead',
-        in_channels=128,
-        channels=128,
-        concat_input=False,
-        num_classes=21,
-        in_index=-1,
-        norm_cfg=norm_cfg,
-        align_corners=False,
-        loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.)),
-    auxiliary_head=[
-        dict(
-            type='FCNHead',
-            in_channels=128,
-            channels=32,
-            num_convs=1,
-            num_classes=21,
-            in_index=-2,
-            norm_cfg=norm_cfg,
-            concat_input=False,
-            align_corners=False,
-            loss_decode=dict(
-                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
-        dict(
-            type='FCNHead',
-            in_channels=64,
-            channels=32,
-            num_convs=1,
-            num_classes=21,
-            in_index=-3,
-            norm_cfg=norm_cfg,
-            concat_input=False,
-            align_corners=False,
-            loss_decode=dict(
-                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
-    ])
-
-# model training and testing settings
-train_cfg = dict()
-test_cfg = dict(mode='whole')
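The file deleted above is the Fast-SCNN training config for Pascal VOC (num_classes=21, SGD with lr 0.12). For reference, a config like this is loaded with mmcv's Config and built by the mmseg builder; a hedged sketch, with a hypothetical file path since the commit page does not show the deleted file's name:

from mmcv import Config

from mmseg.models import build_segmentor

# Hypothetical path for illustration only; the diff omits the file name.
cfg = Config.fromfile('configs/fast_scnn/fast_scnn_pascal_voc.py')
# train_cfg/test_cfg were top-level config keys in this era, matching the
# deleted file's train_cfg = dict() / test_cfg = dict(mode='whole').
model = build_segmentor(
    cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)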
@@ -6,8 +6,8 @@ from torch.nn.modules.batchnorm import _BatchNorm

 from mmseg.models.decode_heads.psp_head import PPM
 from mmseg.ops import resize
-from mmseg.utils import InvertedResidual
 from ..builder import BACKBONES
+from ..utils.inverted_residual import InvertedResidual


 class LearningToDownsample(nn.Module):
@@ -1,102 +1,12 @@
 import logging

 import torch.nn as nn
-import torch.utils.checkpoint as cp
 from mmcv.cnn import ConvModule, constant_init, kaiming_init
 from mmcv.runner import load_checkpoint
 from torch.nn.modules.batchnorm import _BatchNorm

 from ..builder import BACKBONES
-from ..utils import make_divisible
-
-
-class InvertedResidual(nn.Module):
-    """InvertedResidual block for MobileNetV2.
-
-    Args:
-        in_channels (int): The input channels of the InvertedResidual block.
-        out_channels (int): The output channels of the InvertedResidual block.
-        stride (int): Stride of the middle (first) 3x3 convolution.
-        expand_ratio (int): Adjusts number of channels of the hidden layer
-            in InvertedResidual by this amount.
-        dilation (int): Dilation rate of depthwise conv. Default: 1
-        conv_cfg (dict): Config dict for convolution layer.
-            Default: None, which means using conv2d.
-        norm_cfg (dict): Config dict for normalization layer.
-            Default: dict(type='BN').
-        act_cfg (dict): Config dict for activation layer.
-            Default: dict(type='ReLU6').
-        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
-            memory while slowing down the training speed. Default: False.
-
-    Returns:
-        Tensor: The output tensor
-    """
-
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 expand_ratio,
-                 dilation=1,
-                 conv_cfg=None,
-                 norm_cfg=dict(type='BN'),
-                 act_cfg=dict(type='ReLU6'),
-                 with_cp=False):
-        super(InvertedResidual, self).__init__()
-        self.stride = stride
-        assert stride in [1, 2], f'stride must in [1, 2]. ' \
-            f'But received {stride}.'
-        self.with_cp = with_cp
-        self.use_res_connect = self.stride == 1 and in_channels == out_channels
-        hidden_dim = int(round(in_channels * expand_ratio))
-
-        layers = []
-        if expand_ratio != 1:
-            layers.append(
-                ConvModule(
-                    in_channels=in_channels,
-                    out_channels=hidden_dim,
-                    kernel_size=1,
-                    conv_cfg=conv_cfg,
-                    norm_cfg=norm_cfg,
-                    act_cfg=act_cfg))
-        layers.extend([
-            ConvModule(
-                in_channels=hidden_dim,
-                out_channels=hidden_dim,
-                kernel_size=3,
-                stride=stride,
-                padding=dilation,
-                dilation=dilation,
-                groups=hidden_dim,
-                conv_cfg=conv_cfg,
-                norm_cfg=norm_cfg,
-                act_cfg=act_cfg),
-            ConvModule(
-                in_channels=hidden_dim,
-                out_channels=out_channels,
-                kernel_size=1,
-                conv_cfg=conv_cfg,
-                norm_cfg=norm_cfg,
-                act_cfg=None)
-        ])
-        self.conv = nn.Sequential(*layers)
-
-    def forward(self, x):
-
-        def _inner_forward(x):
-            if self.use_res_connect:
-                return x + self.conv(x)
-            else:
-                return self.conv(x)
-
-        if self.with_cp and x.requires_grad:
-            out = cp.checkpoint(_inner_forward, x)
-        else:
-            out = _inner_forward(x)
-
-        return out
+from ..utils import InvertedResidual, make_divisible


 @BACKBONES.register_module()
@@ -1,5 +1,8 @@
+from .inverted_residual import InvertedResidual
 from .make_divisible import make_divisible
 from .res_layer import ResLayer
 from .self_attention_block import SelfAttentionBlock

-__all__ = ['ResLayer', 'SelfAttentionBlock', 'make_divisible']
+__all__ = [
+    'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual'
+]
@@ -1,22 +1,29 @@
-from mmcv.cnn import ConvModule, build_norm_layer
-from torch import nn
+from mmcv.cnn import ConvModule
+from torch import nn as nn
+from torch.utils import checkpoint as cp


 class InvertedResidual(nn.Module):
-    """Inverted residual module.
+    """InvertedResidual block for MobileNetV2.

     Args:
         in_channels (int): The input channels of the InvertedResidual block.
         out_channels (int): The output channels of the InvertedResidual block.
         stride (int): Stride of the middle (first) 3x3 convolution.
-        expand_ratio (int): adjusts number of channels of the hidden layer
+        expand_ratio (int): Adjusts number of channels of the hidden layer
             in InvertedResidual by this amount.
+        dilation (int): Dilation rate of depthwise conv. Default: 1
         conv_cfg (dict): Config dict for convolution layer.
             Default: None, which means using conv2d.
         norm_cfg (dict): Config dict for normalization layer.
             Default: dict(type='BN').
         act_cfg (dict): Config dict for activation layer.
             Default: dict(type='ReLU6').
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+
+    Returns:
+        Tensor: The output tensor
     """

     def __init__(self,
@@ -27,47 +34,59 @@ class InvertedResidual(nn.Module):
                  dilation=1,
                  conv_cfg=None,
                  norm_cfg=dict(type='BN'),
-                 act_cfg=dict(type='ReLU6')):
+                 act_cfg=dict(type='ReLU6'),
+                 with_cp=False):
         super(InvertedResidual, self).__init__()
         self.stride = stride
-        assert stride in [1, 2]
+        assert stride in [1, 2], f'stride must in [1, 2]. ' \
+            f'But received {stride}.'
+        self.with_cp = with_cp
+        self.use_res_connect = self.stride == 1 and in_channels == out_channels
         hidden_dim = int(round(in_channels * expand_ratio))
-        self.use_res_connect = self.stride == 1 \
-            and in_channels == out_channels

         layers = []
         if expand_ratio != 1:
-            # pw
             layers.append(
                 ConvModule(
-                    in_channels,
-                    hidden_dim,
+                    in_channels=in_channels,
+                    out_channels=hidden_dim,
                     kernel_size=1,
                     conv_cfg=conv_cfg,
                     norm_cfg=norm_cfg,
                     act_cfg=act_cfg))
         layers.extend([
-            # dw
             ConvModule(
-                hidden_dim,
-                hidden_dim,
+                in_channels=hidden_dim,
+                out_channels=hidden_dim,
                 kernel_size=3,
-                padding=dilation,
                 stride=stride,
+                padding=dilation,
                 dilation=dilation,
                 groups=hidden_dim,
                 conv_cfg=conv_cfg,
                 norm_cfg=norm_cfg,
                 act_cfg=act_cfg),
-            # pw-linear
-            nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
-            build_norm_layer(norm_cfg, out_channels)[1],
+            ConvModule(
+                in_channels=hidden_dim,
+                out_channels=out_channels,
+                kernel_size=1,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=None)
         ])
         self.conv = nn.Sequential(*layers)

     def forward(self, x):
-        if self.use_res_connect:
-            return x + self.conv(x)
-        else:
-            return self.conv(x)
+
+        def _inner_forward(x):
+            if self.use_res_connect:
+                return x + self.conv(x)
+            else:
+                return self.conv(x)
+
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(_inner_forward, x)
+        else:
+            out = _inner_forward(x)
+
+        return out
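The with_cp path added above wraps the block body in torch.utils.checkpoint: intermediate activations of _inner_forward are not stored during the forward pass and are recomputed during backward, saving memory at the cost of extra compute. A sketch of the behavior, assuming the class exactly as shown in this diff:

import torch

from mmseg.models.utils import InvertedResidual

block = InvertedResidual(
    in_channels=64, out_channels=64, stride=1, expand_ratio=6, with_cp=True)
# The input must require grad; otherwise forward() takes the plain,
# non-checkpointed branch (see `if self.with_cp and x.requires_grad` above).
x = torch.randn(2, 64, 32, 32, requires_grad=True)
block(x).sum().backward()  # _inner_forward re-runs here to rebuild activations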
@@ -1,5 +1,4 @@
 from .collect_env import collect_env
-from .inverted_residual_module import InvertedResidual
 from .logger import get_root_logger

-__all__ = ['get_root_logger', 'collect_env', 'InvertedResidual']
+__all__ = ['get_root_logger', 'collect_env']
@@ -1,7 +1,7 @@
 import pytest
 import torch

-from mmseg.utils import InvertedResidual
+from mmseg.models.utils import InvertedResidual


 def test_inv_residual():
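The test body is truncated on this page; below is a hedged sketch of assertions the refactored block supports, based only on behavior visible in the diffs above (illustrative, not the actual test):

import pytest
import torch

from mmseg.models.utils import InvertedResidual


def test_inv_residual():
    # stride outside [1, 2] must trip the new assert
    with pytest.raises(AssertionError):
        InvertedResidual(32, 32, stride=3, expand_ratio=6)

    # residual path: stride 1 with matching channels preserves the shape
    block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
    x = torch.randn(1, 32, 16, 16)
    assert block(x).shape == (1, 32, 16, 16)

    # stride 2 halves the spatial resolution (3x3 dw conv, padding 1)
    block = InvertedResidual(32, 64, stride=2, expand_ratio=6)
    assert block(x).shape == (1, 64, 8, 8)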