mirror of
https://github.com/open-mmlab/mmsegmentation.git
synced 2025-06-03 22:03:48 +08:00
relevant files modified according to Jerry's instructions
This commit is contained in:
parent
d8cba3d6a9
commit
e1986a5e5e
@ -17,7 +17,7 @@ model = dict(
|
|||||||
norm_cfg=norm_cfg,
|
norm_cfg=norm_cfg,
|
||||||
align_corners=False),
|
align_corners=False),
|
||||||
decode_head=dict(
|
decode_head=dict(
|
||||||
type='SepFCNHead',
|
type='DepthwiseSeparableFCNHead',
|
||||||
in_channels=128,
|
in_channels=128,
|
||||||
channels=128,
|
channels=128,
|
||||||
concat_input=False,
|
concat_input=False,
|
||||||
@ -53,3 +53,7 @@ model = dict(
|
|||||||
loss_decode=dict(
|
loss_decode=dict(
|
||||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# model training and testing settings
|
||||||
|
train_cfg = dict()
|
||||||
|
test_cfg = dict(mode='whole')
|
||||||
|
@ -1,19 +1,16 @@
|
|||||||
_base_ = [
|
_base_ = [
|
||||||
'../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
|
'../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
|
||||||
'../_base_/default_runtime.py'
|
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
|
||||||
]
|
]
|
||||||
crop_size = (512, 1024)
|
crop_size = (512, 1024)
|
||||||
cudnn_benchmark = True
|
cudnn_benchmark = True
|
||||||
# model training and testing settings
|
|
||||||
train_cfg = dict()
|
|
||||||
test_cfg = dict(mode='whole')
|
|
||||||
|
|
||||||
# Here: What is parameter 'with_seg'?
|
# dataset settings
|
||||||
img_norm_cfg = dict(
|
img_norm_cfg = dict(
|
||||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||||
train_pipeline = [
|
train_pipeline = [
|
||||||
dict(type='LoadImageFromFile', to_float32=True),
|
dict(type='LoadImageFromFile', to_float32=True),
|
||||||
dict(type='LoadAnnotations'), # with_seg=True
|
dict(type='LoadAnnotations'),
|
||||||
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
|
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
|
||||||
dict(type='RandomFlip', flip_ratio=0.5),
|
dict(type='RandomFlip', flip_ratio=0.5),
|
||||||
dict(type='PhotoMetricDistortion'),
|
dict(type='PhotoMetricDistortion'),
|
||||||
@ -37,6 +34,8 @@ test_pipeline = [
|
|||||||
dict(type='Collect', keys=['img']),
|
dict(type='Collect', keys=['img']),
|
||||||
])
|
])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Re-config the data sampler.
|
||||||
data = dict(
|
data = dict(
|
||||||
samples_per_gpu=8,
|
samples_per_gpu=8,
|
||||||
workers_per_gpu=4,
|
workers_per_gpu=4,
|
||||||
@ -44,21 +43,5 @@ data = dict(
|
|||||||
val=dict(pipeline=test_pipeline),
|
val=dict(pipeline=test_pipeline),
|
||||||
test=dict(pipeline=test_pipeline))
|
test=dict(pipeline=test_pipeline))
|
||||||
|
|
||||||
# optimizer
|
# Re-config the optimizer.
|
||||||
optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
|
optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
|
||||||
optimizer_config = dict()
|
|
||||||
# learning policy
|
|
||||||
lr_config = dict(
|
|
||||||
policy='poly',
|
|
||||||
power=0.9,
|
|
||||||
by_epoch=False,
|
|
||||||
)
|
|
||||||
# runtime settings
|
|
||||||
# total_epochs = 1000
|
|
||||||
total_iters = 80000
|
|
||||||
evaluation = dict(interval=2000, metric='mIoU')
|
|
||||||
checkpoint_config = dict(interval=2000)
|
|
||||||
|
|
||||||
# log config: log by iter.
|
|
||||||
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
|
|
||||||
|
|
||||||
|
@ -228,23 +228,6 @@ class FeatureFusionModule(nn.Module):
|
|||||||
|
|
||||||
@BACKBONES.register_module()
|
@BACKBONES.register_module()
|
||||||
class FastSCNN(nn.Module):
|
class FastSCNN(nn.Module):
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
in_channels=3,
|
|
||||||
downsample_dw_channels1=32,
|
|
||||||
downsample_dw_channels2=48,
|
|
||||||
global_in_channels=64,
|
|
||||||
global_block_channels=(64, 96, 128),
|
|
||||||
global_out_channels=128,
|
|
||||||
higher_in_channels=64,
|
|
||||||
lower_in_channels=128,
|
|
||||||
fusion_out_channels=128,
|
|
||||||
scale_factor=4,
|
|
||||||
out_indices=(0, 1, 2),
|
|
||||||
conv_cfg=None,
|
|
||||||
norm_cfg=dict(type='BN'),
|
|
||||||
act_cfg=dict(type='ReLU'),
|
|
||||||
align_corners=False):
|
|
||||||
"""Fast-SCNN Backbone.
|
"""Fast-SCNN Backbone.
|
||||||
Args:
|
Args:
|
||||||
in_channels (int): Number of input image channels. Default=3 (RGB)
|
in_channels (int): Number of input image channels. Default=3 (RGB)
|
||||||
@ -289,6 +272,23 @@ class FastSCNN(nn.Module):
|
|||||||
align_corners (bool): align_corners argument of F.interpolate.
|
align_corners (bool): align_corners argument of F.interpolate.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
in_channels=3,
|
||||||
|
downsample_dw_channels1=32,
|
||||||
|
downsample_dw_channels2=48,
|
||||||
|
global_in_channels=64,
|
||||||
|
global_block_channels=(64, 96, 128),
|
||||||
|
global_out_channels=128,
|
||||||
|
higher_in_channels=64,
|
||||||
|
lower_in_channels=128,
|
||||||
|
fusion_out_channels=128,
|
||||||
|
scale_factor=4,
|
||||||
|
out_indices=(0, 1, 2),
|
||||||
|
conv_cfg=None,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='ReLU'),
|
||||||
|
align_corners=False):
|
||||||
|
|
||||||
super(FastSCNN, self).__init__()
|
super(FastSCNN, self).__init__()
|
||||||
if global_in_channels != higher_in_channels:
|
if global_in_channels != higher_in_channels:
|
||||||
raise AssertionError('Global Input Channels must be the same \
|
raise AssertionError('Global Input Channels must be the same \
|
||||||
|
@ -1,11 +1,5 @@
|
|||||||
from mmcv.cnn import (ConvModule, build_norm_layer, constant_init,
|
from mmcv.cnn import ConvModule, build_norm_layer
|
||||||
kaiming_init, normal_init)
|
|
||||||
from mmcv.runner import load_checkpoint
|
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn.modules.batchnorm import _BatchNorm
|
|
||||||
|
|
||||||
from mmseg.utils import get_root_logger
|
|
||||||
from ..builder import BACKBONES
|
|
||||||
|
|
||||||
|
|
||||||
class InvertedResidual(nn.Module):
|
class InvertedResidual(nn.Module):
|
||||||
@ -61,143 +55,3 @@ class InvertedResidual(nn.Module):
|
|||||||
return x + self.conv(x)
|
return x + self.conv(x)
|
||||||
else:
|
else:
|
||||||
return self.conv(x)
|
return self.conv(x)
|
||||||
|
|
||||||
|
|
||||||
@BACKBONES.register_module()
|
|
||||||
class MobileNetV2(nn.Module):
|
|
||||||
arch_settings = (
|
|
||||||
InvertedResidual,
|
|
||||||
[
|
|
||||||
# t, c, n, s
|
|
||||||
[1, 16, 1, 1],
|
|
||||||
[6, 24, 2, 2],
|
|
||||||
[6, 32, 3, 2],
|
|
||||||
[6, 64, 4, 2],
|
|
||||||
[6, 96, 3, 1],
|
|
||||||
[6, 160, 3, 2],
|
|
||||||
[6, 320, 1, 1]
|
|
||||||
])
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
in_channels=3,
|
|
||||||
dilations=(1, 1, 1, 1, 1),
|
|
||||||
out_indices=(0, 1, 2, 3),
|
|
||||||
input_channels=32,
|
|
||||||
width_mult=1.0,
|
|
||||||
round_nearest=8,
|
|
||||||
conv_cfg=None,
|
|
||||||
norm_cfg=dict(type='BN'),
|
|
||||||
act_cfg=dict(type='ReLU6')):
|
|
||||||
"""
|
|
||||||
MobileNet V2 main class
|
|
||||||
Args:
|
|
||||||
width_mult (float): Width multiplier - adjusts number of channels
|
|
||||||
in each layer by this amount
|
|
||||||
round_nearest (int): Round the number of channels in each layer to
|
|
||||||
be a multiple of this number
|
|
||||||
Set to 1 to turn off rounding
|
|
||||||
block: Module specifying inverted residual building block for
|
|
||||||
mobilenet
|
|
||||||
"""
|
|
||||||
super(MobileNetV2, self).__init__()
|
|
||||||
self.in_channels = in_channels
|
|
||||||
self.width_mult = width_mult
|
|
||||||
self.conv_cfg = conv_cfg
|
|
||||||
self.norm_cfg = norm_cfg
|
|
||||||
self.act_cfg = act_cfg
|
|
||||||
|
|
||||||
block, inverted_residual_setting = self.arch_settings
|
|
||||||
self.dilations = dilations
|
|
||||||
self.out_indices = out_indices
|
|
||||||
|
|
||||||
# building first layer
|
|
||||||
input_channels = int(
|
|
||||||
input_channels *
|
|
||||||
self.width_mult) if self.width_mult > 1.0 else input_channels
|
|
||||||
# last_channels = int(1280 * multiplier) if multiplier > 1.0 else 1280
|
|
||||||
self.conv1 = ConvModule(
|
|
||||||
3,
|
|
||||||
input_channels,
|
|
||||||
kernel_size=3,
|
|
||||||
stride=2,
|
|
||||||
padding=1,
|
|
||||||
conv_cfg=self.conv_cfg,
|
|
||||||
norm_cfg=self.norm_cfg,
|
|
||||||
act_cfg=self.act_cfg)
|
|
||||||
|
|
||||||
# building inverted residual blocks
|
|
||||||
self.planes = input_channels
|
|
||||||
self.block1 = self._make_layer(block, self.planes,
|
|
||||||
inverted_residual_setting[0:1],
|
|
||||||
dilations[0])
|
|
||||||
self.block2 = self._make_layer(block, self.planes,
|
|
||||||
inverted_residual_setting[1:2],
|
|
||||||
dilations[1])
|
|
||||||
self.block3 = self._make_layer(block, self.planes,
|
|
||||||
inverted_residual_setting[2:3],
|
|
||||||
dilations[2])
|
|
||||||
self.block4 = self._make_layer(block, self.planes,
|
|
||||||
inverted_residual_setting[3:5],
|
|
||||||
dilations[3])
|
|
||||||
self.block5 = self._make_layer(block, self.planes,
|
|
||||||
inverted_residual_setting[5:],
|
|
||||||
dilations[4])
|
|
||||||
|
|
||||||
def _make_layer(self,
|
|
||||||
block,
|
|
||||||
planes,
|
|
||||||
inverted_residual_setting,
|
|
||||||
dilation=1):
|
|
||||||
features = list()
|
|
||||||
for t, c, n, s in inverted_residual_setting:
|
|
||||||
out_channels = int(c * self.width_mult)
|
|
||||||
stride = s if dilation == 1 else 1
|
|
||||||
features.append(
|
|
||||||
block(
|
|
||||||
planes,
|
|
||||||
out_channels,
|
|
||||||
stride,
|
|
||||||
t,
|
|
||||||
dilation,
|
|
||||||
conv_cfg=self.conv_cfg,
|
|
||||||
norm_cfg=self.norm_cfg,
|
|
||||||
act_cfg=self.act_cfg))
|
|
||||||
planes = out_channels
|
|
||||||
for i in range(n - 1):
|
|
||||||
features.append(
|
|
||||||
block(
|
|
||||||
planes,
|
|
||||||
out_channels,
|
|
||||||
1,
|
|
||||||
t,
|
|
||||||
conv_cfg=self.conv_cfg,
|
|
||||||
norm_cfg=self.norm_cfg,
|
|
||||||
act_cfg=self.act_cfg))
|
|
||||||
planes = out_channels
|
|
||||||
self.planes = planes
|
|
||||||
return nn.Sequential(*features)
|
|
||||||
|
|
||||||
def init_weights(self, pretrained=None):
|
|
||||||
if isinstance(pretrained, str):
|
|
||||||
logger = get_root_logger()
|
|
||||||
load_checkpoint(self, pretrained, strict=False, logger=logger)
|
|
||||||
else:
|
|
||||||
for m in self.modules():
|
|
||||||
if isinstance(m, nn.Conv2d):
|
|
||||||
kaiming_init(m, mode='fan_out')
|
|
||||||
elif isinstance(m, _BatchNorm):
|
|
||||||
constant_init(m, 1)
|
|
||||||
elif isinstance(m, nn.Linear):
|
|
||||||
normal_init(m, 0, 0.01)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
x = self.conv1(x)
|
|
||||||
x = self.block1(x)
|
|
||||||
c1 = self.block2(x)
|
|
||||||
c2 = self.block3(c1)
|
|
||||||
c3 = self.block4(c2)
|
|
||||||
c4 = self.block5(c3)
|
|
||||||
|
|
||||||
outs = [c1, c2, c3, c4]
|
|
||||||
outs = [outs[i] for i in self.out_indices]
|
|
||||||
return tuple(outs)
|
|
||||||
|
@ -10,11 +10,11 @@ from .ocr_head import OCRHead
|
|||||||
from .psa_head import PSAHead
|
from .psa_head import PSAHead
|
||||||
from .psp_head import PSPHead
|
from .psp_head import PSPHead
|
||||||
from .sep_aspp_head import DepthwiseSeparableASPPHead
|
from .sep_aspp_head import DepthwiseSeparableASPPHead
|
||||||
|
from .sep_fcn_head import DepthwiseSeparableFCNHead
|
||||||
from .uper_head import UPerHead
|
from .uper_head import UPerHead
|
||||||
from .sep_fcn_head import SepFCNHead
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
|
'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
|
||||||
'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
|
'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
|
||||||
'EncHead', 'SepFCNHead'
|
'EncHead', 'DepthwiseSeparableFCNHead'
|
||||||
]
|
]
|
||||||
|
@ -4,7 +4,7 @@ from .fcn_head import FCNHead
|
|||||||
|
|
||||||
|
|
||||||
@HEADS.register_module()
|
@HEADS.register_module()
|
||||||
class SepFCNHead(FCNHead):
|
class DepthwiseSeparableFCNHead(FCNHead):
|
||||||
"""Depthwise-Separable Fully Convolutional Network for Semantic
|
"""Depthwise-Separable Fully Convolutional Network for Semantic
|
||||||
Segmentation.
|
Segmentation.
|
||||||
|
|
||||||
@ -15,7 +15,7 @@ class SepFCNHead(FCNHead):
|
|||||||
channels(int): Number of middle-stage channels in the decode head.
|
channels(int): Number of middle-stage channels in the decode head.
|
||||||
|
|
||||||
concat_input(bool): Whether to concatenate original decode input into
|
concat_input(bool): Whether to concatenate original decode input into
|
||||||
the result of consecutive convolution layers.
|
the result of several consecutive convolution layers.
|
||||||
|
|
||||||
num_classes(int): Used to determine the dimension of
|
num_classes(int): Used to determine the dimension of
|
||||||
final prediction tensor.
|
final prediction tensor.
|
||||||
@ -31,7 +31,7 @@ class SepFCNHead(FCNHead):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super(SepFCNHead, self).__init__(**kwargs)
|
super(DepthwiseSeparableFCNHead, self).__init__(**kwargs)
|
||||||
self.convs[0] = DepthwiseSeparableConvModule(
|
self.convs[0] = DepthwiseSeparableConvModule(
|
||||||
self.in_channels,
|
self.in_channels,
|
||||||
self.channels,
|
self.channels,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user