# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,
                      build_activation_layer, build_norm_layer)
from mmcv.runner import BaseModule

from mmseg.ops import resize
from ..builder import BACKBONES

class DetailBranch(BaseModule):
    """Detail Branch with wide channels and shallow layers to capture
    low-level details and generate high-resolution feature representation.

    Args:
        detail_channels (Tuple[int]): Size of channel numbers of each stage
            in Detail Branch. The paper uses 3 stages.
            Default: (64, 64, 128).
        in_channels (int): Number of channels of input image. Default: 3.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        x (torch.Tensor): Feature map of Detail Branch.
    """

    def __init__(self,
                 detail_channels=(64, 64, 128),
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(DetailBranch, self).__init__(init_cfg=init_cfg)
        detail_branch = []
        for i in range(len(detail_channels)):
            if i == 0:
                # The first stage has two convs; the stride-2 conv
                # downsamples the input image.
                detail_branch.append(
                    nn.Sequential(
                        ConvModule(
                            in_channels=in_channels,
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg),
                        ConvModule(
                            in_channels=detail_channels[i],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg)))
            else:
                # Later stages have three convs and downsample once.
                detail_branch.append(
                    nn.Sequential(
                        ConvModule(
                            in_channels=detail_channels[i - 1],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg),
                        ConvModule(
                            in_channels=detail_channels[i],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg),
                        ConvModule(
                            in_channels=detail_channels[i],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg)))
        self.detail_branch = nn.ModuleList(detail_branch)

    def forward(self, x):
        for stage in self.detail_branch:
            x = stage(x)
        return x

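# A minimal shape-check sketch for DetailBranch (illustration only, not part
# of the original file; assumes the default config and a 512x1024 input):
#   >>> branch = DetailBranch()
#   >>> branch(torch.rand(1, 3, 512, 1024)).shape  # 1/8 resolution, wide
#   torch.Size([1, 128, 64, 128])
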
class StemBlock(BaseModule):
    """Stem Block at the beginning of Semantic Branch.

    Args:
        in_channels (int): Number of input channels.
            Default: 3.
        out_channels (int): Number of output channels.
            Default: 16.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        x (torch.Tensor): First feature map in Semantic Branch.
    """

    def __init__(self,
                 in_channels=3,
                 out_channels=16,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(StemBlock, self).__init__(init_cfg=init_cfg)

        self.conv_first = ConvModule(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.convs = nn.Sequential(
            ConvModule(
                in_channels=out_channels,
                out_channels=out_channels // 2,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            ConvModule(
                in_channels=out_channels // 2,
                out_channels=out_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))
        self.pool = nn.MaxPool2d(
            kernel_size=3, stride=2, padding=1, ceil_mode=False)
        self.fuse_last = ConvModule(
            in_channels=out_channels * 2,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

    def forward(self, x):
        x = self.conv_first(x)
        # Concatenate a conv path and a max-pool path at 1/4 resolution,
        # then fuse them with a 3x3 conv.
        x_left = self.convs(x)
        x_right = self.pool(x)
        x = self.fuse_last(torch.cat([x_left, x_right], dim=1))
        return x

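# A hedged usage sketch for StemBlock (illustration only; assumes the
# default 3->16 channel config): both internal paths halve the resolution
# once more after conv_first, giving a 1/4-resolution output.
#   >>> stem = StemBlock(in_channels=3, out_channels=16)
#   >>> stem(torch.rand(1, 3, 512, 1024)).shape
#   torch.Size([1, 16, 128, 256])
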
class GELayer(BaseModule):
    """Gather-and-Expansion Layer.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        exp_ratio (int): Expansion ratio for middle channels.
            Default: 6.
        stride (int): Stride of GELayer. Default: 1.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        x (torch.Tensor): Intermediate feature map in Semantic Branch.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 exp_ratio=6,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(GELayer, self).__init__(init_cfg=init_cfg)
        mid_channel = in_channels * exp_ratio
        self.conv1 = ConvModule(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        if stride == 1:
            self.dwconv = nn.Sequential(
                # The ReLU in this ConvModule is not shown in the paper.
                ConvModule(
                    in_channels=in_channels,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
            self.shortcut = None
        else:
            self.dwconv = nn.Sequential(
                ConvModule(
                    in_channels=in_channels,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    bias=False,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=None),
                # The ReLU in this ConvModule is not shown in the paper.
                ConvModule(
                    in_channels=mid_channel,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    groups=mid_channel,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg),
            )
            self.shortcut = nn.Sequential(
                DepthwiseSeparableConvModule(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    dw_norm_cfg=norm_cfg,
                    dw_act_cfg=None,
                    pw_norm_cfg=norm_cfg,
                    pw_act_cfg=None,
                ))

        self.conv2 = nn.Sequential(
            ConvModule(
                in_channels=mid_channel,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None,
            ))

        self.act = build_activation_layer(act_cfg)

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.dwconv(x)
        x = self.conv2(x)
        if self.shortcut is not None:
            shortcut = self.shortcut(identity)
            x = x + shortcut
        else:
            x = x + identity
        x = self.act(x)
        return x

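# A hedged sketch of the two GELayer modes (illustration only). With
# stride=1 the input is added back directly, so in_channels must equal
# out_channels; with stride=2 a depthwise-separable shortcut projects the
# identity to the new shape.
#   >>> GELayer(32, 32, exp_ratio=6, stride=1)(
#   ...     torch.rand(1, 32, 64, 128)).shape
#   torch.Size([1, 32, 64, 128])
#   >>> GELayer(16, 32, exp_ratio=6, stride=2)(
#   ...     torch.rand(1, 16, 128, 256)).shape
#   torch.Size([1, 32, 64, 128])
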
class CEBlock(BaseModule):
    """Context Embedding Block for large receptive field in Semantic Branch.

    Args:
        in_channels (int): Number of input channels.
            Default: 3.
        out_channels (int): Number of output channels.
            Default: 16.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        x (torch.Tensor): Last feature map in Semantic Branch.
    """

    def __init__(self,
                 in_channels=3,
                 out_channels=16,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(CEBlock, self).__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            build_norm_layer(norm_cfg, self.in_channels)[1])
        self.conv_gap = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # Note: in the paper this is a plain conv2d without BN or ReLU.
        self.conv_last = ConvModule(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

    def forward(self, x):
        identity = x
        x = self.gap(x)
        x = self.conv_gap(x)
        # Broadcast-add the 1x1 global context embedding back to the input.
        x = identity + x
        x = self.conv_last(x)
        return x

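# A hedged sketch for CEBlock (illustration only): the residual sum of the
# input and the pooled embedding requires in_channels == out_channels, and
# the spatial size is preserved.
#   >>> CEBlock(128, 128)(torch.rand(1, 128, 16, 32)).shape
#   torch.Size([1, 128, 16, 32])
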
class SemanticBranch(BaseModule):
    """Semantic Branch which is lightweight with narrow channels and deep
    layers to obtain high-level semantic context.

    Args:
        semantic_channels (Tuple[int]): Size of channel numbers of
            various stages in Semantic Branch.
            Default: (16, 32, 64, 128).
        in_channels (int): Number of channels of input image. Default: 3.
        exp_ratio (int): Expansion ratio for middle channels.
            Default: 6.
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        semantic_outs (List[torch.Tensor]): List of several feature maps
            for auxiliary heads (Booster) and Bilateral
            Guided Aggregation Layer.
    """

    def __init__(self,
                 semantic_channels=(16, 32, 64, 128),
                 in_channels=3,
                 exp_ratio=6,
                 init_cfg=None):
        super(SemanticBranch, self).__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.semantic_channels = semantic_channels
        self.semantic_stages = []
        for i in range(len(semantic_channels)):
            stage_name = f'stage{i + 1}'
            self.semantic_stages.append(stage_name)
            if i == 0:
                # The first stage is the Stem Block.
                self.add_module(
                    stage_name,
                    StemBlock(self.in_channels, semantic_channels[i]))
            elif i == (len(semantic_channels) - 1):
                # The last stage stacks four GELayers; the first one
                # downsamples.
                self.add_module(
                    stage_name,
                    nn.Sequential(
                        GELayer(semantic_channels[i - 1], semantic_channels[i],
                                exp_ratio, 2),
                        GELayer(semantic_channels[i], semantic_channels[i],
                                exp_ratio, 1),
                        GELayer(semantic_channels[i], semantic_channels[i],
                                exp_ratio, 1),
                        GELayer(semantic_channels[i], semantic_channels[i],
                                exp_ratio, 1)))
            else:
                # Middle stages stack two GELayers; the first one downsamples.
                self.add_module(
                    stage_name,
                    nn.Sequential(
                        GELayer(semantic_channels[i - 1], semantic_channels[i],
                                exp_ratio, 2),
                        GELayer(semantic_channels[i], semantic_channels[i],
                                exp_ratio, 1)))

        self.add_module(f'stage{len(semantic_channels)}_CEBlock',
                        CEBlock(semantic_channels[-1], semantic_channels[-1]))
        self.semantic_stages.append(f'stage{len(semantic_channels)}_CEBlock')

    def forward(self, x):
        semantic_outs = []
        for stage_name in self.semantic_stages:
            semantic_stage = getattr(self, stage_name)
            x = semantic_stage(x)
            semantic_outs.append(x)
        return semantic_outs

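# A hedged sketch of the SemanticBranch outputs (illustration only): one map
# per stage plus the final CEBlock map, each stage halving the resolution of
# the previous one starting from 1/4.
#   >>> outs = SemanticBranch()(torch.rand(1, 3, 512, 1024))
#   >>> [tuple(o.shape) for o in outs]
#   [(1, 16, 128, 256), (1, 32, 64, 128), (1, 64, 32, 64),
#    (1, 128, 16, 32), (1, 128, 16, 32)]
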
class BGALayer(BaseModule):
    """Bilateral Guided Aggregation Layer to fuse the complementary
    information from both Detail Branch and Semantic Branch.

    Args:
        out_channels (int): Number of output channels.
            Default: 128.
        align_corners (bool): align_corners argument of F.interpolate.
            Default: False.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        output (torch.Tensor): Output feature map for segmentation heads.
    """

    def __init__(self,
                 out_channels=128,
                 align_corners=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(BGALayer, self).__init__(init_cfg=init_cfg)
        self.out_channels = out_channels
        self.align_corners = align_corners
        self.detail_dwconv = nn.Sequential(
            DepthwiseSeparableConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                dw_norm_cfg=norm_cfg,
                dw_act_cfg=None,
                pw_norm_cfg=None,
                pw_act_cfg=None,
            ))
        self.detail_down = nn.Sequential(
            ConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False))
        self.semantic_conv = nn.Sequential(
            ConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None))
        self.semantic_dwconv = nn.Sequential(
            DepthwiseSeparableConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                dw_norm_cfg=norm_cfg,
                dw_act_cfg=None,
                pw_norm_cfg=None,
                pw_act_cfg=None,
            ))
        self.conv = ConvModule(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            inplace=True,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
        )

    def forward(self, x_d, x_s):
        detail_dwconv = self.detail_dwconv(x_d)
        detail_down = self.detail_down(x_d)
        semantic_conv = self.semantic_conv(x_s)
        semantic_dwconv = self.semantic_dwconv(x_s)
        # Upsample the semantic features to the detail resolution before
        # the first attention product.
        semantic_conv = resize(
            input=semantic_conv,
            size=detail_dwconv.shape[2:],
            mode='bilinear',
            align_corners=self.align_corners)
        fuse_1 = detail_dwconv * torch.sigmoid(semantic_conv)
        fuse_2 = detail_down * torch.sigmoid(semantic_dwconv)
        fuse_2 = resize(
            input=fuse_2,
            size=fuse_1.shape[2:],
            mode='bilinear',
            align_corners=self.align_corners)
        output = self.conv(fuse_1 + fuse_2)
        return output

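# A hedged sketch for BGALayer (illustration only): the detail input is at
# 1/8 resolution and the semantic input at 1/32; both attention products are
# resized and summed at the detail resolution.
#   >>> bga = BGALayer(out_channels=128)
#   >>> bga(torch.rand(1, 128, 64, 128), torch.rand(1, 128, 16, 32)).shape
#   torch.Size([1, 128, 64, 128])
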
@BACKBONES.register_module()
class BiSeNetV2(BaseModule):
    """BiSeNetV2: Bilateral Network with Guided Aggregation for
    Real-time Semantic Segmentation.

    This backbone is the implementation of
    `BiSeNetV2 <https://arxiv.org/abs/2004.02147>`_.

    Args:
        in_channels (int): Number of channels of input image. Default: 3.
        detail_channels (Tuple[int], optional): Channels of each stage
            in Detail Branch. Default: (64, 64, 128).
        semantic_channels (Tuple[int], optional): Channels of each stage
            in Semantic Branch. Default: (16, 32, 64, 128).
            See Table 1 and Figure 3 of the paper for more details.
        semantic_expansion_ratio (int, optional): The expansion factor
            expanding channel number of middle channels in Semantic Branch.
            Default: 6.
        bga_channels (int, optional): Number of middle channels in
            Bilateral Guided Aggregation Layer. Default: 128.
        out_indices (Tuple[int] | int, optional): Output from which stages.
            Default: (0, 1, 2, 3, 4).
        align_corners (bool, optional): The align_corners argument of
            resize operation in Bilateral Guided Aggregation Layer.
            Default: False.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels=3,
                 detail_channels=(64, 64, 128),
                 semantic_channels=(16, 32, 64, 128),
                 semantic_expansion_ratio=6,
                 bga_channels=128,
                 out_indices=(0, 1, 2, 3, 4),
                 align_corners=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        if init_cfg is None:
            init_cfg = [
                dict(type='Kaiming', layer='Conv2d'),
                dict(
                    type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm'])
            ]
        super(BiSeNetV2, self).__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.out_indices = out_indices
        self.detail_channels = detail_channels
        self.semantic_channels = semantic_channels
        self.semantic_expansion_ratio = semantic_expansion_ratio
        self.bga_channels = bga_channels
        self.align_corners = align_corners
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        self.detail = DetailBranch(self.detail_channels, self.in_channels)
        self.semantic = SemanticBranch(self.semantic_channels,
                                       self.in_channels,
                                       self.semantic_expansion_ratio)
        self.bga = BGALayer(self.bga_channels, self.align_corners)

    def forward(self, x):
        # Refactoring code borrowed from Coin Cheung, thanks.
        x_detail = self.detail(x)
        x_semantic_lst = self.semantic(x)
        x_head = self.bga(x_detail, x_semantic_lst[-1])
        # The BGA output comes first, followed by the intermediate Semantic
        # Branch maps used by the auxiliary heads.
        outs = [x_head] + x_semantic_lst[:-1]
        outs = [outs[i] for i in self.out_indices]
        return tuple(outs)
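
# A hedged end-to-end sketch (illustration only, assuming mmcv and mmseg are
# installed): builds the backbone with defaults and prints the five output
# maps described in forward(). Guarded so it never runs on import.
if __name__ == '__main__':
    model = BiSeNetV2()
    model.eval()
    with torch.no_grad():
        outs = model(torch.rand(1, 3, 512, 1024))
    # (BGA output, stage1..stage4 of the Semantic Branch)
    for out in outs:
        print(tuple(out.shape))
    # Expected: (1, 128, 64, 128), (1, 16, 128, 256), (1, 32, 64, 128),
    #           (1, 64, 32, 64), (1, 128, 16, 32)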