Mirror of https://github.com/open-mmlab/mmsegmentation.git (synced 2025-06-03 22:03:48 +08:00)
Fix typo: upsampe_cfg -> upsample_cfg (#449)
* Fix typo: upsampe_cfg -> upsample_cfg
* convoluton -> convolution
* more
* ok

Signed-off-by: lizz <lizz@sensetime.com>
parent f7a5d53a4b
commit 33e8357c62
@@ -42,7 +42,7 @@ def single_gpu_test(model,
 Args:
 model (nn.Module): Model to be tested.
 data_loader (utils.data.Dataloader): Pytorch data loader.
-show (bool): Whether show results during infernece. Default: False.
+show (bool): Whether show results during inference. Default: False.
 out_dir (str, optional): If specified, the results will be dumped into
 the directory to save output results.
 efficient_test (bool): Whether save the results as local numpy files to
@@ -212,7 +212,7 @@ def eval_metrics(results,
 Returns:
 float: Overall accuracy on all images.
 ndarray: Per category accuracy, shape (num_classes, ).
-ndarray: Per category evalution metrics, shape (num_classes, ).
+ndarray: Per category evaluation metrics, shape (num_classes, ).
 """
 if isinstance(metrics, str):
 metrics = [metrics]
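As a quick illustration of the two return values documented above (a toy example added here for reference, not the library's eval_metrics implementation), overall and per-category accuracy can be computed like this:

    import numpy as np

    pred = np.array([0, 1, 1, 2, 2, 2])
    label = np.array([0, 1, 2, 2, 2, 1])
    num_classes = 3

    all_acc = (pred == label).mean()                # overall accuracy on all pixels
    acc = np.array([(pred[label == c] == c).mean()  # per category accuracy,
                    for c in range(num_classes)])   # shape (num_classes, )
    print(all_acc, acc)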
@@ -10,4 +10,3 @@ class BasePixelSampler(metaclass=ABCMeta):
 @abstractmethod
 def sample(self, seg_logit, seg_label):
 """Placeholder for sample function."""
-pass
@@ -214,8 +214,8 @@ class CustomDataset(Dataset):
 idx (int): Index of data.

 Returns:
-dict: Testing data after pipeline with new keys intorduced by
-piepline.
+dict: Testing data after pipeline with new keys introduced by
+pipeline.
 """

 img_info = self.img_infos[idx]
@@ -225,7 +225,6 @@ class CustomDataset(Dataset):

 def format_results(self, results, **kwargs):
 """Place holder to format result to dataset specific output."""
-pass

 def get_gt_seg_maps(self, efficient_test=False):
 """Get ground truth segmentation maps for evaluation."""
@@ -14,7 +14,7 @@ class Resize(object):
 contains the key "scale", then the scale in the input dict is used,
 otherwise the specified scale in the init method is used.

-``img_scale`` can be Nong, a tuple (single-scale) or a list of tuple
+``img_scale`` can be None, a tuple (single-scale) or a list of tuple
 (multi-scale). There are 4 multiscale modes:

 - ``ratio_range is not None``:
@@ -89,7 +89,7 @@ class Resize(object):
 Args:
 img_scales (list[tuple]): Images scale range for sampling.
 There must be two tuples in img_scales, which specify the lower
-and uper bound of image scales.
+and upper bound of image scales.

 Returns:
 (tuple, None): Returns a tuple ``(img_scale, None)``, where
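For context, a minimal sketch of sampling a scale between such a lower and upper bound (illustrative only; random_sample_scale and the example bounds are not taken from the repository):

    import numpy as np

    def random_sample_scale(img_scales):
        """Sample (long_edge, short_edge) between a lower and an upper bound tuple."""
        assert len(img_scales) == 2, 'expect exactly a lower and an upper bound'
        long_edges = [max(s) for s in img_scales]
        short_edges = [min(s) for s in img_scales]
        long_edge = np.random.randint(min(long_edges), max(long_edges) + 1)
        short_edge = np.random.randint(min(short_edges), max(short_edges) + 1)
        return (long_edge, short_edge), None  # (img_scale, None), as documented

    print(random_sample_scale([(640, 512), (2560, 2048)]))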
@@ -13,7 +13,7 @@ from ..builder import BACKBONES
 class GlobalContextExtractor(nn.Module):
 """Global Context Extractor for CGNet.

-This class is employed to refine the joFint feature of both local feature
+This class is employed to refine the joint feature of both local feature
 and surrounding context.

 Args:
@@ -357,7 +357,7 @@ class CGNet(nn.Module):
 raise TypeError('pretrained must be a str or None')

 def train(self, mode=True):
-"""Convert the model into training mode whill keeping the normalization
+"""Convert the model into training mode will keeping the normalization
 layer freezed."""
 super(CGNet, self).train(mode)
 if mode and self.norm_eval:
@@ -545,7 +545,7 @@ class HRNet(nn.Module):
 return y_list

 def train(self, mode=True):
-"""Convert the model into training mode whill keeping the normalization
+"""Convert the model into training mode will keeping the normalization
 layer freezed."""
 super(HRNet, self).train(mode)
 if mode and self.norm_eval:
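The train/norm_eval pattern both hunks touch can be sketched as a small stand-alone example (the ToyBackbone name is made up here; this is not the CGNet/HRNet code): switch to train mode, then put BatchNorm layers back into eval so their running stats stay frozen.

    import torch.nn as nn
    from torch.nn.modules.batchnorm import _BatchNorm

    class ToyBackbone(nn.Module):
        """Toy module showing the norm_eval idiom."""

        def __init__(self, norm_eval=True):
            super().__init__()
            self.norm_eval = norm_eval
            self.conv = nn.Conv2d(3, 8, 3, padding=1)
            self.bn = nn.BatchNorm2d(8)

        def train(self, mode=True):
            super().train(mode)
            if mode and self.norm_eval:
                for m in self.modules():
                    if isinstance(m, _BatchNorm):
                        m.eval()  # keep running mean/var frozen in train mode
            return self

    model = ToyBackbone().train()
    print(model.training, model.bn.training)  # True False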
@@ -19,7 +19,7 @@ class MobileNetV3(nn.Module):
 <https://ieeexplore.ieee.org/document/9008835>`_.

 Args:
-arch (str): Architechture of mobilnetv3, from {'small', 'large'}.
+arch (str): Architecture of mobilnetv3, from {'small', 'large'}.
 Default: 'small'.
 conv_cfg (dict): Config dict for convolution layer.
 Default: None, which means using conv2d.
@@ -28,13 +28,13 @@ class MobileNetV3(nn.Module):
 out_indices (tuple[int]): Output from which layer.
 Default: (0, 1, 12).
 frozen_stages (int): Stages to be frozen (all param fixed).
-Defualt: -1, which means not freezing any parameters.
+Default: -1, which means not freezing any parameters.
 norm_eval (bool): Whether to set norm layers to eval mode, namely,
 freeze running stats (mean and var). Note: Effect on Batch Norm
 and its variants only. Default: False.
 with_cp (bool): Use checkpoint or not. Using checkpoint will save
 some memory while slowing down the training speed.
-Defualt: False.
+Default: False.
 """
 # Parameters to build each block:
 # [kernel size, mid channels, out channels, with_se, act type, stride]
@@ -35,7 +35,7 @@ class BasicConvBlock(nn.Module):
 Default: dict(type='BN').
 act_cfg (dict | None): Config dict for activation layer in ConvModule.
 Default: dict(type='ReLU').
-dcn (bool): Use deformable convoluton in convolutional layer or not.
+dcn (bool): Use deformable convolution in convolutional layer or not.
 Default: None.
 plugins (dict): plugins for convolutional layers. Default: None.
 """
@@ -171,7 +171,7 @@ class InterpConv(nn.Module):
 kernel_size (int): Kernel size of the convolutional layer. Default: 1.
 stride (int): Stride of the convolutional layer. Default: 1.
 padding (int): Padding of the convolutional layer. Default: 1.
-upsampe_cfg (dict): Interpolation config of the upsample layer.
+upsample_cfg (dict): Interpolation config of the upsample layer.
 Default: dict(
 scale_factor=2, mode='bilinear', align_corners=False).
 """
@@ -188,7 +188,7 @@ class InterpConv(nn.Module):
 kernel_size=1,
 stride=1,
 padding=0,
-upsampe_cfg=dict(
+upsample_cfg=dict(
 scale_factor=2, mode='bilinear', align_corners=False)):
 super(InterpConv, self).__init__()

@@ -202,7 +202,7 @@ class InterpConv(nn.Module):
 conv_cfg=conv_cfg,
 norm_cfg=norm_cfg,
 act_cfg=act_cfg)
-upsample = nn.Upsample(**upsampe_cfg)
+upsample = nn.Upsample(**upsample_cfg)
 if conv_first:
 self.interp_upsample = nn.Sequential(conv, upsample)
 else:
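A simplified sketch of the pattern the renamed argument feeds into: the config dict is unpacked straight into nn.Upsample. This uses a plain Conv2d instead of mmcv's ConvModule, so it is an approximation of InterpConv, not the actual class:

    import torch
    import torch.nn as nn

    class SimpleInterpConv(nn.Module):
        def __init__(self, in_channels, out_channels, conv_first=False,
                     upsample_cfg=dict(scale_factor=2, mode='bilinear',
                                       align_corners=False)):
            super().__init__()
            conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
            upsample = nn.Upsample(**upsample_cfg)  # dict keys become kwargs
            layers = [conv, upsample] if conv_first else [upsample, conv]
            self.interp_upsample = nn.Sequential(*layers)

        def forward(self, x):
            return self.interp_upsample(x)

    x = torch.randn(1, 64, 128, 128)
    print(SimpleInterpConv(64, 32)(x).shape)  # torch.Size([1, 32, 256, 256])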
@@ -232,17 +232,17 @@ class UNet(nn.Module):
 strides (Sequence[int 1 | 2]): Strides of each stage in encoder.
 len(strides) is equal to num_stages. Normally the stride of the
 first stage in encoder is 1. If strides[i]=2, it uses stride
-convolution to downsample in the correspondance encoder stage.
+convolution to downsample in the correspondence encoder stage.
 Default: (1, 1, 1, 1, 1).
 enc_num_convs (Sequence[int]): Number of convolutional layers in the
-convolution block of the correspondance encoder stage.
+convolution block of the correspondence encoder stage.
 Default: (2, 2, 2, 2, 2).
 dec_num_convs (Sequence[int]): Number of convolutional layers in the
-convolution block of the correspondance decoder stage.
+convolution block of the correspondence decoder stage.
 Default: (2, 2, 2, 2).
 downsamples (Sequence[int]): Whether use MaxPool to downsample the
 feature map after the first stage of encoder
-(stages: [1, num_stages)). If the correspondance encoder stage use
+(stages: [1, num_stages)). If the correspondence encoder stage use
 stride convolution (strides[i]=2), it will never use MaxPool to
 downsample, even downsamples[i-1]=True.
 Default: (True, True, True, True).
@@ -263,14 +263,14 @@ class UNet(nn.Module):
 norm_eval (bool): Whether to set norm layers to eval mode, namely,
 freeze running stats (mean and var). Note: Effect on Batch Norm
 and its variants only. Default: False.
-dcn (bool): Use deformable convoluton in convolutional layer or not.
+dcn (bool): Use deformable convolution in convolutional layer or not.
 Default: None.
 plugins (dict): plugins for convolutional layers. Default: None.

 Notice:
-The input image size should be devisible by the whole downsample rate
+The input image size should be divisible by the whole downsample rate
 of the encoder. More detail of the whole downsample rate can be found
-in UNet._check_input_devisible.
+in UNet._check_input_divisible.

 """

@@ -373,7 +373,7 @@ class UNet(nn.Module):
 in_channels = base_channels * 2**i

 def forward(self, x):
-self._check_input_devisible(x)
+self._check_input_divisible(x)
 enc_outs = []
 for enc in self.encoder:
 x = enc(x)
@@ -395,7 +395,7 @@ class UNet(nn.Module):
 if isinstance(m, _BatchNorm):
 m.eval()

-def _check_input_devisible(self, x):
+def _check_input_divisible(self, x):
 h, w = x.shape[-2:]
 whole_downsample_rate = 1
 for i in range(1, self.num_stages):
@@ -403,7 +403,7 @@ class UNet(nn.Module):
 whole_downsample_rate *= 2
 assert (h % whole_downsample_rate == 0) \
 and (w % whole_downsample_rate == 0),\
-f'The input image size {(h, w)} should be devisible by the whole '\
+f'The input image size {(h, w)} should be divisible by the whole '\
 f'downsample rate {whole_downsample_rate}, when num_stages is '\
 f'{self.num_stages}, strides is {self.strides}, and downsamples '\
 f'is {self.downsamples}.'
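The whole-downsample-rate check these hunks rename can be summarised in a small stand-alone sketch (function name and example arguments are illustrative, assuming a stage after the first contributes a factor of 2 when it uses a stride-2 convolution or a MaxPool downsample):

    def check_input_divisible(h, w, strides, downsamples):
        whole_downsample_rate = 1
        for i in range(1, len(strides)):
            if strides[i] == 2 or downsamples[i - 1]:
                whole_downsample_rate *= 2
        assert h % whole_downsample_rate == 0 and w % whole_downsample_rate == 0, (
            f'The input image size {(h, w)} should be divisible by the whole '
            f'downsample rate {whole_downsample_rate}.')

    # example: five stages, all downsampled by MaxPool -> rate 16, 64x64 passes
    check_input_divisible(64, 64, strides=(1, 1, 1, 1, 1),
                          downsamples=(True, True, True, True))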
@@ -13,7 +13,7 @@ class ACM(nn.Module):

 Args:
 pool_scale (int): Pooling scale used in Adaptive Context
-Module to extract region fetures.
+Module to extract region features.
 fusion (bool): Add one conv to fuse residual feature.
 in_channels (int): Input channels.
 channels (int): Channels after modules, before conv_seg.
@@ -59,15 +59,15 @@ class DCM(nn.Module):

 def forward(self, x):
 """Forward function."""
-generted_filter = self.filter_gen_conv(
+generated_filter = self.filter_gen_conv(
 F.adaptive_avg_pool2d(x, self.filter_size))
 x = self.input_redu_conv(x)
 b, c, h, w = x.shape
 # [1, b * c, h, w], c = self.channels
 x = x.view(1, b * c, h, w)
 # [b * c, 1, filter_size, filter_size]
-generted_filter = generted_filter.view(b * c, 1, self.filter_size,
+generated_filter = generated_filter.view(b * c, 1, self.filter_size,
 self.filter_size)
 pad = (self.filter_size - 1) // 2
 if (self.filter_size - 1) % 2 == 0:
 p2d = (pad, pad, pad, pad)
@@ -75,7 +75,7 @@ class DCM(nn.Module):
 p2d = (pad + 1, pad, pad + 1, pad)
 x = F.pad(input=x, pad=p2d, mode='constant', value=0)
 # [1, b * c, h, w]
-output = F.conv2d(input=x, weight=generted_filter, groups=b * c)
+output = F.conv2d(input=x, weight=generated_filter, groups=b * c)
 # [b, c, h, w]
 output = output.view(b, c, h, w)
 if self.norm is not None:
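The renamed generated_filter is used as a per-sample dynamic kernel; a compact sketch of that grouped-convolution trick (random tensors stand in for the real feature maps and generated filters):

    import torch
    import torch.nn.functional as F

    b, c, h, w, filter_size = 2, 4, 8, 8, 3
    x = torch.randn(b, c, h, w)
    generated_filter = torch.randn(b * c, 1, filter_size, filter_size)

    x = x.view(1, b * c, h, w)           # fold batch into channels: [1, b*c, h, w]
    pad = (filter_size - 1) // 2
    x = F.pad(x, (pad, pad, pad, pad))   # keep spatial size after the conv
    output = F.conv2d(x, weight=generated_filter, groups=b * c)
    output = output.view(b, c, h, w)     # back to [b, c, h, w]
    print(output.shape)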
@@ -17,7 +17,7 @@ class GCHead(FCNHead):
 pooling_type (str): The pooling type of context aggregation.
 Options are 'att', 'avg'. Default: 'avg'.
 fusion_types (tuple[str]): The fusion type for feature fusion.
-Options are 'channel_add', 'channel_mul'. Defautl: ('channel_add',)
+Options are 'channel_add', 'channel_mul'. Default: ('channel_add',)
 """

 def __init__(self,
@@ -132,7 +132,7 @@ def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None):
 probs (torch.Tensor): [P, C], class probabilities at each prediction
 (between 0 and 1).
 labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1).
-classes (str | list[int], optional): Classes choosed to calculate loss.
+classes (str | list[int], optional): Classes chosen to calculate loss.
 'all' for all classes, 'present' for classes present in labels, or
 a list of classes to average. Default: 'present'.
 class_weight (list[float], optional): The weight for each class.
@@ -183,7 +183,7 @@ def lovasz_softmax(probs,
 prediction (between 0 and 1).
 labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and
 C - 1).
-classes (str | list[int], optional): Classes choosed to calculate loss.
+classes (str | list[int], optional): Classes chosen to calculate loss.
 'all' for all classes, 'present' for classes present in labels, or
 a list of classes to average. Default: 'present'.
 per_image (bool, optional): If per_image is True, compute the loss per
@@ -232,7 +232,7 @@ class LovaszLoss(nn.Module):
 Args:
 loss_type (str, optional): Binary or multi-class loss.
 Default: 'multi_class'. Options are "binary" and "multi_class".
-classes (str | list[int], optional): Classes choosed to calculate loss.
+classes (str | list[int], optional): Classes chosen to calculate loss.
 'all' for all classes, 'present' for classes present in labels, or
 a list of classes to average. Default: 'present'.
 per_image (bool, optional): If per_image is True, compute the loss per
@@ -1,5 +1,5 @@
 from mmcv.cnn import ConvModule
-from torch import nn as nn
+from torch import nn
 from torch.utils import checkpoint as cp

 from .se_layer import SELayer
@@ -101,10 +101,10 @@ class InvertedResidualV3(nn.Module):
 in_channels (int): The input channels of this Module.
 out_channels (int): The output channels of this Module.
 mid_channels (int): The input channels of the depthwise convolution.
-kernel_size (int): The kernal size of the depthwise convolution.
+kernel_size (int): The kernel size of the depthwise convolution.
 Default: 3.
 stride (int): The stride of the depthwise convolution. Default: 1.
-se_cfg (dict): Config dict for se layer. Defaul: None, which means no
+se_cfg (dict): Config dict for se layer. Default: None, which means no
 se layer.
 with_expand_conv (bool): Use expand conv or not. If set False,
 mid_channels must be the same with in_channels. Default: True.
@@ -15,10 +15,10 @@ class SELayer(nn.Module):
 conv_cfg (None or dict): Config dict for convolution layer.
 Default: None, which means using conv2d.
 act_cfg (dict or Sequence[dict]): Config dict for activation layer.
-If act_cfg is a dict, two activation layers will be configurated
+If act_cfg is a dict, two activation layers will be configured
 by this dict. If act_cfg is a sequence of dicts, the first
-activation layer will be configurated by the first dict and the
-second activation layer will be configurated by the second dict.
+activation layer will be configured by the first dict and the
+second activation layer will be configured by the second dict.
 Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
 divisor=6.0)).
 """
@@ -36,7 +36,7 @@ class UpConvBlock(nn.Module):
 high-level feature map is the same as that of skip feature map
 (low-level feature map from encoder), it does not need upsample the
 high-level feature map and the upsample_cfg is None.
-dcn (bool): Use deformable convoluton in convolutional layer or not.
+dcn (bool): Use deformable convolution in convolutional layer or not.
 Default: None.
 plugins (dict): plugins for convolutional layers. Default: None.
 """
@@ -1,5 +1,5 @@
 import torch
-from torch import nn as nn
+from torch import nn
 from torch.nn import functional as F


@@ -43,14 +43,14 @@ class Encoding(nn.Module):
 return scaled_l2_norm

 @staticmethod
-def aggregate(assigment_weights, x, codewords):
+def aggregate(assignment_weights, x, codewords):
 num_codes, channels = codewords.size()
 reshaped_codewords = codewords.view((1, 1, num_codes, channels))
 batch_size = x.size(0)

 expanded_x = x.unsqueeze(2).expand(
 (batch_size, x.size(1), num_codes, channels))
-encoded_feat = (assigment_weights.unsqueeze(3) *
+encoded_feat = (assignment_weights.unsqueeze(3) *
 (expanded_x - reshaped_codewords)).sum(dim=1)
 return encoded_feat

@@ -61,10 +61,10 @@ class Encoding(nn.Module):
 # [batch_size, height x width, channels]
 x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous()
 # assignment_weights: [batch_size, channels, num_codes]
-assigment_weights = F.softmax(
+assignment_weights = F.softmax(
 self.scaled_l2(x, self.codewords, self.scale), dim=2)
 # aggregate
-encoded_feat = self.aggregate(assigment_weights, x, self.codewords)
+encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
 return encoded_feat

 def __repr__(self):
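With the corrected assignment_weights name, the encoding step amounts to a softmax over scaled L2 distances to the codewords followed by a weighted sum of residuals. A rough stand-alone sketch (dimensions and tensors are illustrative, not the Encoding module itself):

    import torch
    import torch.nn.functional as F

    batch_size, n, channels, num_codes = 2, 16, 8, 4
    x = torch.randn(batch_size, n, channels)          # flattened H*W features
    codewords = torch.randn(num_codes, channels)
    scale = torch.randn(num_codes)

    # scaled L2 distance from every feature to every codeword
    residual = x.unsqueeze(2) - codewords.view(1, 1, num_codes, channels)
    scaled_l2 = scale.view(1, 1, num_codes) * residual.pow(2).sum(dim=3)
    assignment_weights = F.softmax(scaled_l2, dim=2)  # [B, N, num_codes]

    # aggregate residuals weighted by the soft assignments
    encoded_feat = (assignment_weights.unsqueeze(3) * residual).sum(dim=1)
    print(encoded_feat.shape)                         # [B, num_codes, channels]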
@@ -171,7 +171,8 @@ def test_interp_conv():
 64,
 32,
 conv_first=False,
-upsampe_cfg=dict(scale_factor=2, mode='bilinear', align_corners=False))
+upsample_cfg=dict(
+scale_factor=2, mode='bilinear', align_corners=False))
 x = torch.randn(1, 64, 128, 128)
 x_out = block(x)
 assert isinstance(block.interp_upsample[0], nn.Upsample)
@@ -184,7 +185,7 @@ def test_interp_conv():
 64,
 32,
 conv_first=False,
-upsampe_cfg=dict(scale_factor=2, mode='nearest'))
+upsample_cfg=dict(scale_factor=2, mode='nearest'))
 x = torch.randn(1, 64, 128, 128)
 x_out = block(x)
 assert isinstance(block.interp_upsample[0], nn.Upsample)
@@ -255,7 +256,7 @@ def test_up_conv_block():
 32,
 upsample_cfg=dict(
 type='InterpConv',
-upsampe_cfg=dict(
+upsample_cfg=dict(
 scale_factor=2, mode='bilinear', align_corners=False)))
 skip_x = torch.randn(1, 32, 256, 256)
 x = torch.randn(1, 64, 128, 128)
@@ -285,7 +286,7 @@ def test_up_conv_block():
 dilation=3,
 upsample_cfg=dict(
 type='InterpConv',
-upsampe_cfg=dict(
+upsample_cfg=dict(
 scale_factor=2, mode='bilinear', align_corners=False)))
 skip_x = torch.randn(1, 32, 256, 256)
 x = torch.randn(1, 64, 128, 128)
@@ -347,7 +348,7 @@ def test_unet():
 UNet(3, 64, 5, plugins=plugins)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 8.
 unet = UNet(
@@ -364,7 +365,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 16.
 unet = UNet(
@@ -381,7 +382,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 8.
 unet = UNet(
@@ -398,7 +399,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 8.
 unet = UNet(
@@ -415,7 +416,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 32.
 unet = UNet(