Mirror of https://github.com/open-mmlab/mmsegmentation.git (synced 2025-06-03 22:03:48 +08:00)
Fix typo: upsampe_cfg -> upsample_cfg (#449)
* Fix typo: upsampe_cfg -> upsample_cfg
  Signed-off-by: lizz <lizz@sensetime.com>
* convoluton -> convolution
  Signed-off-by: lizz <lizz@sensetime.com>
* more
  Signed-off-by: lizz <lizz@sensetime.com>
* ok
  Signed-off-by: lizz <lizz@sensetime.com>
parent f7a5d53a4b
commit 33e8357c62
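Because the commit renames the `upsampe_cfg` keyword of `InterpConv` to `upsample_cfg`, callers have to update the argument name. Below is a minimal sketch of the updated call, mirroring the values used in the changed `test_interp_conv` case further down; the import path is an assumption, since the diff does not show file paths.

# Hedged sketch of calling InterpConv after the rename; import path assumed.
import torch
from mmseg.models.backbones.unet import InterpConv  # assumed location

block = InterpConv(
    64,                  # in_channels
    32,                  # out_channels
    conv_first=False,
    upsample_cfg=dict(   # renamed from upsampe_cfg by this commit
        scale_factor=2, mode='bilinear', align_corners=False))

x = torch.randn(1, 64, 128, 128)
x_out = block(x)         # upsampled 2x and projected to 32 channels: (1, 32, 256, 256)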
@@ -42,7 +42,7 @@ def single_gpu_test(model,
 Args:
 model (nn.Module): Model to be tested.
 data_loader (utils.data.Dataloader): Pytorch data loader.
-show (bool): Whether show results during infernece. Default: False.
+show (bool): Whether show results during inference. Default: False.
 out_dir (str, optional): If specified, the results will be dumped into
 the directory to save output results.
 efficient_test (bool): Whether save the results as local numpy files to
@@ -212,7 +212,7 @@ def eval_metrics(results,
 Returns:
 float: Overall accuracy on all images.
 ndarray: Per category accuracy, shape (num_classes, ).
-ndarray: Per category evalution metrics, shape (num_classes, ).
+ndarray: Per category evaluation metrics, shape (num_classes, ).
 """
 if isinstance(metrics, str):
 metrics = [metrics]
@@ -10,4 +10,3 @@ class BasePixelSampler(metaclass=ABCMeta):
 @abstractmethod
 def sample(self, seg_logit, seg_label):
 """Placeholder for sample function."""
-pass
@@ -214,8 +214,8 @@ class CustomDataset(Dataset):
 idx (int): Index of data.

 Returns:
-dict: Testing data after pipeline with new keys intorduced by
-piepline.
+dict: Testing data after pipeline with new keys introduced by
+pipeline.
 """

 img_info = self.img_infos[idx]
@@ -225,7 +225,6 @@ class CustomDataset(Dataset):

 def format_results(self, results, **kwargs):
 """Place holder to format result to dataset specific output."""
-pass

 def get_gt_seg_maps(self, efficient_test=False):
 """Get ground truth segmentation maps for evaluation."""
@@ -14,7 +14,7 @@ class Resize(object):
 contains the key "scale", then the scale in the input dict is used,
 otherwise the specified scale in the init method is used.

-``img_scale`` can be Nong, a tuple (single-scale) or a list of tuple
+``img_scale`` can be None, a tuple (single-scale) or a list of tuple
 (multi-scale). There are 4 multiscale modes:

 - ``ratio_range is not None``:
@@ -89,7 +89,7 @@ class Resize(object):
 Args:
 img_scales (list[tuple]): Images scale range for sampling.
 There must be two tuples in img_scales, which specify the lower
-and uper bound of image scales.
+and upper bound of image scales.

 Returns:
 (tuple, None): Returns a tuple ``(img_scale, None)``, where
@@ -13,7 +13,7 @@ from ..builder import BACKBONES
 class GlobalContextExtractor(nn.Module):
 """Global Context Extractor for CGNet.

-This class is employed to refine the joFint feature of both local feature
+This class is employed to refine the joint feature of both local feature
 and surrounding context.

 Args:
@@ -357,7 +357,7 @@ class CGNet(nn.Module):
 raise TypeError('pretrained must be a str or None')

 def train(self, mode=True):
-"""Convert the model into training mode whill keeping the normalization
+"""Convert the model into training mode will keeping the normalization
 layer freezed."""
 super(CGNet, self).train(mode)
 if mode and self.norm_eval:
@@ -545,7 +545,7 @@ class HRNet(nn.Module):
 return y_list

 def train(self, mode=True):
-"""Convert the model into training mode whill keeping the normalization
+"""Convert the model into training mode will keeping the normalization
 layer freezed."""
 super(HRNet, self).train(mode)
 if mode and self.norm_eval:
@@ -19,7 +19,7 @@ class MobileNetV3(nn.Module):
 <https://ieeexplore.ieee.org/document/9008835>`_.

 Args:
-arch (str): Architechture of mobilnetv3, from {'small', 'large'}.
+arch (str): Architecture of mobilnetv3, from {'small', 'large'}.
 Default: 'small'.
 conv_cfg (dict): Config dict for convolution layer.
 Default: None, which means using conv2d.
@@ -28,13 +28,13 @@ class MobileNetV3(nn.Module):
 out_indices (tuple[int]): Output from which layer.
 Default: (0, 1, 12).
 frozen_stages (int): Stages to be frozen (all param fixed).
-Defualt: -1, which means not freezing any parameters.
+Default: -1, which means not freezing any parameters.
 norm_eval (bool): Whether to set norm layers to eval mode, namely,
 freeze running stats (mean and var). Note: Effect on Batch Norm
 and its variants only. Default: False.
 with_cp (bool): Use checkpoint or not. Using checkpoint will save
 some memory while slowing down the training speed.
-Defualt: False.
+Default: False.
 """
 # Parameters to build each block:
 # [kernel size, mid channels, out channels, with_se, act type, stride]
@@ -35,7 +35,7 @@ class BasicConvBlock(nn.Module):
 Default: dict(type='BN').
 act_cfg (dict | None): Config dict for activation layer in ConvModule.
 Default: dict(type='ReLU').
-dcn (bool): Use deformable convoluton in convolutional layer or not.
+dcn (bool): Use deformable convolution in convolutional layer or not.
 Default: None.
 plugins (dict): plugins for convolutional layers. Default: None.
 """
@@ -171,7 +171,7 @@ class InterpConv(nn.Module):
 kernel_size (int): Kernel size of the convolutional layer. Default: 1.
 stride (int): Stride of the convolutional layer. Default: 1.
 padding (int): Padding of the convolutional layer. Default: 1.
-upsampe_cfg (dict): Interpolation config of the upsample layer.
+upsample_cfg (dict): Interpolation config of the upsample layer.
 Default: dict(
 scale_factor=2, mode='bilinear', align_corners=False).
 """
@@ -188,7 +188,7 @@ class InterpConv(nn.Module):
 kernel_size=1,
 stride=1,
 padding=0,
-upsampe_cfg=dict(
+upsample_cfg=dict(
 scale_factor=2, mode='bilinear', align_corners=False)):
 super(InterpConv, self).__init__()

@@ -202,7 +202,7 @@ class InterpConv(nn.Module):
 conv_cfg=conv_cfg,
 norm_cfg=norm_cfg,
 act_cfg=act_cfg)
-upsample = nn.Upsample(**upsampe_cfg)
+upsample = nn.Upsample(**upsample_cfg)
 if conv_first:
 self.interp_upsample = nn.Sequential(conv, upsample)
 else:
@@ -232,17 +232,17 @@ class UNet(nn.Module):
 strides (Sequence[int 1 | 2]): Strides of each stage in encoder.
 len(strides) is equal to num_stages. Normally the stride of the
 first stage in encoder is 1. If strides[i]=2, it uses stride
-convolution to downsample in the correspondance encoder stage.
+convolution to downsample in the correspondence encoder stage.
 Default: (1, 1, 1, 1, 1).
 enc_num_convs (Sequence[int]): Number of convolutional layers in the
-convolution block of the correspondance encoder stage.
+convolution block of the correspondence encoder stage.
 Default: (2, 2, 2, 2, 2).
 dec_num_convs (Sequence[int]): Number of convolutional layers in the
-convolution block of the correspondance decoder stage.
+convolution block of the correspondence decoder stage.
 Default: (2, 2, 2, 2).
 downsamples (Sequence[int]): Whether use MaxPool to downsample the
 feature map after the first stage of encoder
-(stages: [1, num_stages)). If the correspondance encoder stage use
+(stages: [1, num_stages)). If the correspondence encoder stage use
 stride convolution (strides[i]=2), it will never use MaxPool to
 downsample, even downsamples[i-1]=True.
 Default: (True, True, True, True).
@@ -263,14 +263,14 @@ class UNet(nn.Module):
 norm_eval (bool): Whether to set norm layers to eval mode, namely,
 freeze running stats (mean and var). Note: Effect on Batch Norm
 and its variants only. Default: False.
-dcn (bool): Use deformable convoluton in convolutional layer or not.
+dcn (bool): Use deformable convolution in convolutional layer or not.
 Default: None.
 plugins (dict): plugins for convolutional layers. Default: None.

 Notice:
-The input image size should be devisible by the whole downsample rate
+The input image size should be divisible by the whole downsample rate
 of the encoder. More detail of the whole downsample rate can be found
-in UNet._check_input_devisible.
+in UNet._check_input_divisible.

 """

@@ -373,7 +373,7 @@ class UNet(nn.Module):
 in_channels = base_channels * 2**i

 def forward(self, x):
-self._check_input_devisible(x)
+self._check_input_divisible(x)
 enc_outs = []
 for enc in self.encoder:
 x = enc(x)
@@ -395,7 +395,7 @@ class UNet(nn.Module):
 if isinstance(m, _BatchNorm):
 m.eval()

-def _check_input_devisible(self, x):
+def _check_input_divisible(self, x):
 h, w = x.shape[-2:]
 whole_downsample_rate = 1
 for i in range(1, self.num_stages):
@@ -403,7 +403,7 @@ class UNet(nn.Module):
 whole_downsample_rate *= 2
 assert (h % whole_downsample_rate == 0) \
 and (w % whole_downsample_rate == 0),\
-f'The input image size {(h, w)} should be devisible by the whole '\
+f'The input image size {(h, w)} should be divisible by the whole '\
 f'downsample rate {whole_downsample_rate}, when num_stages is '\
 f'{self.num_stages}, strides is {self.strides}, and downsamples '\
 f'is {self.downsamples}.'
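The UNet hunks above rename devisible/_check_input_devisible to divisible/_check_input_divisible. As a hedged aside, the rule that check enforces reduces to the arithmetic below; this is a standalone sketch based on the docstring shown in the diff, not mmseg code, and the helper name whole_downsample_rate is made up here.

# Illustrative sketch of the divisibility rule behind UNet._check_input_divisible.
def whole_downsample_rate(strides, downsamples):
    rate = 1
    for stride, use_pool in zip(strides[1:], downsamples):
        if stride == 2 or use_pool:
            rate *= 2  # each downsampling stage halves H and W
    return rate

# With the documented defaults strides=(1, 1, 1, 1, 1) and
# downsamples=(True, True, True, True), the rate is 2**4 = 16,
# so the input height and width must both be multiples of 16.
assert whole_downsample_rate((1, 1, 1, 1, 1), (True, True, True, True)) == 16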
@@ -13,7 +13,7 @@ class ACM(nn.Module):

 Args:
 pool_scale (int): Pooling scale used in Adaptive Context
-Module to extract region fetures.
+Module to extract region features.
 fusion (bool): Add one conv to fuse residual feature.
 in_channels (int): Input channels.
 channels (int): Channels after modules, before conv_seg.
@@ -59,14 +59,14 @@ class DCM(nn.Module):

 def forward(self, x):
 """Forward function."""
-generted_filter = self.filter_gen_conv(
+generated_filter = self.filter_gen_conv(
 F.adaptive_avg_pool2d(x, self.filter_size))
 x = self.input_redu_conv(x)
 b, c, h, w = x.shape
 # [1, b * c, h, w], c = self.channels
 x = x.view(1, b * c, h, w)
 # [b * c, 1, filter_size, filter_size]
-generted_filter = generted_filter.view(b * c, 1, self.filter_size,
+generated_filter = generated_filter.view(b * c, 1, self.filter_size,
 self.filter_size)
 pad = (self.filter_size - 1) // 2
 if (self.filter_size - 1) % 2 == 0:
@@ -75,7 +75,7 @@ class DCM(nn.Module):
 p2d = (pad + 1, pad, pad + 1, pad)
 x = F.pad(input=x, pad=p2d, mode='constant', value=0)
 # [1, b * c, h, w]
-output = F.conv2d(input=x, weight=generted_filter, groups=b * c)
+output = F.conv2d(input=x, weight=generated_filter, groups=b * c)
 # [b, c, h, w]
 output = output.view(b, c, h, w)
 if self.norm is not None:
@@ -17,7 +17,7 @@ class GCHead(FCNHead):
 pooling_type (str): The pooling type of context aggregation.
 Options are 'att', 'avg'. Default: 'avg'.
 fusion_types (tuple[str]): The fusion type for feature fusion.
-Options are 'channel_add', 'channel_mul'. Defautl: ('channel_add',)
+Options are 'channel_add', 'channel_mul'. Default: ('channel_add',)
 """

 def __init__(self,
@@ -132,7 +132,7 @@ def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None):
 probs (torch.Tensor): [P, C], class probabilities at each prediction
 (between 0 and 1).
 labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1).
-classes (str | list[int], optional): Classes choosed to calculate loss.
+classes (str | list[int], optional): Classes chosen to calculate loss.
 'all' for all classes, 'present' for classes present in labels, or
 a list of classes to average. Default: 'present'.
 class_weight (list[float], optional): The weight for each class.
@@ -183,7 +183,7 @@ def lovasz_softmax(probs,
 prediction (between 0 and 1).
 labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and
 C - 1).
-classes (str | list[int], optional): Classes choosed to calculate loss.
+classes (str | list[int], optional): Classes chosen to calculate loss.
 'all' for all classes, 'present' for classes present in labels, or
 a list of classes to average. Default: 'present'.
 per_image (bool, optional): If per_image is True, compute the loss per
@@ -232,7 +232,7 @@ class LovaszLoss(nn.Module):
 Args:
 loss_type (str, optional): Binary or multi-class loss.
 Default: 'multi_class'. Options are "binary" and "multi_class".
-classes (str | list[int], optional): Classes choosed to calculate loss.
+classes (str | list[int], optional): Classes chosen to calculate loss.
 'all' for all classes, 'present' for classes present in labels, or
 a list of classes to average. Default: 'present'.
 per_image (bool, optional): If per_image is True, compute the loss per
@@ -1,5 +1,5 @@
 from mmcv.cnn import ConvModule
-from torch import nn as nn
+from torch import nn
 from torch.utils import checkpoint as cp

 from .se_layer import SELayer
@@ -101,10 +101,10 @@ class InvertedResidualV3(nn.Module):
 in_channels (int): The input channels of this Module.
 out_channels (int): The output channels of this Module.
 mid_channels (int): The input channels of the depthwise convolution.
-kernel_size (int): The kernal size of the depthwise convolution.
+kernel_size (int): The kernel size of the depthwise convolution.
 Default: 3.
 stride (int): The stride of the depthwise convolution. Default: 1.
-se_cfg (dict): Config dict for se layer. Defaul: None, which means no
+se_cfg (dict): Config dict for se layer. Default: None, which means no
 se layer.
 with_expand_conv (bool): Use expand conv or not. If set False,
 mid_channels must be the same with in_channels. Default: True.
@@ -15,10 +15,10 @@ class SELayer(nn.Module):
 conv_cfg (None or dict): Config dict for convolution layer.
 Default: None, which means using conv2d.
 act_cfg (dict or Sequence[dict]): Config dict for activation layer.
-If act_cfg is a dict, two activation layers will be configurated
+If act_cfg is a dict, two activation layers will be configured
 by this dict. If act_cfg is a sequence of dicts, the first
-activation layer will be configurated by the first dict and the
-second activation layer will be configurated by the second dict.
+activation layer will be configured by the first dict and the
+second activation layer will be configured by the second dict.
 Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
 divisor=6.0)).
 """
@@ -36,7 +36,7 @@ class UpConvBlock(nn.Module):
 high-level feature map is the same as that of skip feature map
 (low-level feature map from encoder), it does not need upsample the
 high-level feature map and the upsample_cfg is None.
-dcn (bool): Use deformable convoluton in convolutional layer or not.
+dcn (bool): Use deformable convolution in convolutional layer or not.
 Default: None.
 plugins (dict): plugins for convolutional layers. Default: None.
 """
@@ -1,5 +1,5 @@
 import torch
-from torch import nn as nn
+from torch import nn
 from torch.nn import functional as F


@@ -43,14 +43,14 @@ class Encoding(nn.Module):
 return scaled_l2_norm

 @staticmethod
-def aggregate(assigment_weights, x, codewords):
+def aggregate(assignment_weights, x, codewords):
 num_codes, channels = codewords.size()
 reshaped_codewords = codewords.view((1, 1, num_codes, channels))
 batch_size = x.size(0)

 expanded_x = x.unsqueeze(2).expand(
 (batch_size, x.size(1), num_codes, channels))
-encoded_feat = (assigment_weights.unsqueeze(3) *
+encoded_feat = (assignment_weights.unsqueeze(3) *
 (expanded_x - reshaped_codewords)).sum(dim=1)
 return encoded_feat

@@ -61,10 +61,10 @@ class Encoding(nn.Module):
 # [batch_size, height x width, channels]
 x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous()
 # assignment_weights: [batch_size, channels, num_codes]
-assigment_weights = F.softmax(
+assignment_weights = F.softmax(
 self.scaled_l2(x, self.codewords, self.scale), dim=2)
 # aggregate
-encoded_feat = self.aggregate(assigment_weights, x, self.codewords)
+encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
 return encoded_feat

 def __repr__(self):
@@ -171,7 +171,8 @@ def test_interp_conv():
 64,
 32,
 conv_first=False,
-upsampe_cfg=dict(scale_factor=2, mode='bilinear', align_corners=False))
+upsample_cfg=dict(
+scale_factor=2, mode='bilinear', align_corners=False))
 x = torch.randn(1, 64, 128, 128)
 x_out = block(x)
 assert isinstance(block.interp_upsample[0], nn.Upsample)
@@ -184,7 +185,7 @@ def test_interp_conv():
 64,
 32,
 conv_first=False,
-upsampe_cfg=dict(scale_factor=2, mode='nearest'))
+upsample_cfg=dict(scale_factor=2, mode='nearest'))
 x = torch.randn(1, 64, 128, 128)
 x_out = block(x)
 assert isinstance(block.interp_upsample[0], nn.Upsample)
@@ -255,7 +256,7 @@ def test_up_conv_block():
 32,
 upsample_cfg=dict(
 type='InterpConv',
-upsampe_cfg=dict(
+upsample_cfg=dict(
 scale_factor=2, mode='bilinear', align_corners=False)))
 skip_x = torch.randn(1, 32, 256, 256)
 x = torch.randn(1, 64, 128, 128)
@@ -285,7 +286,7 @@ def test_up_conv_block():
 dilation=3,
 upsample_cfg=dict(
 type='InterpConv',
-upsampe_cfg=dict(
+upsample_cfg=dict(
 scale_factor=2, mode='bilinear', align_corners=False)))
 skip_x = torch.randn(1, 32, 256, 256)
 x = torch.randn(1, 64, 128, 128)
@@ -347,7 +348,7 @@ def test_unet():
 UNet(3, 64, 5, plugins=plugins)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 8.
 unet = UNet(
@@ -364,7 +365,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 16.
 unet = UNet(
@@ -381,7 +382,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 8.
 unet = UNet(
@@ -398,7 +399,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 8.
 unet = UNet(
@@ -415,7 +416,7 @@ def test_unet():
 unet(x)

 with pytest.raises(AssertionError):
-# Check whether the input image size can be devisible by the whole
+# Check whether the input image size can be divisible by the whole
 # downsample rate of the encoder. The whole downsample rate of this
 # case is 32.
 unet = UNet(