From 0ad0303ebc6100089f5a6557519df8f2ffeaab28 Mon Sep 17 00:00:00 2001 From: Rockey <41846794+RockeyCoss@users.noreply.github.com> Date: Thu, 9 Dec 2021 12:12:31 +0800 Subject: [PATCH] [Fix] Remove the inplace operation in uper_head and fpn_neck (#1103) * [Fix] Remove the inplace operation in uper_head * remove the inplace operation in fpn neck * fix conflict * increase the coverage --- mmseg/models/backbones/mit.py | 3 --- mmseg/models/decode_heads/uper_head.py | 2 +- mmseg/models/necks/fpn.py | 5 +++-- tests/test_models/test_necks/test_fpn.py | 11 +++++++++++ 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/mmseg/models/backbones/mit.py b/mmseg/models/backbones/mit.py index d2c7c2302..8eb10112c 100644 --- a/mmseg/models/backbones/mit.py +++ b/mmseg/models/backbones/mit.py @@ -22,7 +22,6 @@ class MixFFN(BaseModule): The differences between MixFFN & FFN: 1. Use 1X1 Conv to replace Linear layer. 2. Introduce 3X3 Conv to encode positional information. - Args: embed_dims (int): The feature dimension. Same as `MultiheadAttention`. Defaults: 256. @@ -94,7 +93,6 @@ class EfficientMultiheadAttention(MultiheadAttention): This module is modified from MultiheadAttention which is a module from mmcv.cnn.bricks.transformer. - Args: embed_dims (int): The embedding dimension. num_heads (int): Parallel attention heads. @@ -291,7 +289,6 @@ class MixVisionTransformer(BaseModule): This backbone is the implementation of `SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers <https://arxiv.org/abs/2105.15203>`_. - Args: in_channels (int): Number of input channels. Default: 3. embed_dims (int): Embedding dimension. Default: 768. 
diff --git a/mmseg/models/decode_heads/uper_head.py b/mmseg/models/decode_heads/uper_head.py index 4a50316a5..57d80be1e 100644 --- a/mmseg/models/decode_heads/uper_head.py +++ b/mmseg/models/decode_heads/uper_head.py @@ -101,7 +101,7 @@ class UPerHead(BaseDecodeHead): used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] += resize( + laterals[i - 1] = laterals[i - 1] + resize( laterals[i], size=prev_shape, mode='bilinear', diff --git a/mmseg/models/necks/fpn.py b/mmseg/models/necks/fpn.py index bc237428e..975a48e8b 100644 --- a/mmseg/models/necks/fpn.py +++ b/mmseg/models/necks/fpn.py @@ -175,10 +175,11 @@ class FPN(BaseModule): # In some cases, fixing `scale factor` (e.g. 2) is preferred, but # it cannot co-exist with `size` in `F.interpolate`. if 'scale_factor' in self.upsample_cfg: - laterals[i - 1] += resize(laterals[i], **self.upsample_cfg) + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], **self.upsample_cfg) else: prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] += resize( + laterals[i - 1] = laterals[i - 1] + resize( laterals[i], size=prev_shape, **self.upsample_cfg) # build outputs diff --git a/tests/test_models/test_necks/test_fpn.py b/tests/test_models/test_necks/test_fpn.py index c64c23a4f..c29400602 100644 --- a/tests/test_models/test_necks/test_fpn.py +++ b/tests/test_models/test_necks/test_fpn.py @@ -17,3 +17,14 @@ def test_fpn(): assert outputs[1].shape == torch.Size([1, 64, 28, 28]) assert outputs[2].shape == torch.Size([1, 64, 14, 14]) assert outputs[3].shape == torch.Size([1, 64, 7, 7]) + + fpn = FPN( + in_channels, + 64, + len(in_channels), + upsample_cfg=dict(mode='nearest', scale_factor=2.0)) + outputs = fpn(inputs) + assert outputs[0].shape == torch.Size([1, 64, 56, 56]) + assert outputs[1].shape == torch.Size([1, 64, 28, 28]) + assert outputs[2].shape == torch.Size([1, 64, 14, 14]) + assert outputs[3].shape == torch.Size([1, 64, 7, 7])