From f3cd44bebf5ec76d744016276ba7f6408319f55e Mon Sep 17 00:00:00 2001 From: MengzhangLI Date: Fri, 14 Oct 2022 15:37:35 +0800 Subject: [PATCH] [Fix] Fix ResizeToMultiple transform in MMSeg 1.x (#2185) --- configs/segformer/README.md | 21 +++++++-------------- mmseg/datasets/transforms/transforms.py | 4 ++-- tests/test_datasets/test_transform.py | 2 +- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/configs/segformer/README.md b/configs/segformer/README.md index 655c2e92a..be64099da 100644 --- a/configs/segformer/README.md +++ b/configs/segformer/README.md @@ -77,20 +77,13 @@ using `AlignedResize`, you can change the dataset pipeline like this: ```python test_pipeline = [ dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 512), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU. - dict(type='ResizeToMultiple', size_divisor=32), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU. 
+ dict(type='ResizeToMultiple', size_divisor=32), + # load annotations after ``Resize`` because the ground truth + # does not need to be resized + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') ] ``` diff --git a/mmseg/datasets/transforms/transforms.py b/mmseg/datasets/transforms/transforms.py index 8b753c599..3cb173539 100644 --- a/mmseg/datasets/transforms/transforms.py +++ b/mmseg/datasets/transforms/transforms.py @@ -59,8 +59,8 @@ class ResizeToMultiple(BaseTransform): if self.interpolation else 'bilinear') results['img'] = img - results['img_shape'] = img.shape - results['pad_shape'] = img.shape + results['img_shape'] = img.shape[:2] + results['pad_shape'] = img.shape[:2] # Align segmentation map to multiple of size divisor. for key in results.get('seg_fields', []): diff --git a/tests/test_datasets/test_transform.py b/tests/test_datasets/test_transform.py index bf4accf67..f314050b7 100644 --- a/tests/test_datasets/test_transform.py +++ b/tests/test_datasets/test_transform.py @@ -678,4 +678,4 @@ def test_resize_to_multiple(): results = transform(results) assert results['img'].shape == (224, 256, 3) assert results['gt_semantic_seg'].shape == (224, 256) - assert results['img_shape'] == (224, 256, 3) + assert results['img_shape'] == (224, 256)