[Feature] Support ICNet (#884)

* add icnet backbone

* add icnet head

* add icnet configs

* nclass -> num_classes

* Support ICNet

* ICNet

* ICNet

* Add ICNeck

* Add ICNeck

* Add ICNeck

* Add ICNeck

* Adding unittest

* Uploading models & logs

* Uploading models & logs

* add comment

* smaller test_swin.py

* try to delete test_swin.py

* delete test_unet.py

* delete test_unet.py

* temp

* smaller test_unet.py

Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>
MengzhangLI 2021-10-01 00:31:57 +08:00 committed by GitHub
parent 84edf6c190
commit 7db1cbb181
30 changed files with 953 additions and 112 deletions

View File

@ -79,6 +79,7 @@ Supported methods:
- [x] [PSANet (ECCV'2018)](configs/psanet)
- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus)
- [x] [UPerNet (ECCV'2018)](configs/upernet)
- [x] [ICNet (ECCV'2018)](configs/icnet)
- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net)
- [x] [EncNet (CVPR'2018)](configs/encnet)
- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn)

View File

@ -78,6 +78,7 @@ MMSegmentation is an open source semantic segmentation toolbox based on PyTorch. It is a part of the O
- [x] [PSANet (ECCV'2018)](configs/psanet)
- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus)
- [x] [UPerNet (ECCV'2018)](configs/upernet)
- [x] [ICNet (ECCV'2018)](configs/icnet)
- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net)
- [x] [EncNet (CVPR'2018)](configs/encnet)
- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn)

View File

@ -0,0 +1,35 @@
_base_ = './cityscapes.py'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (832, 832)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 1024),
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
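As a quick sanity check (a sketch, not part of this diff), the merged dataset config can be loaded with mmcv to confirm that the 832x832 crop overrides the base Cityscapes pipeline; the repo-relative path below assumes the layout used in this PR:

```python
# Minimal sketch: inspect the merged dataset config (assumes mmcv's Config
# resolves the `_base_` inheritance and the repo-relative path below).
from mmcv import Config

cfg = Config.fromfile('configs/_base_/datasets/cityscapes_832x832.py')
# The override replaces the base pipelines wholesale, so RandomCrop should
# report the 832x832 crop shared by all ICNet configs.
print(cfg.data.train.pipeline[3])
# expected: {'type': 'RandomCrop', 'crop_size': (832, 832), 'cat_max_ratio': 0.75}
```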

View File

@ -0,0 +1,74 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='EncoderDecoder',
backbone=dict(
type='ICNet',
backbone_cfg=dict(
type='ResNetV1c',
in_channels=3,
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
in_channels=3,
layer_channels=(512, 2048),
light_branch_middle_channels=32,
psp_out_channels=512,
out_channels=(64, 256, 256),
norm_cfg=norm_cfg,
align_corners=False,
),
neck=dict(
type='ICNeck',
in_channels=(64, 256, 256),
out_channels=128,
norm_cfg=norm_cfg,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=128,
channels=128,
num_convs=1,
in_index=2,
dropout_ratio=0,
num_classes=19,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=128,
channels=128,
num_convs=1,
num_classes=19,
in_index=0,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='FCNHead',
in_channels=128,
channels=128,
num_convs=1,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))
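For a single-process smoke test of this model config (a sketch, not part of the diff): build the segmentor with mmseg's builder and run `forward_dummy`. SyncBN is swapped for plain BN here since SyncBN needs a distributed process group; the mmseg v0.18-style APIs are assumed.

```python
import torch
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile('configs/_base_/models/icnet_r50-d8.py')

# SyncBN requires torch.distributed; use BN for a single-process check.
bn = dict(type='BN', requires_grad=True)
cfg.model.backbone.norm_cfg = bn
cfg.model.backbone.backbone_cfg.norm_cfg = bn
cfg.model.neck.norm_cfg = bn
cfg.model.decode_head.norm_cfg = bn
for aux_head in cfg.model.auxiliary_head:
    aux_head.norm_cfg = bn

model = build_segmentor(cfg.model)
model.eval()
with torch.no_grad():
    # forward_dummy runs encode_decode, which upsamples the decode-head
    # logits back to input size: expected (1, 19, 832, 832).
    out = model.forward_dummy(torch.randn(1, 3, 832, 832))
print(out.shape)
```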

View File

@ -32,7 +32,7 @@
| BiSeNetV1 (No Pretrain) | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.44 | 77.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json) |
| BiSeNetV1| R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.37 | 76.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json) |
| BiSeNetV1 (4x8) | R-18-D32 | 1024x1024 | 160000 | 11.17 | 31.77 | 75.16 | 77.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json) |
| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 3.3 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
| BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) |
Note:

View File

@ -92,7 +92,7 @@ Models:
batch size: 1
mode: FP32
resolution: (1024,1024)
memory (GB): 3.3
memory (GB): 15.39
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes

View File

@ -0,0 +1,45 @@
# ICNet for Real-time Semantic Segmentation on High-resolution Images
## Introduction
<!-- [ALGORITHM] -->
<a href="https://github.com/hszhao/ICNet">Official Repo</a>
<a href="https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77">Code Snippet</a>
<details>
<summary align="right"><a href="https://arxiv.org/abs/1704.08545">ICNet (ECCV'2018)</a></summary>
```latex
@inproceedings{zhao2018icnet,
title={Icnet for real-time semantic segmentation on high-resolution images},
author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={405--420},
year={2018}
}
```
</details>
## Results and models
### Cityscapes
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ------ | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) |
| ICNet | R-18-D8 | 832x832 | 160000 | - | - | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) |
| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) |
| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) |
| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) |
| ICNet | R-50-D8 | 832x832 | 160000 | - | - | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) |
| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) |
| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) |
| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) |
| ICNet | R-101-D8 | 832x832 | 160000 | - | - | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) |
| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) |
| ICNet (in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) |
Note: `in1k-pre` means the backbone is initialized with ImageNet-1K pretrained weights.

View File

@ -0,0 +1,207 @@
Collections:
- Name: icnet
Metadata:
Training Data:
- Cityscapes
Paper:
URL: https://arxiv.org/abs/1704.08545
Title: ICNet for Real-time Semantic Segmentation on High-resolution Images
README: configs/icnet/README.md
Code:
URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77
Version: v0.18.0
Converted From:
Code: https://github.com/hszhao/ICNet
Models:
- Name: icnet_r18-d8_832x832_80k_cityscapes
In Collection: icnet
Metadata:
backbone: R-18-D8
crop size: (832,832)
lr schd: 80000
inference time (ms/im):
- value: 36.87
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (832,832)
memory (GB): 1.7
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 68.14
mIoU(ms+flip): 70.16
Config: configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth
- Name: icnet_r18-d8_832x832_160k_cityscapes
In Collection: icnet
Metadata:
backbone: R-18-D8
crop size: (832,832)
lr schd: 160000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 71.64
mIoU(ms+flip): 74.18
Config: configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth
- Name: icnet_r18-d8_in1k-pre_832x832_80k_cityscapes
In Collection: icnet
Metadata:
backbone: R-18-D8
crop size: (832,832)
lr schd: 80000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 72.51
mIoU(ms+flip): 74.78
Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth
- Name: icnet_r18-d8_in1k-pre_832x832_160k_cityscapes
In Collection: icnet
Metadata:
backbone: R-18-D8
crop size: (832,832)
lr schd: 160000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 74.43
mIoU(ms+flip): 76.72
Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth
- Name: icnet_r50-d8_832x832_80k_cityscapes
In Collection: icnet
Metadata:
backbone: R-50-D8
crop size: (832,832)
lr schd: 80000
inference time (ms/im):
- value: 49.8
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (832,832)
memory (GB): 2.53
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 68.91
mIoU(ms+flip): 69.72
Config: configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth
- Name: icnet_r50-d8_832x832_160k_cityscapes
In Collection: icnet
Metadata:
backbone: R-50-D8
crop size: (832,832)
lr schd: 160000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 73.82
mIoU(ms+flip): 75.67
Config: configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth
- Name: icnet_r50-d8_in1k-pre_832x832_80k_cityscapes
In Collection: icnet
Metadata:
backbone: R-50-D8
crop size: (832,832)
lr schd: 80000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 74.58
mIoU(ms+flip): 76.41
Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth
- Name: icnet_r50-d8_in1k-pre_832x832_160k_cityscapes
In Collection: icnet
Metadata:
backbone: R-50-D8
crop size: (832,832)
lr schd: 160000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 76.29
mIoU(ms+flip): 78.09
Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth
- Name: icnet_r101-d8_832x832_80k_cityscapes
In Collection: icnet
Metadata:
backbone: R-101-D8
crop size: (832,832)
lr schd: 80000
inference time (ms/im):
- value: 59.0
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (832,832)
memory (GB): 3.08
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 70.28
mIoU(ms+flip): 71.95
Config: configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth
- Name: icnet_r101-d8_832x832_160k_cityscapes
In Collection: icnet
Metadata:
backbone: R-101-D8
crop size: (832,832)
lr schd: 160000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 73.8
mIoU(ms+flip): 76.1
Config: configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth
- Name: icnet_r101-d8_in1k-pre_832x832_80k_cityscapes
In Collection: icnet
Metadata:
backbone: R-101-D8
crop size: (832,832)
lr schd: 80000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 75.57
mIoU(ms+flip): 77.86
Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth
- Name: icnet_r101-d8_in1k-pre_832x832_160k_cityscapes
In Collection: icnet
Metadata:
backbone: R-101-D8
crop size: (832,832)
lr schd: 160000
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 76.15
mIoU(ms+flip): 77.98
Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth

View File

@ -0,0 +1,2 @@
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
model = dict(backbone=dict(backbone_cfg=dict(depth=101)))
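These two-line configs lean on mmcv's `_base_` merging: only the changed keys are written, and everything else is inherited from the R-50 base. A minimal sketch of verifying the merge:

```python
from mmcv import Config

cfg = Config.fromfile('configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py')
print(cfg.model.backbone.backbone_cfg.depth)  # 101: the override wins over 50
print(cfg.model.backbone.layer_channels)      # (512, 2048): inherited unchanged
```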

View File

@ -0,0 +1,2 @@
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
model = dict(backbone=dict(backbone_cfg=dict(depth=101)))

View File

@ -0,0 +1,7 @@
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
model = dict(
backbone=dict(
backbone_cfg=dict(
depth=101,
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))

View File

@ -0,0 +1,7 @@
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
model = dict(
backbone=dict(
backbone_cfg=dict(
depth=101,
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))

View File

@ -0,0 +1,3 @@
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
model = dict(
backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18)))

View File

@ -0,0 +1,3 @@
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
model = dict(
backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18)))

View File

@ -0,0 +1,8 @@
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
model = dict(
backbone=dict(
layer_channels=(128, 512),
backbone_cfg=dict(
depth=18,
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))

View File

@ -0,0 +1,8 @@
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
model = dict(
backbone=dict(
layer_channels=(128, 512),
backbone_cfg=dict(
depth=18,
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))

View File

@ -0,0 +1,5 @@
_base_ = [
'../_base_/models/icnet_r50-d8.py',
'../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py',
'../_base_/schedules/schedule_160k.py'
]

View File

@ -0,0 +1,5 @@
_base_ = [
'../_base_/models/icnet_r50-d8.py',
'../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py',
'../_base_/schedules/schedule_80k.py'
]

View File

@ -0,0 +1,6 @@
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
model = dict(
backbone=dict(
backbone_cfg=dict(
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))

View File

@ -0,0 +1,6 @@
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
model = dict(
backbone=dict(
backbone_cfg=dict(
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))

View File

@ -4,6 +4,7 @@ from .bisenetv2 import BiSeNetV2
from .cgnet import CGNet
from .fast_scnn import FastSCNN
from .hrnet import HRNet
from .icnet import ICNet
from .mit import MixVisionTransformer
from .mobilenet_v2 import MobileNetV2
from .mobilenet_v3 import MobileNetV3
@ -18,5 +19,5 @@ __all__ = [
'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
'BiSeNetV1', 'BiSeNetV2'
'BiSeNetV1', 'BiSeNetV2', 'ICNet'
]

View File

@ -0,0 +1,165 @@
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from mmseg.ops import resize
from ..builder import BACKBONES, build_backbone
from ..decode_heads.psp_head import PPM
@BACKBONES.register_module()
class ICNet(BaseModule):
"""ICNet for Real-Time Semantic Segmentation on High-Resolution Images.
This backbone is the implementation of
`ICNet <https://arxiv.org/abs/1704.08545>`_.
Args:
backbone_cfg (dict): Config dict to build the backbone. Usually it is
ResNet, but other backbones can also be used.
in_channels (int): The number of input image channels. Default: 3.
layer_channels (Sequence[int]): The numbers of feature channels at
layer 2 and layer 4 of the backbone (ResNet by default; other
backbones may differ). Default: (512, 2048).
light_branch_middle_channels (int): The number of channels of the
middle layer in the light branch. Default: 32.
psp_out_channels (int): The number of channels of the output of the PSP
module. Default: 512.
out_channels (Sequence[int]): The numbers of output feature channels
at each branch. Default: (64, 256, 256).
pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
Module. Default: (1, 2, 3, 6).
conv_cfg (dict): Dictionary to construct and config conv layer.
Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: dict(type='BN').
act_cfg (dict): Dictionary to construct and config act layer.
Default: dict(type='ReLU').
align_corners (bool): align_corners argument of F.interpolate.
Default: False.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None.
"""
def __init__(self,
backbone_cfg,
in_channels=3,
layer_channels=(512, 2048),
light_branch_middle_channels=32,
psp_out_channels=512,
out_channels=(64, 256, 256),
pool_scales=(1, 2, 3, 6),
conv_cfg=None,
norm_cfg=dict(type='BN', requires_grad=True),
act_cfg=dict(type='ReLU'),
align_corners=False,
init_cfg=None):
if backbone_cfg is None:
raise TypeError('backbone_cfg must be passed from config file!')
if init_cfg is None:
init_cfg = [
dict(type='Kaiming', mode='fan_out', layer='Conv2d'),
dict(type='Constant', val=1, layer='_BatchNorm'),
dict(type='Normal', mean=0.01, layer='Linear')
]
super(ICNet, self).__init__(init_cfg=init_cfg)
self.align_corners = align_corners
self.backbone = build_backbone(backbone_cfg)
# Note: Default `ceil_mode` is False in nn.MaxPool2d; set
# `ceil_mode=True` to keep information at the corners of the feature map.
self.backbone.maxpool = nn.MaxPool2d(
kernel_size=3, stride=2, padding=1, ceil_mode=True)
self.psp_modules = PPM(
pool_scales=pool_scales,
in_channels=layer_channels[1],
channels=psp_out_channels,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
align_corners=align_corners)
self.psp_bottleneck = ConvModule(
layer_channels[1] + len(pool_scales) * psp_out_channels,
psp_out_channels,
3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
self.conv_sub1 = nn.Sequential(
ConvModule(
in_channels=in_channels,
out_channels=light_branch_middle_channels,
kernel_size=3,
stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg),
ConvModule(
in_channels=light_branch_middle_channels,
out_channels=light_branch_middle_channels,
kernel_size=3,
stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg),
ConvModule(
in_channels=light_branch_middle_channels,
out_channels=out_channels[0],
kernel_size=3,
stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg))
self.conv_sub2 = ConvModule(
layer_channels[0],
out_channels[1],
1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
self.conv_sub4 = ConvModule(
psp_out_channels,
out_channels[2],
1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
output = []
# sub 1
output.append(self.conv_sub1(x))
# sub 2
x = resize(
x,
scale_factor=0.5,
mode='bilinear',
align_corners=self.align_corners)
x = self.backbone.stem(x)
x = self.backbone.maxpool(x)
x = self.backbone.layer1(x)
x = self.backbone.layer2(x)
output.append(self.conv_sub2(x))
# sub 4
x = resize(
x,
scale_factor=0.5,
mode='bilinear',
align_corners=self.align_corners)
x = self.backbone.layer3(x)
x = self.backbone.layer4(x)
psp_outs = self.psp_modules(x) + [x]
psp_outs = torch.cat(psp_outs, dim=1)
x = self.psp_bottleneck(psp_outs)
output.append(self.conv_sub4(x))
return output
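The three outputs land at 1/8, 1/16, and 1/32 of the input resolution: sub1 comes from the three stride-2 convs, sub2 from the half-size resize followed by the backbone stem, maxpool, layer1 and layer2, and sub4 from a further half-size resize through the dilated layer3/layer4 plus PSP. A minimal forward-pass sketch (plain BN assumed so it runs on a single device):

```python
import torch
from mmseg.models.backbones import ICNet

backbone = ICNet(
    backbone_cfg=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='BN', requires_grad=True),
        contract_dilation=True))
backbone.eval()
with torch.no_grad():
    sub1, sub2, sub4 = backbone(torch.randn(1, 3, 512, 1024))
print(sub1.shape)  # torch.Size([1, 64, 64, 128])  -> 1/8 resolution
print(sub2.shape)  # torch.Size([1, 256, 32, 64])  -> 1/16 resolution
print(sub4.shape)  # torch.Size([1, 256, 16, 32])  -> 1/32 resolution
```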

View File

@ -1,6 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .fpn import FPN
from .ic_neck import ICNeck
from .mla_neck import MLANeck
from .multilevel_neck import MultiLevelNeck
__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck']
__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck']

View File

@ -0,0 +1,147 @@
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from mmseg.ops import resize
from ..builder import NECKS
class CascadeFeatureFusion(BaseModule):
"""Cascade Feature Fusion Unit in ICNet.
Args:
low_channels (int): The number of input channels for
low resolution feature map.
high_channels (int): The number of input channels for
high resolution feature map.
out_channels (int): The number of output channels.
conv_cfg (dict): Dictionary to construct and config conv layer.
Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: dict(type='BN').
act_cfg (dict): Dictionary to construct and config act layer.
Default: dict(type='ReLU').
align_corners (bool): align_corners argument of F.interpolate.
Default: False.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None.
Returns:
x (Tensor): The output tensor of shape (N, out_channels, H, W).
x_low (Tensor): The output tensor of shape (N, out_channels, H, W)
for Cascade Label Guidance in auxiliary heads.
"""
def __init__(self,
low_channels,
high_channels,
out_channels,
conv_cfg=None,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
align_corners=False,
init_cfg=None):
super(CascadeFeatureFusion, self).__init__(init_cfg=init_cfg)
self.align_corners = align_corners
self.conv_low = ConvModule(
low_channels,
out_channels,
3,
padding=2,
dilation=2,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
self.conv_high = ConvModule(
high_channels,
out_channels,
1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
def forward(self, x_low, x_high):
x_low = resize(
x_low,
size=x_high.size()[2:],
mode='bilinear',
align_corners=self.align_corners)
# Note: Unlike the original paper, `x_low` is passed through
# `self.conv_low` rather than a separate 1x1 conv classifier
# before being fed to the auxiliary head.
x_low = self.conv_low(x_low)
x_high = self.conv_high(x_high)
x = x_low + x_high
x = F.relu(x, inplace=True)
return x, x_low
@NECKS.register_module()
class ICNeck(BaseModule):
"""ICNet for Real-Time Semantic Segmentation on High-Resolution Images.
This neck is the implementation of `ICHead
<https://arxiv.org/abs/1704.08545>`_.
Args:
in_channels (Sequence[int]): The numbers of input feature channels
at the three ICNet branches. Default: (64, 256, 256).
out_channels (int): The number of output feature channels.
Default: 128.
conv_cfg (dict): Dictionary to construct and config conv layer.
Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: dict(type='BN').
act_cfg (dict): Dictionary to construct and config act layer.
Default: dict(type='ReLU').
align_corners (bool): align_corners argument of F.interpolate.
Default: False.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None.
"""
def __init__(self,
in_channels=(64, 256, 256),
out_channels=128,
conv_cfg=None,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
align_corners=False,
init_cfg=None):
super(ICNeck, self).__init__(init_cfg=init_cfg)
assert len(in_channels) == 3, 'Length of input channels must be 3!'
self.in_channels = in_channels
self.out_channels = out_channels
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.align_corners = align_corners
self.cff_24 = CascadeFeatureFusion(
self.in_channels[2],
self.in_channels[1],
self.out_channels,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
align_corners=self.align_corners)
self.cff_12 = CascadeFeatureFusion(
self.out_channels,
self.in_channels[0],
self.out_channels,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
align_corners=self.align_corners)
def forward(self, inputs):
assert len(inputs) == 3, 'Length of input feature maps must be 3!'
x_sub1, x_sub2, x_sub4 = inputs
x_cff_24, x_24 = self.cff_24(x_sub4, x_sub2)
x_cff_12, x_12 = self.cff_12(x_cff_24, x_sub1)
# Note: `x_cff_12` is used for decode_head,
# `x_24` and `x_12` are used for auxiliary head.
return x_24, x_12, x_cff_12
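Chained together, `cff_24` fuses the 1/32 and 1/16 branches first, then `cff_12` lifts the result to the 1/8 branch, so all three returned maps carry `out_channels` channels. A minimal shape check (branch resolutions of an 832x832 crop assumed):

```python
import torch
from mmseg.models.necks import ICNeck

neck = ICNeck(
    in_channels=(64, 256, 256),
    out_channels=128,
    norm_cfg=dict(type='BN', requires_grad=True),
    align_corners=False)
neck.eval()
feats = (
    torch.randn(1, 64, 104, 104),  # sub1: 1/8 of an 832x832 crop
    torch.randn(1, 256, 52, 52),   # sub2: 1/16
    torch.randn(1, 256, 26, 26),   # sub4: 1/32
)
with torch.no_grad():
    x_24, x_12, x_cff_12 = neck(feats)
print(x_24.shape)      # torch.Size([1, 128, 52, 52]):   auxiliary head (in_index=0)
print(x_12.shape)      # torch.Size([1, 128, 104, 104]): auxiliary head (in_index=1)
print(x_cff_12.shape)  # torch.Size([1, 128, 104, 104]): decode head (in_index=2)
```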

View File

@ -18,6 +18,7 @@ Import:
- configs/fp16/fp16.yml
- configs/gcnet/gcnet.yml
- configs/hrnet/hrnet.yml
- configs/icnet/icnet.yml
- configs/isanet/isanet.yml
- configs/mobilenet_v2/mobilenet_v2.yml
- configs/mobilenet_v3/mobilenet_v3.yml

View File

@ -0,0 +1,48 @@
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmseg.models.backbones import ICNet
def test_icnet_backbone():
with pytest.raises(TypeError):
# Must give backbone dict in config file.
ICNet(
in_channels=3,
layer_channels=(512, 2048),
light_branch_middle_channels=32,
psp_out_channels=512,
out_channels=(64, 256, 256),
backbone_cfg=None)
# Test ICNet Standard Forward
model = ICNet(
backbone_cfg=dict(
type='ResNetV1c',
in_channels=3,
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
style='pytorch',
contract_dilation=True))
assert hasattr(model.backbone,
'maxpool') and model.backbone.maxpool.ceil_mode is True
model.init_weights()
model.train()
batch_size = 2
imgs = torch.randn(batch_size, 3, 512, 1024)
feat = model(imgs)
assert model.psp_modules[0][0].output_size == 1
assert model.psp_modules[1][0].output_size == 2
assert model.psp_modules[2][0].output_size == 3
assert model.psp_bottleneck.padding == 1
assert model.conv_sub1[0].padding == 1
assert len(feat) == 3
assert feat[0].shape == torch.Size([batch_size, 64, 64, 128])

View File

@ -50,22 +50,22 @@ def test_swin_transformer():
model(temp)
# Test normal inference
temp = torch.randn((1, 3, 512, 512))
temp = torch.randn((1, 3, 256, 256))
model = SwinTransformer()
outs = model(temp)
assert outs[0].shape == (1, 96, 128, 128)
assert outs[1].shape == (1, 192, 64, 64)
assert outs[2].shape == (1, 384, 32, 32)
assert outs[3].shape == (1, 768, 16, 16)
assert outs[0].shape == (1, 96, 64, 64)
assert outs[1].shape == (1, 192, 32, 32)
assert outs[2].shape == (1, 384, 16, 16)
assert outs[3].shape == (1, 768, 8, 8)
# Test abnormal inference size
temp = torch.randn((1, 3, 511, 511))
temp = torch.randn((1, 3, 255, 255))
model = SwinTransformer()
outs = model(temp)
assert outs[0].shape == (1, 96, 128, 128)
assert outs[1].shape == (1, 192, 64, 64)
assert outs[2].shape == (1, 384, 32, 32)
assert outs[3].shape == (1, 768, 16, 16)
assert outs[0].shape == (1, 96, 64, 64)
assert outs[1].shape == (1, 192, 32, 32)
assert outs[2].shape == (1, 384, 16, 16)
assert outs[3].shape == (1, 768, 8, 8)
# Test abnormal inference size
temp = torch.randn((1, 3, 112, 137))
@ -89,7 +89,7 @@ def test_swin_transformer():
assert not p.requires_grad
# Test Swin with checkpoint forward
temp = torch.randn((1, 3, 224, 224))
temp = torch.randn((1, 3, 112, 112))
model = SwinTransformer(with_cp=True)
for m in model.modules():
if isinstance(m, SwinBlock):

View File

@ -345,7 +345,7 @@ def test_unet():
# case is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=4,
strides=(1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2),
@ -362,7 +362,7 @@ def test_unet():
# case is 16.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -379,7 +379,7 @@ def test_unet():
# case is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -396,7 +396,7 @@ def test_unet():
# case is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 2, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -413,7 +413,7 @@ def test_unet():
# case is 32.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=6,
strides=(1, 1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2, 2),
@ -428,7 +428,7 @@ def test_unet():
# Check if num_stages matchs strides, len(strides)=num_stages
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -443,7 +443,7 @@ def test_unet():
# Check if num_stages matchs strides, len(enc_num_convs)=num_stages
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2),
@ -458,7 +458,7 @@ def test_unet():
# Check if num_stages matchs strides, len(dec_num_convs)=num_stages-1
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -473,7 +473,7 @@ def test_unet():
# Check if num_stages matchs strides, len(downsamples)=num_stages-1
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -488,7 +488,7 @@ def test_unet():
# Check if num_stages matchs strides, len(enc_dilations)=num_stages
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -503,7 +503,7 @@ def test_unet():
# Check if num_stages matchs strides, len(dec_dilations)=num_stages-1
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -517,7 +517,7 @@ def test_unet():
# test UNet norm_eval=True
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -532,7 +532,7 @@ def test_unet():
# test UNet norm_eval=False
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -547,7 +547,7 @@ def test_unet():
# test UNet forward and outputs. The whole downsample rate is 16.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -558,16 +558,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 8, 8])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 8, 8])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -578,16 +578,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 2, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -598,16 +598,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -618,16 +618,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -638,16 +638,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -658,16 +658,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -678,16 +678,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 2.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -698,16 +698,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 64, 64])
assert x_outs[1].shape == torch.Size([2, 512, 64, 64])
assert x_outs[2].shape == torch.Size([2, 256, 64, 64])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 64, 64])
assert x_outs[1].shape == torch.Size([2, 32, 64, 64])
assert x_outs[2].shape == torch.Size([2, 16, 64, 64])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 1.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -718,16 +718,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 128, 128])
assert x_outs[1].shape == torch.Size([2, 512, 128, 128])
assert x_outs[2].shape == torch.Size([2, 256, 128, 128])
assert x_outs[3].shape == torch.Size([2, 128, 128, 128])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 128, 128])
assert x_outs[1].shape == torch.Size([2, 32, 128, 128])
assert x_outs[2].shape == torch.Size([2, 16, 128, 128])
assert x_outs[3].shape == torch.Size([2, 8, 128, 128])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 16.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -737,16 +737,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 8, 8])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 8, 8])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -756,16 +756,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 2, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -775,16 +775,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -794,16 +794,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet init_weights method.
unet = UNet(
in_channels=3,
base_channels=64,
base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@ -815,8 +815,8 @@ def test_unet():
unet.init_weights()
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])

View File

@ -0,0 +1,53 @@
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmseg.models.necks import ICNeck
from mmseg.models.necks.ic_neck import CascadeFeatureFusion
from ..test_heads.utils import _conv_has_norm, to_cuda
def test_ic_neck():
# test with norm_cfg
neck = ICNeck(
in_channels=(64, 256, 256),
out_channels=128,
norm_cfg=dict(type='SyncBN'),
align_corners=False)
assert _conv_has_norm(neck, sync_bn=True)
inputs = [
torch.randn(1, 64, 128, 256),
torch.randn(1, 256, 65, 129),
torch.randn(1, 256, 32, 64)
]
neck = ICNeck(
in_channels=(64, 256, 256),
out_channels=128,
norm_cfg=dict(type='BN', requires_grad=True),
align_corners=False)
if torch.cuda.is_available():
neck, inputs = to_cuda(neck, inputs)
outputs = neck(inputs)
assert outputs[0].shape == (1, 128, 65, 129)
assert outputs[1].shape == (1, 128, 128, 256)
assert outputs[2].shape == (1, 128, 128, 256)
def test_ic_neck_cascade_feature_fusion():
cff = CascadeFeatureFusion(256, 256, 128)
assert cff.conv_low.in_channels == 256
assert cff.conv_low.out_channels == 128
assert cff.conv_high.in_channels == 256
assert cff.conv_high.out_channels == 128
def test_ic_neck_input_channels():
with pytest.raises(AssertionError):
# `in_channels` of ICNeck must contain exactly 3 elements.
ICNeck(
in_channels=(64, 256, 256, 256),
out_channels=128,
norm_cfg=dict(type='BN', requires_grad=True),
align_corners=False)

View File