[Feature] Support ICNet (#884)
* add icnet backbone * add icnet head * add icnet configs * nclass -> num_classes * Support ICNet * ICNet * ICNet * Add ICNeck * Add ICNeck * Add ICNeck * Add ICNeck * Adding unittest * Uploading models & logs * Uploading models & logs * add comment * smaller test_swin.py * try to delete test_swin.py * delete test_unet.py * delete test_unet.py * temp * smaller test_unet.py Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>pull/1801/head
parent
84edf6c190
commit
7db1cbb181
|
@ -79,6 +79,7 @@ Supported methods:
|
|||
- [x] [PSANet (ECCV'2018)](configs/psanet)
|
||||
- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus)
|
||||
- [x] [UPerNet (ECCV'2018)](configs/upernet)
|
||||
- [x] [ICNet (ECCV'2018)](configs/icnet)
|
||||
- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net)
|
||||
- [x] [EncNet (CVPR'2018)](configs/encnet)
|
||||
- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn)
|
||||
|
|
|
@ -78,6 +78,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
|
|||
- [x] [PSANet (ECCV'2018)](configs/psanet)
|
||||
- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus)
|
||||
- [x] [UPerNet (ECCV'2018)](configs/upernet)
|
||||
- [x] [ICNet (ECCV'2018)](configs/icnet)
|
||||
- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net)
|
||||
- [x] [EncNet (CVPR'2018)](configs/encnet)
|
||||
- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn)
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
_base_ = './cityscapes.py'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
crop_size = (832, 832)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(2048, 1024),
|
||||
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', keep_ratio=True),
|
||||
dict(type='RandomFlip'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
train=dict(pipeline=train_pipeline),
|
||||
val=dict(pipeline=test_pipeline),
|
||||
test=dict(pipeline=test_pipeline))
|
|
@ -0,0 +1,74 @@
|
|||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
backbone=dict(
|
||||
type='ICNet',
|
||||
backbone_cfg=dict(
|
||||
type='ResNetV1c',
|
||||
in_channels=3,
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
in_channels=3,
|
||||
layer_channels=(512, 2048),
|
||||
light_branch_middle_channels=32,
|
||||
psp_out_channels=512,
|
||||
out_channels=(64, 256, 256),
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
),
|
||||
neck=dict(
|
||||
type='ICNeck',
|
||||
in_channels=(64, 256, 256),
|
||||
out_channels=128,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
in_index=2,
|
||||
dropout_ratio=0,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=0,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
|
@ -32,7 +32,7 @@
|
|||
| BiSeNetV1 (No Pretrain) | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.44 | 77.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json) |
|
||||
| BiSeNetV1| R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.37 | 76.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json) |
|
||||
| BiSeNetV1 (4x8) | R-18-D32 | 1024x1024 | 160000 | 11.17 | 31.77 | 75.16 | 77.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json) |
|
||||
| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 3.3 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
|
||||
| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
|
||||
| BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) |
|
||||
|
||||
Note:
|
||||
|
|
|
@ -92,7 +92,7 @@ Models:
|
|||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (1024,1024)
|
||||
memory (GB): 3.3
|
||||
memory (GB): 15.39
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
# ICNet for Real-time Semantic Segmentation on High-resolution Images
|
||||
|
||||
## Introduction
|
||||
|
||||
<!-- [ALGORITHM] -->
|
||||
|
||||
<a href="https://github.com/hszhao/ICNet">Official Repo</a>
|
||||
|
||||
<a href="https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77">Code Snippet</a>
|
||||
|
||||
<details>
|
||||
<summary align="right"><a href="https://arxiv.org/abs/1704.08545">ICNet (ECCV'2018)</a></summary>
|
||||
|
||||
```latex
|
||||
@inproceedings{zhao2018icnet,
|
||||
title={Icnet for real-time semantic segmentation on high-resolution images},
|
||||
author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya},
|
||||
booktitle={Proceedings of the European conference on computer vision (ECCV)},
|
||||
pages={405--420},
|
||||
year={2018}
|
||||
}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Results and models
|
||||
|
||||
### Cityscapes
|
||||
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
|
||||
| ------ | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) |
|
||||
| ICNet | R-18-D8 | 832x832 | 160000 | - | - | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) |
|
||||
| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) |
|
||||
| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) |
|
||||
| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) |
|
||||
| ICNet | R-50-D8 | 832x832 | 160000 | - | - | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) |
|
||||
| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) |
|
||||
| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) |
|
||||
| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) |
|
||||
| ICNet | R-101-D8 | 832x832 | 160000 | - | - | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) |
|
||||
| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) |
|
||||
| ICNet (in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) |
|
||||
|
||||
Note: `in1k-pre` means that a pretrained model is used.
|
|
@ -0,0 +1,207 @@
|
|||
Collections:
|
||||
- Name: icnet
|
||||
Metadata:
|
||||
Training Data:
|
||||
- Cityscapes
|
||||
Paper:
|
||||
URL: https://arxiv.org/abs/1704.08545
|
||||
Title: ICNet for Real-time Semantic Segmentation on High-resolution Images
|
||||
README: configs/icnet/README.md
|
||||
Code:
|
||||
URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77
|
||||
Version: v0.18.0
|
||||
Converted From:
|
||||
Code: https://github.com/hszhao/ICNet
|
||||
Models:
|
||||
- Name: icnet_r18-d8_832x832_80k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-18-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 80000
|
||||
inference time (ms/im):
|
||||
- value: 36.87
|
||||
hardware: V100
|
||||
backend: PyTorch
|
||||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (832,832)
|
||||
memory (GB): 1.7
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 68.14
|
||||
mIoU(ms+flip): 70.16
|
||||
Config: configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth
|
||||
- Name: icnet_r18-d8_832x832_160k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-18-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 71.64
|
||||
mIoU(ms+flip): 74.18
|
||||
Config: configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth
|
||||
- Name: icnet_r18-d8_in1k-pre_832x832_80k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-18-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 80000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 72.51
|
||||
mIoU(ms+flip): 74.78
|
||||
Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth
|
||||
- Name: icnet_r18-d8_in1k-pre_832x832_160k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-18-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 74.43
|
||||
mIoU(ms+flip): 76.72
|
||||
Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth
|
||||
- Name: icnet_r50-d8_832x832_80k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-50-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 80000
|
||||
inference time (ms/im):
|
||||
- value: 49.8
|
||||
hardware: V100
|
||||
backend: PyTorch
|
||||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (832,832)
|
||||
memory (GB): 2.53
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 68.91
|
||||
mIoU(ms+flip): 69.72
|
||||
Config: configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth
|
||||
- Name: icnet_r50-d8_832x832_160k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-50-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 73.82
|
||||
mIoU(ms+flip): 75.67
|
||||
Config: configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth
|
||||
- Name: icnet_r50-d8_in1k-pre_832x832_80k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-50-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 80000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 74.58
|
||||
mIoU(ms+flip): 76.41
|
||||
Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth
|
||||
- Name: icnet_r50-d8_in1k-pre_832x832_160k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-50-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 76.29
|
||||
mIoU(ms+flip): 78.09
|
||||
Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth
|
||||
- Name: icnet_r101-d8_832x832_80k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-101-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 80000
|
||||
inference time (ms/im):
|
||||
- value: 59.0
|
||||
hardware: V100
|
||||
backend: PyTorch
|
||||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (832,832)
|
||||
memory (GB): 3.08
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 70.28
|
||||
mIoU(ms+flip): 71.95
|
||||
Config: configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth
|
||||
- Name: icnet_r101-d8_832x832_160k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-101-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 73.8
|
||||
mIoU(ms+flip): 76.1
|
||||
Config: configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth
|
||||
- Name: icnet_r101-d8_in1k-pre_832x832_80k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-101-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 80000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 75.57
|
||||
mIoU(ms+flip): 77.86
|
||||
Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth
|
||||
- Name: icnet_r101-d8_in1k-pre_832x832_160k_cityscapes
|
||||
In Collection: icnet
|
||||
Metadata:
|
||||
backbone: R-101-D8
|
||||
crop size: (832,832)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 76.15
|
||||
mIoU(ms+flip): 77.98
|
||||
Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth
|
|
@ -0,0 +1,2 @@
|
|||
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
|
||||
model = dict(backbone=dict(backbone_cfg=dict(depth=101)))
|
|
@ -0,0 +1,2 @@
|
|||
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
|
||||
model = dict(backbone=dict(backbone_cfg=dict(depth=101)))
|
|
@ -0,0 +1,7 @@
|
|||
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
depth=101,
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))
|
|
@ -0,0 +1,7 @@
|
|||
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
depth=101,
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))
|
|
@ -0,0 +1,3 @@
|
|||
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18)))
|
|
@ -0,0 +1,3 @@
|
|||
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18)))
|
|
@ -0,0 +1,8 @@
|
|||
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
layer_channels=(128, 512),
|
||||
backbone_cfg=dict(
|
||||
depth=18,
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))
|
|
@ -0,0 +1,8 @@
|
|||
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
layer_channels=(128, 512),
|
||||
backbone_cfg=dict(
|
||||
depth=18,
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))
|
|
@ -0,0 +1,5 @@
|
|||
_base_ = [
|
||||
'../_base_/models/icnet_r50-d8.py',
|
||||
'../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_160k.py'
|
||||
]
|
|
@ -0,0 +1,5 @@
|
|||
_base_ = [
|
||||
'../_base_/models/icnet_r50-d8.py',
|
||||
'../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_80k.py'
|
||||
]
|
|
@ -0,0 +1,6 @@
|
|||
_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))
|
|
@ -0,0 +1,6 @@
|
|||
_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))
|
|
@ -4,6 +4,7 @@ from .bisenetv2 import BiSeNetV2
|
|||
from .cgnet import CGNet
|
||||
from .fast_scnn import FastSCNN
|
||||
from .hrnet import HRNet
|
||||
from .icnet import ICNet
|
||||
from .mit import MixVisionTransformer
|
||||
from .mobilenet_v2 import MobileNetV2
|
||||
from .mobilenet_v3 import MobileNetV3
|
||||
|
@ -18,5 +19,5 @@ __all__ = [
|
|||
'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
|
||||
'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
|
||||
'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
|
||||
'BiSeNetV1', 'BiSeNetV2'
|
||||
'BiSeNetV1', 'BiSeNetV2', 'ICNet'
|
||||
]
|
||||
|
|
|
@ -0,0 +1,165 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from mmcv.cnn import ConvModule
|
||||
from mmcv.runner import BaseModule
|
||||
|
||||
from mmseg.ops import resize
|
||||
from ..builder import BACKBONES, build_backbone
|
||||
from ..decode_heads.psp_head import PPM
|
||||
|
||||
|
||||
@BACKBONES.register_module()
class ICNet(BaseModule):
    """ICNet for Real-Time Semantic Segmentation on High-Resolution Images.

    This backbone is the implementation of
    `ICNet <https://arxiv.org/abs/1704.08545>`_.

    Three feature maps are produced from one input image: a light
    convolutional branch applied to the input as is, a branch that runs the
    wrapped backbone up to ``layer2`` on the half-scale input, and a branch
    that halves those features again and runs ``layer3``, ``layer4`` and a
    pyramid pooling module.

    Args:
        backbone_cfg (dict): Config dict to build backbone. Usually it is
            ResNet but it can also be other backbones. The built backbone
            must expose ``stem``, ``maxpool`` and ``layer1`` to ``layer4``
            because ``forward`` calls them directly.
        in_channels (int): The number of input image channels. Default: 3.
        layer_channels (Sequence[int]): The numbers of feature channels at
            layer 2 and layer 4 in ResNet. It can also be other backbones.
            Default: (512, 2048).
        light_branch_middle_channels (int): The number of channels of the
            middle layer in light branch. Default: 32.
        psp_out_channels (int): The number of channels of the output of PSP
            module. Default: 512.
        out_channels (Sequence[int]): The numbers of output feature channels
            at each branches. Default: (64, 256, 256).
        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
            Module. Default: (1, 2, 3, 6).
        conv_cfg (dict): Dictionary to construct and config conv layer.
            Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True).
        act_cfg (dict): Dictionary to construct and config act layer.
            Default: dict(type='ReLU').
        align_corners (bool): align_corners argument of F.interpolate.
            Default: False.
        init_cfg (dict or list[dict], optional): Initialization config dict.
            When None, Kaiming init is used for Conv2d, constant 1 for
            _BatchNorm and a normal distribution for Linear layers.
            Default: None.

    Raises:
        TypeError: If ``backbone_cfg`` is None.
    """

    def __init__(self,
                 backbone_cfg,
                 in_channels=3,
                 layer_channels=(512, 2048),
                 light_branch_middle_channels=32,
                 psp_out_channels=512,
                 out_channels=(64, 256, 256),
                 pool_scales=(1, 2, 3, 6),
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='ReLU'),
                 align_corners=False,
                 init_cfg=None):
        if backbone_cfg is None:
            raise TypeError('backbone_cfg must be passed from config file!')
        if init_cfg is None:
            init_cfg = [
                dict(type='Kaiming', mode='fan_out', layer='Conv2d'),
                dict(type='Constant', val=1, layer='_BatchNorm'),
                # A small standard deviation is intended here; passing
                # `mean=0.01` would keep the default std of 1.
                dict(type='Normal', std=0.01, layer='Linear')
            ]
        super(ICNet, self).__init__(init_cfg=init_cfg)
        self.align_corners = align_corners
        self.backbone = build_backbone(backbone_cfg)

        # Note: Default `ceil_mode` is false in nn.MaxPool2d, set
        # `ceil_mode=True` to keep information in the corner of feature map.
        self.backbone.maxpool = nn.MaxPool2d(
            kernel_size=3, stride=2, padding=1, ceil_mode=True)

        self.psp_modules = PPM(
            pool_scales=pool_scales,
            in_channels=layer_channels[1],
            channels=psp_out_channels,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            align_corners=align_corners)

        # Fuses the layer-4 features concatenated with one pooled map per
        # pooling scale back down to `psp_out_channels`.
        self.psp_bottleneck = ConvModule(
            layer_channels[1] + len(pool_scales) * psp_out_channels,
            psp_out_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        # Light branch: three stride-2 3x3 convs, i.e. 1/8 of the input size.
        self.conv_sub1 = nn.Sequential(
            ConvModule(
                in_channels=in_channels,
                out_channels=light_branch_middle_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg),
            ConvModule(
                in_channels=light_branch_middle_channels,
                out_channels=light_branch_middle_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg),
            ConvModule(
                in_channels=light_branch_middle_channels,
                out_channels=out_channels[0],
                kernel_size=3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg))

        # 1x1 projections of the layer-2 and PSP features.
        self.conv_sub2 = ConvModule(
            layer_channels[0],
            out_channels[1],
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg)

        self.conv_sub4 = ConvModule(
            psp_out_channels,
            out_channels[2],
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg)

    def forward(self, x):
        """Compute the three ICNet branch features.

        Args:
            x (Tensor): Input image batch.

        Returns:
            list[Tensor]: Outputs of ``conv_sub1``, ``conv_sub2`` and
                ``conv_sub4``, in that order.
        """
        output = []

        # sub 1
        output.append(self.conv_sub1(x))

        # sub 2
        x = resize(
            x,
            scale_factor=0.5,
            mode='bilinear',
            align_corners=self.align_corners)
        x = self.backbone.stem(x)
        x = self.backbone.maxpool(x)
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        output.append(self.conv_sub2(x))

        # sub 4
        x = resize(
            x,
            scale_factor=0.5,
            mode='bilinear',
            align_corners=self.align_corners)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)
        psp_outs = self.psp_modules(x) + [x]
        psp_outs = torch.cat(psp_outs, dim=1)
        x = self.psp_bottleneck(psp_outs)

        output.append(self.conv_sub4(x))

        return output
|
|
@ -1,6 +1,7 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .fpn import FPN
|
||||
from .ic_neck import ICNeck
|
||||
from .mla_neck import MLANeck
|
||||
from .multilevel_neck import MultiLevelNeck
|
||||
|
||||
__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck']
|
||||
__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck']
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
import torch.nn.functional as F
|
||||
from mmcv.cnn import ConvModule
|
||||
from mmcv.runner import BaseModule
|
||||
|
||||
from mmseg.ops import resize
|
||||
from ..builder import NECKS
|
||||
|
||||
|
||||
class CascadeFeatureFusion(BaseModule):
    """Cascade Feature Fusion Unit in ICNet.

    The low resolution feature map is resized to the spatial size of the
    high resolution one, both are projected to ``out_channels`` and the sum
    of the two projections is passed through ReLU.

    Args:
        low_channels (int): The number of input channels for
            low resolution feature map.
        high_channels (int): The number of input channels for
            high resolution feature map.
        out_channels (int): The number of output channels.
        conv_cfg (dict): Dictionary to construct and config conv layer.
            Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN').
        act_cfg (dict): Dictionary to construct and config act layer.
            Default: dict(type='ReLU').
        align_corners (bool): align_corners argument of F.interpolate.
            Default: False.
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Returns:
        x (Tensor): The output tensor of shape (N, out_channels, H, W).
        x_low (Tensor): The output tensor of shape (N, out_channels, H, W)
            for Cascade Label Guidance in auxiliary heads.
    """

    def __init__(self,
                 low_channels,
                 high_channels,
                 out_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 align_corners=False,
                 init_cfg=None):
        super(CascadeFeatureFusion, self).__init__(init_cfg=init_cfg)
        self.align_corners = align_corners

        # Both projections share the same conv / norm / act settings.
        layer_cfg = dict(
            conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
        # Dilated 3x3 conv on the (resized) low resolution branch.
        self.conv_low = ConvModule(
            low_channels,
            out_channels,
            kernel_size=3,
            padding=2,
            dilation=2,
            **layer_cfg)
        # 1x1 conv on the high resolution branch.
        self.conv_high = ConvModule(
            high_channels, out_channels, kernel_size=1, **layer_cfg)

    def forward(self, x_low, x_high):
        """Fuse a low resolution and a high resolution feature map."""
        target_size = x_high.size()[2:]
        upsampled = resize(
            x_low,
            size=target_size,
            mode='bilinear',
            align_corners=self.align_corners)
        # Note: Different from the original paper, the low resolution
        # feature is passed through `self.conv_low` rather than another
        # 1x1 conv classifier before being used for the auxiliary head.
        low_feat = self.conv_low(upsampled)
        high_feat = self.conv_high(x_high)
        fused = F.relu(low_feat + high_feat, inplace=True)
        return fused, low_feat
|
||||
|
||||
|
||||
@NECKS.register_module()
class ICNeck(BaseModule):
    """ICNet for Real-Time Semantic Segmentation on High-Resolution Images.

    This neck is the implementation of the cascade feature fusion stage of
    `ICNet <https://arxiv.org/abs/1704.08545>`_.

    Args:
        in_channels (Sequence[int]): The numbers of channels of the three
            input feature maps, given in the order the inputs are unpacked
            in ``forward``: ``(x_sub1, x_sub2, x_sub4)``.
            Default: (64, 256, 256).
        out_channels (int): The numbers of output feature channels.
            Default: 128.
        conv_cfg (dict): Dictionary to construct and config conv layer.
            Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN').
        act_cfg (dict): Dictionary to construct and config act layer.
            Default: dict(type='ReLU').
        align_corners (bool): align_corners argument of F.interpolate.
            Default: False.
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.

    Raises:
        AssertionError: If ``in_channels`` does not have exactly 3 entries.
    """

    def __init__(self,
                 in_channels=(64, 256, 256),
                 out_channels=128,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 align_corners=False,
                 init_cfg=None):
        super(ICNeck, self).__init__(init_cfg=init_cfg)
        # Kept as an assert because callers rely on AssertionError here.
        assert len(in_channels) == 3, ('Length of input channels '
                                       'must be 3!')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.align_corners = align_corners
        # Fuses `x_sub4` (low resolution) into `x_sub2` (high resolution).
        self.cff_24 = CascadeFeatureFusion(
            self.in_channels[2],
            self.in_channels[1],
            self.out_channels,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
            align_corners=self.align_corners)

        # Fuses the output of `cff_24` (low resolution) into `x_sub1`.
        self.cff_12 = CascadeFeatureFusion(
            self.out_channels,
            self.in_channels[0],
            self.out_channels,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
            align_corners=self.align_corners)

    def forward(self, inputs):
        """Run the two cascade feature fusion units.

        Args:
            inputs (Sequence[Tensor]): Exactly three feature maps, unpacked
                as ``(x_sub1, x_sub2, x_sub4)``.

        Returns:
            tuple[Tensor]: ``(x_24, x_12, x_cff_12)``.
        """
        assert len(inputs) == 3, ('Length of input feature '
                                  'maps must be 3!')

        x_sub1, x_sub2, x_sub4 = inputs
        x_cff_24, x_24 = self.cff_24(x_sub4, x_sub2)
        x_cff_12, x_12 = self.cff_12(x_cff_24, x_sub1)
        # Note: `x_cff_12` is used for decode_head,
        # `x_24` and `x_12` are used for auxiliary head.
        return x_24, x_12, x_cff_12
|
|
@ -18,6 +18,7 @@ Import:
|
|||
- configs/fp16/fp16.yml
|
||||
- configs/gcnet/gcnet.yml
|
||||
- configs/hrnet/hrnet.yml
|
||||
- configs/icnet/icnet.yml
|
||||
- configs/isanet/isanet.yml
|
||||
- configs/mobilenet_v2/mobilenet_v2.yml
|
||||
- configs/mobilenet_v3/mobilenet_v3.yml
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from mmseg.models.backbones import ICNet
|
||||
|
||||
|
||||
def test_icnet_backbone():
    """Check ICNet construction errors, layer setup and output shape."""
    # A backbone config is mandatory: passing None must be rejected.
    with pytest.raises(TypeError):
        ICNet(
            in_channels=3,
            layer_channels=(512, 2048),
            light_branch_middle_channels=32,
            psp_out_channels=512,
            out_channels=(64, 256, 256),
            backbone_cfg=None)

    # Standard forward pass with a ResNetV1c-50 backbone.
    resnet_cfg = dict(
        type='ResNetV1c',
        in_channels=3,
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True)
    model = ICNet(backbone_cfg=resnet_cfg)

    # The backbone max-pool must have been replaced by a ceil-mode one.
    assert hasattr(model.backbone, 'maxpool')
    assert model.backbone.maxpool.ceil_mode is True

    model.init_weights()
    model.train()
    batch_size = 2
    feat = model(torch.randn(batch_size, 3, 512, 1024))

    # The first three pyramid pooling branches use scales 1, 2 and 3.
    for branch_idx, scale in enumerate((1, 2, 3)):
        assert model.psp_modules[branch_idx][0].output_size == scale
    assert model.psp_bottleneck.padding == 1
    assert model.conv_sub1[0].padding == 1

    assert len(feat) == 3
    assert feat[0].shape == torch.Size([batch_size, 64, 64, 128])
|
|
@ -50,22 +50,22 @@ def test_swin_transformer():
|
|||
model(temp)
|
||||
|
||||
# Test normal inference
|
||||
temp = torch.randn((1, 3, 512, 512))
|
||||
temp = torch.randn((1, 3, 256, 256))
|
||||
model = SwinTransformer()
|
||||
outs = model(temp)
|
||||
assert outs[0].shape == (1, 96, 128, 128)
|
||||
assert outs[1].shape == (1, 192, 64, 64)
|
||||
assert outs[2].shape == (1, 384, 32, 32)
|
||||
assert outs[3].shape == (1, 768, 16, 16)
|
||||
assert outs[0].shape == (1, 96, 64, 64)
|
||||
assert outs[1].shape == (1, 192, 32, 32)
|
||||
assert outs[2].shape == (1, 384, 16, 16)
|
||||
assert outs[3].shape == (1, 768, 8, 8)
|
||||
|
||||
# Test abnormal inference size
|
||||
temp = torch.randn((1, 3, 511, 511))
|
||||
temp = torch.randn((1, 3, 255, 255))
|
||||
model = SwinTransformer()
|
||||
outs = model(temp)
|
||||
assert outs[0].shape == (1, 96, 128, 128)
|
||||
assert outs[1].shape == (1, 192, 64, 64)
|
||||
assert outs[2].shape == (1, 384, 32, 32)
|
||||
assert outs[3].shape == (1, 768, 16, 16)
|
||||
assert outs[0].shape == (1, 96, 64, 64)
|
||||
assert outs[1].shape == (1, 192, 32, 32)
|
||||
assert outs[2].shape == (1, 384, 16, 16)
|
||||
assert outs[3].shape == (1, 768, 8, 8)
|
||||
|
||||
# Test abnormal inference size
|
||||
temp = torch.randn((1, 3, 112, 137))
|
||||
|
@ -89,7 +89,7 @@ def test_swin_transformer():
|
|||
assert not p.requires_grad
|
||||
|
||||
# Test Swin with checkpoint forward
|
||||
temp = torch.randn((1, 3, 224, 224))
|
||||
temp = torch.randn((1, 3, 112, 112))
|
||||
model = SwinTransformer(with_cp=True)
|
||||
for m in model.modules():
|
||||
if isinstance(m, SwinBlock):
|
||||
|
|
|
@ -345,7 +345,7 @@ def test_unet():
|
|||
# case is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=4,
|
||||
strides=(1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2),
|
||||
|
@ -362,7 +362,7 @@ def test_unet():
|
|||
# case is 16.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -379,7 +379,7 @@ def test_unet():
|
|||
# case is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -396,7 +396,7 @@ def test_unet():
|
|||
# case is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 2, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -413,7 +413,7 @@ def test_unet():
|
|||
# case is 32.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=6,
|
||||
strides=(1, 1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2, 2),
|
||||
|
@ -428,7 +428,7 @@ def test_unet():
|
|||
# Check if num_stages matchs strides, len(strides)=num_stages
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -443,7 +443,7 @@ def test_unet():
|
|||
# Check if num_stages matchs strides, len(enc_num_convs)=num_stages
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2),
|
||||
|
@ -458,7 +458,7 @@ def test_unet():
|
|||
# Check if num_stages matchs strides, len(dec_num_convs)=num_stages-1
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -473,7 +473,7 @@ def test_unet():
|
|||
# Check if num_stages matchs strides, len(downsamples)=num_stages-1
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -488,7 +488,7 @@ def test_unet():
|
|||
# Check if num_stages matchs strides, len(enc_dilations)=num_stages
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -503,7 +503,7 @@ def test_unet():
|
|||
# Check if num_stages matchs strides, len(dec_dilations)=num_stages-1
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -517,7 +517,7 @@ def test_unet():
|
|||
# test UNet norm_eval=True
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -532,7 +532,7 @@ def test_unet():
|
|||
# test UNet norm_eval=False
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -547,7 +547,7 @@ def test_unet():
|
|||
# test UNet forward and outputs. The whole downsample rate is 16.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -558,16 +558,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 8, 8])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 8, 8])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -578,16 +578,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 2, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -598,16 +598,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 4.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -618,16 +618,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 4.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -638,16 +638,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -658,16 +658,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 4.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -678,16 +678,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 2.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -698,16 +698,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 64, 64])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 64, 64])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 64, 64])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 64, 64])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 64, 64])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 64, 64])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 1.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -718,16 +718,16 @@ def test_unet():
|
|||
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 128, 128])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 128, 128])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 128, 128])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 128, 128])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 128, 128])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 128, 128])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 128, 128])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 16.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -737,16 +737,16 @@ def test_unet():
|
|||
dec_dilations=(1, 1, 1, 1))
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 8, 8])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 8, 8])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -756,16 +756,16 @@ def test_unet():
|
|||
dec_dilations=(1, 1, 1, 1))
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 8.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 2, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -775,16 +775,16 @@ def test_unet():
|
|||
dec_dilations=(1, 1, 1, 1))
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet forward and outputs. The whole downsample rate is 4.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -794,16 +794,16 @@ def test_unet():
|
|||
dec_dilations=(1, 1, 1, 1))
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
||||
# test UNet init_weights method.
|
||||
unet = UNet(
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
base_channels=4,
|
||||
num_stages=5,
|
||||
strides=(1, 2, 2, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
|
@ -815,8 +815,8 @@ def test_unet():
|
|||
unet.init_weights()
|
||||
x = torch.randn(2, 3, 128, 128)
|
||||
x_outs = unet(x)
|
||||
assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
|
||||
assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
|
||||
assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
|
||||
assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
|
||||
assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
|
||||
assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from mmseg.models.necks import ICNeck
|
||||
from mmseg.models.necks.ic_neck import CascadeFeatureFusion
|
||||
from ..test_heads.utils import _conv_has_norm, to_cuda
|
||||
|
||||
|
||||
def test_ic_neck():
    """Check ICNeck norm layer construction and output shapes."""
    # test with norm_cfg
    neck = ICNeck(
        in_channels=(64, 256, 256),
        out_channels=128,
        norm_cfg=dict(type='SyncBN'),
        align_corners=False)
    assert _conv_has_norm(neck, sync_bn=True)

    inputs = [
        torch.randn(1, 64, 128, 256),
        torch.randn(1, 256, 65, 129),
        torch.randn(1, 256, 32, 64)
    ]
    neck = ICNeck(
        in_channels=(64, 256, 256),
        out_channels=128,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False)
    if torch.cuda.is_available():
        neck, inputs = to_cuda(neck, inputs)

    outputs = neck(inputs)
    assert len(outputs) == 3
    # `x_24` takes the spatial size of the second input.
    assert outputs[0].shape == (1, 128, 65, 129)
    # `x_12` and the fused `x_cff_12` take the size of the first input.
    assert outputs[1].shape == (1, 128, 128, 256)
    assert outputs[2].shape == (1, 128, 128, 256)
|
||||
|
||||
|
||||
def test_ic_neck_cascade_feature_fusion():
    """Check the channel setup of both CascadeFeatureFusion projections."""
    fusion = CascadeFeatureFusion(256, 256, 128)
    # Both branches take 256 input channels and project to 128.
    for branch in (fusion.conv_low, fusion.conv_high):
        assert branch.in_channels == 256
        assert branch.out_channels == 128
|
||||
|
||||
|
||||
def test_ic_neck_input_channels():
    """ICNeck must reject an `in_channels` that is not of length 3."""
    # ICNet Neck input channel constraints.
    invalid_kwargs = dict(
        in_channels=(64, 256, 256, 256),
        out_channels=128,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False)
    with pytest.raises(AssertionError):
        ICNeck(**invalid_kwargs)
|
Loading…
Reference in New Issue