[Feature] Support ISA module (#70)
* add isa module
* use more readable names, add more comments and exp results
* add unittests
* remove redundant docstring
* Apply suggestions from code review

Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>

* fix unittest
* Update configs
* add results
* update yml
* Update README

Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>
Co-authored-by: xiexinch <xinchen.xie@qq.com>

pull/1801/head
parent
6762958ea0
commit
b0787b8be2
|
@ -88,6 +88,7 @@ Supported methods:
|
||||||
- [x] [ANN (ICCV'2019)](configs/ann)
|
- [x] [ANN (ICCV'2019)](configs/ann)
|
||||||
- [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet)
|
- [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet)
|
||||||
- [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn)
|
- [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn)
|
||||||
|
- [x] [ISANet (ArXiv'2019/IJCV'2021)](configs/isanet)
|
||||||
- [x] [OCRNet (ECCV'2020)](configs/ocrnet)
|
- [x] [OCRNet (ECCV'2020)](configs/ocrnet)
|
||||||
- [x] [DNLNet (ECCV'2020)](configs/dnlnet)
|
- [x] [DNLNet (ECCV'2020)](configs/dnlnet)
|
||||||
- [x] [PointRend (CVPR'2020)](configs/point_rend)
|
- [x] [PointRend (CVPR'2020)](configs/point_rend)
|
||||||
|
|
|
@ -87,6 +87,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
|
||||||
- [x] [ANN (ICCV'2019)](configs/ann)
|
- [x] [ANN (ICCV'2019)](configs/ann)
|
||||||
- [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet)
|
- [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet)
|
||||||
- [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn)
|
- [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn)
|
||||||
|
- [x] [ISANet (ArXiv'2019/IJCV'2021)](configs/isanet)
|
||||||
- [x] [OCRNet (ECCV'2020)](configs/ocrnet)
|
- [x] [OCRNet (ECCV'2020)](configs/ocrnet)
|
||||||
- [x] [DNLNet (ECCV'2020)](configs/dnlnet)
|
- [x] [DNLNet (ECCV'2020)](configs/dnlnet)
|
||||||
- [x] [PointRend (CVPR'2020)](configs/point_rend)
|
- [x] [PointRend (CVPR'2020)](configs/point_rend)
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
# model settings
|
||||||
|
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||||
|
model = dict(
|
||||||
|
type='EncoderDecoder',
|
||||||
|
pretrained='open-mmlab://resnet50_v1c',
|
||||||
|
backbone=dict(
|
||||||
|
type='ResNetV1c',
|
||||||
|
depth=50,
|
||||||
|
num_stages=4,
|
||||||
|
out_indices=(0, 1, 2, 3),
|
||||||
|
dilations=(1, 1, 2, 4),
|
||||||
|
strides=(1, 2, 1, 1),
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
norm_eval=False,
|
||||||
|
style='pytorch',
|
||||||
|
contract_dilation=True),
|
||||||
|
decode_head=dict(
|
||||||
|
type='ISAHead',
|
||||||
|
in_channels=2048,
|
||||||
|
in_index=3,
|
||||||
|
channels=512,
|
||||||
|
isa_channels=256,
|
||||||
|
down_factor=(8, 8),
|
||||||
|
dropout_ratio=0.1,
|
||||||
|
num_classes=19,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
align_corners=False,
|
||||||
|
loss_decode=dict(
|
||||||
|
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||||
|
auxiliary_head=dict(
|
||||||
|
type='FCNHead',
|
||||||
|
in_channels=1024,
|
||||||
|
in_index=2,
|
||||||
|
channels=256,
|
||||||
|
num_convs=1,
|
||||||
|
concat_input=False,
|
||||||
|
dropout_ratio=0.1,
|
||||||
|
num_classes=19,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
align_corners=False,
|
||||||
|
loss_decode=dict(
|
||||||
|
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||||
|
# model training and testing settings
|
||||||
|
train_cfg=dict(),
|
||||||
|
test_cfg=dict(mode='whole'))
|
|
@ -0,0 +1,57 @@
|
||||||
|
# Interlaced Sparse Self-Attention for Semantic Segmentation
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
<!-- [ALGORITHM] -->
|
||||||
|
|
||||||
|
```
|
||||||
|
@article{huang2019isa,
|
||||||
|
title={Interlaced Sparse Self-Attention for Semantic Segmentation},
|
||||||
|
author={Huang, Lang and Yuan, Yuhui and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
|
||||||
|
journal={arXiv preprint arXiv:1907.12273},
|
||||||
|
year={2019}
|
||||||
|
}
|
||||||
|
|
||||||
|
```

The technical report above is also presented at:

```
|
||||||
|
@article{yuan2021ocnet,
|
||||||
|
title={OCNet: Object Context for Semantic Segmentation},
|
||||||
|
author={Yuan, Yuhui and Huang, Lang and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
|
||||||
|
journal={International Journal of Computer Vision},
|
||||||
|
pages={1--24},
|
||||||
|
year={2021},
|
||||||
|
publisher={Springer}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Results and models
|
||||||
|
|
||||||
|
### Cityscapes
|
||||||
|
|
||||||
|
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config |download |
|
||||||
|
| --------|----------|-----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| ISANet | R-50-D8 | 512x1024 | 40000 | 5.869 | 2.91 | 78.49 | 79.44 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json) |
|
||||||
|
| ISANet | R-50-D8 | 512x1024 | 80000 | 5.869 | 2.91 | 78.68 | 80.25 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json) |
|
||||||
|
| ISANet | R-50-D8 | 769x769 | 40000 | 6.759 | 1.54 | 78.70 | 80.28 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json) |
|
||||||
|
| ISANet | R-50-D8 | 769x769 | 80000 | 6.759 | 1.54 | 79.29 | 80.53 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json) |
|
||||||
|
| ISANet | R-101-D8 | 512x1024 | 40000 | 9.425 | 2.35 | 79.58 | 81.05 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json) |
|
||||||
|
| ISANet | R-101-D8 | 512x1024 | 80000 | 9.425 | 2.35 | 80.32 | 81.58 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json) |
|
||||||
|
| ISANet | R-101-D8 | 769x769 | 40000 | 10.815 | 0.92 | 79.68 | 80.95 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json) |
|
||||||
|
| ISANet | R-101-D8 | 769x769 | 80000 | 10.815 | 0.92 | 80.61 | 81.59 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json) |
|
||||||
|
|
||||||
|
### ADE20K
|
||||||
|
|
||||||
|
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config |download |
|
||||||
|
| --------|----------|-----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| ISANet | R-50-D8 | 512x512 | 80000 | 9.0 | 22.55 | 41.12 | 42.35 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json)|
|
||||||
|
| ISANet | R-50-D8 | 512x512 | 160000 | 9.0 | 22.55 | 42.59 | 43.07 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json)|
|
||||||
|
| ISANet | R-101-D8 | 512x512 | 80000 | 12.562 | 10.56 | 43.51 | 44.38 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json)|
|
||||||
|
| ISANet | R-101-D8 | 512x512 | 160000 | 12.562 | 10.56 | 43.80 | 45.40 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json)|
|
||||||
|
|
||||||
|
### Pascal VOC 2012 + Aug
|
||||||
|
|
||||||
|
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config |download |
|
||||||
|
| --------|----------|-----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| ISANet | R-50-D8 | 512x512 | 20000 | 5.9 | 23.08 | 76.78 | 77.79 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json)|
|
||||||
|
| ISANet | R-50-D8 | 512x512 | 40000 | 5.9 | 23.08 | 76.20 | 77.22 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json)|
|
||||||
|
| ISANet | R-101-D8 | 512x512 | 20000 | 9.465 | 7.42 | 78.46 | 79.16 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json)|
|
||||||
|
| ISANet | R-101-D8 | 512x512 | 40000 | 9.465 | 7.42 | 78.12 | 79.04 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py)|[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json)|
|
|
@ -0,0 +1,360 @@
|
||||||
|
Collections:
|
||||||
|
- Metadata:
|
||||||
|
Training Data:
|
||||||
|
- Cityscapes
|
||||||
|
- ADE20K
|
||||||
|
- Pascal VOC 2012 + Aug
|
||||||
|
Name: isanet
|
||||||
|
Models:
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (512,1024)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,1024)
|
||||||
|
value: 343.64
|
||||||
|
lr schd: 40000
|
||||||
|
memory (GB): 5.869
|
||||||
|
Name: isanet_r50-d8_512x1024_40k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 78.49
|
||||||
|
mIoU(ms+flip): 79.44
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (512,1024)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,1024)
|
||||||
|
value: 343.64
|
||||||
|
lr schd: 80000
|
||||||
|
memory (GB): 5.869
|
||||||
|
Name: isanet_r50-d8_512x1024_80k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 78.68
|
||||||
|
mIoU(ms+flip): 80.25
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (769,769)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (769,769)
|
||||||
|
value: 649.35
|
||||||
|
lr schd: 40000
|
||||||
|
memory (GB): 6.759
|
||||||
|
Name: isanet_r50-d8_769x769_40k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 78.7
|
||||||
|
mIoU(ms+flip): 80.28
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (769,769)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (769,769)
|
||||||
|
value: 649.35
|
||||||
|
lr schd: 80000
|
||||||
|
memory (GB): 6.759
|
||||||
|
Name: isanet_r50-d8_769x769_80k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 79.29
|
||||||
|
mIoU(ms+flip): 80.53
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (512,1024)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,1024)
|
||||||
|
value: 425.53
|
||||||
|
lr schd: 40000
|
||||||
|
memory (GB): 9.425
|
||||||
|
Name: isanet_r101-d8_512x1024_40k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 79.58
|
||||||
|
mIoU(ms+flip): 81.05
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (512,1024)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,1024)
|
||||||
|
value: 425.53
|
||||||
|
lr schd: 80000
|
||||||
|
memory (GB): 9.425
|
||||||
|
Name: isanet_r101-d8_512x1024_80k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 80.32
|
||||||
|
mIoU(ms+flip): 81.58
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (769,769)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (769,769)
|
||||||
|
value: 1086.96
|
||||||
|
lr schd: 40000
|
||||||
|
memory (GB): 10.815
|
||||||
|
Name: isanet_r101-d8_769x769_40k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 79.68
|
||||||
|
mIoU(ms+flip): 80.95
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (769,769)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (769,769)
|
||||||
|
value: 1086.96
|
||||||
|
lr schd: 80000
|
||||||
|
memory (GB): 10.815
|
||||||
|
Name: isanet_r101-d8_769x769_80k_cityscapes
|
||||||
|
Results:
|
||||||
|
Dataset: Cityscapes
|
||||||
|
Metrics:
|
||||||
|
mIoU: 80.61
|
||||||
|
mIoU(ms+flip): 81.59
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 44.35
|
||||||
|
lr schd: 80000
|
||||||
|
memory (GB): 9.0
|
||||||
|
Name: isanet_r50-d8_512x512_80k_ade20k
|
||||||
|
Results:
|
||||||
|
Dataset: ADE20K
|
||||||
|
Metrics:
|
||||||
|
mIoU: 41.12
|
||||||
|
mIoU(ms+flip): 42.35
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 44.35
|
||||||
|
lr schd: 160000
|
||||||
|
memory (GB): 9.0
|
||||||
|
Name: isanet_r50-d8_512x512_160k_ade20k
|
||||||
|
Results:
|
||||||
|
Dataset: ADE20K
|
||||||
|
Metrics:
|
||||||
|
mIoU: 42.59
|
||||||
|
mIoU(ms+flip): 43.07
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 94.7
|
||||||
|
lr schd: 80000
|
||||||
|
memory (GB): 12.562
|
||||||
|
Name: isanet_r101-d8_512x512_80k_ade20k
|
||||||
|
Results:
|
||||||
|
Dataset: ADE20K
|
||||||
|
Metrics:
|
||||||
|
mIoU: 43.51
|
||||||
|
mIoU(ms+flip): 44.38
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 94.7
|
||||||
|
lr schd: 160000
|
||||||
|
memory (GB): 12.562
|
||||||
|
Name: isanet_r101-d8_512x512_160k_ade20k
|
||||||
|
Results:
|
||||||
|
Dataset: ADE20K
|
||||||
|
Metrics:
|
||||||
|
mIoU: 43.8
|
||||||
|
mIoU(ms+flip): 45.4
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 43.33
|
||||||
|
lr schd: 20000
|
||||||
|
memory (GB): 5.9
|
||||||
|
Name: isanet_r50-d8_512x512_20k_voc12aug
|
||||||
|
Results:
|
||||||
|
Dataset: Pascal VOC 2012 + Aug
|
||||||
|
Metrics:
|
||||||
|
mIoU: 76.78
|
||||||
|
mIoU(ms+flip): 77.79
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth
|
||||||
|
- Config: configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-50-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 43.33
|
||||||
|
lr schd: 40000
|
||||||
|
memory (GB): 5.9
|
||||||
|
Name: isanet_r50-d8_512x512_40k_voc12aug
|
||||||
|
Results:
|
||||||
|
Dataset: Pascal VOC 2012 + Aug
|
||||||
|
Metrics:
|
||||||
|
mIoU: 76.2
|
||||||
|
mIoU(ms+flip): 77.22
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 134.77
|
||||||
|
lr schd: 20000
|
||||||
|
memory (GB): 9.465
|
||||||
|
Name: isanet_r101-d8_512x512_20k_voc12aug
|
||||||
|
Results:
|
||||||
|
Dataset: Pascal VOC 2012 + Aug
|
||||||
|
Metrics:
|
||||||
|
mIoU: 78.46
|
||||||
|
mIoU(ms+flip): 79.16
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth
|
||||||
|
- Config: configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py
|
||||||
|
In Collection: isanet
|
||||||
|
Metadata:
|
||||||
|
backbone: R-101-D8
|
||||||
|
crop size: (512,512)
|
||||||
|
inference time (ms/im):
|
||||||
|
- backend: PyTorch
|
||||||
|
batch size: 1
|
||||||
|
hardware: V100
|
||||||
|
mode: FP32
|
||||||
|
resolution: (512,512)
|
||||||
|
value: 134.77
|
||||||
|
lr schd: 40000
|
||||||
|
memory (GB): 9.465
|
||||||
|
Name: isanet_r101-d8_512x512_40k_voc12aug
|
||||||
|
Results:
|
||||||
|
Dataset: Pascal VOC 2012 + Aug
|
||||||
|
Metrics:
|
||||||
|
mIoU: 78.12
|
||||||
|
mIoU(ms+flip): 79.04
|
||||||
|
Task: Semantic Segmentation
|
||||||
|
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_512x1024_40k_cityscapes.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_512x1024_80k_cityscapes.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_512x512_160k_ade20k.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_512x512_20k_voc12aug.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_512x512_40k_voc12aug.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_512x512_80k_ade20k.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_769x769_40k_cityscapes.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
||||||
|
_base_ = './isanet_r50-d8_769x769_80k_cityscapes.py'
|
||||||
|
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,4 @@
|
||||||
|
_base_ = [
|
||||||
|
'../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py',
|
||||||
|
'../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
|
||||||
|
]
|
|
@ -0,0 +1,4 @@
|
||||||
|
_base_ = [
|
||||||
|
'../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py',
|
||||||
|
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
|
||||||
|
]
|
|
@ -0,0 +1,6 @@
|
||||||
|
_base_ = [
|
||||||
|
'../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py',
|
||||||
|
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
|
||||||
|
]
|
||||||
|
model = dict(
|
||||||
|
decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
|
|
@ -0,0 +1,7 @@
|
||||||
|
_base_ = [
|
||||||
|
'../_base_/models/isanet_r50-d8.py',
|
||||||
|
'../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
|
||||||
|
'../_base_/schedules/schedule_20k.py'
|
||||||
|
]
|
||||||
|
model = dict(
|
||||||
|
decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
|
|
@ -0,0 +1,7 @@
|
||||||
|
_base_ = [
|
||||||
|
'../_base_/models/isanet_r50-d8.py',
|
||||||
|
'../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
|
||||||
|
'../_base_/schedules/schedule_40k.py'
|
||||||
|
]
|
||||||
|
model = dict(
|
||||||
|
decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
|
|
@ -0,0 +1,6 @@
|
||||||
|
_base_ = [
|
||||||
|
'../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py',
|
||||||
|
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
|
||||||
|
]
|
||||||
|
model = dict(
|
||||||
|
decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Base configs, one per line: model, dataset, runtime and schedule.
_base_ = [
    '../_base_/models/isanet_r50-d8.py',
    '../_base_/datasets/cityscapes_769x769.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Enable align_corners on both heads and use sliding-window testing with
# 769x769 crops and a 513x513 stride.
model = dict(
    decode_head=dict(align_corners=True),
    auxiliary_head=dict(align_corners=True),
    test_cfg=dict(
        mode='slide',
        crop_size=(769, 769),
        stride=(513, 513),
    ),
)
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Base configs, one per line: model, dataset, runtime and schedule.
_base_ = [
    '../_base_/models/isanet_r50-d8.py',
    '../_base_/datasets/cityscapes_769x769.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Enable align_corners on both heads and use sliding-window testing with
# 769x769 crops and a 513x513 stride.
model = dict(
    decode_head=dict(align_corners=True),
    auxiliary_head=dict(align_corners=True),
    test_cfg=dict(
        mode='slide',
        crop_size=(769, 769),
        stride=(513, 513),
    ),
)
|
|
@ -12,6 +12,7 @@ from .enc_head import EncHead
|
||||||
from .fcn_head import FCNHead
|
from .fcn_head import FCNHead
|
||||||
from .fpn_head import FPNHead
|
from .fpn_head import FPNHead
|
||||||
from .gc_head import GCHead
|
from .gc_head import GCHead
|
||||||
|
from .isa_head import ISAHead
|
||||||
from .lraspp_head import LRASPPHead
|
from .lraspp_head import LRASPPHead
|
||||||
from .nl_head import NLHead
|
from .nl_head import NLHead
|
||||||
from .ocr_head import OCRHead
|
from .ocr_head import OCRHead
|
||||||
|
@ -30,5 +31,5 @@ __all__ = [
|
||||||
'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
|
'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
|
||||||
'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead',
|
'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead',
|
||||||
'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead',
|
'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead',
|
||||||
'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegformerHead'
|
'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegformerHead', 'ISAHead'
|
||||||
]
|
]
|
||||||
|
|
|
@ -0,0 +1,142 @@
|
||||||
|
import math
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from mmcv.cnn import ConvModule
|
||||||
|
|
||||||
|
from ..builder import HEADS
|
||||||
|
from ..utils import SelfAttentionBlock as _SelfAttentionBlock
|
||||||
|
from .decode_head import BaseDecodeHead
|
||||||
|
|
||||||
|
|
||||||
|
class SelfAttentionBlock(_SelfAttentionBlock):
    """Self-attention block that uses one feature map as both key and query.

    The parent block is built with ``with_out=False``; this subclass creates
    its own single-conv ``output_project`` and applies it to the attention
    context returned by the parent.

    Args:
        in_channels (int): Input channels of key/query feature. Also used as
            the output channels of the block.
        channels (int): Output channels of key/query transform.
        conv_cfg (dict | None): Config of conv layers.
        norm_cfg (dict | None): Config of norm layers.
        act_cfg (dict | None): Config of activation layers.
    """

    def __init__(self, in_channels, channels, conv_cfg, norm_cfg, act_cfg):
        # The same layer configs go to the parent block and to the extra
        # output projection, so collect them once.
        layer_cfgs = dict(
            conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
        super().__init__(
            # channel layout: key and query share the same input feature
            key_in_channels=in_channels,
            query_in_channels=in_channels,
            channels=channels,
            out_channels=in_channels,
            # key/query branches
            share_key_query=False,
            query_downsample=None,
            key_downsample=None,
            key_query_num_convs=2,
            key_query_norm=True,
            # value branch and attention options
            value_out_num_convs=1,
            value_out_norm=False,
            matmul_norm=True,
            with_out=False,
            **layer_cfgs)

        self.output_project = self.build_project(
            in_channels,
            in_channels,
            num_convs=1,
            use_conv_module=True,
            **layer_cfgs)

    def forward(self, x):
        """Forward function."""
        # Attend from ``x`` to itself, then project the resulting context.
        attended = super().forward(x, x)
        return self.output_project(attended)
|
||||||
|
|
||||||
|
|
||||||
|
@HEADS.register_module()
class ISAHead(BaseDecodeHead):
    """Interlaced Sparse Self-Attention for Semantic Segmentation.

    This head is the implementation of `ISA
    <https://arxiv.org/abs/1907.12273>`_.

    The feature map is tiled into local groups of size ``down_factor``.
    Attention is applied first across groups (global relation) and then
    inside each group (local relation); the result is concatenated with the
    input feature and fused by a 1x1 conv before classification.

    Args:
        isa_channels (int): The channels of ISA Module.
        down_factor (tuple[int]): The local group size of ISA, given as
            (height, width).
    """

    def __init__(self, isa_channels, down_factor=(8, 8), **kwargs):
        super().__init__(**kwargs)
        self.down_factor = down_factor

        # Every sub-module is built with the head's own layer configs.
        layer_cfgs = dict(
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

        self.in_conv = ConvModule(
            self.in_channels, self.channels, 3, padding=1, **layer_cfgs)
        self.global_relation = SelfAttentionBlock(
            self.channels, isa_channels, **layer_cfgs)
        self.local_relation = SelfAttentionBlock(
            self.channels, isa_channels, **layer_cfgs)
        # Takes the attention output concatenated with the residual feature.
        self.out_conv = ConvModule(
            self.channels * 2, self.channels, 1, **layer_cfgs)

    def forward(self, inputs):
        """Forward function."""
        feats = self.in_conv(self._transform_inputs(inputs))
        shortcut = feats

        batch, chans, height, width = feats.size()
        loc_h, loc_w = self.down_factor  # local group size along H and W
        glb_h = math.ceil(height / loc_h)  # number of groups along H
        glb_w = math.ceil(width / loc_w)  # number of groups along W
        pad_h = glb_h * loc_h - height
        pad_w = glb_w * loc_w - width
        top, left = pad_h // 2, pad_w // 2
        padded = pad_h > 0 or pad_w > 0
        if padded:
            # Size is not divisible by the group size: pad both sides, with
            # the odd pixel (if any) going to the bottom/right.
            feats = F.pad(feats, (left, pad_w - left, top, pad_h - top))

        # Global relation: gather pixels that share the same offset inside
        # their local group -> (n * loc_h * loc_w, c, glb_h, glb_w).
        feats = feats.view(batch, chans, glb_h, loc_h, glb_w, loc_w)
        feats = feats.permute(0, 3, 5, 1, 2, 4).reshape(
            -1, chans, glb_h, glb_w)
        feats = self.global_relation(feats)

        # Local relation: gather the pixels of each local group
        # -> (n * glb_h * glb_w, c, loc_h, loc_w).
        feats = feats.view(batch, loc_h, loc_w, chans, glb_h, glb_w)
        feats = feats.permute(0, 4, 5, 3, 1, 2).reshape(
            -1, chans, loc_h, loc_w)
        feats = self.local_relation(feats)

        # Put every pixel back at its original spatial position
        # -> (n, c, glb_h * loc_h, glb_w * loc_w).
        feats = feats.view(batch, glb_h, glb_w, chans, loc_h, loc_w)
        feats = feats.permute(0, 3, 1, 4, 2, 5).reshape(
            batch, chans, glb_h * loc_h, glb_w * loc_w)
        if padded:
            # Crop the padding added above.
            feats = feats[:, :, top:top + height, left:left + width]

        fused = self.out_conv(torch.cat([feats, shortcut], dim=1))
        return self.cls_seg(fused)
|
|
@ -16,6 +16,7 @@ Import:
|
||||||
- configs/fp16/fp16.yml
|
- configs/fp16/fp16.yml
|
||||||
- configs/gcnet/gcnet.yml
|
- configs/gcnet/gcnet.yml
|
||||||
- configs/hrnet/hrnet.yml
|
- configs/hrnet/hrnet.yml
|
||||||
|
- configs/isanet/isanet.yml
|
||||||
- configs/mobilenet_v2/mobilenet_v2.yml
|
- configs/mobilenet_v2/mobilenet_v2.yml
|
||||||
- configs/mobilenet_v3/mobilenet_v3.yml
|
- configs/mobilenet_v3/mobilenet_v3.yml
|
||||||
- configs/nonlocal_net/nonlocal_net.yml
|
- configs/nonlocal_net/nonlocal_net.yml
|
||||||
|
|
|
@ -175,6 +175,11 @@ def test_emanet_forward():
|
||||||
'emanet/emanet_r50-d8_512x1024_80k_cityscapes.py')
|
'emanet/emanet_r50-d8_512x1024_80k_cityscapes.py')
|
||||||
|
|
||||||
|
|
||||||
|
def test_isanet_forward():
    """Pass the ISANet r50-d8 Cityscapes config to the forward-test helper."""
    config_file = 'isanet/isanet_r50-d8_512x1024_40k_cityscapes.py'
    _test_encoder_decoder_forward(config_file)
|
||||||
|
|
||||||
|
|
||||||
def get_world_size(process_group):
|
def get_world_size(process_group):
|
||||||
|
|
||||||
return 1
|
return 1
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from mmseg.models.decode_heads import ISAHead
|
||||||
|
from .utils import to_cuda
|
||||||
|
|
||||||
|
|
||||||
|
def test_isa_head():
    """Check that ISAHead keeps the spatial size of a 45x45 input.

    The input side (45) is not a multiple of the ``down_factor`` (8).
    """
    feats = [torch.randn(1, 32, 45, 45)]
    head_kwargs = dict(
        in_channels=32,
        channels=16,
        num_classes=19,
        isa_channels=16,
        down_factor=(8, 8))
    head = ISAHead(**head_kwargs)
    if torch.cuda.is_available():
        head, feats = to_cuda(head, feats)

    logits = head(feats)
    expected_shape = (1, head.num_classes, 45, 45)
    assert logits.shape == expected_shape
|
Loading…
Reference in New Issue