Merge branch 'master' of https://github.com/open-mmlab/mmsegmentation into open-mmlab-master

# Conflicts:
#	docs/model_zoo.md
#	mmseg/models/backbones/__init__.py
#	tests/test_models/test_backbone.py
pull/58/head
johnzja 2020-08-17 20:51:44 +08:00
commit 3cbfbf6434
36 changed files with 884 additions and 2827 deletions


@ -47,6 +47,9 @@ jobs:
- torch: 1.5.0+cu101
torchvision: 0.6.0+cu101
python-version: 3.7
- torch: 1.6.0+cu101
torchvision: 0.7.0+cu101
python-version: 3.7
steps:
- uses: actions/checkout@v2

.gitignore vendored

@ -103,7 +103,6 @@ venv.bak/
# mypy
.mypy_cache/
mmseg/version.py
data
.vscode
.idea


@ -44,7 +44,8 @@ This project is released under the [Apache 2.0 license](LICENSE).
## Changelog
v0.5.0 was released in 10/7/2020.
v0.5.1 was released in 11/08/2020.
Please refer to [changelog.md](docs/changelog.md) for details and release history.
## Benchmark and model zoo
@ -53,7 +54,8 @@ Results and models are available in the [model zoo](docs/model_zoo.md).
Supported backbones:
- [x] ResNet
- [x] ResNeXt
- [x] [HRNet](configs/hrnet/README.md)
- [x] [ResNeSt](configs/resnest/README.md)
Supported methods:
- [x] [FCN](configs/fcn)


@ -0,0 +1,47 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='CascadeEncoderDecoder',
num_stages=2,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=[
dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
drop_out_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='OCRHead',
in_channels=2048,
in_index=3,
channels=512,
ocr_channels=256,
drop_out_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
])
# model training and testing settings
train_cfg = dict()
test_cfg = dict(mode='whole')
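For reference, a minimal sketch of how a base model config like the one above is consumed; the config path follows the `_base_` references in the derived configs below, and building the model this way (outside the training script) is only illustrative.

```python
# Illustrative only: build the CascadeEncoderDecoder defined by the config above.
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile('configs/_base_/models/ocrnet_r50-d8.py')
model = build_segmentor(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
print(type(model).__name__)  # expected: CascadeEncoderDecoder
```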


@ -1,18 +1,28 @@
# Object-Contextual Representations for Semantic Segmentation
## Introduction
```
@article{YuanW18,
title={Ocnet: Object context network for scene parsing},
author={Yuhui Yuan and Jingdong Wang},
booktitle={arXiv preprint arXiv:1809.00916},
year={2018}
}
@article{YuanCW20,
title={Object-Contextual Representations for Semantic Segmentation},
author={Yuhui Yuan and Xilin Chen and Jingdong Wang},
booktitle={ECCV},
year={2020}
}
```
## Results and models
### Cityscapes
#### HRNet backbone
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|----------|
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) |
@ -25,6 +35,16 @@
| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) |
#### ResNet backbone
| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|--------------------|-----------|--------|----------|-----------|----------------|------|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | 80.09 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes-02ac0f13.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) |
| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 8.8 | 3.02 | 80.30 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes-db500f80.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) |
| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | 80.81 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes-78688424.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) |
### ADE20K
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|----------|


@ -0,0 +1,9 @@
_base_ = [
'../_base_/models/ocrnet_r50-d8.py',
'../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py',
'../_base_/schedules/schedule_40k.py'
]
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
optimizer = dict(lr=0.02)
lr_config = dict(min_lr=2e-4)


@ -0,0 +1,7 @@
_base_ = [
'../_base_/models/ocrnet_r50-d8.py',
'../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py',
'../_base_/schedules/schedule_40k.py'
]
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))


@ -0,0 +1,9 @@
_base_ = [
'../_base_/models/ocrnet_r50-d8.py',
'../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py',
'../_base_/schedules/schedule_80k.py'
]
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
optimizer = dict(lr=0.02)
lr_config = dict(min_lr=2e-4)


@ -0,0 +1,30 @@
# ResNeSt: Split-Attention Networks
## Introduction
```
@article{zhang2020resnest,
title={ResNeSt: Split-Attention Networks},
author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
journal={arXiv preprint arXiv:2004.08955},
year={2020}
}
```
## Results and models
### Cityscapes
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | 77.56 | 78.98 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) |
| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | 78.57 | 79.19 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) |
| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | 79.67 | 80.51 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) |
| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | 79.62 | 80.27 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) |
### ADE20K
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 12.86 | 45.62 | 46.16 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) |
| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | 45.44 | 46.28 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) |
| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | 45.71 | 46.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) |
| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | 46.47 | 47.27 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) |


@ -0,0 +1,9 @@
_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../fcn/fcn_r101-d8_512x512_160k_ade20k.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))


@ -0,0 +1,9 @@
_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py'
model = dict(
pretrained='open-mmlab://resnest101',
backbone=dict(
type='ResNeSt',
stem_channels=128,
radix=2,
reduction_factor=4,
avg_down_stride=True))

docs/changelog.md 100644

@ -0,0 +1,15 @@
## Changelog
### v0.5.1 (11/08/2020)
**Highlights**
- Support FP16 and more generalized OHEM
**Bug Fixes**
- Fixed Pascal VOC conversion script (#19)
- Fixed OHEM weight assign bug (#54)
- Fixed palette type when palette is not given (#27)
**New Features**
- Support FP16 (#21)
- Generalized OHEM (#54)
**Improvements**
- Add load-from flag (#33)
- Fixed training tricks doc about different learning rates of model (#26)


@ -20,10 +20,17 @@ sys.path.insert(0, os.path.abspath('..'))
project = 'MMSegmentation'
copyright = '2020-2020, OpenMMLab'
author = 'MMSegmentation Authors'
version_file = '../mmseg/version.py'
def get_version():
with open(version_file, 'r') as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
# The full version, including alpha/beta/rc tags
release = get_version()
# -- General configuration ---------------------------------------------------


@ -332,3 +332,18 @@ python tools/publish_model.py work_dirs/pspnet/latest.pth psp_r50_hszhao_200ep.p
```
The final output filename will be `psp_r50_512x1024_40ki_cityscapes-{hash id}.pth`.
### Convert to ONNX (experimental)
We provide a script to convert the model to [ONNX](https://github.com/onnx/onnx) format. The converted model can be visualized by tools like [Netron](https://github.com/lutzroeder/netron). We also support comparing the output results between the PyTorch and ONNX models.
```shell
python tools/pytorch2onnx.py ${CONFIG_FILE} --checkpoint ${CHECKPOINT_FILE} --output-file ${ONNX_FILE} [--shape ${INPUT_SHAPE}] [--verify]
```
**Note**: This tool is still experimental. Some customized operators are not supported for now.
## Tutorials
Currently, we provide four tutorials for users to [add new dataset](tutorials/new_dataset.md), [design data pipeline](tutorials/data_pipeline.md), [add new modules](tutorials/new_modules.md), and [use training tricks](tutorials/training_tricks.md).
We also provide a full description of the [config system](config.md).


@ -54,10 +54,9 @@ pip install -e . # or "python setup.py develop"
Note:
1. The `version+git_hash` will also be saved in trained models meta, e.g. 0.5.0+c415a2e.
2. When MMSegmentation is installed in `dev` mode, any local modifications made to the code will take effect without the need to reinstall it.
3. If you would like to use `opencv-python-headless` instead of `opencv-python`,
you can install it before installing MMCV.
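As a quick illustration of note 1, the recorded version string can be read back from a checkpoint's meta; the path below reuses the earlier publish-model example and the hash is only an example value.

```python
# Illustrative only: inspect the version+git_hash stored in a trained checkpoint's meta.
import torch

ckpt = torch.load('work_dirs/pspnet/latest.pth', map_location='cpu')
print(ckpt['meta']['mmseg_version'])  # e.g. '0.5.1+3cbfbf6'
```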

File diff suppressed because it is too large.


@ -85,6 +85,10 @@ Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/maste
Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) for details.
### ResNeSt
Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) for details.
### Mixed Precision (FP16) Training
Please refer to [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details.


@ -1 +0,0 @@
0.5.0


@ -1,3 +1,30 @@
import mmcv
from .version import __version__, version_info
MMCV_MIN = '1.0.5'
MMCV_MAX = '1.0.5'
def digit_version(version_str):
digit_version = []
for x in version_str.split('.'):
if x.isdigit():
digit_version.append(int(x))
elif x.find('rc') != -1:
patch_version = x.split('rc')
digit_version.append(int(patch_version[0]) - 1)
digit_version.append(int(patch_version[1]))
return digit_version
mmcv_min_version = digit_version(MMCV_MIN)
mmcv_max_version = digit_version(MMCV_MAX)
mmcv_version = digit_version(mmcv.__version__)
assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \
f'MMCV=={mmcv.__version__} is used but incompatible. ' \
f'Please install mmcv>={mmcv_min_version}, <={mmcv_max_version}.'
__all__ = ['__version__', 'version_info']
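For reference, a small sketch of the ordering produced by `digit_version` above (the version strings are made-up examples, and importing `mmseg` assumes a compatible mmcv is installed so the check passes):

```python
# Example values only: 'rc' releases sort below the corresponding final release.
from mmseg import digit_version  # module-level helper defined above

assert digit_version('1.0.5') == [1, 0, 5]
assert digit_version('1.0.5rc1') == [1, 0, 4, 1]
assert digit_version('1.0.5rc1') < digit_version('1.0.5')
```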


@ -1,6 +1,10 @@
from .fast_scnn import FastSCNN
from .hrnet import HRNet
from .resnest import ResNeSt
from .resnet import ResNet, ResNetV1c, ResNetV1d
from .resnext import ResNeXt
__all__ = [
'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
'ResNeSt'
]


@ -0,0 +1,314 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from mmcv.cnn import build_conv_layer, build_norm_layer
from ..builder import BACKBONES
from ..utils import ResLayer
from .resnet import Bottleneck as _Bottleneck
from .resnet import ResNetV1d
class RSoftmax(nn.Module):
"""Radix Softmax module in ``SplitAttentionConv2d``.
Args:
radix (int): Radix of input.
groups (int): Groups of input.
"""
def __init__(self, radix, groups):
super().__init__()
self.radix = radix
self.groups = groups
def forward(self, x):
batch = x.size(0)
if self.radix > 1:
x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2)
x = F.softmax(x, dim=1)
x = x.reshape(batch, -1)
else:
x = torch.sigmoid(x)
return x
class SplitAttentionConv2d(nn.Module):
"""Split-Attention Conv2d in ResNeSt.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int | tuple[int]): Same as nn.Conv2d.
stride (int | tuple[int]): Same as nn.Conv2d.
padding (int | tuple[int]): Same as nn.Conv2d.
dilation (int | tuple[int]): Same as nn.Conv2d.
groups (int): Same as nn.Conv2d.
radix (int): Radix of SplitAttentionConv2d. Default: 2
reduction_factor (int): Reduction factor of inter_channels. Default: 4.
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer. Default: None.
dcn (dict): Config dict for DCN. Default: None.
"""
def __init__(self,
in_channels,
channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
radix=2,
reduction_factor=4,
conv_cfg=None,
norm_cfg=dict(type='BN'),
dcn=None):
super(SplitAttentionConv2d, self).__init__()
inter_channels = max(in_channels * radix // reduction_factor, 32)
self.radix = radix
self.groups = groups
self.channels = channels
self.with_dcn = dcn is not None
self.dcn = dcn
fallback_on_stride = False
if self.with_dcn:
fallback_on_stride = self.dcn.pop('fallback_on_stride', False)
if self.with_dcn and not fallback_on_stride:
assert conv_cfg is None, 'conv_cfg must be None for DCN'
conv_cfg = dcn
self.conv = build_conv_layer(
conv_cfg,
in_channels,
channels * radix,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups * radix,
bias=False)
self.norm0_name, norm0 = build_norm_layer(
norm_cfg, channels * radix, postfix=0)
self.add_module(self.norm0_name, norm0)
self.relu = nn.ReLU(inplace=True)
self.fc1 = build_conv_layer(
None, channels, inter_channels, 1, groups=self.groups)
self.norm1_name, norm1 = build_norm_layer(
norm_cfg, inter_channels, postfix=1)
self.add_module(self.norm1_name, norm1)
self.fc2 = build_conv_layer(
None, inter_channels, channels * radix, 1, groups=self.groups)
self.rsoftmax = RSoftmax(radix, groups)
@property
def norm0(self):
"""nn.Module: the normalization layer named "norm0" """
return getattr(self, self.norm0_name)
@property
def norm1(self):
"""nn.Module: the normalization layer named "norm1" """
return getattr(self, self.norm1_name)
def forward(self, x):
x = self.conv(x)
x = self.norm0(x)
x = self.relu(x)
batch, rchannel = x.shape[:2]
batch = x.size(0)
if self.radix > 1:
splits = x.view(batch, self.radix, -1, *x.shape[2:])
gap = splits.sum(dim=1)
else:
gap = x
gap = F.adaptive_avg_pool2d(gap, 1)
gap = self.fc1(gap)
gap = self.norm1(gap)
gap = self.relu(gap)
atten = self.fc2(gap)
atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
if self.radix > 1:
attens = atten.view(batch, self.radix, -1, *atten.shape[2:])
out = torch.sum(attens * splits, dim=1)
else:
out = atten * x
return out.contiguous()
class Bottleneck(_Bottleneck):
"""Bottleneck block for ResNeSt.
Args:
inplanes (int): Input planes of this block.
planes (int): Middle planes of this block.
groups (int): Groups of conv2.
width_per_group (int): Width per group of conv2. 64x4d indicates
``groups=64, width_per_group=4`` and 32x8d indicates
``groups=32, width_per_group=8``.
radix (int): Radix of SplitAttentionConv2d. Default: 2
reduction_factor (int): Reduction factor of inter_channels in
SplitAttentionConv2d. Default: 4.
avg_down_stride (bool): Whether to use average pool for stride in
Bottleneck. Default: True.
kwargs (dict): Key word arguments for base class.
"""
expansion = 4
def __init__(self,
inplanes,
planes,
groups=1,
base_width=4,
base_channels=64,
radix=2,
reduction_factor=4,
avg_down_stride=True,
**kwargs):
"""Bottleneck block for ResNeSt."""
super(Bottleneck, self).__init__(inplanes, planes, **kwargs)
if groups == 1:
width = self.planes
else:
width = math.floor(self.planes *
(base_width / base_channels)) * groups
self.avg_down_stride = avg_down_stride and self.conv2_stride > 1
self.norm1_name, norm1 = build_norm_layer(
self.norm_cfg, width, postfix=1)
self.norm3_name, norm3 = build_norm_layer(
self.norm_cfg, self.planes * self.expansion, postfix=3)
self.conv1 = build_conv_layer(
self.conv_cfg,
self.inplanes,
width,
kernel_size=1,
stride=self.conv1_stride,
bias=False)
self.add_module(self.norm1_name, norm1)
self.with_modulated_dcn = False
self.conv2 = SplitAttentionConv2d(
width,
width,
kernel_size=3,
stride=1 if self.avg_down_stride else self.conv2_stride,
padding=self.dilation,
dilation=self.dilation,
groups=groups,
radix=radix,
reduction_factor=reduction_factor,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
dcn=self.dcn)
delattr(self, self.norm2_name)
if self.avg_down_stride:
self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1)
self.conv3 = build_conv_layer(
self.conv_cfg,
width,
self.planes * self.expansion,
kernel_size=1,
bias=False)
self.add_module(self.norm3_name, norm3)
def forward(self, x):
def _inner_forward(x):
identity = x
out = self.conv1(x)
out = self.norm1(out)
out = self.relu(out)
if self.with_plugins:
out = self.forward_plugin(out, self.after_conv1_plugin_names)
out = self.conv2(out)
if self.avg_down_stride:
out = self.avd_layer(out)
if self.with_plugins:
out = self.forward_plugin(out, self.after_conv2_plugin_names)
out = self.conv3(out)
out = self.norm3(out)
if self.with_plugins:
out = self.forward_plugin(out, self.after_conv3_plugin_names)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
return out
if self.with_cp and x.requires_grad:
out = cp.checkpoint(_inner_forward, x)
else:
out = _inner_forward(x)
out = self.relu(out)
return out
@BACKBONES.register_module()
class ResNeSt(ResNetV1d):
"""ResNeSt backbone.
Args:
groups (int): Number of groups of Bottleneck. Default: 1
base_width (int): Base width of Bottleneck. Default: 4
radix (int): Radix of SplitAttentionConv2d. Default: 2
reduction_factor (int): Reduction factor of inter_channels in
SplitAttentionConv2d. Default: 4.
avg_down_stride (bool): Whether to use average pool for stride in
Bottleneck. Default: True.
kwargs (dict): Keyword arguments for ResNet.
"""
arch_settings = {
50: (Bottleneck, (3, 4, 6, 3)),
101: (Bottleneck, (3, 4, 23, 3)),
152: (Bottleneck, (3, 8, 36, 3)),
200: (Bottleneck, (3, 24, 36, 3))
}
def __init__(self,
groups=1,
base_width=4,
radix=2,
reduction_factor=4,
avg_down_stride=True,
**kwargs):
self.groups = groups
self.base_width = base_width
self.radix = radix
self.reduction_factor = reduction_factor
self.avg_down_stride = avg_down_stride
super(ResNeSt, self).__init__(**kwargs)
def make_res_layer(self, **kwargs):
"""Pack all blocks in a stage into a ``ResLayer``."""
return ResLayer(
groups=self.groups,
base_width=self.base_width,
base_channels=self.base_channels,
radix=self.radix,
reduction_factor=self.reduction_factor,
avg_down_stride=self.avg_down_stride,
**kwargs)
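A brief usage sketch of the new backbone; the input size is arbitrary and the expected feature shapes mirror the unit test added later in this PR.

```python
# Illustrative only: forward a dummy batch through the ResNeSt-50 backbone above.
import torch
from mmseg.models.backbones import ResNeSt

backbone = ResNeSt(depth=50, radix=2, reduction_factor=4, out_indices=(0, 1, 2, 3))
backbone.init_weights()
backbone.eval()
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 224, 224))
print([tuple(f.shape) for f in feats])
# expected: [(1, 256, 56, 56), (1, 512, 28, 28), (1, 1024, 14, 14), (1, 2048, 7, 7)]
```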


@ -1,3 +1,4 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
@ -171,6 +172,8 @@ class EncoderDecoder(BaseSegmentor):
h_stride, w_stride = self.test_cfg.stride
h_crop, w_crop = self.test_cfg.crop_size
batch_size, _, h_img, w_img = img.size()
assert h_crop <= h_img and w_crop <= w_img, (
'crop size should not greater than image size')
num_classes = self.num_classes
h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1
@ -185,14 +188,15 @@ class EncoderDecoder(BaseSegmentor):
y1 = max(y2 - h_crop, 0)
x1 = max(x2 - w_crop, 0)
crop_img = img[:, :, y1:y2, x1:x2]
crop_seg_logit = self.encode_decode(crop_img, img_meta)
preds += F.pad(crop_seg_logit,
(int(x1), int(preds.shape[3] - x2), int(y1),
int(preds.shape[2] - y2)))
count_mat[:, :, y1:y2, x1:x2] += 1
assert (count_mat == 0).sum() == 0
# We want to regard count_mat as a constant while exporting to ONNX
count_mat = torch.from_numpy(count_mat.detach().numpy())
preds = preds / count_mat
if rescale:
preds = resize(
@ -201,7 +205,6 @@ class EncoderDecoder(BaseSegmentor):
mode='bilinear',
align_corners=self.align_corners,
warning=False)
return preds
def whole_inference(self, img, img_meta, rescale):
@ -243,8 +246,8 @@ class EncoderDecoder(BaseSegmentor):
seg_logit = self.whole_inference(img, img_meta, rescale)
output = F.softmax(seg_logit, dim=1)
flip = img_meta[0]['flip']
flip_direction = img_meta[0]['flip_direction']
if flip:
flip_direction = img_meta[0]['flip_direction']
assert flip_direction in ['horizontal', 'vertical']
if flip_direction == 'horizontal':
output = output.flip(dims=(3, ))
@ -257,6 +260,8 @@ class EncoderDecoder(BaseSegmentor):
"""Simple test with single image.""" """Simple test with single image."""
seg_logit = self.inference(img, img_meta, rescale) seg_logit = self.inference(img, img_meta, rescale)
seg_pred = seg_logit.argmax(dim=1) seg_pred = seg_logit.argmax(dim=1)
if torch.onnx.is_in_onnx_export():
return seg_pred
seg_pred = seg_pred.cpu().numpy() seg_pred = seg_pred.cpu().numpy()
# unravel batch dim # unravel batch dim
seg_pred = list(seg_pred) seg_pred = list(seg_pred)
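The slide-inference change above replaces explicit crop padding with an `F.pad` of the crop logits into the full-size prediction map. A standalone sketch of that placement (sizes are arbitrary):

```python
# Illustrative only: F.pad with (left, right, top, bottom) places crop logits at
# [y1:y2, x1:x2] of a zero map sized like the full image, so crops can be summed.
import torch
import torch.nn.functional as F

H, W, h_crop, w_crop = 8, 8, 4, 4
y1, x1 = 2, 3
y2, x2 = y1 + h_crop, x1 + w_crop
crop_seg_logit = torch.randn(1, 19, h_crop, w_crop)

padded = F.pad(crop_seg_logit, (x1, W - x2, y1, H - y2))
assert padded.shape[-2:] == (H, W)
assert torch.equal(padded[..., y1:y2, x1:x2], crop_seg_logit)
```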


@ -42,8 +42,7 @@ class ResLayer(nn.Sequential):
if stride != 1 or inplanes != planes * block.expansion:
downsample = []
conv_stride = stride
if avg_down:
conv_stride = 1
downsample.append(
nn.AvgPool2d(


@ -7,7 +7,7 @@ import cv2
import mmcv
import torch
import torchvision
from mmcv.utils import get_build_config, get_git_hash
import mmseg
@ -53,7 +53,7 @@ def collect_env():
env_info['OpenCV'] = cv2.__version__
env_info['MMCV'] = mmcv.__version__
env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}'
try:
from mmcv.ops import get_compiler_version, get_compiling_cuda_version
env_info['MMCV Compiler'] = get_compiler_version()

mmseg/version.py 100644

@ -0,0 +1,18 @@
# Copyright (c) Open-MMLab. All rights reserved.
__version__ = '0.5.1'
def parse_version_info(version_str):
version_info = []
for x in version_str.split('.'):
if x.isdigit():
version_info.append(int(x))
elif x.find('rc') != -1:
patch_version = x.split('rc')
version_info.append(int(patch_version[0]))
version_info.append(f'rc{patch_version[1]}')
return tuple(version_info)
version_info = parse_version_info(__version__)


@ -8,6 +8,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmseg
known_third_party = PIL,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY


@ -1,81 +1,19 @@
import os
import subprocess
import time
from setuptools import find_packages, setup
def readme():
with open('README.md', encoding='utf-8') as f:
content = f.read()
return content
version_file = 'mmseg/version.py'
def get_git_hash():
def _minimal_ext_cmd(cmd):
# construct minimal environment
env = {}
for k in ['SYSTEMROOT', 'PATH', 'HOME']:
v = os.environ.get(k)
if v is not None:
env[k] = v
# LANGUAGE is used on win32
env['LANGUAGE'] = 'C'
env['LANG'] = 'C'
env['LC_ALL'] = 'C'
out = subprocess.Popen(
cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
return out
try:
out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
sha = out.strip().decode('ascii')
except OSError:
sha = 'unknown'
return sha
def get_hash():
if os.path.exists('.git'):
sha = get_git_hash()[:7]
elif os.path.exists(version_file):
try:
from mmseg.version import __version__
sha = __version__.split('+')[-1]
except ImportError:
raise ImportError('Unable to get git version')
else:
sha = 'unknown'
return sha
def write_version_py():
content = """# GENERATED VERSION FILE
# TIME: {}
__version__ = '{}'
short_version = '{}'
version_info = ({})
"""
sha = get_hash()
with open('mmseg/VERSION', 'r') as f:
SHORT_VERSION = f.read().strip()
VERSION_INFO = ', '.join(SHORT_VERSION.split('.'))
VERSION = SHORT_VERSION + '+' + sha
version_file_str = content.format(time.asctime(), VERSION, SHORT_VERSION,
VERSION_INFO)
with open(version_file, 'w') as f:
f.write(version_file_str)
def get_version():
with open(version_file, 'r') as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
# return short version for sdist
if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
return locals()['short_version']
else:
return locals()['__version__']
def parse_requirements(fname='requirements.txt', with_version=True):
@ -155,11 +93,12 @@ def parse_requirements(fname='requirements.txt', with_version=True):
if __name__ == '__main__':
write_version_py()
setup(
name='mmsegmentation',
version=get_version(),
description='Open MMLab Semantic Segmentation Toolbox and Benchmark',
long_description=readme(),
long_description_content_type='text/markdown',
author='MMSegmentation Authors',
author_email='openmmlab@gmail.com',
keywords='computer vision, semantic segmentation',


@ -4,7 +4,9 @@ from mmcv.ops import DeformConv2dPack
from mmcv.utils.parrots_wrapper import _BatchNorm
from torch.nn.modules import AvgPool2d, GroupNorm
from mmseg.models.backbones import (FastSCNN, ResNeSt, ResNet, ResNetV1d,
ResNeXt)
from mmseg.models.backbones.resnest import Bottleneck as BottleneckS
from mmseg.models.backbones.resnet import BasicBlock, Bottleneck
from mmseg.models.backbones.resnext import Bottleneck as BottleneckX
from mmseg.models.utils import ResLayer
@ -689,3 +691,41 @@ def test_fastscnn_backbone():
assert feat[1].shape == torch.Size([batch_size, 128, 16, 32])
# FFM output
assert feat[2].shape == torch.Size([batch_size, 128, 64, 128])
def test_resnest_bottleneck():
with pytest.raises(AssertionError):
# Style must be in ['pytorch', 'caffe']
BottleneckS(64, 64, radix=2, reduction_factor=4, style='tensorflow')
# Test ResNeSt Bottleneck structure
block = BottleneckS(
64, 256, radix=2, reduction_factor=4, stride=2, style='pytorch')
assert block.avd_layer.stride == 2
assert block.conv2.channels == 256
# Test ResNeSt Bottleneck forward
block = BottleneckS(64, 16, radix=2, reduction_factor=4)
x = torch.randn(2, 64, 56, 56)
x_out = block(x)
assert x_out.shape == torch.Size([2, 64, 56, 56])
def test_resnest_backbone():
with pytest.raises(KeyError):
# ResNeSt depth should be in [50, 101, 152, 200]
ResNeSt(depth=18)
# Test ResNeSt with radix 2, reduction_factor 4
model = ResNeSt(
depth=50, radix=2, reduction_factor=4, out_indices=(0, 1, 2, 3))
model.init_weights()
model.train()
imgs = torch.randn(2, 3, 224, 224)
feat = model(imgs)
assert len(feat) == 4
assert feat[0].shape == torch.Size([2, 256, 56, 56])
assert feat[1].shape == torch.Size([2, 512, 28, 28])
assert feat[2].shape == torch.Size([2, 1024, 14, 14])
assert feat[3].shape == torch.Size([2, 2048, 7, 7])


@ -0,0 +1,198 @@
import argparse
from functools import partial
import mmcv
import numpy as np
import onnxruntime as rt
import torch
import torch._C
import torch.serialization
from mmcv.onnx import register_extra_symbolics
from mmcv.runner import load_checkpoint
from mmseg.models import build_segmentor
torch.manual_seed(3)
def _convert_batchnorm(module):
module_output = module
if isinstance(module, torch.nn.SyncBatchNorm):
module_output = torch.nn.BatchNorm2d(module.num_features, module.eps,
module.momentum, module.affine,
module.track_running_stats)
if module.affine:
module_output.weight.data = module.weight.data.clone().detach()
module_output.bias.data = module.bias.data.clone().detach()
# keep requires_grad unchanged
module_output.weight.requires_grad = module.weight.requires_grad
module_output.bias.requires_grad = module.bias.requires_grad
module_output.running_mean = module.running_mean
module_output.running_var = module.running_var
module_output.num_batches_tracked = module.num_batches_tracked
for name, child in module.named_children():
module_output.add_module(name, _convert_batchnorm(child))
del module
return module_output
def _demo_mm_inputs(input_shape, num_classes):
"""Create a superset of inputs needed to run test or train batches.
Args:
input_shape (tuple):
input batch dimensions
num_classes (int):
number of semantic classes
"""
(N, C, H, W) = input_shape
rng = np.random.RandomState(0)
imgs = rng.rand(*input_shape)
segs = rng.randint(
low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8)
img_metas = [{
'img_shape': (H, W, C),
'ori_shape': (H, W, C),
'pad_shape': (H, W, C),
'filename': '<demo>.png',
'scale_factor': 1.0,
'flip': False,
} for _ in range(N)]
mm_inputs = {
'imgs': torch.FloatTensor(imgs).requires_grad_(True),
'img_metas': img_metas,
'gt_semantic_seg': torch.LongTensor(segs)
}
return mm_inputs
def pytorch2onnx(model,
input_shape,
opset_version=11,
show=False,
output_file='tmp.onnx',
verify=False):
"""Export Pytorch model to ONNX model and verify the outputs are same
between Pytorch and ONNX.
Args:
model (nn.Module): Pytorch model we want to export.
input_shape (tuple): Use this input shape to construct
the corresponding dummy input and execute the model.
opset_version (int): The onnx op version. Default: 11.
show (bool): Whether print the computation graph. Default: False.
output_file (string): The path to where we store the output ONNX model.
Default: `tmp.onnx`.
verify (bool): Whether compare the outputs between Pytorch and ONNX.
Default: False.
"""
model.cpu().eval()
num_classes = model.decode_head.num_classes
mm_inputs = _demo_mm_inputs(input_shape, num_classes)
imgs = mm_inputs.pop('imgs')
img_metas = mm_inputs.pop('img_metas')
img_list = [img[None, :] for img in imgs]
img_meta_list = [[img_meta] for img_meta in img_metas]
# replace original forward function
origin_forward = model.forward
model.forward = partial(
model.forward, img_metas=img_meta_list, return_loss=False)
register_extra_symbolics(opset_version)
with torch.no_grad():
torch.onnx.export(
model, (img_list, ),
output_file,
export_params=True,
keep_initializers_as_inputs=True,
verbose=show,
opset_version=opset_version)
print(f'Successfully exported ONNX model: {output_file}')
model.forward = origin_forward
if verify:
# check by onnx
import onnx
onnx_model = onnx.load(output_file)
onnx.checker.check_model(onnx_model)
# check the numerical value
# get pytorch output
pytorch_result = model(img_list, img_meta_list, return_loss=False)[0]
# get onnx output
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [
node.name for node in onnx_model.graph.initializer
]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 1)
sess = rt.InferenceSession(output_file)
onnx_result = sess.run(
None, {net_feed_input[0]: img_list[0].detach().numpy()})[0]
if not np.allclose(pytorch_result, onnx_result):
raise ValueError(
'The outputs are different between Pytorch and ONNX')
print('The outputs are same between Pytorch and ONNX')
def parse_args():
parser = argparse.ArgumentParser(description='Convert MMSeg to ONNX')
parser.add_argument('config', help='test config file path')
parser.add_argument('--checkpoint', help='checkpoint file', default=None)
parser.add_argument('--show', action='store_true', help='show onnx graph')
parser.add_argument(
'--verify', action='store_true', help='verify the onnx model')
parser.add_argument('--output-file', type=str, default='tmp.onnx')
parser.add_argument('--opset-version', type=int, default=11)
parser.add_argument(
'--shape',
type=int,
nargs='+',
default=[256, 256],
help='input image size')
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
if len(args.shape) == 1:
input_shape = (1, 3, args.shape[0], args.shape[0])
elif len(args.shape) == 2:
input_shape = (
1,
3,
) + tuple(args.shape)
else:
raise ValueError('invalid input shape')
cfg = mmcv.Config.fromfile(args.config)
cfg.model.pretrained = None
# build the model and load checkpoint
segmentor = build_segmentor(
cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
# convert SyncBN to BN
segmentor = _convert_batchnorm(segmentor)
num_classes = segmentor.decode_head.num_classes
if args.checkpoint:
checkpoint = load_checkpoint(
segmentor, args.checkpoint, map_location='cpu')
# convert model to onnx file
pytorch2onnx(
segmentor,
input_shape,
opset_version=args.opset_version,
show=args.show,
output_file=args.output_file,
verify=args.verify)
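To sanity-check an exported file outside of `--verify`, something along these lines should work with onnxruntime (the file name and input size follow the script's defaults; the output shape depends on the model):

```python
# Illustrative only: run the exported ONNX model on a random input.
import numpy as np
import onnxruntime as rt

sess = rt.InferenceSession('tmp.onnx')
input_name = sess.get_inputs()[0].name
dummy = np.random.rand(1, 3, 256, 256).astype(np.float32)
print(sess.run(None, {input_name: dummy})[0].shape)
```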


@ -7,7 +7,7 @@ import time
import mmcv
import torch
from mmcv.runner import init_dist
from mmcv.utils import Config, DictAction, get_git_hash
from mmseg import __version__
from mmseg.apis import set_random_seed, train_segmentor
@ -141,7 +141,7 @@ def main():
# save mmseg version, config file content and class names in
# checkpoints as meta data
cfg.checkpoint_config.meta = dict(
mmseg_version=f'{__version__}+{get_git_hash()[:7]}',
config=cfg.pretty_text,
CLASSES=datasets[0].CLASSES,
PALETTE=datasets[0].PALETTE)