[Fix] Fix MaskFormer and Mask2Former of MMSegmentation (#2532)
## Motivation The DETR-related modules were refactored in open-mmlab/mmdetection#8763, which broke MaskFormer and Mask2Former in both MMDetection (already fixed in open-mmlab/mmdetection#9515) and MMSegmentation. This PR fixes the breakage in MMSegmentation. ### TO-DO List - [x] update configs - [x] check and modify data flow - [x] fix unit test - [x] align inference - [x] write a ckpt converter - [x] write ckpt update script - [x] update model zoo - [x] update model link in readme - [x] update [faq.md](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/notes/faq.md#installation) ## Tips for fixing other implementations based on MaskXFormer of mmseg 1. The Transformer modules should be built directly. The original registry-based way of building them has been refactored. 2. The config needs to be modified. Delete `type` and modify several keys, following the config changes in this PR. 3. `batch_first` is uniformly set to `True` in the new implementations. Hence the data flow needs to be transposed and the `batch_first` config needs to be modified. 4. A checkpoint trained with the old implementation must be converted before it can be used with the new one. 
### Convert script ```Python import argparse from copy import deepcopy from collections import OrderedDict import torch from mmengine.config import Config from mmseg.models import build_segmentor from mmseg.utils import register_all_modules register_all_modules(init_default_scope=True) def parse_args(): parser = argparse.ArgumentParser( description='MMSeg convert MaskXFormer model, by Li-Qingyun') parser.add_argument('Mask_what_former', type=int, help='Mask what former, can be a `1` or `2`', choices=[1, 2]) parser.add_argument('CFG_FILE', help='config file path') parser.add_argument('OLD_CKPT_FILEPATH', help='old ckpt file path') parser.add_argument('NEW_CKPT_FILEPATH', help='new ckpt file path') args = parser.parse_args() return args args = parse_args() def get_new_name(old_name: str): new_name = old_name if 'encoder.layers' in new_name: new_name = new_name.replace('attentions.0', 'self_attn') new_name = new_name.replace('ffns.0', 'ffn') if 'decoder.layers' in new_name: if args.Mask_what_former == 2: # for Mask2Former new_name = new_name.replace('attentions.0', 'cross_attn') new_name = new_name.replace('attentions.1', 'self_attn') else: # for MaskFormer new_name = new_name.replace('attentions.0', 'self_attn') new_name = new_name.replace('attentions.1', 'cross_attn') return new_name def cvt_sd(old_sd: OrderedDict): new_sd = OrderedDict() for name, param in old_sd.items(): new_name = get_new_name(name) assert new_name not in new_sd new_sd[new_name] = param assert len(new_sd) == len(old_sd) return new_sd if __name__ == '__main__': cfg = Config.fromfile(args.CFG_FILE) model_cfg = cfg.model segmentor = build_segmentor(model_cfg) refer_sd = segmentor.state_dict() old_ckpt = torch.load(args.OLD_CKPT_FILEPATH) old_sd = old_ckpt['state_dict'] new_sd = cvt_sd(old_sd) print(segmentor.load_state_dict(new_sd)) new_ckpt = deepcopy(old_ckpt) new_ckpt['state_dict'] = new_sd torch.save(new_ckpt, args.NEW_CKPT_FILEPATH) print(f'{args.NEW_CKPT_FILEPATH} has been saved!') ``` Usage: 
```bash # for example python ckpt4pr2532.py 1 configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py original_ckpts/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-cbd39cc1.pth cvt_outputs/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.pth python ckpt4pr2532.py 2 configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py original_ckpts/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-4c62652d.pth cvt_outputs/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.pth ``` --------- Co-authored-by: MeowZheng <meowzheng@outlook.com>pull/2549/head
parent
124b87ce90
commit
a092fea8c1
|
@ -63,7 +63,7 @@ jobs:
|
|||
pip install -U openmim
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- run:
|
||||
name: Build and install
|
||||
|
@ -97,6 +97,7 @@ jobs:
|
|||
command: |
|
||||
git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine
|
||||
git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification
|
||||
git clone -b dev-3.x --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection
|
||||
- run:
|
||||
name: Build Docker image
|
||||
command: |
|
||||
|
@ -109,7 +110,7 @@ jobs:
|
|||
docker exec mmseg pip install -U openmim
|
||||
docker exec mmseg mim install 'mmcv>=2.0.0rc4'
|
||||
docker exec mmseg pip install -e /mmclassification
|
||||
docker exec mmseg mim install 'mmdet==3.0.0rc5'
|
||||
docker exec mmseg pip install -e /mmdetection
|
||||
docker exec mmseg pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- run:
|
||||
name: Build and install
|
||||
|
|
|
@ -46,7 +46,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
@ -102,7 +102,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
@ -168,7 +168,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
@ -211,7 +211,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
@ -246,7 +246,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
|
|
@ -46,7 +46,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
@ -102,7 +102,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
@ -137,7 +137,7 @@ jobs:
|
|||
pip install git+https://github.com/open-mmlab/mmengine.git
|
||||
mim install 'mmcv>=2.0.0rc4'
|
||||
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
|
||||
mim install 'mmdet==3.0.0rc5'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install unittest dependencies
|
||||
run: pip install -r requirements/tests.txt -r requirements/optional.txt
|
||||
- name: Build and install
|
||||
|
|
|
@ -45,24 +45,24 @@ pip install "mmdet>=3.0.0rc4"
|
|||
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
|
||||
| ----------- | -------------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5806 | 9.17 | 80.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-2ff5ffa0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) |
|
||||
| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6971 | 7.11 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-8ad528ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json)) |
|
||||
| Mask2Former | Swin-T | 512x1024 | 90000 | 6511 | 7.18 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-290b34af.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json)) |
|
||||
| Mask2Former | Swin-S | 512x1024 | 90000 | 8282 | 5.57 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-7c98854a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json)) |
|
||||
| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 11152 | 4.32 | 83.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-59a4379a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json)) |
|
||||
| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 16207 | 2.86 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-dc2c2ddd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json)) |
|
||||
| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5806 | 9.17 | 80.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) |
|
||||
| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6971 | 7.11 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json) |
|
||||
| Mask2Former | Swin-T | 512x1024 | 90000 | 6511 | 7.18 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json) |
|
||||
| Mask2Former | Swin-S | 512x1024 | 90000 | 8282 | 5.57 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json) |
|
||||
| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 11152 | 4.32 | 83.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json) |
|
||||
| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 16207 | 2.86 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json) |
|
||||
|
||||
### ADE20K
|
||||
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
|
||||
| ----------- | -------------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Mask2Former | R-50-D32 | 512x512 | 160000 | 3385 | 26.59 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-4c62652d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json)) |
|
||||
| Mask2Former | R-101-D32 | 512x512 | 160000 | 4190 | 22.97 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b1169bc0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json)) |
|
||||
| Mask2Former | Swin-T | 512x512 | 160000 | 3826 | 23.82 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-4341520b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json)) |
|
||||
| Mask2Former | Swin-S | 512x512 | 160000 | 5034 | 19.69 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-ab263c11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json)) |
|
||||
| Mask2Former | Swin-B | 640x640 | 160000 | 5795 | 12.48 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-35e3a2c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json)) |
|
||||
| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5795 | 12.43 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-622e093b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json)) |
|
||||
| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 9077 | 8.81 | 56.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-5cc76a78.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json)) |
|
||||
| Mask2Former | R-50-D32 | 512x512 | 160000 | 3385 | 26.59 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json) |
|
||||
| Mask2Former | R-101-D32 | 512x512 | 160000 | 4190 | 22.97 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json) |
|
||||
| Mask2Former | Swin-T | 512x512 | 160000 | 3826 | 23.82 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json) |
|
||||
| Mask2Former | Swin-S | 512x512 | 160000 | 5034 | 19.69 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json) |
|
||||
| Mask2Former | Swin-B | 640x640 | 160000 | 5795 | 12.48 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json) |
|
||||
| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5795 | 12.43 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json) |
|
||||
| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 9077 | 8.81 | 56.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json) |
|
||||
|
||||
Note:
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 80.44
|
||||
Config: configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-2ff5ffa0.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth
|
||||
- Name: mask2former_r101_8xb2-90k_cityscapes-512x1024
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -56,7 +56,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 80.8
|
||||
Config: configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-8ad528ea.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth
|
||||
- Name: mask2former_swin-t_8xb2-90k_cityscapes-512x1024
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -77,7 +77,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 81.71
|
||||
Config: configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-290b34af.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth
|
||||
- Name: mask2former_swin-s_8xb2-90k_cityscapes-512x1024
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -98,7 +98,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 82.57
|
||||
Config: configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-7c98854a.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth
|
||||
- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -119,7 +119,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 83.52
|
||||
Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-59a4379a.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth
|
||||
- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -140,7 +140,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 83.65
|
||||
Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-dc2c2ddd.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth
|
||||
- Name: mask2former_r50_8xb2-160k_ade20k-512x512
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -161,7 +161,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 47.87
|
||||
Config: configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-4c62652d.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth
|
||||
- Name: mask2former_r101_8xb2-160k_ade20k-512x512
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -182,7 +182,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 48.6
|
||||
Config: configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b1169bc0.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth
|
||||
- Name: mask2former_swin-t_8xb2-160k_ade20k-512x512
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -203,7 +203,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 48.66
|
||||
Config: configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-4341520b.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth
|
||||
- Name: mask2former_swin-s_8xb2-160k_ade20k-512x512
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -224,7 +224,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 51.24
|
||||
Config: configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-ab263c11.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth
|
||||
- Name: mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -245,7 +245,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 52.44
|
||||
Config: configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-35e3a2c7.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth
|
||||
- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -266,7 +266,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 53.9
|
||||
Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-622e093b.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth
|
||||
- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640
|
||||
In Collection: Mask2Former
|
||||
Metadata:
|
||||
|
@ -287,4 +287,4 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 56.01
|
||||
Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-5cc76a78.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth
|
||||
|
|
|
@ -41,65 +41,58 @@ model = dict(
|
|||
num_outs=3,
|
||||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
encoder=dict(
|
||||
type='mmdet.DetrTransformerEncoder',
|
||||
encoder=dict( # DeformableDetrTransformerEncoder
|
||||
num_layers=6,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.BaseTransformerLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiScaleDeformableAttention',
|
||||
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
|
||||
self_attn_cfg=dict( # MultiScaleDeformableAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
num_levels=3,
|
||||
num_points=4,
|
||||
im2col_step=64,
|
||||
dropout=0.0,
|
||||
batch_first=False,
|
||||
batch_first=True,
|
||||
norm_cfg=None,
|
||||
init_cfg=None),
|
||||
ffn_cfgs=dict(
|
||||
type='FFN',
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
ffn_drop=0.0,
|
||||
act_cfg=dict(type='ReLU', inplace=True)),
|
||||
operation_order=('self_attn', 'norm', 'ffn', 'norm')),
|
||||
act_cfg=dict(type='ReLU', inplace=True))),
|
||||
init_cfg=None),
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding',
|
||||
num_feats=128,
|
||||
normalize=True),
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
init_cfg=None),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding', num_feats=128,
|
||||
normalize=True),
|
||||
transformer_decoder=dict(
|
||||
type='mmdet.DetrTransformerDecoder',
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # Mask2FormerTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=9,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.DetrTransformerDecoderLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiheadAttention',
|
||||
layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=False),
|
||||
ffn_cfgs=dict(
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.0,
|
||||
dropout_layer=None,
|
||||
add_identity=True),
|
||||
feedforward_channels=2048,
|
||||
operation_order=('cross_attn', 'norm', 'self_attn', 'norm',
|
||||
'ffn', 'norm')),
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
|
|
|
@ -41,65 +41,58 @@ model = dict(
|
|||
num_outs=3,
|
||||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
encoder=dict(
|
||||
type='mmdet.DetrTransformerEncoder',
|
||||
encoder=dict( # DeformableDetrTransformerEncoder
|
||||
num_layers=6,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.BaseTransformerLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiScaleDeformableAttention',
|
||||
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
|
||||
self_attn_cfg=dict( # MultiScaleDeformableAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
num_levels=3,
|
||||
num_points=4,
|
||||
im2col_step=64,
|
||||
dropout=0.0,
|
||||
batch_first=False,
|
||||
batch_first=True,
|
||||
norm_cfg=None,
|
||||
init_cfg=None),
|
||||
ffn_cfgs=dict(
|
||||
type='FFN',
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
ffn_drop=0.0,
|
||||
act_cfg=dict(type='ReLU', inplace=True)),
|
||||
operation_order=('self_attn', 'norm', 'ffn', 'norm')),
|
||||
act_cfg=dict(type='ReLU', inplace=True))),
|
||||
init_cfg=None),
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding',
|
||||
num_feats=128,
|
||||
normalize=True),
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
init_cfg=None),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding', num_feats=128,
|
||||
normalize=True),
|
||||
transformer_decoder=dict(
|
||||
type='mmdet.DetrTransformerDecoder',
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # Mask2FormerTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=9,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.DetrTransformerDecoderLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiheadAttention',
|
||||
layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=False),
|
||||
ffn_cfgs=dict(
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.0,
|
||||
dropout_layer=None,
|
||||
add_identity=True),
|
||||
feedforward_channels=2048,
|
||||
operation_order=('cross_attn', 'norm', 'self_attn', 'norm',
|
||||
'ffn', 'norm')),
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
|
|
|
@ -53,65 +53,58 @@ model = dict(
|
|||
num_outs=3,
|
||||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
encoder=dict(
|
||||
type='mmdet.DetrTransformerEncoder',
|
||||
encoder=dict( # DeformableDetrTransformerEncoder
|
||||
num_layers=6,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.BaseTransformerLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiScaleDeformableAttention',
|
||||
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
|
||||
self_attn_cfg=dict( # MultiScaleDeformableAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
num_levels=3,
|
||||
num_points=4,
|
||||
im2col_step=64,
|
||||
dropout=0.0,
|
||||
batch_first=False,
|
||||
batch_first=True,
|
||||
norm_cfg=None,
|
||||
init_cfg=None),
|
||||
ffn_cfgs=dict(
|
||||
type='FFN',
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
ffn_drop=0.0,
|
||||
act_cfg=dict(type='ReLU', inplace=True)),
|
||||
operation_order=('self_attn', 'norm', 'ffn', 'norm')),
|
||||
act_cfg=dict(type='ReLU', inplace=True))),
|
||||
init_cfg=None),
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding',
|
||||
num_feats=128,
|
||||
normalize=True),
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
init_cfg=None),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding', num_feats=128,
|
||||
normalize=True),
|
||||
transformer_decoder=dict(
|
||||
type='mmdet.DetrTransformerDecoder',
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # Mask2FormerTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=9,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.DetrTransformerDecoderLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiheadAttention',
|
||||
layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=False),
|
||||
ffn_cfgs=dict(
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.0,
|
||||
dropout_layer=None,
|
||||
add_identity=True),
|
||||
feedforward_channels=2048,
|
||||
operation_order=('cross_attn', 'norm', 'self_attn', 'norm',
|
||||
'ffn', 'norm')),
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
|
|
|
@ -47,10 +47,10 @@ pip install "mmdet>=3.0.0rc4"
|
|||
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
|
||||
| ---------- | --------- | --------- | ------- | -------- | -------------- | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-cbd39cc1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) |
|
||||
| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-c8e0931d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) |
|
||||
| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-03550716.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) |
|
||||
| MaskFormer | Swin-S | 512x512 | 160000 | 5.33 | 26.98 | 49.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-5ab67e58.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) |
|
||||
| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) |
|
||||
| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) |
|
||||
| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) |
|
||||
| MaskFormer | Swin-S | 512x512 | 160000 | 5.33 | 26.98 | 49.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) |
|
||||
|
||||
Note:
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 44.29
|
||||
Config: configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-cbd39cc1.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth
|
||||
- Name: maskformer_r101-d32_8xb2-160k_ade20k-512x512
|
||||
In Collection: MaskFormer
|
||||
Metadata:
|
||||
|
@ -56,7 +56,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 45.11
|
||||
Config: configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-c8e0931d.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth
|
||||
- Name: maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512
|
||||
In Collection: MaskFormer
|
||||
Metadata:
|
||||
|
@ -77,7 +77,7 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 46.69
|
||||
Config: configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-03550716.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth
|
||||
- Name: maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512
|
||||
In Collection: MaskFormer
|
||||
Metadata:
|
||||
|
@ -98,4 +98,4 @@ Models:
|
|||
Metrics:
|
||||
mIoU: 49.36
|
||||
Config: configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-5ab67e58.pth
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth
|
||||
|
|
|
@ -43,36 +43,34 @@ model = dict(
|
|||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU')),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding', num_feats=128,
|
||||
normalize=True),
|
||||
transformer_decoder=dict(
|
||||
type='mmdet.DetrTransformerDecoder',
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # DetrTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=6,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.DetrTransformerDecoderLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiheadAttention',
|
||||
layer_cfg=dict( # DetrTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.1,
|
||||
proj_drop=0.1,
|
||||
dropout_layer=None,
|
||||
batch_first=False),
|
||||
ffn_cfgs=dict(
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.1,
|
||||
proj_drop=0.1,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.1,
|
||||
dropout_layer=None,
|
||||
add_identity=True),
|
||||
# the following parameter was not used,
|
||||
# just make current api happy
|
||||
feedforward_channels=2048,
|
||||
operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
|
||||
'ffn', 'norm')),
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
|
|
|
@ -8,7 +8,7 @@ The compatible MMSegmentation, MMCV and MMEngine versions are as below. Please i
|
|||
|
||||
| MMSegmentation version | MMCV version | MMEngine version | MMClassification (optional) version | MMDetection (optional) version |
|
||||
| :--------------------: | :----------------------------: | :---------------: | :---------------------------------: | :----------------------------: |
|
||||
| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 |
|
||||
| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 |
|
||||
| 1.x branch | mmcv == 2.0.0rc3 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 |
|
||||
| 1.0.0rc4 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 |
|
||||
| 1.0.0rc3 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 |
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
mmcls>=1.0.0rc0
|
||||
mmcv>=2.0.0rc4
|
||||
mmdet==3.0.0rc5
|
||||
-e git+https://github.com/open-mmlab/mmdetection.git@dev-3.x#egg=mmdet
|
||||
mmengine>=0.2.0,<1.0.0
|
||||
|
|
|
@ -25,65 +25,58 @@ def test_mask2former_head():
|
|||
num_outs=3,
|
||||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
encoder=dict(
|
||||
type='mmdet.DetrTransformerEncoder',
|
||||
encoder=dict( # DeformableDetrTransformerEncoder
|
||||
num_layers=6,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.BaseTransformerLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiScaleDeformableAttention',
|
||||
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
|
||||
self_attn_cfg=dict( # MultiScaleDeformableAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
num_levels=3,
|
||||
num_points=4,
|
||||
im2col_step=64,
|
||||
dropout=0.0,
|
||||
batch_first=False,
|
||||
batch_first=True,
|
||||
norm_cfg=None,
|
||||
init_cfg=None),
|
||||
ffn_cfgs=dict(
|
||||
type='FFN',
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
ffn_drop=0.0,
|
||||
act_cfg=dict(type='ReLU', inplace=True)),
|
||||
operation_order=('self_attn', 'norm', 'ffn', 'norm')),
|
||||
act_cfg=dict(type='ReLU', inplace=True))),
|
||||
init_cfg=None),
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding',
|
||||
num_feats=128,
|
||||
normalize=True),
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
init_cfg=None),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict(
|
||||
type='mmdet.SinePositionalEncoding', num_feats=128,
|
||||
normalize=True),
|
||||
transformer_decoder=dict(
|
||||
type='mmdet.DetrTransformerDecoder',
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # Mask2FormerTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=9,
|
||||
transformerlayers=dict(
|
||||
type='mmdet.DetrTransformerDecoderLayer',
|
||||
attn_cfgs=dict(
|
||||
type='mmdet.MultiheadAttention',
|
||||
layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=False),
|
||||
ffn_cfgs=dict(
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.0,
|
||||
dropout_layer=None,
|
||||
add_identity=True),
|
||||
feedforward_channels=2048,
|
||||
operation_order=('cross_attn', 'norm', 'self_attn', 'norm',
|
||||
'ffn', 'norm')),
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
|
|
Loading…
Reference in New Issue