# Model-index metadata for Mask2Former configs.
# Source path: mmsegmentation/configs/mask2former/mask2former.yml
Collections:
- Name: Mask2Former
  Metadata:
    Training Data:
    # NOTE(review): "Usage" is not a dataset — presumably an artifact of the
    # script that scrapes README section headers; confirm before removing.
    - Usage
    - Cityscapes
    - ADE20K
  Paper:
    URL: https://arxiv.org/abs/2112.01527
    Title: Masked-attention Mask Transformer for Universal Image Segmentation
  README: configs/mask2former/README.md
  Code:
    URL: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py
    Version: 3.x
  Converted From:
    Code: https://github.com/facebookresearch/Mask2Former
Models:
- Name: mask2former_r50_8xb2-90k_cityscapes-512x1024
  In Collection: Mask2Former
  Metadata:
    backbone: R-50-D32
    crop size: (512,1024)
    lr schd: 90000
    inference time (ms/im):
    - value: 109.05
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,1024)
    Training Memory (GB): 5806.0
  Results:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
      mIoU: 80.44
  Config: configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth
- Name: mask2former_r101_8xb2-90k_cityscapes-512x1024
  In Collection: Mask2Former
  Metadata:
    backbone: R-101-D32
    crop size: (512,1024)
    lr schd: 90000
    inference time (ms/im):
    - value: 140.65
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,1024)
    Training Memory (GB): 6971.0
  Results:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
      mIoU: 80.8
  Config: configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth
- Name: mask2former_swin-t_8xb2-90k_cityscapes-512x1024
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-T
    crop size: (512,1024)
    lr schd: 90000
    inference time (ms/im):
    - value: 139.28
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,1024)
    Training Memory (GB): 6511.0
  Results:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
      mIoU: 81.71
  Config: configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth
- Name: mask2former_swin-s_8xb2-90k_cityscapes-512x1024
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-S
    crop size: (512,1024)
    lr schd: 90000
    inference time (ms/im):
    - value: 179.53
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,1024)
    Training Memory (GB): 8282.0
  Results:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
      mIoU: 82.57
  Config: configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth
- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-B (in22k)
    crop size: (512,1024)
    lr schd: 90000
    inference time (ms/im):
    - value: 231.48
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,1024)
    Training Memory (GB): 11152.0
  Results:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
      mIoU: 83.52
  Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth
- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-L (in22k)
    crop size: (512,1024)
    lr schd: 90000
    inference time (ms/im):
    - value: 349.65
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,1024)
    Training Memory (GB): 16207.0
  Results:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
      mIoU: 83.65
  Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth
- Name: mask2former_r50_8xb2-160k_ade20k-512x512
  In Collection: Mask2Former
  Metadata:
    backbone: R-50-D32
    crop size: (512,512)
    lr schd: 160000
    inference time (ms/im):
    - value: 37.61
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 3385.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 47.87
  Config: configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth
- Name: mask2former_r101_8xb2-160k_ade20k-512x512
  In Collection: Mask2Former
  Metadata:
    backbone: R-101-D32
    crop size: (512,512)
    lr schd: 160000
    inference time (ms/im):
    - value: 43.54
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 4190.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 48.6
  Config: configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth
- Name: mask2former_swin-t_8xb2-160k_ade20k-512x512
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-T
    crop size: (512,512)
    lr schd: 160000
    inference time (ms/im):
    - value: 41.98
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 3826.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 48.66
  Config: configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth
- Name: mask2former_swin-s_8xb2-160k_ade20k-512x512
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-S
    crop size: (512,512)
    lr schd: 160000
    inference time (ms/im):
    - value: 50.79
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 5034.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 51.24
  Config: configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth
- Name: mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-B
    crop size: (640,640)
    lr schd: 160000
    inference time (ms/im):
    - value: 80.13
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (640,640)
    Training Memory (GB): 5795.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 52.44
  Config: configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth
- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-B (in22k)
    crop size: (640,640)
    lr schd: 160000
    inference time (ms/im):
    - value: 80.45
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (640,640)
    Training Memory (GB): 5795.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 53.9
  Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth
- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640
  In Collection: Mask2Former
  Metadata:
    backbone: Swin-L (in22k)
    crop size: (640,640)
    lr schd: 160000
    inference time (ms/im):
    - value: 113.51
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (640,640)
    Training Memory (GB): 9077.0
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 56.01
  Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth