[Benchmark] Add BiSeNetV1 COCO-Stuff 164k benchmark (#1019)
* bisenetv1 on cocostuff164k * change config_names & delete redundant keys * pretrain should come before lr. * remove redundancy in bisenetv1_r50-d32 pull/1052/head
parent
97f9670c5a
commit
e38eae3894
|
@ -35,8 +35,19 @@
|
|||
| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
|
||||
| BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) |
|
||||
|
||||
### COCO-Stuff 164k
|
||||
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
|
||||
| --------- | --------- | --------- | ------: | -------- | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| BiSeNetV1 (No Pretrain) | R-18-D32 | 512x512 | 160000 | - | - | 25.45 | 26.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328.log.json) |
|
||||
| BiSeNetV1 | R-18-D32 | 512x512 | 160000 | 6.33 | 74.24 | 28.55 | 29.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100.log.json) |
|
||||
| BiSeNetV1 (No Pretrain) | R-50-D32 | 512x512 | 160000 | - | - | 29.82 | 30.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616.log.json) |
|
||||
| BiSeNetV1 | R-50-D32 | 512x512 | 160000 | 9.28 | 32.60 | 34.88 | 35.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932.log.json) |
|
||||
| BiSeNetV1 (No Pretrain) | R-101-D32 | 512x512 | 160000 | - | - | 31.14 | 31.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147.log.json) |
|
||||
| BiSeNetV1 | R-101-D32 | 512x512 | 160000 | 10.36 | 25.25 | 37.38 | 37.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220.log.json) |
|
||||
|
||||
Note:
|
||||
|
||||
- `4x8`: Using 4 GPUs with 8 samples per GPU in training.
|
||||
- Default setting is 4 GPUs with 4 samples per GPU in training.
|
||||
- For BiSeNetV1 on Cityscapes dataset, default setting is 4 GPUs with 4 samples per GPU in training.
|
||||
- `No Pretrain` means the model is trained from scratch.
|
||||
|
|
|
@ -3,6 +3,7 @@ Collections:
|
|||
Metadata:
|
||||
Training Data:
|
||||
- Cityscapes
|
||||
- COCO-Stuff 164k
|
||||
Paper:
|
||||
URL: https://arxiv.org/abs/1808.00897
|
||||
Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation'
|
||||
|
@ -123,3 +124,111 @@ Models:
|
|||
mIoU(ms+flip): 79.57
|
||||
Config: configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth
|
||||
- Name: bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k
|
||||
In Collection: bisenetv1
|
||||
Metadata:
|
||||
backbone: R-18-D32
|
||||
crop size: (512,512)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: COCO-Stuff 164k
|
||||
Metrics:
|
||||
mIoU: 25.45
|
||||
mIoU(ms+flip): 26.15
|
||||
Config: configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth
|
||||
- Name: bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k
|
||||
In Collection: bisenetv1
|
||||
Metadata:
|
||||
backbone: R-18-D32
|
||||
crop size: (512,512)
|
||||
lr schd: 160000
|
||||
inference time (ms/im):
|
||||
- value: 13.47
|
||||
hardware: V100
|
||||
backend: PyTorch
|
||||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (512,512)
|
||||
memory (GB): 6.33
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: COCO-Stuff 164k
|
||||
Metrics:
|
||||
mIoU: 28.55
|
||||
mIoU(ms+flip): 29.26
|
||||
Config: configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth
|
||||
- Name: bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k
|
||||
In Collection: bisenetv1
|
||||
Metadata:
|
||||
backbone: R-50-D32
|
||||
crop size: (512,512)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: COCO-Stuff 164k
|
||||
Metrics:
|
||||
mIoU: 29.82
|
||||
mIoU(ms+flip): 30.33
|
||||
Config: configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth
|
||||
- Name: bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k
|
||||
In Collection: bisenetv1
|
||||
Metadata:
|
||||
backbone: R-50-D32
|
||||
crop size: (512,512)
|
||||
lr schd: 160000
|
||||
inference time (ms/im):
|
||||
- value: 30.67
|
||||
hardware: V100
|
||||
backend: PyTorch
|
||||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (512,512)
|
||||
memory (GB): 9.28
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: COCO-Stuff 164k
|
||||
Metrics:
|
||||
mIoU: 34.88
|
||||
mIoU(ms+flip): 35.37
|
||||
Config: configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth
|
||||
- Name: bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k
|
||||
In Collection: bisenetv1
|
||||
Metadata:
|
||||
backbone: R-101-D32
|
||||
crop size: (512,512)
|
||||
lr schd: 160000
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: COCO-Stuff 164k
|
||||
Metrics:
|
||||
mIoU: 31.14
|
||||
mIoU(ms+flip): 31.76
|
||||
Config: configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth
|
||||
- Name: bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k
|
||||
In Collection: bisenetv1
|
||||
Metadata:
|
||||
backbone: R-101-D32
|
||||
crop size: (512,512)
|
||||
lr schd: 160000
|
||||
inference time (ms/im):
|
||||
- value: 39.6
|
||||
hardware: V100
|
||||
backend: PyTorch
|
||||
batch size: 1
|
||||
mode: FP32
|
||||
resolution: (512,512)
|
||||
memory (GB): 10.36
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: COCO-Stuff 164k
|
||||
Metrics:
|
||||
mIoU: 37.38
|
||||
mIoU(ms+flip): 37.99
|
||||
Config: configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
_base_ = './bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))
|
|
@ -0,0 +1,18 @@
|
|||
_base_ = [
|
||||
'../_base_/models/bisenetv1_r18-d32.py',
|
||||
'../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_160k.py'
|
||||
]
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
context_channels=(512, 1024, 2048),
|
||||
spatial_channels=(256, 256, 256, 512),
|
||||
out_channels=1024,
|
||||
backbone_cfg=dict(type='ResNet', depth=101)),
|
||||
decode_head=dict(in_channels=1024, channels=1024, num_classes=171),
|
||||
auxiliary_head=[
|
||||
dict(in_channels=512, channels=256, num_classes=171),
|
||||
dict(in_channels=512, channels=256, num_classes=171),
|
||||
])
|
||||
lr_config = dict(warmup='linear', warmup_iters=1000)
|
||||
optimizer = dict(lr=0.005)
|
|
@ -0,0 +1,6 @@
|
|||
_base_ = './bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py'
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))), )
|
|
@ -0,0 +1,13 @@
|
|||
_base_ = [
|
||||
'../_base_/models/bisenetv1_r18-d32.py',
|
||||
'../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_160k.py'
|
||||
]
|
||||
model = dict(
|
||||
decode_head=dict(num_classes=171),
|
||||
auxiliary_head=[
|
||||
dict(num_classes=171),
|
||||
dict(num_classes=171),
|
||||
])
|
||||
lr_config = dict(warmup='linear', warmup_iters=1000)
|
||||
optimizer = dict(lr=0.005)
|
|
@ -0,0 +1,7 @@
|
|||
_base_ = './bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py'
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
backbone_cfg=dict(
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))
|
|
@ -0,0 +1,18 @@
|
|||
_base_ = [
|
||||
'../_base_/models/bisenetv1_r18-d32.py',
|
||||
'../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_160k.py'
|
||||
]
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
context_channels=(512, 1024, 2048),
|
||||
spatial_channels=(256, 256, 256, 512),
|
||||
out_channels=1024,
|
||||
backbone_cfg=dict(type='ResNet', depth=50)),
|
||||
decode_head=dict(in_channels=1024, channels=1024, num_classes=171),
|
||||
auxiliary_head=[
|
||||
dict(in_channels=512, channels=256, num_classes=171),
|
||||
dict(in_channels=512, channels=256, num_classes=171),
|
||||
])
|
||||
lr_config = dict(warmup='linear', warmup_iters=1000)
|
||||
optimizer = dict(lr=0.005)
|
Loading…
Reference in New Issue