diff --git a/configs/segmentation/segformer/segformer_b0_coco.py b/configs/segmentation/segformer/segformer_b0_coco.py index 75dbefe8..52b5c5de 100644 --- a/configs/segmentation/segformer/segformer_b0_coco.py +++ b/configs/segmentation/segformer/segformer_b0_coco.py @@ -170,7 +170,6 @@ data = dict( pipeline=train_pipeline), val=dict( imgs_per_gpu=1, - workers_per_gpu=1, ignore_index=255, type=dataset_type, data_source=dict( @@ -185,7 +184,6 @@ data = dict( pipeline=test_pipeline), test=dict( imgs_per_gpu=1, - workers_per_gpu=1, type=dataset_type, data_source=dict( type='SegSourceRaw', diff --git a/configs/segmentation/segformer/segformer_b1_coco.py b/configs/segmentation/segformer/segformer_b1_coco.py new file mode 100644 index 00000000..64f6e2c1 --- /dev/null +++ b/configs/segmentation/segformer/segformer_b1_coco.py @@ -0,0 +1,8 @@ +_base_ = './segformer_b0_coco.py' + +model = dict( + pretrained= + 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth', + backbone=dict(embed_dims=64, ), + decode_head=dict(in_channels=[64, 128, 320, 512], ), +) diff --git a/configs/segmentation/segformer/segformer_b2_coco.py b/configs/segmentation/segformer/segformer_b2_coco.py new file mode 100644 index 00000000..16538eb3 --- /dev/null +++ b/configs/segmentation/segformer/segformer_b2_coco.py @@ -0,0 +1,14 @@ +_base_ = './segformer_b0_coco.py' + +model = dict( + pretrained= + 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth', + backbone=dict( + embed_dims=64, + num_layers=[3, 4, 6, 3], + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + channels=768, + ), +) diff --git a/configs/segmentation/segformer/segformer_b3_coco.py b/configs/segmentation/segformer/segformer_b3_coco.py new file mode 100644 index 00000000..8e59d9be --- /dev/null +++ b/configs/segmentation/segformer/segformer_b3_coco.py @@ -0,0 +1,14 @@ +_base_ = './segformer_b0_coco.py' + +model = dict( + pretrained= + 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth', + backbone=dict( + embed_dims=64, + num_layers=[3, 4, 18, 3], + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + channels=768, + ), +) diff --git a/configs/segmentation/segformer/segformer_b4_coco.py b/configs/segmentation/segformer/segformer_b4_coco.py new file mode 100644 index 00000000..d8de6ce2 --- /dev/null +++ b/configs/segmentation/segformer/segformer_b4_coco.py @@ -0,0 +1,14 @@ +_base_ = './segformer_b0_coco.py' + +model = dict( + pretrained= + 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth', + backbone=dict( + embed_dims=64, + num_layers=[3, 8, 27, 3], + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + channels=768, + ), +) diff --git a/configs/segmentation/segformer/segformer_b5_coco.py b/configs/segmentation/segformer/segformer_b5_coco.py index 9863cc14..83ccd9a3 100644 --- a/configs/segmentation/segformer/segformer_b5_coco.py +++ b/configs/segmentation/segformer/segformer_b5_coco.py @@ -1,119 +1,18 @@ -# segformer B5 +_base_ = './segformer_b0_coco.py' -CLASSES = [ - 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', - 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', - 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', - 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', - 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', - 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', - 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', - 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', - 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', - 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush', 'banner', 'blanket', 'branch', 'bridge', - 'building-other', 'bush', 'cabinet', 'cage', 'cardboard', 'carpet', - 'ceiling-other', 'ceiling-tile', 'cloth', 'clothes', 'clouds', 'counter', - 'cupboard', 'curtain', 'desk-stuff', 'dirt', 'door-stuff', 'fence', - 'floor-marble', 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', - 'flower', 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', - 'gravel', 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', - 'metal', 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', - 'paper', 'pavement', 'pillow', 'plant-other', 'plastic', 'platform', - 'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof', - 'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', 'snow', - 'solid-other', 'stairs', 'stone', 'straw', 'structural-other', 'table', - 'tent', 'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick', - 'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone', 'wall-tile', - 'wall-wood', 'water-other', 'waterdrops', 'window-blind', 'window-other', - 'wood' -] -PALETTE = [[0, 192, 64], [0, 192, 64], [0, 64, 96], - [128, 192, 192], [0, 64, 64], [0, 192, 224], [0, 192, 192], - [128, 192, 64], [0, 192, 96], [128, 192, 64], [128, 32, 192], - [0, 0, 224], [0, 0, 64], [0, 160, 192], [128, 0, 96], [128, 0, 192], - [0, 32, 192], [128, 128, 224], [0, 0, 192], [128, 160, 192], - [128, 128, 0], [128, 0, 32], [128, 32, 0], [128, 0, 128], - [64, 128, 32], [0, 160, 0], [0, 0, 0], [192, 128, 160], [0, 32, 0], - [0, 128, 128], [64, 128, 160], [128, 160, 0], [0, 128, 0], - [192, 128, 32], [128, 96, 128], [0, 0, 128], [64, 0, 32], - [0, 224, 128], [128, 0, 0], [192, 0, 160], [0, 96, 128], - [128, 128, 128], [64, 0, 160], [128, 224, 128], [128, 128, - 64], [192, 0, 32], - [128, 96, 0], [128, 0, 192], [0, 128, 32], [64, 224, 0], [0, 0, 64], - [128, 128, 160], [64, 96, 0], [0, 128, 192], [0, 128, 160], - [192, 224, 0], [0, 128, 64], [128, 128, 32], [192, 32, 128], - [0, 64, 192], [0, 0, 32], [64, 160, 128], [128, 64, 64], - [128, 0, 160], [64, 32, 128], [128, 192, 192], [0, 0, 160], - [192, 160, 128], [128, 192, 0], [128, 0, 96], [192, 32, 0], - [128, 64, 128], [64, 128, 96], [64, 160, 0], [0, 64, 0], - [192, 128, 224], [64, 32, 0], [0, 192, 128], [64, 128, 224], - [192, 160, 0], [0, 192, 0], [192, 128, 96], [192, 96, 128], - [0, 64, 128], [64, 0, 96], [64, 224, 128], [128, 64, 0], - [192, 0, 224], [64, 96, 128], [128, 192, 128], [64, 0, 224], - [192, 224, 128], [128, 192, 64], [192, 0, 96], [192, 96, 0], - [128, 64, 192], [0, 128, 96], [0, 224, 0], [64, 64, 64], - [128, 128, 224], [0, 96, 0], [64, 192, 192], [0, 128, 224], - [128, 224, 0], [64, 192, 64], [128, 128, 96], [128, 32, 128], - [64, 0, 192], [0, 64, 96], [0, 160, 128], [192, 0, 64], - [128, 64, 224], [0, 32, 128], [192, 128, 192], [0, 64, 224], - [128, 160, 128], [192, 128, 0], [128, 64, 32], [128, 32, 64], - [192, 0, 128], [64, 192, 32], [0, 160, 64], [64, 0, 0], - [192, 192, 160], [0, 32, 64], [64, 128, 128], [64, 192, 160], - [128, 160, 64], [64, 128, 0], [192, 192, 32], [128, 96, 192], - [64, 0, 128], [64, 64, 32], [0, 224, 192], [192, 0, 0], - [192, 64, 160], [0, 96, 192], [192, 128, 128], [64, 64, 160], - [128, 224, 192], [192, 128, 64], [192, 64, 32], [128, 96, 64], - [192, 0, 192], [0, 192, 32], [64, 224, 64], [64, 0, 64], - [128, 192, 160], [64, 96, 64], [64, 128, 192], [0, 192, 160], - [192, 224, 64], [64, 128, 64], [128, 192, 32], [192, 32, 192], - [64, 64, 192], [0, 64, 32], [64, 160, 192], [192, 64, 64], - [128, 64, 160], [64, 32, 192], [192, 192, 192], [0, 64, 160], - [192, 160, 192], [192, 192, 0], [128, 64, 96], [192, 32, 64], - [192, 64, 128], [64, 192, 96], [64, 160, 64], [64, 64, 0]] - -num_classes = 172 - -norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( - type='EncoderDecoder', pretrained= 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth', backbone=dict( - type='MixVisionTransformer', - in_channels=3, embed_dims=64, - num_stages=4, num_layers=[3, 6, 40, 3], - num_heads=[1, 2, 5, 8], - patch_sizes=[7, 3, 3, 3], - sr_ratios=[8, 4, 2, 1], - out_indices=(0, 1, 2, 3), - mlp_ratio=4, - qkv_bias=True, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.1), + ), decode_head=dict( - type='SegformerHead', in_channels=[64, 128, 320, 512], - in_index=[0, 1, 2, 3], channels=768, - dropout_ratio=0.1, - num_classes=num_classes, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole')) + ), +) -# dataset settings -dataset_type = 'SegDataset' -data_root = 'data/coco_stuff164k/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (640, 640) @@ -151,101 +50,3 @@ test_pipeline = [ 'flip_direction', 'img_norm_cfg')), ]) ] - -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ignore_index=255, - data_source=dict( - type='SegSourceRaw', - img_suffix='.jpg', - label_suffix='_labelTrainIds.png', - img_root=data_root + 'train2017/', - label_root=data_root + 'annotations/train2017/', - split=data_root + 'train.txt', - classes=CLASSES, - ), - pipeline=train_pipeline), - val=dict( - imgs_per_gpu=1, - workers_per_gpu=1, - ignore_index=255, - type=dataset_type, - data_source=dict( - type='SegSourceRaw', - img_suffix='.jpg', - label_suffix='_labelTrainIds.png', - img_root=data_root + 'val2017/', - label_root=data_root + 'annotations/val2017', - split=data_root + 'val.txt', - classes=CLASSES, - ), - pipeline=test_pipeline), - test=dict( - imgs_per_gpu=1, - workers_per_gpu=1, - type=dataset_type, - data_source=dict( - type='SegSourceRaw', - img_suffix='.jpg', - label_suffix='_labelTrainIds.png', - img_root=data_root + 'val2017/', - label_root=data_root + 'annotations/val2017', - split=data_root + 'val.txt', - classes=CLASSES, - ), - pipeline=test_pipeline)) -optimizer = dict( - type='AdamW', - lr=6e-05, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_options=dict( - custom_keys=dict( - pos_block=dict(decay_mult=0.0), - norm=dict(decay_mult=0.0), - head=dict(lr_mult=10.0)))) -optimizer_config = dict() -lr_config = dict( - policy='poly', - warmup='linear', - warmup_iters=800, - warmup_ratio=1e-06, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# runtime settings -total_epochs = 20 -checkpoint_config = dict(interval=1) -eval_config = dict(interval=1, gpu_collect=False) -eval_pipelines = [ - dict( - mode='test', - evaluators=[ - dict( - type='SegmentationEvaluator', - classes=CLASSES, - metric_names=['mIoU']) - ], - ) -] - -predict = dict(type='SegmentationPredictor') - -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) - -dist_params = dict(backend='nccl') - -cudnn_benchmark = False -log_level = 'INFO' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/docs/source/model_zoo_seg.md b/docs/source/model_zoo_seg.md index 3593cf08..f06e475c 100644 --- a/docs/source/model_zoo_seg.md +++ b/docs/source/model_zoo_seg.md @@ -36,4 +36,8 @@ Semantic segmentation models trained on **CoCo_stuff164k**. | Algorithm | Config | Params
(backbone/total) | inference time(V100)
(ms/img) |mIoU | Download | | ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | SegFormer_B0 | [segformer_b0_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b0_coco.py) | 3.3M/3.8M | 47.2ms | 34.79 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b0/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b0/20220810_102335.log.json) | -| SegFormer_B5 | [segformer_b5_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b5_coco.py) | 81M/85M | 99.2ms | 46.75 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b5/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b5/20220812_144336.log.json) | +| SegFormer_B1 | [segformer_b1_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b1_coco.py) | 13.2M/13.7M | 46.8ms | 39.27 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b1/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b1/20220819_142214.log.json) | +| SegFormer_B2 | [segformer_b2_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b2_coco.py) | 24.2M/27.5M | 49.1ms | 44.01 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b2/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b2/20220822_100513.log.json) | +| SegFormer_B3 | [segformer_b3_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b3_coco.py) | 44.1M/47.4M | 52.3ms | 45.31 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b3/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b3/20220823_100744.log.json) | +| SegFormer_B4 | [segformer_b4_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b4_coco.py) | 60.8M/64.1M | 58.5ms | 46.00 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b4/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b4/20220824_102735.log.json) | +| SegFormer_B5 | [segformer_b5_coco.py](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/segformer/segformer_b5_coco.py) | 81.4M/85.7M | 99.2ms | 46.75 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b5/SegmentationEvaluator_mIoU_best.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/damo/modelzoo/segmentation/segformer/segformer_b5/20220812_144336.log.json) |