diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py index f863404ea..31ac64fdf 100644 --- a/configs/_base_/datasets/ade20k.py +++ b/configs/_base_/datasets/ade20k.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'ADE20KDataset' data_root = 'data/ade/ADEChallengeData2016' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/ade20k_640x640.py b/configs/_base_/datasets/ade20k_640x640.py index ec7c5de89..47e75fd66 100644 --- a/configs/_base_/datasets/ade20k_640x640.py +++ b/configs/_base_/datasets/ade20k_640x640.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'ADE20KDataset' data_root = 'data/ade/ADEChallengeData2016' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (640, 640) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py index 8f917e918..9b861861b 100644 --- a/configs/_base_/datasets/chase_db1.py +++ b/configs/_base_/datasets/chase_db1.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'ChaseDB1Dataset' data_root = 'data/CHASE_DB1' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (960, 999) crop_size = (128, 128) train_pipeline = [ @@ -12,7 +10,6 @@ 
train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py index 32bcd450f..5d314aa3f 100644 --- a/configs/_base_/datasets/cityscapes.py +++ b/configs/_base_/datasets/cityscapes.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'CityscapesDataset' data_root = 'data/cityscapes/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 1024) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/cityscapes_1024x1024.py b/configs/_base_/datasets/cityscapes_1024x1024.py index 3c79537bf..08a352723 100644 --- a/configs/_base_/datasets/cityscapes_1024x1024.py +++ b/configs/_base_/datasets/cityscapes_1024x1024.py @@ -1,6 +1,4 @@ _base_ = './cityscapes.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (1024, 1024) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -9,7 +7,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/cityscapes_768x768.py b/configs/_base_/datasets/cityscapes_768x768.py index bb012a70a..818644f18 100644 --- a/configs/_base_/datasets/cityscapes_768x768.py +++ b/configs/_base_/datasets/cityscapes_768x768.py @@ -1,6 +1,4 @@ _base_ = './cityscapes.py' 
-img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (768, 768) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -9,7 +7,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/cityscapes_769x769.py b/configs/_base_/datasets/cityscapes_769x769.py index 44a329ac9..ed3cfef71 100644 --- a/configs/_base_/datasets/cityscapes_769x769.py +++ b/configs/_base_/datasets/cityscapes_769x769.py @@ -1,6 +1,4 @@ _base_ = './cityscapes.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (769, 769) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -9,7 +7,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/cityscapes_832x832.py b/configs/_base_/datasets/cityscapes_832x832.py index 03fdc4d79..0f9b5815a 100644 --- a/configs/_base_/datasets/cityscapes_832x832.py +++ b/configs/_base_/datasets/cityscapes_832x832.py @@ -1,6 +1,4 @@ _base_ = './cityscapes.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (832, 832) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -9,7 +7,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/coco-stuff10k.py b/configs/_base_/datasets/coco-stuff10k.py index b660e6867..d617a7241 100644 --- 
a/configs/_base_/datasets/coco-stuff10k.py +++ b/configs/_base_/datasets/coco-stuff10k.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'COCOStuffDataset' data_root = 'data/coco_stuff10k' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/coco-stuff164k.py b/configs/_base_/datasets/coco-stuff164k.py index 9ea2e20a5..01c55ae8a 100644 --- a/configs/_base_/datasets/coco-stuff164k.py +++ b/configs/_base_/datasets/coco-stuff164k.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'COCOStuffDataset' data_root = 'data/coco_stuff164k' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py index bac4a67eb..85ee6accb 100644 --- a/configs/_base_/datasets/drive.py +++ b/configs/_base_/datasets/drive.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'DRIVEDataset' data_root = 'data/DRIVE' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (584, 565) crop_size = (64, 64) train_pipeline = [ @@ -12,7 +10,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - 
dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py index b9e6b894a..1401d8f5a 100644 --- a/configs/_base_/datasets/hrf.py +++ b/configs/_base_/datasets/hrf.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'HRFDataset' data_root = 'data/HRF' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (2336, 3504) crop_size = (256, 256) train_pipeline = [ @@ -12,7 +10,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/isaid.py b/configs/_base_/datasets/isaid.py index 2f15fac10..25b01c065 100644 --- a/configs/_base_/datasets/isaid.py +++ b/configs/_base_/datasets/isaid.py @@ -1,9 +1,6 @@ # dataset settings dataset_type = 'iSAIDDataset' data_root = 'data/iSAID' - -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) """ This crop_size setting is followed by the implementation of `PointFlow: Flowing Semantics Through Points for Aerial Image @@ -19,7 +16,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/loveda.py b/configs/_base_/datasets/loveda.py index 1df7a4d65..73b754ea3 100644 --- a/configs/_base_/datasets/loveda.py +++ b/configs/_base_/datasets/loveda.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'LoveDADataset' data_root = 'data/loveDA' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ 
dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/pascal_context.py b/configs/_base_/datasets/pascal_context.py index e05c59fef..096e55599 100644 --- a/configs/_base_/datasets/pascal_context.py +++ b/configs/_base_/datasets/pascal_context.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'PascalContextDataset' data_root = 'data/VOCdevkit/VOC2010/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (520, 520) crop_size = (480, 480) @@ -14,7 +12,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py index 58cde6cc7..21d36d0dc 100644 --- a/configs/_base_/datasets/pascal_context_59.py +++ b/configs/_base_/datasets/pascal_context_59.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'PascalContextDataset59' data_root = 'data/VOCdevkit/VOC2010/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (520, 520) crop_size = (480, 480) @@ -14,7 +12,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py index a8611b50a..b45d7db75 100644 --- a/configs/_base_/datasets/pascal_voc12.py +++ 
b/configs/_base_/datasets/pascal_voc12.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'PascalVOCDataset' data_root = 'data/VOCdevkit/VOC2012' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/potsdam.py b/configs/_base_/datasets/potsdam.py index ef7d9ef03..2ff9d02a1 100644 --- a/configs/_base_/datasets/potsdam.py +++ b/configs/_base_/datasets/potsdam.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'PotsdamDataset' data_root = 'data/potsdam' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/datasets/stare.py b/configs/_base_/datasets/stare.py index 4c91c8c54..64284d92e 100644 --- a/configs/_base_/datasets/stare.py +++ b/configs/_base_/datasets/stare.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'STAREDataset' data_root = 'data/STARE' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (605, 700) crop_size = (128, 128) train_pipeline = [ @@ -12,7 +10,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = 
[ diff --git a/configs/_base_/datasets/vaihingen.py b/configs/_base_/datasets/vaihingen.py index f564f5667..96874bdda 100644 --- a/configs/_base_/datasets/vaihingen.py +++ b/configs/_base_/datasets/vaihingen.py @@ -1,8 +1,6 @@ # dataset settings dataset_type = 'ISPRSDataset' data_root = 'data/vaihingen' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -11,7 +9,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/_base_/models/ann_r50-d8.py b/configs/_base_/models/ann_r50-d8.py index a2cb65382..b02526066 100644 --- a/configs/_base_/models/ann_r50-d8.py +++ b/configs/_base_/models/ann_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/apcnet_r50-d8.py b/configs/_base_/models/apcnet_r50-d8.py index c8f5316cb..e321f61dd 100644 --- a/configs/_base_/models/apcnet_r50-d8.py +++ b/configs/_base_/models/apcnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/bisenetv1_r18-d32.py b/configs/_base_/models/bisenetv1_r18-d32.py index 
40698644b..4facee139 100644 --- a/configs/_base_/models/bisenetv1_r18-d32.py +++ b/configs/_base_/models/bisenetv1_r18-d32.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='BiSeNetV1', in_channels=3, diff --git a/configs/_base_/models/bisenetv2.py b/configs/_base_/models/bisenetv2.py index f8fffeeca..8be0a1779 100644 --- a/configs/_base_/models/bisenetv2.py +++ b/configs/_base_/models/bisenetv2.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='BiSeNetV2', diff --git a/configs/_base_/models/ccnet_r50-d8.py b/configs/_base_/models/ccnet_r50-d8.py index 794148f57..47ceda4bd 100644 --- a/configs/_base_/models/ccnet_r50-d8.py +++ b/configs/_base_/models/ccnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/cgnet.py b/configs/_base_/models/cgnet.py index eff8d9458..555725f7d 100644 --- a/configs/_base_/models/cgnet.py +++ b/configs/_base_/models/cgnet.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +preprocess_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], + std=[1, 1, 1], + to_rgb=True, + pad_val=0, + 
seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='CGNet', norm_cfg=norm_cfg, diff --git a/configs/_base_/models/danet_r50-d8.py b/configs/_base_/models/danet_r50-d8.py index 2c934939f..af4e3923d 100644 --- a/configs/_base_/models/danet_r50-d8.py +++ b/configs/_base_/models/danet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/deeplabv3_r50-d8.py b/configs/_base_/models/deeplabv3_r50-d8.py index d7a43bee0..746e528c1 100644 --- a/configs/_base_/models/deeplabv3_r50-d8.py +++ b/configs/_base_/models/deeplabv3_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/deeplabv3_unet_s5-d16.py b/configs/_base_/models/deeplabv3_unet_s5-d16.py index 0cd262999..8cbd64574 100644 --- a/configs/_base_/models/deeplabv3_unet_s5-d16.py +++ b/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='UNet', diff --git a/configs/_base_/models/deeplabv3plus_r50-d8.py b/configs/_base_/models/deeplabv3plus_r50-d8.py 
index 050e39e09..5c30fecbf 100644 --- a/configs/_base_/models/deeplabv3plus_r50-d8.py +++ b/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/dmnet_r50-d8.py b/configs/_base_/models/dmnet_r50-d8.py index d22ba5264..105b53417 100644 --- a/configs/_base_/models/dmnet_r50-d8.py +++ b/configs/_base_/models/dmnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/dnl_r50-d8.py b/configs/_base_/models/dnl_r50-d8.py index edb4c174c..bb2361e3d 100644 --- a/configs/_base_/models/dnl_r50-d8.py +++ b/configs/_base_/models/dnl_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/dpt_vit-b16.py b/configs/_base_/models/dpt_vit-b16.py index dfd48a95f..e6a539e4e 100644 --- a/configs/_base_/models/dpt_vit-b16.py +++ b/configs/_base_/models/dpt_vit-b16.py @@ -1,6 +1,13 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 
57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa backbone=dict( type='VisionTransformer', diff --git a/configs/_base_/models/emanet_r50-d8.py b/configs/_base_/models/emanet_r50-d8.py index 26adcd430..13bb0c620 100644 --- a/configs/_base_/models/emanet_r50-d8.py +++ b/configs/_base_/models/emanet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/encnet_r50-d8.py b/configs/_base_/models/encnet_r50-d8.py index be777123a..0313d860f 100644 --- a/configs/_base_/models/encnet_r50-d8.py +++ b/configs/_base_/models/encnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/erfnet_fcn.py b/configs/_base_/models/erfnet_fcn.py index 7f2e9bff8..e2283ddac 100644 --- a/configs/_base_/models/erfnet_fcn.py +++ b/configs/_base_/models/erfnet_fcn.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='ERFNet', diff --git a/configs/_base_/models/fast_scnn.py 
b/configs/_base_/models/fast_scnn.py index 8e89d911d..6c9c2fe12 100644 --- a/configs/_base_/models/fast_scnn.py +++ b/configs/_base_/models/fast_scnn.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='FastSCNN', downsample_dw_channels=(32, 48), diff --git a/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py b/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py index 9dc8609ae..fe4a93488 100644 --- a/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py +++ b/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/fcn_hr18.py b/configs/_base_/models/fcn_hr18.py index c3e299bc8..1c77e7df7 100644 --- a/configs/_base_/models/fcn_hr18.py +++ b/configs/_base_/models/fcn_hr18.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://msra/hrnetv2_w18', backbone=dict( type='HRNet', diff --git a/configs/_base_/models/fcn_r50-d8.py b/configs/_base_/models/fcn_r50-d8.py index 5e98f6cc9..205ec76e9 100644 --- a/configs/_base_/models/fcn_r50-d8.py +++ b/configs/_base_/models/fcn_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) 
+preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/fcn_unet_s5-d16.py b/configs/_base_/models/fcn_unet_s5-d16.py index a33e79728..c40fe6df2 100644 --- a/configs/_base_/models/fcn_unet_s5-d16.py +++ b/configs/_base_/models/fcn_unet_s5-d16.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='UNet', diff --git a/configs/_base_/models/fpn_r50.py b/configs/_base_/models/fpn_r50.py index 86ab327db..c786dfc3a 100644 --- a/configs/_base_/models/fpn_r50.py +++ b/configs/_base_/models/fpn_r50.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/gcnet_r50-d8.py b/configs/_base_/models/gcnet_r50-d8.py index 3d2ad69f5..62680914b 100644 --- a/configs/_base_/models/gcnet_r50-d8.py +++ b/configs/_base_/models/gcnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git 
a/configs/_base_/models/icnet_r50-d8.py b/configs/_base_/models/icnet_r50-d8.py index d7273cd28..7462a700c 100644 --- a/configs/_base_/models/icnet_r50-d8.py +++ b/configs/_base_/models/icnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='ICNet', backbone_cfg=dict( diff --git a/configs/_base_/models/isanet_r50-d8.py b/configs/_base_/models/isanet_r50-d8.py index c0221a371..69161dbd7 100644 --- a/configs/_base_/models/isanet_r50-d8.py +++ b/configs/_base_/models/isanet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/lraspp_m-v3-d8.py b/configs/_base_/models/lraspp_m-v3-d8.py index 93258242a..bcd02b398 100644 --- a/configs/_base_/models/lraspp_m-v3-d8.py +++ b/configs/_base_/models/lraspp_m-v3-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='MobileNetV3', arch='large', diff --git a/configs/_base_/models/nonlocal_r50-d8.py b/configs/_base_/models/nonlocal_r50-d8.py index 5674a3985..3943a3203 100644 --- a/configs/_base_/models/nonlocal_r50-d8.py +++ b/configs/_base_/models/nonlocal_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', 
requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/ocrnet_hr18.py b/configs/_base_/models/ocrnet_hr18.py index c60f62a7c..91b40f4ad 100644 --- a/configs/_base_/models/ocrnet_hr18.py +++ b/configs/_base_/models/ocrnet_hr18.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='CascadeEncoderDecoder', + preprocess_cfg=preprocess_cfg, num_stages=2, pretrained='open-mmlab://msra/hrnetv2_w18', backbone=dict( diff --git a/configs/_base_/models/ocrnet_r50-d8.py b/configs/_base_/models/ocrnet_r50-d8.py index 615aa3ff7..4f7da7463 100644 --- a/configs/_base_/models/ocrnet_r50-d8.py +++ b/configs/_base_/models/ocrnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='CascadeEncoderDecoder', + preprocess_cfg=preprocess_cfg, num_stages=2, pretrained='open-mmlab://resnet50_v1c', backbone=dict( diff --git a/configs/_base_/models/pointrend_r50.py b/configs/_base_/models/pointrend_r50.py index 9d323dbf9..a0d861398 100644 --- a/configs/_base_/models/pointrend_r50.py +++ b/configs/_base_/models/pointrend_r50.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='CascadeEncoderDecoder', + preprocess_cfg=preprocess_cfg, num_stages=2, 
pretrained='open-mmlab://resnet50_v1c', backbone=dict( diff --git a/configs/_base_/models/psanet_r50-d8.py b/configs/_base_/models/psanet_r50-d8.py index 689513fa9..d55435460 100644 --- a/configs/_base_/models/psanet_r50-d8.py +++ b/configs/_base_/models/psanet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/pspnet_r50-d8.py b/configs/_base_/models/pspnet_r50-d8.py index f451e08ad..0fa2bdb21 100644 --- a/configs/_base_/models/pspnet_r50-d8.py +++ b/configs/_base_/models/pspnet_r50-d8.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/pspnet_unet_s5-d16.py b/configs/_base_/models/pspnet_unet_s5-d16.py index fcff9ec4f..46f6bded7 100644 --- a/configs/_base_/models/pspnet_unet_s5-d16.py +++ b/configs/_base_/models/pspnet_unet_s5-d16.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='UNet', diff --git a/configs/_base_/models/segformer_mit-b0.py b/configs/_base_/models/segformer_mit-b0.py index 5b3e07331..63d9d14db 100644 --- a/configs/_base_/models/segformer_mit-b0.py +++ 
b/configs/_base_/models/segformer_mit-b0.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='MixVisionTransformer', diff --git a/configs/_base_/models/segmenter_vit-b16_mask.py b/configs/_base_/models/segmenter_vit-b16_mask.py index 622f1228a..45131cfb0 100644 --- a/configs/_base_/models/segmenter_vit-b16_mask.py +++ b/configs/_base_/models/segmenter_vit-b16_mask.py @@ -1,8 +1,15 @@ checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa # model settings backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +preprocess_cfg = dict( + mean=[127.5, 127.5, 127.5], + std=[127.5, 127.5, 127.5], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=checkpoint, backbone=dict( type='VisionTransformer', diff --git a/configs/_base_/models/setr_mla.py b/configs/_base_/models/setr_mla.py index af4ba2492..420f8a0da 100644 --- a/configs/_base_/models/setr_mla.py +++ b/configs/_base_/models/setr_mla.py @@ -1,8 +1,15 @@ # model settings backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', backbone=dict( type='VisionTransformer', diff --git a/configs/_base_/models/setr_naive.py b/configs/_base_/models/setr_naive.py index 0c330ea2d..988fe8220 100644 --- a/configs/_base_/models/setr_naive.py +++ 
b/configs/_base_/models/setr_naive.py @@ -1,8 +1,15 @@ # model settings backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', backbone=dict( type='VisionTransformer', diff --git a/configs/_base_/models/setr_pup.py b/configs/_base_/models/setr_pup.py index 8e5f23b9c..b77988dd1 100644 --- a/configs/_base_/models/setr_pup.py +++ b/configs/_base_/models/setr_pup.py @@ -1,8 +1,15 @@ # model settings backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', backbone=dict( type='VisionTransformer', diff --git a/configs/_base_/models/stdc.py b/configs/_base_/models/stdc.py index 341a4ec58..804d415b1 100644 --- a/configs/_base_/models/stdc.py +++ b/configs/_base_/models/stdc.py @@ -1,6 +1,13 @@ norm_cfg = dict(type='BN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='STDCContextPathNet', diff --git a/configs/_base_/models/twins_pcpvt-s_fpn.py b/configs/_base_/models/twins_pcpvt-s_fpn.py index 0f4488a75..1a65e6536 100644 --- a/configs/_base_/models/twins_pcpvt-s_fpn.py +++ b/configs/_base_/models/twins_pcpvt-s_fpn.py @@ -3,8 +3,15 @@ checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/ # model 
settings backbone_norm_cfg = dict(type='LN') norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='PCPVT', init_cfg=dict(type='Pretrained', checkpoint=checkpoint), diff --git a/configs/_base_/models/twins_pcpvt-s_upernet.py b/configs/_base_/models/twins_pcpvt-s_upernet.py index 14a74b988..7da8663f1 100644 --- a/configs/_base_/models/twins_pcpvt-s_upernet.py +++ b/configs/_base_/models/twins_pcpvt-s_upernet.py @@ -3,8 +3,15 @@ checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/ # model settings backbone_norm_cfg = dict(type='LN') norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='PCPVT', init_cfg=dict(type='Pretrained', checkpoint=checkpoint), diff --git a/configs/_base_/models/upernet_beit.py b/configs/_base_/models/upernet_beit.py index 9c5bfa331..32454011d 100644 --- a/configs/_base_/models/upernet_beit.py +++ b/configs/_base_/models/upernet_beit.py @@ -1,6 +1,13 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='BEiT', diff --git a/configs/_base_/models/upernet_convnext.py b/configs/_base_/models/upernet_convnext.py index 36b882f68..1ca05f059 100644 --- a/configs/_base_/models/upernet_convnext.py +++ b/configs/_base_/models/upernet_convnext.py @@ -1,8 +1,15 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) custom_imports = 
dict(imports='mmcls.models', allow_failed_imports=False) checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='mmcls.ConvNeXt', diff --git a/configs/_base_/models/upernet_mae.py b/configs/_base_/models/upernet_mae.py index 1e0da7082..289e3764c 100644 --- a/configs/_base_/models/upernet_mae.py +++ b/configs/_base_/models/upernet_mae.py @@ -1,6 +1,13 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='MAE', diff --git a/configs/_base_/models/upernet_r50.py b/configs/_base_/models/upernet_r50.py index 10974962f..c13e00781 100644 --- a/configs/_base_/models/upernet_r50.py +++ b/configs/_base_/models/upernet_r50.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/_base_/models/upernet_swin.py b/configs/_base_/models/upernet_swin.py index 71b51629e..45e720bef 100644 --- a/configs/_base_/models/upernet_swin.py +++ b/configs/_base_/models/upernet_swin.py @@ -1,8 +1,15 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) backbone_norm_cfg = dict(type='LN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + 
std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='SwinTransformer', diff --git a/configs/_base_/models/upernet_vit-b16_ln_mln.py b/configs/_base_/models/upernet_vit-b16_ln_mln.py index cd6587dfe..9645ecb5d 100644 --- a/configs/_base_/models/upernet_vit-b16_ln_mln.py +++ b/configs/_base_/models/upernet_vit-b16_ln_mln.py @@ -1,7 +1,14 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) +preprocess_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + pad_val=0, + seg_pad_val=255) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', backbone=dict( type='VisionTransformer', diff --git a/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py b/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py index 00b2594ba..6e76b697b 100644 --- a/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py b/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py index ef7b369dd..848274b16 100644 --- a/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ann/ann_r50-d8_512x512_160k_ade20k.py 
b/configs/ann/ann_r50-d8_512x512_160k_ade20k.py index ca6bb248a..b46375c24 100644 --- a/configs/ann/ann_r50-d8_512x512_160k_ade20k.py +++ b/configs/ann/ann_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py b/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py index 071f19026..552efea5e 100644 --- a/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py +++ b/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py b/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py index 82a1c9386..102fc1055 100644 --- a/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py +++ b/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git 
a/configs/ann/ann_r50-d8_512x512_80k_ade20k.py b/configs/ann/ann_r50-d8_512x512_80k_ade20k.py index 5e04aa7c6..6cdaaecee 100644 --- a/configs/ann/ann_r50-d8_512x512_80k_ade20k.py +++ b/configs/ann/ann_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py b/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py index 4912bdb9f..9dd11d95e 100644 --- a/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py +++ b/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py b/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py index d1cc072b1..15db62bf2 100644 --- a/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py +++ b/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git 
a/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py index 99c61a942..fe5b0cd4d 100644 --- a/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py index 62a0627ae..9a7bc3136 100644 --- a/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py b/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py index f7821c559..8e93f0257 100644 --- a/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py +++ b/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py b/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py index daafa5fbc..25a5a3236 100644 --- 
a/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py +++ b/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py index 3db6140cb..9a1b2038b 100644 --- a/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py index 9cac4254f..1429fee5d 100644 --- a/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py 
b/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py index 7383af589..9acbe9dc2 100644 --- a/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py +++ b/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py @@ -1,8 +1,5 @@ _base_ = './upernet_beit-base_8x2_640x640_160k_ade20k.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - test_pipeline = [ dict(type='LoadImageFromFile'), # TODO: Refactor 'MultiScaleFlipAug' which supports diff --git a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py index 1ff26c1ae..5a26b04b8 100644 --- a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py @@ -2,8 +2,10 @@ _base_ = [ '../_base_/models/upernet_beit.py', '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='pretrain/beit_base_patch16_224_pt22k_ft22k.pth', test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) diff --git a/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py b/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py index 9a2e95909..f33d534ce 100644 --- a/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py +++ b/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py @@ -1,8 +1,5 @@ _base_ = './upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - test_pipeline = [ dict(type='LoadImageFromFile'), # TODO: Refactor 'MultiScaleFlipAug' which supports diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py index d3c0a15b6..ca571ee8d 100644 --- 
a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py @@ -2,8 +2,10 @@ _base_ = [ '../_base_/models/upernet_beit.py', '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_320k.py' ] - +crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='pretrain/beit_large_patch16_224_pt22k_ft22k.pth', backbone=dict( type='BEiT', diff --git a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py index 3b53dc6a8..7152c460d 100644 --- a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py +++ b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( context_channels=(512, 1024, 2048), spatial_channels=(256, 256, 256, 512), diff --git a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py index 4dbd2eb87..4db959e36 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py @@ -3,6 +3,9 @@ _base_ = [ '../_base_/datasets/cityscapes_1024x1024.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) param_scheduler = [ dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), dict( diff --git 
a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py index ba92ba162..4a8ae7c35 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_1024x1024.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( backbone_cfg=dict( init_cfg=dict( diff --git a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py index c6d93049e..4e073da41 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -1,6 +1,10 @@ _base_ = './bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( backbone_cfg=dict( init_cfg=dict( - type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))), ) + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))), +) diff --git a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py index 44c75225c..2c408a789 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) 
+preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=171), auxiliary_head=[ dict(num_classes=171), diff --git a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py index 26e05303a..1e4af495f 100644 --- a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py @@ -4,8 +4,11 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] norm_cfg = dict(type='SyncBN', requires_grad=True) +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, backbone=dict( type='BiSeNetV1', context_channels=(512, 1024, 2048), diff --git a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py index 93fbbbead..92c3e04c7 100644 --- a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py +++ b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( context_channels=(512, 1024, 2048), spatial_channels=(256, 256, 256, 512), diff --git a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py index b2988f744..9460f7049 100644 --- a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py @@ -3,6 +3,9 @@ _base_ = [ '../_base_/datasets/cityscapes_1024x1024.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_160k.py' ] +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) param_scheduler = [ dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), dict( diff --git a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py index 92d9914a8..57adf5101 100644 --- a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py @@ -3,6 +3,9 @@ _base_ = [ '../_base_/datasets/cityscapes_1024x1024.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) param_scheduler = [ dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), dict( diff --git a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py index 123e030a1..4d9395a41 100644 --- a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py @@ -3,8 +3,11 @@ _base_ = [ '../_base_/datasets/cityscapes_1024x1024.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) models = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict( sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)), auxiliary_head=[ diff --git a/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py index 6a4316dde..6fa71d1d3 100644 --- a/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ 
'../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py index 16e34356e..12c48a865 100644 --- a/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py b/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py index 1ad94d898..d53b4832e 100644 --- a/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py +++ b/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py b/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py index bbcd29cce..0b2fd15af 100644 --- a/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - 
decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py b/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py index 947b8ac8c..534a23fb3 100644 --- a/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py b/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py index 1a1f49cf6..fad9dfed0 100644 --- a/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py +++ b/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py index 580d59ca6..2f3927956 100644 --- a/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = 
dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py index c6dac6437..5beace965 100644 --- a/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py index b838b813f..fdb781d38 100644 --- a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py +++ b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py @@ -1,4 +1,7 @@ -_base_ = ['../_base_/models/cgnet.py', '../_base_/default_runtime.py'] +_base_ = [ + '../_base_/models/cgnet.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py' +] # optimizer optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) @@ -21,39 +24,10 @@ val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') default_hooks = dict(checkpoint=dict(by_epoch=False, interval=4000)) -# dataset settings -dataset_type = 'CityscapesDataset' -data_root = 'data/cityscapes/' -img_norm_cfg = dict( - mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) crop_size = (512, 1024) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, 
cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), - dict(type='PackSegInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', scale=(2048, 1024), keep_ratio=True), - dict(type='PackSegInputs') -] -train_dataloader = dict( - batch_size=8, - num_workers=4, - dataset=dict( - data_prefix=dict( - img_path='leftImg8bit/train', seg_map_path='gtFine/train'), - pipeline=train_pipeline)) -val_dataloader = dict( - batch_size=8, - num_workers=4, - dataset=dict( - data_prefix=dict( - img_path='leftImg8bit/val', seg_map_path='gtFine/val'), - pipeline=test_pipeline)) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) + +train_dataloader = dict(batch_size=8) +val_dataloader = dict(batch_size=8) test_dataloader = val_dataloader diff --git a/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/configs/cgnet/cgnet_680x680_60k_cityscapes.py index 24e873e52..e2aa3598b 100644 --- a/configs/cgnet/cgnet_680x680_60k_cityscapes.py +++ b/configs/cgnet/cgnet_680x680_60k_cityscapes.py @@ -24,16 +24,15 @@ val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') default_hooks = dict(checkpoint=dict(by_epoch=False, interval=4000)) -img_norm_cfg = dict( - mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) crop_size = (680, 680) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)), dict(type='RandomCrop', crop_size=crop_size), dict(type='RandomFlip', prob=0.5), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py index 6a0963e14..1c9e48d82 100644 --- 
a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py +++ b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py @@ -3,7 +3,9 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), auxiliary_head=dict(in_channels=512, num_classes=150), test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), diff --git a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py index f0abb7231..e625fd367 100644 --- a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py +++ b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py @@ -4,8 +4,10 @@ _base_ = [ '../_base_/schedules/schedule_160k.py' ] crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='mmcls.ConvNeXt', arch='base', diff --git a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py index 656cb1281..ba729e840 100644 --- a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py +++ b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py @@ -4,8 +4,10 @@ _base_ = [ '../_base_/schedules/schedule_160k.py' ] crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='mmcls.ConvNeXt', arch='large', diff --git 
a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py index b06e819a6..555ed74a4 100644 --- a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py +++ b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='mmcls.ConvNeXt', arch='small', diff --git a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py index 797026779..24054443a 100644 --- a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py +++ b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='mmcls.ConvNeXt', arch='tiny', diff --git a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py index 9e125fde9..ac11a956f 100644 --- a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py +++ b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py @@ -4,8 +4,10 @@ _base_ = [ '../_base_/schedules/schedule_160k.py' ] crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) checkpoint_file = 
'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='mmcls.ConvNeXt', arch='xlarge', diff --git a/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py b/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py index 1b70c5b8d..1a0624685 100644 --- a/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py b/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py index 03734310d..2f8bb600a 100644 --- a/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/danet/danet_r50-d8_512x512_160k_ade20k.py b/configs/danet/danet_r50-d8_512x512_160k_ade20k.py index 22aaf857c..dc499ab64 100644 --- a/configs/danet/danet_r50-d8_512x512_160k_ade20k.py +++ b/configs/danet/danet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + 
auxiliary_head=dict(num_classes=150)) diff --git a/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py b/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py index 010f86f1a..5859db5e1 100644 --- a/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py b/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py index 0cef0f09b..0804271b7 100644 --- a/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/danet/danet_r50-d8_512x512_80k_ade20k.py b/configs/danet/danet_r50-d8_512x512_80k_ade20k.py index 154e84890..8afdc4de3 100644 --- a/configs/danet/danet_r50-d8_512x512_80k_ade20k.py +++ b/configs/danet/danet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + 
decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py b/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py index 5c5b94e5a..ae2696049 100644 --- a/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py b/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py index c7237ae03..97fddf554 100644 --- a/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py index 9d493ef52..7aacb4e5e 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, 
decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py index 038993c6a..ef4ba686e 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py index 71a0fda48..4b36cb32d 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py index bcdc0b459..eff427f22 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py index 8e7420d24..9c08a14cf 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py index 132787db9..11089bd0e 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py index b4a9d4e1b..934d36a5c 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) 
+preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py index f62da1a80..1474b754b 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py index 492bd3dfd..cbc67a100 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py index 22d647ecb..201e208f5 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py @@ -3,5 +3,9 @@ _base_ = [ 
'../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py index 45e0b5614..b67c62eba 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py index 3e43234bb..b15d3df33 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_320k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py 
b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py index f02772ab0..309d654a6 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py index 8697e9284..11e8da806 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py index 78f4d0d9d..202d42de4 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), 
auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py index e35d1988f..60034dce3 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py index dd7c16580..f742eb67d 100644 --- a/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py +++ b/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py index 318845de1..29bc72a8c 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context.py', 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py index f9e831bcd..99cfa6d33 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py index 1736c2397..8d7249ea5 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py index 
d2af575df..ee4925437 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py index fed9314f5..76bd43518 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/vaihingen.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py index a1a8beb82..7d164e072 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/isaid.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (896, 896) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=16), auxiliary_head=dict(num_classes=16)) + 
preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=16), + auxiliary_head=dict(num_classes=16)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py index 7243d0390..12f7b2f07 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py @@ -3,3 +3,6 @@ _base_ = [ '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py index 3304d3677..b5efc7689 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py @@ -3,3 +3,6 @@ _base_ = [ '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py index 1491e3b82..84fd130c5 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), 
+ auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py index 1056ad4d1..202d05a77 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py index e36c83ba6..c2952ceeb 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py index 352d870bc..d1163c0b3 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] 
+crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py index 62756f65b..78e18c447 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/loveda.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=7), auxiliary_head=dict(num_classes=7)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=7), + auxiliary_head=dict(num_classes=7)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py index d5ae03fd5..8739447d8 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/potsdam.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py index e4bda3ede..6c59861f8 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py +++ 
b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py index 1420b97a4..6fd260467 100644 --- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py index 1f9a917fa..4e60309c0 100644 --- a/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py index 1b38f90dc..026dc0f1e 100644 --- a/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py +++ 
b/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py b/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py index a8fbd9beb..5857cd856 100644 --- a/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py +++ b/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py b/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py index 74f6d6a85..490d4550c 100644 --- a/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py +++ b/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py index 19841547a..1c475616e 100644 --- a/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 
@@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py index 31d95f96e..ecd32ed48 100644 --- a/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py index f7aa7444d..e26507b70 100644 --- a/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py index fdff93f54..30cefb3dd 100644 --- a/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py b/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py index 5305689d0..b7ccbf79f 100644 --- a/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py +++ b/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py b/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py index 09604c397..cefdb24d1 100644 --- a/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py +++ b/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py index 0666199b6..1f72c87d3 100644 --- a/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py +++ b/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg 
= dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py index f7b07c4f4..92bd3a80b 100644 --- a/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py +++ b/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py index d8a0e4620..3d83f5a58 100644 --- a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py +++ b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py @@ -2,7 +2,9 @@ _base_ = [ '../_base_/models/dpt_vit-b16.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone optimizer = dict( diff --git a/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py b/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py index 73b7788bf..e4f3d9467 100644 --- a/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/emanet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 
1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py b/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py index 699aa212c..2dc865186 100644 --- a/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py b/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py index 4ea6ed0e8..5effa9e00 100644 --- a/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py b/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py index d2feeef7e..473d8cdc1 100644 --- a/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py 
b/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py index 2a5dc203c..42ebfa84a 100644 --- a/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py +++ b/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py b/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py index 9cb7952ce..b52b00a2a 100644 --- a/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py b/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py index 81f3cbfbf..c1ed6c1e6 100644 --- a/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git 
a/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py index 835375cb0..f17123792 100644 --- a/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py +++ b/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py b/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py index d311e33f5..f5ff7aaf3 100644 --- a/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py b/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py index 7b535f3c8..1019a10f8 100644 --- a/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), 
test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py index 600b701a7..224530518 100644 --- a/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py +++ b/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict(stem_channels=128), decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py b/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py index 8cc72d4ea..e95004a4c 100644 --- a/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py +++ b/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py @@ -2,6 +2,9 @@ _base_ = [ '../_base_/models/erfnet_fcn.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) train_dataloader = dict(batch_size=4, num_workers=4) val_dataloader = dict(batch_size=4, num_workers=4) test_dataloader = val_dataloader diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py index 78be5f451..55833d11b 100644 --- a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py @@ -3,6 +3,9 @@ _base_ = [ '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = 
dict(preprocess_cfg=preprocess_cfg) train_dataloader = dict(batch_size=4, num_workers=4) val_dataloader = dict(batch_size=4, num_workers=4) test_dataloader = val_dataloader diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py index e7637fabe..7afc2d7b5 100644 --- a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py @@ -3,3 +3,6 @@ _base_ = [ '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py index e267ac6b4..942c67a64 100644 --- a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py index 22e0447be..7081c2760 100644 --- a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - 
decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py b/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py index 19758f36f..3b03dc5b4 100644 --- a/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py +++ b/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py @@ -2,7 +2,9 @@ _base_ = [ '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) # Re-config the data sampler. train_dataloader = dict(batch_size=4, num_workers=4) val_dataloader = dict(batch_size=4, num_workers=4) diff --git a/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py index f30646ede..65bf00e08 100644 --- a/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py +++ b/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), decode_head=dict(dilation=6), auxiliary_head=dict(dilation=6)) diff --git a/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py index e4b623aca..69430f722 100644 --- a/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py +++ b/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), decode_head=dict(dilation=6), auxiliary_head=dict(dilation=6)) diff --git a/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py index 01d8f27c8..0c49272b2 100644 --- a/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py +++ b/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), decode_head=dict(align_corners=True, dilation=6), auxiliary_head=dict(align_corners=True, dilation=6), diff --git a/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py index c5ef3b880..b46afcacc 100644 --- a/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py +++ b/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), decode_head=dict(align_corners=True, dilation=6), auxiliary_head=dict(align_corners=True, dilation=6), diff --git a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py index 7c57a6f8f..4251c28fe 100644 --- a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py +++ b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', 
'../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py index 4a8180038..2b62a588d 100644 --- a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py +++ b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py index df6d25b6a..d532a0c44 100644 --- a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py +++ b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py index 02507ccb7..49a6d3ab9 100644 --- a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py +++ 
b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py b/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py index 401c6ea73..3bb85ecf5 100644 --- a/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py index 990a085ed..7072dd6f0 100644 --- a/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py b/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py index 9ca7fd23c..f62f2fcbb 100644 --- a/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py +++ b/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = 
dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py b/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py index 17206a517..05fc3eaea 100644 --- a/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py +++ b/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py b/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py index 8cec429c3..61321df31 100644 --- a/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py +++ b/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py b/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py index ef194cb59..6d97cb47e 100644 --- a/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py +++ b/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] 
+crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py b/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py index fca98c1d9..561129b69 100644 --- a/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py +++ b/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py index 7d75cd9f4..2b178f0f5 100644 --- a/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py +++ b/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py index 610467c07..0ca8885a2 100644 --- a/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py index 155e28f42..6a9c503d7 100644 --- a/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py b/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py index 1549a4d5b..bb19a1d5a 100644 --- a/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py +++ b/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py b/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py index a496204bd..9aeac2d54 100644 --- a/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + 
decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py b/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py index d85cf6550..4183902da 100644 --- a/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py b/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py index 89d5e1ae0..e9bb0d1ef 100644 --- a/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py +++ b/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py index 332495d3d..d56ee1b71 100644 --- a/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), 
auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py index d6d9cb1c6..246ae7cd9 100644 --- a/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py index 5ff05aa59..5af2bc256 100644 --- a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py +++ b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py index d2eecf016..c64da8560 100644 --- a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py +++ b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 
480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py index cf315a4f0..9174b1f1d 100644 --- a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py +++ b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py index 9cbf4100d..d29a1bb5f 100644 --- a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py +++ b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py b/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py index 3585a7cb3..bf5573e18 100644 --- a/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py +++ 
b/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/vaihingen.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(decode_head=dict(num_classes=6)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=6)) diff --git a/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py b/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py index 62e6d6bf0..63bda7b03 100644 --- a/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py +++ b/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/isaid.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(decode_head=dict(num_classes=16)) +crop_size = (896, 896) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=16)) diff --git a/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py index 9f04e935c..ecd67e6c2 100644 --- a/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py +++ b/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py index 99760c36d..072922624 100644 --- a/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py +++ b/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = 
(512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py index a653dda19..25bcbd5d1 100644 --- a/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py +++ b/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py index 45ed99b68..e91bf8e8e 100644 --- a/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py +++ b/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] -model = dict(decode_head=dict(num_classes=150)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=150)) diff --git a/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py index f06448b16..aec8d1884 100644 --- a/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py +++ b/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] -model = dict(decode_head=dict(num_classes=21)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=21)) diff --git a/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py index d74e95943..b128a13d1 100644 --- 
a/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py +++ b/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(decode_head=dict(num_classes=21)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=21)) diff --git a/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py index 52bc9f5e9..0f06280e4 100644 --- a/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py +++ b/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(decode_head=dict(num_classes=150)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=150)) diff --git a/configs/hrnet/fcn_hr18_512x512_80k_loveda.py b/configs/hrnet/fcn_hr18_512x512_80k_loveda.py index 3bc4d0a32..0976c2c6a 100644 --- a/configs/hrnet/fcn_hr18_512x512_80k_loveda.py +++ b/configs/hrnet/fcn_hr18_512x512_80k_loveda.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/loveda.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(decode_head=dict(num_classes=7)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=7)) diff --git a/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py b/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py index 043017f91..8bda44c5e 100644 --- a/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py +++ b/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fcn_hr18.py', '../_base_/datasets/potsdam.py', 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(decode_head=dict(num_classes=6)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=6)) diff --git a/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py index 5b9fd9b09..77dcf9739 100644 --- a/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py +++ b/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py @@ -3,3 +3,6 @@ _base_ = [ '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (832, 832) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py index e0336c99d..498650cd6 100644 --- a/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py +++ b/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py @@ -3,3 +3,6 @@ _base_ = [ '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (832, 832) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py b/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py index f8675e9d6..46ba2406c 100644 --- a/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py 
b/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py index 46119fbee..9a4a4bd8d 100644 --- a/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py b/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py index 7d5c235ae..ec8217b87 100644 --- a/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py +++ b/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py b/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py index d8b60ba84..1ca637027 100644 --- a/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py b/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py index 
472989940..8a2eecdb6 100644 --- a/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py b/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py index e35480dad..8a5b97337 100644 --- a/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py +++ b/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py b/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py index 201a35873..382794434 100644 --- a/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py 
b/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py index 560435045..46960934b 100644 --- a/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py index 80ca2774a..3bd8a1873 100644 --- a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py +++ b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -2,13 +2,15 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) num_stages = 3 conv_kernel_size = 1 model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py index 8afb51d88..2e2b486a0 100644 --- a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py +++ b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -2,13 +2,15 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) num_stages = 3 
conv_kernel_size = 1 model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py index cd39bd387..66e3400c7 100644 --- a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py +++ b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -2,13 +2,15 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) num_stages = 3 conv_kernel_size = 1 model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py index 9c5f84849..2450af1d6 100644 --- a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py +++ b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -2,7 +2,8 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) num_stages = 3 @@ -10,6 +11,7 @@ conv_kernel_size = 1 model = dict( type='EncoderDecoder', + preprocess_cfg=preprocess_cfg, pretrained='open-mmlab://resnet50_v1c', backbone=dict( type='ResNetV1c', diff --git a/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py index b4c982fac..751da435e 100644 --- a/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py +++ 
b/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py @@ -2,7 +2,10 @@ _base_ = 'knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py' checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth' # noqa # model settings +crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=checkpoint_file, backbone=dict( embed_dims=192, @@ -16,8 +19,6 @@ model = dict( kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])), auxiliary_head=dict(in_channels=768)) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (640, 640) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -26,7 +27,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ diff --git a/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py b/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py index 4bfd7aa3e..30e26a050 100644 --- a/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py +++ b/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py @@ -1,8 +1,5 @@ _base_ = './upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - test_pipeline = [ dict(type='LoadImageFromFile'), # TODO: Refactor 'MultiScaleFlipAug' which supports diff --git a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py index 7ee3d69a0..c892882db 100644 --- a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py +++ b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py @@ -2,8 +2,10 @@ _base_ = [ '../_base_/models/upernet_mae.py', 
'../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='./pretrain/mae_pretrain_vit_base_mmcls.pth', backbone=dict( type='MAE', diff --git a/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py index 3da5c24af..e869971d5 100644 --- a/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py +++ b/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - -model = dict(pretrained='open-mmlab://contrib/mobilenet_v3_large') +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + pretrained='open-mmlab://contrib/mobilenet_v3_large') # Re-config the data sampler. train_dataloader = dict(batch_size=4, num_workers=4) diff --git a/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py index 0a9ce41c7..239f3c8a6 100644 --- a/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py +++ b/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py @@ -2,8 +2,10 @@ _base_ = [ '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) # Re-config the data sampler. 
+model = dict(preprocess_cfg=preprocess_cfg) train_dataloader = dict(batch_size=4, num_workers=4) val_dataloader = dict(batch_size=4, num_workers=4) test_dataloader = val_dataloader diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py index 9d4dc7390..fea176492 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py index b0672b687..313168475 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py index b1adfbab8..b763e7402 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), 
auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py index 2e808d807..1f81e1968 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py index 66b443abe..5f5c8dbf4 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py index 8a7a2f509..d1db1a48f 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py index 75adef324..d5ce3f8b2 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py index a0726c293..145fd45ab 100644 --- a/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py +++ b/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py index 1c86eba17..28de85808 100644 --- a/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py +++ 
b/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py index 2c73b3839..d48f6f8d1 100644 --- a/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py +++ b/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py index 506ad9319..b6c5341a5 100644 --- a/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py +++ b/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py index a3c86e18e..cdb2128aa 100644 --- a/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py +++ b/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py @@ -2,34 +2,38 @@ _base_ = [ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) -model 
= dict(decode_head=[ - dict( - type='FCNHead', - in_channels=[18, 36, 72, 144], - channels=sum([18, 36, 72, 144]), - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - kernel_size=1, - num_convs=1, - concat_input=False, - dropout_ratio=-1, - num_classes=150, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - dict( - type='OCRHead', - in_channels=[18, 36, 72, 144], - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - channels=512, - ocr_channels=256, - dropout_ratio=-1, - num_classes=150, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), -]) +model = dict( + preprocess_cfg=preprocess_cfg, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py index ab9d6446c..8f557f6cd 100644 --- a/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py +++ b/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py @@ -3,34 +3,38 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', 
requires_grad=True) -model = dict(decode_head=[ - dict( - type='FCNHead', - in_channels=[18, 36, 72, 144], - channels=sum([18, 36, 72, 144]), - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - kernel_size=1, - num_convs=1, - concat_input=False, - dropout_ratio=-1, - num_classes=21, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - dict( - type='OCRHead', - in_channels=[18, 36, 72, 144], - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - channels=512, - ocr_channels=256, - dropout_ratio=-1, - num_classes=21, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), -]) +model = dict( + preprocess_cfg=preprocess_cfg, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py index df79a9cf1..4e9fbcd19 100644 --- a/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py +++ b/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py @@ -3,34 +3,38 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = 
dict(type='SyncBN', requires_grad=True) -model = dict(decode_head=[ - dict( - type='FCNHead', - in_channels=[18, 36, 72, 144], - channels=sum([18, 36, 72, 144]), - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - kernel_size=1, - num_convs=1, - concat_input=False, - dropout_ratio=-1, - num_classes=21, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - dict( - type='OCRHead', - in_channels=[18, 36, 72, 144], - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - channels=512, - ocr_channels=256, - dropout_ratio=-1, - num_classes=21, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), -]) +model = dict( + preprocess_cfg=preprocess_cfg, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py index 6ad67722a..79fe766c5 100644 --- a/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py +++ b/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py @@ -2,34 +2,38 @@ _base_ = [ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg 
= dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict(decode_head=[ - dict( - type='FCNHead', - in_channels=[18, 36, 72, 144], - channels=sum([18, 36, 72, 144]), - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - kernel_size=1, - num_convs=1, - concat_input=False, - dropout_ratio=-1, - num_classes=150, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - dict( - type='OCRHead', - in_channels=[18, 36, 72, 144], - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - channels=512, - ocr_channels=256, - dropout_ratio=-1, - num_classes=150, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), -]) +model = dict( + preprocess_cfg=preprocess_cfg, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py index 20fd24e77..da19aa3df 100644 --- a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py +++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py @@ -2,7 +2,12 @@ _base_ = [ '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101)) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005) optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) param_scheduler = [ diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py index e34f3432e..be32a9a9a 100644 --- a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py +++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py @@ -2,4 +2,9 @@ _base_ = [ '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101)) diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py index 29775b50f..faa4f77d7 100644 --- a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py +++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py @@ -2,7 +2,12 @@ _base_ = [ '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101)) optimizer = dict(type='SGD', 
lr=0.02, momentum=0.9, weight_decay=0.0005) optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) param_scheduler = [ diff --git a/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py index ed56b952c..f6c19e2fc 100644 --- a/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py +++ b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py @@ -2,6 +2,9 @@ _base_ = [ '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) param_scheduler = [ dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=200), dict( diff --git a/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py index 51e489cb2..964b42c0f 100644 --- a/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py +++ b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py @@ -2,33 +2,37 @@ _base_ = [ '../_base_/models/pointrend_r50.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict(decode_head=[ - dict( - type='FPNHead', - in_channels=[256, 256, 256, 256], - in_index=[0, 1, 2, 3], - feature_strides=[4, 8, 16, 32], - channels=128, - dropout_ratio=-1, - num_classes=150, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - dict( - type='PointHead', - in_channels=[256], - in_index=[0], - channels=256, - num_fcs=3, - coarse_pred_each_layer=True, - dropout_ratio=-1, - num_classes=150, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) -]) +model = 
dict( + preprocess_cfg=preprocess_cfg, + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) param_scheduler = [ dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=200), dict( diff --git a/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py b/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py index 6671fcb4b..7d20293cf 100644 --- a/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py b/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py index a441013a4..df8948c69 100644 --- a/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py b/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py index 
9c6364eb4..16ee96532 100644 --- a/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py +++ b/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py @@ -2,6 +2,9 @@ _base_ = [ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(mask_size=(66, 66), num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py b/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py index af06cb66c..115f37037 100644 --- a/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py b/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py index 803c42da3..18669ae41 100644 --- a/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py b/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py index 
0141a6d09..41fc4b045 100644 --- a/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py +++ b/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py @@ -2,6 +2,9 @@ _base_ = [ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(mask_size=(66, 66), num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py b/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py index 690f8b5ef..f470aa531 100644 --- a/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py b/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py index 0966b4770..843da86bd 100644 --- a/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py 
b/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py index 6bfeef319..4879f4294 100644 --- a/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -model = dict(backbone=dict(dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + backbone=dict(dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) diff --git a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py index 23b636bb5..74d38afea 100644 --- a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='ResNet', diff --git a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py index 30abe46e7..29bf614be 100644 --- a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py +++ b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + 
preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py index 88041c681..371592bfe 100644 --- a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py +++ b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py index 09e96dabf..0179cb953 100644 --- a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py +++ b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py index d4065ec05..7aeb72e1e 100644 --- a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py +++ b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' ] +crop_size = (480, 480) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=59), auxiliary_head=dict(num_classes=59), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) diff --git a/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py b/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py index f78faff0a..983f34180 100644 --- a/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py +++ b/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/potsdam.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py b/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py index dfdd294e8..5d6718081 100644 --- a/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py +++ b/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/vaihingen.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py b/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py index ef7eb9928..93513e78c 100644 --- a/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py +++ b/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py @@ 
-2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/isaid.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (896, 896) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=16), auxiliary_head=dict(num_classes=16)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=16), + auxiliary_head=dict(num_classes=16)) diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py index 5deb5872b..7fb0b3ad3 100644 --- a/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py index 98e2fc856..93a497cff 100644 --- a/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py +++ b/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py @@ -2,8 +2,10 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) + test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=(1920, 1080), keep_ratio=True), diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py index 447b64bc7..a26ed992e 100644 --- a/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py +++ 
b/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) + test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=(1920, 1080), keep_ratio=True), diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py index 4e9972849..8f9d9ee8c 100644 --- a/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py index 59552de5a..ed6a03871 100644 --- a/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py +++ b/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) + test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=(1920, 1080), keep_ratio=True), diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py index 80d953450..cec9cb6cc 100644 --- a/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py +++ 
b/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) + test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=(1920, 1080), keep_ratio=True), diff --git a/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py b/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py index 86584573a..a9a0e4ec8 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py b/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py index cd88154d5..9c62703de 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py b/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py index f0c20c12f..2c23bf37f 
100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py b/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py index e1f8887a2..9f3146a4f 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py b/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py index 6cd94f9a0..5376d52fa 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git 
a/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py b/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py index 32b3281d0..40b288ad7 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_320k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py b/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py index c792bb4e7..2a9b213ef 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py b/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py index 7f7bc6400..357760672 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - 
decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py b/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py index 52efdf51d..fa1df0123 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py b/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py index 830af482e..c2967a2f7 100644 --- a/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py +++ b/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/loveda.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=7), auxiliary_head=dict(num_classes=7)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=7), + auxiliary_head=dict(num_classes=7)) diff --git a/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py index 145cadb24..be7ce9b67 100644 --- a/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] 
+crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py index 23a81eb7e..6c24c991c 100644 --- a/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py index 614598872..fc138b775 100644 --- a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py +++ b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( type='ResNet', diff --git a/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py index 7f4f6c9b4..337b29445 100644 --- a/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py 
+++ b/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py @@ -2,6 +2,9 @@ _base_ = [ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='torchvision://resnet50', backbone=dict(type='ResNet', dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) diff --git a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py index 697232dc5..32fb75728 100644 --- a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py @@ -2,9 +2,12 @@ _base_ = [ '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - pretrained='pretrain/mit_b0.pth', decode_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + pretrained='pretrain/mit_b0.pth', + decode_head=dict(num_classes=150)) # optimizer optimizer = dict( diff --git a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py index c124fdfde..ae76a4d19 100644 --- a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_1024x1024.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (1024, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b0.pth')), test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) diff --git 
a/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py b/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py index 935d4667f..d63b7cb0d 100644 --- a/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py @@ -1,9 +1,8 @@ _base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] # dataset settings -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', reduce_zero_label=True), @@ -11,7 +10,6 @@ train_pipeline = [ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), dict(type='PackSegInputs') ] test_pipeline = [ @@ -25,6 +23,7 @@ test_dataloader = val_dataloader # model settings model = dict( + preprocess_cfg=preprocess_cfg, pretrained='pretrain/mit_b5.pth', backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 6, 40, 3]), diff --git a/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py index 8d417ae85..efbd55d20 100644 --- a/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py +++ b/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py @@ -3,29 +3,12 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) optimizer = dict(lr=0.001, weight_decay=0.0) -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -crop_size = (512, 512) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='RandomResize', 
scale=(2048, 512), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), - dict(type='PackSegInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', scale=(2048, 512), keep_ratio=True), - dict(type='PackSegInputs') -] train_dataloader = dict( # num_gpus: 8 -> batch_size: 8 - batch_size=1, - dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py index 55c34b93f..2d0838216 100644 --- a/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py +++ b/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py @@ -1,11 +1,14 @@ _base_ = [ '../_base_/models/segmenter_vit-b16_mask.py', - '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (640, 640) +preprocess_cfg = dict(size=crop_size) checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_large_p16_384_20220308-d4efb41d.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, pretrained=checkpoint, backbone=dict( type='VisionTransformer', @@ -23,27 +26,7 @@ model = dict( optimizer = dict(lr=0.001, weight_decay=0.0) -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -crop_size = (640, 640) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='RandomResize', scale=(2048, 640), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, 
cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), - dict(type='PackSegInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', scale=(2048, 640), keep_ratio=True), - dict(type='PackSegInputs') -] train_dataloader = dict( # num_gpus: 8 -> batch_size: 8 - batch_size=1, - dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py index 2a448f59e..d79e18c2e 100644 --- a/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py +++ b/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py @@ -3,11 +3,13 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_small_p16_384_20220308-410f6037.pth' # noqa backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=checkpoint, backbone=dict( img_size=(512, 512), @@ -28,27 +30,7 @@ model = dict( optimizer = dict(lr=0.001, weight_decay=0.0) -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -crop_size = (512, 512) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), - dict(type='PackSegInputs') -] 
-test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', scale=(2048, 512), keep_ratio=True), - dict(type='PackSegInputs') -] train_dataloader = dict( # num_gpus: 8 -> batch_size: 8 - batch_size=1, - dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py index 79de7d541..42c807af1 100644 --- a/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py +++ b/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py @@ -3,10 +3,12 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_tiny_p16_384_20220308-cce8c795.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, pretrained=checkpoint, backbone=dict(embed_dims=192, num_heads=3), decode_head=dict( @@ -18,27 +20,7 @@ model = dict( optimizer = dict(lr=0.001, weight_decay=0.0) -img_norm_cfg = dict( - mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) -crop_size = (512, 512) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Pad', size=crop_size), - dict(type='PackSegInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', scale=(2048, 512), keep_ratio=True), - dict(type='PackSegInputs') -] train_dataloader = dict( # num_gpus: 8 -> batch_size: 8 - 
batch_size=1, - dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py b/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py index 2654096df..8a425e1dc 100644 --- a/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py +++ b/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py @@ -1,2 +1,7 @@ _base_ = './fpn_r50_512x512_160k_ade20k.py' -model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict( +    preprocess_cfg=preprocess_cfg, +    pretrained='open-mmlab://resnet101_v1c', +    backbone=dict(depth=101)) diff --git a/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py b/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py index 4bf3edd82..73f6f2089 100644 --- a/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py +++ b/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/fpn_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py b/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py index 5cdfc8ca2..b9440714b 100644 --- a/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py +++ b/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py @@ -2,4 +2,6 @@ _base_ = [ '../_base_/models/fpn_r50.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] -model = dict(decode_head=dict(num_classes=150)) +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=150)) diff --git a/configs/setr/setr_mla_512x512_160k_b8_ade20k.py b/configs/setr/setr_mla_512x512_160k_b8_ade20k.py index c0339eea9..e7e32ef8e
100644 --- a/configs/setr/setr_mla_512x512_160k_b8_ade20k.py +++ b/configs/setr/setr_mla_512x512_160k_b8_ade20k.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/setr_mla.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( img_size=(512, 512), diff --git a/configs/setr/setr_naive_512x512_160k_b16_ade20k.py b/configs/setr/setr_naive_512x512_160k_b16_ade20k.py index f3346ee3c..9b4cdad81 100644 --- a/configs/setr/setr_naive_512x512_160k_b16_ade20k.py +++ b/configs/setr/setr_naive_512x512_160k_b16_ade20k.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/setr_naive.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( img_size=(512, 512), diff --git a/configs/setr/setr_pup_512x512_160k_b16_ade20k.py b/configs/setr/setr_pup_512x512_160k_b16_ade20k.py index a5cb19d1e..ab0a19db7 100644 --- a/configs/setr/setr_pup_512x512_160k_b16_ade20k.py +++ b/configs/setr/setr_pup_512x512_160k_b16_ade20k.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/setr_pup.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( img_size=(512, 512), diff --git a/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py index 36bcced14..230fc68d9 100644 --- 
a/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py +++ b/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py @@ -2,7 +2,10 @@ _base_ = [ '../_base_/models/setr_mla.py', '../_base_/datasets/cityscapes_768x768.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (768, 768) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( drop_rate=0, diff --git a/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py index ffb51b54a..0cfbb22c9 100644 --- a/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py +++ b/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_768x768.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (768, 768) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( drop_rate=0., diff --git a/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py index c2f40d9b9..341137f2f 100644 --- a/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py +++ b/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py @@ -2,10 +2,12 @@ _base_ = [ '../_base_/models/setr_pup.py', '../_base_/datasets/cityscapes_768x768.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (768, 768) +preprocess_cfg = dict(size=crop_size) norm_cfg = dict(type='SyncBN', requires_grad=True) crop_size = (768, 768) model = dict( + preprocess_cfg=preprocess_cfg, pretrained=None, backbone=dict( drop_rate=0., diff --git a/configs/stdc/stdc1_512x1024_80k_cityscapes.py b/configs/stdc/stdc1_512x1024_80k_cityscapes.py index 8a04cd225..30e7909c3 100644 --- a/configs/stdc/stdc1_512x1024_80k_cityscapes.py +++ 
b/configs/stdc/stdc1_512x1024_80k_cityscapes.py @@ -2,6 +2,9 @@ _base_ = [ '../_base_/models/stdc.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) param_scheduler = [ dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), dict( diff --git a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py index 0f3d28618..d3ccc69d5 100644 --- a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py +++ b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py @@ -2,8 +2,11 @@ _base_ = [ '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), embed_dims=96, diff --git a/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py index 3d7be96e8..3dfee413c 100644 --- a/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py +++ b/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -2,5 +2,7 @@ _base_ = [ '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) optimizer = dict(_delete_=True, 
type='AdamW', lr=0.0001, weight_decay=0.0001) diff --git a/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py index 81451713b..d15adfef6 100644 --- a/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py +++ b/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py @@ -3,7 +3,9 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) optimizer = dict( _delete_=True, type='AdamW', diff --git a/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py index dbb944c20..055c07f58 100644 --- a/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py +++ b/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -2,10 +2,12 @@ _base_ = [ '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='SVT', init_cfg=dict(type='Pretrained', checkpoint=checkpoint), diff --git a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py index 770847ef7..5a01a2320 100644 --- a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py +++ b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py @@ -3,10 +3,12 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) checkpoint = 
'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth' # noqa model = dict( + preprocess_cfg=preprocess_cfg, backbone=dict( type='SVT', init_cfg=dict(type='Pretrained', checkpoint=checkpoint), diff --git a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py index 3f501fb1a..2947ffdf8 100644 --- a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py +++ b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py @@ -3,4 +3,8 @@ _base_ = [ '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +crop_size = (128, 128) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py index 1dfd5f123..262a9aa79 100644 --- a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py +++ b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/stare.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +crop_size = (128, 128) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py index 8492d44a7..ec6495d1f 100644 --- a/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py +++ b/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/hrf.py', 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +crop_size = (256, 256) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) diff --git a/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py b/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py index c55521d21..64c38b1ee 100644 --- a/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py +++ b/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/drive.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +crop_size = (64, 64) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) diff --git a/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py index f4809784a..38ac11aec 100644 --- a/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py +++ b/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +crop_size = (128, 128) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py b/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py index f62323ec6..f53f90f6c 100644 --- a/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py +++ b/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py @@ -2,4 +2,8 @@ _base_ = [ 
'../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/stare.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +crop_size = (128, 128) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py index 7dc904414..00a696b10 100644 --- a/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py +++ b/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/hrf.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +crop_size = (256, 256) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) diff --git a/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py b/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py index 00cbf3ce1..0206abc90 100644 --- a/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py +++ b/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py @@ -2,8 +2,10 @@ _base_ = [ '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(num_classes=19), auxiliary_head=dict(num_classes=19), # model training and testing settings diff --git a/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py b/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py index 3aec7c9de..cc3d5083f 100644 --- a/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py +++ b/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py @@ -2,4 
+2,8 @@ _base_ = [ '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/drive.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +crop_size = (64, 64) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) diff --git a/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py index 7c9ca3479..b9b382299 100644 --- a/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py +++ b/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py @@ -3,4 +3,8 @@ _base_ = [ '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +crop_size = (128, 128) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py b/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py index 1d2ee426a..f3e22446b 100644 --- a/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py +++ b/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/stare.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +crop_size = (128, 128) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py index f1b2879b3..400ab3b2d 100644 --- a/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py +++ b/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py @@ 
-2,4 +2,8 @@ _base_ = [ '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/hrf.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +crop_size = (256, 256) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) diff --git a/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py b/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py index 765b18527..77a583cc0 100644 --- a/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py +++ b/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py @@ -2,4 +2,8 @@ _base_ = [ '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/drive.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +crop_size = (64, 64) +preprocess_cfg = dict(size=crop_size) +model = dict( + preprocess_cfg=preprocess_cfg, + test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) diff --git a/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py b/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py index d621e89ce..7eaef49df 100644 --- a/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py +++ b/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py b/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py index 95fffcc76..c4fb3166a 100644 --- a/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py +++ b/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py @@ -2,3 +2,6 @@ _base_ = [ '../_base_/models/upernet_r50.py', 
'../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 1024) +preprocess_cfg = dict(size=crop_size) +model = dict(preprocess_cfg=preprocess_cfg) diff --git a/configs/upernet/upernet_r50_512x512_160k_ade20k.py b/configs/upernet/upernet_r50_512x512_160k_ade20k.py index f5dd9aa4e..afcadee24 100644 --- a/configs/upernet/upernet_r50_512x512_160k_ade20k.py +++ b/configs/upernet/upernet_r50_512x512_160k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/upernet/upernet_r50_512x512_20k_voc12aug.py b/configs/upernet/upernet_r50_512x512_20k_voc12aug.py index 95f5c0956..8749e8009 100644 --- a/configs/upernet/upernet_r50_512x512_20k_voc12aug.py +++ b/configs/upernet/upernet_r50_512x512_20k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/upernet/upernet_r50_512x512_40k_voc12aug.py b/configs/upernet/upernet_r50_512x512_40k_voc12aug.py index 9621fd1f5..916f0459d 100644 --- a/configs/upernet/upernet_r50_512x512_40k_voc12aug.py +++ b/configs/upernet/upernet_r50_512x512_40k_voc12aug.py @@ -3,5 +3,9 @@ _base_ = [ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size 
= (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/upernet/upernet_r50_512x512_80k_ade20k.py b/configs/upernet/upernet_r50_512x512_80k_ade20k.py index f561e309e..448306ced 100644 --- a/configs/upernet/upernet_r50_512x512_80k_ade20k.py +++ b/configs/upernet/upernet_r50_512x512_80k_ade20k.py @@ -2,5 +2,9 @@ _base_ = [ '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + preprocess_cfg=preprocess_cfg, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/upernet/upernet_r50_769x769_40k_cityscapes.py b/configs/upernet/upernet_r50_769x769_40k_cityscapes.py index 89b18aa28..3223f70d4 100644 --- a/configs/upernet/upernet_r50_769x769_40k_cityscapes.py +++ b/configs/upernet/upernet_r50_769x769_40k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/upernet/upernet_r50_769x769_80k_cityscapes.py b/configs/upernet/upernet_r50_769x769_80k_cityscapes.py index 29af98f2e..7c2de3eb3 100644 --- a/configs/upernet/upernet_r50_769x769_80k_cityscapes.py +++ b/configs/upernet/upernet_r50_769x769_80k_cityscapes.py @@ -3,7 +3,10 @@ _base_ = [ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' ] +crop_size = (769, 769) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, decode_head=dict(align_corners=True), auxiliary_head=dict(align_corners=True), test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py index 8c81bb726..1169fc436 100644 --- a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='pretrain/vit_base_patch16_224.pth', backbone=dict(drop_path_rate=0.1, final_norm=True), decode_head=dict(num_classes=150), diff --git a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py index 66821e6c5..4abd804a5 100644 --- a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='pretrain/vit_base_patch16_224.pth', decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py index 70b8d35e9..f937fa8d5 100644 --- a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py @@ -3,8 +3,10 @@ _base_ = [ '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' ] - +crop_size = (512, 512) +preprocess_cfg = dict(size=crop_size) model = dict( + preprocess_cfg=preprocess_cfg, pretrained='pretrain/vit_base_patch16_224.pth', decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/mmseg/core/utils/__init__.py b/mmseg/core/utils/__init__.py index 28882893a..c4bcc13cf 100644 --- a/mmseg/core/utils/__init__.py +++ b/mmseg/core/utils/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. from .dist_util import check_dist_init, sync_random_seed -from .misc import add_prefix +from .misc import add_prefix, stack_batch -__all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed'] +__all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed', 'stack_batch'] diff --git a/mmseg/core/utils/misc.py b/mmseg/core/utils/misc.py index 282bb8d96..209795729 100644 --- a/mmseg/core/utils/misc.py +++ b/mmseg/core/utils/misc.py @@ -1,4 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Union + +import torch +import torch.nn.functional as F + +from mmseg.core import SegDataSample + + def add_prefix(inputs, prefix): """Add prefix for dict. @@ -16,3 +24,71 @@ def add_prefix(inputs, prefix): outputs[f'{prefix}.{name}'] = value return outputs + + +def stack_batch(inputs: List[torch.Tensor], + batch_data_samples: List[SegDataSample], + size: tuple = None, + pad_value: Union[int, float] = 0, + seg_pad_val: Union[int, float] = 255, + padding_mode: str = 'constant') -> torch.Tensor: + """Stack multiple inputs to form a batch and pad the images and gt_sem_segs + to the max shape use the right bottom padding mode. + + Args: + inputs (List[Tensor]): The input multiple tensors. each is a + CHW 3D-tensor. + batch_data_samples (list[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as `gt_sem_seg`. + size (tuple): The img crop size. + pad_value (int, float): The padding value. 
Defaults to 0 + seg_pad_val (int, float): The padding value. Defaults to 255 + padding_mode (str): Type of padding. Default: constant. + - constant: pads with a constant value, this value is specified + with pad_val. + + Returns: + Tensor: The 4D-tensor. + batch_data_samples (list[:obj:`SegDataSample`]): After the padding of + the gt_seg_map. + """ + assert isinstance(inputs, list), \ + f'Expected input type to be list, but got {type(inputs)}' + assert len(set([tensor.ndim for tensor in inputs])) == 1, \ + f'Expected the dimensions of all inputs must be the same, ' \ + f'but got {[tensor.ndim for tensor in inputs]}' + assert inputs[0].ndim == 3, f'Expected tensor dimension to be 3, ' \ + f'but got {inputs[0].ndim}' + assert len(set([tensor.shape[0] for tensor in inputs])) == 1, \ + f'Expected the channels of all inputs must be the same, ' \ + f'but got {[tensor.shape[0] for tensor in inputs]}' + + padded_samples = [] + + for tensor, data_sample in zip(inputs, batch_data_samples): + if size is not None: + width = max(size[-1] - tensor.shape[-1], 0) + height = max(size[-2] - tensor.shape[-2], 0) + padding_size = (0, width, 0, height) + else: + padding_size = [0, 0, 0, 0] + if sum(padding_size) == 0: + padded_samples.append(tensor) + else: + # pad img + pad_img = F.pad( + tensor, padding_size, mode=padding_mode, value=pad_value) + padded_samples.append(pad_img) + # pad gt_sem_seg + gt_sem_seg = data_sample.gt_sem_seg.data + gt_width = max(pad_img.shape[-1] - gt_sem_seg.shape[-1], 0) + gt_height = max(pad_img.shape[-2] - gt_sem_seg.shape[-2], 0) + padding_gt_size = (0, gt_width, 0, gt_height) + del data_sample.gt_sem_seg.data + data_sample.gt_sem_seg.data = F.pad( + gt_sem_seg, + padding_gt_size, + mode=padding_mode, + value=seg_pad_val) + + return torch.stack(padded_samples, dim=0), batch_data_samples diff --git a/mmseg/models/decode_heads/cascade_decode_head.py b/mmseg/models/decode_heads/cascade_decode_head.py index f7c3da0d6..ab590e1b4 100644 --- 
a/mmseg/models/decode_heads/cascade_decode_head.py +++ b/mmseg/models/decode_heads/cascade_decode_head.py @@ -16,40 +16,33 @@ class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): """Placeholder of forward function.""" pass - def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + def forward_train(self, inputs, prev_output, batch_data_samples, train_cfg): """Forward function for training. Args: inputs (list[Tensor]): List of multi-level img features. prev_output (Tensor): The output of previous decode head. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. + batch_data_samples (list[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `img_metas` or `gt_semantic_seg`. train_cfg (dict): The training config. Returns: dict[str, Tensor]: a dictionary of loss components """ seg_logits = self.forward(inputs, prev_output) - losses = self.losses(seg_logits, gt_semantic_seg) + losses = self.losses(seg_logits, batch_data_samples) return losses - def forward_test(self, inputs, prev_output, img_metas, test_cfg): + def forward_test(self, inputs, prev_output, batch_img_metas, test_cfg): """Forward function for testing. Args: inputs (list[Tensor]): List of multi-level img features. prev_output (Tensor): The output of previous decode head. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. 
+ batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. test_cfg (dict): The testing config. Returns: diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py index d08b1d0b6..3068a6d40 100644 --- a/mmseg/models/decode_heads/decode_head.py +++ b/mmseg/models/decode_heads/decode_head.py @@ -184,36 +184,29 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta): """Placeholder of forward function.""" pass - def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): + def forward_train(self, inputs, batch_data_samples, train_cfg): """Forward function for training. Args: inputs (list[Tensor]): List of multi-level img features. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. + batch_data_samples (list[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `img_metas` or `gt_semantic_seg`. train_cfg (dict): The training config. Returns: dict[str, Tensor]: a dictionary of loss components """ seg_logits = self.forward(inputs) - losses = self.losses(seg_logits, gt_semantic_seg) + losses = self.losses(seg_logits, batch_data_samples) return losses - def forward_test(self, inputs, img_metas, test_cfg): + def forward_test(self, inputs, batch_img_metas, test_cfg): """Forward function for testing. Args: inputs (list[Tensor]): List of multi-level img features. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 
- For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. test_cfg (dict): The testing config. Returns: @@ -229,8 +222,12 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta): return output @force_fp32(apply_to=('seg_logit', )) - def losses(self, seg_logit, seg_label): + def losses(self, seg_logit, batch_data_samples): """Compute segmentation loss.""" + gt_semantic_segs = [ + data_sample.gt_sem_seg.data for data_sample in batch_data_samples + ] + seg_label = torch.stack(gt_semantic_segs, dim=0) loss = dict() seg_logit = resize( input=seg_logit, diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py index 76dc8f075..bfd44a4f6 100644 --- a/mmseg/models/segmentors/base.py +++ b/mmseg/models/segmentors/base.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import copy import warnings from abc import ABCMeta, abstractmethod from collections import OrderedDict @@ -8,14 +9,53 @@ import numpy as np import torch import torch.distributed as dist from mmcv.runner import BaseModule, auto_fp16 +from mmengine.data import PixelData + +from mmseg.core import SegDataSample +from mmseg.core.utils import stack_batch class BaseSegmentor(BaseModule, metaclass=ABCMeta): - """Base class for segmentors.""" + """Base class for segmentors. - def __init__(self, init_cfg=None): + Args: + preprocess_cfg (dict, optional): Model preprocessing config + for processing the input data. it usually includes + ``to_rgb``, ``pad_size_divisor``, ``pad_value``, + ``mean`` and ``std``. Default to None. + init_cfg (dict, optional): the config to control the + initialization. Default to None. 
+ """ + + def __init__(self, preprocess_cfg=None, init_cfg=None): super(BaseSegmentor, self).__init__(init_cfg) self.fp16_enabled = False + self.preprocess_cfg = preprocess_cfg + + self.pad_value = 0 + + if self.preprocess_cfg is not None: + assert isinstance(self.preprocess_cfg, dict) + self.preprocess_cfg = copy.deepcopy(self.preprocess_cfg) + + self.to_rgb = preprocess_cfg.get('to_rgb', False) + self.pad_value = preprocess_cfg.get('pad_value', 0) + self.size = preprocess_cfg.get('size') + self.seg_pad_val = preprocess_cfg.get('seg_pad_val', 255) + + self.register_buffer( + 'pixel_mean', + torch.tensor(preprocess_cfg['mean']).view(-1, 1, 1), False) + self.register_buffer( + 'pixel_std', + torch.tensor(preprocess_cfg['std']).view(-1, 1, 1), False) + else: + # Only used to provide device information + self.register_buffer('pixel_mean', torch.tensor(1), False) + + @property + def device(self): + return self.pixel_mean.device @property def with_neck(self): @@ -34,82 +74,157 @@ class BaseSegmentor(BaseModule, metaclass=ABCMeta): return hasattr(self, 'decode_head') and self.decode_head is not None @abstractmethod - def extract_feat(self, imgs): + def extract_feat(self, batch_inputs): """Placeholder for extract features from images.""" pass @abstractmethod - def encode_decode(self, img, img_metas): + def encode_decode(self, batch_inputs, batch_data_samples): """Placeholder for encode images with backbone and decode into a semantic segmentation map of the same size as input.""" pass - @abstractmethod - def forward_train(self, imgs, img_metas, **kwargs): + @auto_fp16(apply_to=('batch_inputs', )) + def forward_train(self, batch_inputs, batch_data_samples, **kwargs): """Placeholder for Forward function for training.""" pass @abstractmethod - def simple_test(self, img, img_meta, **kwargs): + def simple_test(self, batch_inputs, batch_img_metas, **kwargs): """Placeholder for single image test.""" pass @abstractmethod - def aug_test(self, imgs, img_metas, **kwargs): + def 
aug_test(self, batch_inputs, batch_img_metas, **kwargs): """Placeholder for augmentation test.""" pass - def forward_test(self, imgs, img_metas, **kwargs): + @auto_fp16(apply_to=('batch_inputs', )) + def forward_test(self, batch_inputs, batch_data_samples, **kwargs): """ Args: - imgs (List[Tensor]): the outer list indicates test-time + batch_inputs (List[Tensor]): the outer list indicates test-time augmentations and inner Tensor should have a shape NxCxHxW, which contains all images in the batch. - img_metas (List[List[dict]]): the outer list indicates test-time - augs (multiscale, flip, etc.) and the inner list indicates - images in a batch. + batch_data_samples (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `batch_img_metas`. """ - for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: - if not isinstance(var, list): - raise TypeError(f'{name} must be a list, but got ' - f'{type(var)}') - - num_augs = len(imgs) - if num_augs != len(img_metas): - raise ValueError(f'num of augmentations ({len(imgs)}) != ' - f'num of image meta ({len(img_metas)})') - # all images in the same aug batch all of the same ori_shape and pad - # shape - for img_meta in img_metas: - ori_shapes = [_['ori_shape'] for _ in img_meta] - assert all(shape == ori_shapes[0] for shape in ori_shapes) - img_shapes = [_['img_shape'] for _ in img_meta] - assert all(shape == img_shapes[0] for shape in img_shapes) - pad_shapes = [_['pad_shape'] for _ in img_meta] - assert all(shape == pad_shapes[0] for shape in pad_shapes) + batch_size = len(batch_data_samples) + batch_img_metas = [] + for batch_index in range(batch_size): + metainfo = batch_data_samples[batch_index].metainfo + metainfo['batch_input_shape'] = \ + tuple(batch_inputs[batch_index].size()[-2:]) + batch_img_metas.append(metainfo) + # TODO: support aug_test + num_augs = 1 if num_augs == 1: - return self.simple_test(imgs[0], img_metas[0], **kwargs) + return self.simple_test( + 
torch.unsqueeze(batch_inputs[0], 0), batch_img_metas, **kwargs) else: - return self.aug_test(imgs, img_metas, **kwargs) + # TODO: refactor and support aug test later + return self.aug_test(batch_inputs, batch_img_metas, **kwargs) - @auto_fp16(apply_to=('img', )) - def forward(self, img, img_metas, return_loss=True, **kwargs): + def forward(self, data, return_loss=False, **kwargs): """Calls either :func:`forward_train` or :func:`forward_test` depending on whether ``return_loss`` is ``True``. - Note this setting will change the expected inputs. When - ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor - and List[dict]), and when ``resturn_loss=False``, img and img_meta - should be double nested (i.e. List[Tensor], List[List[dict]]), with - the outer list indicating test time augmentations. - """ - if return_loss: - return self.forward_train(img, img_metas, **kwargs) - else: - return self.forward_test(img, img_metas, **kwargs) + Args: + data (list[dict]): The output of dataloader. + return_loss (bool): Whether to return loss. In general, + it will be set to True during training and False + during testing. Default to False. - def train_step(self, data_batch, optimizer, **kwargs): + Returns: + during training + dict: It should contain at least 3 keys: ``loss``, + ``log_vars``, ``num_samples``. + - ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + - ``log_vars`` contains all the variables to be sent to the + logger. + - ``num_samples`` indicates the batch size (when the model + is DDP, it means the batch size on each GPU), which is + used for averaging the logs. + + during testing + list[np.ndarray]: The predicted value obtained. 
+ """ + batch_inputs, batch_data_samples = self.preprocss_data( + data, return_loss) + if return_loss: + losses = self.forward_train(batch_inputs, batch_data_samples, + **kwargs) + loss, log_vars = self._parse_losses(losses) + + outputs = dict( + loss=loss, + log_vars=log_vars, + num_samples=len(batch_data_samples)) + return outputs + else: + return self.forward_test(batch_inputs, batch_data_samples, + **kwargs) + + def preprocss_data(self, data, return_loss): + """ Process input data during training and simple testing phases. + Args: + data (list[dict]): The data to be processed, which + comes from dataloader. + return_loss (bool): Train or test. + + Returns: + tuple: It should contain 2 item. + - batch_inputs (Tensor): The batch input tensor. + - batch_data_samples (list[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + """ + inputs = [data_['inputs'] for data_ in data] + data_samples = [data_['data_sample'] for data_ in data] + + batch_data_samples = [ + data_sample.to(self.device) for data_sample in data_samples + ] + inputs = [_input.to(self.device) for _input in inputs] + + if self.preprocess_cfg is None: + batch_inputs, batch_data_samples = stack_batch( + inputs, batch_data_samples) + return batch_inputs.float(), batch_data_samples + + if self.to_rgb and inputs[0].size(0) == 3: + inputs = [_input[[2, 1, 0], ...] for _input in inputs] + batch_inputs = [(_input - self.pixel_mean) / self.pixel_std + for _input in inputs] + if return_loss: + batch_inputs, batch_data_samples = stack_batch( + batch_inputs, batch_data_samples, self.size, self.pad_value, + self.seg_pad_val) + return batch_inputs, batch_data_samples + + def postprocess_result(self, results_dict: dict) -> list: + """ Convert results list to `SegDataSample`. + Args: + results_dict (dict): Segmentation results of + each image. It usually contain 'seg_logits' and 'pred_sem_seg' + + Returns: + dict: Segmentation results of the input images. 
+ It usually contains 'seg_logits' and 'pred_sem_seg'. + """ + batch_data_samples = [ + SegDataSample() + for _ in range(results_dict['pred_sem_seg'].shape[0]) + ] + for key, value in results_dict.items(): + for i in range(value.shape[0]): + batch_data_samples[i].set_data({key: PixelData(data=value[i])}) + return batch_data_samples + + def train_step(self, data_batch, optim_wrapper, **kwargs): """The iteration step during training. This method defines an iteration step during training, except for the @@ -135,7 +250,7 @@ class BaseSegmentor(BaseModule, metaclass=ABCMeta): DDP, it means the batch size on each GPU), which is used for averaging the logs. """ - losses = self(**data_batch) + losses = self(data_batch, True) loss, log_vars = self._parse_losses(losses) outputs = dict( @@ -145,7 +260,7 @@ class BaseSegmentor(BaseModule, metaclass=ABCMeta): return outputs - def val_step(self, data_batch, optimizer=None, **kwargs): + def val_step(self, data_batch, optim_wrapper=None, **kwargs): """The iteration step during validation. This method shares the same signature as :func:`train_step`, but used @@ -167,6 +282,12 @@ class BaseSegmentor(BaseModule, metaclass=ABCMeta): return outputs + def test_step(self, data_batch): + """The iteration step during test.""" + predictions = self(data_batch) + + return predictions + @staticmethod def _parse_losses(losses): """Parse the raw outputs (losses) of the network.
diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py index 8e0676563..6d548e572 100644 --- a/mmseg/models/segmentors/cascade_encoder_decoder.py +++ b/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -24,6 +24,7 @@ class CascadeEncoderDecoder(EncoderDecoder): auxiliary_head=None, train_cfg=None, test_cfg=None, + preprocess_cfg=None, pretrained=None, init_cfg=None): self.num_stages = num_stages @@ -34,6 +35,7 @@ class CascadeEncoderDecoder(EncoderDecoder): auxiliary_head=auxiliary_head, train_cfg=train_cfg, test_cfg=test_cfg, + preprocess_cfg=preprocess_cfg, pretrained=pretrained, init_cfg=init_cfg) @@ -47,41 +49,50 @@ class CascadeEncoderDecoder(EncoderDecoder): self.align_corners = self.decode_head[-1].align_corners self.num_classes = self.decode_head[-1].num_classes - def encode_decode(self, img, img_metas): + def encode_decode(self, batch_inputs, batch_img_metas): """Encode images with backbone and decode into a semantic segmentation map of the same size as input.""" - x = self.extract_feat(img) - out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) + x = self.extract_feat(batch_inputs) + out = self.decode_head[0].forward_test(x, batch_img_metas, + self.test_cfg) for i in range(1, self.num_stages): - out = self.decode_head[i].forward_test(x, out, img_metas, + out = self.decode_head[i].forward_test(x, out, batch_img_metas, self.test_cfg) out = resize( input=out, - size=img.shape[2:], + size=batch_inputs.shape[2:], mode='bilinear', align_corners=self.align_corners) return out - def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + def _decode_head_forward_train(self, batch_inputs, batch_data_samples): """Run forward function and calculate loss for decode head in training.""" losses = dict() loss_decode = self.decode_head[0].forward_train( - x, img_metas, gt_semantic_seg, self.train_cfg) + batch_inputs, batch_data_samples, self.train_cfg) 
losses.update(add_prefix(loss_decode, 'decode_0')) + # get batch_img_metas + batch_size = len(batch_data_samples) + batch_img_metas = [] + for batch_index in range(batch_size): + metainfo = batch_data_samples[batch_index].metainfo + metainfo['batch_input_shape'] = \ + tuple(batch_inputs[batch_index].size()[-2:]) + batch_img_metas.append(metainfo) for i in range(1, self.num_stages): # forward test again, maybe unnecessary for most methods. if i == 1: prev_outputs = self.decode_head[0].forward_test( - x, img_metas, self.test_cfg) + batch_inputs, batch_img_metas, self.test_cfg) else: prev_outputs = self.decode_head[i - 1].forward_test( - x, prev_outputs, img_metas, self.test_cfg) + batch_inputs, prev_outputs, batch_img_metas, self.test_cfg) loss_decode = self.decode_head[i].forward_train( - x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) + batch_inputs, prev_outputs, batch_data_samples, self.train_cfg) losses.update(add_prefix(loss_decode, f'decode_{i}')) return losses diff --git a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py index c22705f7e..1da7f0716 100644 --- a/mmseg/models/segmentors/encoder_decoder.py +++ b/mmseg/models/segmentors/encoder_decoder.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-import torch import torch.nn as nn import torch.nn.functional as F @@ -25,9 +24,11 @@ class EncoderDecoder(BaseSegmentor): auxiliary_head=None, train_cfg=None, test_cfg=None, + preprocess_cfg=None, pretrained=None, init_cfg=None): - super(EncoderDecoder, self).__init__(init_cfg) + super(EncoderDecoder, self).__init__( + preprocess_cfg=preprocess_cfg, init_cfg=init_cfg) if pretrained is not None: assert backbone.get('pretrained') is None, \ 'both backbone and segmentor set pretrained weight' @@ -59,113 +60,119 @@ class EncoderDecoder(BaseSegmentor): else: self.auxiliary_head = MODELS.build(auxiliary_head) - def extract_feat(self, img): + def extract_feat(self, batch_inputs): """Extract features from images.""" - x = self.backbone(img) + x = self.backbone(batch_inputs) if self.with_neck: x = self.neck(x) return x - def encode_decode(self, img, img_metas): + def encode_decode(self, batch_inputs, batch_img_metas): """Encode images with backbone and decode into a semantic segmentation map of the same size as input.""" - x = self.extract_feat(img) - out = self._decode_head_forward_test(x, img_metas) + x = self.extract_feat(batch_inputs) + out = self._decode_head_forward_test(x, batch_img_metas) out = resize( input=out, - size=img.shape[2:], + size=batch_inputs.shape[2:], mode='bilinear', align_corners=self.align_corners) return out - def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + def _decode_head_forward_train(self, batch_inputs, batch_data_samples): """Run forward function and calculate loss for decode head in training.""" losses = dict() - loss_decode = self.decode_head.forward_train(x, img_metas, - gt_semantic_seg, + loss_decode = self.decode_head.forward_train(batch_inputs, + batch_data_samples, self.train_cfg) losses.update(add_prefix(loss_decode, 'decode')) return losses - def _decode_head_forward_test(self, x, img_metas): + def _decode_head_forward_test(self, batch_inputs, batch_img_metas): """Run forward function and calculate loss for 
decode head in inference.""" - seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) + seg_logits = self.decode_head.forward_test(batch_inputs, + batch_img_metas, + self.test_cfg) return seg_logits - def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): + def _auxiliary_head_forward_train(self, batch_inputs, batch_data_samples): """Run forward function and calculate loss for auxiliary head in training.""" losses = dict() if isinstance(self.auxiliary_head, nn.ModuleList): for idx, aux_head in enumerate(self.auxiliary_head): - loss_aux = aux_head.forward_train(x, img_metas, - gt_semantic_seg, + loss_aux = aux_head.forward_train(batch_inputs, + batch_data_samples, self.train_cfg) losses.update(add_prefix(loss_aux, f'aux_{idx}')) else: loss_aux = self.auxiliary_head.forward_train( - x, img_metas, gt_semantic_seg, self.train_cfg) + batch_inputs, batch_data_samples, self.train_cfg) losses.update(add_prefix(loss_aux, 'aux')) return losses - def forward_dummy(self, img): + def forward_dummy(self, batch_inputs, batch_img_metas): """Dummy forward function.""" - seg_logit = self.encode_decode(img, None) + seg_logit = self.encode_decode(batch_inputs, batch_img_metas) return seg_logit - def forward_train(self, img, img_metas, gt_semantic_seg): + def forward_train(self, batch_inputs, batch_data_samples): """Forward function for training. Args: img (Tensor): Input images. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. + batch_data_samples (list[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `img_metas` or `gt_semantic_seg`. 
Returns: dict[str, Tensor]: a dictionary of loss components """ - x = self.extract_feat(img) + x = self.extract_feat(batch_inputs) losses = dict() - loss_decode = self._decode_head_forward_train(x, img_metas, - gt_semantic_seg) + loss_decode = self._decode_head_forward_train(x, batch_data_samples) losses.update(loss_decode) if self.with_auxiliary_head: loss_aux = self._auxiliary_head_forward_train( - x, img_metas, gt_semantic_seg) + x, batch_data_samples) losses.update(loss_aux) return losses # TODO refactor - def slide_inference(self, img, img_meta, rescale): + def slide_inference(self, batch_inputs, batch_img_metas, rescale): """Inference by sliding-window with overlap. If h_crop > h_img or w_crop > w_img, the small patch will be used to decode without padding. + + Args: + batch_inputs (tensor): the tensor should have a shape NxCxHxW, + which contains all images in the batch. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + + Returns: + tensor: get seg_logit. 
""" h_stride, w_stride = self.test_cfg.stride h_crop, w_crop = self.test_cfg.crop_size - batch_size, _, h_img, w_img = img.size() + batch_size, _, h_img, w_img = batch_inputs.size() num_classes = self.num_classes h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 - preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) - count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) + preds = batch_inputs.new_zeros((batch_size, num_classes, h_img, w_img)) + count_mat = batch_inputs.new_zeros((batch_size, 1, h_img, w_img)) for h_idx in range(h_grids): for w_idx in range(w_grids): y1 = h_idx * h_stride @@ -174,38 +181,30 @@ class EncoderDecoder(BaseSegmentor): x2 = min(x1 + w_crop, w_img) y1 = max(y2 - h_crop, 0) x1 = max(x2 - w_crop, 0) - crop_img = img[:, :, y1:y2, x1:x2] - crop_seg_logit = self.encode_decode(crop_img, img_meta) + crop_img = batch_inputs[:, :, y1:y2, x1:x2] + crop_seg_logit = self.encode_decode(crop_img, batch_img_metas) preds += F.pad(crop_seg_logit, (int(x1), int(preds.shape[3] - x2), int(y1), int(preds.shape[2] - y2))) count_mat[:, :, y1:y2, x1:x2] += 1 assert (count_mat == 0).sum() == 0 - if torch.onnx.is_in_onnx_export(): - # cast count_mat to constant while exporting to ONNX - count_mat = torch.from_numpy( - count_mat.cpu().detach().numpy()).to(device=img.device) preds = preds / count_mat if rescale: preds = resize( preds, - size=img_meta[0]['ori_shape'][:2], + size=batch_img_metas[0]['ori_shape'][:2], mode='bilinear', align_corners=self.align_corners, warning=False) return preds - def whole_inference(self, img, img_meta, rescale): + def whole_inference(self, batch_inputs, batch_img_metas, rescale): """Inference with full image.""" - seg_logit = self.encode_decode(img, img_meta) + seg_logit = self.encode_decode(batch_inputs, batch_img_metas) if rescale: - # support dynamic shape for onnx - if torch.onnx.is_in_onnx_export(): - size = img.shape[2:] - else: - size = 
img_meta[0]['ori_shape'][:2] + size = batch_img_metas[0]['ori_shape'][:2] seg_logit = resize( seg_logit, size=size, @@ -215,13 +214,13 @@ class EncoderDecoder(BaseSegmentor): return seg_logit - def inference(self, img, img_meta, rescale): + def inference(self, batch_inputs, batch_img_metas, rescale): """Inference with slide/whole style. Args: - img (Tensor): The input image of shape (N, 3, H, W). - img_meta (dict): Image info dict where each dict has: 'img_shape', - 'scale_factor', 'flip', and may also contain + batch_inputs (Tensor): The input image of shape (N, 3, H, W). + batch_img_metas (dict): Image info dict where each dict has: + 'img_shape', 'scale_factor', 'flip', and may also contain 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. For details on the values of these keys see `mmseg/datasets/pipelines/formatting.py:Collect`. @@ -232,16 +231,18 @@ class EncoderDecoder(BaseSegmentor): """ assert self.test_cfg.mode in ['slide', 'whole'] - ori_shape = img_meta[0]['ori_shape'] - assert all(_['ori_shape'] == ori_shape for _ in img_meta) + ori_shape = batch_img_metas[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in batch_img_metas) if self.test_cfg.mode == 'slide': - seg_logit = self.slide_inference(img, img_meta, rescale) + seg_logit = self.slide_inference(batch_inputs, batch_img_metas, + rescale) else: - seg_logit = self.whole_inference(img, img_meta, rescale) + seg_logit = self.whole_inference(batch_inputs, batch_img_metas, + rescale) output = F.softmax(seg_logit, dim=1) - flip = img_meta[0]['flip'] + flip = batch_img_metas[0].get('flip', None) if flip: - flip_direction = img_meta[0]['flip_direction'] + flip_direction = batch_img_metas[0]['flip_direction'] assert flip_direction in ['horizontal', 'vertical'] if flip_direction == 'horizontal': output = output.flip(dims=(3, )) @@ -250,20 +251,18 @@ class EncoderDecoder(BaseSegmentor): return output - def simple_test(self, img, img_meta, rescale=True): + def simple_test(self, 
batch_inputs, batch_img_metas, rescale=True): """Simple test with single image.""" - seg_logit = self.inference(img, img_meta, rescale) + results_dict = dict() + seg_logit = self.inference(batch_inputs, batch_img_metas, rescale) + results_dict['seg_logits'] = seg_logit seg_pred = seg_logit.argmax(dim=1) - if torch.onnx.is_in_onnx_export(): - # our inference backend only support 4D output - seg_pred = seg_pred.unsqueeze(0) - return seg_pred seg_pred = seg_pred.cpu().numpy() - # unravel batch dim - seg_pred = list(seg_pred) - return seg_pred + results_dict['pred_sem_seg'] = seg_pred + results_list = self.postprocess_result(results_dict) + return results_list - def aug_test(self, imgs, img_metas, rescale=True): + def aug_test(self, batch_inputs, batch_img_metas, rescale=True): """Test with augmentations. Only rescale=True is supported. @@ -271,13 +270,14 @@ class EncoderDecoder(BaseSegmentor): # aug_test rescale all imgs back to ori_shape for now assert rescale # to save memory, we get augmented seg logit inplace - seg_logit = self.inference(imgs[0], img_metas[0], rescale) - for i in range(1, len(imgs)): - cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) + seg_logit = self.inference(batch_inputs[0], batch_img_metas[0], + rescale) + for i in range(1, len(batch_inputs)): + cur_seg_logit = self.inference(batch_inputs[i], batch_img_metas[i], + rescale) seg_logit += cur_seg_logit - seg_logit /= len(imgs) + seg_logit /= len(batch_inputs) seg_pred = seg_logit.argmax(dim=1) - seg_pred = seg_pred.cpu().numpy() # unravel batch dim seg_pred = list(seg_pred) return seg_pred diff --git a/tests/test_models/test_forward.py b/tests/test_models/test_forward.py index ee707b351..2c5862997 100644 --- a/tests/test_models/test_forward.py +++ b/tests/test_models/test_forward.py @@ -9,42 +9,58 @@ import pytest import torch import torch.nn as nn from mmcv.cnn.utils import revert_sync_batchnorm +from mmengine.data import PixelData + +from mmseg.core import SegDataSample -def 
_demo_mm_inputs(input_shape=(2, 3, 8, 16), num_classes=10): +def _demo_mm_inputs(batch_size=2, image_shapes=(3, 128, 128), num_classes=10): """Create a superset of inputs needed to run test or train batches. Args: - input_shape (tuple): - input batch dimensions - - num_classes (int): - number of semantic classes + batch_size (int): batch size. Default to 2. + image_shapes (List[tuple], Optional): image shape. + Default to (3, 128, 128) + num_classes (int): number of different labels a + box might have. Default to 10. """ - (N, C, H, W) = input_shape + if isinstance(image_shapes, list): + assert len(image_shapes) == batch_size + else: + image_shapes = [image_shapes] * batch_size - rng = np.random.RandomState(0) + packed_inputs = [] + for idx in range(batch_size): + image_shape = image_shapes[idx] + c, h, w = image_shape + image = np.random.randint(0, 255, size=image_shape, dtype=np.uint8) - imgs = rng.rand(*input_shape) - segs = rng.randint( - low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + mm_inputs = dict() + mm_inputs['inputs'] = torch.from_numpy(image) - img_metas = [{ - 'img_shape': (H, W, C), - 'ori_shape': (H, W, C), - 'pad_shape': (H, W, C), - 'filename': '.png', - 'scale_factor': 1.0, - 'flip': False, - 'flip_direction': 'horizontal' - } for _ in range(N)] + img_meta = { + 'img_id': idx, + 'img_shape': image_shape, + 'ori_shape': image_shape, + 'pad_shape': image_shape, + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + 'flip_direction': None, + } - mm_inputs = { - 'imgs': torch.FloatTensor(imgs), - 'img_metas': img_metas, - 'gt_semantic_seg': torch.LongTensor(segs) - } - return mm_inputs + data_sample = SegDataSample() + data_sample.set_metainfo(img_meta) + + gt_semantic_seg = np.random.randint( + 0, num_classes, (1, h, w), dtype=np.uint8) + gt_semantic_seg = torch.LongTensor(gt_semantic_seg) + gt_sem_seg_data = dict(data=gt_semantic_seg) + data_sample.gt_sem_seg = PixelData(**gt_sem_seg_data) + mm_inputs['data_sample'] = 
data_sample + packed_inputs.append(mm_inputs) + + return packed_inputs def _get_config_directory(): @@ -146,20 +162,10 @@ def test_psanet_forward(): 'psanet/psanet_r50-d8_512x1024_40k_cityscapes.py') -def test_encnet_forward(): - _test_encoder_decoder_forward( - 'encnet/encnet_r50-d8_512x1024_40k_cityscapes.py') - - def test_sem_fpn_forward(): _test_encoder_decoder_forward('sem_fpn/fpn_r50_512x1024_80k_cityscapes.py') -def test_point_rend_forward(): - _test_encoder_decoder_forward( - 'point_rend/pointrend_r50_512x1024_80k_cityscapes.py') - - def test_mobilenet_v2_forward(): _test_encoder_decoder_forward( 'mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py') @@ -206,30 +212,23 @@ def _test_encoder_decoder_forward(cfg_file): else: num_classes = segmentor.decode_head.num_classes # batch_size=2 for BatchNorm - input_shape = (2, 3, 32, 32) - mm_inputs = _demo_mm_inputs(input_shape, num_classes=num_classes) - - imgs = mm_inputs.pop('imgs') - img_metas = mm_inputs.pop('img_metas') - gt_semantic_seg = mm_inputs['gt_semantic_seg'] - + packed_inputs = _demo_mm_inputs( + batch_size=2, image_shapes=(3, 32, 32), num_classes=num_classes) # convert to cuda Tensor if applicable if torch.cuda.is_available(): segmentor = segmentor.cuda() - imgs = imgs.cuda() - gt_semantic_seg = gt_semantic_seg.cuda() else: segmentor = revert_sync_batchnorm(segmentor) # Test forward train - losses = segmentor.forward( - imgs, img_metas, gt_semantic_seg=gt_semantic_seg, return_loss=True) + losses = segmentor.forward(packed_inputs, return_loss=True) assert isinstance(losses, dict) # Test forward test + packed_inputs = _demo_mm_inputs( + batch_size=1, image_shapes=(3, 32, 32), num_classes=num_classes) with torch.no_grad(): segmentor.eval() # pack into lists - img_list = [img[None, :] for img in imgs] - img_meta_list = [[img_meta] for img_meta in img_metas] - segmentor.forward(img_list, img_meta_list, return_loss=False) + batch_results = segmentor.forward(packed_inputs, return_loss=False) + assert 
len(batch_results) == 1