diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py
index efc8b4bb2..1b365f689 100644
--- a/configs/_base_/datasets/ade20k.py
+++ b/configs/_base_/datasets/ade20k.py
@@ -7,48 +7,39 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/training',
-        ann_dir='annotations/training',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/ade20k_640x640.py b/configs/_base_/datasets/ade20k_640x640.py
index 14a4bb092..2392cd3e9 100644
--- a/configs/_base_/datasets/ade20k_640x640.py
+++ b/configs/_base_/datasets/ade20k_640x640.py
@@ -7,48 +7,39 @@ crop_size = (640, 640)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2560, 640), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2560, 640),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/training',
-        ann_dir='annotations/training',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
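Note on the pipelines above: the dropped 'Normalize' step and the pad_val/seg_pad_val arguments are not lost. In the 1.x design they move out of the dataset pipeline and onto the model. A minimal sketch of where they land, assuming the SegDataPreProcessor shipped with MMSegmentation 1.x (the mean/std shown are the usual ImageNet statistics, for illustration only):

# Sketch: batch-level normalization and padding now live on the model side,
# so the dataset pipelines only load, augment and pack the samples.
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(data_preprocessor=data_preprocessor)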
diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py
index 298594ea9..17f39b7e1 100644
--- a/configs/_base_/datasets/chase_db1.py
+++ b/configs/_base_/datasets/chase_db1.py
@@ -8,52 +8,44 @@ crop_size = (128, 128)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=img_scale,
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=40000,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            img_dir='images/training',
-            ann_dir='annotations/training',
-            pipeline=train_pipeline)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
-        pipeline=test_pipeline),
-    test=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py
index f21867c63..893652f88 100644
--- a/configs/_base_/datasets/cityscapes.py
+++ b/configs/_base_/datasets/cityscapes.py
@@ -7,48 +7,38 @@ crop_size = (512, 1024)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 1024),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='leftImg8bit/train',
-        ann_dir='gtFine/train',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='leftImg8bit/val',
-        ann_dir='gtFine/val',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='leftImg8bit/val',
-        ann_dir='gtFine/val',
+        data_prefix=dict(
+            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/cityscapes_1024x1024.py b/configs/_base_/datasets/cityscapes_1024x1024.py
index f98d92972..9bff81433 100644
--- a/configs/_base_/datasets/cityscapes_1024x1024.py
+++ b/configs/_base_/datasets/cityscapes_1024x1024.py
@@ -5,31 +5,18 @@ crop_size = (1024, 1024)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 1024),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/cityscapes_768x768.py b/configs/_base_/datasets/cityscapes_768x768.py
index fde9d7c7d..1eaa801a7 100644
--- a/configs/_base_/datasets/cityscapes_768x768.py
+++ b/configs/_base_/datasets/cityscapes_768x768.py
@@ -5,31 +5,18 @@ crop_size = (768, 768)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2049, 1025), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2049, 1025),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/cityscapes_769x769.py b/configs/_base_/datasets/cityscapes_769x769.py
index 336c7b254..be0b80c76 100644
--- a/configs/_base_/datasets/cityscapes_769x769.py
+++ b/configs/_base_/datasets/cityscapes_769x769.py
@@ -5,31 +5,18 @@ crop_size = (769, 769)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2049, 1025), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2049, 1025),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/cityscapes_832x832.py b/configs/_base_/datasets/cityscapes_832x832.py
index b9325cc00..52dc58099 100644
--- a/configs/_base_/datasets/cityscapes_832x832.py
+++ b/configs/_base_/datasets/cityscapes_832x832.py
@@ -5,31 +5,18 @@ crop_size = (832, 832)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 1024),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/coco-stuff10k.py b/configs/_base_/datasets/coco-stuff10k.py
index ec0496928..ef824eba1 100644
--- a/configs/_base_/datasets/coco-stuff10k.py
+++ b/configs/_base_/datasets/coco-stuff10k.py
@@ -7,51 +7,40 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
         reduce_zero_label=True,
-        img_dir='images/train2014',
-        ann_dir='annotations/train2014',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='images/train2014', seg_map_path='annotations/train2014'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
         reduce_zero_label=True,
-        img_dir='images/test2014',
-        ann_dir='annotations/test2014',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        reduce_zero_label=True,
-        img_dir='images/test2014',
-        ann_dir='annotations/test2014',
+        data_prefix=dict(
+            img_path='images/test2014', seg_map_path='annotations/test2014'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/coco-stuff164k.py b/configs/_base_/datasets/coco-stuff164k.py
index a6a38f2ac..4b986045b 100644
--- a/configs/_base_/datasets/coco-stuff164k.py
+++ b/configs/_base_/datasets/coco-stuff164k.py
@@ -7,48 +7,38 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
        type=dataset_type,
         data_root=data_root,
-        img_dir='images/train2017',
-        ann_dir='annotations/train2017',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='images/train2017', seg_map_path='annotations/train2017'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/val2017',
-        ann_dir='annotations/val2017',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='images/val2017',
-        ann_dir='annotations/val2017',
+        data_prefix=dict(
+            img_path='images/val2017', seg_map_path='annotations/val2017'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
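The retina datasets that follow (drive, hrf, stare, like chase_db1 above) repeat a tiny training split 40000 times. The RepeatDataset wrapper must sit under the dataloader's dataset key rather than beside it, because the dataloader-level keys map onto torch DataLoader arguments. A sketch of the intended nesting, with dataset_type, data_root and train_pipeline as defined in each config:

# Sketch: the wrapper is itself the dataloader's dataset; the real dataset
# nests one level deeper inside it.
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            data_prefix=dict(
                img_path='images/training',
                seg_map_path='annotations/training'),
            pipeline=train_pipeline)))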
diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py
index 06e8ff606..00d17f51a 100644
--- a/configs/_base_/datasets/drive.py
+++ b/configs/_base_/datasets/drive.py
@@ -8,52 +8,43 @@ crop_size = (64, 64)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=img_scale,
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=40000,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            img_dir='images/training',
-            ann_dir='annotations/training',
-            pipeline=train_pipeline)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
-        pipeline=test_pipeline),
-    test=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py
index 242d790eb..fd1b31156 100644
--- a/configs/_base_/datasets/hrf.py
+++ b/configs/_base_/datasets/hrf.py
@@ -8,52 +8,43 @@ crop_size = (256, 256)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=img_scale,
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=40000,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            img_dir='images/training',
-            ann_dir='annotations/training',
-            pipeline=train_pipeline)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
-        pipeline=test_pipeline),
-    test=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=40000,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            data_prefix=dict(
+                img_path='images/training',
+                seg_map_path='annotations/training'),
+            pipeline=train_pipeline)))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='images/validation',
-        ann_dir='annotations/validation',
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
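Across these files the old keys translate as img_dir -> data_prefix['img_path'], ann_dir -> data_prefix['seg_map_path'], and split -> ann_file. A rough sketch of how the new keys combine into sample paths, with a hypothetical file name (illustrative only; the real resolution happens inside the 1.x dataset class):

import os.path as osp

# Illustrative: data_root is joined with each data_prefix entry, then with
# the per-sample name and suffix.
def resolve(data_root, data_prefix, name, img_suffix='.png', seg_suffix='.png'):
    img = osp.join(data_root, data_prefix['img_path'], name + img_suffix)
    seg = osp.join(data_root, data_prefix['seg_map_path'], name + seg_suffix)
    return img, seg

print(resolve('data/HRF',
              dict(img_path='images/training',
                   seg_map_path='annotations/training'),
              'sample_0001'))  # hypothetical sample name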
diff --git a/configs/_base_/datasets/isaid.py b/configs/_base_/datasets/isaid.py
index 8e4c26abb..4c7371591 100644
--- a/configs/_base_/datasets/isaid.py
+++ b/configs/_base_/datasets/isaid.py
@@ -15,48 +15,37 @@ crop_size = (896, 896)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(896, 896), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(896, 896),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(896, 896), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='img_dir/train',
-        ann_dir='ann_dir/train',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='img_dir/train', seg_map_path='ann_dir/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='img_dir/val',
-        ann_dir='ann_dir/val',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='img_dir/val',
-        ann_dir='ann_dir/val',
+        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/loveda.py b/configs/_base_/datasets/loveda.py
index e55335695..a31e0237a 100644
--- a/configs/_base_/datasets/loveda.py
+++ b/configs/_base_/datasets/loveda.py
@@ -7,48 +7,37 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1024, 1024),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='img_dir/train',
-        ann_dir='ann_dir/train',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='img_dir/train', seg_map_path='ann_dir/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='img_dir/val',
-        ann_dir='ann_dir/val',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='img_dir/val',
-        ann_dir='ann_dir/val',
+        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/pascal_context.py b/configs/_base_/datasets/pascal_context.py
index ff65bad1b..24c2a9b3b 100644
--- a/configs/_base_/datasets/pascal_context.py
+++ b/configs/_base_/datasets/pascal_context.py
@@ -10,51 +10,40 @@ crop_size = (480, 480)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=img_scale,
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClassContext',
-        split='ImageSets/SegmentationContext/train.txt',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+        ann_file='ImageSets/SegmentationContext/train.txt',
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClassContext',
-        split='ImageSets/SegmentationContext/val.txt',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClassContext',
-        split='ImageSets/SegmentationContext/val.txt',
+        data_prefix=dict(
+            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+        ann_file='ImageSets/SegmentationContext/val.txt',
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py
index 37585abab..0db910a5e 100644
--- a/configs/_base_/datasets/pascal_context_59.py
+++ b/configs/_base_/datasets/pascal_context_59.py
@@ -10,51 +10,40 @@ crop_size = (480, 480)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=img_scale,
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=img_scale, keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClassContext',
-        split='ImageSets/SegmentationContext/train.txt',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+        ann_file='ImageSets/SegmentationContext/train.txt',
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClassContext',
-        split='ImageSets/SegmentationContext/val.txt',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClassContext',
-        split='ImageSets/SegmentationContext/val.txt',
+        data_prefix=dict(
+            img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
+        ann_file='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py
index ba1d42d0c..d4a6e8e95 100644
--- a/configs/_base_/datasets/pascal_voc12.py
+++ b/configs/_base_/datasets/pascal_voc12.py
@@ -7,51 +7,40 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClass',
-        split='ImageSets/Segmentation/train.txt',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='JPEGImages', seg_map_path='SegmentationClass'),
+        ann_file='ImageSets/Segmentation/train.txt',
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClass',
-        split='ImageSets/Segmentation/val.txt',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='JPEGImages',
-        ann_dir='SegmentationClass',
-        split='ImageSets/Segmentation/val.txt',
+        data_prefix=dict(
+            img_path='JPEGImages', seg_map_path='SegmentationClass'),
+        ann_file='ImageSets/Segmentation/val.txt',
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/_base_/datasets/pascal_voc12_aug.py b/configs/_base_/datasets/pascal_voc12_aug.py
index 3f23b6717..24ebb74d3 100644
--- a/configs/_base_/datasets/pascal_voc12_aug.py
+++ b/configs/_base_/datasets/pascal_voc12_aug.py
@@ -1,9 +1,9 @@
 _base_ = './pascal_voc12.py'
 # dataset settings
-data = dict(
-    train=dict(
+train_dataloader = dict(
+    dataset=dict(
         ann_dir=['SegmentationClass', 'SegmentationClassAug'],
-        split=[
+        ann_file=[
             'ImageSets/Segmentation/train.txt', 'ImageSets/Segmentation/aug.txt'
         ]))
diff --git a/configs/_base_/datasets/potsdam.py b/configs/_base_/datasets/potsdam.py
index f74c4a56c..197e24957 100644
--- a/configs/_base_/datasets/potsdam.py
+++ b/configs/_base_/datasets/potsdam.py
@@ -7,48 +7,37 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(512, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(512, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(512, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='img_dir/train',
-        ann_dir='ann_dir/train',
-        pipeline=train_pipeline),
-    val=dict(
+        data_prefix=dict(
+            img_path='img_dir/train', seg_map_path='ann_dir/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        img_dir='img_dir/val',
-        ann_dir='ann_dir/val',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='img_dir/val',
-        ann_dir='ann_dir/val',
+        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
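The dataloader-level rewrite applied in every file of this patch, summarized as a sketch (in 0.x a single 'data' dict configured all three splits implicitly; 1.x gives each split its own dataloader with an explicit sampler):

# 0.x                                1.x
# data = dict(                       train_dataloader = dict(
#     samples_per_gpu=4,   ----->        batch_size=4,
#     workers_per_gpu=4,   ----->        num_workers=4,
#     train=dict(...),     ----->        dataset=dict(...),
#     ...)                               sampler=dict(type='DefaultSampler',
#                                                     shuffle=True))
# The val/test splits get shuffle=False samplers, and test_dataloader
# usually just aliases val_dataloader, as in the configs above.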
dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) - ]) + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict(type='PackSegInputs') ] - -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type='RepeatDataset', - times=40000, - dataset=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/training', - ann_dir='annotations/training', - pipeline=train_pipeline)), - val=dict( +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + type='RepeatDataset', + times=40000, + dataset=dict( type=dataset_type, data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline), - test=dict( + data_prefix=dict( + img_path='images/training', seg_map_path='annotations/training'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( type=dataset_type, data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/_base_/datasets/vaihingen.py b/configs/_base_/datasets/vaihingen.py index c0df282c4..3e028d889 100644 --- a/configs/_base_/datasets/vaihingen.py +++ b/configs/_base_/datasets/vaihingen.py @@ -7,48 +7,37 @@ crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomResize', scale=(512, 512), ratio_range=(0.5, 2.0)), dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.5), dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), + dict(type='Pad', size=crop_size), + dict(type='PackSegInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(512, 512), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + dict(type='Resize', scale=(512, 512), keep_ratio=True), + dict(type='PackSegInputs') ] -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( type=dataset_type, data_root=data_root, - img_dir='img_dir/train', - ann_dir='ann_dir/train', - pipeline=train_pipeline), - val=dict( + data_prefix=dict( + img_path='img_dir/train', seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + 
sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( type=dataset_type, data_root=data_root, - img_dir='img_dir/val', - ann_dir='ann_dir/val', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='img_dir/val', - ann_dir='ann_dir/val', + data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py b/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py index f764c92c1..7383af589 100644 --- a/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py +++ b/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py @@ -5,20 +5,12 @@ img_norm_cfg = dict( test_pipeline = [ dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2560, 640), - img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=True, - transforms=[ - dict(type='Resize', keep_ratio=True, min_size=640), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + # TODO: Refactor 'MultiScaleFlipAug' which supports + # `min_size` feature in `Resize` class + # img_ratios is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] + # original image scale is (2560, 640) + dict(type='Resize', scale=(2560, 640), keep_ratio=True), + dict(type='PackSegInputs'), ] -data = dict( - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline), - samples_per_gpu=2) +val_dataloader = dict(batch_size=2, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py index b36adc3c0..08ca8ab78 100644 --- a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py @@ -27,4 +27,6 @@ lr_config = dict( by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=2) +test_dataloader = val_dataloader diff --git a/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py b/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py index fd4d9477d..9a2e95909 100644 --- a/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py +++ b/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py @@ -5,18 +5,12 @@ img_norm_cfg = dict( test_pipeline = [ dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2560, 640), - img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=True, - transforms=[ - dict(type='Resize', keep_ratio=True, min_size=640), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + # TODO: Refactor 'MultiScaleFlipAug' which supports + # `min_size` feature in `Resize` class + # img_ratios is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] + # original image scale is (2560, 640) + dict(type='Resize', scale=(2560, 640), keep_ratio=True), + dict(type='PackSegInputs'), ] -data = dict( - val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py index 
e6247b735..d41a74614 100644 --- a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py @@ -40,7 +40,9 @@ lr_config = dict( min_lr=0.0, by_epoch=False) -data = dict(samples_per_gpu=1) +train_dataloader = dict(batch_size=1) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader optimizer_config = dict( type='GradientCumulativeFp16OptimizerHook', cumulative_iters=2) diff --git a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py index f4019e930..198be7bd1 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py @@ -5,7 +5,6 @@ _base_ = [ ] lr_config = dict(warmup='linear', warmup_iters=1000) optimizer = dict(lr=0.025) -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, -) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=4, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py index ef061a16b..0e694afa9 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py @@ -10,7 +10,6 @@ model = dict( type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) lr_config = dict(warmup='linear', warmup_iters=1000) optimizer = dict(lr=0.025) -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, -) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=4, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py index f4b9f6d2c..9b517405e 100644 --- a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py @@ -1,5 +1,4 @@ _base_ = './bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py' -data = dict( - samples_per_gpu=8, - workers_per_gpu=4, -) +train_dataloader = dict(batch_size=8, num_workers=4) +val_dataloader = dict(batch_size=8, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py index 7cadd503c..4a979f26a 100644 --- a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py @@ -36,7 +36,6 @@ model = dict( ]) lr_config = dict(warmup='linear', warmup_iters=1000) optimizer = dict(lr=0.05) -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, -) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=4, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py index 1248bd87a..0a502a99b 100644 --- a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py +++ b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py @@ -5,7 +5,6 @@ _base_ = [ ] lr_config = dict(warmup='linear', 
warmup_iters=1000)
 optimizer = dict(lr=0.05)
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
index 5e93bea8b..287146bc8 100644
--- a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
@@ -5,7 +5,6 @@ _base_ = [
 ]
 lr_config = dict(warmup='linear', warmup_iters=1000)
 optimizer = dict(lr=0.05)
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=8, num_workers=4)
+val_dataloader = dict(batch_size=8, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
index f14e52813..75fc49897 100644
--- a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
@@ -6,7 +6,6 @@ _base_ = [
 sampler = dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)
 lr_config = dict(warmup='linear', warmup_iters=1000)
 optimizer = dict(lr=0.05)
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
index 6efc4cd5c..83088b5dc 100644
--- a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
+++ b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
@@ -19,48 +19,30 @@ crop_size = (512, 1024)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
-    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 1024),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='leftImg8bit/train',
-        ann_dir='gtFine/train',
-        pipeline=train_pipeline),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='leftImg8bit/val',
-        ann_dir='gtFine/val',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        img_dir='leftImg8bit/val',
-        ann_dir='gtFine/val',
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    dataset=dict(
+        data_prefix=dict(
+            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    dataset=dict(
+        data_prefix=dict(
+            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
         pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/configs/cgnet/cgnet_680x680_60k_cityscapes.py
index 7d4ebe136..ffc96bdfa 100644
--- a/configs/cgnet/cgnet_680x680_60k_cityscapes.py
+++ b/configs/cgnet/cgnet_680x680_60k_cityscapes.py
@@ -19,32 +19,19 @@ crop_size = (680, 680)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 1024), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 1024),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=8, num_workers=4, dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=8, num_workers=4, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
index 7bf35b2f1..26e669648 100644
--- a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
@@ -33,7 +33,9 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
 # fp16 settings
 optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
 # fp16 placeholder
diff --git a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
index 8d2c0c26d..760cde32d 100644
--- a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
@@ -48,7 +48,9 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
 # fp16 settings
 optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
 # fp16 placeholder
diff --git a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
index 7527ed51f..6bef56185 100644
--- a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
@@ -48,7 +48,9 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
 # fp16 settings
 optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
 # fp16 placeholder
diff --git a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
index 2e95f3af9..f124888c3 100644
--- a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
@@ -47,7 +47,9 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
 # fp16 settings
 optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
 # fp16 placeholder
diff --git a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
index 35c72a8d9..75e24842e 100644
--- a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
@@ -47,7 +47,9 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
 # fp16 settings
 optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
 # fp16 placeholder
diff --git a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
index 0e2f38ebb..31899793c 100644
--- a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
@@ -48,7 +48,9 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
 # fp16 settings
 optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
 # fp16 placeholder
diff --git a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
index c751a6823..907bfcbbb 100644
--- a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
+++ b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
@@ -29,4 +29,6 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
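The hunks above all apply one mechanical mapping. As a minimal sketch (not part of the patch, assuming the MMEngine-style dataloader schema these configs target, with `train_pipeline`/`test_pipeline` already defined in scope), the old and new spellings correspond as follows:

# Old style: one shared `data` dict, with per-GPU batch settings.
data = dict(
    samples_per_gpu=2,   # images per GPU
    workers_per_gpu=2,   # dataloader workers per GPU
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))

# New style: one dataloader dict per split. `batch_size`/`num_workers`
# replace `samples_per_gpu`/`workers_per_gpu`, and the dataset settings
# move under a nested `dataset` key.
train_dataloader = dict(
    batch_size=2, num_workers=2, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=2, num_workers=2, dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader

Splitting the single `data` dict into three top-level names lets each split carry its own sampler and batch settings instead of sharing one `samples_per_gpu` value.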
diff --git a/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py b/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py
index 8cb8e5149..8cc72d4ea 100644
--- a/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py
+++ b/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py
@@ -2,7 +2,6 @@ _base_ = [
     '../_base_/models/erfnet_fcn.py', '../_base_/datasets/cityscapes.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py
index 87fc274dc..6f0452e63 100644
--- a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py
+++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py
@@ -1,6 +1,5 @@
 # model settings
 _base_ = './fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py'
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py
index 59d294b5f..e8ff93957 100644
--- a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py
+++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py
@@ -1,6 +1,5 @@
 # model settings
 _base_ = './fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py'
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py
index 5fe5ca16b..78be5f451 100644
--- a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py
+++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py
@@ -3,7 +3,6 @@ _base_ = [
     '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py',
     '../_base_/schedules/schedule_80k.py'
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py b/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py
index 469812503..19758f36f 100644
--- a/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py
+++ b/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py
@@ -4,7 +4,9 @@ _base_ = [
 ]
 
 # Re-config the data sampler.
-data = dict(samples_per_gpu=4, workers_per_gpu=4)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
 
 # Re-config the optimizer.
 optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
diff --git a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 3edb05c87..d906b991b 100644
--- a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -90,4 +90,6 @@ lr_config = dict(
     step=[60000, 72000],
     by_epoch=False)
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 29a088f72..1f83f095c 100644
--- a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -90,4 +90,6 @@ lr_config = dict(
     step=[60000, 72000],
     by_epoch=False)
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index d77a3b442..a4e7bca10 100644
--- a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -89,4 +89,6 @@ lr_config = dict(
     step=[60000, 72000],
     by_epoch=False)
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 0071cea75..90a06aa1a 100644
--- a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -90,4 +90,6 @@ lr_config = dict(
     step=[60000, 72000],
     by_epoch=False)
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py
index b9d1a0952..e525fa5f3 100644
--- a/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py
@@ -16,4 +16,6 @@ model = dict(
         kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])),
     auxiliary_head=dict(in_channels=768))
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py
index fc6e9fe39..b4c982fac 100644
--- a/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py
@@ -22,33 +22,21 @@ crop_size = (640, 640)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 640), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 640),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 640), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(
+    batch_size=2, num_workers=2, dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=2, num_workers=2, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
index 0b29b2b8c..0d3294c89 100644
--- a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
@@ -54,4 +54,6 @@ lr_config = dict(
     step=[60000, 72000],
     by_epoch=False)
 # In K-Net implementation we use batch size 2 per GPU as default
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
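The knet_s3_upernet_swin-l 640x640 hunk above merges what were two separate assignments to the same dataloader names. These config files execute as plain Python, so a second assignment rebinds the name and replaces the earlier dict wholesale; there is no key-wise merge within one file. A minimal sketch of the failure mode the merge avoids (hypothetical values):

# Plain-Python rebinding: the second dict replaces the first entirely.
train_dataloader = dict(dataset=dict(pipeline=[dict(type='PackSegInputs')]))
train_dataloader = dict(batch_size=2, num_workers=2)
assert 'dataset' not in train_dataloader  # the pipeline override is silently lost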
diff --git a/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py b/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py
index 85b3be303..4bfd7aa3e 100644
--- a/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py
+++ b/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py
@@ -5,20 +5,12 @@ img_norm_cfg = dict(
 
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=True,
-        transforms=[
-            dict(type='Resize', keep_ratio=True, min_size=512),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    # TODO: Refactor 'MultiScaleFlipAug' which supports
+    # `min_size` feature in `Resize` class
+    # img_ratios is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+    # original image scale is (2048, 512)
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline),
-    samples_per_gpu=2)
+val_dataloader = dict(batch_size=2, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
index cb236cc04..0bfe224c5 100644
--- a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
+++ b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
@@ -45,4 +45,6 @@ lr_config = dict(
 
 fp16 = dict(loss_scale='dynamic')
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py
index e59a78b48..3da5c24af 100644
--- a/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py
+++ b/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py
@@ -6,6 +6,8 @@ _base_ = [
 model = dict(pretrained='open-mmlab://contrib/mobilenet_v3_large')
 
 # Re-config the data sampler.
-data = dict(samples_per_gpu=4, workers_per_gpu=4)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
 
 runner = dict(type='IterBasedRunner', max_iters=320000)
diff --git a/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py
index a3c543514..0a9ce41c7 100644
--- a/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py
+++ b/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py
@@ -4,6 +4,8 @@ _base_ = [
 ]
 
 # Re-config the data sampler.
-data = dict(samples_per_gpu=4, workers_per_gpu=4)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
 
 runner = dict(type='IterBasedRunner', max_iters=320000)
diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py
index 9abb5113c..98e2fc856 100644
--- a/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py
+++ b/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py
@@ -6,24 +6,14 @@ img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1920, 1080),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(1920, 1080), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-
-data = dict(
-    test=dict(
+test_dataloader = dict(
+    dataset=dict(
         type='DarkZurichDataset',
         data_root='data/dark_zurich/',
-        img_dir='rgb_anon/val/night/GOPR0356',
-        ann_dir='gt/val/night/GOPR0356',
+        data_prefix=dict(
+            img_path='rgb_anon/val/night/GOPR0356',
+            seg_map_path='gt/val/night/GOPR0356'),
         pipeline=test_pipeline))
diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py
index 195aeea5e..447b64bc7 100644
--- a/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py
+++ b/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py
@@ -7,23 +7,14 @@ img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1920, 1080),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(1920, 1080), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    test=dict(
+test_dataloader = dict(
+    dataset=dict(
         type='NightDrivingDataset',
         data_root='data/NighttimeDrivingTest/',
-        img_dir='leftImg8bit/test/night',
-        ann_dir='gtCoarse_daytime_trainvaltest/test/night',
+        data_prefix=dict(
+            img_path='leftImg8bit/test/night',
+            seg_map_path='gtCoarse_daytime_trainvaltest/test/night'),
         pipeline=test_pipeline))
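In the pspnet test-set hunks, the separate `img_dir`/`ann_dir` keys become one `data_prefix` dict whose entries are still resolved relative to `data_root`. A small sketch of the equivalent join (the dataset class performs this internally; plain `os.path.join` stands in here):

import os.path as osp

data_root = 'data/dark_zurich/'
data_prefix = dict(
    img_path='rgb_anon/val/night/GOPR0356',
    seg_map_path='gt/val/night/GOPR0356')
img_dir = osp.join(data_root, data_prefix['img_path'])
seg_dir = osp.join(data_root, data_prefix['seg_map_path'])
# img_dir -> 'data/dark_zurich/rgb_anon/val/night/GOPR0356'
# seg_dir -> 'data/dark_zurich/gt/val/night/GOPR0356'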
diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py
index 2f16171ac..59552de5a 100644
--- a/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py
+++ b/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py
@@ -7,24 +7,14 @@ img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1920, 1080),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(1920, 1080), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-
-data = dict(
-    test=dict(
+test_dataloader = dict(
+    dataset=dict(
         type='DarkZurichDataset',
         data_root='data/dark_zurich/',
-        img_dir='rgb_anon/val/night/GOPR0356',
-        ann_dir='gt/val/night/GOPR0356',
+        data_prefix=dict(
+            img_path='rgb_anon/val/night/GOPR0356',
+            seg_map_path='gt/val/night/GOPR0356'),
         pipeline=test_pipeline))
diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py
index ecc5d99d7..80d953450 100644
--- a/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py
+++ b/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py
@@ -7,23 +7,14 @@ img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1920, 1080),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(1920, 1080), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    test=dict(
+test_dataloader = dict(
+    dataset=dict(
         type='NightDrivingDataset',
         data_root='data/NighttimeDrivingTest/',
-        img_dir='leftImg8bit/test/night',
-        ann_dir='gtCoarse_daytime_trainvaltest/test/night',
+        data_prefix=dict(
+            img_path='leftImg8bit/test/night',
+            seg_map_path='gtCoarse_daytime_trainvaltest/test/night'),
         pipeline=test_pipeline))
diff --git a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
index 03065a794..d0551f115 100644
--- a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
+++ b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
@@ -29,5 +29,6 @@ lr_config = dict(
     power=1.0,
     min_lr=0.0,
     by_epoch=False)
-
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
index 644450053..3b87b2e5a 100644
--- a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
+++ b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
@@ -32,5 +32,6 @@ lr_config = dict(
     power=1.0,
     min_lr=0.0,
     by_epoch=False)
-
-data = dict(samples_per_gpu=1, workers_per_gpu=1)
+train_dataloader = dict(batch_size=1, num_workers=1)
+val_dataloader = dict(batch_size=1, num_workers=1)
+test_dataloader = val_dataloader
diff --git a/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py b/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py
index d21774c4d..935d4667f 100644
--- a/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py
+++ b/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py
@@ -7,34 +7,21 @@ crop_size = (640, 640)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 640), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 640),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    dict(type='Resize', scale=(2048, 640), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
 
 # model settings
 model = dict(
diff --git a/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py
index 766a99fbf..8d417ae85 100644
--- a/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py
@@ -11,33 +11,21 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
+train_dataloader = dict(
     # num_gpus: 8 -> batch_size: 8
-    samples_per_gpu=1,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+    batch_size=1,
+    dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py
index 718657093..55c34b93f 100644
--- a/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py
@@ -29,33 +29,21 @@ crop_size = (640, 640)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 640), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 640),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=(2048, 640), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
+train_dataloader = dict(
     # num_gpus: 8 -> batch_size: 8
-    samples_per_gpu=1,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+    batch_size=1,
+    dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py
index 7e0eeb1be..2a448f59e 100644
--- a/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py
@@ -34,33 +34,21 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
     dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
+train_dataloader = dict(
     # num_gpus: 8 -> batch_size: 8
-    samples_per_gpu=1,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+    batch_size=1,
+    dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py
index ec0107d55..79de7d541 100644
--- a/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py
@@ -24,33 +24,21 @@ crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', reduce_zero_label=True),
-    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomResize', scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    dict(type='Pad', size=crop_size),
+    dict(type='PackSegInputs')
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(2048, 512),
-        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img'])
-        ])
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    dict(type='PackSegInputs')
 ]
-data = dict(
+train_dataloader = dict(
     # num_gpus: 8 -> batch_size: 8
-    samples_per_gpu=1,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+    batch_size=1,
+    dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_mla_512x512_160k_b16_ade20k.py b/configs/setr/setr_mla_512x512_160k_b16_ade20k.py
index c8418c634..d2c7ede66 100644
--- a/configs/setr/setr_mla_512x512_160k_b16_ade20k.py
+++ b/configs/setr/setr_mla_512x512_160k_b16_ade20k.py
@@ -1,4 +1,6 @@
 _base_ = ['./setr_mla_512x512_160k_b8_ade20k.py']
 
 # num_gpus: 8 -> batch_size: 16
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_mla_512x512_160k_b8_ade20k.py b/configs/setr/setr_mla_512x512_160k_b8_ade20k.py
index e1a07ce5a..c0339eea9 100644
--- a/configs/setr/setr_mla_512x512_160k_b8_ade20k.py
+++ b/configs/setr/setr_mla_512x512_160k_b8_ade20k.py
@@ -82,4 +82,6 @@ optimizer = dict(
     paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
 
 # num_gpus: 8 -> batch_size: 8
-data = dict(samples_per_gpu=1)
+train_dataloader = dict(batch_size=1)
+val_dataloader = dict(batch_size=1)
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_naive_512x512_160k_b16_ade20k.py b/configs/setr/setr_naive_512x512_160k_b16_ade20k.py
index 8ad8c9fe2..f3346ee3c 100644
--- a/configs/setr/setr_naive_512x512_160k_b16_ade20k.py
+++ b/configs/setr/setr_naive_512x512_160k_b16_ade20k.py
@@ -64,4 +64,6 @@ optimizer = dict(
     paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
 
 # num_gpus: 8 -> batch_size: 16
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_pup_512x512_160k_b16_ade20k.py b/configs/setr/setr_pup_512x512_160k_b16_ade20k.py
index 83997a2bf..a5cb19d1e 100644
--- a/configs/setr/setr_pup_512x512_160k_b16_ade20k.py
+++ b/configs/setr/setr_pup_512x512_160k_b16_ade20k.py
@@ -64,4 +64,6 @@ optimizer = dict(
     paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
 
 # num_gpus: 8 -> batch_size: 16
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py
index 4237cd5aa..36bcced14 100644
--- a/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py
+++ b/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py
@@ -14,4 +14,6 @@ optimizer = dict(
     lr=0.002,
     weight_decay=0.0,
     paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
-data = dict(samples_per_gpu=1)
+train_dataloader = dict(batch_size=1)
+val_dataloader = dict(batch_size=1)
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py
index 0c6621ef1..ffb51b54a 100644
--- a/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py
+++ b/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py
@@ -15,4 +15,6 @@ optimizer = dict(
     weight_decay=0.0,
     paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
 
-data = dict(samples_per_gpu=1)
+train_dataloader = dict(batch_size=1)
+val_dataloader = dict(batch_size=1)
+test_dataloader = val_dataloader
diff --git a/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py
index e108988a0..c2f40d9b9 100644
--- a/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py
+++ b/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py
@@ -61,4 +61,6 @@ optimizer = dict(
     weight_decay=0.0,
     paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
 
-data = dict(samples_per_gpu=1)
+train_dataloader = dict(batch_size=1)
+val_dataloader = dict(batch_size=1)
+test_dataloader = val_dataloader
diff --git a/configs/stdc/stdc1_512x1024_80k_cityscapes.py b/configs/stdc/stdc1_512x1024_80k_cityscapes.py
index 849e771e4..021b6a5a2 100644
--- a/configs/stdc/stdc1_512x1024_80k_cityscapes.py
+++ b/configs/stdc/stdc1_512x1024_80k_cityscapes.py
@@ -3,7 +3,6 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
 ]
 lr_config = dict(warmup='linear', warmup_iters=1000)
-data = dict(
-    samples_per_gpu=12,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=12, num_workers=4)
+val_dataloader = dict(batch_size=12, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
index 6d8c413b6..d966e113d 100644
--- a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
+++ b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
@@ -42,4 +42,6 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py
index 8c299d32a..c13378aca 100644
--- a/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py
@@ -8,4 +8,6 @@ model = dict(
         depths=[3, 4, 18, 3],
         drop_path_rate=0.3))
 
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py
index f6f7d2771..29c440008 100644
--- a/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py
@@ -8,4 +8,6 @@ model = dict(
         depths=[3, 8, 27, 3],
         drop_path_rate=0.3))
 
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
index 44bf60b35..8d2848262 100644
--- a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
@@ -40,4 +40,6 @@ lr_config = dict(
     min_lr=0.0,
     by_epoch=False)
 
-data = dict(samples_per_gpu=2, workers_per_gpu=2)
+train_dataloader = dict(batch_size=2, num_workers=2)
+val_dataloader = dict(batch_size=2, num_workers=2)
+test_dataloader = val_dataloader
diff --git a/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py b/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
index a2f7dbe3f..00cbf3ce1 100644
--- a/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
+++ b/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
@@ -9,8 +9,6 @@ model = dict(
     # model training and testing settings
     train_cfg=dict(),
     test_cfg=dict(mode='whole'))
-
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
index 51eeda012..3acb9abb9 100644
--- a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
@@ -36,4 +36,6 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
index 5b148d725..821eb0619 100644
--- a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
@@ -35,4 +35,6 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
index f893500d3..d83d8ac75 100644
--- a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
@@ -35,4 +35,6 @@ lr_config = dict(
     by_epoch=False)
 
 # By default, models are trained on 8 GPUs with 2 images per GPU
-data = dict(samples_per_gpu=2)
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=2)
+test_dataloader = val_dataloader
diff --git a/mmseg/datasets/pipelines/formatting.py b/mmseg/datasets/pipelines/formatting.py
index ce8a45886..bbc04ba74 100644
--- a/mmseg/datasets/pipelines/formatting.py
+++ b/mmseg/datasets/pipelines/formatting.py
@@ -16,9 +16,7 @@ class PackSegInputs(BaseTransform):
     The ``img_meta`` item is always populated.  The contents of the
     ``img_meta`` dictionary depends on ``meta_keys``. By default this includes:
 
-    - ``filename``: filename of the image
-
-    - ``ori_filename``: original filename of the image file
+    - ``img_path``: filename of the image
 
     - ``ori_shape``: original shape of the image as a tuple (h, w, c)
 
@@ -34,20 +32,17 @@ class PackSegInputs(BaseTransform):
 
     - ``flip_direction``: the flipping direction
 
-    - ``img_norm_cfg``: config of image pixel normalization
-
     Args:
         meta_keys (Sequence[str], optional): Meta keys to be packed from
             ``SegDataSample`` and collected in ``data[img_metas]``.
-            Default: ``('filename', 'ori_filename', 'ori_shape',
+            Default: ``('img_path', 'ori_shape',
                 'img_shape', 'pad_shape', 'scale_factor', 'flip',
-                'flip_direction', 'img_norm_cfg')``
+                'flip_direction')``
     """

     def __init__(self,
-                 meta_keys=('filename', 'ori_filename', 'ori_shape',
-                            'img_shape', 'pad_shape', 'scale_factor', 'flip',
-                            'flip_direction', 'img_norm_cfg')):
+                 meta_keys=('img_path', 'ori_shape', 'img_shape', 'pad_shape',
+                            'scale_factor', 'flip', 'flip_direction')):
         self.meta_keys = meta_keys

     def transform(self, results: dict) -> dict:
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 000000000..41d1f9ca6
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import glob
+import os
+from os.path import dirname, exists, isdir, join, relpath
+
+from mmcv import Config
+from torch import nn
+
+from mmseg.models import build_segmentor
+
+
+def _get_config_directory():
+    """Find the predefined segmentor config directory."""
+    try:
+        # Assume we are running in the source mmsegmentation repo
+        repo_dpath = dirname(dirname(__file__))
+    except NameError:
+        # For IPython development when this __file__ is not defined
+        import mmseg
+        repo_dpath = dirname(dirname(mmseg.__file__))
+    config_dpath = join(repo_dpath, 'configs')
+    if not exists(config_dpath):
+        raise Exception('Cannot find config path')
+    return config_dpath
+
+
+def test_config_build_segmentor():
+    """Test that all segmentation models defined in the configs can be
+    initialized."""
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+
+    config_fpaths = []
+    # one config each sub folder
+    for sub_folder in os.listdir(config_dpath):
+        # check the folder under `configs`, not the current working directory
+        if isdir(join(config_dpath, sub_folder)):
+            config_fpaths.append(
+                list(glob.glob(join(config_dpath, sub_folder, '*.py')))[0])
+    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+    config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+    print('Using {} config files'.format(len(config_names)))
+
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        config_mod = Config.fromfile(config_fpath)
+
+        config_mod.model
+        print('Building segmentor, config_fpath = {!r}'.format(config_fpath))
+
+        # Remove pretrained keys to allow for testing in an offline environment
+        if 'pretrained' in config_mod.model:
+            config_mod.model['pretrained'] = None
+
+        print('building {}'.format(config_fname))
+        segmentor = build_segmentor(config_mod.model)
+        assert segmentor is not None
+
+        head_config = config_mod.model['decode_head']
+        _check_decode_head(head_config, segmentor.decode_head)
+
+
+def test_config_data_pipeline():
+    """Test whether the data pipeline is valid and can process corner cases.
+
+    CommandLine:
+        xdoctest -m tests/test_config.py test_config_data_pipeline
+    """
+    import numpy as np
+    from mmcv import Config
+
+    from mmseg.datasets.pipelines import Compose
+
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+
+    import glob
+    config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))
+    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+    config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+    print('Using {} config files'.format(len(config_names)))
+
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        print(
+            'Building data pipeline, config_fpath = {!r}'.format(config_fpath))
+        config_mod = Config.fromfile(config_fpath)
+
+        # remove loading pipeline
+        load_img_pipeline = config_mod.train_pipeline.pop(0)
+        to_float32 = load_img_pipeline.get('to_float32', False)
+        config_mod.train_pipeline.pop(0)
+        config_mod.test_pipeline.pop(0)
+
+        train_pipeline = Compose(config_mod.train_pipeline)
+        test_pipeline = Compose(config_mod.test_pipeline)
+
+        img = np.random.randint(0, 255, size=(1024, 2048, 3), dtype=np.uint8)
+        if to_float32:
+            img = img.astype(np.float32)
+        seg = np.random.randint(0, 255, size=(1024, 2048, 1), dtype=np.uint8)
+
+        results = dict(
+            filename='test_img.png',
+            ori_filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_seg_map=seg)
+        results['seg_fields'] = ['gt_seg_map']
+
+        print('Test training data pipeline: \n{!r}'.format(train_pipeline))
+        output_results = train_pipeline(results)
+        assert output_results is not None
+
+        results = dict(
+            filename='test_img.png',
+            ori_filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+        )
+        print('Test testing data pipeline: \n{!r}'.format(test_pipeline))
+        output_results = test_pipeline(results)
+        assert output_results is not None
+
+
+def _check_decode_head(decode_head_cfg, decode_head):
+    if isinstance(decode_head_cfg, list):
+        assert isinstance(decode_head, nn.ModuleList)
+        assert len(decode_head_cfg) == len(decode_head)
+        num_heads = len(decode_head)
+        for i in range(num_heads):
+            _check_decode_head(decode_head_cfg[i], decode_head[i])
+        return
+    # check consistency between head_config and decode_head
+    assert decode_head_cfg['type'] == decode_head.__class__.__name__
+
+    in_channels = decode_head_cfg.in_channels
+    input_transform = decode_head.input_transform
+    assert input_transform in ['resize_concat', 'multiple_select', None]
+    if input_transform is not None:
+        assert isinstance(in_channels, (list, tuple))
+        assert isinstance(decode_head.in_index, (list, tuple))
+        assert len(in_channels) == len(decode_head.in_index)
+        # `resize_concat` concatenates the selected inputs channel-wise
+        if input_transform == 'resize_concat':
+            assert sum(in_channels) == decode_head.in_channels
+    else:
+        assert isinstance(in_channels, int)
+        assert in_channels == decode_head.in_channels
+        assert isinstance(decode_head.in_index, int)
+
+    if decode_head_cfg['type'] == 'PointHead':
+        assert decode_head_cfg.channels + decode_head_cfg.num_classes == \
+            decode_head.fc_seg.in_channels
+        assert decode_head.fc_seg.out_channels == decode_head_cfg.num_classes
+    else:
+        assert decode_head_cfg.channels == decode_head.conv_seg.in_channels
+        assert decode_head.conv_seg.out_channels == decode_head_cfg.num_classes
diff --git a/tests/test_data/test_formatting.py b/tests/test_data/test_formatting.py
index cd5a9b947..3d02e2a2a 100644
--- a/tests/test_data/test_formatting.py
+++ b/tests/test_data/test_formatting.py
@@ -22,8 +22,7 @@ class TestPackSegInputs(unittest.TestCase):
         img_path = osp.join(data_prefix, 'color.jpg')
         rng = np.random.RandomState(0)
         self.results = {
-            'filename': img_path,
-            'ori_filename': 'color.jpg',
+            'img_path': img_path,
             'ori_shape': (300, 400),
             'pad_shape': (600, 800),
             'img_shape': (600, 800),
@@ -34,9 +33,8 @@ class TestPackSegInputs(unittest.TestCase):
             'img': rng.rand(300, 400),
             'gt_seg_map': rng.rand(300, 400),
         }
-        self.meta_keys = ('filename', 'ori_filename', 'ori_shape', 'img_shape',
-                          'pad_shape', 'scale_factor', 'flip',
-                          'flip_direction', 'img_norm_cfg')
+        self.meta_keys = ('img_path', 'ori_shape', 'img_shape', 'pad_shape',
+                          'scale_factor', 'flip', 'flip_direction')
 
     def test_transform(self):
         transform = PackSegInputs(meta_keys=self.meta_keys)
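For reference, a minimal sketch of the meta-key contract the updated unittest checks, assuming `PackSegInputs` is importable from `mmseg.datasets.pipelines` as in the diff above and that meta keys absent from `results` are skipped rather than raising:

import numpy as np
from mmseg.datasets.pipelines import PackSegInputs

results = {
    'img_path': 'tests/data/color.jpg',  # replaces filename/ori_filename
    'ori_shape': (300, 400),
    'img_shape': (600, 800),
    'pad_shape': (600, 800),
    'scale_factor': 2.0,
    'flip': False,
    'flip_direction': None,
    'img': np.random.rand(600, 800, 3),
    'gt_seg_map': np.random.randint(0, 19, (600, 800)),
}
packed = PackSegInputs().transform(results)
# `packed` pairs the image tensor with a SegDataSample whose metainfo now
# carries 'img_path' etc.; 'img_norm_cfg' is dropped because normalization
# moves out of the pipeline (into the model's data preprocessing step).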