From a05c79e8066d0746bb1627611473fa205828300b Mon Sep 17 00:00:00 2001 From: Ma Zerun Date: Fri, 3 Mar 2023 15:01:11 +0800 Subject: [PATCH] [Refactor] Move transforms in mmselfsup to mmpretrain. (#1396) * [Refactor] Move transforms in mmselfsup to mmpretrain. * Update transform docs and configs. And register some mmcv transforms in mmpretrain. * Fix missing transform wrapper. * update selfsup transforms * Fix UT * Fix UT * update GaussianBlur in configs --------- Co-authored-by: fangyixiao18 --- configs/_base_/datasets/cifar100_bs16.py | 4 +- configs/_base_/datasets/cifar10_bs16.py | 4 +- configs/_base_/datasets/cub_bs8_384.py | 4 +- configs/_base_/datasets/cub_bs8_448.py | 4 +- configs/_base_/datasets/imagenet21k_bs128.py | 4 +- .../_base_/datasets/imagenet_bs128_mbv3.py | 4 +- .../imagenet_bs128_poolformer_medium_224.py | 4 +- .../imagenet_bs128_poolformer_small_224.py | 4 +- .../datasets/imagenet_bs128_revvit_224.py | 4 +- .../_base_/datasets/imagenet_bs128_vig_224.py | 4 +- .../_base_/datasets/imagenet_bs16_eva_196.py | 4 +- .../_base_/datasets/imagenet_bs16_eva_336.py | 4 +- .../_base_/datasets/imagenet_bs16_eva_560.py | 4 +- .../datasets/imagenet_bs16_pil_bicubic_384.py | 4 +- .../_base_/datasets/imagenet_bs256_beitv2.py | 5 +- .../datasets/imagenet_bs256_davit_224.py | 4 +- .../datasets/imagenet_bs256_levit_224.py | 4 +- .../_base_/datasets/imagenet_bs256_rsb_a12.py | 4 +- .../_base_/datasets/imagenet_bs256_rsb_a3.py | 4 +- .../datasets/imagenet_bs256_simmim_192.py | 27 +- .../datasets/imagenet_bs256_swin_192.py | 4 +- configs/_base_/datasets/imagenet_bs32.py | 4 +- configs/_base_/datasets/imagenet_bs32_byol.py | 18 +- .../_base_/datasets/imagenet_bs32_mocov2.py | 13 +- .../datasets/imagenet_bs32_pil_bicubic.py | 4 +- .../datasets/imagenet_bs32_pil_resize.py | 4 +- .../_base_/datasets/imagenet_bs32_pillow.py | 57 --- .../_base_/datasets/imagenet_bs32_simclr.py | 10 +- configs/_base_/datasets/imagenet_bs512_mae.py | 6 +- .../_base_/datasets/imagenet_bs512_mocov3.py | 24 +- configs/_base_/datasets/imagenet_bs64.py | 4 +- .../_base_/datasets/imagenet_bs64_autoaug.py | 4 +- .../datasets/imagenet_bs64_convmixer_224.py | 4 +- .../datasets/imagenet_bs64_deit3_224.py | 4 +- .../datasets/imagenet_bs64_deit3_384.py | 4 +- .../datasets/imagenet_bs64_edgenext_256.py | 4 +- .../datasets/imagenet_bs64_mixer_224.py | 4 +- .../datasets/imagenet_bs64_pil_resize.py | 4 +- .../imagenet_bs64_pil_resize_autoaug.py | 4 +- .../_base_/datasets/imagenet_bs64_swin_224.py | 4 +- .../_base_/datasets/imagenet_bs64_swin_256.py | 4 +- .../_base_/datasets/imagenet_bs64_swin_384.py | 4 +- .../_base_/datasets/imagenet_bs64_t2t_224.py | 4 +- .../datasets/imagenet_bs8_pil_bicubic_320.py | 4 +- configs/_base_/datasets/inshop_bs32_448.py | 4 +- configs/_base_/datasets/voc_bs16.py | 4 +- .../resnet50_8xb32-linear-coslr-100e_in1k.py | 2 +- ...eit-base-p16_8xb256-amp-coslr-300e_in1k.py | 5 +- .../beit-base-p16_8xb128-coslr-100e_in1k.py | 4 +- .../beit-base-p16_8xb128-coslr-100e_in1k.py | 4 +- .../resnet50_8xb512-linear-coslr-90e_in1k.py | 2 +- .../beit-base-p16_8xb128-coslr-100e_in1k.py | 4 +- ...vit-base-p16_8xb256-amp-coslr-300e_in1k.py | 5 +- .../convnext-v2-huge_32xb32_in1k-512px.py | 4 +- configs/cspnet/cspdarknet50_8xb32_in1k.py | 4 +- configs/cspnet/cspresnet50_8xb32_in1k.py | 4 +- configs/cspnet/cspresnext50_8xb32_in1k.py | 4 +- .../csra/resnet101-csra_1xb16_voc07-448px.py | 4 +- .../resnet50_8xb32-linear-steplr-100e_in1k.py | 2 +- .../edgenext/edgenext-base_8xb256-usi_in1k.py | 2 +-
.../edgenext-small_8xb256-usi_in1k.py | 2 +- .../efficientnet-b0_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b0_8xb32_in1k.py | 4 +- .../efficientnet-b1_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b1_8xb32_in1k.py | 4 +- .../efficientnet-b2_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b2_8xb32_in1k.py | 4 +- .../efficientnet-b3_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b3_8xb32_in1k.py | 4 +- .../efficientnet-b4_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b4_8xb32_in1k.py | 4 +- .../efficientnet-b5_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b5_8xb32_in1k.py | 4 +- .../efficientnet-b6_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b6_8xb32_in1k.py | 4 +- .../efficientnet-b7_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b7_8xb32_in1k.py | 4 +- .../efficientnet-b8_8xb32-01norm_in1k.py | 4 +- .../efficientnet-b8_8xb32_in1k.py | 4 +- .../efficientnet-em_8xb32-01norm_in1k.py | 4 +- .../efficientnet-es_8xb32-01norm_in1k.py | 4 +- .../efficientnet-l2_8xb32_in1k-475px.py | 4 +- .../efficientnet-l2_8xb8_in1k-800px.py | 4 +- .../efficientnetv2-b0_8xb32_in1k.py | 4 +- .../efficientnetv2-b1_8xb32_in1k.py | 4 +- .../efficientnetv2-b2_8xb32_in1k.py | 4 +- .../efficientnetv2-b3_8xb32_in1k.py | 4 +- .../efficientnetv2-l_8xb32_in1k-480px.py | 4 +- .../efficientnetv2-m_8xb32_in1k-480px.py | 4 +- .../efficientnetv2-s_8xb32_in1k-384px.py | 4 +- .../efficientnetv2-s_8xb32_in21k.py | 4 +- .../efficientnetv2-xl_8xb32_in1k-512px.py | 4 +- .../vit-base-p16_8xb128-coslr-100e_in1k.py | 4 +- ...base-p16_8xb2048-linear-coslr-100e_in1k.py | 2 +- .../inception_v3/inception-v3_8xb32_in1k.py | 4 +- configs/lenet/lenet5_mnist.py | 2 +- .../vit-base-p16_8xb128-coslr-100e_in1k.py | 4 +- ...-base-p16_8xb2048-linear-coslr-90e_in1k.py | 2 +- ...vit-huge-p14_32xb8-coslr-50e_in1k-448px.py | 4 +- .../vit-huge-p14_8xb128-coslr-50e_in1k.py | 4 +- .../vit-large-p16_8xb128-coslr-50e_in1k.py | 4 +- ...large-p16_8xb2048-linear-coslr-90e_in1k.py | 2 +- .../vit-base-p16_8xb256-coslr-100e_in1k.py | 4 +- ...vit-base-p16_8xb256-amp-coslr-300e_in1k.py | 10 +- .../vit-base-p16_8xb128-coslr-100e_in1k.py | 4 +- ...base-p16_8xb2048-linear-coslr-100e_in1k.py | 2 +- .../mixmim-base_8xb128-coslr-100e_in1k.py | 4 +- ...mim_mixmim-base_16xb128-coslr-300e_in1k.py | 6 +- .../mobilenet-v3-small-050_8xb128_in1k.py | 4 +- .../mobilenet-v3-small-075_8xb128_in1k.py | 4 +- configs/mobileone/mobileone-s1_8xb32_in1k.py | 2 +- configs/mobileone/mobileone-s2_8xb32_in1k.py | 2 +- configs/mobileone/mobileone-s3_8xb32_in1k.py | 2 +- configs/mobileone/mobileone-s4_8xb32_in1k.py | 2 +- .../mobilevit/mobilevit-small_8xb128_in1k.py | 2 +- .../mobilevit/mobilevit-xsmall_8xb128_in1k.py | 2 +- .../mobilevit-xxsmall_8xb128_in1k.py | 2 +- .../resnet50_8xb32-linear-steplr-100e_in1k.py | 2 +- .../resnet50_8xb128-linear-coslr-90e_in1k.py | 2 +- ...t-base-p16_8xb128-linear-coslr-90e_in1k.py | 2 +- ...-small-p16_8xb128-linear-coslr-90e_in1k.py | 2 +- ...it-base-p16_16xb256-amp-coslr-300e_in1k.py | 22 +- ...it-large-p16_64xb64-amp-coslr-300e_in1k.py | 22 +- ...t-small-p16_16xb256-amp-coslr-300e_in1k.py | 22 +- configs/regnet/regnetx-400mf_8xb128_in1k.py | 2 +- .../repmlp/repmlp-base_8xb64_in1k-256px.py | 4 +- configs/repmlp/repmlp-base_8xb64_in1k.py | 2 +- configs/repvgg/repvgg-B3_8xb32_in1k.py | 4 +- configs/resnest/resnest101_32xb64_in1k.py | 4 +- configs/resnest/resnest200_64xb32_in1k.py | 4 +- configs/resnest/resnest269_64xb32_in1k.py | 4 +- configs/resnest/resnest50_32xb64_in1k.py | 4 +- .../resnet50_8xb512-linear-coslr-90e_in1k.py | 2 +- 
.../swin-base-w7_8xb256-coslr-100e_in1k.py | 4 +- .../swin-large-w14_8xb256-coslr-100e_in1k.py | 4 +- .../resnet50_8xb512-linear-coslr-90e_in1k.py | 2 +- .../resnet50_8xb512-linear-coslr-90e_in1k.py | 2 +- ..._8xb32-mcrop-coslr-200e_in1k-224px-96px.py | 22 +- .../tinyvit-21m-distill_8xb256_in1k-384px.py | 2 +- .../tinyvit-21m-distill_8xb256_in1k-512px.py | 2 +- configs/tnt/tnt-s-p16_16xb64_in1k.py | 2 +- configs/van/van-base_8xb128_in1k.py | 4 +- configs/van/van-large_8xb128_in1k.py | 4 +- configs/van/van-small_8xb128_in1k.py | 4 +- configs/van/van-tiny_8xb128_in1k.py | 4 +- configs/vig/pvig-base_8xb128_in1k.py | 2 +- .../vit-base-p16_64xb64_in1k-384px.py | 4 +- .../vit-base-p32_64xb64_in1k-384px.py | 4 +- .../vit-large-p16_64xb64_in1k-384px.py | 4 +- .../vit-large-p32_64xb64_in1k-384px.py | 4 +- docs/en/advanced_guides/pipeline.md | 6 +- docs/en/api/data_process.rst | 88 ++--- docs/en/migration.md | 4 +- docs/en/user_guides/config.md | 10 +- docs/en/user_guides/finetune.md | 8 +- docs/zh_CN/migration.md | 4 +- docs/zh_CN/user_guides/config.md | 10 +- docs/zh_CN/user_guides/finetune.md | 8 +- mmpretrain/datasets/transforms/__init__.py | 29 +- .../datasets/transforms/auto_augment.py | 62 ++++ mmpretrain/datasets/transforms/formatting.py | 174 +++++---- mmpretrain/datasets/transforms/processing.py | 351 +++++++++++++++++- mmpretrain/datasets/transforms/wrappers.py | 97 +++++ mmpretrain/structures/__init__.py | 6 +- .../test_transforms/test_auto_augment.py | 57 +++ .../test_transforms/test_formatting.py | 42 ++- .../test_transforms/test_processing.py | 59 ++- .../test_transforms/test_wrappers.py | 43 +++ tests/test_models/test_retrievers.py | 4 +- 169 files changed, 1253 insertions(+), 559 deletions(-) delete mode 100644 configs/_base_/datasets/imagenet_bs32_pillow.py create mode 100644 mmpretrain/datasets/transforms/wrappers.py create mode 100644 tests/test_datasets/test_transforms/test_wrappers.py diff --git a/configs/_base_/datasets/cifar100_bs16.py b/configs/_base_/datasets/cifar100_bs16.py index 86ac33a4..3744ac6d 100644 --- a/configs/_base_/datasets/cifar100_bs16.py +++ b/configs/_base_/datasets/cifar100_bs16.py @@ -11,11 +11,11 @@ data_preprocessor = dict( train_pipeline = [ dict(type='RandomCrop', crop_size=32, padding=4), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/cifar10_bs16.py b/configs/_base_/datasets/cifar10_bs16.py index cbd191c5..8d8023e7 100644 --- a/configs/_base_/datasets/cifar10_bs16.py +++ b/configs/_base_/datasets/cifar10_bs16.py @@ -11,11 +11,11 @@ data_preprocessor = dict( train_pipeline = [ dict(type='RandomCrop', crop_size=32, padding=4), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/cub_bs8_384.py b/configs/_base_/datasets/cub_bs8_384.py index d896d961..15f8e15f 100644 --- a/configs/_base_/datasets/cub_bs8_384.py +++ b/configs/_base_/datasets/cub_bs8_384.py @@ -14,14 +14,14 @@ train_pipeline = [ dict(type='Resize', scale=510), dict(type='RandomCrop', crop_size=384), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), 
dict(type='Resize', scale=510), dict(type='CenterCrop', crop_size=384), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/cub_bs8_448.py b/configs/_base_/datasets/cub_bs8_448.py index b990b629..9ff335bf 100644 --- a/configs/_base_/datasets/cub_bs8_448.py +++ b/configs/_base_/datasets/cub_bs8_448.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='Resize', scale=600), dict(type='RandomCrop', crop_size=448), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=600), dict(type='CenterCrop', crop_size=448), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet21k_bs128.py b/configs/_base_/datasets/imagenet21k_bs128.py index 84716257..1a727ff4 100644 --- a/configs/_base_/datasets/imagenet21k_bs128.py +++ b/configs/_base_/datasets/imagenet21k_bs128.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs128_mbv3.py b/configs/_base_/datasets/imagenet_bs128_mbv3.py index ae90fa03..abc67b0b 100644 --- a/configs/_base_/datasets/imagenet_bs128_mbv3.py +++ b/configs/_base_/datasets/imagenet_bs128_mbv3.py @@ -28,14 +28,14 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs128_poolformer_medium_224.py b/configs/_base_/datasets/imagenet_bs128_poolformer_medium_224.py index 3e33d303..05afae4f 100644 --- a/configs/_base_/datasets/imagenet_bs128_poolformer_medium_224.py +++ b/configs/_base_/datasets/imagenet_bs128_poolformer_medium_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs128_poolformer_small_224.py b/configs/_base_/datasets/imagenet_bs128_poolformer_small_224.py index b61de03b..8a4b63d7 100644 --- a/configs/_base_/datasets/imagenet_bs128_poolformer_small_224.py +++ b/configs/_base_/datasets/imagenet_bs128_poolformer_small_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git 
a/configs/_base_/datasets/imagenet_bs128_revvit_224.py b/configs/_base_/datasets/imagenet_bs128_revvit_224.py index 12ef45a5..821cfaf4 100644 --- a/configs/_base_/datasets/imagenet_bs128_revvit_224.py +++ b/configs/_base_/datasets/imagenet_bs128_revvit_224.py @@ -38,7 +38,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -50,7 +50,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs128_vig_224.py b/configs/_base_/datasets/imagenet_bs128_vig_224.py index 54f05553..26337782 100644 --- a/configs/_base_/datasets/imagenet_bs128_vig_224.py +++ b/configs/_base_/datasets/imagenet_bs128_vig_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs16_eva_196.py b/configs/_base_/datasets/imagenet_bs16_eva_196.py index 292603c0..f5be7c5a 100644 --- a/configs/_base_/datasets/imagenet_bs16_eva_196.py +++ b/configs/_base_/datasets/imagenet_bs16_eva_196.py @@ -17,7 +17,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -29,7 +29,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=196), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs16_eva_336.py b/configs/_base_/datasets/imagenet_bs16_eva_336.py index 094c7ddd..5e2a9cff 100644 --- a/configs/_base_/datasets/imagenet_bs16_eva_336.py +++ b/configs/_base_/datasets/imagenet_bs16_eva_336.py @@ -17,7 +17,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -29,7 +29,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=336), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs16_eva_560.py b/configs/_base_/datasets/imagenet_bs16_eva_560.py index 2df2ab45..c324e241 100644 --- a/configs/_base_/datasets/imagenet_bs16_eva_560.py +++ b/configs/_base_/datasets/imagenet_bs16_eva_560.py @@ -17,7 +17,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -29,7 +29,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=560), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs16_pil_bicubic_384.py b/configs/_base_/datasets/imagenet_bs16_pil_bicubic_384.py index 9bb3f83d..e3f6775f 100644 --- a/configs/_base_/datasets/imagenet_bs16_pil_bicubic_384.py +++ 
b/configs/_base_/datasets/imagenet_bs16_pil_bicubic_384.py @@ -16,13 +16,13 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=384, backend='pillow', interpolation='bicubic'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs256_beitv2.py b/configs/_base_/datasets/imagenet_bs256_beitv2.py index 34daf9d5..ec914f8d 100644 --- a/configs/_base_/datasets/imagenet_bs256_beitv2.py +++ b/configs/_base_/datasets/imagenet_bs256_beitv2.py @@ -31,10 +31,7 @@ train_pipeline = [ num_masking_patches=75, max_num_patches=75, min_num_patches=16), - dict( - type='PackSelfSupInputs', - algorithm_keys=['mask'], - meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs256_davit_224.py b/configs/_base_/datasets/imagenet_bs256_davit_224.py index 7dbb6c3c..b35fc9da 100644 --- a/configs/_base_/datasets/imagenet_bs256_davit_224.py +++ b/configs/_base_/datasets/imagenet_bs256_davit_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs256_levit_224.py b/configs/_base_/datasets/imagenet_bs256_levit_224.py index 4804a166..d030e7a5 100644 --- a/configs/_base_/datasets/imagenet_bs256_levit_224.py +++ b/configs/_base_/datasets/imagenet_bs256_levit_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs256_rsb_a12.py b/configs/_base_/datasets/imagenet_bs256_rsb_a12.py index 77b179f9..8a6f50f7 100644 --- a/configs/_base_/datasets/imagenet_bs256_rsb_a12.py +++ b/configs/_base_/datasets/imagenet_bs256_rsb_a12.py @@ -29,7 +29,7 @@ train_pipeline = [ magnitude_std=0.5, hparams=dict( pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -41,7 +41,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs256_rsb_a3.py b/configs/_base_/datasets/imagenet_bs256_rsb_a3.py index 8f3d1a48..1979b837 100644 --- a/configs/_base_/datasets/imagenet_bs256_rsb_a3.py +++ b/configs/_base_/datasets/imagenet_bs256_rsb_a3.py @@ -29,7 +29,7 @@ train_pipeline = [ magnitude_std=0.5, hparams=dict( pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -41,7 +41,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + 
dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs256_simmim_192.py b/configs/_base_/datasets/imagenet_bs256_simmim_192.py index 2d91665c..be87e12a 100644 --- a/configs/_base_/datasets/imagenet_bs256_simmim_192.py +++ b/configs/_base_/datasets/imagenet_bs256_simmim_192.py @@ -9,11 +9,7 @@ data_preprocessor = dict( train_pipeline = [ dict(type='LoadImageFromFile'), - dict( - type='RandomResizedCrop', - size=192, - scale=(0.67, 1.0), - ratio=(3. / 4., 4. / 3.)), + dict(type='RandomResizedCrop', scale=192, crop_ratio_range=(0.67, 1.0)), dict(type='RandomFlip', prob=0.5), dict( type='SimMIMMaskGenerator', @@ -21,10 +17,7 @@ train_pipeline = [ mask_patch_size=32, model_patch_size=4, mask_ratio=0.6), - dict( - type='PackSelfSupInputs', - algorithm_keys=['mask'], - meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( @@ -39,19 +32,3 @@ train_dataloader = dict( ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=train_pipeline)) - -# for visualization -vis_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', scale=(192, 192), backend='pillow'), - dict( - type='SimMIMMaskGenerator', - input_size=192, - mask_patch_size=32, - model_patch_size=4, - mask_ratio=0.6), - dict( - type='PackSelfSupInputs', - algorithm_keys=['mask'], - meta_keys=['img_path']) -] diff --git a/configs/_base_/datasets/imagenet_bs256_swin_192.py b/configs/_base_/datasets/imagenet_bs256_swin_192.py index 7900c6d2..a29a344f 100644 --- a/configs/_base_/datasets/imagenet_bs256_swin_192.py +++ b/configs/_base_/datasets/imagenet_bs256_swin_192.py @@ -34,7 +34,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -46,7 +46,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=192), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs32.py b/configs/_base_/datasets/imagenet_bs32.py index 4b3b4ba2..40bc2ee1 100644 --- a/configs/_base_/datasets/imagenet_bs32.py +++ b/configs/_base_/datasets/imagenet_bs32.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs32_byol.py b/configs/_base_/datasets/imagenet_bs32_byol.py index 6bb7b75f..81518ec3 100644 --- a/configs/_base_/datasets/imagenet_bs32_byol.py +++ b/configs/_base_/datasets/imagenet_bs32_byol.py @@ -10,7 +10,7 @@ data_preprocessor = dict( view_pipeline1 = [ dict( type='RandomResizedCrop', - size=224, + scale=224, interpolation='bicubic', backend='pillow'), dict(type='RandomFlip', prob=0.5), @@ -30,13 +30,17 @@ view_pipeline1 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=1.), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=1.), dict(type='RandomSolarize', prob=0.), ] view_pipeline2 = [ dict( type='RandomResizedCrop', - size=224, 
+ scale=224, interpolation='bicubic', backend='pillow'), dict(type='RandomFlip', prob=0.5), @@ -56,7 +60,11 @@ view_pipeline2 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.1), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.1), dict(type='RandomSolarize', prob=0.2) ] train_pipeline = [ @@ -65,7 +73,7 @@ train_pipeline = [ type='MultiView', num_views=[1, 1], transforms=[view_pipeline1, view_pipeline2]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs32_mocov2.py b/configs/_base_/datasets/imagenet_bs32_mocov2.py index fa710ad4..ef048039 100644 --- a/configs/_base_/datasets/imagenet_bs32_mocov2.py +++ b/configs/_base_/datasets/imagenet_bs32_mocov2.py @@ -10,7 +10,10 @@ data_preprocessor = dict( # The difference between mocov2 and mocov1 is the transforms in the pipeline view_pipeline = [ dict( - type='RandomResizedCrop', size=224, scale=(0.2, 1.), backend='pillow'), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.2, 1.), + backend='pillow'), dict( type='RandomApply', transforms=[ @@ -27,14 +30,18 @@ view_pipeline = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.5), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.5), dict(type='RandomFlip', prob=0.5), ] train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='MultiView', num_views=2, transforms=[view_pipeline]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs32_pil_bicubic.py b/configs/_base_/datasets/imagenet_bs32_pil_bicubic.py index d5483876..08f52576 100644 --- a/configs/_base_/datasets/imagenet_bs32_pil_bicubic.py +++ b/configs/_base_/datasets/imagenet_bs32_pil_bicubic.py @@ -17,7 +17,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -29,7 +29,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs32_pil_resize.py b/configs/_base_/datasets/imagenet_bs32_pil_resize.py index 2db8f89b..ad5c3895 100644 --- a/configs/_base_/datasets/imagenet_bs32_pil_resize.py +++ b/configs/_base_/datasets/imagenet_bs32_pil_resize.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224, backend='pillow'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs32_pillow.py b/configs/_base_/datasets/imagenet_bs32_pillow.py deleted file mode 100644 index 97e5cd88..00000000 --- a/configs/_base_/datasets/imagenet_bs32_pillow.py +++ /dev/null @@ -1,57 +0,0 @@ -# dataset settings -dataset_type = 'ImageNet' 
-data_root = 'data/imagenet/' -data_preprocessor = dict( - num_classes=1000, - # RGB format normalization parameters - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - # convert image from BGR to RGB - to_rgb=True, -) - -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='RandomResizedCrop', scale=224, backend='pillow'), - dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'), - dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), -] - -train_dataloader = dict( - batch_size=32, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='meta/train.txt', - data_prefix='train', - pipeline=train_pipeline), - sampler=dict(type='DefaultSampler', shuffle=True), - collate_fn=dict(type='default_collate'), - persistent_workers=True, - pin_memory=True, -) - -val_dataloader = dict( - batch_size=32, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='meta/val.txt', - data_prefix='val', - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True, -) -val_evaluator = dict(type='Accuracy', topk=(1, 5)) - -# If you want standard test, please manually configure the test dataset -test_dataloader = val_dataloader -test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/imagenet_bs32_simclr.py b/configs/_base_/datasets/imagenet_bs32_simclr.py index c04f19df..9270810a 100644 --- a/configs/_base_/datasets/imagenet_bs32_simclr.py +++ b/configs/_base_/datasets/imagenet_bs32_simclr.py @@ -8,7 +8,7 @@ data_preprocessor = dict( to_rgb=True) view_pipeline = [ - dict(type='RandomResizedCrop', size=224, backend='pillow'), + dict(type='RandomResizedCrop', scale=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict( type='RandomApply', @@ -26,13 +26,17 @@ view_pipeline = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.5), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.5), ] train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='MultiView', num_views=2, transforms=[view_pipeline]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs512_mae.py b/configs/_base_/datasets/imagenet_bs512_mae.py index b37776a6..dcf0b365 100644 --- a/configs/_base_/datasets/imagenet_bs512_mae.py +++ b/configs/_base_/datasets/imagenet_bs512_mae.py @@ -11,12 +11,12 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict( type='RandomResizedCrop', - size=224, - scale=(0.2, 1.0), + scale=224, + crop_ratio_range=(0.2, 1.0), backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs512_mocov3.py b/configs/_base_/datasets/imagenet_bs512_mocov3.py index c7a746cc..f55dd2f6 100644 --- a/configs/_base_/datasets/imagenet_bs512_mocov3.py +++ b/configs/_base_/datasets/imagenet_bs512_mocov3.py @@ -9,7 +9,10 @@ data_preprocessor = dict( view_pipeline1 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.2, 1.), backend='pillow'), + type='RandomResizedCrop', + scale=224, + 
crop_ratio_range=(0.2, 1.), + backend='pillow'), dict( type='RandomApply', transforms=[ @@ -26,13 +29,20 @@ view_pipeline1 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=1.), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=1.), dict(type='RandomSolarize', prob=0.), dict(type='RandomFlip', prob=0.5), ] view_pipeline2 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.2, 1.), backend='pillow'), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.2, 1.), + backend='pillow'), dict( type='RandomApply', transforms=[ @@ -49,7 +59,11 @@ view_pipeline2 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.1), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.1), dict(type='RandomSolarize', prob=0.2), dict(type='RandomFlip', prob=0.5), ] @@ -59,7 +73,7 @@ train_pipeline = [ type='MultiView', num_views=[1, 1], transforms=[view_pipeline1, view_pipeline2]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64.py b/configs/_base_/datasets/imagenet_bs64.py index bb80a1f5..fc8fb340 100644 --- a/configs/_base_/datasets/imagenet_bs64.py +++ b/configs/_base_/datasets/imagenet_bs64.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_autoaug.py b/configs/_base_/datasets/imagenet_bs64_autoaug.py index 196dec82..ec92c79c 100644 --- a/configs/_base_/datasets/imagenet_bs64_autoaug.py +++ b/configs/_base_/datasets/imagenet_bs64_autoaug.py @@ -21,14 +21,14 @@ train_pipeline = [ policies='imagenet', hparams=dict( pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_convmixer_224.py b/configs/_base_/datasets/imagenet_bs64_convmixer_224.py index 0a30815d..f42c8b77 100644 --- a/configs/_base_/datasets/imagenet_bs64_convmixer_224.py +++ b/configs/_base_/datasets/imagenet_bs64_convmixer_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_deit3_224.py b/configs/_base_/datasets/imagenet_bs64_deit3_224.py index 60a882d2..6b0ae8c6 100644 --- a/configs/_base_/datasets/imagenet_bs64_deit3_224.py +++ b/configs/_base_/datasets/imagenet_bs64_deit3_224.py @@ -37,7 +37,7 @@ 
train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_deit3_384.py b/configs/_base_/datasets/imagenet_bs64_deit3_384.py index 9b8c73ad..9760c9ff 100644 --- a/configs/_base_/datasets/imagenet_bs64_deit3_384.py +++ b/configs/_base_/datasets/imagenet_bs64_deit3_384.py @@ -17,7 +17,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -29,7 +29,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=384), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_edgenext_256.py b/configs/_base_/datasets/imagenet_bs64_edgenext_256.py index df095b6b..d3517256 100644 --- a/configs/_base_/datasets/imagenet_bs64_edgenext_256.py +++ b/configs/_base_/datasets/imagenet_bs64_edgenext_256.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_mixer_224.py b/configs/_base_/datasets/imagenet_bs64_mixer_224.py index ddf07dc8..f1478d49 100644 --- a/configs/_base_/datasets/imagenet_bs64_mixer_224.py +++ b/configs/_base_/datasets/imagenet_bs64_mixer_224.py @@ -14,14 +14,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_pil_resize.py b/configs/_base_/datasets/imagenet_bs64_pil_resize.py index c97be68e..5d535b4a 100644 --- a/configs/_base_/datasets/imagenet_bs64_pil_resize.py +++ b/configs/_base_/datasets/imagenet_bs64_pil_resize.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224, backend='pillow'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py b/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py index 6244fbaa..246268f8 100644 --- a/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py +++ b/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py @@ -25,7 +25,7 @@ train_pipeline = [ 
policies='imagenet', hparams=dict( pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -37,7 +37,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_swin_224.py b/configs/_base_/datasets/imagenet_bs64_swin_224.py index 39d71679..12dd8a83 100644 --- a/configs/_base_/datasets/imagenet_bs64_swin_224.py +++ b/configs/_base_/datasets/imagenet_bs64_swin_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_swin_256.py b/configs/_base_/datasets/imagenet_bs64_swin_256.py index 79e2a1ca..09f99774 100644 --- a/configs/_base_/datasets/imagenet_bs64_swin_256.py +++ b/configs/_base_/datasets/imagenet_bs64_swin_256.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_swin_384.py b/configs/_base_/datasets/imagenet_bs64_swin_384.py index d4e9d3ff..9bcef510 100644 --- a/configs/_base_/datasets/imagenet_bs64_swin_384.py +++ b/configs/_base_/datasets/imagenet_bs64_swin_384.py @@ -17,13 +17,13 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=384, backend='pillow', interpolation='bicubic'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs64_t2t_224.py b/configs/_base_/datasets/imagenet_bs64_t2t_224.py index f3dc75ab..c95625e9 100644 --- a/configs/_base_/datasets/imagenet_bs64_t2t_224.py +++ b/configs/_base_/datasets/imagenet_bs64_t2t_224.py @@ -37,7 +37,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -49,7 +49,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py b/configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py index e776907d..71f41241 100644 --- a/configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py +++ b/configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py @@ -16,7 +16,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -28,7 +28,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), 
dict(type='CenterCrop', crop_size=320), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/inshop_bs32_448.py b/configs/_base_/datasets/inshop_bs32_448.py index f0c23b1b..585f301d 100644 --- a/configs/_base_/datasets/inshop_bs32_448.py +++ b/configs/_base_/datasets/inshop_bs32_448.py @@ -12,14 +12,14 @@ train_pipeline = [ dict(type='Resize', scale=512), dict(type='RandomCrop', crop_size=448), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=512), dict(type='CenterCrop', crop_size=448), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/_base_/datasets/voc_bs16.py b/configs/_base_/datasets/voc_bs16.py index dce46edb..eca41345 100644 --- a/configs/_base_/datasets/voc_bs16.py +++ b/configs/_base_/datasets/voc_bs16.py @@ -15,14 +15,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( diff --git a/configs/barlowtwins/benchmarks/resnet50_8xb32-linear-coslr-100e_in1k.py b/configs/barlowtwins/benchmarks/resnet50_8xb32-linear-coslr-100e_in1k.py index ff85136f..7b365127 100644 --- a/configs/barlowtwins/benchmarks/resnet50_8xb32-linear-coslr-100e_in1k.py +++ b/configs/barlowtwins/benchmarks/resnet50_8xb32-linear-coslr-100e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_sgd_coslr_100e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/beit/beit_beit-base-p16_8xb256-amp-coslr-300e_in1k.py b/configs/beit/beit_beit-base-p16_8xb256-amp-coslr-300e_in1k.py index fc773822..7ed9280d 100644 --- a/configs/beit/beit_beit-base-p16_8xb256-amp-coslr-300e_in1k.py +++ b/configs/beit/beit_beit-base-p16_8xb256-amp-coslr-300e_in1k.py @@ -33,10 +33,7 @@ train_pipeline = [ num_masking_patches=75, max_num_patches=None, min_num_patches=16), - dict( - type='PackSelfSupInputs', - algorithm_keys=['mask'], - meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( batch_size=256, diff --git a/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py b/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py index 18a5efe7..9e59caf5 100644 --- a/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py +++ b/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py @@ -64,7 +64,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), @@ -75,7 +75,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git 
a/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py b/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py index b7040f00..9e2a16ec 100644 --- a/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py +++ b/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py @@ -57,7 +57,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), @@ -68,7 +68,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/byol/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py b/configs/byol/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py index 3196fc5c..973d2b22 100644 --- a/configs/byol/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py +++ b/configs/byol/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_lars_coslr_90e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/cae/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py b/configs/cae/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py index 9f4064f0..0e65e18c 100644 --- a/configs/cae/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py +++ b/configs/cae/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py @@ -43,7 +43,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -55,7 +55,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline), batch_size=128) val_dataloader = dict(dataset=dict(pipeline=test_pipeline), batch_size=128) diff --git a/configs/cae/cae_vit-base-p16_8xb256-amp-coslr-300e_in1k.py b/configs/cae/cae_vit-base-p16_8xb256-amp-coslr-300e_in1k.py index 7d35c20c..10c2aaaf 100644 --- a/configs/cae/cae_vit-base-p16_8xb256-amp-coslr-300e_in1k.py +++ b/configs/cae/cae_vit-base-p16_8xb256-amp-coslr-300e_in1k.py @@ -28,10 +28,7 @@ train_pipeline = [ num_masking_patches=75, max_num_patches=None, min_num_patches=16), - dict( - type='PackSelfSupInputs', - algorithm_keys=['mask'], - meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/convnext_v2/convnext-v2-huge_32xb32_in1k-512px.py b/configs/convnext_v2/convnext-v2-huge_32xb32_in1k-512px.py index 3ce16208..7c63b023 100644 --- a/configs/convnext_v2/convnext-v2-huge_32xb32_in1k-512px.py +++ b/configs/convnext_v2/convnext-v2-huge_32xb32_in1k-512px.py @@ -14,13 +14,13 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=512, backend='pillow', interpolation='bicubic'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline)) diff 
--git a/configs/cspnet/cspdarknet50_8xb32_in1k.py b/configs/cspnet/cspdarknet50_8xb32_in1k.py index 4edc2531..85114810 100644 --- a/configs/cspnet/cspdarknet50_8xb32_in1k.py +++ b/configs/cspnet/cspdarknet50_8xb32_in1k.py @@ -25,7 +25,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -37,7 +37,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/cspnet/cspresnet50_8xb32_in1k.py b/configs/cspnet/cspresnet50_8xb32_in1k.py index b28c8fe6..d149637a 100644 --- a/configs/cspnet/cspresnet50_8xb32_in1k.py +++ b/configs/cspnet/cspresnet50_8xb32_in1k.py @@ -25,7 +25,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -37,7 +37,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/cspnet/cspresnext50_8xb32_in1k.py b/configs/cspnet/cspresnext50_8xb32_in1k.py index 5885bd98..1f8c15c1 100644 --- a/configs/cspnet/cspresnext50_8xb32_in1k.py +++ b/configs/cspnet/cspresnext50_8xb32_in1k.py @@ -25,7 +25,7 @@ train_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -37,7 +37,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/csra/resnet101-csra_1xb16_voc07-448px.py b/configs/csra/resnet101-csra_1xb16_voc07-448px.py index b80a14ae..85135ae2 100644 --- a/configs/csra/resnet101-csra_1xb16_voc07-448px.py +++ b/configs/csra/resnet101-csra_1xb16_voc07-448px.py @@ -38,14 +38,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=448, crop_ratio_range=(0.7, 1.0)), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='Resize', scale=448), dict( - type='PackClsInputs', + type='PackInputs', # `gt_label_difficult` is needed for VOC evaluation meta_keys=('sample_idx', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction', diff --git a/configs/densecl/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py b/configs/densecl/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py index cb370f89..f7d82d1e 100644 --- a/configs/densecl/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py +++ b/configs/densecl/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_sgd_steplr_100e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/edgenext/edgenext-base_8xb256-usi_in1k.py 
b/configs/edgenext/edgenext-base_8xb256-usi_in1k.py index f8de0926..13949dea 100644 --- a/configs/edgenext/edgenext-base_8xb256-usi_in1k.py +++ b/configs/edgenext/edgenext-base_8xb256-usi_in1k.py @@ -11,7 +11,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs') + dict(type='PackInputs') ] val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) diff --git a/configs/edgenext/edgenext-small_8xb256-usi_in1k.py b/configs/edgenext/edgenext-small_8xb256-usi_in1k.py index bf996e27..d6bc904b 100644 --- a/configs/edgenext/edgenext-small_8xb256-usi_in1k.py +++ b/configs/edgenext/edgenext-small_8xb256-usi_in1k.py @@ -11,7 +11,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs') + dict(type='PackInputs') ] val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) diff --git a/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py index 76e14081..369d0a43 100644 --- a/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b0_8xb32_in1k.py b/configs/efficientnet/efficientnet-b0_8xb32_in1k.py index b88de4ee..e4263da1 100644 --- a/configs/efficientnet/efficientnet-b0_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b0_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py index 4e673ddb..0405cf5f 100644 --- a/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=240), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=240), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b1_8xb32_in1k.py b/configs/efficientnet/efficientnet-b1_8xb32_in1k.py index 53651159..e5bf2e80 100644 --- a/configs/efficientnet/efficientnet-b1_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b1_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=240), 
dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=240), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py index 5a86d0d0..da3f23b8 100644 --- a/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=260), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=260), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b2_8xb32_in1k.py b/configs/efficientnet/efficientnet-b2_8xb32_in1k.py index ab389819..060a2ad3 100644 --- a/configs/efficientnet/efficientnet-b2_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b2_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=260), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=260), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py index 87de0850..55729a9c 100644 --- a/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=300), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=300), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b3_8xb32_in1k.py b/configs/efficientnet/efficientnet-b3_8xb32_in1k.py index 55cad6ad..d84de5a7 100644 --- a/configs/efficientnet/efficientnet-b3_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b3_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=300), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=300), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py index dc25fbde..a4dbfb21 100644 --- a/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py +++ 
b/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=380), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=380), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b4_8xb32_in1k.py b/configs/efficientnet/efficientnet-b4_8xb32_in1k.py index 475daa4a..08e246c3 100644 --- a/configs/efficientnet/efficientnet-b4_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b4_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=380), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=380), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py index fe75f4e6..0c646da4 100644 --- a/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=456), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=456), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b5_8xb32_in1k.py b/configs/efficientnet/efficientnet-b5_8xb32_in1k.py index b548de37..af4fa4b8 100644 --- a/configs/efficientnet/efficientnet-b5_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b5_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=456), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=456), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py index 7abe461e..dd150549 100644 --- a/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=528), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=528), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b6_8xb32_in1k.py 
b/configs/efficientnet/efficientnet-b6_8xb32_in1k.py index eb9f9da6..fae02aed 100644 --- a/configs/efficientnet/efficientnet-b6_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b6_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=528), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=528), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py index fe968747..687dfd26 100644 --- a/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=600), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=600), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b7_8xb32_in1k.py b/configs/efficientnet/efficientnet-b7_8xb32_in1k.py index 3f9c1fc2..5d783bb3 100644 --- a/configs/efficientnet/efficientnet-b7_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b7_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=600), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=600), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py index f26a788b..07d3692b 100644 --- a/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=672), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=672), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-b8_8xb32_in1k.py b/configs/efficientnet/efficientnet-b8_8xb32_in1k.py index 81934303..868986f5 100644 --- a/configs/efficientnet/efficientnet-b8_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b8_8xb32_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=672), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=672), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] 
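For readers tracking the rename pattern repeated through these EfficientNet hunks, the migrated B8 pipelines assembled from the hunks above read as follows — an illustrative sketch, not an additional hunk in this patch; the values come directly from the efficientnet-b8_8xb32_in1k.py hunk:

```python
# Sketch reconstructed from the hunks above: `PackInputs` simply replaces
# `PackClsInputs` at the end of each pipeline; nothing else changes.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='EfficientNetRandomCrop', scale=672),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='EfficientNetCenterCrop', crop_size=672),
    dict(type='PackInputs'),
]
```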
train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py index 1e520ccb..9de3b27f 100644 --- a/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py @@ -17,13 +17,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=240), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=240), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py index be79f225..e643d550 100644 --- a/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-l2_8xb32_in1k-475px.py b/configs/efficientnet/efficientnet-l2_8xb32_in1k-475px.py index 010f808b..56069514 100644 --- a/configs/efficientnet/efficientnet-l2_8xb32_in1k-475px.py +++ b/configs/efficientnet/efficientnet-l2_8xb32_in1k-475px.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=475), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=475), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet/efficientnet-l2_8xb8_in1k-800px.py b/configs/efficientnet/efficientnet-l2_8xb8_in1k-800px.py index ac4664c6..61bddfa7 100644 --- a/configs/efficientnet/efficientnet-l2_8xb8_in1k-800px.py +++ b/configs/efficientnet/efficientnet-l2_8xb8_in1k-800px.py @@ -10,13 +10,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=800), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=800), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=8, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py b/configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py index c8a64f56..4dc23d49 100644 --- a/configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py +++ b/configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py @@ -44,13 +44,13 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ 
dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=224, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-b1_8xb32_in1k.py b/configs/efficientnet_v2/efficientnetv2-b1_8xb32_in1k.py index 33f48dfd..fa187ff1 100644 --- a/configs/efficientnet_v2/efficientnetv2-b1_8xb32_in1k.py +++ b/configs/efficientnet_v2/efficientnetv2-b1_8xb32_in1k.py @@ -7,13 +7,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=192), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=240, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-b2_8xb32_in1k.py b/configs/efficientnet_v2/efficientnetv2-b2_8xb32_in1k.py index 497c2aa3..3ff5530d 100644 --- a/configs/efficientnet_v2/efficientnetv2-b2_8xb32_in1k.py +++ b/configs/efficientnet_v2/efficientnetv2-b2_8xb32_in1k.py @@ -7,13 +7,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=208), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=260, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-b3_8xb32_in1k.py b/configs/efficientnet_v2/efficientnetv2-b3_8xb32_in1k.py index 16f82c3a..84fb29a5 100644 --- a/configs/efficientnet_v2/efficientnetv2-b3_8xb32_in1k.py +++ b/configs/efficientnet_v2/efficientnetv2-b3_8xb32_in1k.py @@ -7,13 +7,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=240), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=300, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-l_8xb32_in1k-480px.py b/configs/efficientnet_v2/efficientnetv2-l_8xb32_in1k-480px.py index 2bef5591..c3606cf0 100644 --- a/configs/efficientnet_v2/efficientnetv2-l_8xb32_in1k-480px.py +++ b/configs/efficientnet_v2/efficientnetv2-l_8xb32_in1k-480px.py @@ -9,13 +9,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-m_8xb32_in1k-480px.py b/configs/efficientnet_v2/efficientnetv2-m_8xb32_in1k-480px.py index 06f941e2..c7bdd9be 100644 --- a/configs/efficientnet_v2/efficientnetv2-m_8xb32_in1k-480px.py +++ 
b/configs/efficientnet_v2/efficientnetv2-m_8xb32_in1k-480px.py @@ -9,13 +9,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-s_8xb32_in1k-384px.py b/configs/efficientnet_v2/efficientnetv2-s_8xb32_in1k-384px.py index 2d9b8e4f..2bdee636 100644 --- a/configs/efficientnet_v2/efficientnetv2-s_8xb32_in1k-384px.py +++ b/configs/efficientnet_v2/efficientnetv2-s_8xb32_in1k-384px.py @@ -20,13 +20,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=300, crop_padding=0), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=384, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-s_8xb32_in21k.py b/configs/efficientnet_v2/efficientnetv2-s_8xb32_in21k.py index e4536946..54f8a5af 100644 --- a/configs/efficientnet_v2/efficientnetv2-s_8xb32_in21k.py +++ b/configs/efficientnet_v2/efficientnetv2-s_8xb32_in21k.py @@ -23,13 +23,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=224, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/efficientnet_v2/efficientnetv2-xl_8xb32_in1k-512px.py b/configs/efficientnet_v2/efficientnetv2-xl_8xb32_in1k-512px.py index ea161aa6..18f56ff0 100644 --- a/configs/efficientnet_v2/efficientnetv2-xl_8xb32_in1k-512px.py +++ b/configs/efficientnet_v2/efficientnetv2-xl_8xb32_in1k-512px.py @@ -9,13 +9,13 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=512, crop_padding=0), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/eva/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py b/configs/eva/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py index 1ca8adfe..cedfe600 100644 --- a/configs/eva/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py +++ b/configs/eva/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -40,7 +40,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), 
dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/eva/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py b/configs/eva/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py index 63d19733..6eb01044 100644 --- a/configs/eva/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py +++ b/configs/eva/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../../_base_/default_runtime.py' ] diff --git a/configs/inception_v3/inception-v3_8xb32_in1k.py b/configs/inception_v3/inception-v3_8xb32_in1k.py index 061ea6e5..ac977f4e 100644 --- a/configs/inception_v3/inception-v3_8xb32_in1k.py +++ b/configs/inception_v3/inception-v3_8xb32_in1k.py @@ -9,14 +9,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=299), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=342, edge='short'), dict(type='CenterCrop', crop_size=299), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/lenet/lenet5_mnist.py b/configs/lenet/lenet5_mnist.py index df53c94d..0ae81925 100644 --- a/configs/lenet/lenet5_mnist.py +++ b/configs/lenet/lenet5_mnist.py @@ -12,7 +12,7 @@ model = dict( dataset_type = 'MNIST' data_preprocessor = dict(mean=[33.46], std=[78.87], num_classes=10) -pipeline = [dict(type='Resize', scale=32), dict(type='PackClsInputs')] +pipeline = [dict(type='Resize', scale=32), dict(type='PackInputs')] common_data_cfg = dict( type=dataset_type, data_prefix='data/mnist', pipeline=pipeline) diff --git a/configs/mae/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py b/configs/mae/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py index ac0d6fa6..54275494 100644 --- a/configs/mae/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py +++ b/configs/mae/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py @@ -28,7 +28,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -39,7 +39,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mae/benchmarks/vit-base-p16_8xb2048-linear-coslr-90e_in1k.py b/configs/mae/benchmarks/vit-base-p16_8xb2048-linear-coslr-90e_in1k.py index a27f47f8..90f9a596 100644 --- a/configs/mae/benchmarks/vit-base-p16_8xb2048-linear-coslr-90e_in1k.py +++ b/configs/mae/benchmarks/vit-base-p16_8xb2048-linear-coslr-90e_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../../_base_/default_runtime.py' ] diff --git a/configs/mae/benchmarks/vit-huge-p14_32xb8-coslr-50e_in1k-448px.py b/configs/mae/benchmarks/vit-huge-p14_32xb8-coslr-50e_in1k-448px.py index 
8414d55c..75cd48e5 100644 --- a/configs/mae/benchmarks/vit-huge-p14_32xb8-coslr-50e_in1k-448px.py +++ b/configs/mae/benchmarks/vit-huge-p14_32xb8-coslr-50e_in1k-448px.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ @@ -41,7 +41,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=448), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mae/benchmarks/vit-huge-p14_8xb128-coslr-50e_in1k.py b/configs/mae/benchmarks/vit-huge-p14_8xb128-coslr-50e_in1k.py index 812dcf10..6b3158dc 100644 --- a/configs/mae/benchmarks/vit-huge-p14_8xb128-coslr-50e_in1k.py +++ b/configs/mae/benchmarks/vit-huge-p14_8xb128-coslr-50e_in1k.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -40,7 +40,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mae/benchmarks/vit-large-p16_8xb128-coslr-50e_in1k.py b/configs/mae/benchmarks/vit-large-p16_8xb128-coslr-50e_in1k.py index a7721dd5..1b65ba9b 100644 --- a/configs/mae/benchmarks/vit-large-p16_8xb128-coslr-50e_in1k.py +++ b/configs/mae/benchmarks/vit-large-p16_8xb128-coslr-50e_in1k.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -40,7 +40,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mae/benchmarks/vit-large-p16_8xb2048-linear-coslr-90e_in1k.py b/configs/mae/benchmarks/vit-large-p16_8xb2048-linear-coslr-90e_in1k.py index f8291eef..145f3dcf 100644 --- a/configs/mae/benchmarks/vit-large-p16_8xb2048-linear-coslr-90e_in1k.py +++ b/configs/mae/benchmarks/vit-large-p16_8xb2048-linear-coslr-90e_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../../_base_/default_runtime.py' ] diff --git a/configs/maskfeat/benchmarks/vit-base-p16_8xb256-coslr-100e_in1k.py b/configs/maskfeat/benchmarks/vit-base-p16_8xb256-coslr-100e_in1k.py index ce163113..57f82d98 100644 --- a/configs/maskfeat/benchmarks/vit-base-p16_8xb256-coslr-100e_in1k.py +++ b/configs/maskfeat/benchmarks/vit-base-p16_8xb256-coslr-100e_in1k.py @@ -30,13 +30,13 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), - 
dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=256, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py b/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py index 89a63d49..446e1c32 100644 --- a/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py +++ b/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py @@ -13,9 +13,8 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict( type='RandomResizedCrop', - size=224, - scale=(0.5, 1.0), - ratio=(0.75, 1.3333), + scale=224, + crop_ratio_range=(0.5, 1.0), interpolation='bicubic'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), dict( @@ -24,10 +23,7 @@ train_pipeline = [ num_masking_patches=78, min_num_patches=15, ), - dict( - type='PackSelfSupInputs', - algorithm_keys=['mask'], - meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/milan/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py b/configs/milan/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py index 1ca8adfe..cedfe600 100644 --- a/configs/milan/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py +++ b/configs/milan/benchmarks/vit-base-p16_8xb128-coslr-100e_in1k.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -40,7 +40,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/milan/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py b/configs/milan/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py index 63d19733..6eb01044 100644 --- a/configs/milan/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py +++ b/configs/milan/benchmarks/vit-base-p16_8xb2048-linear-coslr-100e_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_bs1024_adamw_swin.py', '../../_base_/default_runtime.py' ] diff --git a/configs/mixmim/benchmarks/mixmim-base_8xb128-coslr-100e_in1k.py b/configs/mixmim/benchmarks/mixmim-base_8xb128-coslr-100e_in1k.py index 1865f17e..0421d120 100644 --- a/configs/mixmim/benchmarks/mixmim-base_8xb128-coslr-100e_in1k.py +++ b/configs/mixmim/benchmarks/mixmim-base_8xb128-coslr-100e_in1k.py @@ -42,7 +42,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict( @@ -67,7 +67,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] val_dataloader = dict( diff --git a/configs/mixmim/mixmim_mixmim-base_16xb128-coslr-300e_in1k.py b/configs/mixmim/mixmim_mixmim-base_16xb128-coslr-300e_in1k.py index 45cae815..835dfdf8 100644 --- a/configs/mixmim/mixmim_mixmim-base_16xb128-coslr-300e_in1k.py +++ b/configs/mixmim/mixmim_mixmim-base_16xb128-coslr-300e_in1k.py @@ -8,12 +8,12 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict( type='RandomResizedCrop', - size=224, - scale=(0.2, 1.0), + scale=224, + 
crop_ratio_range=(0.2, 1.0), backend='pillow', interpolation='bicubic'), dict(type='RandomFlip', prob=0.5), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict( diff --git a/configs/mobilenet_v3/mobilenet-v3-small-050_8xb128_in1k.py b/configs/mobilenet_v3/mobilenet-v3-small-050_8xb128_in1k.py index 0377d4a0..fc145625 100644 --- a/configs/mobilenet_v3/mobilenet-v3-small-050_8xb128_in1k.py +++ b/configs/mobilenet_v3/mobilenet-v3-small-050_8xb128_in1k.py @@ -27,7 +27,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -39,7 +39,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mobilenet_v3/mobilenet-v3-small-075_8xb128_in1k.py b/configs/mobilenet_v3/mobilenet-v3-small-075_8xb128_in1k.py index d3ea0900..464b7cbd 100644 --- a/configs/mobilenet_v3/mobilenet-v3-small-075_8xb128_in1k.py +++ b/configs/mobilenet_v3/mobilenet-v3-small-075_8xb128_in1k.py @@ -27,7 +27,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -39,7 +39,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mobileone/mobileone-s1_8xb32_in1k.py b/configs/mobileone/mobileone-s1_8xb32_in1k.py index 52c8442e..0bc3fb08 100644 --- a/configs/mobileone/mobileone-s1_8xb32_in1k.py +++ b/configs/mobileone/mobileone-s1_8xb32_in1k.py @@ -24,7 +24,7 @@ base_train_pipeline = [ magnitude_level=7, magnitude_std=0.5, hparams=dict(pad_val=[round(x) for x in bgr_mean])), - dict(type='PackClsInputs') + dict(type='PackInputs') ] import copy # noqa: E402 diff --git a/configs/mobileone/mobileone-s2_8xb32_in1k.py b/configs/mobileone/mobileone-s2_8xb32_in1k.py index 547ae995..a7d4aae0 100644 --- a/configs/mobileone/mobileone-s2_8xb32_in1k.py +++ b/configs/mobileone/mobileone-s2_8xb32_in1k.py @@ -26,7 +26,7 @@ base_train_pipeline = [ magnitude_level=7, magnitude_std=0.5, hparams=dict(pad_val=[round(x) for x in bgr_mean])), - dict(type='PackClsInputs') + dict(type='PackInputs') ] # modify start epoch RandomResizedCrop.scale to 160 diff --git a/configs/mobileone/mobileone-s3_8xb32_in1k.py b/configs/mobileone/mobileone-s3_8xb32_in1k.py index b0ef4164..2be0dc7e 100644 --- a/configs/mobileone/mobileone-s3_8xb32_in1k.py +++ b/configs/mobileone/mobileone-s3_8xb32_in1k.py @@ -26,7 +26,7 @@ base_train_pipeline = [ magnitude_level=7, magnitude_std=0.5, hparams=dict(pad_val=[round(x) for x in bgr_mean])), - dict(type='PackClsInputs') + dict(type='PackInputs') ] # modify start epoch RandomResizedCrop.scale to 160 diff --git a/configs/mobileone/mobileone-s4_8xb32_in1k.py b/configs/mobileone/mobileone-s4_8xb32_in1k.py index 8c31f240..49356f05 100644 --- a/configs/mobileone/mobileone-s4_8xb32_in1k.py +++ b/configs/mobileone/mobileone-s4_8xb32_in1k.py @@ -24,7 +24,7 @@ base_train_pipeline = [ magnitude_level=7, magnitude_std=0.5, hparams=dict(pad_val=[round(x) for x in bgr_mean])), - dict(type='PackClsInputs') + dict(type='PackInputs') ] 
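The mixmim hunk above also renames the `RandomResizedCrop` arguments: the old mmselfsup-style `size` becomes `scale`, and the old `scale` (the area range) becomes `crop_ratio_range`. A minimal migrated pre-training pipeline, reconstructed from that hunk, would look like this (illustrative sketch; only values shown in the hunk are used):

```python
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=224,                    # was `size=224`
        crop_ratio_range=(0.2, 1.0),  # was `scale=(0.2, 1.0)`
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5),
    # Replaces dict(type='PackSelfSupInputs', meta_keys=['img_path']).
    dict(type='PackInputs'),
]
```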
import copy # noqa: E402 diff --git a/configs/mobilevit/mobilevit-small_8xb128_in1k.py b/configs/mobilevit/mobilevit-small_8xb128_in1k.py index c5a9a566..59693963 100644 --- a/configs/mobilevit/mobilevit-small_8xb128_in1k.py +++ b/configs/mobilevit/mobilevit-small_8xb128_in1k.py @@ -18,7 +18,7 @@ test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=288, edge='short'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=128) diff --git a/configs/mobilevit/mobilevit-xsmall_8xb128_in1k.py b/configs/mobilevit/mobilevit-xsmall_8xb128_in1k.py index c9f6955f..557892bc 100644 --- a/configs/mobilevit/mobilevit-xsmall_8xb128_in1k.py +++ b/configs/mobilevit/mobilevit-xsmall_8xb128_in1k.py @@ -18,7 +18,7 @@ test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=288, edge='short'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=128) diff --git a/configs/mobilevit/mobilevit-xxsmall_8xb128_in1k.py b/configs/mobilevit/mobilevit-xxsmall_8xb128_in1k.py index 303d499c..74aea82f 100644 --- a/configs/mobilevit/mobilevit-xxsmall_8xb128_in1k.py +++ b/configs/mobilevit/mobilevit-xxsmall_8xb128_in1k.py @@ -18,7 +18,7 @@ test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=288, edge='short'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=128) diff --git a/configs/mocov2/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py b/configs/mocov2/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py index cb370f89..f7d82d1e 100644 --- a/configs/mocov2/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py +++ b/configs/mocov2/benchmarks/resnet50_8xb32-linear-steplr-100e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_sgd_steplr_100e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/mocov3/benchmarks/resnet50_8xb128-linear-coslr-90e_in1k.py b/configs/mocov3/benchmarks/resnet50_8xb128-linear-coslr-90e_in1k.py index df72c2e1..4af5e170 100644 --- a/configs/mocov3/benchmarks/resnet50_8xb128-linear-coslr-90e_in1k.py +++ b/configs/mocov3/benchmarks/resnet50_8xb128-linear-coslr-90e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_sgd_coslr_100e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/mocov3/benchmarks/vit-base-p16_8xb128-linear-coslr-90e_in1k.py b/configs/mocov3/benchmarks/vit-base-p16_8xb128-linear-coslr-90e_in1k.py index e6c35e69..1ad27c8b 100644 --- a/configs/mocov3/benchmarks/vit-base-p16_8xb128-linear-coslr-90e_in1k.py +++ b/configs/mocov3/benchmarks/vit-base-p16_8xb128-linear-coslr-90e_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/default_runtime.py', ] diff --git a/configs/mocov3/benchmarks/vit-small-p16_8xb128-linear-coslr-90e_in1k.py b/configs/mocov3/benchmarks/vit-small-p16_8xb128-linear-coslr-90e_in1k.py index 2b3a8a8c..9bee68be 100644 --- a/configs/mocov3/benchmarks/vit-small-p16_8xb128-linear-coslr-90e_in1k.py +++ 
b/configs/mocov3/benchmarks/vit-small-p16_8xb128-linear-coslr-90e_in1k.py @@ -1,5 +1,5 @@ _base_ = [ - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/default_runtime.py', ] diff --git a/configs/mocov3/mocov3_vit-base-p16_16xb256-amp-coslr-300e_in1k.py b/configs/mocov3/mocov3_vit-base-p16_16xb256-amp-coslr-300e_in1k.py index c7d302ad..cd22ddc9 100644 --- a/configs/mocov3/mocov3_vit-base-p16_16xb256-amp-coslr-300e_in1k.py +++ b/configs/mocov3/mocov3_vit-base-p16_16xb256-amp-coslr-300e_in1k.py @@ -8,7 +8,9 @@ _base_ = [ # `RandomResizedCrop`, `scale=(0.08, 1.)` in ViT pipeline view_pipeline1 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.08, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.08, 1.), backend='pillow'), dict( type='RandomApply', @@ -26,13 +28,19 @@ view_pipeline1 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=1.), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=1.), dict(type='RandomSolarize', prob=0.), dict(type='RandomFlip', prob=0.5), ] view_pipeline2 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.08, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.08, 1.), backend='pillow'), dict( type='RandomApply', @@ -50,7 +58,11 @@ view_pipeline2 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.1), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.1), dict(type='RandomSolarize', prob=0.2), dict(type='RandomFlip', prob=0.5), ] @@ -61,7 +73,7 @@ train_pipeline = [ type='MultiView', num_views=[1, 1], transforms=[view_pipeline1, view_pipeline2]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict(batch_size=256, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mocov3/mocov3_vit-large-p16_64xb64-amp-coslr-300e_in1k.py b/configs/mocov3/mocov3_vit-large-p16_64xb64-amp-coslr-300e_in1k.py index 7352fda2..b2d52896 100644 --- a/configs/mocov3/mocov3_vit-large-p16_64xb64-amp-coslr-300e_in1k.py +++ b/configs/mocov3/mocov3_vit-large-p16_64xb64-amp-coslr-300e_in1k.py @@ -8,7 +8,9 @@ _base_ = [ # `RandomResizedCrop`, `scale=(0.08, 1.)` in ViT pipeline view_pipeline1 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.08, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.08, 1.), backend='pillow'), dict( type='RandomApply', @@ -26,13 +28,19 @@ view_pipeline1 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=1.), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=1.), dict(type='RandomSolarize', prob=0.), dict(type='RandomFlip', prob=0.5), ] view_pipeline2 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.08, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.08, 1.), backend='pillow'), dict( type='RandomApply', @@ -50,7 +58,11 @@ view_pipeline2 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.1), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.1), dict(type='RandomSolarize', prob=0.2), dict(type='RandomFlip', 
prob=0.5), ] @@ -61,7 +73,7 @@ train_pipeline = [ type='MultiView', num_views=[1, 1], transforms=[view_pipeline1, view_pipeline2]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict(batch_size=64, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/mocov3/mocov3_vit-small-p16_16xb256-amp-coslr-300e_in1k.py b/configs/mocov3/mocov3_vit-small-p16_16xb256-amp-coslr-300e_in1k.py index 7d180571..aaff259f 100644 --- a/configs/mocov3/mocov3_vit-small-p16_16xb256-amp-coslr-300e_in1k.py +++ b/configs/mocov3/mocov3_vit-small-p16_16xb256-amp-coslr-300e_in1k.py @@ -8,7 +8,9 @@ _base_ = [ # `RandomResizedCrop`, `scale=(0.08, 1.)` in ViT pipeline view_pipeline1 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.08, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.08, 1.), backend='pillow'), dict( type='RandomApply', @@ -26,13 +28,19 @@ view_pipeline1 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=1.), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=1.), dict(type='RandomSolarize', prob=0.), dict(type='RandomFlip', prob=0.5), ] view_pipeline2 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.08, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.08, 1.), backend='pillow'), dict( type='RandomApply', @@ -50,7 +58,11 @@ view_pipeline2 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.1), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.1), dict(type='RandomSolarize', prob=0.2), dict(type='RandomFlip', prob=0.5), ] @@ -61,7 +73,7 @@ train_pipeline = [ type='MultiView', num_views=[1, 1], transforms=[view_pipeline1, view_pipeline2]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] train_dataloader = dict(batch_size=256, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/regnet/regnetx-400mf_8xb128_in1k.py b/configs/regnet/regnetx-400mf_8xb128_in1k.py index 2181d999..bad16785 100644 --- a/configs/regnet/regnetx-400mf_8xb128_in1k.py +++ b/configs/regnet/regnetx-400mf_8xb128_in1k.py @@ -31,7 +31,7 @@ train_pipeline = [ eigvec=EIGVEC, alphastd=25.5, # because the value range of images is [0,255] to_rgb=False), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline)) diff --git a/configs/repmlp/repmlp-base_8xb64_in1k-256px.py b/configs/repmlp/repmlp-base_8xb64_in1k-256px.py index 499adae4..81dc55a2 100644 --- a/configs/repmlp/repmlp-base_8xb64_in1k-256px.py +++ b/configs/repmlp/repmlp-base_8xb64_in1k-256px.py @@ -13,14 +13,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=256), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=292, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=256), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/repmlp/repmlp-base_8xb64_in1k.py b/configs/repmlp/repmlp-base_8xb64_in1k.py index 43c8765f..666ce405 100644 --- a/configs/repmlp/repmlp-base_8xb64_in1k.py +++ 
b/configs/repmlp/repmlp-base_8xb64_in1k.py @@ -11,7 +11,7 @@ test_pipeline = [ # resizing to (256, 256) here, different from resizing shorter edge to 256 dict(type='Resize', scale=(256, 256), backend='pillow'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) diff --git a/configs/repvgg/repvgg-B3_8xb32_in1k.py b/configs/repvgg/repvgg-B3_8xb32_in1k.py index 2d5d6e13..e9d52578 100644 --- a/configs/repvgg/repvgg-B3_8xb32_in1k.py +++ b/configs/repvgg/repvgg-B3_8xb32_in1k.py @@ -38,14 +38,14 @@ train_pipeline = [ magnitude_level=7, magnitude_std=0.5, hparams=dict(pad_val=[round(x) for x in bgr_mean])), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/resnest/resnest101_32xb64_in1k.py b/configs/resnest/resnest101_32xb64_in1k.py index 4cd38223..ac786591 100644 --- a/configs/resnest/resnest101_32xb64_in1k.py +++ b/configs/resnest/resnest101_32xb64_in1k.py @@ -31,13 +31,13 @@ train_pipeline = [ eigvec=EIGVEC, alphastd=0.1, to_rgb=False), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=256, backend='pillow'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/resnest/resnest200_64xb32_in1k.py b/configs/resnest/resnest200_64xb32_in1k.py index 27fbf8b3..e3b9fb3d 100644 --- a/configs/resnest/resnest200_64xb32_in1k.py +++ b/configs/resnest/resnest200_64xb32_in1k.py @@ -31,13 +31,13 @@ train_pipeline = [ eigvec=EIGVEC, alphastd=0.1, to_rgb=False), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=320, backend='pillow'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] # schedule settings diff --git a/configs/resnest/resnest269_64xb32_in1k.py b/configs/resnest/resnest269_64xb32_in1k.py index 956b00a7..0e884d63 100644 --- a/configs/resnest/resnest269_64xb32_in1k.py +++ b/configs/resnest/resnest269_64xb32_in1k.py @@ -31,13 +31,13 @@ train_pipeline = [ eigvec=EIGVEC, alphastd=0.1, to_rgb=False), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=416, backend='pillow'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/resnest/resnest50_32xb64_in1k.py b/configs/resnest/resnest50_32xb64_in1k.py index eb83c6e9..05f839b3 100644 --- a/configs/resnest/resnest50_32xb64_in1k.py +++ b/configs/resnest/resnest50_32xb64_in1k.py @@ -31,13 +31,13 @@ train_pipeline = [ eigvec=EIGVEC, alphastd=0.1, to_rgb=False), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='EfficientNetCenterCrop', crop_size=256, backend='pillow'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/simclr/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py 
b/configs/simclr/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py index 3196fc5c..973d2b22 100644 --- a/configs/simclr/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py +++ b/configs/simclr/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_lars_coslr_90e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/simmim/benchmarks/swin-base-w7_8xb256-coslr-100e_in1k.py b/configs/simmim/benchmarks/swin-base-w7_8xb256-coslr-100e_in1k.py index 77095b60..f7713750 100644 --- a/configs/simmim/benchmarks/swin-base-w7_8xb256-coslr-100e_in1k.py +++ b/configs/simmim/benchmarks/swin-base-w7_8xb256-coslr-100e_in1k.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -40,7 +40,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/simmim/benchmarks/swin-large-w14_8xb256-coslr-100e_in1k.py b/configs/simmim/benchmarks/swin-large-w14_8xb256-coslr-100e_in1k.py index 3b2fd408..72873750 100644 --- a/configs/simmim/benchmarks/swin-large-w14_8xb256-coslr-100e_in1k.py +++ b/configs/simmim/benchmarks/swin-large-w14_8xb256-coslr-100e_in1k.py @@ -29,7 +29,7 @@ train_pipeline = [ max_area_ratio=0.3333333333333333, fill_color=[103.53, 116.28, 123.675], fill_std=[57.375, 57.12, 58.395]), - dict(type='PackClsInputs') + dict(type='PackInputs') ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -40,7 +40,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs') + dict(type='PackInputs') ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/simsiam/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py b/configs/simsiam/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py index 3196fc5c..973d2b22 100644 --- a/configs/simsiam/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py +++ b/configs/simsiam/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_lars_coslr_90e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/swav/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py b/configs/swav/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py index 3196fc5c..973d2b22 100644 --- a/configs/swav/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py +++ b/configs/swav/benchmarks/resnet50_8xb512-linear-coslr-90e_in1k.py @@ -1,6 +1,6 @@ _base_ = [ '../../_base_/models/resnet50.py', - '../../_base_/datasets/imagenet_bs32_pillow.py', + '../../_base_/datasets/imagenet_bs32_pil_resize.py', '../../_base_/schedules/imagenet_lars_coslr_90e.py', '../../_base_/default_runtime.py', ] diff --git a/configs/swav/swav_resnet50_8xb32-mcrop-coslr-200e_in1k-224px-96px.py b/configs/swav/swav_resnet50_8xb32-mcrop-coslr-200e_in1k-224px-96px.py index 317dc2ff..690968b2 100644 --- 
a/configs/swav/swav_resnet50_8xb32-mcrop-coslr-200e_in1k-224px-96px.py +++ b/configs/swav/swav_resnet50_8xb32-mcrop-coslr-200e_in1k-224px-96px.py @@ -16,7 +16,9 @@ num_crops = [2, 6] color_distort_strength = 1.0 view_pipeline1 = [ dict( - type='RandomResizedCrop', size=224, scale=(0.14, 1.), + type='RandomResizedCrop', + scale=224, + crop_ratio_range=(0.14, 1.), backend='pillow'), dict( type='RandomApply', @@ -34,14 +36,18 @@ view_pipeline1 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.5), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.5), dict(type='RandomFlip', prob=0.5), ] view_pipeline2 = [ dict( type='RandomResizedCrop', - size=96, - scale=(0.05, 0.14), + scale=96, + crop_ratio_range=(0.05, 0.14), backend='pillow'), dict( type='RandomApply', @@ -59,7 +65,11 @@ view_pipeline2 = [ prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), - dict(type='RandomGaussianBlur', sigma_min=0.1, sigma_max=2.0, prob=0.5), + dict( + type='GaussianBlur', + magnitude_range=(0.1, 2.0), + magnitude_std='inf', + prob=0.5), dict(type='RandomFlip', prob=0.5), ] train_pipeline = [ @@ -68,7 +78,7 @@ train_pipeline = [ type='MultiView', num_views=num_crops, transforms=[view_pipeline1, view_pipeline2]), - dict(type='PackSelfSupInputs', meta_keys=['img_path']) + dict(type='PackInputs') ] batch_size = 32 diff --git a/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-384px.py b/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-384px.py index 6c487812..44e51b19 100644 --- a/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-384px.py +++ b/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-384px.py @@ -21,7 +21,7 @@ test_pipeline = [ scale=(384, 384), backend='pillow', interpolation='bicubic'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) diff --git a/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-512px.py b/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-512px.py index 4746e320..05b47c6d 100644 --- a/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-512px.py +++ b/configs/tinyvit/tinyvit-21m-distill_8xb256_in1k-512px.py @@ -20,7 +20,7 @@ test_pipeline = [ scale=(512, 512), backend='pillow', interpolation='bicubic'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] val_dataloader = dict(batch_size=16, dataset=dict(pipeline=test_pipeline)) diff --git a/configs/tnt/tnt-s-p16_16xb64_in1k.py b/configs/tnt/tnt-s-p16_16xb64_in1k.py index b882b1e5..af71232f 100644 --- a/configs/tnt/tnt-s-p16_16xb64_in1k.py +++ b/configs/tnt/tnt-s-p16_16xb64_in1k.py @@ -22,7 +22,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(batch_size=64) diff --git a/configs/van/van-base_8xb128_in1k.py b/configs/van/van-base_8xb128_in1k.py index 688bfd76..47082b74 100644 --- a/configs/van/van-base_8xb128_in1k.py +++ b/configs/van/van-base_8xb128_in1k.py @@ -42,7 +42,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -54,7 +54,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline), batch_size=128) diff 
--git a/configs/van/van-large_8xb128_in1k.py b/configs/van/van-large_8xb128_in1k.py index 38e7419a..b1656772 100644 --- a/configs/van/van-large_8xb128_in1k.py +++ b/configs/van/van-large_8xb128_in1k.py @@ -42,7 +42,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -54,7 +54,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline), batch_size=128) diff --git a/configs/van/van-small_8xb128_in1k.py b/configs/van/van-small_8xb128_in1k.py index 5f7498a2..bbbbbdf4 100644 --- a/configs/van/van-small_8xb128_in1k.py +++ b/configs/van/van-small_8xb128_in1k.py @@ -42,7 +42,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -54,7 +54,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline), batch_size=128) diff --git a/configs/van/van-tiny_8xb128_in1k.py b/configs/van/van-tiny_8xb128_in1k.py index e2899470..2ac62dab 100644 --- a/configs/van/van-tiny_8xb128_in1k.py +++ b/configs/van/van-tiny_8xb128_in1k.py @@ -42,7 +42,7 @@ train_pipeline = [ max_area_ratio=1 / 3, fill_color=bgr_mean, fill_std=bgr_std), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ @@ -54,7 +54,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline), batch_size=128) diff --git a/configs/vig/pvig-base_8xb128_in1k.py b/configs/vig/pvig-base_8xb128_in1k.py index 7d61e8fa..1d66359c 100644 --- a/configs/vig/pvig-base_8xb128_in1k.py +++ b/configs/vig/pvig-base_8xb128_in1k.py @@ -15,7 +15,7 @@ test_pipeline = [ backend='pillow', interpolation='bicubic'), dict(type='CenterCrop', crop_size=224), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) diff --git a/configs/vision_transformer/vit-base-p16_64xb64_in1k-384px.py b/configs/vision_transformer/vit-base-p16_64xb64_in1k-384px.py index 5c80879b..e0f74587 100644 --- a/configs/vision_transformer/vit-base-p16_64xb64_in1k-384px.py +++ b/configs/vision_transformer/vit-base-p16_64xb64_in1k-384px.py @@ -20,14 +20,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=384, backend='pillow'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=384, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=384), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/vision_transformer/vit-base-p32_64xb64_in1k-384px.py b/configs/vision_transformer/vit-base-p32_64xb64_in1k-384px.py index ae69ce85..e5a4d14f 100644 --- a/configs/vision_transformer/vit-base-p32_64xb64_in1k-384px.py +++ b/configs/vision_transformer/vit-base-p32_64xb64_in1k-384px.py @@ -20,14 +20,14 @@ train_pipeline = [ 
dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=384, backend='pillow'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=384, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=384), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/vision_transformer/vit-large-p16_64xb64_in1k-384px.py b/configs/vision_transformer/vit-large-p16_64xb64_in1k-384px.py index ece6dd95..98e96ec6 100644 --- a/configs/vision_transformer/vit-large-p16_64xb64_in1k-384px.py +++ b/configs/vision_transformer/vit-large-p16_64xb64_in1k-384px.py @@ -20,14 +20,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=384, backend='pillow'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=384, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=384), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/vision_transformer/vit-large-p32_64xb64_in1k-384px.py b/configs/vision_transformer/vit-large-p32_64xb64_in1k-384px.py index 3ace6e0b..22320d11 100644 --- a/configs/vision_transformer/vit-large-p32_64xb64_in1k-384px.py +++ b/configs/vision_transformer/vit-large-p32_64xb64_in1k-384px.py @@ -20,14 +20,14 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=384, backend='pillow'), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=384, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=384), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/docs/en/advanced_guides/pipeline.md b/docs/en/advanced_guides/pipeline.md index 1ab7cbe8..427a3068 100644 --- a/docs/en/advanced_guides/pipeline.md +++ b/docs/en/advanced_guides/pipeline.md @@ -21,7 +21,7 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), dict(type='RandomFlip', prob=0.5, direction='horizontal'), - dict(type='PackClsInputs'), + dict(type='PackInputs'), ] ``` @@ -115,14 +115,14 @@ to accelerate. To configure image normalization and mixup/cutmix, please use the The formatting is to collect training data from the data information dict and convert these data to model-friendly format. -In most cases, you can simply use [`PackClsInputs`](mmpretrain.datasets.transforms.PackClsInputs), and it will +In most cases, you can simply use [`PackInputs`](mmpretrain.datasets.transforms.PackInputs), and it will convert the image in NumPy array format to PyTorch tensor, and pack the ground truth categories information and other meta information as a [`DataSample`](mmpretrain.structures.DataSample). ```python train_pipeline = [ ... 
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 ```
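The formatting step documented above is the biggest behavioral change of this refactor, so here is a minimal, hedged sketch of what a pipeline ending in `PackInputs` produces. It assumes an mmpretrain build that includes this patch, and it starts from an in-memory image instead of `LoadImageFromFile` so it runs without data files:

```python
import numpy as np
from mmpretrain.registry import TRANSFORMS

pipeline = [
    TRANSFORMS.build(dict(type='RandomResizedCrop', scale=224)),
    TRANSFORMS.build(dict(type='RandomFlip', prob=0.5, direction='horizontal')),
    TRANSFORMS.build(dict(type='PackInputs')),
]

# A fake decoded sample stands in for LoadImageFromFile.
results = dict(
    img=np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8),
    gt_label=3)
for t in pipeline:
    results = t(results)

print(results['inputs'].shape)  # torch.Size([3, 224, 224])
print(results['data_samples'])  # a DataSample carrying gt_label as a tensor
```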
diff --git a/docs/en/api/data_process.rst b/docs/en/api/data_process.rst
index f6421a91..b1fb32dd 100644
--- a/docs/en/api/data_process.rst
+++ b/docs/en/api/data_process.rst
@@ -32,7 +32,7 @@ for example:
     dict(type='LoadImageFromFile'),
     dict(type='RandomResizedCrop', scale=224),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]

 train_dataloader = dict(
@@ -50,6 +50,21 @@ Every item of a pipeline list is one of the following data transforms class. And
    :local:
    :backlinks: top

+Loading and Formatting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autosummary::
+   :toctree: generated
+   :nosignatures:
+   :template: data_transform.rst
+
+   LoadImageFromFile
+   PackInputs
+   ToNumpy
+   ToPIL
+   Transpose
+   Collect
+
 Processing and Augmentation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -59,13 +74,19 @@ Processing and Augmentation
    :template: data_transform.rst

    Albumentations
+   CenterCrop
    ColorJitter
    EfficientNetCenterCrop
    EfficientNetRandomCrop
    Lighting
+   Normalize
    RandomCrop
    RandomErasing
+   RandomFlip
+   RandomGrayscale
+   RandomResize
    RandomResizedCrop
+   Resize
    ResizeEdge

 Composed Augmentation
@@ -81,27 +102,8 @@ augmentation transforms, such as ``AutoAugment`` and ``RandAugment``.
    AutoAugment
    RandAugment

-To specify the augmentation combination (The ``policies`` argument), you can use string to specify
-from some preset policies.
-
-.. list-table::
-   :widths: 20 20 60
-   :header-rows: 1
-
-   * - Preset policy
-     - Use for
-     - Description
-   * - "imagenet"
-     - :class:`AutoAugment`
-     - Policy for ImageNet, come from `DeepVoltaire/AutoAugment`_
-   * - "timm_increasing"
-     - :class:`RandAugment`
-     - The ``_RAND_INCREASING_TRANSFORMS`` policy from `timm`_
-
-.. _DeepVoltaire/AutoAugment: https://github.com/DeepVoltaire/AutoAugment
-.. _timm: https://github.com/rwightman/pytorch-image-models
-
-And you can also configure a group of policies manually by selecting from the below table.
+The above transforms are composed of a group of policies from the random
+transforms below:

 .. autosummary::
    :toctree: generated
@@ -114,6 +116,7 @@ And you can also configure a group of policies manually by selecting from the be
    Contrast
    Cutout
    Equalize
+   GaussianBlur
    Invert
    Posterize
    Rotate
@@ -124,45 +127,20 @@ And you can also configure a group of policies manually by selecting from the be
    Translate
    BaseAugTransform

-Formatting
-^^^^^^^^^^
+MMCV transforms
+^^^^^^^^^^^^^^^
+
+We also provide many transforms from MMCV. You can use them directly in the config files. The whole transforms list can be found in :external+mmcv:doc:`api/transforms`.
+
+Transform Wrapper
+^^^^^^^^^^^^^^^^^

 .. autosummary::
    :toctree: generated
    :nosignatures:
    :template: data_transform.rst

-   Collect
-   PackClsInputs
-   ToNumpy
-   ToPIL
-   Transpose
-
-
-MMCV transforms
-^^^^^^^^^^^^^^^
-
-We also provides many transforms in MMCV. You can use them directly in the config files. Here are some frequently used transforms, and the whole transforms list can be found in :external+mmcv:doc:`api/transforms`.
-
-.. list-table::
-   :widths: 50 50
-
-   * - :external:class:`~mmcv.transforms.LoadImageFromFile`
-     - Load an image from file.
-   * - :external:class:`~mmcv.transforms.Resize`
-     - Resize images & bbox & seg & keypoints.
-   * - :external:class:`~mmcv.transforms.RandomResize`
-     - Random resize images & bbox & keypoints.
-   * - :external:class:`~mmcv.transforms.RandomFlip`
-     - Flip the image & bbox & keypoints & segmentation map.
-   * - :external:class:`~mmcv.transforms.RandomGrayscale`
-     - Randomly convert image to grayscale with a probability.
-   * - :external:class:`~mmcv.transforms.CenterCrop`
-     - Crop the center of the image, segmentation masks, bounding boxes and key points. If the crop area exceeds the original image and ``auto_pad`` is True, the original image will be padded before cropping.
-   * - :external:class:`~mmcv.transforms.Normalize`
-     - Normalize the image.
-   * - :external:class:`~mmcv.transforms.Compose`
-     - Compose multiple transforms sequentially.
+   MultiView

 .. module:: mmpretrain.models.utils.data_preprocessor
diff --git a/docs/en/migration.md b/docs/en/migration.md
index 2dbf2b6e..ba27e262 100644
--- a/docs/en/migration.md
+++ b/docs/en/migration.md
@@ -121,7 +121,7 @@ test_dataloader = val_dataloader
 Changes in **`pipeline`**:

-- The original formatting transforms **`ToTensor`**, **`ImageToTensor`** and **`Collect`** are combined as [`PackClsInputs`](mmpretrain.datasets.transforms.PackClsInputs).
+- The original formatting transforms **`ToTensor`**, **`ImageToTensor`** and **`Collect`** are combined as [`PackInputs`](mmpretrain.datasets.transforms.PackInputs).
 - We don't recommend doing **`Normalize`** in the dataset pipeline. Please remove it from pipelines and set it in the `data_preprocessor` field.
 - The argument `flip_prob` in [**`RandomFlip`**](mmcv.transforms.RandomFlip) is renamed to `prob`.
 - The argument `size` in [**`RandomCrop`**](mmpretrain.datasets.transforms.RandomCrop) is renamed to `crop_size`.
@@ -168,7 +168,7 @@ train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='RandomResizedCrop', scale=224),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 ```
diff --git a/docs/en/user_guides/config.md b/docs/en/user_guides/config.md
index 31fb8cb2..e0f3f8ba 100644
--- a/docs/en/user_guides/config.md
+++ b/docs/en/user_guides/config.md
@@ -123,14 +123,14 @@ train_pipeline = [
     dict(type='LoadImageFromFile'),    # read image
     dict(type='RandomResizedCrop', scale=224),    # Random scaling and cropping
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),    # random horizontal flip
-    dict(type='PackClsInputs'),    # prepare images and labels
+    dict(type='PackInputs'),    # prepare images and labels
 ]

 test_pipeline = [
     dict(type='LoadImageFromFile'),    # read image
     dict(type='ResizeEdge', scale=256, edge='short'),    # Scale the short side to 256
     dict(type='CenterCrop', crop_size=224),    # center crop
-    dict(type='PackClsInputs'),    # prepare images and labels
+    dict(type='PackInputs'),    # prepare images and labels
 ]

 # Construct training set dataloader
@@ -331,14 +331,14 @@ train_pipeline = [
         magnitude_level=6,
         magnitude_std=0.5,
         hparams=dict(pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]

 test_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='ResizeEdge', scale=236, edge='short', backend='pillow', interpolation='bicubic'),
     dict(type='CenterCrop', crop_size=224),
-    dict(type='PackClsInputs')
+    dict(type='PackInputs')
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
@@ -387,7 +387,7 @@ train_pipeline = [
         eigvec=EIGVEC,
         alphastd=0.1,
         to_rgb=False),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
diff --git a/docs/en/user_guides/finetune.md b/docs/en/user_guides/finetune.md
index be799455..c4427747 100644
--- a/docs/en/user_guides/finetune.md
+++ b/docs/en/user_guides/finetune.md
@@ -117,11 +117,11 @@ train_pipeline = [
     dict(type='RandomCrop', crop_size=32, padding=4),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 test_pipeline = [
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 # dataloader settings
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
@@ -177,11 +177,11 @@ train_pipeline = [
     dict(type='RandomCrop', crop_size=32, padding=4),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 test_pipeline = [
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 # dataloader settings
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
diff --git a/docs/zh_CN/migration.md b/docs/zh_CN/migration.md
index ac27cde5..b0ac7901 100644
--- a/docs/zh_CN/migration.md
+++ b/docs/zh_CN/migration.md
@@ -115,7 +115,7 @@ test_dataloader = val_dataloader
 **`pipeline`** 字段的变化:

-- 原先的 **`ToTensor`**、**`ImageToTensor`** 和 **`Collect`** 被合并为 [`PackClsInputs`](mmpretrain.datasets.transforms.PackClsInputs)
+- 原先的 **`ToTensor`**、**`ImageToTensor`** 和 **`Collect`** 被合并为 [`PackInputs`](mmpretrain.datasets.transforms.PackInputs)
 - 我们建议去除数据集流水线中的 **`Normalize`** 变换,转而使用 `data_preprocessor` 字段进行归一化预处理。
 - [**`RandomFlip`**](mmcv.transforms.RandomFlip) 中的 `flip_prob` 参数被重命名为 `prob`
 - [**`RandomCrop`**](mmpretrain.datasets.transforms.RandomCrop) 中的 `size` 参数被重命名为 `crop_size`
@@ -161,7 +161,7 @@ train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='RandomResizedCrop', scale=224),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 ```
diff --git a/docs/zh_CN/user_guides/config.md b/docs/zh_CN/user_guides/config.md
index 96260046..8aacb4eb 100644
--- a/docs/zh_CN/user_guides/config.md
+++ b/docs/zh_CN/user_guides/config.md
@@ -119,14 +119,14 @@ train_pipeline = [
     dict(type='LoadImageFromFile'),    # 读取图像
     dict(type='RandomResizedCrop', scale=224),    # 随机放缩裁剪
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),    # 随机水平翻转
-    dict(type='PackClsInputs'),    # 准备图像以及标签
+    dict(type='PackInputs'),    # 准备图像以及标签
 ]

 test_pipeline = [
     dict(type='LoadImageFromFile'),    # 读取图像
     dict(type='ResizeEdge', scale=256, edge='short'),    # 短边对其256进行放缩
     dict(type='CenterCrop', crop_size=224),    # 中心裁剪
-    dict(type='PackClsInputs'),    # 准备图像以及标签
+    dict(type='PackInputs'),    # 准备图像以及标签
 ]

 # 构造训练集 dataloader
@@ -320,14 +320,14 @@ train_pipeline = [
         magnitude_level=6,
         magnitude_std=0.5,
         hparams=dict(pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]

 test_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='ResizeEdge', scale=236, edge='short', backend='pillow', interpolation='bicubic'),
     dict(type='CenterCrop', crop_size=224),
-    dict(type='PackClsInputs')
+    dict(type='PackInputs')
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
@@ -376,7 +376,7 @@ train_pipeline = [
         eigvec=EIGVEC,
         alphastd=0.1,
         to_rgb=False),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
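To make the migration bullets above (in both the English and Chinese guides) concrete, here is a hedged before/after sketch of one training pipeline; the normalization numbers are the usual ImageNet statistics, shown only for illustration:

```python
# MMClassification 0.x style:
old_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
    dict(type='Normalize', mean=[123.675, 116.28, 103.53],
         std=[58.395, 57.12, 57.375], to_rgb=True),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='ToTensor', keys=['gt_label']),
    dict(type='Collect', keys=['img', 'gt_label']),
]

# After this patch: `size` becomes `scale`, `flip_prob` becomes `prob`,
# normalization moves into `data_preprocessor`, and the three formatting
# steps collapse into a single PackInputs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', scale=224),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]
data_preprocessor = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
```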
diff --git a/docs/zh_CN/user_guides/finetune.md b/docs/zh_CN/user_guides/finetune.md
index 0d28f691..4a30f8d8 100644
--- a/docs/zh_CN/user_guides/finetune.md
+++ b/docs/zh_CN/user_guides/finetune.md
@@ -105,11 +105,11 @@ train_pipeline = [
     dict(type='RandomCrop', crop_size=32, padding=4),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 test_pipeline = [
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 # 数据加载器设置
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
@@ -164,11 +164,11 @@ train_pipeline = [
     dict(type='RandomCrop', crop_size=32, padding=4),
     dict(type='RandomFlip', prob=0.5, direction='horizontal'),
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 test_pipeline = [
     dict(type='Resize', scale=224),
-    dict(type='PackClsInputs'),
+    dict(type='PackInputs'),
 ]
 # 数据加载器设置
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
diff --git a/mmpretrain/datasets/transforms/__init__.py b/mmpretrain/datasets/transforms/__init__.py
index 1fa905a5..cc2df47e 100644
--- a/mmpretrain/datasets/transforms/__init__.py
+++ b/mmpretrain/datasets/transforms/__init__.py
@@ -1,13 +1,24 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.transforms import (CenterCrop, LoadImageFromFile, Normalize,
+                             RandomFlip, RandomGrayscale, RandomResize, Resize)
+
+from mmpretrain.registry import TRANSFORMS
 from .auto_augment import (AutoAugment, AutoContrast, BaseAugTransform,
                            Brightness, ColorTransform, Contrast, Cutout,
-                           Equalize, Invert, Posterize, RandAugment, Rotate,
-                           Sharpness, Shear, Solarize, SolarizeAdd, Translate)
-from .formatting import (Collect, PackClsInputs, PackMultiTaskInputs, ToNumpy,
+                           Equalize, GaussianBlur, Invert, Posterize,
+                           RandAugment, Rotate, Sharpness, Shear, Solarize,
+                           SolarizeAdd, Translate)
+from .formatting import (Collect, PackInputs, PackMultiTaskInputs, ToNumpy,
                          ToPIL, Transpose)
-from .processing import (Albumentations, ColorJitter, EfficientNetCenterCrop,
-                         EfficientNetRandomCrop, Lighting, RandomCrop,
-                         RandomErasing, RandomResizedCrop, ResizeEdge)
+from .processing import (Albumentations, BEiTMaskGenerator, ColorJitter,
+                         EfficientNetCenterCrop, EfficientNetRandomCrop,
+                         Lighting, RandomCrop, RandomErasing,
+                         RandomResizedCrop, ResizeEdge, SimMIMMaskGenerator)
+from .wrappers import MultiView
+
+for t in (CenterCrop, LoadImageFromFile, Normalize, RandomFlip,
+          RandomGrayscale, RandomResize, Resize):
+    TRANSFORMS.register_module(module=t)

 __all__ = [
     'ToPIL', 'ToNumpy', 'Transpose', 'Collect', 'RandomCrop',
@@ -15,7 +26,9 @@ __all__ = [
     'ColorTransform', 'Solarize', 'Posterize', 'AutoContrast', 'Equalize',
     'Contrast', 'Brightness', 'Sharpness', 'AutoAugment', 'SolarizeAdd',
     'Cutout', 'RandAugment', 'Lighting', 'ColorJitter', 'RandomErasing',
-    'PackClsInputs', 'Albumentations', 'EfficientNetRandomCrop',
+    'PackInputs', 'Albumentations', 'EfficientNetRandomCrop',
     'EfficientNetCenterCrop', 'ResizeEdge', 'BaseAugTransform',
-    'PackMultiTaskInputs'
+    'PackMultiTaskInputs', 'GaussianBlur', 'BEiTMaskGenerator',
+    'SimMIMMaskGenerator', 'CenterCrop', 'LoadImageFromFile', 'Normalize',
+    'RandomFlip', 'RandomGrayscale', 'RandomResize', 'Resize', 'MultiView'
 ]
diff --git a/mmpretrain/datasets/transforms/auto_augment.py b/mmpretrain/datasets/transforms/auto_augment.py
index 9b9987b9..1d169ed5 100644
--- a/mmpretrain/datasets/transforms/auto_augment.py
+++ b/mmpretrain/datasets/transforms/auto_augment.py
@@ -10,6 +10,7 @@
 import numpy as np
 from mmcv.transforms import BaseTransform, Compose, RandomChoice
 from mmcv.transforms.utils import cache_randomness
 from mmengine.utils import is_list_of, is_seq_of
+from PIL import Image, ImageFilter

 from mmpretrain.registry import TRANSFORMS
@@ -53,6 +54,13 @@ class AutoAugment(RandomChoice):
         hparams (dict): Configs of hyperparameters. Hyperparameters will be
             used in policies that require these arguments if these arguments
             are not set in policy dicts. Defaults to ``dict(pad_val=128)``.
+
+    .. admonition:: Available preset policies
+
+        - ``"imagenet"``: Policy for ImageNet, comes from
+          `DeepVoltaire/AutoAugment`_
+
+    .. _DeepVoltaire/AutoAugment: https://github.com/DeepVoltaire/AutoAugment
     """

     def __init__(self,
@@ -119,6 +127,13 @@ class RandAugment(BaseTransform):
             used in policies that require these arguments if these arguments
             are not set in policy dicts. Defaults to ``dict(pad_val=128)``.

+    .. admonition:: Available preset policies
+
+        - ``"timm_increasing"``: The ``_RAND_INCREASING_TRANSFORMS`` policy
+          from `timm`_
+
+    .. _timm: https://github.com/rwightman/pytorch-image-models
+
     Examples:

         To use "timm-increasing" policies collection, select two policies every
@@ -1119,6 +1134,53 @@ class Cutout(BaseAugTransform):
         return repr_str


+@TRANSFORMS.register_module()
+class GaussianBlur(BaseAugTransform):
+    """Gaussian blur images.
+
+    Args:
+        radius (int, float, optional): The blur radius. If None, generate from
+            ``magnitude_range``, see :class:`BaseAugTransform`.
+            Defaults to None.
+        prob (float): The probability for blurring the image, which should be
+            in range [0, 1]. Defaults to 0.5.
+        **kwargs: Other keyword arguments of :class:`BaseAugTransform`.
+    """
+
+    def __init__(self,
+                 radius: Union[int, float, None] = None,
+                 prob: float = 0.5,
+                 **kwargs):
+        super().__init__(prob=prob, random_negative_prob=0., **kwargs)
+        assert (radius is None) ^ (self.magnitude_range is None), \
+            'Please specify only one of `radius` and `magnitude_range`.'
+
+        self.radius = radius
+
+    def transform(self, results):
+        """Apply transform to results."""
+        if self.random_disable():
+            return results
+
+        if self.radius is not None:
+            radius = self.radius
+        else:
+            radius = self.random_magnitude()
+
+        img = results['img']
+        pil_img = Image.fromarray(img)
+        pil_img = pil_img.filter(ImageFilter.GaussianBlur(radius=radius))
+        results['img'] = np.array(pil_img, dtype=img.dtype)
+
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(radius={self.radius}, '
+        repr_str += f'prob={self.prob}{self.extra_repr()})'
+        return repr_str
+
+
 # yapf: disable
 # flake8: noqa
 AUTOAUG_POLICIES = {
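The new `GaussianBlur` transform above takes either a fixed `radius` or a `magnitude_range` to sample from, but never both. Two illustrative configs (the radius values are invented for the example; `magnitude_std='inf'` selects uniform sampling, the same setting the unit tests later in this patch use):

```python
# Fixed radius, applied to half of the samples.
blur_fixed = dict(type='GaussianBlur', radius=2, prob=0.5)

# Radius sampled uniformly from [0.1, 2.0] on every call.
blur_sampled = dict(
    type='GaussianBlur',
    magnitude_range=(0.1, 2.0),
    magnitude_std='inf',
    prob=1.0)

# Passing both (or neither) of radius/magnitude_range trips the
# XOR assertion in __init__.
```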
diff --git a/mmpretrain/datasets/transforms/formatting.py b/mmpretrain/datasets/transforms/formatting.py
index d3883304..4ba0aa17 100644
--- a/mmpretrain/datasets/transforms/formatting.py
+++ b/mmpretrain/datasets/transforms/formatting.py
@@ -1,10 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from collections import defaultdict
 from collections.abc import Sequence
-from functools import partial

 import numpy as np
 import torch
+import torchvision.transforms.functional as F
 from mmcv.transforms import BaseTransform
 from mmengine.utils import is_str
 from PIL import Image
@@ -37,18 +37,18 @@ def to_tensor(data):

 @TRANSFORMS.register_module()
-class PackClsInputs(BaseTransform):
-    """Pack the inputs data for the classification.
+class PackInputs(BaseTransform):
+    """Pack the inputs data.

     **Required Keys:**

-    - img
-    - gt_label (optional)
-    - ``*meta_keys`` (optional)
+    - ``input_key``
+    - ``*algorithm_keys``
+    - ``*meta_keys``

     **Deleted Keys:**

-    All keys in the dict.
+    All other keys in the dict.

     **Added Keys:**
@@ -57,50 +57,110 @@
         annotation info of the sample.

     Args:
-        meta_keys (Sequence[str]): The meta keys to be saved in the
-            ``metainfo`` of the packed ``data_samples``.
-            Defaults to a tuple includes keys:
+        input_key (str): The key of element to feed into the model forwarding.
+            Defaults to 'img'.
+        algorithm_keys (Sequence[str]): The keys of custom elements to be used
+            in the algorithm. Defaults to an empty tuple.
+        meta_keys (Sequence[str]): The keys of meta information to be saved in
+            the data sample. Defaults to :attr:`PackInputs.DEFAULT_META_KEYS`.

-            - ``sample_idx``: The id of the image sample.
-            - ``img_path``: The path to the image file.
-            - ``ori_shape``: The original shape of the image as a tuple (H, W).
-            - ``img_shape``: The shape of the image after the pipeline as a
-              tuple (H, W).
-            - ``scale_factor``: The scale factor between the resized image and
-              the original image.
-            - ``flip``: A boolean indicating if image flip transform was used.
-            - ``flip_direction``: The flipping direction.
+    .. admonition:: Default algorithm keys
+
+        Besides the specified ``algorithm_keys``, we will set some default keys
+        into the output data sample and do some formatting. Therefore, you
+        don't need to set these keys in the ``algorithm_keys``.
+
+        - ``gt_label``: The ground-truth label. The value will be converted
+          into a 1-D tensor.
+        - ``gt_score``: The ground-truth score. The value will be converted
+          into a 1-D tensor.
+        - ``mask``: The mask for some self-supervised tasks. The value will
+          be converted into a tensor.
+
+    .. admonition:: Default meta keys
+
+        - ``sample_idx``: The id of the image sample.
+        - ``img_path``: The path to the image file.
+        - ``ori_shape``: The original shape of the image as a tuple (H, W).
+        - ``img_shape``: The shape of the image after the pipeline as a
+          tuple (H, W).
+        - ``scale_factor``: The scale factor between the resized image and
+          the original image.
+        - ``flip``: A boolean indicating if image flip transform was used.
+        - ``flip_direction``: The flipping direction.
     """

+    DEFAULT_META_KEYS = ('sample_idx', 'img_path', 'ori_shape', 'img_shape',
+                         'scale_factor', 'flip', 'flip_direction')
+
     def __init__(self,
-                 meta_keys=('sample_idx', 'img_path', 'ori_shape', 'img_shape',
-                            'scale_factor', 'flip', 'flip_direction')):
+                 input_key='img',
+                 algorithm_keys=(),
+                 meta_keys=DEFAULT_META_KEYS):
+        self.input_key = input_key
+        self.algorithm_keys = algorithm_keys
         self.meta_keys = meta_keys

+    @staticmethod
+    def format_input(input_):
+        if isinstance(input_, list):
+            return [PackInputs.format_input(item) for item in input_]
+        elif isinstance(input_, np.ndarray):
+            if input_.ndim == 2:  # For grayscale image.
+                input_ = np.expand_dims(input_, -1)
+            if input_.ndim == 3 and not input_.flags.c_contiguous:
+                input_ = np.ascontiguousarray(input_.transpose(2, 0, 1))
+                input_ = to_tensor(input_)
+            elif input_.ndim == 3:
+                # convert to tensor first to accelerate, see
+                # https://github.com/open-mmlab/mmdetection/pull/9533
+                input_ = to_tensor(input_).permute(2, 0, 1).contiguous()
+            else:
+                # convert input with other shape to tensor without permute,
+                # like video input (num_crops, C, T, H, W).
+                input_ = to_tensor(input_)
+        elif isinstance(input_, Image.Image):
+            input_ = F.pil_to_tensor(input_)
+        elif not isinstance(input_, torch.Tensor):
+            raise TypeError(f'Unsupported input type {type(input_)}.')
+
+        return input_
+
     def transform(self, results: dict) -> dict:
         """Method to pack the input data."""
         packed_results = dict()
-        if 'img' in results:
-            img = results['img']
-            if len(img.shape) < 3:
-                img = np.expand_dims(img, -1)
-            img = np.ascontiguousarray(img.transpose(2, 0, 1))
-            packed_results['inputs'] = to_tensor(img)
+        if self.input_key in results:
+            input_ = results[self.input_key]
+            packed_results['inputs'] = self.format_input(input_)

         data_sample = DataSample()
+
+        # Set default keys
         if 'gt_label' in results:
             data_sample.set_gt_label(results['gt_label'])
         if 'gt_score' in results:
             data_sample.set_gt_score(results['gt_score'])
+        if 'mask' in results:
+            data_sample.set_mask(results['mask'])
+
+        # Set custom algorithm keys
+        for key in self.algorithm_keys:
+            if key in results:
+                data_sample.set_field(results[key], key)
+
+        # Set meta keys
+        for key in self.meta_keys:
+            if key in results:
+                data_sample.set_field(results[key], key, field_type='metainfo')

-        img_meta = {k: results[k] for k in self.meta_keys if k in results}
-        data_sample.set_metainfo(img_meta)
         packed_results['data_samples'] = data_sample
         return packed_results

     def __repr__(self) -> str:
         repr_str = self.__class__.__name__
-        repr_str += f'(meta_keys={self.meta_keys})'
+        repr_str += f"(input_key='{self.input_key}', "
+        repr_str += f'algorithm_keys={self.algorithm_keys}, '
+        repr_str += f'meta_keys={self.meta_keys})'
         return repr_str
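A small sketch of what the new `PackInputs.format_input` above does with each input layout it handles (the recursive list branch relies on the corrected `PackInputs.format_input` call); the shapes are illustrative and assume an mmpretrain build with this patch:

```python
import numpy as np
from PIL import Image
from mmpretrain.datasets.transforms import PackInputs

# HWC uint8 image -> contiguous CHW tensor.
hwc = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)
assert PackInputs.format_input(hwc).shape == (3, 300, 400)

# Grayscale HW image -> a channel axis is added first.
assert PackInputs.format_input(hwc.mean(-1)).shape == (1, 300, 400)

# PIL images go through torchvision's pil_to_tensor.
assert PackInputs.format_input(Image.fromarray(hwc)).shape == (3, 300, 400)

# Lists (e.g. the multiple views from MultiView) are formatted element-wise.
views = PackInputs.format_input([hwc, hwc])
assert isinstance(views, list) and len(views) == 2
```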
@@ -109,34 +169,20 @@
 class PackMultiTaskInputs(BaseTransform):
     """Convert all image labels of multi-task dataset to a dict of tensor.

     Args:
-        tasks (List[str]): The task names defined in the dataset.
-        meta_keys(Sequence[str]): The meta keys to be saved in the
-            ``metainfo`` of the packed ``data_samples``.
-            Defaults to a tuple includes keys:
-
-            - ``sample_idx``: The id of the image sample.
-            - ``img_path``: The path to the image file.
-            - ``ori_shape``: The original shape of the image as a tuple (H, W).
-            - ``img_shape``: The shape of the image after the pipeline as a
-              tuple (H, W).
-            - ``scale_factor``: The scale factor between the resized image and
-              the original image.
-            - ``flip``: A boolean indicating if image flip transform was used.
-            - ``flip_direction``: The flipping direction.
+        multi_task_fields (Sequence[str]): The fields to split into per-task
+            values, like ``gt_label``; each such field should hold a dict
+            mapping task names to the task's value.
+        input_key (str): The key of element to feed into the model forwarding.
+            Defaults to 'img'.
+        task_handlers (dict): The pack transform to build for each task. Tasks
+            not listed here are packed with a default ``PackInputs``.
+            Defaults to an empty dict.
     """

     def __init__(self,
-                 task_handlers=dict(),
-                 multi_task_fields=('gt_label', ),
-                 meta_keys=('sample_idx', 'img_path', 'ori_shape', 'img_shape',
-                            'scale_factor', 'flip', 'flip_direction')):
+                 multi_task_fields,
+                 input_key='img',
+                 task_handlers=dict()):
         self.multi_task_fields = multi_task_fields
-        self.meta_keys = meta_keys
-        self.task_handlers = defaultdict(
-            partial(PackClsInputs, meta_keys=meta_keys))
+        self.input_key = input_key
+        self.task_handlers = defaultdict(PackInputs)
         for task_name, task_handler in task_handlers.items():
-            self.task_handlers[task_name] = TRANSFORMS.build(
-                dict(type=task_handler, meta_keys=meta_keys))
+            self.task_handlers[task_name] = TRANSFORMS.build(task_handler)

     def transform(self, results: dict) -> dict:
         """Method to pack the input data.
@@ -147,12 +193,9 @@
         packed_results = dict()
         results = results.copy()

-        if 'img' in results:
-            img = results.pop('img')
-            if len(img.shape) < 3:
-                img = np.expand_dims(img, -1)
-            img = np.ascontiguousarray(img.transpose(2, 0, 1))
-            packed_results['inputs'] = to_tensor(img)
+        if self.input_key in results:
+            input_ = results[self.input_key]
+            packed_results['inputs'] = PackInputs.format_input(input_)

         task_results = defaultdict(dict)
         for field in self.multi_task_fields:
@@ -172,13 +215,12 @@
     def __repr__(self):
         repr = self.__class__.__name__
-        task_handlers = {
-            name: handler.__class__.__name__
-            for name, handler in self.task_handlers.items()
-        }
-        repr += f'(task_handlers={task_handlers}, '
-        repr += f'multi_task_fields={self.multi_task_fields}, '
-        repr += f'meta_keys={self.meta_keys})'
+        task_handlers = ', '.join(
+            f"'{name}': {handler.__class__.__name__}"
+            for name, handler in self.task_handlers.items())
+        repr += f'(multi_task_fields={self.multi_task_fields}, '
+        repr += f"input_key='{self.input_key}', "
+        repr += f'task_handlers={{{task_handlers}}})'
         return repr
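The reworked `PackMultiTaskInputs` above delegates packing to one handler per task. A hedged config sketch, with the task names invented for illustration (tasks without an explicit handler fall back to a default `PackInputs`):

```python
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', scale=224),
    dict(
        type='PackMultiTaskInputs',
        # Each listed field should hold a per-task dict in the results,
        # e.g. results['gt_label'] == {'species': 3, 'color': 1}.
        multi_task_fields=['gt_label'],
        task_handlers=dict(species=dict(type='PackInputs')),
    ),
]
```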
diff --git a/mmpretrain/datasets/transforms/processing.py b/mmpretrain/datasets/transforms/processing.py
index c66a1339..9d52d9ad 100644
--- a/mmpretrain/datasets/transforms/processing.py
+++ b/mmpretrain/datasets/transforms/processing.py
@@ -834,17 +834,20 @@ class ColorJitter(BaseTransform):
             hue_factor is chosen uniformly from ``[-hue, hue]`` (0 <= hue
             <= 0.5) or the given ``[min, max]`` (-0.5 <= min <= max <= 0.5).
             Defaults to 0.
+        backend (str): The backend to operate the image. Defaults to 'pillow'.
     """

     def __init__(self,
                  brightness: Union[float, Sequence[float]] = 0.,
                  contrast: Union[float, Sequence[float]] = 0.,
                  saturation: Union[float, Sequence[float]] = 0.,
-                 hue: Union[float, Sequence[float]] = 0.):
+                 hue: Union[float, Sequence[float]] = 0.,
+                 backend='pillow'):
         self.brightness = self._set_range(brightness, 'brightness')
         self.contrast = self._set_range(contrast, 'contrast')
         self.saturation = self._set_range(saturation, 'saturation')
         self.hue = self._set_range(hue, 'hue', center=0, bound=(-0.5, 0.5))
+        self.backend = backend

     def _set_range(self, value, name, center=1, bound=(0, float('inf'))):
         """Set the range of magnitudes."""
@@ -906,13 +909,15 @@
         for index in trans_inds:
             if index == 0 and brightness is not None:
-                img = mmcv.adjust_brightness(img, brightness)
+                img = mmcv.adjust_brightness(
+                    img, brightness, backend=self.backend)
             elif index == 1 and contrast is not None:
-                img = mmcv.adjust_contrast(img, contrast)
+                img = mmcv.adjust_contrast(img, contrast, backend=self.backend)
             elif index == 2 and saturation is not None:
-                img = mmcv.adjust_color(img, alpha=saturation)
+                img = mmcv.adjust_color(
+                    img, alpha=saturation, backend=self.backend)
             elif index == 3 and hue is not None:
-                img = mmcv.adjust_hue(img, hue)
+                img = mmcv.adjust_hue(img, hue, backend=self.backend)

         results['img'] = img
         return results
@@ -1192,3 +1197,339 @@ class Albumentations(BaseTransform):
         repr_str = self.__class__.__name__
         repr_str += f'(transforms={repr(self.transforms)})'
         return repr_str
+
+
+@TRANSFORMS.register_module()
+class SimMIMMaskGenerator(BaseTransform):
+    """Generate random block mask for each Image.
+
+    **Added Keys**:
+
+    - mask
+
+    This module is used in SimMIM to generate masks.
+
+    Args:
+        input_size (int): Size of input image. Defaults to 192.
+        mask_patch_size (int): Size of each block mask. Defaults to 32.
+        model_patch_size (int): Patch size of each token. Defaults to 4.
+        mask_ratio (float): The mask ratio of image. Defaults to 0.6.
+    """
+
+    def __init__(self,
+                 input_size: int = 192,
+                 mask_patch_size: int = 32,
+                 model_patch_size: int = 4,
+                 mask_ratio: float = 0.6):
+        self.input_size = input_size
+        self.mask_patch_size = mask_patch_size
+        self.model_patch_size = model_patch_size
+        self.mask_ratio = mask_ratio
+
+        assert self.input_size % self.mask_patch_size == 0
+        assert self.mask_patch_size % self.model_patch_size == 0
+
+        self.rand_size = self.input_size // self.mask_patch_size
+        self.scale = self.mask_patch_size // self.model_patch_size
+
+        self.token_count = self.rand_size**2
+        self.mask_count = int(np.ceil(self.token_count * self.mask_ratio))
+
+    def transform(self, results: dict) -> dict:
+        """Method to generate random block mask for each Image in SimMIM.
+
+        Args:
+            results (dict): Result dict from previous pipeline.
+
+        Returns:
+            dict: Result dict with added key ``mask``.
+        """
+        mask_idx = np.random.permutation(self.token_count)[:self.mask_count]
+        mask = np.zeros(self.token_count, dtype=int)
+        mask[mask_idx] = 1
+
+        mask = mask.reshape((self.rand_size, self.rand_size))
+        mask = mask.repeat(self.scale, axis=0).repeat(self.scale, axis=1)
+
+        results.update({'mask': mask})
+
+        return results
+
+    def __repr__(self) -> str:
+        repr_str = self.__class__.__name__
+        repr_str += f'(input_size={self.input_size}, '
+        repr_str += f'mask_patch_size={self.mask_patch_size}, '
+        repr_str += f'model_patch_size={self.model_patch_size}, '
+        repr_str += f'mask_ratio={self.mask_ratio})'
+        return repr_str
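Worked through with the defaults of `SimMIMMaskGenerator` above: a 192-px input with 32-px mask patches gives a 6×6 grid, ceil(36 × 0.6) = 22 masked blocks, and each block is expanded by scale = 32 // 4 = 8 into the 48×48 token-level mask that the unit tests later in this patch also check. A standalone replay of the same arithmetic:

```python
import numpy as np

rand_size = 192 // 32                          # 6x6 grid of maskable blocks
token_count = rand_size ** 2                   # 36
mask_count = int(np.ceil(token_count * 0.6))   # 22
scale = 32 // 4                                # 8x expansion to token level

mask = np.zeros(token_count, dtype=int)
mask[np.random.permutation(token_count)[:mask_count]] = 1
mask = mask.reshape(rand_size, rand_size)
mask = mask.repeat(scale, axis=0).repeat(scale, axis=1)

assert mask.shape == (48, 48)
assert mask.sum() == mask_count * scale**2     # 22 blocks of 8x8 tokens
```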
+ """ + delta = 0 + for _ in range(10): + target_area = np.random.uniform(self.min_num_patches, + max_mask_patches) + aspect_ratio = math.exp(np.random.uniform(*self.log_aspect_ratio)) + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + if w < self.width and h < self.height: + top = np.random.randint(0, self.height - h) + left = np.random.randint(0, self.width - w) + + num_masked = mask[top:top + h, left:left + w].sum() + # Overlap + if 0 < h * w - num_masked <= max_mask_patches: + for i in range(top, top + h): + for j in range(left, left + w): + if mask[i, j] == 0: + mask[i, j] = 1 + delta += 1 + if delta > 0: + break + return delta + + def transform(self, results: dict) -> dict: + """Method to generate random block mask for each Image in BEiT. + + Args: + results (dict): Result dict from previous pipeline. + + Returns: + dict: Result dict with added key ``mask``. + """ + mask = np.zeros(shape=(self.height, self.width), dtype=int) + + mask_count = 0 + while mask_count != self.num_masking_patches: + max_mask_patches = self.num_masking_patches - mask_count + max_mask_patches = min(max_mask_patches, self.max_num_patches) + + delta = self._mask(mask, max_mask_patches) + mask_count += delta + results.update({'mask': mask}) + + return results + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(height={self.height}, ' + repr_str += f'width={self.width}, ' + repr_str += f'num_patches={self.num_patches}, ' + repr_str += f'num_masking_patches={self.num_masking_patches}, ' + repr_str += f'min_num_patches={self.min_num_patches}, ' + repr_str += f'max_num_patches={self.max_num_patches}, ' + repr_str += f'log_aspect_ratio={self.log_aspect_ratio})' + return repr_str + + +@TRANSFORMS.register_module() +class RandomResizedCropAndInterpolationWithTwoPic(BaseTransform): + """Crop the given PIL Image to random size and aspect ratio with random + interpolation. + + **Required Keys**: + + - img + + **Modified Keys**: + + - img + + **Added Keys**: + + - target_img + + This module is borrowed from + https://github.com/microsoft/unilm/tree/master/beit. + + A crop of random size (default: of 0.08 to 1.0) of the original size and a + random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio + is made. This crop is finally resized to given size. This is popularly used + to train the Inception networks. This module first crops the image and + resizes the crop to two different sizes. + + Args: + size (Union[tuple, int]): Expected output size of each edge of the + first image. + second_size (Union[tuple, int], optional): Expected output size of each + edge of the second image. + scale (tuple[float, float]): Range of size of the origin size cropped. + Defaults to (0.08, 1.0). + ratio (tuple[float, float]): Range of aspect ratio of the origin aspect + ratio cropped. Defaults to (3./4., 4./3.). + interpolation (str): The interpolation for the first image. Defaults + to ``bilinear``. + second_interpolation (str): The interpolation for the second image. + Defaults to ``lanczos``. + """ + + def __init__(self, + size: Union[tuple, int], + second_size=None, + scale=(0.08, 1.0), + ratio=(3. / 4., 4. 
+
+
+@TRANSFORMS.register_module()
+class RandomResizedCropAndInterpolationWithTwoPic(BaseTransform):
+    """Crop the given PIL Image to random size and aspect ratio with random
+    interpolation.
+
+    **Required Keys**:
+
+    - img
+
+    **Modified Keys**:
+
+    - img
+
+    **Added Keys**:
+
+    - target_img
+
+    This module is borrowed from
+    https://github.com/microsoft/unilm/tree/master/beit.
+
+    A crop of random size (default: of 0.08 to 1.0) of the original size and a
+    random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio
+    is made. This crop is finally resized to the given size. This is popularly
+    used to train the Inception networks. This module first crops the image
+    and resizes the crop to two different sizes.
+
+    Args:
+        size (Union[tuple, int]): Expected output size of each edge of the
+            first image.
+        second_size (Union[tuple, int], optional): Expected output size of each
+            edge of the second image.
+        scale (tuple[float, float]): Range of size of the origin size cropped.
+            Defaults to (0.08, 1.0).
+        ratio (tuple[float, float]): Range of aspect ratio of the origin aspect
+            ratio cropped. Defaults to (3./4., 4./3.).
+        interpolation (str): The interpolation for the first image. Defaults
+            to ``bilinear``.
+        second_interpolation (str): The interpolation for the second image.
+            Defaults to ``lanczos``.
+    """
+
+    def __init__(self,
+                 size: Union[tuple, int],
+                 second_size=None,
+                 scale=(0.08, 1.0),
+                 ratio=(3. / 4., 4. / 3.),
+                 interpolation='bilinear',
+                 second_interpolation='lanczos') -> None:
+        if isinstance(size, tuple):
+            self.size = size
+        else:
+            self.size = (size, size)
+        if second_size is not None:
+            if isinstance(second_size, tuple):
+                self.second_size = second_size
+            else:
+                self.second_size = (second_size, second_size)
+        else:
+            self.second_size = None
+        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+            raise ValueError('range should be of kind (min, max)')
+
+        if interpolation == 'random':
+            self.interpolation = ('bilinear', 'bicubic')
+        else:
+            self.interpolation = interpolation
+        self.second_interpolation = second_interpolation
+        self.scale = scale
+        self.ratio = ratio
+
+    @staticmethod
+    def get_params(img: np.ndarray, scale: tuple,
+                   ratio: tuple) -> Sequence[int]:
+        """Get parameters for ``crop`` for a random sized crop.
+
+        Args:
+            img (np.ndarray): Image to be cropped.
+            scale (tuple): range of size of the origin size cropped
+            ratio (tuple): range of aspect ratio of the origin aspect
+                ratio cropped
+
+        Returns:
+            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
+                sized crop.
+        """
+        img_h, img_w = img.shape[:2]
+        area = img_h * img_w
+
+        for _ in range(10):
+            target_area = np.random.uniform(*scale) * area
+            log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
+            aspect_ratio = math.exp(np.random.uniform(*log_ratio))
+
+            w = int(round(math.sqrt(target_area * aspect_ratio)))
+            h = int(round(math.sqrt(target_area / aspect_ratio)))
+
+            if w <= img_w and h <= img_h:
+                i = np.random.randint(0, img_h - h)
+                j = np.random.randint(0, img_w - w)
+                return i, j, h, w
+
+        # Fallback to central crop
+        in_ratio = img_w / img_h
+        if in_ratio < min(ratio):
+            w = img_w
+            h = int(round(w / min(ratio)))
+        elif in_ratio > max(ratio):
+            h = img_h
+            w = int(round(h * max(ratio)))
+        else:  # whole image
+            w = img_w
+            h = img_h
+        i = (img_h - h) // 2
+        j = (img_w - w) // 2
+        return i, j, h, w
+
+    def transform(self, results: dict) -> dict:
+        """Crop the given image and resize it to two different sizes.
+
+        This module crops the given image randomly and resizes the crop to two
+        different sizes. This is popularly used in BEiT-style masked image
+        modeling, where an off-the-shelf model is used to provide the target.
+
+        Args:
+            results (dict): Results from previous pipeline.
+
+        Returns:
+            dict: Results after applying this transformation.
+ """ + img = results['img'] + i, j, h, w = self.get_params(img, self.scale, self.ratio) + if isinstance(self.interpolation, (tuple, list)): + interpolation = np.random.choice(self.interpolation) + else: + interpolation = self.interpolation + if self.second_size is None: + img = img[i:i + h, j:j + w] + img = mmcv.imresize(img, self.size, interpolation=interpolation) + results.update({'img': img}) + else: + img = img[i:i + h, j:j + w] + img_sample = mmcv.imresize( + img, self.size, interpolation=interpolation) + img_target = mmcv.imresize( + img, self.second_size, interpolation=self.second_interpolation) + results.update({'img': [img_sample, img_target]}) + return results + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(size={self.size}, ' + repr_str += f'second_size={self.second_size}, ' + repr_str += f'interpolation={self.interpolation}, ' + repr_str += f'second_interpolation={self.second_interpolation}, ' + repr_str += f'scale={self.scale}, ' + repr_str += f'ratio={self.ratio})' + return repr_str diff --git a/mmpretrain/datasets/transforms/wrappers.py b/mmpretrain/datasets/transforms/wrappers.py new file mode 100644 index 00000000..93bc31d1 --- /dev/null +++ b/mmpretrain/datasets/transforms/wrappers.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +from typing import Callable, List, Union + +from mmcv.transforms import BaseTransform, Compose + +from mmpretrain.registry import TRANSFORMS + +# Define type of transform or transform config +Transform = Union[dict, Callable[[dict], dict]] + + +@TRANSFORMS.register_module() +class MultiView(BaseTransform): + """A transform wrapper for multiple views of an image. + + Args: + transforms (list[dict | callable], optional): Sequence of transform + object or config dict to be wrapped. + mapping (dict): A dict that defines the input key mapping. + The keys corresponds to the inner key (i.e., kwargs of the + ``transform`` method), and should be string type. The values + corresponds to the outer keys (i.e., the keys of the + data/results), and should have a type of string, list or dict. + None means not applying input mapping. Default: None. + allow_nonexist_keys (bool): If False, the outer keys in the mapping + must exist in the input data, or an exception will be raised. + Default: False. + + Examples: + >>> # Example 1: MultiViews 1 pipeline with 2 views + >>> pipeline = [ + >>> dict(type='MultiView', + >>> num_views=2, + >>> transforms=[ + >>> [ + >>> dict(type='Resize', scale=224))], + >>> ]) + >>> ] + >>> # Example 2: MultiViews 2 pipelines, the first with 2 views, + >>> # the second with 6 views + >>> pipeline = [ + >>> dict(type='MultiView', + >>> num_views=[2, 6], + >>> transforms=[ + >>> [ + >>> dict(type='Resize', scale=224)], + >>> [ + >>> dict(type='Resize', scale=224), + >>> dict(type='RandomSolarize')], + >>> ]) + >>> ] + """ + + def __init__(self, transforms: List[List[Transform]], + num_views: Union[int, List[int]]) -> None: + + if isinstance(num_views, int): + num_views = [num_views] + assert isinstance(num_views, List) + assert len(num_views) == len(transforms) + self.num_views = num_views + + self.pipelines = [] + for trans in transforms: + pipeline = Compose(trans) + self.pipelines.append(pipeline) + + self.transforms = [] + for i in range(len(num_views)): + self.transforms.extend([self.pipelines[i]] * num_views[i]) + + def transform(self, results: dict) -> dict: + """Apply transformation to inputs. + + Args: + results (dict): Result dict from previous pipelines. 
diff --git a/mmpretrain/datasets/transforms/wrappers.py b/mmpretrain/datasets/transforms/wrappers.py
new file mode 100644
index 00000000..93bc31d1
--- /dev/null
+++ b/mmpretrain/datasets/transforms/wrappers.py
@@ -0,0 +1,97 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from typing import Callable, List, Union
+
+from mmcv.transforms import BaseTransform, Compose
+
+from mmpretrain.registry import TRANSFORMS
+
+# Define type of transform or transform config
+Transform = Union[dict, Callable[[dict], dict]]
+
+
+@TRANSFORMS.register_module()
+class MultiView(BaseTransform):
+    """A transform wrapper for multiple views of an image.
+
+    Args:
+        transforms (list[list[dict | callable]]): A list of view pipelines;
+            each pipeline is a sequence of transform objects or config dicts
+            to be composed.
+        num_views (int | list[int]): The number of views to generate with
+            each pipeline. If a list, it must have the same length as
+            ``transforms``.
+
+    Examples:
+        >>> # Example 1: 1 pipeline with 2 views
+        >>> pipeline = [
+        >>>     dict(type='MultiView',
+        >>>         num_views=2,
+        >>>         transforms=[
+        >>>             [
+        >>>                dict(type='Resize', scale=224)],
+        >>>         ])
+        >>> ]
+        >>> # Example 2: 2 pipelines, the first with 2 views,
+        >>> # the second with 6 views
+        >>> pipeline = [
+        >>>     dict(type='MultiView',
+        >>>         num_views=[2, 6],
+        >>>         transforms=[
+        >>>             [
+        >>>                dict(type='Resize', scale=224)],
+        >>>             [
+        >>>                dict(type='Resize', scale=224),
+        >>>                dict(type='RandomSolarize')],
+        >>>         ])
+        >>> ]
+    """
+
+    def __init__(self, transforms: List[List[Transform]],
+                 num_views: Union[int, List[int]]) -> None:
+
+        if isinstance(num_views, int):
+            num_views = [num_views]
+        assert isinstance(num_views, List)
+        assert len(num_views) == len(transforms)
+        self.num_views = num_views
+
+        self.pipelines = []
+        for trans in transforms:
+            pipeline = Compose(trans)
+            self.pipelines.append(pipeline)
+
+        self.transforms = []
+        for i in range(len(num_views)):
+            self.transforms.extend([self.pipelines[i]] * num_views[i])
+
+    def transform(self, results: dict) -> dict:
+        """Apply transformation to inputs.
+
+        Args:
+            results (dict): Result dict from previous pipelines.
+
+        Returns:
+            dict: Transformed results.
+        """
+        multi_views_outputs = dict(img=[])
+        for trans in self.transforms:
+            inputs = copy.deepcopy(results)
+            outputs = trans(inputs)
+
+            multi_views_outputs['img'].append(outputs['img'])
+        results.update(multi_views_outputs)
+        return results
+
+    def __repr__(self) -> str:
+        repr_str = self.__class__.__name__ + '('
+        for i, p in enumerate(self.pipelines):
+            repr_str += f'\nPipeline {i + 1} with {self.num_views[i]} views:\n'
+            repr_str += str(p)
+        repr_str += ')'
+        return repr_str
diff --git a/mmpretrain/structures/__init__.py b/mmpretrain/structures/__init__.py
index 16aaa20b..e7de8630 100644
--- a/mmpretrain/structures/__init__.py
+++ b/mmpretrain/structures/__init__.py
@@ -1,10 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .data_sample import DataSample
 from .multi_task_data_sample import MultiTaskDataSample
-from .utils import (batch_label_to_onehot, cat_batch_labels, label_to_onehot,
-                    tensor_split)
+from .utils import (batch_label_to_onehot, cat_batch_labels, format_label,
+                    format_score, label_to_onehot, tensor_split)

 __all__ = [
     'DataSample', 'batch_label_to_onehot', 'cat_batch_labels', 'tensor_split',
-    'MultiTaskDataSample', 'label_to_onehot'
+    'MultiTaskDataSample', 'label_to_onehot', 'format_label', 'format_score'
 ]
diff --git a/tests/test_datasets/test_transforms/test_auto_augment.py b/tests/test_datasets/test_transforms/test_auto_augment.py
index 1720b728..fa076101 100644
--- a/tests/test_datasets/test_transforms/test_auto_augment.py
+++ b/tests/test_datasets/test_transforms/test_auto_augment.py
@@ -1270,3 +1270,60 @@ class TestCutout(TestCase):
         transform = TRANSFORMS.build(cfg)
         self.assertIn('Cutout(shape=None', repr(transform))
         self.assertIn('magnitude_range=(0, 41)', repr(transform))
+
+
+class TestGaussianBlur(TestCase):
+    DEFAULT_ARGS = dict(type='GaussianBlur')
+
+    def test_initialize(self):
+        with self.assertRaisesRegex(AssertionError, 'only one of'):
+            TRANSFORMS.build(self.DEFAULT_ARGS)
+
+        with self.assertRaisesRegex(AssertionError, 'only one of'):
+            cfg = {**self.DEFAULT_ARGS, 'radius': 1, 'magnitude_range': (1, 2)}
+            TRANSFORMS.build(cfg)
+
+    def test_transform(self):
+        transform_func = 'PIL.ImageFilter.GaussianBlur'
+
+        # test params inputs
+        with patch(transform_func, autospec=True) as mock:
+            cfg = {
+                **self.DEFAULT_ARGS,
+                'radius': 0.5,
+                'prob': 1.,
+            }
+            TRANSFORMS.build(cfg)(construct_toy_data())
+            mock.assert_called_once_with(radius=0.5)
+
+        # test prob
+        with patch(transform_func, autospec=True) as mock:
+            cfg = {
+                **self.DEFAULT_ARGS,
+                'radius': 0.5,
+                'prob': 0.,
+            }
+            TRANSFORMS.build(cfg)(construct_toy_data())
+            mock.assert_not_called()
+
+        # test magnitude_range
+        with patch(transform_func, autospec=True) as mock:
+            cfg = {
+                **self.DEFAULT_ARGS,
+                'magnitude_range': (0.1, 2),
+                'magnitude_std': 'inf',
+                'prob': 1.,
+            }
+            TRANSFORMS.build(cfg)(construct_toy_data())
+            self.assertTrue(0.1 < mock.call_args[1]['radius'] < 2)
+
+    def test_repr(self):
+        cfg = {**self.DEFAULT_ARGS, 'radius': 0.1}
+        transform = TRANSFORMS.build(cfg)
+        self.assertIn('GaussianBlur(radius=0.1, prob=0.5', repr(transform))
+        self.assertNotIn('magnitude_range', repr(transform))
+
+        cfg = {**self.DEFAULT_ARGS, 'magnitude_range': (0.1, 2)}
+        transform = TRANSFORMS.build(cfg)
+        self.assertIn('GaussianBlur(radius=None, prob=0.5', repr(transform))
+        self.assertIn('magnitude_range=(0.1, 2)', repr(transform))
diff --git a/tests/test_datasets/test_transforms/test_formatting.py b/tests/test_datasets/test_transforms/test_formatting.py
index 4e570b10..3fe255af 100644
--- a/tests/test_datasets/test_transforms/test_formatting.py
+++ b/tests/test_datasets/test_transforms/test_formatting.py
@@ -12,7 +12,7 @@
 from mmpretrain.registry import TRANSFORMS
 from mmpretrain.structures import DataSample, MultiTaskDataSample

-class TestPackClsInputs(unittest.TestCase):
+class TestPackInputs(unittest.TestCase):

     def test_transform(self):
         img_path = osp.join(osp.dirname(__file__), '../../data/color.jpg')
@@ -25,9 +25,10 @@
             'flip': False,
             'img': mmcv.imread(img_path),
             'gt_label': 2,
+            'custom_key': torch.tensor([1, 2, 3])
         }

-        cfg = dict(type='PackClsInputs')
+        cfg = dict(type='PackInputs', algorithm_keys=['custom_key'])
         transform = TRANSFORMS.build(cfg)
         results = transform(copy.deepcopy(data))
         self.assertIn('inputs', results)
@@ -36,6 +37,7 @@
         self.assertIsInstance(results['data_samples'], DataSample)
         self.assertIn('flip', results['data_samples'].metainfo_keys())
         self.assertIsInstance(results['data_samples'].gt_label, torch.Tensor)
+        self.assertIsInstance(results['data_samples'].custom_key, torch.Tensor)

         # Test grayscale image
         data['img'] = data['img'].mean(-1)
@@ -44,6 +46,21 @@
         results = transform(copy.deepcopy(data))
         self.assertIsInstance(results['inputs'], torch.Tensor)
         self.assertEqual(results['inputs'].shape, (1, 300, 400))

+        # Test video input
+        data['img'] = np.random.randint(
+            0, 256, (10, 3, 1, 224, 224), dtype=np.uint8)
+        results = transform(copy.deepcopy(data))
+        self.assertIn('inputs', results)
+        self.assertIsInstance(results['inputs'], torch.Tensor)
+        self.assertEqual(results['inputs'].shape, (10, 3, 1, 224, 224))
+
+        # Test Pillow input
+        data['img'] = Image.open(img_path)
+        results = transform(copy.deepcopy(data))
+        self.assertIn('inputs', results)
+        self.assertIsInstance(results['inputs'], torch.Tensor)
+        self.assertEqual(results['inputs'].shape, (3, 300, 400))
+
         # Test without `img` and `gt_label`
         del data['img']
         del data['gt_label']
@@ -51,10 +68,11 @@
         self.assertNotIn('gt_label', results['data_samples'])

     def test_repr(self):
-        cfg = dict(type='PackClsInputs', meta_keys=['flip', 'img_shape'])
+        cfg = dict(type='PackInputs', meta_keys=['flip', 'img_shape'])
         transform = TRANSFORMS.build(cfg)
         self.assertEqual(
-            repr(transform), "PackClsInputs(meta_keys=['flip', 'img_shape'])")
+            repr(transform), "PackInputs(input_key='img', algorithm_keys=(), "
+            "meta_keys=['flip', 'img_shape'])")

 class TestTranspose(unittest.TestCase):
@@ -145,7 +163,7 @@ class TestPackMultiTaskInputs(unittest.TestCase):
             },
         }

-        cfg = dict(type='PackMultiTaskInputs', )
+        cfg = dict(type='PackMultiTaskInputs', multi_task_fields=['gt_label'])
         transform = TRANSFORMS.build(cfg)
         results = transform(copy.deepcopy(data))
         self.assertIn('inputs', results)
@@ -170,9 +188,13 @@
         self.assertNotIn('gt_label', results['data_samples'])

     def test_repr(self):
-        cfg = dict(type='PackMultiTaskInputs', meta_keys=['img_shape'])
+        cfg = dict(
+            type='PackMultiTaskInputs',
+            multi_task_fields=['gt_label'],
+            task_handlers=dict(task1=dict(type='PackInputs')),
+        )
         transform = TRANSFORMS.build(cfg)
-        rep = 'PackMultiTaskInputs(task_handlers={},'
-        rep += ' multi_task_fields=(\'gt_label\',),'
-        rep += ' meta_keys=[\'img_shape\'])'
-        self.assertEqual(repr(transform), rep)
+        self.assertEqual(
+            repr(transform),
"PackMultiTaskInputs(multi_task_fields=['gt_label'], " + "input_key='img', task_handlers={'task1': PackInputs})") diff --git a/tests/test_datasets/test_transforms/test_processing.py b/tests/test_datasets/test_transforms/test_processing.py index 878ff5aa..dc00c808 100644 --- a/tests/test_datasets/test_transforms/test_processing.py +++ b/tests/test_datasets/test_transforms/test_processing.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy +import math import random from unittest import TestCase from unittest.mock import ANY, call, patch @@ -562,9 +563,9 @@ class TestColorJitter(TestCase): with patch('numpy.random', np.random.RandomState(0)): mmcv_module = 'mmpretrain.datasets.transforms.processing.mmcv' call_list = [ - call.adjust_color(ANY, alpha=ANY), - call.adjust_hue(ANY, ANY), - call.adjust_brightness(ANY, ANY) + call.adjust_color(ANY, alpha=ANY, backend='pillow'), + call.adjust_hue(ANY, ANY, backend='pillow'), + call.adjust_brightness(ANY, ANY, backend='pillow'), ] with patch(mmcv_module, autospec=True) as mock: transform(results) @@ -800,3 +801,55 @@ class TestAlbumentations(TestCase): self.assertEqual( repr(transform), "Albumentations(transforms=[{'type': " "'ChannelShuffle', 'p': 1}])") + + +class TestSimMIMMaskGenerator(TestCase): + DEFAULT_ARGS = dict( + type='SimMIMMaskGenerator', + input_size=192, + mask_patch_size=32, + model_patch_size=4, + mask_ratio=0.6) + + def test_transform(self): + img = np.random.randint(0, 256, (3, 192, 192), np.uint8) + results = {'img': img} + module = TRANSFORMS.build(self.DEFAULT_ARGS) + + results = module(results) + + self.assertTupleEqual(results['img'].shape, (3, 192, 192)) + self.assertTupleEqual(results['mask'].shape, (48, 48)) + + def test_repr(self): + cfg = copy.deepcopy(self.DEFAULT_ARGS) + transform = TRANSFORMS.build(cfg) + self.assertEqual( + repr(transform), 'SimMIMMaskGenerator(input_size=192, ' + 'mask_patch_size=32, model_patch_size=4, mask_ratio=0.6)') + + +class TestBEiTMaskGenerator(TestCase): + DEFAULT_ARGS = dict( + type='BEiTMaskGenerator', + input_size=(14, 14), + num_masking_patches=75, + max_num_patches=None, + min_num_patches=16) + + def test_transform(self): + module = TRANSFORMS.build(self.DEFAULT_ARGS) + + results = module({}) + + self.assertTupleEqual(results['mask'].shape, (14, 14)) + + def test_repr(self): + cfg = copy.deepcopy(self.DEFAULT_ARGS) + transform = TRANSFORMS.build(cfg) + + log_aspect_ratio = (math.log(0.3), math.log(1 / 0.3)) + self.assertEqual( + repr(transform), 'BEiTMaskGenerator(height=14, width=14, ' + 'num_patches=196, num_masking_patches=75, min_num_patches=16, ' + f'max_num_patches=75, log_aspect_ratio={log_aspect_ratio})') diff --git a/tests/test_datasets/test_transforms/test_wrappers.py b/tests/test_datasets/test_transforms/test_wrappers.py new file mode 100644 index 00000000..fc487ede --- /dev/null +++ b/tests/test_datasets/test_transforms/test_wrappers.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import numpy as np
+from mmcv.transforms import Resize
+
+from mmpretrain.datasets import GaussianBlur, MultiView, Solarize
+
+
+def test_multi_view():
+    original_img = np.ones((4, 4, 3), dtype=np.uint8)
+
+    # test 1 pipeline with 2 views
+    pipeline1 = [
+        Resize(2),
+        GaussianBlur(magnitude_range=(0.1, 2), magnitude_std='inf')
+    ]
+
+    transform = MultiView([pipeline1], 2)
+    results = dict(img=original_img)
+    results = transform(results)
+    assert len(results['img']) == 2
+    assert results['img'][0].shape == (2, 2, 3)
+
+    transform = MultiView([pipeline1], [2])
+    results = dict(img=original_img)
+    results = transform(results)
+    assert len(results['img']) == 2
+    assert results['img'][0].shape == (2, 2, 3)
+
+    # test 2 pipelines with 3 views
+    pipeline2 = [
+        Solarize(thr=128),
+        GaussianBlur(magnitude_range=(0.1, 2), magnitude_std='inf')
+    ]
+    transform = MultiView([pipeline1, pipeline2], [1, 2])
+
+    results = dict(img=original_img)
+    results = transform(results)
+    assert len(results['img']) == 3
+    assert results['img'][0].shape == (2, 2, 3)
+    assert results['img'][1].shape == (4, 4, 3)
+
+    # test repr
+    assert isinstance(str(transform), str)
diff --git a/tests/test_models/test_retrievers.py b/tests/test_models/test_retrievers.py
index 5fb45a2f..0e7d9dc0 100644
--- a/tests/test_models/test_retrievers.py
+++ b/tests/test_models/test_retrievers.py
@@ -11,7 +11,7 @@
 from mmengine import ConfigDict
 from mmengine.dataset.utils import default_collate
 from torch.utils.data import DataLoader, Dataset

-from mmpretrain.datasets.transforms import PackClsInputs
+from mmpretrain.datasets.transforms import PackInputs
 from mmpretrain.registry import MODELS
 from mmpretrain.structures import DataSample

@@ -20,7 +20,7 @@ class ExampleDataset(Dataset):

     def __init__(self):
         self.metainfo = None
-        self.pipe = PackClsInputs()
+        self.pipe = PackInputs()

     def __getitem__(self, idx):
         results = dict(