From 7d8aa8f782a05e491476ca96bdd35ef030b8e12d Mon Sep 17 00:00:00 2001 From: zeyuanyin Date: Wed, 26 Jul 2023 00:33:55 +0400 Subject: [PATCH] finish 4 vit_large*.py --- .../configs/_base_/models/vit_base_p16.py | 4 +- .../configs/_base_/models/vit_base_p32.py | 5 +- .../configs/_base_/models/vit_large_p16.py | 29 +++++++++++ .../configs/_base_/models/vit_large_p32.py | 29 +++++++++++ .../vit_base_p32_64xb64_in1k_384px.py | 1 - .../vit_large_p16_64xb64_in1k.py | 20 ++++++++ .../vit_large_p16_64xb64_in1k_384px.py | 48 +++++++++++++++++++ .../vit_large_p32_64xb64_in1k.py | 19 ++++++++ .../vit_large_p32_64xb64_in1k_384px.py | 45 +++++++++++++++++ 9 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 mmpretrain/configs/_base_/models/vit_large_p16.py create mode 100644 mmpretrain/configs/_base_/models/vit_large_p32.py create mode 100644 mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py create mode 100644 mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py create mode 100644 mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py create mode 100644 mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py diff --git a/mmpretrain/configs/_base_/models/vit_base_p16.py b/mmpretrain/configs/_base_/models/vit_base_p16.py index 0a529d7b..0cbb9f7b 100644 --- a/mmpretrain/configs/_base_/models/vit_base_p16.py +++ b/mmpretrain/configs/_base_/models/vit_base_p16.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # This is a BETA new format config file, and the usage may change recently. 
-from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead) +from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead, CrossEntropyLoss) from mmengine.model.weight_init import KaimingInit @@ -25,6 +25,6 @@ model = dict( type=VisionTransformerClsHead, num_classes=1000, in_channels=1024, - loss=dict(type='CrossEntropyLoss', loss_weight=1.0), + loss=dict(type=CrossEntropyLoss, loss_weight=1.0), topk=(1, 5), )) \ No newline at end of file diff --git a/mmpretrain/configs/_base_/models/vit_base_p32.py b/mmpretrain/configs/_base_/models/vit_base_p32.py index 56a0ce58..b0677bd0 100644 --- a/mmpretrain/configs/_base_/models/vit_base_p32.py +++ b/mmpretrain/configs/_base_/models/vit_base_p32.py @@ -1,7 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # This is a BETA new format config file, and the usage may change recently. -from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead) - +from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead, CrossEntropyLoss) from mmengine.model.weight_init import KaimingInit # model settings @@ -25,6 +24,6 @@ model = dict( type=VisionTransformerClsHead, num_classes=1000, in_channels=768, - loss=dict(type='CrossEntropyLoss', loss_weight=1.0), + loss=dict(type=CrossEntropyLoss, loss_weight=1.0), topk=(1, 5), )) \ No newline at end of file diff --git a/mmpretrain/configs/_base_/models/vit_large_p16.py b/mmpretrain/configs/_base_/models/vit_large_p16.py new file mode 100644 index 00000000..b709b6b0 --- /dev/null +++ b/mmpretrain/configs/_base_/models/vit_large_p16.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# This is a BETA new format config file, and the usage may change recently. 
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (VisionTransformer, ImageClassifier,
                               VisionTransformerClsHead, CrossEntropyLoss)
from mmengine.model.weight_init import KaimingInit

# Model settings: ViT-Large backbone with 16x16 patches on 224x224 input,
# classifying into the 1000 ImageNet-1k classes.
model = dict(
    type=ImageClassifier,
    backbone=dict(
        type=VisionTransformer,
        arch='l',  # 'l' selects the Large architecture preset
        img_size=224,
        patch_size=16,
        drop_rate=0.1,  # dropout applied inside the transformer
        # Kaiming fan-in init targeting the Conv2d patch-embedding layer.
        init_cfg=[
            dict(
                type=KaimingInit,
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]),
    neck=None,  # classification head consumes backbone output directly
    head=dict(
        type=VisionTransformerClsHead,
        num_classes=1000,
        # NOTE(review): presumably matches the 'l' preset's embedding dim
        # (1024) — confirm against the VisionTransformer arch table.
        in_channels=1024,
        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
        topk=(1, 5),  # report top-1 / top-5 accuracy
    ))
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (VisionTransformer, ImageClassifier,
                               VisionTransformerClsHead, CrossEntropyLoss)
from mmengine.model.weight_init import KaimingInit

# Model settings: ViT-Large backbone with 32x32 patches on 224x224 input,
# classifying into the 1000 ImageNet-1k classes.
model = dict(
    type=ImageClassifier,
    backbone=dict(
        type=VisionTransformer,
        arch='l',  # 'l' selects the Large architecture preset
        img_size=224,
        patch_size=32,  # coarser patches than the p16 sibling config
        drop_rate=0.1,  # dropout applied inside the transformer
        # Kaiming fan-in init targeting the Conv2d patch-embedding layer.
        init_cfg=[
            dict(
                type=KaimingInit,
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]),
    neck=None,  # classification head consumes backbone output directly
    head=dict(
        type=VisionTransformerClsHead,
        num_classes=1000,
        # NOTE(review): presumably matches the 'l' preset's embedding dim
        # (1024) — confirm against the VisionTransformer arch table.
        in_channels=1024,
        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
        topk=(1, 5),  # report top-1 / top-5 accuracy
    ))
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup

# Inherit model / dataset / schedule / runtime settings from the base
# new-format config modules.
with read_base():
    from .._base_.models.vit_large_p16 import *
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *


# model setting
model = dict(
    # NOTE(review): hidden_dim presumably enables the head's extra hidden
    # (pre-logits) layer — confirm against VisionTransformerClsHead docs.
    head=dict(hidden_dim=3072),
    # Mixup augmentation with alpha=0.2 during training.
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# schedule setting
# Clip gradients to a max L2 norm of 1.0 on top of the inherited optimizer.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup

# Inherit model / dataset / schedule / runtime settings from the base
# new-format config modules.
with read_base():
    from .._base_.models.vit_large_p32 import *
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *

# model setting
model = dict(
    # NOTE(review): hidden_dim presumably enables the head's extra hidden
    # (pre-logits) layer — confirm against VisionTransformerClsHead docs.
    head=dict(hidden_dim=3072),
    # Mixup augmentation with alpha=0.2 during training.
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# schedule setting
# Clip gradients to a max L2 norm of 1.0 on top of the inherited optimizer.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

# Fix: the original additionally imported LoadImageFromFile and RandomFlip
# from mmcv.transforms, then immediately shadowed both with the
# mmpretrain.datasets imports below (flake8 F811). Only the
# mmpretrain.datasets classes are referenced in the pipelines, so the
# redundant mmcv.transforms import is removed.
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)

# Inherit model / dataset / schedule / runtime settings from the base
# new-format config modules.
with read_base():
    from .._base_.models.vit_large_p32 import *
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *

# model setting
# Fine-tune/evaluate at 384x384 instead of the base config's 224x224.
model = dict(backbone=dict(img_size=384))

# dataset setting
# Normalize inputs to roughly [-1, 1] (mean/std of 127.5 per channel).
data_preprocessor = dict(
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    # Resize the short edge to 384, then center-crop to 384x384.
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

# schedule setting
# Clip gradients to a max L2 norm of 1.0 on top of the inherited optimizer.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))