finish 4 vit_large*.py

pull/1727/head
zeyuanyin 2023-07-26 00:33:55 +04:00
parent b22e3eb031
commit 7d8aa8f782
9 changed files with 194 additions and 6 deletions

View File

@@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead)
from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead, CrossEntropyLoss)
from mmengine.model.weight_init import KaimingInit
@@ -25,6 +25,6 @@ model = dict(
type=VisionTransformerClsHead,
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
topk=(1, 5),
))

View File

@@ -1,7 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead)
from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead, CrossEntropyLoss)
from mmengine.model.weight_init import KaimingInit
# model settings
@@ -25,6 +24,6 @@ model = dict(
type=VisionTransformerClsHead,
num_classes=1000,
in_channels=768,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
topk=(1, 5),
))

View File

@@ -0,0 +1,29 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead, CrossEntropyLoss)
from mmengine.model.weight_init import KaimingInit

# model settings
# ViT-Large classifier: 224x224 input split into 16x16 patches.
model = dict(
    type=ImageClassifier,
    backbone=dict(
        type=VisionTransformer,
        arch='l',  # ViT-Large architecture preset
        img_size=224,
        patch_size=16,
        drop_rate=0.1,
        # Kaiming-initialize the patch-embedding Conv2d; nonlinearity='linear'
        # presumably avoids the ReLU gain correction — confirm against
        # mmengine's KaimingInit docs.
        init_cfg=[
            dict(
                type=KaimingInit,
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]),
    neck=None,  # classification head attaches directly to the backbone output
    head=dict(
        type=VisionTransformerClsHead,
        num_classes=1000,  # 1000-way classification (ImageNet-1k scale)
        in_channels=1024,  # must match the backbone's output width for arch='l'
        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
        topk=(1, 5),  # report top-1 / top-5 accuracy
    ))

View File

@@ -0,0 +1,29 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (VisionTransformer, ImageClassifier, VisionTransformerClsHead, CrossEntropyLoss)
from mmengine.model.weight_init import KaimingInit

# model settings
# ViT-Large classifier: 224x224 input split into 32x32 patches.
model = dict(
    type=ImageClassifier,
    backbone=dict(
        type=VisionTransformer,
        arch='l',  # ViT-Large architecture preset
        img_size=224,
        patch_size=32,  # coarser patches than the p16 variant; fewer tokens
        drop_rate=0.1,
        # Kaiming-initialize the patch-embedding Conv2d; nonlinearity='linear'
        # presumably avoids the ReLU gain correction — confirm against
        # mmengine's KaimingInit docs.
        init_cfg=[
            dict(
                type=KaimingInit,
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]),
    neck=None,  # classification head attaches directly to the backbone output
    head=dict(
        type=VisionTransformerClsHead,
        num_classes=1000,  # 1000-way classification (ImageNet-1k scale)
        in_channels=1024,  # must match the backbone's output width for arch='l'
        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
        topk=(1, 5),  # report top-1 / top-5 accuracy
    ))

View File

@@ -14,7 +14,6 @@ with read_base():
from .._base_.schedules.imagenet_bs4096_AdamW import *
from .._base_.default_runtime import *
# model setting
model = dict(backbone=dict(img_size=384))

View File

@@ -0,0 +1,20 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup

# Inherit model / data / schedule / runtime settings from the base configs;
# the star-imports merge their top-level variables into this module.
with read_base():
    from .._base_.models.vit_large_p16 import *
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *

# model setting
model = dict(
    # hidden_dim adds an intermediate layer in the cls head — presumably
    # a VisionTransformerClsHead option; verify against its API docs.
    head=dict(hidden_dim=3072),
    # Mixup batch augmentation with alpha=0.2 during training.
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# schedule setting
# Gradient clipping by global norm, commonly used to stabilize ViT training.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))

View File

@@ -0,0 +1,48 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
# NOTE(review): LoadImageFromFile and RandomFlip were previously imported from
# both mmcv.transforms and mmpretrain.datasets; the mmcv import was fully
# shadowed by the later one and has been removed — the registered mmpretrain
# transforms (used by every other config in this commit) are the ones in effect.
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)

# Inherit model / data / schedule / runtime settings from the base configs;
# the star-imports merge their top-level variables into this module.
with read_base():
    from .._base_.models.vit_large_p16 import *
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *

# model setting: run ViT-Large/16 at 384x384 input resolution
model = dict(backbone=dict(img_size=384))

# dataset setting
data_preprocessor = dict(
    # per-channel normalization; 127.5 mean/std maps uint8 pixels to ~[-1, 1]
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    # resize short edge to 384, then take a central 384x384 crop
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

# rebind the new pipelines on the dataloaders inherited from the base config
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

# schedule setting
# Gradient clipping by global norm, commonly used to stabilize ViT training.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))

View File

@@ -0,0 +1,19 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup

# Inherit model / data / schedule / runtime settings from the base configs;
# the star-imports merge their top-level variables into this module.
with read_base():
    from .._base_.models.vit_large_p32 import *
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *

# model setting
model = dict(
    # hidden_dim adds an intermediate layer in the cls head — presumably
    # a VisionTransformerClsHead option; verify against its API docs.
    head=dict(hidden_dim=3072),
    # Mixup batch augmentation with alpha=0.2 during training.
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# schedule setting
# Gradient clipping by global norm, commonly used to stabilize ViT training.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))

View File

@@ -0,0 +1,45 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
# NOTE(review): LoadImageFromFile and RandomFlip were previously imported from
# both mmcv.transforms and mmpretrain.datasets; the mmcv import was fully
# shadowed by the later one and has been removed — the registered mmpretrain
# transforms (used by every other config in this commit) are the ones in effect.
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)

# Inherit model / data / schedule / runtime settings from the base configs;
# the star-imports merge their top-level variables into this module.
with read_base():
    from .._base_.models.vit_large_p32 import *
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.schedules.imagenet_bs4096_AdamW import *
    from .._base_.default_runtime import *

# model setting: run ViT-Large/32 at 384x384 input resolution
model = dict(backbone=dict(img_size=384))

# dataset setting
data_preprocessor = dict(
    # per-channel normalization; 127.5 mean/std maps uint8 pixels to ~[-1, 1]
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    # resize short edge to 384, then take a central 384x384 crop
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

# rebind the new pipelines on the dataloaders inherited from the base config
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

# schedule setting
# Gradient clipping by global norm, commonly used to stabilize ViT training.
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))