only keep vit_base_p16.py

pull/1727/head
zeyuanyin 2023-07-29 00:28:48 +04:00
parent 474e651180
commit 57c59fecdc
11 changed files with 63 additions and 133 deletions

View File

@ -2,7 +2,7 @@
# This is a BETA new format config file, and the usage may change recently.
from mmengine.model.weight_init import KaimingInit
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
from mmpretrain.models import (ImageClassifier, LabelSmoothLoss,
VisionTransformer, VisionTransformerClsHead)
# model settings
@ -10,7 +10,7 @@ model = dict(
type=ImageClassifier,
backbone=dict(
type=VisionTransformer,
arch='l',
arch='b',
img_size=224,
patch_size=16,
drop_rate=0.1,
@ -25,7 +25,7 @@ model = dict(
head=dict(
type=VisionTransformerClsHead,
num_classes=1000,
in_channels=1024,
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
topk=(1, 5),
in_channels=768,
loss=dict(
type=LabelSmoothLoss, label_smooth_val=0.1, mode='classy_vision'),
))

View File

@ -1,31 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.model.weight_init import KaimingInit
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
VisionTransformer, VisionTransformerClsHead)
# model settings
# Model config: an ImageClassifier assembled from a VisionTransformer
# backbone (arch='b', patch_size=32, img_size=224), no neck, and a
# VisionTransformerClsHead with 1000 output classes.
model = dict(
type=ImageClassifier,
backbone=dict(
type=VisionTransformer,
arch='b',
img_size=224,
patch_size=32,
drop_rate=0.1,
# Kaiming initialization restricted to Conv2d layers
# (fan_in mode, linear nonlinearity).
init_cfg=[
dict(
type=KaimingInit,
layer='Conv2d',
mode='fan_in',
nonlinearity='linear')
]),
neck=None,
head=dict(
type=VisionTransformerClsHead,
num_classes=1000,
# NOTE(review): presumably has to equal the backbone's output width for
# the chosen arch -- confirm against VisionTransformer before changing.
in_channels=768,
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
# NOTE(review): presumably the k values used for top-k accuracy -- confirm.
topk=(1, 5),
))

View File

@ -1,31 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.model.weight_init import KaimingInit
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
VisionTransformer, VisionTransformerClsHead)
# model settings
# Model config: an ImageClassifier assembled from a VisionTransformer
# backbone (arch='l', patch_size=16, img_size=224), no neck, and a
# VisionTransformerClsHead with 1000 output classes.
model = dict(
type=ImageClassifier,
backbone=dict(
type=VisionTransformer,
arch='l',
img_size=224,
patch_size=16,
drop_rate=0.1,
# Kaiming initialization restricted to Conv2d layers
# (fan_in mode, linear nonlinearity).
init_cfg=[
dict(
type=KaimingInit,
layer='Conv2d',
mode='fan_in',
nonlinearity='linear')
]),
neck=None,
head=dict(
type=VisionTransformerClsHead,
num_classes=1000,
# NOTE(review): presumably has to equal the backbone's output width for
# the chosen arch -- confirm against VisionTransformer before changing.
in_channels=1024,
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
# NOTE(review): presumably the k values used for top-k accuracy -- confirm.
topk=(1, 5),
))

View File

@ -1,31 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.model.weight_init import KaimingInit
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
VisionTransformer, VisionTransformerClsHead)
# model settings
# Model config: an ImageClassifier assembled from a VisionTransformer
# backbone (arch='l', patch_size=32, img_size=224), no neck, and a
# VisionTransformerClsHead with 1000 output classes.
model = dict(
type=ImageClassifier,
backbone=dict(
type=VisionTransformer,
arch='l',
img_size=224,
patch_size=32,
drop_rate=0.1,
# Kaiming initialization restricted to Conv2d layers
# (fan_in mode, linear nonlinearity).
init_cfg=[
dict(
type=KaimingInit,
layer='Conv2d',
mode='fan_in',
nonlinearity='linear')
]),
neck=None,
head=dict(
type=VisionTransformerClsHead,
num_classes=1000,
# NOTE(review): presumably has to equal the backbone's output width for
# the chosen arch -- confirm against VisionTransformer before changing.
in_channels=1024,
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
# NOTE(review): presumably the k values used for top-k accuracy -- confirm.
topk=(1, 5),
))

View File

@ -1,34 +1,21 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit
from torch.optim import AdamW
from mmpretrain.engine import EMAHook
from mmpretrain.models import (ConstantInit, CutMix, ImageClassifier,
LabelSmoothLoss, Mixup, TruncNormalInit,
VisionTransformer, VisionTransformerClsHead)
from mmpretrain.models import CutMix, Mixup
with read_base():
from .._base_.datasets.imagenet_bs64_swin_224 import *
from .._base_.default_runtime import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs1024_adamw_swin import *
# model settings
model = dict(
type=ImageClassifier,
backbone=dict(
type=VisionTransformer,
arch='base',
img_size=224,
patch_size=16,
drop_path_rate=0.1),
neck=None,
head=dict(
type=VisionTransformerClsHead,
num_classes=1000,
in_channels=768,
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
),
model.update(
backbone=dict(drop_rate=0, drop_path_rate=0.1),
head=dict(loss=dict(mode='original')),
init_cfg=[
dict(type=TruncNormalInit, layer='Linear', std=.02),
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
@ -36,6 +23,7 @@ model = dict(
train_cfg=dict(
augments=[dict(type=Mixup, alpha=0.8),
dict(type=CutMix, alpha=1.0)]))
model.backbone.init_cfg = []
# dataset settings
train_dataloader.update(batch_size=128)

View File

@ -2,19 +2,26 @@
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup
from mmpretrain.models import CrossEntropyLoss, Mixup
with read_base():
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
from .._base_.default_runtime import *
from .._base_.models.vit_base_p32 import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs4096_adamw import *
# model setting
model.update(
head=dict(hidden_dim=3072),
backbone=dict(patch_size=32),
head=dict(
hidden_dim=3072,
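# The head loss is not set here; it is assigned after this call through
# ``model.head.loss`` (presumably so the inherited loss dict is replaced
# outright rather than merged key-by-key -- confirm).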
topk=(1, 5),
),
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))

View File

@ -4,15 +4,19 @@ from mmengine.config import read_base
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
RandomFlip, RandomResizedCrop, ResizeEdge)
from mmpretrain.models import CrossEntropyLoss
with read_base():
from .._base_.datasets.imagenet_bs64_pil_resize import *
from .._base_.default_runtime import *
from .._base_.models.vit_base_p32 import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs4096_adamw import *
# model setting
model.update(backbone=dict(img_size=384))
model.update(
backbone=dict(img_size=384, patch_size=32), head=dict(topk=(1, 5)))
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
# dataset setting
data_preprocessor.update(

View File

@ -2,19 +2,26 @@
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup
from mmpretrain.models import CrossEntropyLoss, Mixup
with read_base():
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
from .._base_.default_runtime import *
from .._base_.models.vit_large_p16 import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs4096_adamw import *
# model setting
model.update(
head=dict(hidden_dim=3072),
backbone=dict(arch='l'),
head=dict(
hidden_dim=3072,
in_channels=1024,
topk=(1, 5),
),
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))

View File

@ -4,15 +4,20 @@ from mmengine.config import read_base
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
RandomFlip, RandomResizedCrop, ResizeEdge)
from mmpretrain.models import CrossEntropyLoss
with read_base():
from .._base_.datasets.imagenet_bs64_pil_resize import *
from .._base_.default_runtime import *
from .._base_.models.vit_large_p16 import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs4096_adamw import *
# model setting
model.update(backbone=dict(img_size=384))
model.update(
backbone=dict(arch='l', img_size=384),
head=dict(in_channels=1024, topk=(1, 5)))
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
# dataset setting
data_preprocessor.update(

View File

@ -2,19 +2,26 @@
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmpretrain.models import Mixup
from mmpretrain.models import CrossEntropyLoss, Mixup
with read_base():
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
from .._base_.default_runtime import *
from .._base_.models.vit_large_p32 import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs4096_adamw import *
# model setting
model.update(
head=dict(hidden_dim=3072),
backbone=dict(arch='l', patch_size=32),
head=dict(
hidden_dim=3072,
in_channels=1024,
topk=(1, 5),
),
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)
# Replace the head loss coming from the shared base model config with plain
# cross-entropy. The assignment has to target ``model.head.loss``: binding a
# bare top-level ``loss`` name leaves ``model`` untouched, so the head would
# silently keep the base config's loss.
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))

View File

@ -4,18 +4,23 @@ from mmengine.config import read_base
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
RandomFlip, RandomResizedCrop, ResizeEdge)
from mmpretrain.models import CrossEntropyLoss
with read_base():
from .._base_.datasets.imagenet_bs64_pil_resize import *
from .._base_.default_runtime import *
from .._base_.models.vit_large_p32 import *
from .._base_.models.vit_base_p16 import *
from .._base_.schedules.imagenet_bs4096_adamw import *
# model setting
model.update(backbone=dict(img_size=384))
model.update(
backbone=dict(arch='l', img_size=384, patch_size=32),
head=dict(in_channels=1024, topk=(1, 5)))
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
# dataset setting
data_preprocessor = dict(
data_preprocessor.update(
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB