only keep vit_base_p16.py

pull/1727/head
zeyuanyin 2023-07-29 00:28:48 +04:00
parent 474e651180
commit 57c59fecdc
11 changed files with 63 additions and 133 deletions

View File

@@ -2,7 +2,7 @@
 # This is a BETA new format config file, and the usage may change recently.
 from mmengine.model.weight_init import KaimingInit
-from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
+from mmpretrain.models import (ImageClassifier, LabelSmoothLoss,
                                VisionTransformer, VisionTransformerClsHead)
 # model settings
@@ -10,7 +10,7 @@ model = dict(
     type=ImageClassifier,
     backbone=dict(
         type=VisionTransformer,
-        arch='l',
+        arch='b',
         img_size=224,
         patch_size=16,
         drop_rate=0.1,
@@ -25,7 +25,7 @@ model = dict(
     head=dict(
         type=VisionTransformerClsHead,
         num_classes=1000,
-        in_channels=1024,
-        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
-        topk=(1, 5),
+        in_channels=768,
+        loss=dict(
+            type=LabelSmoothLoss, label_smooth_val=0.1, mode='classy_vision'),
     ))
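Applying the hunks above, the one base config kept by this commit ends up roughly as follows. This is a sketch reconstructed from the diff, not an authoritative copy of the file, and the path configs/_base_/models/vit_base_p16.py is assumed from the commit title:

# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.model.weight_init import KaimingInit

from mmpretrain.models import (ImageClassifier, LabelSmoothLoss,
                               VisionTransformer, VisionTransformerClsHead)

# model settings
model = dict(
    type=ImageClassifier,
    backbone=dict(
        type=VisionTransformer,
        arch='b',
        img_size=224,
        patch_size=16,
        drop_rate=0.1,
        init_cfg=[
            dict(
                type=KaimingInit,
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]),
    neck=None,
    head=dict(
        type=VisionTransformerClsHead,
        num_classes=1000,
        in_channels=768,
        loss=dict(
            type=LabelSmoothLoss, label_smooth_val=0.1, mode='classy_vision'),
    ))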

View File

@@ -1,31 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-# This is a BETA new format config file, and the usage may change recently.
-from mmengine.model.weight_init import KaimingInit
-from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
-                               VisionTransformer, VisionTransformerClsHead)
-# model settings
-model = dict(
-    type=ImageClassifier,
-    backbone=dict(
-        type=VisionTransformer,
-        arch='b',
-        img_size=224,
-        patch_size=32,
-        drop_rate=0.1,
-        init_cfg=[
-            dict(
-                type=KaimingInit,
-                layer='Conv2d',
-                mode='fan_in',
-                nonlinearity='linear')
-        ]),
-    neck=None,
-    head=dict(
-        type=VisionTransformerClsHead,
-        num_classes=1000,
-        in_channels=768,
-        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
-        topk=(1, 5),
-    ))

View File

@@ -1,31 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-# This is a BETA new format config file, and the usage may change recently.
-from mmengine.model.weight_init import KaimingInit
-from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
-                               VisionTransformer, VisionTransformerClsHead)
-# model settings
-model = dict(
-    type=ImageClassifier,
-    backbone=dict(
-        type=VisionTransformer,
-        arch='l',
-        img_size=224,
-        patch_size=16,
-        drop_rate=0.1,
-        init_cfg=[
-            dict(
-                type=KaimingInit,
-                layer='Conv2d',
-                mode='fan_in',
-                nonlinearity='linear')
-        ]),
-    neck=None,
-    head=dict(
-        type=VisionTransformerClsHead,
-        num_classes=1000,
-        in_channels=1024,
-        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
-        topk=(1, 5),
-    ))

View File

@@ -1,31 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-# This is a BETA new format config file, and the usage may change recently.
-from mmengine.model.weight_init import KaimingInit
-from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
-                               VisionTransformer, VisionTransformerClsHead)
-# model settings
-model = dict(
-    type=ImageClassifier,
-    backbone=dict(
-        type=VisionTransformer,
-        arch='l',
-        img_size=224,
-        patch_size=32,
-        drop_rate=0.1,
-        init_cfg=[
-            dict(
-                type=KaimingInit,
-                layer='Conv2d',
-                mode='fan_in',
-                nonlinearity='linear')
-        ]),
-    neck=None,
-    head=dict(
-        type=VisionTransformerClsHead,
-        num_classes=1000,
-        in_channels=1024,
-        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
-        topk=(1, 5),
-    ))

View File

@@ -1,34 +1,21 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 # This is a BETA new format config file, and the usage may change recently.
 from mmengine.config import read_base
+from mmengine.model import ConstantInit, TruncNormalInit
 from torch.optim import AdamW
 from mmpretrain.engine import EMAHook
-from mmpretrain.models import (ConstantInit, CutMix, ImageClassifier,
-                               LabelSmoothLoss, Mixup, TruncNormalInit,
-                               VisionTransformer, VisionTransformerClsHead)
+from mmpretrain.models import CutMix, Mixup
 with read_base():
     from .._base_.datasets.imagenet_bs64_swin_224 import *
     from .._base_.default_runtime import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs1024_adamw_swin import *
-# model settings
-model = dict(
-    type=ImageClassifier,
-    backbone=dict(
-        type=VisionTransformer,
-        arch='base',
-        img_size=224,
-        patch_size=16,
-        drop_path_rate=0.1),
-    neck=None,
-    head=dict(
-        type=VisionTransformerClsHead,
-        num_classes=1000,
-        in_channels=768,
-        loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
-    ),
+model.update(
+    backbone=dict(drop_rate=0, drop_path_rate=0.1),
+    head=dict(loss=dict(mode='original')),
     init_cfg=[
         dict(type=TruncNormalInit, layer='Linear', std=.02),
         dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
@@ -36,6 +23,7 @@ model = dict(
     train_cfg=dict(
         augments=[dict(type=Mixup, alpha=0.8),
                   dict(type=CutMix, alpha=1.0)]))
+model.backbone.init_cfg = []
 # dataset settings
 train_dataloader.update(batch_size=128)

View File

@@ -2,19 +2,26 @@
 # This is a BETA new format config file, and the usage may change recently.
 from mmengine.config import read_base
-from mmpretrain.models import Mixup
+from mmpretrain.models import CrossEntropyLoss, Mixup
 with read_base():
     from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
     from .._base_.default_runtime import *
-    from .._base_.models.vit_base_p32 import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs4096_adamw import *
 # model setting
 model.update(
-    head=dict(hidden_dim=3072),
+    backbone=dict(patch_size=32),
+    head=dict(
+        hidden_dim=3072,
+        # loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
+        topk=(1, 5),
+    ),
     train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
 )
+model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
 # schedule setting
 optim_wrapper.update(clip_grad=dict(max_norm=1.0))

View File

@@ -4,15 +4,19 @@ from mmengine.config import read_base
 from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                  RandomFlip, RandomResizedCrop, ResizeEdge)
+from mmpretrain.models import CrossEntropyLoss
 with read_base():
     from .._base_.datasets.imagenet_bs64_pil_resize import *
     from .._base_.default_runtime import *
-    from .._base_.models.vit_base_p32 import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs4096_adamw import *
 # model setting
-model.update(backbone=dict(img_size=384))
+model.update(
+    backbone=dict(img_size=384, patch_size=32), head=dict(topk=(1, 5)))
+model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
 # dataset setting
 data_preprocessor.update(

View File

@@ -2,19 +2,26 @@
 # This is a BETA new format config file, and the usage may change recently.
 from mmengine.config import read_base
-from mmpretrain.models import Mixup
+from mmpretrain.models import CrossEntropyLoss, Mixup
 with read_base():
     from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
     from .._base_.default_runtime import *
-    from .._base_.models.vit_large_p16 import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs4096_adamw import *
 # model setting
 model.update(
-    head=dict(hidden_dim=3072),
+    backbone=dict(arch='l'),
+    head=dict(
+        hidden_dim=3072,
+        in_channels=1024,
+        topk=(1, 5),
+    ),
     train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
 )
+model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
 # schedule setting
 optim_wrapper.update(clip_grad=dict(max_norm=1.0))

View File

@@ -4,15 +4,20 @@ from mmengine.config import read_base
 from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                  RandomFlip, RandomResizedCrop, ResizeEdge)
+from mmpretrain.models import CrossEntropyLoss
 with read_base():
     from .._base_.datasets.imagenet_bs64_pil_resize import *
     from .._base_.default_runtime import *
-    from .._base_.models.vit_large_p16 import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs4096_adamw import *
 # model setting
-model.update(backbone=dict(img_size=384))
+model.update(
+    backbone=dict(arch='l', img_size=384),
+    head=dict(in_channels=1024, topk=(1, 5)))
+model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
 # dataset setting
 data_preprocessor.update(

View File

@@ -2,19 +2,26 @@
 # This is a BETA new format config file, and the usage may change recently.
 from mmengine.config import read_base
-from mmpretrain.models import Mixup
+from mmpretrain.models import CrossEntropyLoss, Mixup
 with read_base():
     from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
     from .._base_.default_runtime import *
-    from .._base_.models.vit_large_p32 import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs4096_adamw import *
 # model setting
 model.update(
-    head=dict(hidden_dim=3072),
+    backbone=dict(arch='l', patch_size=32),
+    head=dict(
+        hidden_dim=3072,
+        in_channels=1024,
+        topk=(1, 5),
+    ),
     train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
 )
+loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
 # schedule setting
 optim_wrapper.update(clip_grad=dict(max_norm=1.0))

View File

@@ -4,18 +4,23 @@ from mmengine.config import read_base
 from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                  RandomFlip, RandomResizedCrop, ResizeEdge)
+from mmpretrain.models import CrossEntropyLoss
 with read_base():
     from .._base_.datasets.imagenet_bs64_pil_resize import *
     from .._base_.default_runtime import *
-    from .._base_.models.vit_large_p32 import *
+    from .._base_.models.vit_base_p16 import *
     from .._base_.schedules.imagenet_bs4096_adamw import *
 # model setting
-model.update(backbone=dict(img_size=384))
+model.update(
+    backbone=dict(arch='l', img_size=384, patch_size=32),
+    head=dict(in_channels=1024, topk=(1, 5)))
+model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
 # dataset setting
-data_preprocessor = dict(
+data_preprocessor.update(
     mean=[127.5, 127.5, 127.5],
     std=[127.5, 127.5, 127.5],
     # convert image from BGR to RGB
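After this change every ViT variant is expressed as overrides on top of the single vit_base_p16 base. A minimal sketch of loading one of these pure-Python configs with mmengine and checking the merged values; the config file name and path are assumptions based on mmpretrain's usual layout, not taken from this diff:

# Sketch: load an updated new-format config and confirm that the per-variant
# overrides are merged on top of the shared vit_base_p16 base config.
# NOTE: the path below is an assumed mmpretrain layout; adjust to your checkout.
from mmengine.config import Config

cfg = Config.fromfile(
    'configs/vision_transformer/vit-large-p32_64xb64_in1k-384.py')

print(cfg.model.backbone.arch)        # 'l'   - overridden from the base's 'b'
print(cfg.model.backbone.patch_size)  # 32    - overridden from the base's 16
print(cfg.model.backbone.img_size)    # 384   - overridden from the base's 224
print(cfg.model.head.in_channels)     # 1024  - overridden from the base's 768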