only keep vit_base_p16.py
parent
474e651180
commit
57c59fecdc
|
@ -2,7 +2,7 @@
|
|||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.model.weight_init import KaimingInit
|
||||
|
||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
||||
from mmpretrain.models import (ImageClassifier, LabelSmoothLoss,
|
||||
VisionTransformer, VisionTransformerClsHead)
|
||||
|
||||
# model settings
|
||||
|
@ -10,7 +10,7 @@ model = dict(
|
|||
type=ImageClassifier,
|
||||
backbone=dict(
|
||||
type=VisionTransformer,
|
||||
arch='l',
|
||||
arch='b',
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
drop_rate=0.1,
|
||||
|
@ -25,7 +25,7 @@ model = dict(
|
|||
head=dict(
|
||||
type=VisionTransformerClsHead,
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
in_channels=768,
|
||||
loss=dict(
|
||||
type=LabelSmoothLoss, label_smooth_val=0.1, mode='classy_vision'),
|
||||
))
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.model.weight_init import KaimingInit
|
||||
|
||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
||||
VisionTransformer, VisionTransformerClsHead)
|
||||
|
||||
# model settings
|
||||
model = dict(
|
||||
type=ImageClassifier,
|
||||
backbone=dict(
|
||||
type=VisionTransformer,
|
||||
arch='b',
|
||||
img_size=224,
|
||||
patch_size=32,
|
||||
drop_rate=0.1,
|
||||
init_cfg=[
|
||||
dict(
|
||||
type=KaimingInit,
|
||||
layer='Conv2d',
|
||||
mode='fan_in',
|
||||
nonlinearity='linear')
|
||||
]),
|
||||
neck=None,
|
||||
head=dict(
|
||||
type=VisionTransformerClsHead,
|
||||
num_classes=1000,
|
||||
in_channels=768,
|
||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
|
@ -1,31 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.model.weight_init import KaimingInit
|
||||
|
||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
||||
VisionTransformer, VisionTransformerClsHead)
|
||||
|
||||
# model settings
|
||||
model = dict(
|
||||
type=ImageClassifier,
|
||||
backbone=dict(
|
||||
type=VisionTransformer,
|
||||
arch='l',
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
drop_rate=0.1,
|
||||
init_cfg=[
|
||||
dict(
|
||||
type=KaimingInit,
|
||||
layer='Conv2d',
|
||||
mode='fan_in',
|
||||
nonlinearity='linear')
|
||||
]),
|
||||
neck=None,
|
||||
head=dict(
|
||||
type=VisionTransformerClsHead,
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
|
@ -1,31 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.model.weight_init import KaimingInit
|
||||
|
||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
||||
VisionTransformer, VisionTransformerClsHead)
|
||||
|
||||
# model settings
|
||||
model = dict(
|
||||
type=ImageClassifier,
|
||||
backbone=dict(
|
||||
type=VisionTransformer,
|
||||
arch='l',
|
||||
img_size=224,
|
||||
patch_size=32,
|
||||
drop_rate=0.1,
|
||||
init_cfg=[
|
||||
dict(
|
||||
type=KaimingInit,
|
||||
layer='Conv2d',
|
||||
mode='fan_in',
|
||||
nonlinearity='linear')
|
||||
]),
|
||||
neck=None,
|
||||
head=dict(
|
||||
type=VisionTransformerClsHead,
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
|
@ -1,34 +1,21 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.config import read_base
|
||||
from mmengine.model import ConstantInit, TruncNormalInit
|
||||
from torch.optim import AdamW
|
||||
|
||||
from mmpretrain.engine import EMAHook
|
||||
from mmpretrain.models import (ConstantInit, CutMix, ImageClassifier,
|
||||
LabelSmoothLoss, Mixup, TruncNormalInit,
|
||||
VisionTransformer, VisionTransformerClsHead)
|
||||
from mmpretrain.models import CutMix, Mixup
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_swin_224 import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs1024_adamw_swin import *
|
||||
|
||||
# model settings
|
||||
model = dict(
|
||||
type=ImageClassifier,
|
||||
backbone=dict(
|
||||
type=VisionTransformer,
|
||||
arch='base',
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
drop_path_rate=0.1),
|
||||
neck=None,
|
||||
head=dict(
|
||||
type=VisionTransformerClsHead,
|
||||
num_classes=1000,
|
||||
in_channels=768,
|
||||
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
|
||||
),
|
||||
model.update(
|
||||
backbone=dict(drop_rate=0, drop_path_rate=0.1),
|
||||
head=dict(loss=dict(mode='original')),
|
||||
init_cfg=[
|
||||
dict(type=TruncNormalInit, layer='Linear', std=.02),
|
||||
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
|
||||
|
@ -36,6 +23,7 @@ model = dict(
|
|||
train_cfg=dict(
|
||||
augments=[dict(type=Mixup, alpha=0.8),
|
||||
dict(type=CutMix, alpha=1.0)]))
|
||||
model.backbone.init_cfg = []
|
||||
|
||||
# dataset settings
|
||||
train_dataloader.update(batch_size=128)
|
||||
|
|
|
@ -2,19 +2,26 @@
|
|||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.config import read_base
|
||||
|
||||
from mmpretrain.models import Mixup
|
||||
from mmpretrain.models import CrossEntropyLoss, Mixup
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_base_p32 import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||
|
||||
# model setting
|
||||
model.update(
|
||||
head=dict(hidden_dim=3072),
|
||||
backbone=dict(patch_size=32),
|
||||
head=dict(
|
||||
hidden_dim=3072,
|
||||
# loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
||||
)
|
||||
|
||||
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||
|
||||
# schedule setting
|
||||
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
||||
|
|
|
@ -4,15 +4,19 @@ from mmengine.config import read_base
|
|||
|
||||
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
||||
RandomFlip, RandomResizedCrop, ResizeEdge)
|
||||
from mmpretrain.models import CrossEntropyLoss
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_base_p32 import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||
|
||||
# model setting
|
||||
model.update(backbone=dict(img_size=384))
|
||||
model.update(
|
||||
backbone=dict(img_size=384, patch_size=32), head=dict(topk=(1, 5)))
|
||||
|
||||
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||
|
||||
# dataset setting
|
||||
data_preprocessor.update(
|
||||
|
|
|
@ -2,19 +2,26 @@
|
|||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.config import read_base
|
||||
|
||||
from mmpretrain.models import Mixup
|
||||
from mmpretrain.models import CrossEntropyLoss, Mixup
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_large_p16 import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||
|
||||
# model setting
|
||||
model.update(
|
||||
head=dict(hidden_dim=3072),
|
||||
backbone=dict(arch='l'),
|
||||
head=dict(
|
||||
hidden_dim=3072,
|
||||
in_channels=1024,
|
||||
topk=(1, 5),
|
||||
),
|
||||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
||||
)
|
||||
|
||||
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||
|
||||
# schedule setting
|
||||
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
||||
|
|
|
@ -4,15 +4,20 @@ from mmengine.config import read_base
|
|||
|
||||
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
||||
RandomFlip, RandomResizedCrop, ResizeEdge)
|
||||
from mmpretrain.models import CrossEntropyLoss
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_large_p16 import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||
|
||||
# model setting
|
||||
model.update(backbone=dict(img_size=384))
|
||||
model.update(
|
||||
backbone=dict(arch='l', img_size=384),
|
||||
head=dict(in_channels=1024, topk=(1, 5)))
|
||||
|
||||
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||
|
||||
# dataset setting
|
||||
data_preprocessor.update(
|
||||
|
|
|
@ -2,19 +2,26 @@
|
|||
# This is a BETA new format config file, and the usage may change recently.
|
||||
from mmengine.config import read_base
|
||||
|
||||
from mmpretrain.models import Mixup
|
||||
from mmpretrain.models import CrossEntropyLoss, Mixup
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_large_p32 import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||
|
||||
# model setting
|
||||
model.update(
|
||||
head=dict(hidden_dim=3072),
|
||||
backbone=dict(arch='l', patch_size=32),
|
||||
head=dict(
|
||||
hidden_dim=3072,
|
||||
in_channels=1024,
|
||||
topk=(1, 5),
|
||||
),
|
||||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
||||
)
|
||||
|
||||
# Override the loss inherited from the shared vit_base_p16 base config
# (LabelSmoothLoss) with plain cross-entropy. The assignment has to target
# `model.head.loss`; a bare top-level `loss` name is not part of `model`.
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||
|
||||
# schedule setting
|
||||
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
||||
|
|
|
@ -4,18 +4,23 @@ from mmengine.config import read_base
|
|||
|
||||
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
||||
RandomFlip, RandomResizedCrop, ResizeEdge)
|
||||
from mmpretrain.models import CrossEntropyLoss
|
||||
|
||||
with read_base():
|
||||
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
||||
from .._base_.default_runtime import *
|
||||
from .._base_.models.vit_large_p32 import *
|
||||
from .._base_.models.vit_base_p16 import *
|
||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||
|
||||
# model setting
|
||||
model.update(backbone=dict(img_size=384))
|
||||
model.update(
|
||||
backbone=dict(arch='l', img_size=384, patch_size=32),
|
||||
head=dict(in_channels=1024, topk=(1, 5)))
|
||||
|
||||
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||
|
||||
# dataset setting
|
||||
data_preprocessor = dict(
|
||||
data_preprocessor.update(
|
||||
mean=[127.5, 127.5, 127.5],
|
||||
std=[127.5, 127.5, 127.5],
|
||||
# convert image from BGR to RGB
|
||||
|
|
Loading…
Reference in New Issue