only keep vit_base_p16.py
parent
474e651180
commit
57c59fecdc
|
@ -2,7 +2,7 @@
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
# This is a BETA new format config file, and the usage may change recently.
|
||||||
from mmengine.model.weight_init import KaimingInit
|
from mmengine.model.weight_init import KaimingInit
|
||||||
|
|
||||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
from mmpretrain.models import (ImageClassifier, LabelSmoothLoss,
|
||||||
VisionTransformer, VisionTransformerClsHead)
|
VisionTransformer, VisionTransformerClsHead)
|
||||||
|
|
||||||
# model settings
|
# model settings
|
||||||
|
@ -10,7 +10,7 @@ model = dict(
|
||||||
type=ImageClassifier,
|
type=ImageClassifier,
|
||||||
backbone=dict(
|
backbone=dict(
|
||||||
type=VisionTransformer,
|
type=VisionTransformer,
|
||||||
arch='l',
|
arch='b',
|
||||||
img_size=224,
|
img_size=224,
|
||||||
patch_size=16,
|
patch_size=16,
|
||||||
drop_rate=0.1,
|
drop_rate=0.1,
|
||||||
|
@ -25,7 +25,7 @@ model = dict(
|
||||||
head=dict(
|
head=dict(
|
||||||
type=VisionTransformerClsHead,
|
type=VisionTransformerClsHead,
|
||||||
num_classes=1000,
|
num_classes=1000,
|
||||||
in_channels=1024,
|
in_channels=768,
|
||||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
loss=dict(
|
||||||
topk=(1, 5),
|
type=LabelSmoothLoss, label_smooth_val=0.1, mode='classy_vision'),
|
||||||
))
|
))
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
# Copyright (c) OpenMMLab. All rights reserved.
|
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
|
||||||
from mmengine.model.weight_init import KaimingInit
|
|
||||||
|
|
||||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
|
||||||
VisionTransformer, VisionTransformerClsHead)
|
|
||||||
|
|
||||||
# model settings
|
|
||||||
model = dict(
|
|
||||||
type=ImageClassifier,
|
|
||||||
backbone=dict(
|
|
||||||
type=VisionTransformer,
|
|
||||||
arch='b',
|
|
||||||
img_size=224,
|
|
||||||
patch_size=32,
|
|
||||||
drop_rate=0.1,
|
|
||||||
init_cfg=[
|
|
||||||
dict(
|
|
||||||
type=KaimingInit,
|
|
||||||
layer='Conv2d',
|
|
||||||
mode='fan_in',
|
|
||||||
nonlinearity='linear')
|
|
||||||
]),
|
|
||||||
neck=None,
|
|
||||||
head=dict(
|
|
||||||
type=VisionTransformerClsHead,
|
|
||||||
num_classes=1000,
|
|
||||||
in_channels=768,
|
|
||||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
|
||||||
topk=(1, 5),
|
|
||||||
))
|
|
|
@ -1,31 +0,0 @@
|
||||||
# Copyright (c) OpenMMLab. All rights reserved.
|
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
|
||||||
from mmengine.model.weight_init import KaimingInit
|
|
||||||
|
|
||||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
|
||||||
VisionTransformer, VisionTransformerClsHead)
|
|
||||||
|
|
||||||
# model settings
|
|
||||||
model = dict(
|
|
||||||
type=ImageClassifier,
|
|
||||||
backbone=dict(
|
|
||||||
type=VisionTransformer,
|
|
||||||
arch='l',
|
|
||||||
img_size=224,
|
|
||||||
patch_size=16,
|
|
||||||
drop_rate=0.1,
|
|
||||||
init_cfg=[
|
|
||||||
dict(
|
|
||||||
type=KaimingInit,
|
|
||||||
layer='Conv2d',
|
|
||||||
mode='fan_in',
|
|
||||||
nonlinearity='linear')
|
|
||||||
]),
|
|
||||||
neck=None,
|
|
||||||
head=dict(
|
|
||||||
type=VisionTransformerClsHead,
|
|
||||||
num_classes=1000,
|
|
||||||
in_channels=1024,
|
|
||||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
|
||||||
topk=(1, 5),
|
|
||||||
))
|
|
|
@ -1,31 +0,0 @@
|
||||||
# Copyright (c) OpenMMLab. All rights reserved.
|
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
|
||||||
from mmengine.model.weight_init import KaimingInit
|
|
||||||
|
|
||||||
from mmpretrain.models import (CrossEntropyLoss, ImageClassifier,
|
|
||||||
VisionTransformer, VisionTransformerClsHead)
|
|
||||||
|
|
||||||
# model settings
|
|
||||||
model = dict(
|
|
||||||
type=ImageClassifier,
|
|
||||||
backbone=dict(
|
|
||||||
type=VisionTransformer,
|
|
||||||
arch='l',
|
|
||||||
img_size=224,
|
|
||||||
patch_size=32,
|
|
||||||
drop_rate=0.1,
|
|
||||||
init_cfg=[
|
|
||||||
dict(
|
|
||||||
type=KaimingInit,
|
|
||||||
layer='Conv2d',
|
|
||||||
mode='fan_in',
|
|
||||||
nonlinearity='linear')
|
|
||||||
]),
|
|
||||||
neck=None,
|
|
||||||
head=dict(
|
|
||||||
type=VisionTransformerClsHead,
|
|
||||||
num_classes=1000,
|
|
||||||
in_channels=1024,
|
|
||||||
loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
|
||||||
topk=(1, 5),
|
|
||||||
))
|
|
|
@ -1,34 +1,21 @@
|
||||||
# Copyright (c) OpenMMLab. All rights reserved.
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
# This is a BETA new format config file, and the usage may change recently.
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
from mmengine.model import ConstantInit, TruncNormalInit
|
||||||
from torch.optim import AdamW
|
from torch.optim import AdamW
|
||||||
|
|
||||||
from mmpretrain.engine import EMAHook
|
from mmpretrain.engine import EMAHook
|
||||||
from mmpretrain.models import (ConstantInit, CutMix, ImageClassifier,
|
from mmpretrain.models import CutMix, Mixup
|
||||||
LabelSmoothLoss, Mixup, TruncNormalInit,
|
|
||||||
VisionTransformer, VisionTransformerClsHead)
|
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_swin_224 import *
|
from .._base_.datasets.imagenet_bs64_swin_224 import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs1024_adamw_swin import *
|
from .._base_.schedules.imagenet_bs1024_adamw_swin import *
|
||||||
|
|
||||||
# model settings
|
model.update(
|
||||||
model = dict(
|
backbone=dict(drop_rate=0, drop_path_rate=0.1),
|
||||||
type=ImageClassifier,
|
head=dict(loss=dict(mode='original')),
|
||||||
backbone=dict(
|
|
||||||
type=VisionTransformer,
|
|
||||||
arch='base',
|
|
||||||
img_size=224,
|
|
||||||
patch_size=16,
|
|
||||||
drop_path_rate=0.1),
|
|
||||||
neck=None,
|
|
||||||
head=dict(
|
|
||||||
type=VisionTransformerClsHead,
|
|
||||||
num_classes=1000,
|
|
||||||
in_channels=768,
|
|
||||||
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
|
|
||||||
),
|
|
||||||
init_cfg=[
|
init_cfg=[
|
||||||
dict(type=TruncNormalInit, layer='Linear', std=.02),
|
dict(type=TruncNormalInit, layer='Linear', std=.02),
|
||||||
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
|
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
|
||||||
|
@ -36,6 +23,7 @@ model = dict(
|
||||||
train_cfg=dict(
|
train_cfg=dict(
|
||||||
augments=[dict(type=Mixup, alpha=0.8),
|
augments=[dict(type=Mixup, alpha=0.8),
|
||||||
dict(type=CutMix, alpha=1.0)]))
|
dict(type=CutMix, alpha=1.0)]))
|
||||||
|
model.backbone.init_cfg = []
|
||||||
|
|
||||||
# dataset settings
|
# dataset settings
|
||||||
train_dataloader.update(batch_size=128)
|
train_dataloader.update(batch_size=128)
|
||||||
|
|
|
@ -2,19 +2,26 @@
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
# This is a BETA new format config file, and the usage may change recently.
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
from mmpretrain.models import Mixup
|
from mmpretrain.models import CrossEntropyLoss, Mixup
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
from .._base_.models.vit_base_p32 import *
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||||
|
|
||||||
# model setting
|
# model setting
|
||||||
model.update(
|
model.update(
|
||||||
head=dict(hidden_dim=3072),
|
backbone=dict(patch_size=32),
|
||||||
|
head=dict(
|
||||||
|
hidden_dim=3072,
|
||||||
|
# loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
|
||||||
|
topk=(1, 5),
|
||||||
|
),
|
||||||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||||
|
|
||||||
# schedule setting
|
# schedule setting
|
||||||
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
||||||
|
|
|
@ -4,15 +4,19 @@ from mmengine.config import read_base
|
||||||
|
|
||||||
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
||||||
RandomFlip, RandomResizedCrop, ResizeEdge)
|
RandomFlip, RandomResizedCrop, ResizeEdge)
|
||||||
|
from mmpretrain.models import CrossEntropyLoss
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
from .._base_.models.vit_base_p32 import *
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||||
|
|
||||||
# model setting
|
# model setting
|
||||||
model.update(backbone=dict(img_size=384))
|
model.update(
|
||||||
|
backbone=dict(img_size=384, patch_size=32), head=dict(topk=(1, 5)))
|
||||||
|
|
||||||
|
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||||
|
|
||||||
# dataset setting
|
# dataset setting
|
||||||
data_preprocessor.update(
|
data_preprocessor.update(
|
||||||
|
|
|
@ -2,19 +2,26 @@
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
# This is a BETA new format config file, and the usage may change recently.
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
from mmpretrain.models import Mixup
|
from mmpretrain.models import CrossEntropyLoss, Mixup
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
from .._base_.models.vit_large_p16 import *
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||||
|
|
||||||
# model setting
|
# model setting
|
||||||
model.update(
|
model.update(
|
||||||
head=dict(hidden_dim=3072),
|
backbone=dict(arch='l'),
|
||||||
|
head=dict(
|
||||||
|
hidden_dim=3072,
|
||||||
|
in_channels=1024,
|
||||||
|
topk=(1, 5),
|
||||||
|
),
|
||||||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||||
|
|
||||||
# schedule setting
|
# schedule setting
|
||||||
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
||||||
|
|
|
@ -4,15 +4,20 @@ from mmengine.config import read_base
|
||||||
|
|
||||||
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
||||||
RandomFlip, RandomResizedCrop, ResizeEdge)
|
RandomFlip, RandomResizedCrop, ResizeEdge)
|
||||||
|
from mmpretrain.models import CrossEntropyLoss
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
from .._base_.models.vit_large_p16 import *
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||||
|
|
||||||
# model setting
|
# model setting
|
||||||
model.update(backbone=dict(img_size=384))
|
model.update(
|
||||||
|
backbone=dict(arch='l', img_size=384),
|
||||||
|
head=dict(in_channels=1024, topk=(1, 5)))
|
||||||
|
|
||||||
|
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||||
|
|
||||||
# dataset setting
|
# dataset setting
|
||||||
data_preprocessor.update(
|
data_preprocessor.update(
|
||||||
|
|
|
@ -2,19 +2,26 @@
|
||||||
# This is a BETA new format config file, and the usage may change recently.
|
# This is a BETA new format config file, and the usage may change recently.
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
from mmpretrain.models import Mixup
|
from mmpretrain.models import CrossEntropyLoss, Mixup
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
from .._base_.models.vit_large_p32 import *
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||||
|
|
||||||
# model setting
|
# model setting
|
||||||
model.update(
|
model.update(
|
||||||
head=dict(hidden_dim=3072),
|
backbone=dict(arch='l', patch_size=32),
|
||||||
|
head=dict(
|
||||||
|
hidden_dim=3072,
|
||||||
|
in_channels=1024,
|
||||||
|
topk=(1, 5),
|
||||||
|
),
|
||||||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Override the head loss inherited from the shared base model config; a bare
# top-level `loss = ...` would only define an unused variable and leave the
# base config's loss in effect.
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||||
|
|
||||||
# schedule setting
|
# schedule setting
|
||||||
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
|
||||||
|
|
|
@ -4,18 +4,23 @@ from mmengine.config import read_base
|
||||||
|
|
||||||
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
|
||||||
RandomFlip, RandomResizedCrop, ResizeEdge)
|
RandomFlip, RandomResizedCrop, ResizeEdge)
|
||||||
|
from mmpretrain.models import CrossEntropyLoss
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
from .._base_.datasets.imagenet_bs64_pil_resize import *
|
||||||
from .._base_.default_runtime import *
|
from .._base_.default_runtime import *
|
||||||
from .._base_.models.vit_large_p32 import *
|
from .._base_.models.vit_base_p16 import *
|
||||||
from .._base_.schedules.imagenet_bs4096_adamw import *
|
from .._base_.schedules.imagenet_bs4096_adamw import *
|
||||||
|
|
||||||
# model setting
|
# model setting
|
||||||
model.update(backbone=dict(img_size=384))
|
model.update(
|
||||||
|
backbone=dict(arch='l', img_size=384, patch_size=32),
|
||||||
|
head=dict(in_channels=1024, topk=(1, 5)))
|
||||||
|
|
||||||
|
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)
|
||||||
|
|
||||||
# dataset setting
|
# dataset setting
|
||||||
data_preprocessor = dict(
|
data_preprocessor.update(
|
||||||
mean=[127.5, 127.5, 127.5],
|
mean=[127.5, 127.5, 127.5],
|
||||||
std=[127.5, 127.5, 127.5],
|
std=[127.5, 127.5, 127.5],
|
||||||
# convert image from BGR to RGB
|
# convert image from BGR to RGB
|
||||||
|
|
Loading…
Reference in New Issue