mmpretrain/configs/_base_/models/vit-large-p32.py

25 lines
593 B
Python

# Model settings.
#
# Declarative description of an image classifier: a VisionTransformer
# backbone (arch 'l', 32-pixel patches, 224-pixel input), no neck, and a
# 1000-class VisionTransformerClsHead trained with cross-entropy loss.
# The 'type' strings are presumably resolved to classes by the framework
# that loads this config; nothing is instantiated in this file.
model = {
    'type': 'ImageClassifier',
    'backbone': {
        'type': 'VisionTransformer',
        'arch': 'l',
        'img_size': 224,
        'patch_size': 32,
        'drop_rate': 0.1,
        # Weight initialisation: a single Kaiming rule targeting Conv2d
        # layers (fan-in mode, linear nonlinearity).
        'init_cfg': [
            {
                'type': 'Kaiming',
                'layer': 'Conv2d',
                'mode': 'fan_in',
                'nonlinearity': 'linear',
            },
        ],
    },
    # No neck between the backbone and the head.
    'neck': None,
    'head': {
        'type': 'VisionTransformerClsHead',
        'num_classes': 1000,
        # NOTE(review): presumably must equal the backbone's output feature
        # width for arch 'l' -- confirm against the backbone definition.
        'in_channels': 1024,
        'loss': {'type': 'CrossEntropyLoss', 'loss_weight': 1.0},
        # Values of k passed to the head as `topk`.
        'topk': (1, 5),
    },
}