# model settings
#
# Declarative description of the classifier as nested dicts. Each dict
# that carries a ``type`` key names a component by string; the remaining keys
# are that component's settings. Nothing is instantiated in this file.
# NOTE(review): the ``type`` strings are presumably resolved through the
# framework's registry by whatever loads this config — confirm against the
# consuming code.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='VisionTransformer',
        # NOTE(review): 'l' presumably selects a predefined architecture
        # preset of the backbone — confirm the accepted values there.
        arch='l',
        img_size=224,
        patch_size=32,
        drop_rate=0.1,
        # List of initialisation rules; the single entry requests 'Kaiming'
        # for layer='Conv2d' with mode='fan_in', nonlinearity='linear'.
        init_cfg=[
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear'),
        ]),
    # Explicitly no neck component between backbone and head.
    neck=None,
    head=dict(
        type='VisionTransformerClsHead',
        num_classes=1000,
        # NOTE(review): presumably has to match the feature width emitted by
        # the backbone for arch='l' — confirm before changing either value.
        in_channels=1024,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
        topk=(1, 5),
    ))