# DeiT-base variant of the DeiT-small config: everything not overridden
# below is taken from the base file named in `_base_`.
_base_ = './deit-small_pt-4xb256_in1k.py'

# model settings
# Only the keys that differ from the base config are listed here.
model = dict(
    backbone=dict(type='DistilledVisionTransformer', arch='deit-base'),
    # NOTE(review): in_channels=768 is presumably the embedding width of the
    # 'deit-base' backbone -- keep the two in sync if the arch changes.
    head=dict(type='DeiTClsHead', in_channels=768),
)

# dataset settings
# Per-GPU batch size; see the base_batch_size arithmetic below.
train_dataloader = dict(batch_size=64)

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (16 GPUs) x (64 samples per GPU)
auto_scale_lr = dict(base_batch_size=1024)