# Base configs this file inherits from: dataset pipeline, optimizer
# schedule and default runtime.
_base_ = [
    '../_base_/datasets/imagenet_bs128_riformer_medium_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Model settings: RIFormer (arch 'm48') backbone -> global average pooling
# -> linear classification head over 1000 classes.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='RIFormer',
        arch='m48',
        drop_path_rate=0.1,
        # Weight initialisation, applied per layer type.
        init_cfg=[
            dict(
                type='TruncNormal',
                layer=['Conv2d', 'Linear'],
                std=0.02,
                bias=0.0,
            ),
            dict(
                type='Constant',
                layer=['GroupNorm'],
                val=1.0,
                bias=0.0,
            ),
        ],
    ),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        in_channels=768,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
    ),
)

# Schedule settings: learning rate and gradient-norm clipping, layered on
# top of the inherited schedule config.
optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
)

# NOTE: `auto_scale_lr` is used to automatically scale the LR based on the
# actual training batch size.
# base_batch_size = (32 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)