# SimCLR pre-training config: ResNet-50 backbone, LARS optimizer,
# 10-epoch linear warmup followed by cosine annealing up to epoch 800.
#
# NOTE: every setting must live on its own line. If the file is collapsed
# onto a single line, the first `#` comment swallows all later assignments
# and only `_base_` ends up defined.
_base_ = [
    '../_base_/datasets/imagenet_bs32_simclr.py',
    # The inherited schedule file is named for 200 epochs; the optimizer,
    # scheduler and `train_cfg` below are redefined for an 800-epoch run.
    '../_base_/schedules/imagenet_lars_coslr_200e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    type='SimCLR',
    backbone=dict(
        type='ResNet',
        depth=50,
        norm_cfg=dict(type='SyncBN'),
        zero_init_residual=True),
    neck=dict(
        type='NonLinearNeck',  # SimCLR non-linear neck
        in_channels=2048,
        hid_channels=2048,
        out_channels=128,
        num_layers=2,
        with_avg_pool=True),
    head=dict(
        type='ContrastiveHead',
        loss=dict(type='CrossEntropyLoss'),
        temperature=0.1),
)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=4.8, momentum=0.9, weight_decay=1e-6),
    paramwise_cfg=dict(
        # Per-parameter overrides selected by substring of the parameter
        # name. `decay_mult=0` and `lars_exclude=True` presumably disable
        # weight decay and LARS adaptation for these parameters -- confirm
        # against the optimizer constructor in use.
        custom_keys={
            'bn': dict(decay_mult=0, lars_exclude=True),
            'bias': dict(decay_mult=0, lars_exclude=True),
            # bn layer in ResNet block downsample module
            'downsample.1': dict(decay_mult=0, lars_exclude=True),
        }))

# learning rate scheduler
param_scheduler = [
    # Warmup phase: epochs [0, 10), starting from 1e-4 of the base lr.
    dict(
        type='LinearLR',
        start_factor=1e-4,
        by_epoch=True,
        begin=0,
        end=10,
        convert_to_iter_based=True),
    # Main phase: epochs [10, 800); T_max equals end - begin = 790.
    dict(
        type='CosineAnnealingLR', T_max=790, by_epoch=True, begin=10, end=800)
]

# runtime settings
# `max_epochs` matches the `end` of the cosine scheduler above.
train_cfg = dict(max_epochs=800)
default_hooks = dict(
    # only keeps the latest 3 checkpoints
    checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3))