# ViT-Large/16 config variant that routes training through DeepSpeed.
#
# Everything not overridden below is inherited from the base config listed in
# `_base_`. Each setting must live on its own line: if an assignment shares a
# line with a preceding `#` comment it becomes part of that comment and is
# never executed.
_base_ = ['./vit-large-p16_8xb128-coslr-50e_in1k.py']

# optimizer wrapper
optim_wrapper = dict(type='DeepSpeedOptimWrapper')

# training strategy
strategy = dict(
    type='DeepSpeedStrategy',
    # Mixed-precision (fp16) settings handed to the strategy.
    # NOTE(review): per the DeepSpeed config docs, `loss_scale=0` selects
    # dynamic loss scaling and the initial scale is 2 ** initial_scale_power;
    # confirm against the installed DeepSpeed version.
    fp16=dict(
        enabled=True,
        fp16_master_weights_and_grads=False,
        loss_scale=0,
        loss_scale_window=500,
        hysteresis=2,
        min_loss_scale=1,
        initial_scale_power=15,
    ),
    # Names of the batch entries to cast to half precision
    # (presumably keys of the data batch; verify against the strategy).
    inputs_to_half=['inputs'],
    # ZeRO optimizer settings (stage 1).
    # NOTE(review): `cpu_offload` looks like the legacy spelling of the
    # offload switch in newer DeepSpeed releases; kept as-is to preserve
    # behavior, confirm it is still honored.
    zero_optimization=dict(
        stage=1,
        allgather_partitions=True,
        reduce_scatter=True,
        allgather_bucket_size=50000000,
        reduce_bucket_size=50000000,
        overlap_comm=True,
        contiguous_gradients=True,
        cpu_offload=False,
    ),
)

# runner which supports strategies
runner_type = 'FlexibleRunner'