# DeepSpeed variant of the ViT-Huge/14 ImageNet-1k schedule.
# Everything not overridden below is inherited from the base config.
_base_ = ['./vit-huge-p14_8xb128-coslr-50e_in1k.py']

# optimizer wrapper
# Only the wrapper ``type`` is overridden here.
optim_wrapper = dict(type='DeepSpeedOptimWrapper')

# training strategy
strategy = dict(
    type='DeepSpeedStrategy',
    # Mixed-precision (fp16) settings.
    fp16=dict(
        enabled=True,
        fp16_master_weights_and_grads=False,
        # Per the DeepSpeed config docs, 0 selects dynamic loss scaling and
        # the remaining keys tune it -- TODO confirm against the installed
        # DeepSpeed version.
        loss_scale=0,
        loss_scale_window=500,
        hysteresis=2,
        min_loss_scale=1,
        # Presumably the initial loss scale is 2 ** initial_scale_power.
        initial_scale_power=15,
    ),
    # Names of the batch entries to cast to half precision
    # (presumably keys of the data batch; verify against the strategy).
    inputs_to_half=['inputs'],
    # ZeRO settings.
    zero_optimization=dict(
        stage=1,
        allgather_partitions=True,
        reduce_scatter=True,
        allgather_bucket_size=50000000,
        reduce_bucket_size=50000000,
        overlap_comm=True,
        contiguous_gradients=True,
        # NOTE(review): newer DeepSpeed releases document ``cpu_offload`` as
        # deprecated in favour of ``offload_optimizer`` -- kept unchanged
        # here; confirm before migrating.
        cpu_offload=False,
    ))

# runner which supports strategies
runner_type = 'FlexibleRunner'