# Mirror of https://github.com/open-mmlab/mmpretrain.git (synced 2025-06-03 14:59:18 +08:00) - 33 lines, 826 B, Python
# Base config that this file extends; the settings below are layered on top
# of it.
_base_ = ['./vit-large-p16_8xb128-coslr-50e_in1k.py']

# optimizer wrapper
optim_wrapper = dict(type='DeepSpeedOptimWrapper')

# training strategy
# Deepspeed with ZeRO3 + fp16
strategy = dict(
    type='DeepSpeedStrategy',
    # fp16 mixed-precision settings.
    # NOTE(review): per the DeepSpeed config docs, loss_scale=0 selects
    # dynamic loss scaling and the initial scale is 2**initial_scale_power
    # -- confirm against the installed DeepSpeed version.
    fp16=dict(
        enabled=True,
        fp16_master_weights_and_grads=False,
        loss_scale=0,
        loss_scale_window=500,
        hysteresis=2,
        min_loss_scale=1,
        initial_scale_power=15,
    ),
    # Names of the model inputs to cast to half precision.
    inputs_to_half=['inputs'],
    # ZeRO stage 3 settings.
    zero_optimization=dict(
        stage=3,
        allgather_partitions=True,
        reduce_scatter=True,
        allgather_bucket_size=50000000,
        reduce_bucket_size=50000000,
        overlap_comm=True,
        contiguous_gradients=True,
        cpu_offload=False,
    ))

# runner which supports strategies
runner_type = 'FlexibleRunner'