diff --git a/ppcls/arch/backbone/model_zoo/foundation_vit.py b/ppcls/arch/backbone/model_zoo/foundation_vit.py index 12cc699e5..588020fe0 100644 --- a/ppcls/arch/backbone/model_zoo/foundation_vit.py +++ b/ppcls/arch/backbone/model_zoo/foundation_vit.py @@ -567,7 +567,7 @@ class VisionTransformer(nn.Layer): drop_path_rate=0., norm_layer='nn.LayerNorm', epsilon=1e-5, - head_init_scale=1, + head_init_scale=0.001, **kwargs): super().__init__() global _model_diff diff --git a/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml b/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml index c7e6e0de8..e2f6f55d6 100644 --- a/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml +++ b/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml @@ -27,7 +27,6 @@ Arch: class_num: 1000 return_embed: False pretrained: True - head_init_scale: 0.001 # loss function config for traing/eval process Loss: diff --git a/ppcls/configs/CLIP/CLIP_vit_large_patch14_224_finetune.yaml b/ppcls/configs/CLIP/CLIP_vit_large_patch14_224_finetune.yaml index 1380587be..e17ae8184 100644 --- a/ppcls/configs/CLIP/CLIP_vit_large_patch14_224_finetune.yaml +++ b/ppcls/configs/CLIP/CLIP_vit_large_patch14_224_finetune.yaml @@ -27,7 +27,6 @@ Arch: class_num: 1000 return_embed: False pretrained: True - head_init_scale: 0.001 # loss function config for traing/eval process Loss: