Merge pull request #1823 from leng-yue/fix-layer-scale
[Fix] Update dinov2 layerscale init values
pull/1837/head
commit
cd950e6583
|
@@ -1982,7 +1982,7 @@ def vit_small_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
|
|||
""" ViT-S/14 for DINOv2
|
||||
"""
|
||||
model_args = dict(
|
||||
- patch_size=14, embed_dim=384, depth=12, num_heads=6, init_values=1.0, img_size=518,
|
||||
+ patch_size=14, embed_dim=384, depth=12, num_heads=6, init_values=1e-5, img_size=518,
|
||||
)
|
||||
model = _create_vision_transformer(
|
||||
'vit_small_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
|
||||
|
@@ -1994,7 +1994,7 @@ def vit_base_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
|
|||
""" ViT-B/14 for DINOv2
|
||||
"""
|
||||
model_args = dict(
|
||||
- patch_size=14, embed_dim=768, depth=12, num_heads=12, init_values=1.0, img_size=518,
|
||||
+ patch_size=14, embed_dim=768, depth=12, num_heads=12, init_values=1e-5, img_size=518,
|
||||
)
|
||||
model = _create_vision_transformer(
|
||||
'vit_base_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
|
||||
|
@@ -2006,7 +2006,7 @@ def vit_large_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
|
|||
""" ViT-L/14 for DINOv2
|
||||
"""
|
||||
model_args = dict(
|
||||
- patch_size=14, embed_dim=1024, depth=24, num_heads=16, init_values=1.0, img_size=518,
|
||||
+ patch_size=14, embed_dim=1024, depth=24, num_heads=16, init_values=1e-5, img_size=518,
|
||||
)
|
||||
model = _create_vision_transformer(
|
||||
'vit_large_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
|
||||
|
@@ -2024,7 +2024,7 @@ def vit_giant_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
|
|||
# With SwiGLUPacked, we need to set hidden_features = 2 * 4096 = 8192
|
||||
|
||||
model_args = dict(
|
||||
- patch_size=14, embed_dim=1536, depth=40, num_heads=24, init_values=1.0,
|
||||
+ patch_size=14, embed_dim=1536, depth=40, num_heads=24, init_values=1e-5,
|
||||
mlp_ratio=2.66667 * 2, mlp_layer=SwiGLUPacked, img_size=518, act_layer=nn.SiLU
|
||||
)
|
||||
model = _create_vision_transformer(
|
||||
|
|
Loading…
Reference in New Issue