diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py
index 4a14afba..3f08b98f 100644
--- a/timm/models/vision_transformer.py
+++ b/timm/models/vision_transformer.py
@@ -1473,18 +1473,22 @@ default_cfgs = generate_default_cfgs({
     'vit_base_patch32_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-b32-fullcc2.5b',
         hf_hub_filename='metaclip_b32_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_base_patch16_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-b16-fullcc2.5b',
         hf_hub_filename='metaclip_b16_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_large_patch14_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-l14-fullcc2.5b',
         hf_hub_filename='metaclip_l14_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),
     'vit_huge_patch14_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-h14-fullcc2.5b',
         hf_hub_filename='metaclip_h14_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),

     'vit_base_patch32_clip_224.openai': _cfg(
@@ -2129,7 +2133,8 @@ def vit_base_patch32_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTra
         patch_size=32, embed_dim=768, depth=12, num_heads=12, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_base_patch32_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_base_patch32_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model


@@ -2141,7 +2146,8 @@ def vit_base_patch16_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTra
         patch_size=16, embed_dim=768, depth=12, num_heads=12, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_base_patch16_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_base_patch16_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model


@@ -2154,7 +2160,8 @@ def vit_large_patch14_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTr
         patch_size=14, embed_dim=1024, depth=24, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_large_patch14_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_large_patch14_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model


@@ -2166,7 +2173,8 @@ def vit_large_patch14_clip_quickgelu_336(pretrained=False, **kwargs) -> VisionTr
         patch_size=14, embed_dim=1024, depth=24, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_large_patch14_clip_336', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_large_patch14_clip_336',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model


@@ -2178,7 +2186,8 @@ def vit_huge_patch14_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTra
         patch_size=14, embed_dim=1280, depth=32, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_huge_patch14_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_huge_patch14_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model


@@ -2190,7 +2199,8 @@ def vit_huge_patch14_clip_quickgelu_378(pretrained=False, **kwargs) -> VisionTra
         patch_size=14, embed_dim=1280, depth=32, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_huge_patch14_clip_378', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_huge_patch14_clip_378',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model
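
For reference, a minimal sketch (not part of the patch) of how the quickgelu entrypoints above are exercised. `timm.create_model` and the `pretrained_cfg` attribute are standard timm API; the exact cfg contents printed will depend on the installed timm version.

import timm

# Build the QuickGELU B/32 CLIP tower without downloading weights. The
# entrypoint sets act_layer='quick_gelu', but, per the comments added in the
# patch, it intentionally resolves its pretrained_cfg from the plain
# 'vit_base_patch32_clip_224' cfg family (e.g. the MetaCLIP 2.5B entries
# tagged 'metaclip_2pt5b' above).
model = timm.create_model('vit_base_patch32_clip_quickgelu_224', pretrained=False)

# The resolved cfg should reference the non-quickgelu variant's entries.
print(model.pretrained_cfg)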