Mirror of https://github.com/huggingface/pytorch-image-models.git
Switching to timm-specific weight instances for open_clip image encoders, to facilitate hf-hub: use in timm and the new transformers TimmWrapper
commit 447147a25b
parent a02b1a8e79
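For context, a minimal usage sketch of what this change enables, assuming the corresponding image-tower checkpoints have been published under the timm organization on the Hugging Face Hub (model names below are taken from the configs in this diff; the repo availability is an assumption):

    import timm

    # With hf_hub_id='timm/' the pretrained weights are resolved from the 'timm'
    # org on the Hugging Face Hub using the full model name, instead of pulling
    # open_clip_pytorch_model.bin from the original laion repos.
    model = timm.create_model('convnext_base.clip_laion2b', pretrained=True, num_classes=0)
    model.eval()  # num_classes=0 gives the image tower as a feature extractor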
@@ -916,53 +916,43 @@ default_cfgs = generate_default_cfgs({

     # CLIP original image tower weights
     'convnext_base.clip_laion2b': _cfg(
-        hf_hub_id='laion/CLIP-convnext_base_w-laion2B-s13B-b82K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0, num_classes=640),
     'convnext_base.clip_laion2b_augreg': _cfg(
-        hf_hub_id='laion/CLIP-convnext_base_w-laion2B-s13B-b82K-augreg',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0, num_classes=640),
     'convnext_base.clip_laiona': _cfg(
-        hf_hub_id='laion/CLIP-convnext_base_w-laion_aesthetic-s13B-b82K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0, num_classes=640),
     'convnext_base.clip_laiona_320': _cfg(
-        hf_hub_id='laion/CLIP-convnext_base_w_320-laion_aesthetic-s13B-b82K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0, num_classes=640),
     'convnext_base.clip_laiona_augreg_320': _cfg(
-        hf_hub_id='laion/CLIP-convnext_base_w_320-laion_aesthetic-s13B-b82K-augreg',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0, num_classes=640),
     'convnext_large_mlp.clip_laion2b_augreg': _cfg(
-        hf_hub_id='laion/CLIP-convnext_large_d.laion2B-s26B-b102K-augreg',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0, num_classes=768),
     'convnext_large_mlp.clip_laion2b_ft_320': _cfg(
-        hf_hub_id='laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0, num_classes=768),
     'convnext_large_mlp.clip_laion2b_ft_soup_320': _cfg(
-        hf_hub_id='laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0, num_classes=768),
     'convnext_xxlarge.clip_laion2b_soup': _cfg(
-        hf_hub_id='laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0, num_classes=1024),
     'convnext_xxlarge.clip_laion2b_rewind': _cfg(
-        hf_hub_id='laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-rewind',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0, num_classes=1024),

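The entries above drop the per-repo laion hub IDs and open_clip checkpoint filename in favor of hf_hub_id='timm/'. Equivalently, the hub repo can be referenced explicitly through timm's hf-hub: prefix; a sketch, assuming the repo name follows the org + full-model-name convention:

    import timm

    # Explicit hub reference; 'hf-hub:' tells timm to resolve the checkpoint
    # directly from the named Hugging Face Hub repo (repo name is an assumption).
    model = timm.create_model('hf-hub:timm/convnext_base.clip_laion2b', pretrained=True)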
@@ -1556,9 +1556,6 @@ default_cfgs = {
         hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),

-    'vit_base_patch32_clip_224.laion2b_ft_in12k': _cfg(
-        #hf_hub_id='timm/vit_base_patch32_clip_224.laion2b_ft_in12k',  # FIXME weight exists, need to push
-        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
     'vit_base_patch16_clip_224.laion2b_ft_in12k': _cfg(
         hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
@@ -1569,9 +1566,6 @@ default_cfgs = {
         hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=11821),

-    'vit_base_patch32_clip_224.openai_ft_in12k': _cfg(
-        # hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k',  # FIXME weight exists, need to push
-        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
     'vit_base_patch16_clip_224.openai_ft_in12k': _cfg(
         hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
@@ -1580,28 +1574,22 @@ default_cfgs = {
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=11821),

     'vit_base_patch32_clip_224.laion2b': _cfg(
-        hf_hub_id='laion/CLIP-ViT-B-32-laion2B-s34B-b79K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=512),
     'vit_base_patch16_clip_224.laion2b': _cfg(
-        hf_hub_id='laion/CLIP-ViT-B-16-laion2B-s34B-b88K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_large_patch14_clip_224.laion2b': _cfg(
-        hf_hub_id='laion/CLIP-ViT-L-14-laion2B-s32B-b82K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0, num_classes=768),
     'vit_huge_patch14_clip_224.laion2b': _cfg(
-        hf_hub_id='laion/CLIP-ViT-H-14-laion2B-s32B-b79K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
     'vit_giant_patch14_clip_224.laion2b': _cfg(
-        hf_hub_id='laion/CLIP-ViT-g-14-laion2B-s12B-b42K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
     'vit_gigantic_patch14_clip_224.laion2b': _cfg(
-        hf_hub_id='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1280),

     'vit_base_patch32_clip_224.laion400m_e32': _cfg(
@@ -1620,21 +1608,17 @@ default_cfgs = {
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),

     'vit_base_patch32_clip_224.datacompxl': _cfg(
-        hf_hub_id='laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_base_patch32_clip_256.datacompxl': _cfg(
-        hf_hub_id='laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         crop_pct=1.0, input_size=(3, 256, 256), num_classes=512),
     'vit_base_patch16_clip_224.datacompxl': _cfg(
-        hf_hub_id='laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_large_patch14_clip_224.datacompxl': _cfg(
-        hf_hub_id='laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K',
-        hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),

     'vit_base_patch16_clip_224.dfn2b': _cfg(
@@ -1659,42 +1643,46 @@ default_cfgs = {
         crop_pct=1.0, input_size=(3, 378, 378), num_classes=1024),

     'vit_base_patch32_clip_224.metaclip_2pt5b': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_base_patch16_clip_224.metaclip_2pt5b': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_large_patch14_clip_224.metaclip_2pt5b': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),
     'vit_huge_patch14_clip_224.metaclip_2pt5b': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
+    'vit_huge_patch14_clip_224.metaclip_altogether': _cfg(
+        hf_hub_id='timm/',
+        license='cc-by-nc-4.0',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
     'vit_gigantic_patch14_clip_224.metaclip_2pt5b': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1280),
     'vit_base_patch32_clip_224.metaclip_400m': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_base_patch16_clip_224.metaclip_400m': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_large_patch14_clip_224.metaclip_400m': _cfg(
-        hf_hub_id='timm/', hf_hub_filename='open_clip_pytorch_model.bin',
+        hf_hub_id='timm/',
         license='cc-by-nc-4.0',
         notes=('natively QuickGELU, use quickgelu model variant for original results',),
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),
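With the checkpoints hosted as standalone timm repos, the same weights should also be loadable through the new transformers TimmWrapper mentioned in the commit message; a hedged sketch, where the repo name and the required transformers version are assumptions:

    from transformers import AutoImageProcessor, AutoModel

    # Requires a transformers release that includes TimmWrapper; the repo name is
    # assumed to follow the timm org + full-model-name convention.
    repo = 'timm/vit_base_patch16_clip_224.metaclip_2pt5b'
    processor = AutoImageProcessor.from_pretrained(repo)
    model = AutoModel.from_pretrained(repo)  # loads the image tower via TimmWrapper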