Add ImageNet-12k intermediate fine-tunes of convnext base & large CLIP models, add first 1k fine-tune of xxlarge

pull/1760/head
Ross Wightman 2023-03-31 16:45:01 -07:00
parent 9aa1133bd2
commit beef7f0a22
1 changed file with 38 additions and 1 deletion
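
For anyone trying these weights, a minimal usage sketch (assuming a timm build that includes this commit and access to the Hugging Face Hub; the tag names below come straight from the cfgs in the diff):

import torch
import timm
from timm.data import resolve_data_config, create_transform

# hf_hub_id='timm/' in the cfg resolves to the Hub repo
# timm/convnext_base.clip_laion2b_augreg_ft_in12k_in1k
model = timm.create_model('convnext_base.clip_laion2b_augreg_ft_in12k_in1k', pretrained=True)
model.eval()

# Build the eval transform matching the pretrained cfg
# (OPENAI_CLIP mean/std, 256x256 input, crop_pct=1.0).
cfg = resolve_data_config({}, model=model)
transform = create_transform(**cfg)

x = torch.randn(1, *cfg['input_size'])  # stand-in for a transformed PIL image
with torch.no_grad():
    probs = model(x).softmax(dim=-1)  # 1000 ImageNet-1k classes

# The in12k intermediates below keep their 11821-class head; pass num_classes
# to re-head one for downstream fine-tuning, e.g.:
# timm.create_model('convnext_base.clip_laion2b_augreg_ft_in12k', pretrained=True, num_classes=10)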


@@ -772,6 +772,23 @@ default_cfgs = generate_default_cfgs({
     'convnextv2_small.untrained': _cfg(),
+    # CLIP weights, fine-tuned on in1k or in12k + in1k
+    'convnext_base.clip_laion2b_augreg_ft_in12k_in1k': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
+        input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0),
+    'convnext_base.clip_laion2b_augreg_ft_in12k_in1k_384': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
+        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
+    'convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_320': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
+        input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0),
+    'convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
+        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
     'convnext_base.clip_laion2b_augreg_ft_in1k': _cfg(
         hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
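
One detail worth noting in the cfgs above: the 384x384 entries pair crop_pct=1.0 with crop_mode='squash', so evaluation resizes the image directly to the target size rather than resize-plus-center-crop. A quick sketch of the difference, assuming create_transform passes these cfg fields through as in current timm releases:

from timm.data import create_transform

# Default crop_mode ('center'): resize the short side to 384 / crop_pct,
# then center-crop to 384x384.
center = create_transform(input_size=(3, 384, 384), crop_pct=1.0, is_training=False)

# 'squash': resize both dimensions straight to 384x384, aspect ratio not preserved.
squash = create_transform(
    input_size=(3, 384, 384), crop_pct=1.0, crop_mode='squash', is_training=False)

print(center)  # resizes the short side to 384, then center-crops the long side
print(squash)  # resizes to exactly 384x384 (the crop is a no-op when crop_pct == 1.0)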
@@ -790,9 +807,29 @@ default_cfgs = generate_default_cfgs({
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'
     ),
+    'convnext_xxlarge.clip_laion2b_soup_ft_in1k': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
+        input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0),
+    'convnext_base.clip_laion2b_augreg_ft_in12k': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821,
+        input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0),
+    'convnext_large_mlp.clip_laion2b_soup_ft_in12k_320': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821,
+        input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0),
+    'convnext_large_mlp.clip_laion2b_augreg_ft_in12k_384': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821,
+        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
+    'convnext_large_mlp.clip_laion2b_soup_ft_in12k_384': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821,
+        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
-    # CLIP based weights, original image tower weights and fine-tunes
+    # CLIP original image tower weights
     'convnext_base.clip_laion2b': _cfg(
         hf_hub_id='laion/CLIP-convnext_base_w-laion2B-s13B-b82K',
         hf_hub_filename='open_clip_pytorch_model.bin',