Added B/8 models to ViT.
parent 65419f60cc
commit 5220711d87
@@ -88,6 +88,9 @@ default_cfgs = {
         url='https://storage.googleapis.com/vit_models/augreg/'
             'B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz',
         input_size=(3, 384, 384), crop_pct=1.0),
+    'vit_base_patch8_224': _cfg(
+        url='https://storage.googleapis.com/vit_models/augreg/'
+            'B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz'),
     'vit_large_patch32_224': _cfg(
         url='',  # no official model weights for this combo, only for in21k
         ),
@@ -118,6 +121,9 @@ default_cfgs = {
     'vit_base_patch16_224_in21k': _cfg(
         url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
         num_classes=21843),
+    'vit_base_patch8_224_in21k': _cfg(
+        url='https://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
+        num_classes=21843),
     'vit_large_patch32_224_in21k': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth',
         num_classes=21843),
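Note (illustrative, not part of the diff): with these default_cfgs entries in place, the new B/8 variants become addressable by name through timm's model factory, and pretrained=True would pull the AugReg .npz weights from the URLs above. A minimal usage sketch, assuming a timm build that includes this commit:

import timm

# The new names should now appear in the model registry (assumes this commit is installed).
print(timm.list_models('vit_base_patch8_*'))

# pretrained=True would download the AugReg weights referenced in default_cfgs;
# pretrained=False just builds the architecture with random init.
model = timm.create_model('vit_base_patch8_224', pretrained=False)
model.eval()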
@@ -640,6 +646,16 @@ def vit_base_patch16_384(pretrained=False, **kwargs):
     return model


+@register_model
+def vit_base_patch8_224(pretrained=False, **kwargs):
+    """ ViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
+    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
+    """
+    model_kwargs = dict(patch_size=8, embed_dim=768, depth=12, num_heads=12, **kwargs)
+    model = _create_vision_transformer('vit_base_patch8_224', pretrained=pretrained, **model_kwargs)
+    return model
+
+
 @register_model
 def vit_large_patch32_224(pretrained=False, **kwargs):
     """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
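Note (illustrative, not from the commit): at 224x224 with patch_size=8 the patch grid is 28x28 = 784 tokens, versus 14x14 = 196 for B/16, so the B/8 model is substantially heavier at the same resolution. A quick forward-pass sanity sketch, assuming timm and torch are installed with this commit applied:

import timm
import torch

model = timm.create_model('vit_base_patch8_224', pretrained=False)
model.eval()

x = torch.randn(1, 3, 224, 224)  # 28x28 = 784 patches at patch_size=8
with torch.no_grad():
    out = model(x)
print(out.shape)  # expected: torch.Size([1, 1000]) -- default ImageNet-1k head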
@@ -756,6 +772,18 @@ def vit_base_patch16_224_in21k(pretrained=False, **kwargs):
     return model


+@register_model
+def vit_base_patch8_224_in21k(pretrained=False, **kwargs):
+    """ ViT-Base model (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
+    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
+    NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
+    """
+    model_kwargs = dict(
+        patch_size=8, embed_dim=768, depth=12, num_heads=12, **kwargs)
+    model = _create_vision_transformer('vit_base_patch8_224_in21k', pretrained=pretrained, **model_kwargs)
+    return model
+
+
 @register_model
 def vit_large_patch32_224_in21k(pretrained=False, **kwargs):
     """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
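Note (illustrative, not from the commit): the *_in21k variants ship the 21843-class head mentioned in the docstring; passing num_classes to create_model replaces it with a freshly initialized head for downstream fine-tuning. A sketch under those assumptions (num_classes=10 is a made-up downstream label count, not from the diff):

import timm

# num_classes here is hypothetical; it swaps out the 21843-way in21k head.
model = timm.create_model('vit_base_patch8_224_in21k', pretrained=False, num_classes=10)
print(model.head)  # Linear(in_features=768, out_features=10, bias=True)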