Mirror of https://github.com/huggingface/pytorch-image-models.git, synced 2025-06-03 15:01:08 +08:00
MaxVit, ViT, ConvNeXt, and EfficientNet-v2 updates
* Add support for TF weights and modelling specifics to MaxVit (testing ported weights)
* More fine-tuned CLIP ViT configs
* ConvNeXt and MaxVit updated to new pretrained cfgs use
* EfficientNetV2, MaxVit and ConvNeXt high res models use squash crop/resize
parent 3db4e346e0
commit 4d5c395160
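The ConvNeXt and ViT changes below move pretrained weight selection from one model entrypoint per weight set to a '<model>.<tag>' naming scheme handled by generate_defaults. A minimal usage sketch, assuming the tag-aware create_model behaviour that accompanies this refactor (the tag string is taken from the configs in the diff):

import timm

# pick a specific pretrained weight set by its tag rather than a dedicated entrypoint
model = timm.create_model('convnext_tiny.fb_in22k_ft_in1k', pretrained=True)
model.eval()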
@@ -21,111 +21,13 @@ from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from .helpers import named_apply, build_model_with_cfg, checkpoint_seq
from .layers import trunc_normal_, SelectAdaptivePool2d, DropPath, ConvMlp, Mlp, LayerNorm2d, LayerNorm, \
    create_conv2d, get_act_layer, make_divisible, to_ntuple
from ._pretrained import generate_defaults
from .registry import register_model


__all__ = ['ConvNeXt']  # model_registry will add each entrypoint fn to this


def _cfg(url='', **kwargs):
    return {
        'url': url,
        'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
        'crop_pct': 0.875, 'interpolation': 'bicubic',
        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
        'first_conv': 'stem.0', 'classifier': 'head.fc',
        **kwargs
    }


default_cfgs = dict(
    # timm specific variants
    convnext_atto=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_atto_d2-01bb0f51.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    convnext_atto_ols=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_atto_ols_a2-78d1c8f3.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    convnext_femto=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_femto_d1-d71d5b4c.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    convnext_femto_ols=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_femto_ols_d1-246bf2ed.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    convnext_pico=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_pico_d1-10ad7f0d.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    convnext_pico_ols=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_pico_ols_d1-611f0ca7.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_nano=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_nano_d1h-7eb4bdea.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_nano_ols=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_nano_ols_d1h-ae424a9a.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_tiny_hnf=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_tiny_hnf_a2h-ab7e9df2.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),

    convnext_tiny=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_small=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_base=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_large=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    convnext_tiny_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_small_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_base_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_large_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    convnext_xlarge_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_224_ema.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    convnext_tiny_384_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0),
    convnext_small_384_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0),
    convnext_base_384_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0),
    convnext_large_384_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0),
    convnext_xlarge_384_in22ft1k=_cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_384_ema.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0),

    convnext_tiny_in22k=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_224.pth", num_classes=21841),
    convnext_small_in22k=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_224.pth", num_classes=21841),
    convnext_base_in22k=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth", num_classes=21841),
    convnext_large_in22k=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth", num_classes=21841),
    convnext_xlarge_in22k=_cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth", num_classes=21841),
)


class ConvNeXtBlock(nn.Module):
    """ ConvNeXt Block
    There are two equivalent implementations:
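The _cfg helper above merges per-weight overrides into a shared set of defaults (input size, crop pct, normalization, first conv / classifier names). A small illustration, assuming the helper is pasted into a scratch session; the URL is a placeholder:

cfg = _cfg(
    url='https://example.invalid/convnext_demo.pth',  # placeholder, not a real weight file
    test_input_size=(3, 288, 288), test_crop_pct=1.0)
assert cfg['input_size'] == (3, 224, 224)  # default retained
assert cfg['test_crop_pct'] == 1.0         # override applied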
@@ -459,6 +361,107 @@ def _create_convnext(variant, pretrained=False, **kwargs):
    return model


def _cfg(url='', **kwargs):
    return {
        'url': url,
        'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
        'crop_pct': 0.875, 'interpolation': 'bicubic',
        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
        'first_conv': 'stem.0', 'classifier': 'head.fc',
        **kwargs
    }


default_cfgs = generate_defaults({
    # timm specific variants
    'convnext_atto.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_atto_d2-01bb0f51.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    'convnext_atto_ols.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_atto_ols_a2-78d1c8f3.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    'convnext_femto.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_femto_d1-d71d5b4c.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    'convnext_femto_ols.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_femto_ols_d1-246bf2ed.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    'convnext_pico.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_pico_d1-10ad7f0d.pth',
        test_input_size=(3, 288, 288), test_crop_pct=0.95),
    'convnext_pico_ols.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_pico_ols_d1-611f0ca7.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_nano.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_nano_d1h-7eb4bdea.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_nano_ols.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_nano_ols_d1h-ae424a9a.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_tiny_hnf.timm_in1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/convnext_tiny_hnf_a2h-ab7e9df2.pth',
        crop_pct=0.95, test_input_size=(3, 288, 288), test_crop_pct=1.0),

    'convnext_tiny.fb_in1k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_small.fb_in1k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_base.fb_in1k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_large.fb_in1k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth",
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_xlarge.untrained': _cfg(),

    'convnext_tiny.fb_in22k_ft_in1k': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_small.fb_in22k_ft_in1k': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_base.fb_in22k_ft_in1k': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_large.fb_in22k_ft_in1k': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_224.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'convnext_xlarge.fb_in22k_ft_in1k': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_224_ema.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    'convnext_tiny.fb_in22k_ft_in1k_384': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'convnext_small.fb_in22k_ft_in1k_384': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'convnext_base.fb_in22k_ft_in1k_384': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'convnext_large.fb_in22k_ft_in1k_384': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_384.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'convnext_xlarge.fb_in22k_ft_in1k_384': _cfg(
        url='https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_384_ema.pth',
        input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),

    'convnext_tiny_in22k.fb_in22k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_224.pth", num_classes=21841),
    'convnext_small_in22k.fb_in22k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_224.pth", num_classes=21841),
    'convnext_base_in22k.fb_in22k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth", num_classes=21841),
    'convnext_large_in22k.fb_in22k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth", num_classes=21841),
    'convnext_xlarge_in22k.fb_in22k': _cfg(
        url="https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth", num_classes=21841),
})


@register_model
def convnext_atto(pretrained=False, **kwargs):
    # timm femto variant (NOTE: still tweaking depths, will vary between 3-4M param, current is 3.7M
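Most of the timm-trained configs above also define larger test_input_size / test_crop_pct values next to the 224px training settings. A hedged sketch of how an evaluation script could pick those test-time values up via the existing resolve_data_config / create_transform utilities in timm.data (the use_test_size flag is assumed to be supported by the installed timm version):

import timm
from timm.data import resolve_data_config, create_transform

model = timm.create_model('convnext_nano.timm_in1k', pretrained=True)
# prefer test_input_size / test_crop_pct from the pretrained cfg when present
data_cfg = resolve_data_config({}, model=model, use_test_size=True)
eval_transform = create_transform(**data_cfg)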
@@ -569,105 +572,7 @@ def convnext_large(pretrained=False, **kwargs):


@register_model
def convnext_tiny_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), **kwargs)
    model = _create_convnext('convnext_tiny_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_small_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
    model = _create_convnext('convnext_small_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_base_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
    model = _create_convnext('convnext_base_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_large_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
    model = _create_convnext('convnext_large_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_xlarge_in22ft1k(pretrained=False, **kwargs):
def convnext_xlarge(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
    model = _create_convnext('convnext_xlarge_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_tiny_384_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), **kwargs)
    model = _create_convnext('convnext_tiny_384_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_small_384_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
    model = _create_convnext('convnext_small_384_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_base_384_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
    model = _create_convnext('convnext_base_384_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_large_384_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
    model = _create_convnext('convnext_large_384_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_xlarge_384_in22ft1k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
    model = _create_convnext('convnext_xlarge_384_in22ft1k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_tiny_in22k(pretrained=False, **kwargs):
    model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), **kwargs)
    model = _create_convnext('convnext_tiny_in22k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_small_in22k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
    model = _create_convnext('convnext_small_in22k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_base_in22k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
    model = _create_convnext('convnext_base_in22k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_large_in22k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
    model = _create_convnext('convnext_large_in22k', pretrained=pretrained, **model_args)
    return model


@register_model
def convnext_xlarge_in22k(pretrained=False, **kwargs):
    model_args = dict(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
    model = _create_convnext('convnext_xlarge_in22k', pretrained=pretrained, **model_args)
    model = _create_convnext('convnext_xlarge', pretrained=pretrained, **model_args)
    return model
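The per-weight entrypoints removed in this hunk (the _in22ft1k and _384_in22ft1k variants) stay reachable through the tagged config names added above. A rough mapping sketch for callers updating old names; this is an assumption drawn from the config keys in this commit, not a lookup table that exists in timm:

# hypothetical helper for migrating old entrypoint names to tagged names
legacy_to_tagged = {
    'convnext_tiny_in22ft1k': 'convnext_tiny.fb_in22k_ft_in1k',
    'convnext_tiny_384_in22ft1k': 'convnext_tiny.fb_in22k_ft_in1k_384',
}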
@@ -366,11 +366,11 @@ default_cfgs = {
    'tf_efficientnetv2_m': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m-cc09e0cd.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'tf_efficientnetv2_l': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_l-d664b728.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),

    'tf_efficientnetv2_s_in21ft1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_s_21ft1k-d7dafa41.pth',
@@ -379,15 +379,15 @@ default_cfgs = {
    'tf_efficientnetv2_m_in21ft1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m_21ft1k-bf41664a.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'tf_efficientnetv2_l_in21ft1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_l_21ft1k-60127a9d.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'tf_efficientnetv2_xl_in21ft1k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_xl_in21ft1k-06c35c48.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 384, 384), test_input_size=(3, 512, 512), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 512, 512), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),

    'tf_efficientnetv2_s_in21k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_s_21k-6337ad01.pth',
@@ -396,15 +396,15 @@ default_cfgs = {
    'tf_efficientnetv2_m_in21k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m_21k-361418a2.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), num_classes=21843,
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'tf_efficientnetv2_l_in21k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_l_21k-91a19ec9.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), num_classes=21843,
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),
    'tf_efficientnetv2_xl_in21k': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_xl_in21k-fd7e8abf.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), num_classes=21843,
        input_size=(3, 384, 384), test_input_size=(3, 512, 512), pool_size=(12, 12), crop_pct=1.0),
        input_size=(3, 384, 384), test_input_size=(3, 512, 512), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'),

    'tf_efficientnetv2_b0': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_b0-c7cc451f.pth',
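crop_mode='squash' switches the eval-time resize from the usual shortest-side resize plus centre crop to a direct resize to the target resolution, ignoring aspect ratio. A minimal torchvision sketch of the two behaviours, meant to illustrate the intent rather than reproduce timm's exact implementation:

from torchvision import transforms

size = 480
# default centre-crop style eval pipeline (crop_pct=1.0 -> short side resized to `size`)
center_crop_eval = transforms.Compose([
    transforms.Resize(size),
    transforms.CenterCrop(size),
])
# 'squash' style: both dimensions resized to `size`, aspect ratio not preserved
squash_eval = transforms.Resize((size, size))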
@@ -143,3 +143,17 @@ class GELU(nn.Module):

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        return F.gelu(input)


def gelu_tanh(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
    return F.gelu(x, approximate='tanh')


class GELUTanh(nn.Module):
    """Applies the Gaussian Error Linear Units function (w/ dummy inplace arg)
    """
    def __init__(self, inplace: bool = False):
        super(GELUTanh, self).__init__()

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        return F.gelu(input, approximate='tanh')
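gelu_tanh / GELUTanh wrap the tanh approximation of GELU that PyTorch exposes via F.gelu(..., approximate='tanh') (available from PyTorch 1.12 onward). A quick sanity-check sketch:

import torch
import torch.nn.functional as F

x = torch.randn(8)
exact = F.gelu(x)
approx = F.gelu(x, approximate='tanh')
print((exact - approx).abs().max())  # small difference, but not exactly zero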
@@ -28,6 +28,7 @@ _ACT_FN_DEFAULT = dict(
    celu=F.celu,
    selu=F.selu,
    gelu=gelu,
    gelu_tanh=gelu_tanh,
    sigmoid=sigmoid,
    tanh=tanh,
    hard_sigmoid=F.hardsigmoid if _has_hardsigmoid else hard_sigmoid,
@@ -71,6 +72,7 @@ _ACT_LAYER_DEFAULT = dict(
    celu=nn.CELU,
    selu=nn.SELU,
    gelu=GELU,
    gelu_tanh=GELUTanh,
    sigmoid=Sigmoid,
    tanh=Tanh,
    hard_sigmoid=nn.Hardsigmoid if _has_hardsigmoid else HardSigmoid,
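With the registry entries above, the new activation can be requested by name like any other timm activation. A short sketch assuming the usual get_act_layer lookup from timm.models.layers:

from timm.models.layers import get_act_layer

act_layer = get_act_layer('gelu_tanh')  # resolves to the GELUTanh class registered above
act = act_layer()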
File diff suppressed because it is too large
@@ -32,7 +32,7 @@ import torch.utils.checkpoint

from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD,\
    OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
from .helpers import build_model_with_cfg, resolve_pretrained_cfg, named_apply, adapt_input_conv, checkpoint_seq
from .helpers import build_model_with_cfg, named_apply, adapt_input_conv, checkpoint_seq
from .layers import PatchEmbed, Mlp, DropPath, trunc_normal_, lecun_normal_
from ._pretrained import generate_defaults
from .registry import register_model
@@ -795,13 +795,15 @@ default_cfgs = generate_defaults({
        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0),
    'vit_large_patch14_clip_336.laion2b_ft_in1k': _cfg(
        hf_hub_id='timm/vit_large_patch14_clip_336.laion2b_ft_in1k',
        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0, input_size=(3, 336, 336)),
        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD,
        crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
    'vit_huge_patch14_clip_224.laion2b_ft_in1k': _cfg(
        hf_hub_id='timm/vit_huge_patch14_clip_224.laion2b_ft_in1k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
    'vit_huge_patch14_clip_336.laion2b_ft_in1k': _cfg(
        hf_hub_id='',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, input_size=(3, 336, 336)),
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),

    'vit_base_patch32_clip_224.laion2b_ft_in12k_in1k': _cfg(
        hf_hub_id='timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k',
@@ -823,13 +825,15 @@ default_cfgs = generate_defaults({
        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0),
    'vit_large_patch14_clip_336.laion2b_ft_in12k_in1k': _cfg(
        hf_hub_id='timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k',
        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD, crop_pct=1.0, input_size=(3, 336, 336)),
        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD,
        crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),
    'vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k': _cfg(
        hf_hub_id='timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
    'vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k': _cfg(
        hf_hub_id='timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, input_size=(3, 336, 336)),
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),

    'vit_base_patch32_clip_224.laion2b_ft_in12k': _cfg(
        hf_hub_id='timm/vit_base_patch32_clip_224.laion2b_ft_in12k',
@@ -879,12 +883,16 @@ default_cfgs = generate_defaults({
    'vit_large_patch14_clip_224.openai_ft_in12k_in1k': _cfg(
        hf_hub_id='timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0),
    'vit_large_patch14_clip_336.openai_ft_in12k_in1k': _cfg(
        hf_hub_id='timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        crop_pct=1.0, input_size=(3, 336, 336), crop_mode='squash'),

    'vit_base_patch32_clip_224.openai_ft_in12k': _cfg(
        #hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k',
        hf_hub_id='timm/vit_base_patch32_clip_224.openai_ft_in12k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
    'vit_base_patch16_clip_224.openai_ft_in12k': _cfg(
        #hf_hub_id='timm/vit_base_patch16_clip_224.openai_ft_in12k',
        hf_hub_id='timm/vit_base_patch16_clip_224.openai_ft_in12k',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, num_classes=11821),
    'vit_large_patch14_clip_224.openai_ft_in12k': _cfg(
        hf_hub_id='timm/vit_large_patch14_clip_224.openai_ft_in12k',
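For the new 336px CLIP fine-tune configs above, a hedged end-to-end sketch of loading one and building its eval transform; whether create_transform honours crop_mode depends on the installed timm version:

import timm
from timm.data import resolve_data_config, create_transform

model = timm.create_model('vit_large_patch14_clip_336.laion2b_ft_in1k', pretrained=True)
data_cfg = resolve_data_config({}, model=model)
eval_transform = create_transform(**data_cfg)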