Add default norm_eps=1e-5 for convnext_xxlarge, improve kwarg merging for all convnext models

This commit is contained in:
Ross Wightman 2023-02-26 12:11:49 -08:00
parent 450b74affb
commit 1c13ef7b46

View File

@ -789,130 +789,121 @@ default_cfgs = generate_default_cfgs({
@register_model
def convnext_atto(pretrained=False, **kwargs):
# timm femto variant (NOTE: still tweaking depths, will vary between 3-4M param, current is 3.7M
model_args = dict(
depths=(2, 2, 6, 2), dims=(40, 80, 160, 320), conv_mlp=True, **kwargs)
model = _create_convnext('convnext_atto', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 6, 2), dims=(40, 80, 160, 320), conv_mlp=True)
model = _create_convnext('convnext_atto', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_atto_ols(pretrained=False, **kwargs):
# timm femto variant with overlapping 3x3 conv stem, wider than non-ols femto above, current param count 3.7M
model_args = dict(
depths=(2, 2, 6, 2), dims=(40, 80, 160, 320), conv_mlp=True, stem_type='overlap_tiered', **kwargs)
model = _create_convnext('convnext_atto_ols', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 6, 2), dims=(40, 80, 160, 320), conv_mlp=True, stem_type='overlap_tiered')
model = _create_convnext('convnext_atto_ols', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_femto(pretrained=False, **kwargs):
# timm femto variant
model_args = dict(
depths=(2, 2, 6, 2), dims=(48, 96, 192, 384), conv_mlp=True, **kwargs)
model = _create_convnext('convnext_femto', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 6, 2), dims=(48, 96, 192, 384), conv_mlp=True)
model = _create_convnext('convnext_femto', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_femto_ols(pretrained=False, **kwargs):
# timm femto variant
model_args = dict(
depths=(2, 2, 6, 2), dims=(48, 96, 192, 384), conv_mlp=True, stem_type='overlap_tiered', **kwargs)
model = _create_convnext('convnext_femto_ols', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 6, 2), dims=(48, 96, 192, 384), conv_mlp=True, stem_type='overlap_tiered')
model = _create_convnext('convnext_femto_ols', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_pico(pretrained=False, **kwargs):
# timm pico variant
model_args = dict(
depths=(2, 2, 6, 2), dims=(64, 128, 256, 512), conv_mlp=True, **kwargs)
model = _create_convnext('convnext_pico', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 6, 2), dims=(64, 128, 256, 512), conv_mlp=True)
model = _create_convnext('convnext_pico', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_pico_ols(pretrained=False, **kwargs):
# timm nano variant with overlapping 3x3 conv stem
model_args = dict(
depths=(2, 2, 6, 2), dims=(64, 128, 256, 512), conv_mlp=True, stem_type='overlap_tiered', **kwargs)
model = _create_convnext('convnext_pico_ols', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 6, 2), dims=(64, 128, 256, 512), conv_mlp=True, stem_type='overlap_tiered')
model = _create_convnext('convnext_pico_ols', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_nano(pretrained=False, **kwargs):
# timm nano variant with standard stem and head
model_args = dict(
depths=(2, 2, 8, 2), dims=(80, 160, 320, 640), conv_mlp=True, **kwargs)
model = _create_convnext('convnext_nano', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 8, 2), dims=(80, 160, 320, 640), conv_mlp=True)
model = _create_convnext('convnext_nano', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_nano_ols(pretrained=False, **kwargs):
# experimental nano variant with overlapping conv stem
model_args = dict(
depths=(2, 2, 8, 2), dims=(80, 160, 320, 640), conv_mlp=True, stem_type='overlap', **kwargs)
model = _create_convnext('convnext_nano_ols', pretrained=pretrained, **model_args)
model_args = dict(depths=(2, 2, 8, 2), dims=(80, 160, 320, 640), conv_mlp=True, stem_type='overlap')
model = _create_convnext('convnext_nano_ols', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_tiny_hnf(pretrained=False, **kwargs):
# experimental tiny variant with norm before pooling in head (head norm first)
model_args = dict(
depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), head_norm_first=True, conv_mlp=True, **kwargs)
model = _create_convnext('convnext_tiny_hnf', pretrained=pretrained, **model_args)
model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), head_norm_first=True, conv_mlp=True)
model = _create_convnext('convnext_tiny_hnf', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_tiny(pretrained=False, **kwargs):
model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), **kwargs)
model = _create_convnext('convnext_tiny', pretrained=pretrained, **model_args)
model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768))
model = _create_convnext('convnext_tiny', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_small(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
model = _create_convnext('convnext_small', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768])
model = _create_convnext('convnext_small', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_base(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
model = _create_convnext('convnext_base', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024])
model = _create_convnext('convnext_base', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_large(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
model = _create_convnext('convnext_large', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536])
model = _create_convnext('convnext_large', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_large_mlp(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], head_hidden_size=1536, **kwargs)
model = _create_convnext('convnext_large_mlp', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], head_hidden_size=1536)
model = _create_convnext('convnext_large_mlp', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_xlarge(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
model = _create_convnext('convnext_xlarge', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048])
model = _create_convnext('convnext_xlarge', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnext_xxlarge(pretrained=False, **kwargs):
model_args = dict(depths=[3, 4, 30, 3], dims=[384, 768, 1536, 3072], **kwargs)
model = _create_convnext('convnext_xxlarge', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 4, 30, 3], dims=[384, 768, 1536, 3072], norm_eps=kwargs.pop('norm_eps', 1e-5))
model = _create_convnext('convnext_xxlarge', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@ -920,8 +911,8 @@ def convnext_xxlarge(pretrained=False, **kwargs):
def convnextv2_atto(pretrained=False, **kwargs):
# timm femto variant (NOTE: still tweaking depths, will vary between 3-4M param, current is 3.7M
model_args = dict(
depths=(2, 2, 6, 2), dims=(40, 80, 160, 320), use_grn=True, ls_init_value=None, conv_mlp=True, **kwargs)
model = _create_convnext('convnextv2_atto', pretrained=pretrained, **model_args)
depths=(2, 2, 6, 2), dims=(40, 80, 160, 320), use_grn=True, ls_init_value=None, conv_mlp=True)
model = _create_convnext('convnextv2_atto', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@ -929,8 +920,8 @@ def convnextv2_atto(pretrained=False, **kwargs):
def convnextv2_femto(pretrained=False, **kwargs):
# timm femto variant
model_args = dict(
depths=(2, 2, 6, 2), dims=(48, 96, 192, 384), use_grn=True, ls_init_value=None, conv_mlp=True, **kwargs)
model = _create_convnext('convnextv2_femto', pretrained=pretrained, **model_args)
depths=(2, 2, 6, 2), dims=(48, 96, 192, 384), use_grn=True, ls_init_value=None, conv_mlp=True)
model = _create_convnext('convnextv2_femto', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@ -938,8 +929,8 @@ def convnextv2_femto(pretrained=False, **kwargs):
def convnextv2_pico(pretrained=False, **kwargs):
# timm pico variant
model_args = dict(
depths=(2, 2, 6, 2), dims=(64, 128, 256, 512), use_grn=True, ls_init_value=None, conv_mlp=True, **kwargs)
model = _create_convnext('convnextv2_pico', pretrained=pretrained, **model_args)
depths=(2, 2, 6, 2), dims=(64, 128, 256, 512), use_grn=True, ls_init_value=None, conv_mlp=True)
model = _create_convnext('convnextv2_pico', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@ -947,42 +938,41 @@ def convnextv2_pico(pretrained=False, **kwargs):
def convnextv2_nano(pretrained=False, **kwargs):
# timm nano variant with standard stem and head
model_args = dict(
depths=(2, 2, 8, 2), dims=(80, 160, 320, 640), use_grn=True, ls_init_value=None, conv_mlp=True, **kwargs)
model = _create_convnext('convnextv2_nano', pretrained=pretrained, **model_args)
depths=(2, 2, 8, 2), dims=(80, 160, 320, 640), use_grn=True, ls_init_value=None, conv_mlp=True)
model = _create_convnext('convnextv2_nano', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnextv2_tiny(pretrained=False, **kwargs):
model_args = dict(
depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), use_grn=True, ls_init_value=None, **kwargs)
model = _create_convnext('convnextv2_tiny', pretrained=pretrained, **model_args)
model_args = dict(depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), use_grn=True, ls_init_value=None)
model = _create_convnext('convnextv2_tiny', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnextv2_small(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], use_grn=True, ls_init_value=None, **kwargs)
model = _create_convnext('convnextv2_small', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], use_grn=True, ls_init_value=None)
model = _create_convnext('convnextv2_small', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnextv2_base(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], use_grn=True, ls_init_value=None, **kwargs)
model = _create_convnext('convnextv2_base', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], use_grn=True, ls_init_value=None)
model = _create_convnext('convnextv2_base', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnextv2_large(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], use_grn=True, ls_init_value=None, **kwargs)
model = _create_convnext('convnextv2_large', pretrained=pretrained, **model_args)
model_args = dict(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], use_grn=True, ls_init_value=None)
model = _create_convnext('convnextv2_large', pretrained=pretrained, **dict(model_args, **kwargs))
return model
@register_model
def convnextv2_huge(pretrained=False, **kwargs):
model_args = dict(depths=[3, 3, 27, 3], dims=[352, 704, 1408, 2816], use_grn=True, ls_init_value=None, **kwargs)
model = _create_convnext('convnextv2_huge', pretrained=pretrained, **model_args)
return model
model_args = dict(depths=[3, 3, 27, 3], dims=[352, 704, 1408, 2816], use_grn=True, ls_init_value=None)
model = _create_convnext('convnextv2_huge', pretrained=pretrained, **dict(model_args, **kwargs))
return model