Add EVA-large models
This commit is contained in:
parent 6a92587e0d
commit 7c4ed4d5a4

README.md | 11 +++++++++++
README.md

@@ -21,6 +21,17 @@ And a big thanks to all GitHub sponsors who helped with some of my costs before
## What's New

### Dec 8, 2022

* Add 'EVA l' to `vision_transformer.py`, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)
* original source: https://github.com/baaivision/EVA

| model                                     | top1 | param_count | gmac  | macts | hub                                     |
|:------------------------------------------|-----:|------------:|------:|------:|:----------------------------------------|
| eva_large_patch14_336.in22k_ft_in22k_in1k | 89.2 |       304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_336.in22k_ft_in1k       | 88.7 |       304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_196.in22k_ft_in22k_in1k | 88.6 |       304.1 |  61.6 |  63.5 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_196.in22k_ft_in1k       | 87.9 |       304.1 |  61.6 |  63.5 | [link](https://huggingface.co/BAAI/EVA) |
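
As a usage sketch (not part of this commit), one of the checkpoints above can be loaded through `timm`'s standard factory; the CLIP normalization and input size set in the pretrained config below are picked up automatically:

```python
import torch
import timm
from timm.data import resolve_data_config, create_transform

# Model name from the table above; pretrained=True pulls the weights
# from the BAAI/EVA hub repo referenced in the cfg entries.
model = timm.create_model('eva_large_patch14_196.in22k_ft_in1k', pretrained=True)
model.eval()

# Resolve eval preprocessing from the model's pretrained config
# (OpenAI CLIP mean/std, 196x196 input, crop_pct=1.0).
config = resolve_data_config({}, model=model)
transform = create_transform(**config)

with torch.no_grad():
    logits = model(torch.randn(1, *config['input_size']))
print(logits.shape)  # torch.Size([1, 1000]) - ImageNet-1k classes
```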

### Dec 6, 2022

* Add 'EVA g', BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to `beit.py`.
* original source: https://github.com/baaivision/EVA

vision_transformer.py

@@ -933,6 +933,25 @@ default_cfgs = generate_default_cfgs({
    'vit_small_patch16_36x1_224': _cfg(url=''),
    'vit_small_patch16_18x2_224': _cfg(url=''),
    'vit_base_patch16_18x2_224': _cfg(url=''),

    # EVA fine-tuned weights from MAE style MIM - EVA-CLIP target pretrain
    # https://github.com/baaivision/EVA/blob/7ecf2c0a370d97967e86d047d7af9188f78d2df3/eva/README.md#eva-l-learning-better-mim-representations-from-eva-clip
    'eva_large_patch14_196.in22k_ft_in22k_in1k': _cfg(
        hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_196px_21k_to_1k_ft_88p6.pt',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 196, 196), crop_pct=1.0),
    'eva_large_patch14_336.in22k_ft_in22k_in1k': _cfg(
        hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_336px_21k_to_1k_ft_89p2.pt',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 336, 336), crop_pct=1.0, crop_mode='squash'),
    'eva_large_patch14_196.in22k_ft_in1k': _cfg(
        hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_196px_1k_ft_88p0.pt',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 196, 196), crop_pct=1.0),
    'eva_large_patch14_336.in22k_ft_in1k': _cfg(
        hf_hub_id='BAAI/EVA', hf_hub_filename='eva_l_psz14_336px_1k_ft_88p65.pt',
        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 336, 336), crop_pct=1.0, crop_mode='squash'),
})
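
Two details in these entries are worth noting: `crop_pct=1.0` leaves no center-crop slack at eval, and `crop_mode='squash'` on the 336px configs resizes straight to the target size rather than resize-then-crop. A minimal sketch of inspecting the resolved config on an instantiated model (inspection code is illustrative, assuming a timm build that includes this commit):

```python
import timm

# pretrained=False skips the weight download but still attaches the cfg.
model = timm.create_model('eva_large_patch14_336.in22k_ft_in22k_in1k', pretrained=False)
cfg = model.pretrained_cfg

print(cfg['input_size'])        # (3, 336, 336)
print(cfg['crop_pct'])          # 1.0
print(cfg['crop_mode'])         # 'squash' - resize directly to 336x336, no crop
print(cfg['mean'], cfg['std'])  # OpenAI CLIP normalization constants
```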

@@ -1354,3 +1373,21 @@ def vit_base_patch16_18x2_224(pretrained=False, **kwargs):
        patch_size=16, embed_dim=768, depth=18, num_heads=12, init_values=1e-5, block_fn=ParallelBlock, **kwargs)
    model = _create_vision_transformer('vit_base_patch16_18x2_224', pretrained=pretrained, **model_kwargs)
    return model

@register_model
def eva_large_patch14_196(pretrained=False, **kwargs):
    """ EVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrain"""
    model_kwargs = dict(
        patch_size=14, embed_dim=1024, depth=24, num_heads=16, global_pool='avg', **kwargs)
    model = _create_vision_transformer('eva_large_patch14_196', pretrained=pretrained, **model_kwargs)
    return model

@register_model
def eva_large_patch14_336(pretrained=False, **kwargs):
    """ EVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrain"""
    model_kwargs = dict(
        patch_size=14, embed_dim=1024, depth=24, num_heads=16, global_pool='avg', **kwargs)
    model = _create_vision_transformer('eva_large_patch14_336', pretrained=pretrained, **model_kwargs)
    return model
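
As a quick sanity check (a sketch, not part of the diff), both new entry points can be instantiated without weights and compared against the parameter counts in the README table:

```python
import torch
import timm

for name in ('eva_large_patch14_196', 'eva_large_patch14_336'):
    model = timm.create_model(name, pretrained=False)
    n_params = sum(p.numel() for p in model.parameters())
    print(f'{name}: {n_params / 1e6:.1f}M params')  # ~304M each, per the table

# Same ViT-L/14 trunk in both variants; only the input resolution differs.
model = timm.create_model('eva_large_patch14_196', pretrained=False)
tokens = model.forward_features(torch.randn(1, 3, 196, 196))
print(tokens.shape)  # torch.Size([1, 197, 1024]) - 14x14 patches + class token
```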