Parametrizable attn_class in models/ViT

pull/498/head
Wala-Touati 2024-11-18 16:40:00 +00:00
parent ace39d1b53
commit 235f50669e
1 changed file with 8 additions and 8 deletions


@@ -337,49 +337,49 @@ def init_weights_vit_timm(module: nn.Module, name: str = ""):
             nn.init.zeros_(module.bias)


-def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
+def vit_small(patch_size=16, attn_class: nn.Module = MemEffAttention, num_register_tokens=0, **kwargs):
     model = DinoVisionTransformer(
         patch_size=patch_size,
         embed_dim=384,
         depth=12,
         num_heads=6,
         mlp_ratio=4,
-        block_fn=partial(Block, attn_class=MemEffAttention),
+        block_fn=partial(Block, attn_class=attn_class),
         num_register_tokens=num_register_tokens,
         **kwargs,
     )
     return model


-def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
+def vit_base(patch_size=16, attn_class: nn.Module = MemEffAttention, num_register_tokens=0, **kwargs):
     model = DinoVisionTransformer(
         patch_size=patch_size,
         embed_dim=768,
         depth=12,
         num_heads=12,
         mlp_ratio=4,
-        block_fn=partial(Block, attn_class=MemEffAttention),
+        block_fn=partial(Block, attn_class=attn_class),
         num_register_tokens=num_register_tokens,
         **kwargs,
     )
     return model


-def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
+def vit_large(patch_size=16, attn_class: nn.Module = MemEffAttention, num_register_tokens=0, **kwargs):
     model = DinoVisionTransformer(
         patch_size=patch_size,
         embed_dim=1024,
         depth=24,
         num_heads=16,
         mlp_ratio=4,
-        block_fn=partial(Block, attn_class=MemEffAttention),
+        block_fn=partial(Block, attn_class=attn_class),
         num_register_tokens=num_register_tokens,
         **kwargs,
     )
     return model


-def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
+def vit_giant2(patch_size=16, attn_class: nn.Module = MemEffAttention, num_register_tokens=0, **kwargs):
     """
     Close to ViT-giant, with embed_dim 1536 and 24 heads => embed_dim per head 64
     """
@@ -389,7 +389,7 @@ def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
         depth=40,
         num_heads=24,
         mlp_ratio=4,
-        block_fn=partial(Block, attn_class=MemEffAttention),
+        block_fn=partial(Block, attn_class=attn_class),
         num_register_tokens=num_register_tokens,
         **kwargs,
     )
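
With this change, callers can choose the attention implementation per model instead of being hard-wired to MemEffAttention, while the default keeps existing behaviour backward compatible. Below is a minimal usage sketch, not part of the diff; it assumes the plain Attention class lives in dinov2/layers/attention.py and that module paths follow the upstream DINOv2 layout, which may differ in this fork.

# Hypothetical usage of the new attn_class parameter (import paths are assumptions).
from dinov2.layers.attention import Attention, MemEffAttention
from dinov2.models.vision_transformer import vit_small

# Default behaviour is unchanged: blocks use the memory-efficient attention.
model_default = vit_small(patch_size=16, num_register_tokens=0)

# New: swap in the plain Attention implementation, e.g. when xFormers is unavailable.
model_plain = vit_small(patch_size=16, attn_class=Attention, num_register_tokens=0)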