mirror of
https://github.com/open-mmlab/mmpretrain.git
synced 2025-06-03 14:59:18 +08:00
* add t2t backbone * register t2t_vit * add t2t_vit config * [Temp] Align posterize transform with timm. * Fix lint * Refactor t2t-vit * Add config for t2t-vit * Add metafile and README for t2t-vit * Add unit tests * configs * Update metafile and README * Improve docstring * Fix batch size which should be 8x64 instead of 8x128 * Fix typo * Update model zoo * Update training augments config. * Move some arguments of T2TModule to T2TViT * Update docs. * Update unit test Co-authored-by: HIT-cwh <2892770585@qq.com>
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
# model settings
#
# Tokens-to-Token ViT (T2T-ViT-t-24) classifier config for ImageNet-1k.
# Shared constants are defined once so backbone/head/train_cfg stay in sync.

embed_dims = 512      # transformer embedding width, reused by backbone and head
num_classes = 1000    # ImageNet-1k classes, reused by head and train-time augments

model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='T2T_ViT',
        img_size=224,
        in_channels=3,
        embed_dims=embed_dims,
        # Tokens-to-Token module settings; use_performer=False selects the
        # standard transformer attention in the T2T stage.
        t2t_cfg=dict(
            token_dims=64,
            use_performer=False,
        ),
        num_layers=24,
        layer_cfgs=dict(
            num_heads=8,
            feedforward_channels=3 * embed_dims,  # mlp_ratio = 3
        ),
        drop_path_rate=0.1,
        # Standard ViT-style weight init: truncated-normal linears,
        # unit-weight/zero-bias LayerNorms.
        init_cfg=[
            dict(type='TruncNormal', layer='Linear', std=.02),
            dict(type='Constant', layer='LayerNorm', val=1., bias=0.),
        ]),
    neck=None,
    head=dict(
        type='VisionTransformerClsHead',
        num_classes=num_classes,
        in_channels=embed_dims,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        topk=(1, 5),
        init_cfg=dict(type='TruncNormal', layer='Linear', std=.02)),
    # Batch-level training augments: MixUp and CutMix, each applied with
    # probability 0.5.
    train_cfg=dict(augments=[
        dict(type='BatchMixup', alpha=0.8, prob=0.5, num_classes=num_classes),
        dict(type='BatchCutMix', alpha=1.0, prob=0.5, num_classes=num_classes),
    ]))