mirror of
https://github.com/open-mmlab/mmsegmentation.git
synced 2025-06-03 22:03:48 +08:00
* add Swin Transformer * add Swin Transformer * fixed import * Add some swin training settings. * Fix some filename error. * Fix attribute name: pretrain -> pretrained * Upload mmcls implementation of swin transformer. * Refactor Swin Transformer to follow mmcls style. * Refactor init_weigths of swin_transformer.py * Fix lint * Match inference precision * Add some comments * Add swin_convert to load official style ckpt * Remove arg: auto_pad * 1. Complete comments for each block; 2. Correct weight convert function; 3. Fix the pad of Patch Merging; * Clean function args. * Fix vit unit test. * 1. Add swin transformer unit tests; 2. Fix some pad bug; 3. Modify config to adapt new swin implementation; * Modify config arg * Update readme.md of swin * Fix config arg error and Add some swin benchmark msg. * Add MeM and ms test content for readme.md of swin transformer. * Fix doc string of swin module * 1. Register swin transformer to model list; 2. Modify pth url which keep meta attribute; * Update swin.py * Merge config settings. * Modify config style. * Update README.md Add ViT link * Modify main readme.md Co-authored-by: Jiarui XU <xvjiarui0826@gmail.com> Co-authored-by: sennnnn <201730271412@mail.scut.edu.cn> Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>
56 lines
1.6 KiB
Python
56 lines
1.6 KiB
Python
# model settings
#
# Declarative model description built from plain dicts. Each nested dict
# carries a ``type`` string naming the component plus that component's keyword
# arguments. Nothing is instantiated in this file.
# NOTE(review): presumably consumed by a registry-based builder that maps each
# ``type`` string to a class -- confirm against the loading code.

# Normalization config shared by the two heads below (decode + auxiliary).
norm_cfg = dict(type='SyncBN', requires_grad=True)
# Normalization config handed to the backbone only.
backbone_norm_cfg = dict(type='LN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    # No checkpoint path is set here.
    pretrained=None,
    backbone=dict(
        type='SwinTransformer',
        pretrain_img_size=224,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        # One entry per stage: 4 stages, matching the 4 entries in
        # ``num_heads``, ``strides`` and ``out_indices``.
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        strides=(4, 2, 2, 2),
        # All four stage outputs are requested; the decode head below indexes
        # all of them via ``in_index=[0, 1, 2, 3]``.
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.3,
        use_abs_pos_embed=False,
        act_cfg=dict(type='GELU'),
        norm_cfg=backbone_norm_cfg,
        # NOTE(review): presumably selects the key layout expected when
        # loading pretrained weights -- confirm in the backbone code.
        pretrain_style='official'),
    decode_head=dict(
        type='UPerHead',
        # 96 * (1, 2, 4, 8): consistent with ``embed_dims=96`` doubling at
        # each of the four backbone stages. Must be kept in sync with the
        # backbone settings if ``embed_dims`` changes.
        in_channels=[96, 192, 384, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        # NOTE(review): 19 is dataset-specific; must agree with the auxiliary
        # head's ``num_classes`` below.
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        # Reads backbone output index 2, whose width (384) equals
        # ``decode_head.in_channels[2]``.
        in_channels=384,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        # Same loss type as the decode head, at a lower weight (0.4 vs 1.0).
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    # NOTE(review): 'whole' presumably means inference on the full image in
    # one pass rather than tiled -- confirm against the segmentor.
    test_cfg=dict(mode='whole'))