# Residue from the file viewer this config was copied from (not part of the
# configuration itself): 112 lines, 3.4 KiB, Python.
# Base config files this config builds on, as named by their paths: the
# ADE20K dataset settings, the default runtime, and the 80k schedule.
_base_ = [
    '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Spatial size shared with the data preprocessor through its `size` argument.
crop_size = (512, 512)

# Settings passed to the preprocessor registered as 'SegDataPreProcessor'.
data_preprocessor = dict(
    type='SegDataPreProcessor',
    # Three-value mean / std lists (one entry per image channel).
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    # `pad_val` and `seg_pad_val` are distinct fill values; NOTE(review):
    # presumably for image and segmentation-map padding respectively - confirm
    # against the SegDataPreProcessor documentation.
    pad_val=0,
    size=crop_size,
    seg_pad_val=255,
)
# model settings
# Normalization config reused by the backbone, the kernel-generate head and
# the auxiliary head below.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# Number of iterative stages: passed to the decode head and used to build one
# KernelUpdateHead config per stage.
num_stages = 3
# Forwarded to every KernelUpdateHead as its `conv_kernel_size`.
conv_kernel_size = 1

model = dict(
    type='EncoderDecoder',
    # `data_preprocessor` is the module-level dict defined earlier in this file.
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        # All four backbone stages are exposed; the heads below pick from them
        # through `in_index`.
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True,
    ),
    decode_head=dict(
        type='IterativeDecodeHead',
        num_stages=num_stages,
        # One identically configured (but separate) dict per stage.
        kernel_update_head=[
            dict(
                type='KernelUpdateHead',
                num_classes=150,
                num_ffn_fcs=2,
                num_heads=8,
                num_mask_fcs=1,
                feedforward_channels=2048,
                in_channels=512,
                out_channels=512,
                dropout=0.0,
                conv_kernel_size=conv_kernel_size,
                ffn_act_cfg=dict(type='ReLU', inplace=True),
                with_ffn=True,
                feat_transform_cfg=dict(
                    conv_cfg=dict(type='Conv2d'),
                    act_cfg=None,
                ),
                kernel_updator_cfg=dict(
                    type='KernelUpdator',
                    in_channels=256,
                    feat_channels=256,
                    out_channels=256,
                    act_cfg=dict(type='ReLU', inplace=True),
                    norm_cfg=dict(type='LN'),
                ),
            )
            for _ in range(num_stages)
        ],
        # Consumes all four backbone outputs (`in_index` 0-3).
        kernel_generate_head=dict(
            type='UPerHead',
            in_channels=[256, 512, 1024, 2048],
            in_index=[0, 1, 2, 3],
            pool_scales=(1, 2, 3, 6),
            channels=512,
            dropout_ratio=0.1,
            num_classes=150,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0,
            ),
        ),
    ),
    # Auxiliary head on backbone output index 2, with a smaller loss weight
    # (0.4) than the kernel-generate head's loss (1.0).
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=0.4,
        ),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'),
)
# optimizer
optim_wrapper = dict(
    # MMEngine config convention: replace the inherited `optim_wrapper` from
    # the base configs entirely instead of merging these keys into it.
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005),
    # Gradient clipping settings: norm type 2, maximum norm 1.
    clip_grad=dict(max_norm=1, norm_type=2),
)
# learning policy
# Both schedulers are iteration-based (`by_epoch=False`) and cover
# back-to-back ranges: [0, 1000) and [1000, 80000).
param_scheduler = [
    # Linear schedule over the first 1000 iterations, starting at a factor
    # of 0.001.
    dict(
        type='LinearLR',
        start_factor=0.001,
        by_epoch=False,
        begin=0,
        end=1000,
    ),
    # Multi-step schedule for the remaining iterations, with milestones at
    # 60000 and 72000.
    dict(
        type='MultiStepLR',
        begin=1000,
        end=80000,
        milestones=[60000, 72000],
        by_epoch=False,
    ),
]
# In the K-Net implementation we use batch size 2 per GPU as default.
# Only `batch_size` and `num_workers` are set here; NOTE(review): the remaining
# dataloader keys presumably come from the base dataset config - confirm.
train_dataloader = dict(batch_size=2, num_workers=2)
val_dataloader = dict(batch_size=1, num_workers=4)
# Testing reuses the validation dataloader settings (same dict object).
test_dataloader = val_dataloader