[Refactor] add benchmark configs
parent 00e51990fb
commit 67716e2011
@@ -0,0 +1,49 @@
# dataset settings
data_source = 'CIFAR10'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.201])
train_pipeline = [
    dict(type='RandomCrop', size=32, padding=4),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = []

# prefetch
prefetch = False
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=128,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/cifar10',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/cifar10',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch),
    test=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/cifar10',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))
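These dataset files are pulled in by the benchmark configs through `_base_`, but each one can also be loaded on its own, which is a quick way to check that the `prefetch` switch appended the `ToTensor`/`Normalize` steps as expected. A minimal sketch, assuming mmcv is installed; the file path is hypothetical since the diff does not show filenames.

```python
# Minimal sketch: load one dataset config with mmcv and inspect the result.
from mmcv import Config

cfg = Config.fromfile('path/to/_base_/datasets/cifar10.py')  # hypothetical path

# With prefetch=False, ToTensor/Normalize were appended in-place above.
print([step['type'] for step in cfg.train_pipeline])
# ['RandomCrop', 'RandomHorizontalFlip', 'ToTensor', 'Normalize']
print(cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu)  # 128 2
```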
@@ -0,0 +1,46 @@
# dataset settings
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
]

# prefetch
prefetch = False
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/train',
            ann_file='data/imagenet/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/val',
            ann_file='data/imagenet/meta/val.txt',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))
@@ -0,0 +1,46 @@
# dataset settings
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
]

# prefetch
prefetch = False
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/iNaturalist2018/train_val2018',
            ann_file='data/iNaturalist2018/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/iNaturalist2018/train_val2018',
            ann_file='data/iNaturalist2018/meta/val.txt',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))
@@ -0,0 +1,48 @@
# dataset settings
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=256),
    dict(type='RandomCrop', size=224),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
]

# prefetch
prefetch = False
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/Places205/train',
            ann_file='data/Places205/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/Places205/val',
            ann_file='data/Places205/meta/val.txt',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))
@@ -0,0 +1,20 @@
train_cfg = {}
test_cfg = {}
optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable

# runtime settings
dist_params = dict(backend='nccl')
cudnn_benchmark = True
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
persistent_workers = True
@@ -0,0 +1,15 @@
model = dict(
    type='Classification',
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=3,
        num_stages=4,
        strides=(1, 2, 2, 2),
        dilations=(1, 1, 1, 1),
        out_indices=[4],  # 0: conv-1, x: stage-x
        norm_cfg=dict(type='BN'),
        frozen_stages=-1),
    head=dict(
        type='ClsHead', with_avg_pool=True, in_channels=2048,
        num_classes=1000))
@@ -0,0 +1,17 @@
model = dict(
    type='Classification',
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=3,
        out_indices=[0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
        norm_cfg=dict(type='BN'),
        frozen_stages=-1),
    head=dict(
        type='MultiClsHead',
        pool_type='specified',
        in_indices=[0, 1, 2, 3, 4],
        with_last_layer_unpool=False,
        backbone='resnet50',
        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
        num_classes=1000))
@@ -0,0 +1,8 @@
# optimizer
optimizer = dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.)

# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0.)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)
@@ -0,0 +1,8 @@
# optimizer
optimizer = dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6)

# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0.)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
@@ -0,0 +1,8 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4)

# learning policy
lr_config = dict(policy='step', step=[60, 80])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
@@ -0,0 +1,19 @@
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/cifar10.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(head=dict(num_classes=10))

# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=5e-4)

# learning policy
lr_config = dict(policy='step', step=[150, 250])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=350)
checkpoint_config = dict(interval=50)
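The four `_base_` entries are merged by mmcv's config loader before the keys defined in this file are applied, so `num_classes=10`, the 5e-4 weight-decay SGD optimizer and the [150, 250] step schedule override whatever the bases set. A small sketch of how that resolves, assuming mmcv; the config filename is hypothetical.

```python
# Sketch: _base_ files are merged first, then local keys override them.
from mmcv import Config

cfg = Config.fromfile('path/to/cifar10_benchmark_config.py')  # hypothetical name

print(cfg.model.head.num_classes)  # 10, overriding the 1000 set in resnet50.py
print(cfg.optimizer.weight_decay)  # 0.0005, overriding sgd_steplr-100e.py
print(cfg.lr_config.step)          # [150, 250]
print(cfg.runner.max_epochs)       # 350
```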
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-2-20e_in1k-10pct.py'

# model settings
model = dict(with_sobel=True, backbone=dict(in_channels=2))
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=100)})
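The `'\\Ahead.'` key is a regular expression (`\A` anchors at the start of the parameter name), so every parameter whose name begins with `head.` trains at 100x the base learning rate while the frozen backbone keeps the base rate. The sketch below only illustrates the grouping idea; the framework's actual optimizer constructor handles more cases (weight-decay multipliers, norm layers, and so on).

```python
# Illustrative sketch of regex-keyed paramwise_options; not the real constructor.
import re


def build_param_groups(model, base_lr, paramwise_options):
    groups = []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        group = {'params': [param], 'lr': base_lr}
        for pattern, overrides in paramwise_options.items():
            if re.search(pattern, name):  # r'\Ahead.' matches names starting with 'head.'
                group['lr'] = base_lr * overrides.get('lr_mult', 1.0)
        groups.append(group)
    return groups

# e.g. torch.optim.SGD(build_param_groups(model, 0.1, {r'\Ahead.': dict(lr_mult=100)}),
#                      momentum=0.9, weight_decay=1e-4)
```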
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=10)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
@@ -0,0 +1,34 @@
_base_ = [
    '../../_base_/models/resnet50.py',
    '../../_base_/datasets/imagenet.py',
    '../../_base_/schedules/sgd_steplr-100e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))

# dataset settings
data = dict(
    imgs_per_gpu=64,  # total 64x4=256
    train=dict(
        data_source=dict(ann_file='data/imagenet/meta/train_10pct.txt')))

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options={'\\Ahead.': dict(lr_mult=1)})

# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
checkpoint_config = dict(interval=10)
log_config = dict(
    interval=10,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(lr=0.01)
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(lr=0.001)
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-2-20e_in1k-1pct.py'

# model settings
model = dict(with_sobel=True, backbone=dict(in_channels=2))
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=100)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=10)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
@@ -0,0 +1,34 @@
_base_ = [
    '../../_base_/models/resnet50.py',
    '../../_base_/datasets/imagenet.py',
    '../../_base_/schedules/sgd_steplr-100e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))

# dataset settings
data = dict(
    imgs_per_gpu=64,  # total 64x4=256
    train=dict(
        data_source=dict(ann_file='data/imagenet/meta/train_1percent.txt')))

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
    paramwise_options={'\\Ahead.': dict(lr_mult=1)})

# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
checkpoint_config = dict(interval=10)
log_config = dict(
    interval=10,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(lr=0.01)
@@ -0,0 +1,4 @@
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(lr=0.001)
@@ -0,0 +1,19 @@
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))

# learning policy
lr_config = dict(step=[30, 60, 90])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(interval=10, max_keep_ckpts=3, out_dir='s3://results')
@@ -0,0 +1,4 @@
_base_ = 'resnet50_8xb32-steplr-100e_in1k.py'

# model settings
model = dict(with_sobel=True, backbone=dict(in_channels=2, frozen_stages=4))
@@ -0,0 +1,4 @@
_base_ = 'resnet50_mhead_8xb32-steplr-90e_in1k.py'

# model settings
model = dict(with_sobel=True, backbone=dict(in_channels=2, frozen_stages=4))
@@ -0,0 +1,15 @@
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_coslr-100e.py',
    '../_base_/default_runtime.py',
]

model = dict(backbone=dict(frozen_stages=4))

# swav setting
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(interval=10, max_keep_ckpts=3)
@@ -0,0 +1,20 @@
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

model = dict(backbone=dict(frozen_stages=4))

evaluation = dict(interval=1, topk=(1, 5))

# moco setting
# optimizer
optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)

# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(interval=10, max_keep_ckpts=3)
@@ -0,0 +1,18 @@
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/lars_coslr-90e.py',
    '../_base_/default_runtime.py',
]

model = dict(backbone=dict(frozen_stages=4))

# dataset summary
data = dict(imgs_per_gpu=512)  # total 512*8=4096, 8GPU linear cls

# simsiam setting
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(interval=10, max_keep_ckpts=3)
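The `lr=1.6` in `lars_coslr-90e.py` together with `imgs_per_gpu=512` here follows the usual linear scaling rule (0.1 per 256 images), assuming the 8-GPU setup mentioned in the comment:

```python
# Linear LR scaling behind the LARS lr=1.6 (8 GPUs assumed, as in the comment).
base_lr_per_256 = 0.1
total_batch = 512 * 8                       # imgs_per_gpu * num_gpus = 4096
print(base_lr_per_256 * total_batch / 256)  # 1.6
```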
@@ -0,0 +1,48 @@
_base_ = [
    '../_base_/models/resnet50_multihead.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

model = dict(backbone=dict(frozen_stages=4))

# dataset settings
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
    dict(
        type='ColorJitter',
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.),
    dict(type='ToTensor'),
    dict(type='Lighting'),
    dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg),
]
data = dict(
    train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline))

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options=dict(norm_decay_mult=0.),
    nesterov=True)

# learning policy
lr_config = dict(policy='step', step=[30, 60, 90])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)
checkpoint_config = dict(interval=10)
@@ -0,0 +1,4 @@
_base_ = 'resnet50_mhead_8xb32-steplr-84e_inat18.py'

# model settings
model = dict(with_sobel=True, backbone=dict(in_channels=2))
@@ -0,0 +1,32 @@
_base_ = [
    '../_base_/models/resnet50_multihead.py',
    '../_base_/datasets/inaturalist2018.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(frozen_stages=4),
    head=dict(
        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
        num_classes=8142))

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options=dict(norm_decay_mult=0.),
    nesterov=True)

# learning policy
lr_config = dict(policy='step', step=[24, 48, 72])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=84)
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(interval=1, max_keep_ckpts=3)
@@ -0,0 +1,4 @@
_base_ = 'resnet50_mhead_8xb32-steplr-28e_places205.py'

# model settings
model = dict(with_sobel=True, backbone=dict(in_channels=2))
@@ -0,0 +1,32 @@
_base_ = [
    '../_base_/models/resnet50_multihead.py',
    '../_base_/datasets/places205.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(frozen_stages=4),
    head=dict(
        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
        num_classes=205))

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options=dict(norm_decay_mult=0.),
    nesterov=True)

# learning policy
lr_config = dict(policy='step', step=[7, 14, 21])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=28)
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(interval=1, max_keep_ckpts=3)
@@ -0,0 +1,22 @@
data_source = 'ImageList'
dataset_type = 'SingleViewDataset'
split_at = [5011]
split_name = ['voc07_trainval', 'voc07_test']
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

data = dict(
    imgs_per_gpu=32,
    workers_per_gpu=4,
    extract=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/VOCdevkit/VOC2007/JPEGImages',
            ann_file='data/VOCdevkit/VOC2007/Lists/trainvaltest.txt',
        ),
        pipeline=[
            dict(type='Resize', size=256),
            dict(type='Resize', size=(224, 224)),
            dict(type='ToTensor'),
            dict(type='Normalize', **img_norm_cfg),
        ]))
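`split_at=[5011]` cuts the features extracted over the combined trainval+test list back into the two named subsets: VOC07 trainval has 5,011 images and VOC07 test has 4,952. Conceptually it behaves like a numpy split; the sketch below only illustrates the bookkeeping, not the SVM evaluator itself.

```python
# Conceptual sketch of split_at / split_name over the extracted features.
import numpy as np

features = np.zeros((9963, 2048))  # 5011 trainval + 4952 test images, one row each
parts = dict(zip(['voc07_trainval', 'voc07_test'], np.split(features, [5011])))
print(parts['voc07_trainval'].shape, parts['voc07_test'].shape)  # (5011, 2048) (4952, 2048)
```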
@@ -0,0 +1,21 @@
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
name = 'imagenet_val'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

data = dict(
    imgs_per_gpu=8,
    workers_per_gpu=4,
    extract=dict(
        type='SingleViewDataset',
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/val',
            ann_file='data/imagenet/meta/val.txt',
        ),
        pipeline=[
            dict(type='Resize', size=256),
            dict(type='CenterCrop', size=224),
            dict(type='ToTensor'),
            dict(type='Normalize', **img_norm_cfg),
        ]))
@@ -0,0 +1,15 @@
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
  KEYPOINT_ON: True
  ROI_HEADS:
    NUM_CLASSES: 1
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
  RPN:
    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
    # 1000 proposals per-image is found to hurt box AP.
    # Therefore we increase it to 1500 per-image.
    POST_NMS_TOPK_TRAIN: 1500
DATASETS:
  TRAIN: ("keypoints_coco_2017_train",)
  TEST: ("keypoints_coco_2017_val",)
@@ -0,0 +1,17 @@
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  RPN:
    PRE_NMS_TOPK_TEST: 6000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "Res5ROIHeadsExtraNorm"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
@@ -0,0 +1,42 @@
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
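These YAML files follow detectron2's config system: `_BASE_` chains are resolved and merged onto the library defaults by `merge_from_file`. A minimal loading sketch, with hypothetical paths; the `WEIGHTS: "See Instructions"` placeholder in the downstream files is meant to be replaced with the converted self-supervised checkpoint.

```python
# Sketch: materializing one of the detectron2 benchmark configs (paths hypothetical).
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file('path/to/coco_R_50_FPN_1x_moco.yaml')  # resolves the _BASE_ chain
cfg.MODEL.WEIGHTS = 'path/to/converted_backbone.pkl'       # replaces "See Instructions"
cfg.freeze()
print(cfg.SOLVER.MAX_ITER, cfg.MODEL.RESNETS.NORM)         # 90000 SyncBN
```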
@@ -0,0 +1,25 @@
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  BACKBONE:
    NAME: "build_retinanet_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
  FPN:
    IN_FEATURES: ["res3", "res4", "res5"]
  RETINANET:
    IOU_THRESHOLDS: [0.4, 0.5]
    IOU_LABELS: [0, -1, 1]
    SMOOTH_L1_LOSS_BETA: 0.0
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
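The `!!python/object/apply:eval` tag makes the YAML loader evaluate the quoted expression, producing three anchor sizes per octave for each of the five pyramid levels. The same list can be computed directly:

```python
# What the anchor SIZES expression in the RetinaNet base config evaluates to.
sizes = [[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]]
print(sizes[0])  # [32, 40.317..., 50.796...] -- three sizes for the first FPN level
```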
@@ -0,0 +1,30 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  ROI_HEADS:
    NUM_CLASSES: 8
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
INPUT:
  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 2048
  MAX_SIZE_TEST: 2048
DATASETS:
  TRAIN: ("cityscapes_fine_instance_seg_train",)
  TEST: ("cityscapes_fine_instance_seg_val",)
SOLVER:
  BASE_LR: 0.01
  STEPS: (18000,)
  MAX_ITER: 24000
  IMS_PER_BATCH: 8
TEST:
  PRECISE_BN:
    ENABLED: True
@@ -0,0 +1,9 @@
_BASE_: "mask_rcnn_R_50_FPN.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
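The RGB variants swap detectron2's default Caffe-style BGR pixel statistics for torchvision-style ones; the values are simply the usual ImageNet mean/std scaled to the 0-255 range:

```python
# Where PIXEL_MEAN / PIXEL_STD in the RGB (moco) variants come from.
mean = [round(m * 255, 3) for m in (0.485, 0.456, 0.406)]  # [123.675, 116.28, 103.53]
std = [round(s * 255, 3) for s in (0.229, 0.224, 0.225)]   # [58.395, 57.12, 57.375]
print(mean, std)
```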
@@ -0,0 +1,4 @@
_BASE_: "coco_R_50_C4_2x.yaml"
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000
@@ -0,0 +1,4 @@
_BASE_: "coco_R_50_C4_2x_moco.yaml"
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000
@@ -0,0 +1,13 @@
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
@@ -0,0 +1,10 @@
_BASE_: "coco_R_50_C4_2x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  MAX_SIZE_TRAIN: 1200
  FORMAT: "RGB"
@@ -0,0 +1,17 @@
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000
@@ -0,0 +1,9 @@
_BASE_: "coco_R_50_FPN_1x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
@@ -0,0 +1,4 @@
_BASE_: "coco_R_50_FPN_1x.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
@@ -0,0 +1,4 @@
_BASE_: "coco_R_50_FPN_1x_moco.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
@@ -0,0 +1,13 @@
_BASE_: "Base-RetinaNet.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
@@ -0,0 +1,9 @@
_BASE_: "coco_R_50_RetinaNet_1x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
@@ -0,0 +1,4 @@
_BASE_: "coco_R_50_RetinaNet_1x.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
@@ -0,0 +1,4 @@
_BASE_: "coco_R_50_RetinaNet_1x_moco.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
@@ -0,0 +1,16 @@
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
@@ -0,0 +1,9 @@
_BASE_: "keypoint_rcnn_R_50_FPN_2x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
@@ -0,0 +1,16 @@
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
  MASK_ON: False
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  ROI_HEADS:
    NUM_CLASSES: 20
INPUT:
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
  TEST: ('voc_2007_test',)
SOLVER:
  STEPS: (18000, 22000)
  MAX_ITER: 24000
  WARMUP_ITERS: 100
@@ -0,0 +1,9 @@
_BASE_: "pascal_voc_R_50_C4_24k.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
@@ -0,0 +1,49 @@
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
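This file is consumed through `_base_` by the MMDetection benchmark configs further down; it can also be exercised on its own to make sure the COCO paths resolve. A minimal sketch, assuming MMDetection 2.x and the data layout above; the config path is hypothetical.

```python
# Sketch: build the COCO training set from this dataset config (mmdet 2.x assumed).
from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('path/to/_base_/datasets/coco_instance.py')  # hypothetical path
dataset = build_dataset(cfg.data.train)  # CocoDataset with the train pipeline attached
print(len(dataset))                      # number of images in instances_train2017.json
```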
@@ -0,0 +1,55 @@
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='mAP')
@@ -0,0 +1,16 @@
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
custom_hooks = [dict(type='NumClassCheckHook')]

dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
@@ -0,0 +1,112 @@
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='FasterRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=3,
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2, ),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    rpn_head=dict(
        type='RPNHead',
        in_channels=1024,
        feat_channels=1024,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[2, 4, 8, 16, 32],
            ratios=[0.5, 1.0, 2.0],
            strides=[16]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        shared_head=dict(
            type='ResLayer',
            depth=50,
            stage=3,
            stride=2,
            dilation=1,
            style='pytorch',
            norm_cfg=norm_cfg,
            norm_eval=True),
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=1024,
            featmap_strides=[16]),
        bbox_head=dict(
            type='BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=12000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=6000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
@@ -0,0 +1,123 @@
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='MaskRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=3,
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2, ),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    rpn_head=dict(
        type='RPNHead',
        in_channels=1024,
        feat_channels=1024,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[2, 4, 8, 16, 32],
            ratios=[0.5, 1.0, 2.0],
            strides=[16]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        shared_head=dict(
            type='ResLayer',
            depth=50,
            stage=3,
            stride=2,
            dilation=1,
            style='pytorch',
            norm_cfg=norm_cfg,
            norm_eval=True),
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=1024,
            featmap_strides=[16]),
        bbox_head=dict(
            type='BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=None,
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=0,
            in_channels=2048,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=12000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=14,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=6000,
            nms=dict(type='nms', iou_threshold=0.7),
            max_per_img=1000,
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
@@ -0,0 +1,120 @@
# model settings
model = dict(
    type='MaskRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
@@ -0,0 +1,12 @@
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=100,
    warmup_ratio=0.001,
    step=[18000, 22000],
    by_epoch=False)
runner = dict(type='IterBasedRunner', max_iters=24000)
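The 24k schedule drives training by iteration instead of epoch, mirroring the 24k-iteration VOC baseline on the detectron2 side. With VOC07+12 trainval (16,551 images) and `samples_per_gpu=2`, and assuming 8 GPUs, that is roughly 23 epochs:

```python
# Rough epoch equivalent of the 24k-iteration schedule (8 GPUs assumed).
num_images = 5011 + 11540            # VOC07 + VOC12 trainval
iters_per_epoch = num_images / (2 * 8)
print(24000 / iters_per_epoch)       # ~23 epochs; LR steps land at 18k and 22k iters
```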
@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=0.001,
    step=[16, 22])
runner = dict(type='EpochBasedRunner', max_epochs=24)
@@ -0,0 +1,36 @@
_base_ = [
    '../_base_/models/mask_rcnn_r50_c4.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
    roi_head=dict(
        shared_head=dict(
            type='ResLayerExtraNorm', norm_cfg=norm_cfg, norm_eval=False)))

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]

data = dict(train=dict(pipeline=train_pipeline))

custom_imports = dict(
    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    allow_failed_imports=False)
@@ -0,0 +1,36 @@
_base_ = [
    '../_base_/models/mask_rcnn_r50_c4.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
]

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
    roi_head=dict(
        shared_head=dict(
            type='ResLayerExtraNorm', norm_cfg=norm_cfg, norm_eval=False)))

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]

data = dict(train=dict(pipeline=train_pipeline))

custom_imports = dict(
    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    allow_failed_imports=False)
@@ -0,0 +1,33 @@
_base_ = [
    '../_base_/models/mask_rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
    neck=dict(norm_cfg=norm_cfg),
    roi_head=dict(
        bbox_head=dict(type='Shared4Conv1FCBBoxHead', norm_cfg=norm_cfg),
        mask_head=dict(norm_cfg=norm_cfg)))

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]

data = dict(train=dict(pipeline=train_pipeline))
@@ -0,0 +1,33 @@
_base_ = [
    '../_base_/models/mask_rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
]

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
    neck=dict(norm_cfg=norm_cfg),
    roi_head=dict(
        bbox_head=dict(type='Shared4Conv1FCBBoxHead', norm_cfg=norm_cfg),
        mask_head=dict(norm_cfg=norm_cfg)))

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]

data = dict(train=dict(pipeline=train_pipeline))
@@ -0,0 +1,84 @@
_base_ = [
    '../_base_/models/faster_rcnn_r50_c4.py',
    '../_base_/schedules/schedule_24k.py', '../_base_/default_runtime.py'
]

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
    roi_head=dict(
        shared_head=dict(
            type='ResLayerExtraNorm', norm_cfg=norm_cfg, norm_eval=False),
        bbox_head=dict(num_classes=20)))

# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(1333, 480), (1333, 512), (1333, 544), (1333, 576),
                   (1333, 608), (1333, 640), (1333, 672), (1333, 704),
                   (1333, 736), (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=[
            data_root + 'VOC2007/ImageSets/Main/trainval.txt',
            data_root + 'VOC2012/ImageSets/Main/trainval.txt'
        ],
        img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline))
evaluation = dict(interval=2000, metric='mAP')

checkpoint_config = dict(by_epoch=False, interval=2000)

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])

custom_imports = dict(
    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    allow_failed_imports=False)
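`custom_imports` is what makes the `ResLayerExtraNorm` shared head resolvable: when the config is loaded, mmcv imports the listed module so its `register_module()` decorators run before the detector is built. It is roughly equivalent to the call below (the module path is taken from the config; the description of the load order is an assumption about mmcv's behaviour).

```python
# Sketch of what custom_imports amounts to at config-load time.
from mmcv.utils import import_modules_from_strings

# Importing the module executes its register_module() decorators, so
# type='ResLayerExtraNorm' can be looked up when the model is built.
import_modules_from_strings(
    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    allow_failed_imports=False)
```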
@@ -0,0 +1,54 @@
# dataset settings
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline))
@ -0,0 +1,35 @@
_base_ = './cityscapes.py'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (769, 769)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
@ -0,0 +1,57 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
@ -0,0 +1,9 @@
_base_ = './pascal_voc12.py'
# dataset settings
data = dict(
    train=dict(
        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
        split=[
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt'
        ]))
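Note: with list-valued ann_dir and split, the dataset builder is expected to create one sub-dataset per (ann_dir, split) pair and concatenate them, so the official train.txt images and the SBD-augmented aug.txt images train together. A hedged sanity-check sketch; the config path below is hypothetical and assumes mmseg plus the VOC2012 layout from pascal_voc12.py.

from mmcv import Config
from mmseg.datasets import build_dataset

cfg = Config.fromfile('pascal_voc12_aug.py')  # hypothetical local path to this config
train_set = build_dataset(cfg.data.train)     # concatenation of train.txt and aug.txt entries
print(len(train_set))                          # train.txt alone holds 1464 images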
@ -0,0 +1,14 @@
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
@ -0,0 +1,45 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    decode_head=dict(
        type='FCNHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
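Note: with strides=(1, 2, 1, 1) and dilations=(1, 1, 2, 4), the ResNet-50 backbone keeps an output stride of 8 (the "d8" in the file name), so both FCN heads work on 1/8-resolution features. A minimal sketch for instantiating the segmentor from this base config; it assumes mmseg is installed and the config path is hypothetical.

from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile('fcn_r50-d8.py')  # hypothetical local path to this config
model = build_segmentor(cfg.model)      # train_cfg/test_cfg are read from the model dict
model.init_weights()                    # applies init_cfg, e.g. torchvision://resnet50 weights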
@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
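Note: a small sketch of the poly learning-rate decay configured above, assuming mmcv's usual formula of (base_lr - min_lr) * (1 - iter / max_iters) ** power + min_lr; values are illustrative only.

# Poly decay for this schedule: lr=0.01, power=0.9, min_lr=1e-4, 20k iters.
def poly_lr(iteration, base_lr=0.01, min_lr=1e-4, power=0.9, max_iters=20000):
    coeff = (1 - iteration / max_iters) ** power
    return (base_lr - min_lr) * coeff + min_lr

print(poly_lr(0))      # 0.01 at the start
print(poly_lr(10000))  # roughly 0.0054 halfway through
print(poly_lr(20000))  # floors at min_lr = 1e-4 at the end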
@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=40000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
@ -0,0 +1,9 @@
_base_ = [
    '../_base_/models/fcn_r50-d8.py',
    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py'
]
model = dict(
    decode_head=dict(align_corners=True),
    auxiliary_head=dict(align_corners=True),
    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
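Note: test_cfg mode='slide' evaluates overlapping 769x769 windows with stride 513. A hedged arithmetic sketch of the window count per 2049x1025 test image, assuming mmseg's usual grid formula grids = max(img - crop + stride - 1, 0) // stride + 1.

h_img, w_img = 1025, 2049
h_crop = w_crop = 769
h_stride = w_stride = 513
h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1  # 2 rows
w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1  # 4 columns
print(h_grids * w_grids)  # 8 crops per image; overlapping logits are averaged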
@ -0,0 +1,6 @@
_base_ = [
    '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py'
]
model = dict(
    decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))