[Refactor] add benchmark configs

pull/133/head
Jiahao000 2021-12-15 19:09:54 +08:00
parent 00e51990fb
commit 67716e2011
90 changed files with 1928 additions and 0 deletions

View File

@ -0,0 +1,49 @@
# dataset settings
# CIFAR10 data source wrapped in a SingleViewDataset, with train/val/test
# splits that all read from the same data prefix.
data_source = 'CIFAR10'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.201])
train_pipeline = [
    dict(type='RandomCrop', size=32, padding=4),
    dict(type='RandomHorizontalFlip'),
]
# No augmentation for evaluation; only the steps appended below are applied.
test_pipeline = []

# prefetch
prefetch = False
# With prefetch disabled, tensor conversion and normalization are appended
# to both pipelines here.
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=128,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/cifar10',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/cifar10',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch),
    test=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/cifar10',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))

View File

@ -0,0 +1,46 @@
# dataset settings
# ImageNet data source wrapped in a SingleViewDataset, with train and val
# splits described by separate image prefixes and annotation lists.
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
]

# prefetch
prefetch = False
# With prefetch disabled, tensor conversion and normalization are appended
# to both pipelines here.
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/train',
            ann_file='data/imagenet/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/val',
            ann_file='data/imagenet/meta/val.txt',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))

View File

@ -0,0 +1,46 @@
# dataset settings
# iNaturalist2018 images read through the 'ImageNet' data source type; train
# and val share one image prefix and differ only in their annotation list.
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
]

# prefetch
prefetch = False
# With prefetch disabled, tensor conversion and normalization are appended
# to both pipelines here.
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/iNaturalist2018/train_val2018',
            ann_file='data/iNaturalist2018/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/iNaturalist2018/train_val2018',
            ann_file='data/iNaturalist2018/meta/val.txt',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))

View File

@ -0,0 +1,48 @@
# dataset settings
# Places205 images read through the 'ImageNet' data source type, with train
# and val splits described by separate image prefixes and annotation lists.
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Training: resize, center-crop to 256, then take a random 224 crop.
train_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=256),
    dict(type='RandomCrop', size=224),
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
]

# prefetch
prefetch = False
# With prefetch disabled, tensor conversion and normalization are appended
# to both pipelines here.
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/Places205/train',
            ann_file='data/Places205/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch),
    val=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/Places205/val',
            ann_file='data/Places205/meta/val.txt',
        ),
        pipeline=test_pipeline,
        prefetch=prefetch))
evaluation = dict(interval=10, topk=(1, 5))

View File

@ -0,0 +1,20 @@
# Shared runtime defaults: empty train/test/optimizer options, text logging,
# and the distributed / logging / workflow settings below.
train_cfg = {}
test_cfg = {}
optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb

# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ],
)
# yapf:enable

# runtime settings
dist_params = dict(backend='nccl')
cudnn_benchmark = True
log_level = 'INFO'
# No checkpoint is loaded or resumed unless a derived config sets these.
load_from = None
resume_from = None
workflow = [('train', 1)]
persistent_workers = True

View File

@ -0,0 +1,15 @@
# Classification model: ResNet-50 backbone emitting only index 4, followed
# by a single ClsHead with 1000 classes.
model = dict(
    type='Classification',
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=3,
        num_stages=4,
        strides=(1, 2, 2, 2),
        dilations=(1, 1, 1, 1),
        out_indices=[4],  # 0: conv-1, x: stage-x
        norm_cfg=dict(type='BN'),
        frozen_stages=-1,
    ),
    head=dict(
        type='ClsHead',
        with_avg_pool=True,
        in_channels=2048,
        num_classes=1000,
    ),
)

View File

@ -0,0 +1,17 @@
# Classification model: ResNet-50 backbone exposing indices 0-4, with a
# MultiClsHead that consumes all five of them.
model = dict(
    type='Classification',
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=3,
        out_indices=[0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
        norm_cfg=dict(type='BN'),
        frozen_stages=-1,
    ),
    head=dict(
        type='MultiClsHead',
        pool_type='specified',
        in_indices=[0, 1, 2, 3, 4],
        with_last_layer_unpool=False,
        backbone='resnet50',
        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
        num_classes=1000,
    ),
)

View File

@ -0,0 +1,8 @@
# Schedule: LARS optimizer, cosine-annealed learning rate, 90 epochs.

# optimizer
optimizer = dict(
    type='LARS',
    lr=1.6,
    momentum=0.9,
    weight_decay=0.,
)

# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.,
)

# runtime settings
runner = dict(
    type='EpochBasedRunner',
    max_epochs=90,
)

View File

@ -0,0 +1,8 @@
# Schedule: SGD optimizer, cosine-annealed learning rate, 100 epochs.

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.3,
    momentum=0.9,
    weight_decay=1e-6,
)

# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.,
)

# runtime settings
runner = dict(
    type='EpochBasedRunner',
    max_epochs=100,
)

View File

@ -0,0 +1,8 @@
# Schedule: SGD optimizer, step learning-rate policy with steps at 60 and 80,
# 100 epochs.

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
)

# learning policy
lr_config = dict(
    policy='step',
    step=[60, 80],
)

# runtime settings
runner = dict(
    type='EpochBasedRunner',
    max_epochs=100,
)

View File

@ -0,0 +1,19 @@
# CIFAR10 run: starts from the four base files below and overrides the head
# size, optimizer, step schedule, epoch count and checkpoint interval.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/cifar10.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    head=dict(num_classes=10),
)

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)

# learning policy
lr_config = dict(
    policy='step',
    step=[150, 250],
)

# runtime settings
runner = dict(
    type='EpochBasedRunner',
    max_epochs=350,
)
checkpoint_config = dict(interval=50)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below that enables with_sobel and gives
# the backbone a 2-channel input.
_base_ = 'resnet50_head1_4xb64-steplr1e-2-20e_in1k-10pct.py'

# model settings
model = dict(
    with_sobel=True,
    backbone=dict(in_channels=2),
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the lr multiplier attached to
# the '\Ahead.' key is changed (to 100).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    paramwise_options={
        '\\Ahead.': dict(lr_mult=100),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.01 and the lr
# multiplier attached to the '\Ahead.' key set to 100.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    lr=0.01,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=100),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.001 and the lr
# multiplier attached to the '\Ahead.' key set to 100.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    lr=0.001,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=100),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the lr multiplier attached to
# the '\Ahead.' key is changed (to 10).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    paramwise_options={
        '\\Ahead.': dict(lr_mult=10),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.01 and the lr
# multiplier attached to the '\Ahead.' key set to 10.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    lr=0.01,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=10),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.001 and the lr
# multiplier attached to the '\Ahead.' key set to 10.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    lr=0.001,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=10),
    },
)

View File

@ -0,0 +1,34 @@
# Base run for the '10pct' family: ResNet-50 with SyncBN, a reduced training
# annotation list, SGD with a 20-epoch step schedule, and text + tensorboard
# logging.
_base_ = [
    '../../_base_/models/resnet50.py',
    '../../_base_/datasets/imagenet.py',
    '../../_base_/schedules/sgd_steplr-100e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(
        norm_cfg=dict(type='SyncBN'),
    ),
)

# dataset settings
# NOTE(review): the sibling 1% config reads 'train_1percent.txt'; confirm that
# 'train_10pct.txt' is the actual name of the list file on disk.
data = dict(
    imgs_per_gpu=64,  # total 64x4=256
    train=dict(
        data_source=dict(
            ann_file='data/imagenet/meta/train_10pct.txt',
        ),
    ),
)

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=1),
    },
)

# learning policy
lr_config = dict(
    policy='step',
    step=[12, 16],
    gamma=0.2,
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
checkpoint_config = dict(interval=10)
log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the learning rate changes
# (to 0.01).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    lr=0.01,
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the learning rate changes
# (to 0.001).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'

# optimizer
optimizer = dict(
    lr=0.001,
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below that enables with_sobel and gives
# the backbone a 2-channel input.
_base_ = 'resnet50_head1_4xb64-steplr1e-2-20e_in1k-1pct.py'

# model settings
model = dict(
    with_sobel=True,
    backbone=dict(in_channels=2),
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the lr multiplier attached to
# the '\Ahead.' key is changed (to 100).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    paramwise_options={
        '\\Ahead.': dict(lr_mult=100),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.01 and the lr
# multiplier attached to the '\Ahead.' key set to 100.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    lr=0.01,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=100),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.001 and the lr
# multiplier attached to the '\Ahead.' key set to 100.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    lr=0.001,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=100),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the lr multiplier attached to
# the '\Ahead.' key is changed (to 10).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    paramwise_options={
        '\\Ahead.': dict(lr_mult=10),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.01 and the lr
# multiplier attached to the '\Ahead.' key set to 10.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    lr=0.01,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=10),
    },
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: lr lowered to 0.001 and the lr
# multiplier attached to the '\Ahead.' key set to 10.
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    lr=0.001,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=10),
    },
)

View File

@ -0,0 +1,34 @@
# Base run for the '1pct' family: ResNet-50 with SyncBN, a reduced training
# annotation list, SGD with a 20-epoch step schedule, and text + tensorboard
# logging.
_base_ = [
    '../../_base_/models/resnet50.py',
    '../../_base_/datasets/imagenet.py',
    '../../_base_/schedules/sgd_steplr-100e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(
        norm_cfg=dict(type='SyncBN'),
    ),
)

# dataset settings
data = dict(
    imgs_per_gpu=64,  # total 64x4=256
    train=dict(
        data_source=dict(
            ann_file='data/imagenet/meta/train_1percent.txt',
        ),
    ),
)

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
    paramwise_options={
        '\\Ahead.': dict(lr_mult=1),
    },
)

# learning policy
lr_config = dict(
    policy='step',
    step=[12, 16],
    gamma=0.2,
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
checkpoint_config = dict(interval=10)
log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the learning rate changes
# (to 0.01).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    lr=0.01,
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below: only the learning rate changes
# (to 0.001).
_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'

# optimizer
optimizer = dict(
    lr=0.001,
)

View File

@ -0,0 +1,19 @@
# ImageNet run: ResNet-50 with SyncBN in the backbone, step schedule at
# [30, 60, 90], 90 epochs.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))

# learning policy
lr_config = dict(step=[30, 60, 90])

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
# No out_dir is set here: a hard-coded 's3://results' bucket is specific to
# one environment, and the sibling configs all leave the checkpoint location
# to the work dir. Set out_dir in a local override if remote storage is needed.
checkpoint_config = dict(interval=10, max_keep_ckpts=3)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below that enables with_sobel, gives the
# backbone a 2-channel input and sets frozen_stages=4.
_base_ = 'resnet50_8xb32-steplr-100e_in1k.py'

# model settings
model = dict(
    with_sobel=True,
    backbone=dict(
        in_channels=2,
        frozen_stages=4,
    ),
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below that enables with_sobel, gives the
# backbone a 2-channel input and sets frozen_stages=4.
_base_ = 'resnet50_mhead_8xb32-steplr-90e_in1k.py'

# model settings
model = dict(
    with_sobel=True,
    backbone=dict(
        in_channels=2,
        frozen_stages=4,
    ),
)

View File

@ -0,0 +1,15 @@
# ImageNet run on the cosine-lr SGD schedule with backbone frozen_stages=4.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_coslr-100e.py',
    '../_base_/default_runtime.py',
]

model = dict(
    backbone=dict(frozen_stages=4),
)

# swav setting
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(
    interval=10,
    max_keep_ckpts=3,
)

View File

@ -0,0 +1,20 @@
# ImageNet run on the step-lr SGD schedule with backbone frozen_stages=4,
# evaluation at interval 1 and an SGD optimizer override (lr=30).
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

model = dict(
    backbone=dict(frozen_stages=4),
)

evaluation = dict(
    interval=1,
    topk=(1, 5),
)

# moco setting
# optimizer
optimizer = dict(
    type='SGD',
    lr=30.,
    momentum=0.9,
    weight_decay=0.,
)

# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(
    interval=10,
    max_keep_ckpts=3,
)

View File

@ -0,0 +1,18 @@
# ImageNet run on the LARS cosine-lr schedule with backbone frozen_stages=4
# and a per-GPU batch of 512.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/lars_coslr-90e.py',
    '../_base_/default_runtime.py',
]

model = dict(
    backbone=dict(frozen_stages=4),
)

# dataset summary
data = dict(
    imgs_per_gpu=512,  # total 512*8=4096, 8GPU linear cls
)

# simsiam setting
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(
    interval=10,
    max_keep_ckpts=3,
)

View File

@ -0,0 +1,48 @@
# Multi-head ImageNet run: backbone frozen_stages=4, its own train/val
# pipelines, Nesterov SGD and a 90-epoch step schedule.
_base_ = [
    '../_base_/models/resnet50_multihead.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

model = dict(
    backbone=dict(frozen_stages=4),
)

# dataset settings
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Training pipeline: crop/flip/colour jitter, then tensor conversion,
# Lighting and normalization.
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
    dict(
        type='ColorJitter',
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.),
    dict(type='ToTensor'),
    dict(type='Lighting'),
    dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg),
]
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
)

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options=dict(norm_decay_mult=0.),
    nesterov=True,
)

# learning policy
lr_config = dict(
    policy='step',
    step=[30, 60, 90],
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)
checkpoint_config = dict(interval=10)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below that enables with_sobel and gives
# the backbone a 2-channel input.
_base_ = 'resnet50_mhead_8xb32-steplr-84e_inat18.py'

# model settings
model = dict(
    with_sobel=True,
    backbone=dict(in_channels=2),
)

View File

@ -0,0 +1,32 @@
# Multi-head iNaturalist2018 run: backbone frozen_stages=4, 8142-class head,
# Nesterov SGD and an 84-epoch step schedule.
_base_ = [
    '../_base_/models/resnet50_multihead.py',
    '../_base_/datasets/inaturalist2018.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(frozen_stages=4),
    head=dict(
        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
        num_classes=8142,
    ),
)

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options=dict(norm_decay_mult=0.),
    nesterov=True,
)

# learning policy
lr_config = dict(
    policy='step',
    step=[24, 48, 72],
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=84)
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(
    interval=1,
    max_keep_ckpts=3,
)

View File

@ -0,0 +1,4 @@
# Variant of the base config named below that enables with_sobel and gives
# the backbone a 2-channel input.
_base_ = 'resnet50_mhead_8xb32-steplr-28e_places205.py'

# model settings
model = dict(
    with_sobel=True,
    backbone=dict(in_channels=2),
)

View File

@ -0,0 +1,32 @@
# Multi-head Places205 run: backbone frozen_stages=4, 205-class head,
# Nesterov SGD and a 28-epoch step schedule.
_base_ = [
    '../_base_/models/resnet50_multihead.py',
    '../_base_/datasets/places205.py',
    '../_base_/schedules/sgd_steplr-100e.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(frozen_stages=4),
    head=dict(
        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
        num_classes=205,
    ),
)

# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    paramwise_options=dict(norm_decay_mult=0.),
    nesterov=True,
)

# learning policy
lr_config = dict(
    policy='step',
    step=[7, 14, 21],
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=28)
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
# it will remove the oldest one to keep the number of total ckpts as 3
checkpoint_config = dict(
    interval=1,
    max_keep_ckpts=3,
)

View File

@ -0,0 +1,22 @@
# Extraction dataset over the VOC2007 trainval+test image list.
# split_at / split_name describe how the single list divides into two named
# parts (index 5011 separates 'voc07_trainval' from 'voc07_test').
data_source = 'ImageList'
dataset_type = 'SingleViewDataset'
split_at = [5011]
split_name = ['voc07_trainval', 'voc07_test']
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

data = dict(
    imgs_per_gpu=32,
    workers_per_gpu=4,
    extract=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/VOCdevkit/VOC2007/JPEGImages',
            ann_file='data/VOCdevkit/VOC2007/Lists/trainvaltest.txt',
        ),
        # Two resizes in a row: first with size=256, then to a fixed
        # (224, 224).
        pipeline=[
            dict(type='Resize', size=256),
            dict(type='Resize', size=(224, 224)),
            dict(type='ToTensor'),
            dict(type='Normalize', **img_norm_cfg),
        ],
    ),
)

View File

@ -0,0 +1,21 @@
# Extraction dataset over the ImageNet validation split.
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
name = 'imagenet_val'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

data = dict(
    imgs_per_gpu=8,
    workers_per_gpu=4,
    extract=dict(
        # Same value as the literal 'SingleViewDataset'; reuses the variable
        # defined above.
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/val',
            ann_file='data/imagenet/meta/val.txt',
        ),
        pipeline=[
            dict(type='Resize', size=256),
            dict(type='CenterCrop', size=224),
            dict(type='ToTensor'),
            dict(type='Normalize', **img_norm_cfg),
        ],
    ),
)

View File

@ -0,0 +1,15 @@
# Keypoint R-CNN base: extends Base-RCNN-FPN.yaml with keypoint prediction
# on the COCO keypoint train/val sets.
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
  KEYPOINT_ON: True
  ROI_HEADS:
    NUM_CLASSES: 1
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
  RPN:
    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
    # 1000 proposals per-image is found to hurt box AP.
    # Therefore we increase it to 1500 per-image.
    POST_NMS_TOPK_TRAIN: 1500
DATASETS:
  TRAIN: ("keypoints_coco_2017_train",)
  TEST: ("keypoints_coco_2017_val",)

View File

@ -0,0 +1,17 @@
# R-CNN C4 base with SyncBN in the ResNet, an unfrozen backbone
# (FREEZE_AT: 0) and precise-BN enabled at test time.
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  RPN:
    PRE_NMS_TOPK_TEST: 6000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "Res5ROIHeadsExtraNorm"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02

View File

@ -0,0 +1,42 @@
# R-CNN FPN base: ResNet-FPN backbone, standard ROI heads, COCO 2017
# train/val and a 90k-iteration solver schedule.
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@ -0,0 +1,25 @@
# RetinaNet base: ResNet-FPN backbone on res3-res5, COCO 2017 train/val
# and a 90k-iteration solver schedule.
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  BACKBONE:
    NAME: "build_retinanet_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    # NOTE(review): this tag evaluates a Python expression while the YAML is
    # loaded, so it needs an unsafe loader. Only load this file from a
    # trusted source.
    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
  FPN:
    IN_FEATURES: ["res3", "res4", "res5"]
  RETINANET:
    IOU_THRESHOLDS: [0.4, 0.5]
    IOU_LABELS: [0, -1, 1]
    SMOOTH_L1_LOSS_BETA: 0.0
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@ -0,0 +1,30 @@
# Mask R-CNN R-50 FPN on Cityscapes instance segmentation (8 classes),
# with SyncBN in ResNet and FPN and an unfrozen backbone.
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  ROI_HEADS:
    NUM_CLASSES: 8
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
INPUT:
  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 2048
  MAX_SIZE_TEST: 2048
DATASETS:
  TRAIN: ("cityscapes_fine_instance_seg_train",)
  TEST: ("cityscapes_fine_instance_seg_val",)
SOLVER:
  BASE_LR: 0.01
  STEPS: (18000,)
  MAX_ITER: 24000
  IMS_PER_BATCH: 8
TEST:
  PRECISE_BN:
    ENABLED: True

View File

@ -0,0 +1,9 @@
# Overrides on top of mask_rcnn_R_50_FPN.yaml: RGB input with matching
# pixel mean/std, and STRIDE_IN_1X1 disabled.
_BASE_: "mask_rcnn_R_50_FPN.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  # Placeholder value; presumably replaced with a real checkpoint path per
  # the project instructions.
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"

View File

@ -0,0 +1,4 @@
# Same as coco_R_50_C4_2x.yaml but with a 90k-iteration solver schedule.
_BASE_: "coco_R_50_C4_2x.yaml"
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000

View File

@ -0,0 +1,4 @@
# Same as coco_R_50_C4_2x_moco.yaml but with a 90k-iteration solver schedule.
_BASE_: "coco_R_50_C4_2x_moco.yaml"
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000

View File

@ -0,0 +1,13 @@
# Mask R-CNN R-50 C4 on COCO 2017 with a 180k-iteration solver schedule.
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

View File

@ -0,0 +1,10 @@
# Overrides on top of coco_R_50_C4_2x.yaml: RGB input with matching pixel
# mean/std, STRIDE_IN_1X1 disabled, and MAX_SIZE_TRAIN set to 1200.
_BASE_: "coco_R_50_C4_2x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  # Placeholder value; presumably replaced with a real checkpoint path per
  # the project instructions.
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  MAX_SIZE_TRAIN: 1200
  FORMAT: "RGB"

View File

@ -0,0 +1,17 @@
# Mask R-CNN R-50 FPN with SyncBN in ResNet and FPN, an unfrozen backbone
# and a 90k-iteration solver schedule.
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000

View File

@ -0,0 +1,9 @@
# Overrides on top of coco_R_50_FPN_1x.yaml: RGB input with matching pixel
# mean/std, and STRIDE_IN_1X1 disabled.
_BASE_: "coco_R_50_FPN_1x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  # Placeholder value; presumably replaced with a real checkpoint path per
  # the project instructions.
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"

View File

@ -0,0 +1,4 @@
# Same as coco_R_50_FPN_1x.yaml but with a 180k-iteration solver schedule.
_BASE_: "coco_R_50_FPN_1x.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

View File

@ -0,0 +1,4 @@
# Same as coco_R_50_FPN_1x_moco.yaml but with a 180k-iteration solver schedule.
_BASE_: "coco_R_50_FPN_1x_moco.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

View File

@ -0,0 +1,13 @@
# RetinaNet R-50 with SyncBN in ResNet and FPN, an unfrozen backbone and
# precise-BN enabled at test time.
_BASE_: "Base-RetinaNet.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True

View File

@ -0,0 +1,9 @@
# Overrides on top of coco_R_50_RetinaNet_1x.yaml: RGB input with matching
# pixel mean/std, and STRIDE_IN_1X1 disabled.
_BASE_: "coco_R_50_RetinaNet_1x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  # Placeholder value; presumably replaced with a real checkpoint path per
  # the project instructions.
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"

View File

@ -0,0 +1,4 @@
# Same as coco_R_50_RetinaNet_1x.yaml but with a 180k-iteration solver schedule.
_BASE_: "coco_R_50_RetinaNet_1x.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

View File

@ -0,0 +1,4 @@
# Same as coco_R_50_RetinaNet_1x_moco.yaml but with a 180k-iteration solver
# schedule.
_BASE_: "coco_R_50_RetinaNet_1x_moco.yaml"
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

View File

@ -0,0 +1,16 @@
# Keypoint R-CNN R-50 FPN with SyncBN in ResNet and FPN, an unfrozen
# backbone and a 180k-iteration solver schedule.
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
TEST:
  PRECISE_BN:
    ENABLED: True
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

View File

@ -0,0 +1,9 @@
# Overrides on top of keypoint_rcnn_R_50_FPN_2x.yaml: RGB input with
# matching pixel mean/std, and STRIDE_IN_1X1 disabled.
_BASE_: "keypoint_rcnn_R_50_FPN_2x.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  # Placeholder value; presumably replaced with a real checkpoint path per
  # the project instructions.
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"

View File

@ -0,0 +1,16 @@
# Faster R-CNN R-50 C4 (no mask head) trained on VOC 2007+2012 trainval,
# tested on VOC 2007 test, with a 24k-iteration solver schedule.
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
  MASK_ON: False
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  ROI_HEADS:
    NUM_CLASSES: 20
INPUT:
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
DATASETS:
  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
  TEST: ('voc_2007_test',)
SOLVER:
  STEPS: (18000, 22000)
  MAX_ITER: 24000
  WARMUP_ITERS: 100

View File

@ -0,0 +1,9 @@
# Overrides on top of pascal_voc_R_50_C4_24k.yaml: RGB input with matching
# pixel mean/std, and STRIDE_IN_1X1 disabled.
_BASE_: "pascal_voc_R_50_C4_24k.yaml"
MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  # Placeholder value; presumably replaced with a real checkpoint path per
  # the project instructions.
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"

View File

@ -0,0 +1,49 @@
# dataset settings
# COCO 2017 instance annotations: boxes and masks for training, bbox + segm
# metrics for evaluation. val and test both point at the val2017 split.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]

# Single scale with flip=False, wrapped in MultiScaleFlipAug.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]

data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline,
    ),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline,
    ),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline,
    ),
)
evaluation = dict(metric=['bbox', 'segm'])

View File

@ -0,0 +1,55 @@
# dataset settings
# Pascal VOC: trains on VOC2007 + VOC2012 trainval (wrapped in a
# RepeatDataset with times=3) and evaluates mAP on the VOC2007 test list.
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

# Single scale with flip=False, wrapped in MultiScaleFlipAug.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]

data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt',
            ],
            img_prefix=[
                data_root + 'VOC2007/',
                data_root + 'VOC2012/',
            ],
            pipeline=train_pipeline,
        ),
    ),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
    ),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
    ),
)
evaluation = dict(interval=1, metric='mAP')

View File

@ -0,0 +1,16 @@
# Runtime defaults: checkpoint interval 1, text logging at interval 50,
# a NumClassCheckHook, and NCCL for distributed runs.
checkpoint_config = dict(interval=1)

# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ],
)
# yapf:enable

custom_hooks = [
    dict(type='NumClassCheckHook'),
]
dist_params = dict(backend='nccl')
log_level = 'INFO'
# No checkpoint is loaded or resumed unless a derived config sets these.
load_from = None
resume_from = None
workflow = [('train', 1)]

View File

@ -0,0 +1,112 @@
# model settings
# Faster R-CNN with a 3-stage ResNet-50 backbone (single output at index 2),
# an RPN head on that 1024-channel map, and a RoI head whose shared head is
# a further ResLayer (stage=3).
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='FasterRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=3,
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2, ),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='torchvision://resnet50',
        ),
    ),
    rpn_head=dict(
        type='RPNHead',
        in_channels=1024,
        feat_channels=1024,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[2, 4, 8, 16, 32],
            ratios=[0.5, 1.0, 2.0],
            strides=[16],
        ),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0),
    ),
    roi_head=dict(
        type='StandardRoIHead',
        shared_head=dict(
            type='ResLayer',
            depth=50,
            stage=3,
            stride=2,
            dilation=1,
            style='pytorch',
            norm_cfg=norm_cfg,
            norm_eval=True,
        ),
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=1024,
            featmap_strides=[16],
        ),
        bbox_head=dict(
            type='BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0,
            ),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0),
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=0,
            pos_weight=-1,
            debug=False,
        ),
        rpn_proposal=dict(
            nms_pre=12000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True,
            ),
            pos_weight=-1,
            debug=False,
        ),
    ),
    test_cfg=dict(
        rpn=dict(
            nms_pre=6000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
        ),
    ),
)

View File

@ -0,0 +1,123 @@
# Model definition: Mask R-CNN on a ResNet-50 C4 backbone.
# A single BN config (requires_grad=False) is shared by the backbone and the
# RoI shared head.
norm_cfg = {'type': 'BN', 'requires_grad': False}
model = {
    'type': 'MaskRCNN',
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'num_stages': 3,
        'strides': (1, 2, 2),
        'dilations': (1, 1, 1),
        'out_indices': (2,),
        'frozen_stages': 1,
        'norm_cfg': norm_cfg,
        'norm_eval': True,
        'style': 'pytorch',
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': 'torchvision://resnet50',
        },
    },
    # RPN on the single stride-16 feature map.
    'rpn_head': {
        'type': 'RPNHead',
        'in_channels': 1024,
        'feat_channels': 1024,
        'anchor_generator': {
            'type': 'AnchorGenerator',
            'scales': [2, 4, 8, 16, 32],
            'ratios': [0.5, 1.0, 2.0],
            'strides': [16],
        },
        'bbox_coder': {
            'type': 'DeltaXYWHBBoxCoder',
            'target_means': [0.0, 0.0, 0.0, 0.0],
            'target_stds': [1.0, 1.0, 1.0, 1.0],
        },
        'loss_cls': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': True,
            'loss_weight': 1.0,
        },
        'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
    },
    'roi_head': {
        'type': 'StandardRoIHead',
        'shared_head': {
            'type': 'ResLayer',
            'depth': 50,
            'stage': 3,
            'stride': 2,
            'dilation': 1,
            'style': 'pytorch',
            'norm_cfg': norm_cfg,
            'norm_eval': True,
        },
        'bbox_roi_extractor': {
            'type': 'SingleRoIExtractor',
            'roi_layer': {
                'type': 'RoIAlign',
                'output_size': 14,
                'sampling_ratio': 0,
            },
            'out_channels': 1024,
            'featmap_strides': [16],
        },
        'bbox_head': {
            'type': 'BBoxHead',
            'with_avg_pool': True,
            'roi_feat_size': 7,
            'in_channels': 2048,
            'num_classes': 80,
            'bbox_coder': {
                'type': 'DeltaXYWHBBoxCoder',
                'target_means': [0.0, 0.0, 0.0, 0.0],
                'target_stds': [0.1, 0.1, 0.2, 0.2],
            },
            'reg_class_agnostic': False,
            'loss_cls': {
                'type': 'CrossEntropyLoss',
                'use_sigmoid': False,
                'loss_weight': 1.0,
            },
            'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
        },
        # No dedicated mask RoI extractor is configured.
        'mask_roi_extractor': None,
        'mask_head': {
            'type': 'FCNMaskHead',
            'num_convs': 0,
            'in_channels': 2048,
            'conv_out_channels': 256,
            'num_classes': 80,
            'loss_mask': {
                'type': 'CrossEntropyLoss',
                'use_mask': True,
                'loss_weight': 1.0,
            },
        },
    },
    # Training-time settings.
    'train_cfg': {
        'rpn': {
            'assigner': {
                'type': 'MaxIoUAssigner',
                'pos_iou_thr': 0.7,
                'neg_iou_thr': 0.3,
                'min_pos_iou': 0.3,
                'match_low_quality': True,
                'ignore_iof_thr': -1,
            },
            'sampler': {
                'type': 'RandomSampler',
                'num': 256,
                'pos_fraction': 0.5,
                'neg_pos_ub': -1,
                'add_gt_as_proposals': False,
            },
            'allowed_border': 0,
            'pos_weight': -1,
            'debug': False,
        },
        'rpn_proposal': {
            'nms_pre': 12000,
            'max_per_img': 2000,
            'nms': {'type': 'nms', 'iou_threshold': 0.7},
            'min_bbox_size': 0,
        },
        'rcnn': {
            'assigner': {
                'type': 'MaxIoUAssigner',
                'pos_iou_thr': 0.5,
                'neg_iou_thr': 0.5,
                'min_pos_iou': 0.5,
                'match_low_quality': False,
                'ignore_iof_thr': -1,
            },
            'sampler': {
                'type': 'RandomSampler',
                'num': 512,
                'pos_fraction': 0.25,
                'neg_pos_ub': -1,
                'add_gt_as_proposals': True,
            },
            'mask_size': 14,
            'pos_weight': -1,
            'debug': False,
        },
    },
    # Test-time settings.
    'test_cfg': {
        'rpn': {
            'nms_pre': 6000,
            'nms': {'type': 'nms', 'iou_threshold': 0.7},
            'max_per_img': 1000,
            'min_bbox_size': 0,
        },
        'rcnn': {
            'score_thr': 0.05,
            'nms': {'type': 'nms', 'iou_threshold': 0.5},
            'max_per_img': 100,
            'mask_thr_binary': 0.5,
        },
    },
}

View File

@ -0,0 +1,120 @@
# Model definition: Mask R-CNN with a ResNet-50 backbone and an FPN neck.
model = {
    'type': 'MaskRCNN',
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'num_stages': 4,
        'out_indices': (0, 1, 2, 3),
        'frozen_stages': 1,
        'norm_cfg': {'type': 'BN', 'requires_grad': True},
        'norm_eval': True,
        'style': 'pytorch',
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': 'torchvision://resnet50',
        },
    },
    'neck': {
        'type': 'FPN',
        'in_channels': [256, 512, 1024, 2048],
        'out_channels': 256,
        'num_outs': 5,
    },
    'rpn_head': {
        'type': 'RPNHead',
        'in_channels': 256,
        'feat_channels': 256,
        'anchor_generator': {
            'type': 'AnchorGenerator',
            'scales': [8],
            'ratios': [0.5, 1.0, 2.0],
            'strides': [4, 8, 16, 32, 64],
        },
        'bbox_coder': {
            'type': 'DeltaXYWHBBoxCoder',
            'target_means': [0.0, 0.0, 0.0, 0.0],
            'target_stds': [1.0, 1.0, 1.0, 1.0],
        },
        'loss_cls': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': True,
            'loss_weight': 1.0,
        },
        'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
    },
    'roi_head': {
        'type': 'StandardRoIHead',
        'bbox_roi_extractor': {
            'type': 'SingleRoIExtractor',
            'roi_layer': {
                'type': 'RoIAlign',
                'output_size': 7,
                'sampling_ratio': 0,
            },
            'out_channels': 256,
            'featmap_strides': [4, 8, 16, 32],
        },
        'bbox_head': {
            'type': 'Shared2FCBBoxHead',
            'in_channels': 256,
            'fc_out_channels': 1024,
            'roi_feat_size': 7,
            'num_classes': 80,
            'bbox_coder': {
                'type': 'DeltaXYWHBBoxCoder',
                'target_means': [0.0, 0.0, 0.0, 0.0],
                'target_stds': [0.1, 0.1, 0.2, 0.2],
            },
            'reg_class_agnostic': False,
            'loss_cls': {
                'type': 'CrossEntropyLoss',
                'use_sigmoid': False,
                'loss_weight': 1.0,
            },
            'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
        },
        'mask_roi_extractor': {
            'type': 'SingleRoIExtractor',
            'roi_layer': {
                'type': 'RoIAlign',
                'output_size': 14,
                'sampling_ratio': 0,
            },
            'out_channels': 256,
            'featmap_strides': [4, 8, 16, 32],
        },
        'mask_head': {
            'type': 'FCNMaskHead',
            'num_convs': 4,
            'in_channels': 256,
            'conv_out_channels': 256,
            'num_classes': 80,
            'loss_mask': {
                'type': 'CrossEntropyLoss',
                'use_mask': True,
                'loss_weight': 1.0,
            },
        },
    },
    # Training-time settings.
    'train_cfg': {
        'rpn': {
            'assigner': {
                'type': 'MaxIoUAssigner',
                'pos_iou_thr': 0.7,
                'neg_iou_thr': 0.3,
                'min_pos_iou': 0.3,
                'match_low_quality': True,
                'ignore_iof_thr': -1,
            },
            'sampler': {
                'type': 'RandomSampler',
                'num': 256,
                'pos_fraction': 0.5,
                'neg_pos_ub': -1,
                'add_gt_as_proposals': False,
            },
            'allowed_border': -1,
            'pos_weight': -1,
            'debug': False,
        },
        'rpn_proposal': {
            'nms_pre': 2000,
            'max_per_img': 1000,
            'nms': {'type': 'nms', 'iou_threshold': 0.7},
            'min_bbox_size': 0,
        },
        'rcnn': {
            'assigner': {
                'type': 'MaxIoUAssigner',
                'pos_iou_thr': 0.5,
                'neg_iou_thr': 0.5,
                'min_pos_iou': 0.5,
                'match_low_quality': True,
                'ignore_iof_thr': -1,
            },
            'sampler': {
                'type': 'RandomSampler',
                'num': 512,
                'pos_fraction': 0.25,
                'neg_pos_ub': -1,
                'add_gt_as_proposals': True,
            },
            'mask_size': 28,
            'pos_weight': -1,
            'debug': False,
        },
    },
    # Test-time settings.
    'test_cfg': {
        'rpn': {
            'nms_pre': 1000,
            'max_per_img': 1000,
            'nms': {'type': 'nms', 'iou_threshold': 0.7},
            'min_bbox_size': 0,
        },
        'rcnn': {
            'score_thr': 0.05,
            'nms': {'type': 'nms', 'iou_threshold': 0.5},
            'max_per_img': 100,
            'mask_thr_binary': 0.5,
        },
    },
}

View File

@ -0,0 +1,11 @@
# Optimizer: SGD, no gradient clipping.
optimizer = {
    'type': 'SGD',
    'lr': 0.02,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}
optimizer_config = {'grad_clip': None}
# Learning-rate policy: step policy with linear warmup; 12-epoch run.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 1000,
    'warmup_ratio': 0.001,
    'step': [8, 11],
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 12}

View File

@ -0,0 +1,12 @@
# Optimizer: SGD, no gradient clipping.
optimizer = {
    'type': 'SGD',
    'lr': 0.02,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}
optimizer_config = {'grad_clip': None}
# Learning-rate policy: iteration-based (by_epoch=False) step policy with
# linear warmup; 24000-iteration run.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 100,
    'warmup_ratio': 0.001,
    'step': [18000, 22000],
    'by_epoch': False,
}
runner = {'type': 'IterBasedRunner', 'max_iters': 24000}

View File

@ -0,0 +1,11 @@
# Optimizer: SGD, no gradient clipping.
optimizer = {
    'type': 'SGD',
    'lr': 0.02,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}
optimizer_config = {'grad_clip': None}
# Learning-rate policy: step policy with linear warmup; 24-epoch run.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 1000,
    'warmup_ratio': 0.001,
    'step': [16, 22],
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 24}

View File

@ -0,0 +1,36 @@
# Mask R-CNN R50-C4 with the schedule_1x base schedule.
_base_ = [
    '../_base_/models/mask_rcnn_r50_c4.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py',
]
# SyncBN with trainable parameters, applied to the backbone and the shared
# RoI head; no backbone stage is frozen (frozen_stages=-1).
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
model = {
    'backbone': {
        'frozen_stages': -1,
        'norm_cfg': norm_cfg,
        'norm_eval': False,
    },
    'roi_head': {
        'shared_head': {
            'type': 'ResLayerExtraNorm',
            'norm_cfg': norm_cfg,
            'norm_eval': False,
        },
    },
}
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True, 'with_mask': True},
    {
        'type': 'Resize',
        # (1333, s) for s = 640, 672, ..., 800.
        'img_scale': [(1333, s) for s in range(640, 801, 32)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {
        'type': 'Collect',
        'keys': ['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
    },
]
data = {'train': {'pipeline': train_pipeline}}
# Module imported so that the 'ResLayerExtraNorm' type used above is available.
custom_imports = {
    'imports': ['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    'allow_failed_imports': False,
}

View File

@ -0,0 +1,36 @@
# Mask R-CNN R50-C4 with the schedule_2x base schedule.
_base_ = [
    '../_base_/models/mask_rcnn_r50_c4.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_2x.py',
    '../_base_/default_runtime.py',
]
# SyncBN with trainable parameters, applied to the backbone and the shared
# RoI head; no backbone stage is frozen (frozen_stages=-1).
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
model = {
    'backbone': {
        'frozen_stages': -1,
        'norm_cfg': norm_cfg,
        'norm_eval': False,
    },
    'roi_head': {
        'shared_head': {
            'type': 'ResLayerExtraNorm',
            'norm_cfg': norm_cfg,
            'norm_eval': False,
        },
    },
}
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True, 'with_mask': True},
    {
        'type': 'Resize',
        # (1333, s) for s = 640, 672, ..., 800.
        'img_scale': [(1333, s) for s in range(640, 801, 32)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {
        'type': 'Collect',
        'keys': ['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
    },
]
data = {'train': {'pipeline': train_pipeline}}
# Module imported so that the 'ResLayerExtraNorm' type used above is available.
custom_imports = {
    'imports': ['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    'allow_failed_imports': False,
}

View File

@ -0,0 +1,33 @@
# Mask R-CNN R50-FPN with the schedule_1x base schedule.
_base_ = [
    '../_base_/models/mask_rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py',
]
# SyncBN with trainable parameters, applied to backbone, neck and both RoI
# heads; no backbone stage is frozen (frozen_stages=-1).
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
model = {
    'backbone': {
        'frozen_stages': -1,
        'norm_cfg': norm_cfg,
        'norm_eval': False,
    },
    'neck': {'norm_cfg': norm_cfg},
    'roi_head': {
        'bbox_head': {
            'type': 'Shared4Conv1FCBBoxHead',
            'norm_cfg': norm_cfg,
        },
        'mask_head': {'norm_cfg': norm_cfg},
    },
}
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True, 'with_mask': True},
    {
        'type': 'Resize',
        # (1333, s) for s = 640, 672, ..., 800.
        'img_scale': [(1333, s) for s in range(640, 801, 32)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {
        'type': 'Collect',
        'keys': ['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
    },
]
data = {'train': {'pipeline': train_pipeline}}

View File

@ -0,0 +1,33 @@
# Mask R-CNN R50-FPN with the schedule_2x base schedule.
_base_ = [
    '../_base_/models/mask_rcnn_r50_fpn.py',
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_2x.py',
    '../_base_/default_runtime.py',
]
# SyncBN with trainable parameters, applied to backbone, neck and both RoI
# heads; no backbone stage is frozen (frozen_stages=-1).
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
model = {
    'backbone': {
        'frozen_stages': -1,
        'norm_cfg': norm_cfg,
        'norm_eval': False,
    },
    'neck': {'norm_cfg': norm_cfg},
    'roi_head': {
        'bbox_head': {
            'type': 'Shared4Conv1FCBBoxHead',
            'norm_cfg': norm_cfg,
        },
        'mask_head': {'norm_cfg': norm_cfg},
    },
}
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True, 'with_mask': True},
    {
        'type': 'Resize',
        # (1333, s) for s = 640, 672, ..., 800.
        'img_scale': [(1333, s) for s in range(640, 801, 32)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {
        'type': 'Collect',
        'keys': ['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
    },
]
data = {'train': {'pipeline': train_pipeline}}

View File

@ -0,0 +1,84 @@
# Faster R-CNN R50-C4 on Pascal VOC (VOC2007 + VOC2012 trainval) with the
# 24k-iteration schedule.
_base_ = [
    '../_base_/models/faster_rcnn_r50_c4.py',
    '../_base_/schedules/schedule_24k.py',
    '../_base_/default_runtime.py',
]
# SyncBN with trainable parameters for the backbone and the shared RoI head;
# the bbox head is set to 20 classes.
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
model = {
    'backbone': {
        'frozen_stages': -1,
        'norm_cfg': norm_cfg,
        'norm_eval': False,
    },
    'roi_head': {
        'shared_head': {
            'type': 'ResLayerExtraNorm',
            'norm_cfg': norm_cfg,
            'norm_eval': False,
        },
        'bbox_head': {'num_classes': 20},
    },
}
# Dataset settings.
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {
        'type': 'Resize',
        # (1333, s) for s = 480, 512, ..., 800.
        'img_scale': [(1333, s) for s in range(480, 801, 32)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_bboxes', 'gt_labels']},
]
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': (1333, 800),
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'Pad', 'size_divisor': 32},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
data = {
    'samples_per_gpu': 2,
    'workers_per_gpu': 2,
    # Training set: VOC2007 and VOC2012 trainval splits.
    'train': {
        'type': dataset_type,
        'ann_file': [
            f'{data_root}VOC2007/ImageSets/Main/trainval.txt',
            f'{data_root}VOC2012/ImageSets/Main/trainval.txt',
        ],
        'img_prefix': [f'{data_root}VOC2007/', f'{data_root}VOC2012/'],
        'pipeline': train_pipeline,
    },
    # Validation and test both read the VOC2007 test split.
    'val': {
        'type': dataset_type,
        'ann_file': f'{data_root}VOC2007/ImageSets/Main/test.txt',
        'img_prefix': f'{data_root}VOC2007/',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'ann_file': f'{data_root}VOC2007/ImageSets/Main/test.txt',
        'img_prefix': f'{data_root}VOC2007/',
        'pipeline': test_pipeline,
    },
}
# Evaluation and checkpointing intervals are given in iterations.
evaluation = {'interval': 2000, 'metric': 'mAP'}
checkpoint_config = {'by_epoch': False, 'interval': 2000}
log_config = {
    'interval': 50,
    'hooks': [
        {'type': 'TextLoggerHook', 'by_epoch': False},
        # Optional extra hook, disabled:
        # {'type': 'TensorboardLoggerHook'}
    ],
}
# Module imported so that the 'ResLayerExtraNorm' type used above is available.
custom_imports = {
    'imports': ['tools.benchmarks.mmdetection.res_layer_extra_norm'],
    'allow_failed_imports': False,
}

View File

@ -0,0 +1,54 @@
# Cityscapes dataset settings (crop size 512x1024).
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
crop_size = (512, 1024)
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations'},
    {'type': 'Resize', 'img_scale': (2048, 1024), 'ratio_range': (0.5, 2.0)},
    {'type': 'RandomCrop', 'crop_size': crop_size, 'cat_max_ratio': 0.75},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PhotoMetricDistortion'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size': crop_size, 'pad_val': 0, 'seg_pad_val': 255},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_semantic_seg']},
]
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': (2048, 1024),
        # Optional multi-scale ratios, disabled:
        # 'img_ratios': [0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
data = {
    'samples_per_gpu': 2,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'leftImg8bit/train',
        'ann_dir': 'gtFine/train',
        'pipeline': train_pipeline,
    },
    # Validation and test both read the 'val' directories.
    'val': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'leftImg8bit/val',
        'ann_dir': 'gtFine/val',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'leftImg8bit/val',
        'ann_dir': 'gtFine/val',
        'pipeline': test_pipeline,
    },
}

View File

@ -0,0 +1,35 @@
# Cityscapes variant with 769x769 crops; only the pipelines are overridden
# on top of the base dataset config.
_base_ = './cityscapes.py'
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
crop_size = (769, 769)
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations'},
    {'type': 'Resize', 'img_scale': (2049, 1025), 'ratio_range': (0.5, 2.0)},
    {'type': 'RandomCrop', 'crop_size': crop_size, 'cat_max_ratio': 0.75},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PhotoMetricDistortion'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size': crop_size, 'pad_val': 0, 'seg_pad_val': 255},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_semantic_seg']},
]
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': (2049, 1025),
        # Optional multi-scale ratios, disabled:
        # 'img_ratios': [0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
data = {
    'train': {'pipeline': train_pipeline},
    'val': {'pipeline': test_pipeline},
    'test': {'pipeline': test_pipeline},
}

View File

@ -0,0 +1,57 @@
# Pascal VOC 2012 segmentation dataset settings (crop size 512x512).
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
crop_size = (512, 512)
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations'},
    {'type': 'Resize', 'img_scale': (2048, 512), 'ratio_range': (0.5, 2.0)},
    {'type': 'RandomCrop', 'crop_size': crop_size, 'cat_max_ratio': 0.75},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PhotoMetricDistortion'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size': crop_size, 'pad_val': 0, 'seg_pad_val': 255},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_semantic_seg']},
]
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': (2048, 512),
        # Optional multi-scale ratios, disabled:
        # 'img_ratios': [0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
data = {
    'samples_per_gpu': 4,
    'workers_per_gpu': 4,
    'train': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'JPEGImages',
        'ann_dir': 'SegmentationClass',
        'split': 'ImageSets/Segmentation/train.txt',
        'pipeline': train_pipeline,
    },
    # Validation and test both read the val.txt split.
    'val': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'JPEGImages',
        'ann_dir': 'SegmentationClass',
        'split': 'ImageSets/Segmentation/val.txt',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'JPEGImages',
        'ann_dir': 'SegmentationClass',
        'split': 'ImageSets/Segmentation/val.txt',
        'pipeline': test_pipeline,
    },
}

View File

@ -0,0 +1,9 @@
# Pascal VOC 2012 with the augmented annotations: the training set lists two
# annotation directories with their matching split files.
_base_ = './pascal_voc12.py'
data = {
    'train': {
        'ann_dir': ['SegmentationClass', 'SegmentationClassAug'],
        'split': [
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt',
        ],
    },
}

View File

@ -0,0 +1,14 @@
# Default runtime settings.
# yapf:disable
log_config = {
    'interval': 50,
    'hooks': [
        {'type': 'TextLoggerHook', 'by_epoch': False},
        # Optional extra hook, disabled:
        # {'type': 'TensorboardLoggerHook'}
    ],
}
# yapf:enable
dist_params = {'backend': 'nccl'}
log_level = 'INFO'
# No checkpoint is loaded or resumed by default.
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True

View File

@ -0,0 +1,45 @@
# Model definition: FCN on a dilated ResNet-50 backbone, with a main decode
# head on backbone output 3 and an auxiliary head on output 2.
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
model = {
    'type': 'EncoderDecoder',
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'num_stages': 4,
        'out_indices': (0, 1, 2, 3),
        'dilations': (1, 1, 2, 4),
        'strides': (1, 2, 1, 1),
        'norm_cfg': norm_cfg,
        'norm_eval': False,
        'style': 'pytorch',
        'contract_dilation': True,
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': 'torchvision://resnet50',
        },
    },
    'decode_head': {
        'type': 'FCNHead',
        'in_channels': 2048,
        'in_index': 3,
        'channels': 512,
        'num_convs': 2,
        'concat_input': True,
        'dropout_ratio': 0.1,
        'num_classes': 19,
        'norm_cfg': norm_cfg,
        'align_corners': False,
        'loss_decode': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': False,
            'loss_weight': 1.0,
        },
    },
    # The auxiliary loss is weighted 0.4 against 1.0 for the decode head.
    'auxiliary_head': {
        'type': 'FCNHead',
        'in_channels': 1024,
        'in_index': 2,
        'channels': 256,
        'num_convs': 1,
        'concat_input': False,
        'dropout_ratio': 0.1,
        'num_classes': 19,
        'norm_cfg': norm_cfg,
        'align_corners': False,
        'loss_decode': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': False,
            'loss_weight': 0.4,
        },
    },
    # Training and testing settings.
    'train_cfg': {},
    'test_cfg': {'mode': 'whole'},
}

View File

@ -0,0 +1,9 @@
# Optimizer: SGD with an empty optimizer hook config.
optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0005,
}
optimizer_config = {}
# Learning-rate policy: iteration-based 'poly' policy.
lr_config = {
    'policy': 'poly',
    'power': 0.9,
    'min_lr': 1e-4,
    'by_epoch': False,
}
# Runtime: 20000 iterations; checkpoint and evaluate every 2000 iterations.
runner = {'type': 'IterBasedRunner', 'max_iters': 20000}
checkpoint_config = {'by_epoch': False, 'interval': 2000}
evaluation = {'interval': 2000, 'metric': 'mIoU', 'pre_eval': True}

View File

@ -0,0 +1,9 @@
# Optimizer: SGD with an empty optimizer hook config.
optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0005,
}
optimizer_config = {}
# Learning-rate policy: iteration-based 'poly' policy.
lr_config = {
    'policy': 'poly',
    'power': 0.9,
    'min_lr': 1e-4,
    'by_epoch': False,
}
# Runtime: 40000 iterations; checkpoint and evaluate every 4000 iterations.
runner = {'type': 'IterBasedRunner', 'max_iters': 40000}
checkpoint_config = {'by_epoch': False, 'interval': 4000}
evaluation = {'interval': 4000, 'metric': 'mIoU', 'pre_eval': True}

View File

@ -0,0 +1,9 @@
# FCN R50-D8 on Cityscapes with 769x769 crops and the 40k schedule.
_base_ = [
    '../_base_/models/fcn_r50-d8.py',
    '../_base_/datasets/cityscapes_769x769.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Both heads use align_corners=True; testing uses 'slide' mode with
# 769x769 crops and a 513x513 stride.
model = {
    'decode_head': {'align_corners': True},
    'auxiliary_head': {'align_corners': True},
    'test_cfg': {
        'mode': 'slide',
        'crop_size': (769, 769),
        'stride': (513, 513),
    },
}

View File

@ -0,0 +1,6 @@
# FCN R50-D8 on augmented Pascal VOC 2012 with the 20k schedule.
_base_ = [
    '../_base_/models/fcn_r50-d8.py',
    '../_base_/datasets/pascal_voc12_aug.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_20k.py',
]
# Both heads are set to 21 classes.
model = {
    'decode_head': {'num_classes': 21},
    'auxiliary_head': {'num_classes': 21},
}