[Refactor] add benchmark configs

2021-12-15 19:09:54 +08:00 · 2021-12-15 19:09:54 +08:00 · 67716e2011
parent 00e51990fb
commit 67716e2011
90 changed files with 1928 additions and 0 deletions
--- a/configs/benchmarks/classification/_base_/datasets/cifar10.py
+++ b/configs/benchmarks/classification/_base_/datasets/cifar10.py
@ -0,0 +1,49 @@
+# dataset settings
+# CIFAR-10 data config: dataset wrapper type, per-channel normalization
+# statistics, and the train/test transform pipelines.
+data_source = 'CIFAR10'
+dataset_type = 'SingleViewDataset'
+img_norm_cfg = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.201])
+train_pipeline = [
+    dict(type='RandomCrop', size=32, padding=4),
+    dict(type='RandomHorizontalFlip'),
+]
+test_pipeline = []
+
+# prefetch
+# ToTensor/Normalize are appended to both pipelines only when prefetch is
+# disabled; with prefetch=True the pipelines above are used as-is.
+prefetch = False
+if not prefetch:
+    train_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+    test_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+
+# dataset summary
+# train, val and test all point at the same data_prefix; val and test share
+# test_pipeline.
+data = dict(
+    imgs_per_gpu=128,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/cifar10',
+        ),
+        pipeline=train_pipeline,
+        prefetch=prefetch),
+    val=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/cifar10',
+        ),
+        pipeline=test_pipeline,
+        prefetch=prefetch),
+    test=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/cifar10',
+        ),
+        pipeline=test_pipeline,
+        prefetch=prefetch))
+evaluation = dict(interval=10, topk=(1, 5))
--- a/configs/benchmarks/classification/_base_/datasets/imagenet.py
+++ b/configs/benchmarks/classification/_base_/datasets/imagenet.py
@ -0,0 +1,46 @@
+# dataset settings
+# ImageNet data config: 224-pixel random-resized-crop training pipeline and
+# a resize-256 / center-crop-224 test pipeline.
+data_source = 'ImageNet'
+dataset_type = 'SingleViewDataset'
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+train_pipeline = [
+    dict(type='RandomResizedCrop', size=224),
+    dict(type='RandomHorizontalFlip'),
+]
+test_pipeline = [
+    dict(type='Resize', size=256),
+    dict(type='CenterCrop', size=224),
+]
+
+# prefetch
+# ToTensor/Normalize are appended to both pipelines only when prefetch is
+# disabled.
+prefetch = False
+if not prefetch:
+    train_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+    test_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+
+# dataset summary
+# Only train and val splits are defined here (no test entry).
+data = dict(
+    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/imagenet/train',
+            ann_file='data/imagenet/meta/train.txt',
+        ),
+        pipeline=train_pipeline,
+        prefetch=prefetch),
+    val=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/imagenet/val',
+            ann_file='data/imagenet/meta/val.txt',
+        ),
+        pipeline=test_pipeline,
+        prefetch=prefetch))
+evaluation = dict(interval=10, topk=(1, 5))
--- a/configs/benchmarks/classification/_base_/datasets/inaturalist2018.py
+++ b/configs/benchmarks/classification/_base_/datasets/inaturalist2018.py
@ -0,0 +1,46 @@
+# dataset settings
+# iNaturalist-2018 data config. It reuses the 'ImageNet' data source type
+# with iNaturalist paths and the same pipelines as the ImageNet config.
+data_source = 'ImageNet'
+dataset_type = 'SingleViewDataset'
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+train_pipeline = [
+    dict(type='RandomResizedCrop', size=224),
+    dict(type='RandomHorizontalFlip'),
+]
+test_pipeline = [
+    dict(type='Resize', size=256),
+    dict(type='CenterCrop', size=224),
+]
+
+# prefetch
+# ToTensor/Normalize are appended to both pipelines only when prefetch is
+# disabled.
+prefetch = False
+if not prefetch:
+    train_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+    test_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+
+# dataset summary
+# train and val share one image directory (train_val2018) and differ only in
+# their annotation list.
+data = dict(
+    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/iNaturalist2018/train_val2018',
+            ann_file='data/iNaturalist2018/meta/train.txt',
+        ),
+        pipeline=train_pipeline,
+        prefetch=prefetch),
+    val=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/iNaturalist2018/train_val2018',
+            ann_file='data/iNaturalist2018/meta/val.txt',
+        ),
+        pipeline=test_pipeline,
+        prefetch=prefetch))
+evaluation = dict(interval=10, topk=(1, 5))
--- a/configs/benchmarks/classification/_base_/datasets/places205.py
+++ b/configs/benchmarks/classification/_base_/datasets/places205.py
@ -0,0 +1,48 @@
+# dataset settings
+# Places205 data config. It reuses the 'ImageNet' data source type; training
+# resizes to 256, center-crops 256, then takes a random 224 crop.
+data_source = 'ImageNet'
+dataset_type = 'SingleViewDataset'
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+train_pipeline = [
+    dict(type='Resize', size=256),
+    dict(type='CenterCrop', size=256),
+    dict(type='RandomCrop', size=224),
+    dict(type='RandomHorizontalFlip'),
+]
+test_pipeline = [
+    dict(type='Resize', size=256),
+    dict(type='CenterCrop', size=224),
+]
+
+# prefetch
+# ToTensor/Normalize are appended to both pipelines only when prefetch is
+# disabled.
+prefetch = False
+if not prefetch:
+    train_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+    test_pipeline.extend(
+        [dict(type='ToTensor'),
+         dict(type='Normalize', **img_norm_cfg)])
+
+# dataset summary
+# Only train and val splits are defined here (no test entry).
+data = dict(
+    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/Places205/train',
+            ann_file='data/Places205/meta/train.txt',
+        ),
+        pipeline=train_pipeline,
+        prefetch=prefetch),
+    val=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/Places205/val',
+            ann_file='data/Places205/meta/val.txt',
+        ),
+        pipeline=test_pipeline,
+        prefetch=prefetch))
+evaluation = dict(interval=10, topk=(1, 5))
--- a/configs/benchmarks/classification/_base_/default_runtime.py
+++ b/configs/benchmarks/classification/_base_/default_runtime.py
@ -0,0 +1,20 @@
+# Runtime defaults shared by the classification benchmark configs.
+train_cfg = {}
+test_cfg = {}
+optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
+# Text logging every 50 iterations; the Tensorboard hook is left disabled.
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+
+# runtime settings
+dist_params = dict(backend='nccl')
+cudnn_benchmark = True
+log_level = 'INFO'
+load_from = None  # no checkpoint is loaded by default
+resume_from = None  # no run is resumed by default
+workflow = [('train', 1)]
+persistent_workers = True
--- a/configs/benchmarks/classification/_base_/models/resnet50.py
+++ b/configs/benchmarks/classification/_base_/models/resnet50.py
@ -0,0 +1,15 @@
+# ResNet-50 classifier: a 4-stage backbone exposing out_indices=[4] plus a
+# ClsHead (average pooling, 2048 input channels, 1000 classes).
+model = dict(
+    type='Classification',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        in_channels=3,
+        num_stages=4,
+        strides=(1, 2, 2, 2),
+        dilations=(1, 1, 1, 1),
+        out_indices=[4],  # 0: conv-1, x: stage-x
+        norm_cfg=dict(type='BN'),
+        frozen_stages=-1),
+    head=dict(
+        type='ClsHead', with_avg_pool=True, in_channels=2048,
+        num_classes=1000))
--- a/configs/benchmarks/classification/_base_/models/resnet50_multihead.py
+++ b/configs/benchmarks/classification/_base_/models/resnet50_multihead.py
@ -0,0 +1,17 @@
+# ResNet-50 with a multi-head classifier: the backbone exposes conv-1 and all
+# four stages (out_indices 0-4) and MultiClsHead reads the same five indices.
+model = dict(
+    type='Classification',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        in_channels=3,
+        out_indices=[0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
+        norm_cfg=dict(type='BN'),
+        frozen_stages=-1),
+    head=dict(
+        type='MultiClsHead',
+        pool_type='specified',
+        in_indices=[0, 1, 2, 3, 4],
+        with_last_layer_unpool=False,
+        backbone='resnet50',
+        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
+        num_classes=1000))
--- a/configs/benchmarks/classification/_base_/schedules/lars_coslr-90e.py
+++ b/configs/benchmarks/classification/_base_/schedules/lars_coslr-90e.py
@ -0,0 +1,8 @@
+# Schedule: LARS optimizer with cosine-annealed learning rate for 90 epochs.
+# optimizer
+optimizer = dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.)
+
+# learning policy
+lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=90)
--- a/configs/benchmarks/classification/_base_/schedules/sgd_coslr-100e.py
+++ b/configs/benchmarks/classification/_base_/schedules/sgd_coslr-100e.py
@ -0,0 +1,8 @@
+# Schedule: SGD with cosine-annealed learning rate for 100 epochs.
+# optimizer
+optimizer = dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6)
+
+# learning policy
+lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=100)
--- a/configs/benchmarks/classification/_base_/schedules/sgd_steplr-100e.py
+++ b/configs/benchmarks/classification/_base_/schedules/sgd_steplr-100e.py
@ -0,0 +1,8 @@
+# Schedule: SGD with step learning-rate decay at epochs 60 and 80, 100 epochs.
+# optimizer
+optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4)
+
+# learning policy
+lr_config = dict(policy='step', step=[60, 80])
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=100)
--- a/configs/benchmarks/classification/cifar/resnet50_8xb128-steplr-350e_cifar10.py
+++ b/configs/benchmarks/classification/cifar/resnet50_8xb128-steplr-350e_cifar10.py
@ -0,0 +1,19 @@
+# CIFAR-10 ResNet-50 classification. It starts from the 100-epoch step
+# schedule but overrides the optimizer, decay steps and max_epochs below.
+_base_ = [
+    '../_base_/models/resnet50.py',
+    '../_base_/datasets/cifar10.py',
+    '../_base_/schedules/sgd_steplr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+# model settings
+model = dict(head=dict(num_classes=10))
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=5e-4)
+
+# learning policy
+lr_config = dict(policy='step', step=[150, 250])
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=350)
+checkpoint_config = dict(interval=50)
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50-sobel_4xb64-steplr1e-2-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50-sobel_4xb64-steplr1e-2-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Sobel variant of the head1 / lr=0.01 10% config: enables with_sobel and
+# sets the backbone to 2 input channels.
+_base_ = 'resnet50_head1_4xb64-steplr1e-2-20e_in1k-10pct.py'
+
+# model settings
+model = dict(with_sobel=True, backbone=dict(in_channels=2))
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head100_4xb64-steplr1e-1-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head100_4xb64-steplr1e-1-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; only the lr_mult for the
+# '\\Ahead.' parameter pattern changes (1 -> 100).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head100_4xb64-steplr1e-2-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head100_4xb64-steplr1e-2-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; overrides lr to 0.01 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 100.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head100_4xb64-steplr1e-3-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head100_4xb64-steplr1e-3-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; overrides lr to 0.001 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 100.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head10_4xb64-steplr1e-1-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head10_4xb64-steplr1e-1-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; only the lr_mult for the
+# '\\Ahead.' parameter pattern changes (1 -> 10).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head10_4xb64-steplr1e-2-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head10_4xb64-steplr1e-2-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; overrides lr to 0.01 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 10.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head10_4xb64-steplr1e-3-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head10_4xb64-steplr1e-3-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; overrides lr to 0.001 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 10.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
@ -0,0 +1,34 @@
+# Base config for the 10% ImageNet subset runs (64 images per GPU, 256 in
+# total). The other configs in this directory inherit from it and override
+# lr and/or the head lr_mult.
+_base_ = [
+    '../../_base_/models/resnet50.py',
+    '../../_base_/datasets/imagenet.py',
+    '../../_base_/schedules/sgd_steplr-100e.py',
+    '../../_base_/default_runtime.py',
+]
+
+# model settings
+model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
+
+# dataset settings
+# Only the training annotation list is swapped for the 10% subset; the list
+# name follows the same '<N>percent' convention as the 1% config.
+data = dict(
+    imgs_per_gpu=64,  # total 64x4=256
+    train=dict(
+        data_source=dict(ann_file='data/imagenet/meta/train_10percent.txt')))
+
+# optimizer
+# The '\\Ahead.' key is presumably a regex matched against parameter names
+# (head parameters) -- confirm against the optimizer builder.
+optimizer = dict(
+    type='SGD',
+    lr=0.1,
+    momentum=0.9,
+    weight_decay=1e-4,
+    paramwise_options={'\\Ahead.': dict(lr_mult=1)})
+
+# learning policy
+lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+checkpoint_config = dict(interval=10)
+# Replaces the default logging: every 10 iterations, with Tensorboard enabled.
+log_config = dict(
+    interval=10,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-2-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-2-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; only the base lr changes (0.01).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(lr=0.01)
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-3-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-3-20e_in1k-10pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 10% config; only the base lr changes (0.001).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py'
+
+# optimizer
+optimizer = dict(lr=0.001)
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50-sobel_4xb64-steplr1e-2-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50-sobel_4xb64-steplr1e-2-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Sobel variant of the head1 / lr=0.01 1% config: enables with_sobel and
+# sets the backbone to 2 input channels.
+_base_ = 'resnet50_head1_4xb64-steplr1e-2-20e_in1k-1pct.py'
+
+# model settings
+model = dict(with_sobel=True, backbone=dict(in_channels=2))
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head100_4xb64-steplr1e-1-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head100_4xb64-steplr1e-1-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; only the lr_mult for the
+# '\\Ahead.' parameter pattern changes (1 -> 100).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head100_4xb64-steplr1e-2-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head100_4xb64-steplr1e-2-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; overrides lr to 0.01 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 100.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head100_4xb64-steplr1e-3-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head100_4xb64-steplr1e-3-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; overrides lr to 0.001 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 100.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head10_4xb64-steplr1e-1-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head10_4xb64-steplr1e-1-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; only the lr_mult for the
+# '\\Ahead.' parameter pattern changes (1 -> 10).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(paramwise_options={'\\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head10_4xb64-steplr1e-2-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head10_4xb64-steplr1e-2-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; overrides lr to 0.01 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 10.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(lr=0.01, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head10_4xb64-steplr1e-3-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head10_4xb64-steplr1e-3-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; overrides lr to 0.001 and the
+# lr_mult for the '\\Ahead.' parameter pattern to 10.
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(lr=0.001, paramwise_options={'\\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
@ -0,0 +1,34 @@
+# Base config for the 1% ImageNet subset runs (64 images per GPU, 256 in
+# total). The other configs in this directory inherit from it and override
+# lr and/or the head lr_mult.
+_base_ = [
+    '../../_base_/models/resnet50.py',
+    '../../_base_/datasets/imagenet.py',
+    '../../_base_/schedules/sgd_steplr-100e.py',
+    '../../_base_/default_runtime.py',
+]
+
+# model settings
+model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
+
+# dataset settings
+# Only the training annotation list is swapped for the 1% subset.
+data = dict(
+    imgs_per_gpu=64,  # total 64x4=256
+    train=dict(
+        data_source=dict(ann_file='data/imagenet/meta/train_1percent.txt')))
+
+# optimizer
+# weight_decay=5e-4 replaces the 1e-4 of the inherited step schedule. The
+# '\\Ahead.' key is presumably a regex matched against parameter names
+# (head parameters) -- confirm against the optimizer builder.
+optimizer = dict(
+    type='SGD',
+    lr=0.1,
+    momentum=0.9,
+    weight_decay=5e-4,
+    paramwise_options={'\\Ahead.': dict(lr_mult=1)})
+
+# learning policy
+lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+checkpoint_config = dict(interval=10)
+# Replaces the default logging: every 10 iterations, with Tensorboard enabled.
+log_config = dict(
+    interval=10,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-2-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-2-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; only the base lr changes (0.01).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(lr=0.01)
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-3-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-3-20e_in1k-1pct.py
@ -0,0 +1,4 @@
+# Inherits the head1 / lr=0.1 1% config; only the base lr changes (0.001).
+_base_ = 'resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py'
+
+# optimizer
+optimizer = dict(lr=0.001)
--- a/configs/benchmarks/classification/imagenet/resnet50-nofrz_8xb32-steplr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50-nofrz_8xb32-steplr-90e_in1k.py
@ -0,0 +1,19 @@
+# ImageNet classification without freezing the backbone: frozen_stages stays
+# at the base value (-1) and the backbone norm layers switch to SyncBN.
+_base_ = [
+    '../_base_/models/resnet50.py',
+    '../_base_/datasets/imagenet.py',
+    '../_base_/schedules/sgd_steplr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+# model settings
+model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
+
+# learning policy
+# NOTE(review): the last step (90) equals max_epochs, so it presumably never
+# takes effect -- confirm.
+lr_config = dict(step=[30, 60, 90])
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=90)
+# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
+# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
+# it will remove the oldest one to keep the number of total ckpts as 3
+# No out_dir is set, so checkpoints stay in the run's work_dir like the other
+# ImageNet benchmark configs.
+checkpoint_config = dict(interval=10, max_keep_ckpts=3)
--- a/configs/benchmarks/classification/imagenet/resnet50-sobel_8xb32-steplr-100e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50-sobel_8xb32-steplr-100e_in1k.py
@ -0,0 +1,4 @@
+# Sobel variant of the 100-epoch step-LR ImageNet config: enables with_sobel,
+# sets 2 backbone input channels and restates frozen_stages=4.
+_base_ = 'resnet50_8xb32-steplr-100e_in1k.py'
+
+# model settings
+model = dict(with_sobel=True, backbone=dict(in_channels=2, frozen_stages=4))
--- a/configs/benchmarks/classification/imagenet/resnet50-sobel_mhead_8xb32-steplr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50-sobel_mhead_8xb32-steplr-90e_in1k.py
@ -0,0 +1,4 @@
+# Sobel variant of the multi-head 90-epoch ImageNet config: enables
+# with_sobel, sets 2 backbone input channels and restates frozen_stages=4.
+_base_ = 'resnet50_mhead_8xb32-steplr-90e_in1k.py'
+
+# model settings
+model = dict(with_sobel=True, backbone=dict(in_channels=2, frozen_stages=4))
--- a/configs/benchmarks/classification/imagenet/resnet50_8xb32-coslr-100e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_8xb32-coslr-100e_in1k.py
@ -0,0 +1,15 @@
+# ImageNet classification with frozen_stages=4 on the backbone, using the
+# SGD cosine-annealing 100-epoch schedule unchanged.
+_base_ = [
+    '../_base_/models/resnet50.py',
+    '../_base_/datasets/imagenet.py',
+    '../_base_/schedules/sgd_coslr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+model = dict(backbone=dict(frozen_stages=4))
+
+# swav setting
+# runtime settings
+# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
+# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
+# it will remove the oldest one to keep the number of total ckpts as 3
+checkpoint_config = dict(interval=10, max_keep_ckpts=3)
--- a/configs/benchmarks/classification/imagenet/resnet50_8xb32-steplr-100e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_8xb32-steplr-100e_in1k.py
@ -0,0 +1,20 @@
+# ImageNet classification with frozen_stages=4 on the backbone, using the
+# SGD step 100-epoch schedule with an overridden optimizer.
+_base_ = [
+    '../_base_/models/resnet50.py',
+    '../_base_/datasets/imagenet.py',
+    '../_base_/schedules/sgd_steplr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+model = dict(backbone=dict(frozen_stages=4))
+
+# Evaluate every epoch instead of the dataset default of every 10.
+evaluation = dict(interval=1, topk=(1, 5))
+
+# moco setting
+# optimizer
+# Overrides the schedule's lr=0.1 / weight_decay=1e-4 with lr=30 and no decay.
+optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)
+
+# runtime settings
+# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
+# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
+# it will remove the oldest one to keep the number of total ckpts as 3
+checkpoint_config = dict(interval=10, max_keep_ckpts=3)
--- a/configs/benchmarks/classification/imagenet/resnet50_8xb512-coslr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_8xb512-coslr-90e_in1k.py
@ -0,0 +1,18 @@
+# ImageNet classification with frozen_stages=4 on the backbone, using the
+# LARS cosine 90-epoch schedule and 512 images per GPU.
+_base_ = [
+    '../_base_/models/resnet50.py',
+    '../_base_/datasets/imagenet.py',
+    '../_base_/schedules/lars_coslr-90e.py',
+    '../_base_/default_runtime.py',
+]
+
+model = dict(backbone=dict(frozen_stages=4))
+
+# dataset summary
+data = dict(imgs_per_gpu=512)  # total 512*8=4096, 8GPU linear cls
+
+# simsiam setting
+# runtime settings
+# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
+# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
+# it will remove the oldest one to keep the number of total ckpts as 3
+checkpoint_config = dict(interval=10, max_keep_ckpts=3)
--- a/configs/benchmarks/classification/imagenet/resnet50_mhead_8xb32-steplr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_mhead_8xb32-steplr-90e_in1k.py
@ -0,0 +1,48 @@
+# Multi-head ImageNet classification with frozen_stages=4 on the backbone.
+# It replaces the dataset pipelines, the optimizer and the schedule length.
+_base_ = [
+    '../_base_/models/resnet50_multihead.py',
+    '../_base_/datasets/imagenet.py',
+    '../_base_/schedules/sgd_steplr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+model = dict(backbone=dict(frozen_stages=4))
+
+# dataset settings
+# These pipelines list ToTensor/Normalize explicitly and are passed to
+# data.train / data.val below; training adds ColorJitter and Lighting.
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+train_pipeline = [
+    dict(type='RandomResizedCrop', size=224),
+    dict(type='RandomHorizontalFlip'),
+    dict(
+        type='ColorJitter',
+        brightness=0.4,
+        contrast=0.4,
+        saturation=0.4,
+        hue=0.),
+    dict(type='ToTensor'),
+    dict(type='Lighting'),
+    dict(type='Normalize', **img_norm_cfg),
+]
+test_pipeline = [
+    dict(type='Resize', size=256),
+    dict(type='CenterCrop', size=224),
+    dict(type='ToTensor'),
+    dict(type='Normalize', **img_norm_cfg),
+]
+data = dict(
+    train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline))
+
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=1e-4,
+    paramwise_options=dict(norm_decay_mult=0.),
+    nesterov=True)
+
+# learning policy
+# NOTE(review): the last step (90) equals max_epochs, so it presumably never
+# takes effect -- confirm.
+lr_config = dict(policy='step', step=[30, 60, 90])
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=90)
+checkpoint_config = dict(interval=10)
--- a/configs/benchmarks/classification/inaturalist2018/resnet50-sobel_mhead_8xb32-steplr-84e_inat18.py
+++ b/configs/benchmarks/classification/inaturalist2018/resnet50-sobel_mhead_8xb32-steplr-84e_inat18.py
@ -0,0 +1,4 @@
+# Sobel variant of the multi-head iNaturalist-2018 config: enables with_sobel
+# and sets the backbone to 2 input channels.
+_base_ = 'resnet50_mhead_8xb32-steplr-84e_inat18.py'
+
+# model settings
+model = dict(with_sobel=True, backbone=dict(in_channels=2))
--- a/configs/benchmarks/classification/inaturalist2018/resnet50_mhead_8xb32-steplr-84e_inat18.py
+++ b/configs/benchmarks/classification/inaturalist2018/resnet50_mhead_8xb32-steplr-84e_inat18.py
@ -0,0 +1,32 @@
+# Multi-head iNaturalist-2018 classification with frozen_stages=4 on the
+# backbone: 8142 classes, 84 epochs, step decay at epochs 24/48/72.
+_base_ = [
+    '../_base_/models/resnet50_multihead.py',
+    '../_base_/datasets/inaturalist2018.py',
+    '../_base_/schedules/sgd_steplr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+# model settings
+# The head norm_cfg repeats the value already set in the multihead base
+# model; only num_classes differs from the base.
+model = dict(
+    backbone=dict(frozen_stages=4),
+    head=dict(
+        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
+        num_classes=8142))
+
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=1e-4,
+    paramwise_options=dict(norm_decay_mult=0.),
+    nesterov=True)
+
+# learning policy
+lr_config = dict(policy='step', step=[24, 48, 72])
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=84)
+# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
+# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
+# it will remove the oldest one to keep the number of total ckpts as 3
+checkpoint_config = dict(interval=1, max_keep_ckpts=3)
--- a/configs/benchmarks/classification/places205/resnet50-sobel_mhead_8xb32-steplr-28e_places205.py
+++ b/configs/benchmarks/classification/places205/resnet50-sobel_mhead_8xb32-steplr-28e_places205.py
@ -0,0 +1,4 @@
+# Sobel variant of the multi-head Places205 config: enables with_sobel and
+# sets the backbone to 2 input channels.
+_base_ = 'resnet50_mhead_8xb32-steplr-28e_places205.py'
+
+# model settings
+model = dict(with_sobel=True, backbone=dict(in_channels=2))
--- a/configs/benchmarks/classification/places205/resnet50_mhead_8xb32-steplr-28e_places205.py
+++ b/configs/benchmarks/classification/places205/resnet50_mhead_8xb32-steplr-28e_places205.py
@ -0,0 +1,32 @@
+# Multi-head Places205 classification with frozen_stages=4 on the backbone:
+# 205 classes, 28 epochs, step decay at epochs 7/14/21.
+_base_ = [
+    '../_base_/models/resnet50_multihead.py',
+    '../_base_/datasets/places205.py',
+    '../_base_/schedules/sgd_steplr-100e.py',
+    '../_base_/default_runtime.py',
+]
+
+# model settings
+# The head norm_cfg repeats the value already set in the multihead base
+# model; only num_classes differs from the base.
+model = dict(
+    backbone=dict(frozen_stages=4),
+    head=dict(
+        norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
+        num_classes=205))
+
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=1e-4,
+    paramwise_options=dict(norm_decay_mult=0.),
+    nesterov=True)
+
+# learning policy
+lr_config = dict(policy='step', step=[7, 14, 21])
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=28)
+# the max_keep_ckpts controls the max number of ckpt file in your work_dirs
+# if it is 3, when CheckpointHook (in mmcv) saves the 4th ckpt
+# it will remove the oldest one to keep the number of total ckpts as 3
+checkpoint_config = dict(interval=1, max_keep_ckpts=3)
--- a/configs/benchmarks/classification/svm_voc07.py
+++ b/configs/benchmarks/classification/svm_voc07.py
@ -0,0 +1,22 @@
+# Extraction config for the VOC2007 SVM benchmark: a single 'extract'
+# dataset over the combined trainval+test image list.
+data_source = 'ImageList'
+dataset_type = 'SingleViewDataset'
+# presumably the first 5011 list entries form 'voc07_trainval' and the rest
+# 'voc07_test' -- confirm against the tool that consumes split_at/split_name
+split_at = [5011]
+split_name = ['voc07_trainval', 'voc07_test']
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+data = dict(
+    imgs_per_gpu=32,
+    workers_per_gpu=4,
+    extract=dict(
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/VOCdevkit/VOC2007/JPEGImages',
+            ann_file='data/VOCdevkit/VOC2007/Lists/trainvaltest.txt',
+        ),
+        # NOTE(review): the second step is a Resize to (224, 224), not the
+        # CenterCrop used by the other test pipelines -- confirm intended.
+        pipeline=[
+            dict(type='Resize', size=256),
+            dict(type='Resize', size=(224, 224)),
+            dict(type='ToTensor'),
+            dict(type='Normalize', **img_norm_cfg),
+        ]))
--- a/configs/benchmarks/classification/tsne_imagenet.py
+++ b/configs/benchmarks/classification/tsne_imagenet.py
@ -0,0 +1,21 @@
+# Extraction config over the ImageNet validation list (file name suggests it
+# feeds t-SNE visualization): a single 'extract' dataset with the standard
+# resize-256 / center-crop-224 pipeline.
+data_source = 'ImageNet'
+dataset_type = 'SingleViewDataset'
+name = 'imagenet_val'
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+data = dict(
+    imgs_per_gpu=8,
+    workers_per_gpu=4,
+    extract=dict(
+        # use the dataset_type variable instead of repeating the literal
+        type=dataset_type,
+        data_source=dict(
+            type=data_source,
+            data_prefix='data/imagenet/val',
+            ann_file='data/imagenet/meta/val.txt',
+        ),
+        pipeline=[
+            dict(type='Resize', size=256),
+            dict(type='CenterCrop', size=224),
+            dict(type='ToTensor'),
+            dict(type='Normalize', **img_norm_cfg),
+        ]))
--- a/configs/benchmarks/detectron2/Base-Keypoint-RCNN-FPN.yaml
+++ b/configs/benchmarks/detectron2/Base-Keypoint-RCNN-FPN.yaml
@ -0,0 +1,15 @@
+# Keypoint R-CNN base: extends Base-RCNN-FPN.yaml with keypoints enabled, a
+# single ROI class and the COCO 2017 keypoint train/val datasets.
+_BASE_: "Base-RCNN-FPN.yaml"
+MODEL:
+  KEYPOINT_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 1
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
+  RPN:
+    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+    # 1000 proposals per-image is found to hurt box AP.
+    # Therefore we increase it to 1500 per-image.
+    POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_train",)
+  TEST: ("keypoints_coco_2017_val",)
--- a/configs/benchmarks/detectron2/Base-RCNN-C4-BN.yaml
+++ b/configs/benchmarks/detectron2/Base-RCNN-C4-BN.yaml
@ -0,0 +1,17 @@
+# R-CNN C4 base with SyncBN ResNet norm layers, backbone FREEZE_AT 0,
+# the Res5ROIHeadsExtraNorm ROI head and precise-BN enabled at test time.
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RPN:
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "Res5ROIHeadsExtraNorm"
+  BACKBONE:
+    FREEZE_AT: 0
+  RESNETS:
+    NORM: "SyncBN"
+TEST:
+  PRECISE_BN:
+    ENABLED: True
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
--- a/configs/benchmarks/detectron2/Base-RCNN-FPN.yaml
+++ b/configs/benchmarks/detectron2/Base-RCNN-FPN.yaml
@ -0,0 +1,42 @@
+# R-CNN FPN base: ResNet-FPN backbone over res2-res5, RPN on p2-p6, standard
+# box and mask heads, COCO 2017 datasets and a 90k-iteration solver.
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
--- a/configs/benchmarks/detectron2/Base-RetinaNet.yaml
+++ b/configs/benchmarks/detectron2/Base-RetinaNet.yaml
@ -0,0 +1,25 @@
+MODEL:  # base settings referenced via _BASE_ by coco_R_50_RetinaNet_1x.yaml
+  META_ARCHITECTURE: "RetinaNet"
+  BACKBONE:
+    NAME: "build_retinanet_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]  # NOTE(review): evaluates a Python expression at YAML load time (three sizes per base size 32..512); only load this file from a trusted source
+  FPN:
+    IN_FEATURES: ["res3", "res4", "res5"]  # same three names as RESNETS.OUT_FEATURES above
+  RETINANET:
+    IOU_THRESHOLDS: [0.4, 0.5]
+    IOU_LABELS: [0, -1, 1]  # NOTE(review): presumably one label per IoU range delimited by the two thresholds - confirm
+    SMOOTH_L1_LOSS_BETA: 0.0
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
+  STEPS: (60000, 80000)  # LR step iterations; coco_R_50_RetinaNet_2x.yaml doubles these
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)  # multi-scale candidates in steps of 32
+VERSION: 2
--- a/configs/benchmarks/detectron2/Cityscapes/mask_rcnn_R_50_FPN.yaml
+++ b/configs/benchmarks/detectron2/Cityscapes/mask_rcnn_R_50_FPN.yaml
@ -0,0 +1,30 @@
+_BASE_: "../Base-RCNN-FPN.yaml"  # Cityscapes instance segmentation on top of the FPN base config
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+  BACKBONE:
+    FREEZE_AT: 0  # NOTE(review): presumably means no backbone stage is frozen - confirm
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+  FPN:
+    NORM: "SyncBN"
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)  # replaces the base's 640-800 list
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:  # replaces the COCO entries from the base config
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:  # base config uses BASE_LR 0.02 with IMS_PER_BATCH 16; both are halved here
+  BASE_LR: 0.01
+  STEPS: (18000,)
+  MAX_ITER: 24000
+  IMS_PER_BATCH: 8
+TEST:
+  PRECISE_BN:
+    ENABLED: True
--- a/configs/benchmarks/detectron2/Cityscapes/mask_rcnn_R_50_FPN_moco.yaml
+++ b/configs/benchmarks/detectron2/Cityscapes/mask_rcnn_R_50_FPN_moco.yaml
@ -0,0 +1,9 @@
+_BASE_: "mask_rcnn_R_50_FPN.yaml"  # variant of the Cityscapes config with different input statistics and weights
+MODEL:
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # same mean/std values as img_norm_cfg in this commit's mmdetection configs
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  WEIGHTS: "See Instructions"  # placeholder text, not a checkpoint path; presumably supplied at launch - confirm
+  RESNETS:
+    STRIDE_IN_1X1: False  # NOTE(review): presumably moves the stride to the 3x3 conv of each bottleneck - confirm
+INPUT:
+  FORMAT: "RGB"
--- a/configs/benchmarks/detectron2/coco_R_50_C4_1x.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_C4_1x.yaml
@ -0,0 +1,4 @@
+_BASE_: "coco_R_50_C4_2x.yaml"  # 1x schedule: only the SOLVER length differs from the 2x file
+SOLVER:
+  STEPS: (60000, 80000)  # half of the 2x file's (120000, 160000)
+  MAX_ITER: 90000  # half of the 2x file's 180000
--- a/configs/benchmarks/detectron2/coco_R_50_C4_1x_moco.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_C4_1x_moco.yaml
@ -0,0 +1,4 @@
+_BASE_: "coco_R_50_C4_2x_moco.yaml"  # 1x schedule of the moco C4 config; only the SOLVER length is set here
+SOLVER:
+  STEPS: (60000, 80000)  # same values as coco_R_50_C4_1x.yaml
+  MAX_ITER: 90000
--- a/configs/benchmarks/detectron2/coco_R_50_C4_2x.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_C4_2x.yaml
@ -0,0 +1,13 @@
+_BASE_: "Base-RCNN-C4-BN.yaml"  # COCO R-50 C4 config with the 2x schedule
+MODEL:
+  MASK_ON: True
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)  # multi-scale candidates in steps of 32
+  MIN_SIZE_TEST: 800
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:  # batch size and base LR are not set here; Base-RCNN-C4-BN.yaml sets them
+  STEPS: (120000, 160000)  # coco_R_50_C4_1x.yaml halves these
+  MAX_ITER: 180000
--- a/configs/benchmarks/detectron2/coco_R_50_C4_2x_moco.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_C4_2x_moco.yaml
@ -0,0 +1,10 @@
+_BASE_: "coco_R_50_C4_2x.yaml"  # variant of the C4 2x config with different input statistics and weights
+MODEL:
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # same mean/std values as img_norm_cfg in this commit's mmdetection configs
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  WEIGHTS: "See Instructions"  # placeholder text, not a checkpoint path; presumably supplied at launch - confirm
+  RESNETS:
+    STRIDE_IN_1X1: False  # NOTE(review): presumably moves the stride to the 3x3 conv of each bottleneck - confirm
+INPUT:
+  MAX_SIZE_TRAIN: 1200  # set only in this C4 moco config; the other *_moco.yaml files shown do not set it
+  FORMAT: "RGB"
--- a/configs/benchmarks/detectron2/coco_R_50_FPN_1x.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_FPN_1x.yaml
@ -0,0 +1,17 @@
+_BASE_: "Base-RCNN-FPN.yaml"  # COCO R-50 FPN config with the 1x schedule
+MODEL:
+  MASK_ON: True
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  BACKBONE:
+    FREEZE_AT: 0  # NOTE(review): presumably means no backbone stage is frozen - confirm
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+  FPN:
+    NORM: "SyncBN"
+TEST:
+  PRECISE_BN:
+    ENABLED: True
+SOLVER:  # repeats the STEPS / MAX_ITER values already present in Base-RCNN-FPN.yaml
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
--- a/configs/benchmarks/detectron2/coco_R_50_FPN_1x_moco.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_FPN_1x_moco.yaml
@ -0,0 +1,9 @@
+_BASE_: "coco_R_50_FPN_1x.yaml"  # variant of the FPN 1x config with different input statistics and weights
+MODEL:
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # same mean/std values as img_norm_cfg in this commit's mmdetection configs
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  WEIGHTS: "See Instructions"  # placeholder text, not a checkpoint path; presumably supplied at launch - confirm
+  RESNETS:
+    STRIDE_IN_1X1: False  # NOTE(review): presumably moves the stride to the 3x3 conv of each bottleneck - confirm
+INPUT:
+  FORMAT: "RGB"
--- a/configs/benchmarks/detectron2/coco_R_50_FPN_2x.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_FPN_2x.yaml
@ -0,0 +1,4 @@
+_BASE_: "coco_R_50_FPN_1x.yaml"  # 2x schedule: only the SOLVER length differs from the 1x file
+SOLVER:
+  STEPS: (120000, 160000)  # double the 1x file's (60000, 80000)
+  MAX_ITER: 180000  # double the 1x file's 90000
--- a/configs/benchmarks/detectron2/coco_R_50_FPN_2x_moco.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_FPN_2x_moco.yaml
@ -0,0 +1,4 @@
+_BASE_: "coco_R_50_FPN_1x_moco.yaml"  # 2x schedule of the moco FPN config; only the SOLVER length is set here
+SOLVER:
+  STEPS: (120000, 160000)  # same values as coco_R_50_FPN_2x.yaml
+  MAX_ITER: 180000
--- a/configs/benchmarks/detectron2/coco_R_50_RetinaNet_1x.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_RetinaNet_1x.yaml
@ -0,0 +1,13 @@
+_BASE_: "Base-RetinaNet.yaml"  # COCO R-50 RetinaNet; no SOLVER section here, so the schedule is whatever the base file sets
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  BACKBONE:
+    FREEZE_AT: 0  # NOTE(review): presumably means no backbone stage is frozen - confirm
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+  FPN:
+    NORM: "SyncBN"
+TEST:
+  PRECISE_BN:
+    ENABLED: True
--- a/configs/benchmarks/detectron2/coco_R_50_RetinaNet_1x_moco.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_RetinaNet_1x_moco.yaml
@ -0,0 +1,9 @@
+_BASE_: "coco_R_50_RetinaNet_1x.yaml"  # variant of the RetinaNet 1x config with different input statistics and weights
+MODEL:
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # same mean/std values as img_norm_cfg in this commit's mmdetection configs
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  WEIGHTS: "See Instructions"  # placeholder text, not a checkpoint path; presumably supplied at launch - confirm
+  RESNETS:
+    STRIDE_IN_1X1: False  # NOTE(review): presumably moves the stride to the 3x3 conv of each bottleneck - confirm
+INPUT:
+  FORMAT: "RGB"
--- a/configs/benchmarks/detectron2/coco_R_50_RetinaNet_2x.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_RetinaNet_2x.yaml
@ -0,0 +1,4 @@
+_BASE_: "coco_R_50_RetinaNet_1x.yaml"  # 2x schedule: only the SOLVER length is set here
+SOLVER:
+  STEPS: (120000, 160000)  # double the (60000, 80000) in Base-RetinaNet.yaml
+  MAX_ITER: 180000  # double the 90000 in Base-RetinaNet.yaml
--- a/configs/benchmarks/detectron2/coco_R_50_RetinaNet_2x_moco.yaml
+++ b/configs/benchmarks/detectron2/coco_R_50_RetinaNet_2x_moco.yaml
@ -0,0 +1,4 @@
+_BASE_: "coco_R_50_RetinaNet_1x_moco.yaml"  # 2x schedule of the moco RetinaNet config; only the SOLVER length is set here
+SOLVER:
+  STEPS: (120000, 160000)  # same values as coco_R_50_RetinaNet_2x.yaml
+  MAX_ITER: 180000
--- a/configs/benchmarks/detectron2/keypoint_rcnn_R_50_FPN_2x.yaml
+++ b/configs/benchmarks/detectron2/keypoint_rcnn_R_50_FPN_2x.yaml
@ -0,0 +1,16 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"  # NOTE(review): this base file is not among the Base-*.yaml files shown here - confirm it exists in this directory
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  BACKBONE:
+    FREEZE_AT: 0  # NOTE(review): presumably means no backbone stage is frozen - confirm
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+  FPN:
+    NORM: "SyncBN"
+TEST:
+  PRECISE_BN:
+    ENABLED: True
+SOLVER:  # same 2x schedule values as coco_R_50_FPN_2x.yaml
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000
--- a/configs/benchmarks/detectron2/keypoint_rcnn_R_50_FPN_2x_moco.yaml
+++ b/configs/benchmarks/detectron2/keypoint_rcnn_R_50_FPN_2x_moco.yaml
@ -0,0 +1,9 @@
+_BASE_: "keypoint_rcnn_R_50_FPN_2x.yaml"  # variant of the keypoint 2x config with different input statistics and weights
+MODEL:
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # same mean/std values as img_norm_cfg in this commit's mmdetection configs
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  WEIGHTS: "See Instructions"  # placeholder text, not a checkpoint path; presumably supplied at launch - confirm
+  RESNETS:
+    STRIDE_IN_1X1: False  # NOTE(review): presumably moves the stride to the 3x3 conv of each bottleneck - confirm
+INPUT:
+  FORMAT: "RGB"
--- a/configs/benchmarks/detectron2/pascal_voc_R_50_C4_24k.yaml
+++ b/configs/benchmarks/detectron2/pascal_voc_R_50_C4_24k.yaml
@ -0,0 +1,16 @@
+_BASE_: "Base-RCNN-C4-BN.yaml"  # Pascal VOC R-50 C4 detection config, 24k iterations
+MODEL:
+  MASK_ON: False  # box detection only; the COCO C4 config sets this to True
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  ROI_HEADS:
+    NUM_CLASSES: 20
+INPUT:
+  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)  # multi-scale candidates in steps of 32
+  MIN_SIZE_TEST: 800
+DATASETS:
+  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
+  TEST: ('voc_2007_test',)
+SOLVER:  # same step / max-iteration / warmup numbers as mmdetection/_base_/schedules/schedule_24k.py
+  STEPS: (18000, 22000)
+  MAX_ITER: 24000
+  WARMUP_ITERS: 100
--- a/configs/benchmarks/detectron2/pascal_voc_R_50_C4_24k_moco.yaml
+++ b/configs/benchmarks/detectron2/pascal_voc_R_50_C4_24k_moco.yaml
@ -0,0 +1,9 @@
+_BASE_: "pascal_voc_R_50_C4_24k.yaml"  # variant of the VOC 24k config with different input statistics and weights
+MODEL:
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # same mean/std values as img_norm_cfg in this commit's mmdetection configs
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  WEIGHTS: "See Instructions"  # placeholder text, not a checkpoint path; presumably supplied at launch - confirm
+  RESNETS:
+    STRIDE_IN_1X1: False  # NOTE(review): presumably moves the stride to the 3x3 conv of each bottleneck - confirm
+INPUT:
+  FORMAT: "RGB"
--- a/configs/benchmarks/mmdetection/_base_/datasets/coco_instance.py
+++ b/configs/benchmarks/mmdetection/_base_/datasets/coco_instance.py
@ -0,0 +1,49 @@
+# dataset settings: COCO 2017 boxes + masks with single-scale train and test pipelines
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'  # prefix concatenated onto every ann_file / img_prefix below
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),  # one fixed scale; the mstrain configs replace this pipeline
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),  # a single scale is listed
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        pipeline=train_pipeline),
+    val=dict(  # val and test below point at the same val2017 annotations and images
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        pipeline=test_pipeline))
+evaluation = dict(metric=['bbox', 'segm'])  # both box and mask metrics are requested
--- a/configs/benchmarks/mmdetection/_base_/datasets/voc0712.py
+++ b/configs/benchmarks/mmdetection/_base_/datasets/voc0712.py
@ -0,0 +1,55 @@
+# dataset settings: Pascal VOC 2007 + 2012 trainval for training, VOC 2007 test for val/test
+dataset_type = 'VOCDataset'
+data_root = 'data/VOCdevkit/'  # prefix concatenated onto every ann_file / img_prefix below
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),  # boxes only; no with_mask flag here
+    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1000, 600),  # same single scale as the training Resize above
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type='RepeatDataset',  # wraps the VOC dataset below with times=3
+        times=3,
+        dataset=dict(
+            type=dataset_type,
+            ann_file=[  # two annotation lists, paired by position with the two img_prefix entries
+                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
+                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
+            ],
+            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
+            pipeline=train_pipeline)),
+    val=dict(  # val and test below both use the VOC2007 test split
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='mAP')
--- a/configs/benchmarks/mmdetection/_base_/default_runtime.py
+++ b/configs/benchmarks/mmdetection/_base_/default_runtime.py
@ -0,0 +1,16 @@
+checkpoint_config = dict(interval=1)  # runtime defaults listed last in the _base_ of the benchmark configs
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),  # the only active logger hook; the Tensorboard hook below is commented out
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+custom_hooks = [dict(type='NumClassCheckHook')]
+
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None  # no checkpoint is named for loading or resuming by default
+resume_from = None
+workflow = [('train', 1)]
--- a/configs/benchmarks/mmdetection/_base_/models/faster_rcnn_r50_c4.py
+++ b/configs/benchmarks/mmdetection/_base_/models/faster_rcnn_r50_c4.py
@ -0,0 +1,112 @@
+# model settings: Faster R-CNN with a three-stage ResNet-50 backbone and a ResLayer shared head
+norm_cfg = dict(type='BN', requires_grad=False)  # used by both the backbone and roi_head.shared_head below
+model = dict(
+    type='FasterRCNN',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),  # only the last of the three stages is output
+        frozen_stages=1,
+        norm_cfg=norm_cfg,
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[2, 4, 8, 16, 32],  # five scales x three ratios on a single stride-16 level
+            ratios=[0.5, 1.0, 2.0],
+            strides=[16]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(
+        type='StandardRoIHead',
+        shared_head=dict(
+            type='ResLayer',
+            depth=50,
+            stage=3,  # NOTE(review): presumably the ResNet stage after the backbone's three (0-2) - confirm
+            stride=2,
+            dilation=1,
+            style='pytorch',
+            norm_cfg=norm_cfg,
+            norm_eval=True),
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+            out_channels=1024,  # same channel count as rpn_head.in_channels
+            featmap_strides=[16]),
+        bbox_head=dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,  # NOTE(review): presumably 14 from RoIAlign halved by the stride-2 shared_head - confirm
+            in_channels=2048,
+            num_classes=80,  # the voc0712 benchmark config overrides this with 20
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+    # model training and testing settings
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=0,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_pre=12000,  # train-time proposal counts are twice the test_cfg.rpn values below
+            max_per_img=2000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=6000,  # same numbers as RPN.PRE/POST_NMS_TOPK_TEST in detectron2/Base-RCNN-C4-BN.yaml
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100)))
--- a/configs/benchmarks/mmdetection/_base_/models/mask_rcnn_r50_c4.py
+++ b/configs/benchmarks/mmdetection/_base_/models/mask_rcnn_r50_c4.py
@ -0,0 +1,123 @@
+# model settings: Mask R-CNN with a three-stage ResNet-50 backbone and a ResLayer shared head
+norm_cfg = dict(type='BN', requires_grad=False)  # used by both the backbone and roi_head.shared_head below
+model = dict(
+    type='MaskRCNN',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),  # only the last of the three stages is output
+        frozen_stages=1,
+        norm_cfg=norm_cfg,
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[2, 4, 8, 16, 32],  # five scales x three ratios on a single stride-16 level
+            ratios=[0.5, 1.0, 2.0],
+            strides=[16]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(
+        type='StandardRoIHead',
+        shared_head=dict(
+            type='ResLayer',
+            depth=50,
+            stage=3,  # NOTE(review): presumably the ResNet stage after the backbone's three (0-2) - confirm
+            stride=2,
+            dilation=1,
+            style='pytorch',
+            norm_cfg=norm_cfg,
+            norm_eval=True),
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+            out_channels=1024,  # same channel count as rpn_head.in_channels
+            featmap_strides=[16]),
+        bbox_head=dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,  # NOTE(review): presumably 14 from RoIAlign halved by the stride-2 shared_head - confirm
+            in_channels=2048,
+            num_classes=80,
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+        mask_roi_extractor=None,  # NOTE(review): no separate mask extractor; presumably the bbox RoI features are reused - confirm
+        mask_head=dict(
+            type='FCNMaskHead',
+            num_convs=0,
+            in_channels=2048,  # same channel count as bbox_head.in_channels
+            conv_out_channels=256,
+            num_classes=80,
+            loss_mask=dict(
+                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+    # model training and testing settings
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=0,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_pre=12000,  # train-time proposal counts are twice the test_cfg.rpn values below
+            max_per_img=2000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,  # equals the RoIAlign output_size configured above
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=6000,  # same numbers as RPN.PRE/POST_NMS_TOPK_TEST in detectron2/Base-RCNN-C4-BN.yaml
+            nms=dict(type='nms', iou_threshold=0.7),
+            max_per_img=1000,
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100,
+            mask_thr_binary=0.5)))
--- a/configs/benchmarks/mmdetection/_base_/models/mask_rcnn_r50_fpn.py
+++ b/configs/benchmarks/mmdetection/_base_/models/mask_rcnn_r50_fpn.py
@ -0,0 +1,120 @@
+# model settings: Mask R-CNN with a four-stage ResNet-50 backbone and an FPN neck
+model = dict(
+    type='MaskRCNN',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),  # all four stages are output, one per neck.in_channels entry
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,  # matches in_channels of rpn_head, bbox_head and mask_head below
+        num_outs=5),  # five outputs, one per anchor_generator stride below
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[8],  # one scale x three ratios on each of the five strides
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(
+        type='StandardRoIHead',
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),  # four strides; the stride-64 level is listed for the RPN only
+        bbox_head=dict(
+            type='Shared2FCBBoxHead',  # the *_fpn_mstrain_* benchmark configs switch this to Shared4Conv1FCBBoxHead
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,  # equals the bbox RoIAlign output_size above
+            num_classes=80,
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+        mask_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        mask_head=dict(
+            type='FCNMaskHead',
+            num_convs=4,
+            in_channels=256,
+            conv_out_channels=256,
+            num_classes=80,
+            loss_mask=dict(
+                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+    # model training and testing settings
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=-1,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_pre=2000,  # same numbers as RPN.PRE/POST_NMS_TOPK_TRAIN in detectron2/Base-RCNN-FPN.yaml
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=True,  # NOTE(review): the C4 model configs in this commit use False here - confirm intended
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=28,  # twice the mask RoIAlign output_size of 14
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=1000,  # same numbers as RPN.PRE/POST_NMS_TOPK_TEST in detectron2/Base-RCNN-FPN.yaml
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100,
+            mask_thr_binary=0.5)))
--- a/configs/benchmarks/mmdetection/_base_/schedules/schedule_1x.py
+++ b/configs/benchmarks/mmdetection/_base_/schedules/schedule_1x.py
@ -0,0 +1,11 @@
+# optimizer: SGD settings shared by the 1x, 2x and 24k schedule files
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
+# learning policy: step schedule with linear warmup, 12 epochs
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=1000,
+    warmup_ratio=0.001,
+    step=[8, 11])  # step points in epochs (EpochBasedRunner below); schedule_2x.py uses [16, 22]
+runner = dict(type='EpochBasedRunner', max_epochs=12)
--- a/configs/benchmarks/mmdetection/_base_/schedules/schedule_24k.py
+++ b/configs/benchmarks/mmdetection/_base_/schedules/schedule_24k.py
@ -0,0 +1,12 @@
+# optimizer: SGD settings shared by the 1x, 2x and 24k schedule files
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
+# learning policy: iteration-based step schedule; numbers match detectron2/pascal_voc_R_50_C4_24k.yaml
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=100,  # same as SOLVER.WARMUP_ITERS in the detectron2 VOC config
+    warmup_ratio=0.001,
+    step=[18000, 22000],  # iteration counts, since by_epoch is False
+    by_epoch=False)
+runner = dict(type='IterBasedRunner', max_iters=24000)
--- a/configs/benchmarks/mmdetection/_base_/schedules/schedule_2x.py
+++ b/configs/benchmarks/mmdetection/_base_/schedules/schedule_2x.py
@ -0,0 +1,11 @@
+# optimizer: SGD settings shared by the 1x, 2x and 24k schedule files
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)  # no gradient clipping configured
+# learning policy: step schedule with linear warmup, 24 epochs
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=1000,
+    warmup_ratio=0.001,
+    step=[16, 22])  # step points in epochs (EpochBasedRunner below); schedule_1x.py uses [8, 11]
+runner = dict(type='EpochBasedRunner', max_epochs=24)
--- a/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_c4_mstrain_1x_coco.py
+++ b/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_c4_mstrain_1x_coco.py
@ -0,0 +1,36 @@
+_base_ = [  # C4 Mask R-CNN model + COCO instance data + 1x schedule + runtime defaults
+    '../_base_/models/mask_rcnn_r50_c4.py',
+    '../_base_/datasets/coco_instance.py',
+    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # base model file uses BN with requires_grad=False
+model = dict(
+    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),  # base values: frozen_stages=1, norm_eval=True
+    roi_head=dict(
+        shared_head=dict(
+            type='ResLayerExtraNorm', norm_cfg=norm_cfg, norm_eval=False)))  # base shared_head type is 'ResLayer'
+
+img_norm_cfg = dict(  # same values as in ../_base_/datasets/coco_instance.py; needed here for **img_norm_cfg below
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(
+        type='Resize',
+        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),  # short sides match MIN_SIZE_TRAIN in the detectron2 COCO configs
+                   (1333, 768), (1333, 800)],
+        multiscale_mode='value',
+        keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+
+data = dict(train=dict(pipeline=train_pipeline))  # only the training pipeline is replaced
+
+custom_imports = dict(  # NOTE(review): presumably this module registers 'ResLayerExtraNorm' - confirm
+    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
+    allow_failed_imports=False)
--- a/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_c4_mstrain_2x_coco.py
+++ b/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_c4_mstrain_2x_coco.py
@ -0,0 +1,36 @@
+_base_ = [  # C4 Mask R-CNN model + COCO instance data + 2x schedule + runtime defaults
+    '../_base_/models/mask_rcnn_r50_c4.py',
+    '../_base_/datasets/coco_instance.py',
+    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
+]
+
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # base model file uses BN with requires_grad=False
+model = dict(
+    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),  # base values: frozen_stages=1, norm_eval=True
+    roi_head=dict(
+        shared_head=dict(
+            type='ResLayerExtraNorm', norm_cfg=norm_cfg, norm_eval=False)))  # base shared_head type is 'ResLayer'
+
+img_norm_cfg = dict(  # same values as in ../_base_/datasets/coco_instance.py; needed here for **img_norm_cfg below
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(
+        type='Resize',
+        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),  # short sides match MIN_SIZE_TRAIN in the detectron2 COCO configs
+                   (1333, 768), (1333, 800)],
+        multiscale_mode='value',
+        keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+
+data = dict(train=dict(pipeline=train_pipeline))  # only the training pipeline is replaced
+
+custom_imports = dict(  # NOTE(review): presumably this module registers 'ResLayerExtraNorm' - confirm
+    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
+    allow_failed_imports=False)
--- a/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_fpn_mstrain_1x_coco.py
+++ b/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_fpn_mstrain_1x_coco.py
@ -0,0 +1,33 @@
+_base_ = [  # FPN Mask R-CNN model + COCO instance data + 1x schedule + runtime defaults
+    '../_base_/models/mask_rcnn_r50_fpn.py',
+    '../_base_/datasets/coco_instance.py',
+    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # passed to backbone, neck, bbox_head and mask_head below
+model = dict(
+    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),  # base values: frozen_stages=1, norm_eval=True
+    neck=dict(norm_cfg=norm_cfg),
+    roi_head=dict(
+        bbox_head=dict(type='Shared4Conv1FCBBoxHead', norm_cfg=norm_cfg),  # base bbox_head type is 'Shared2FCBBoxHead'
+        mask_head=dict(norm_cfg=norm_cfg)))
+
+img_norm_cfg = dict(  # same values as in ../_base_/datasets/coco_instance.py; needed here for **img_norm_cfg below
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(
+        type='Resize',
+        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),  # short sides match MIN_SIZE_TRAIN in detectron2/Base-RCNN-FPN.yaml
+                   (1333, 768), (1333, 800)],
+        multiscale_mode='value',
+        keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+
+data = dict(train=dict(pipeline=train_pipeline))  # only the training pipeline is replaced
--- a/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_fpn_mstrain_2x_coco.py
+++ b/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_fpn_mstrain_2x_coco.py
@ -0,0 +1,33 @@
+_base_ = [  # FPN Mask R-CNN model + COCO instance data + 2x schedule + runtime defaults
+    '../_base_/models/mask_rcnn_r50_fpn.py',
+    '../_base_/datasets/coco_instance.py',
+    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
+]
+
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # passed to backbone, neck, bbox_head and mask_head below
+model = dict(
+    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),  # base values: frozen_stages=1, norm_eval=True
+    neck=dict(norm_cfg=norm_cfg),
+    roi_head=dict(
+        bbox_head=dict(type='Shared4Conv1FCBBoxHead', norm_cfg=norm_cfg),  # base bbox_head type is 'Shared2FCBBoxHead'
+        mask_head=dict(norm_cfg=norm_cfg)))
+
+img_norm_cfg = dict(  # same values as in ../_base_/datasets/coco_instance.py; needed here for **img_norm_cfg below
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(
+        type='Resize',
+        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),  # short sides match MIN_SIZE_TRAIN in detectron2/Base-RCNN-FPN.yaml
+                   (1333, 768), (1333, 800)],
+        multiscale_mode='value',
+        keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+
+data = dict(train=dict(pipeline=train_pipeline))  # only the training pipeline is replaced
--- a/configs/benchmarks/mmdetection/voc0712/faster_rcnn_r50_c4_mstrain_24k_voc0712.py
+++ b/configs/benchmarks/mmdetection/voc0712/faster_rcnn_r50_c4_mstrain_24k_voc0712.py
@ -0,0 +1,84 @@
+_base_ = [  # base model, 24k schedule, default runtime; dataset defined below
+    '../_base_/models/faster_rcnn_r50_c4.py',
+    '../_base_/schedules/schedule_24k.py', '../_base_/default_runtime.py'
+]
+
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # backbone and shared head
+model = dict(  # overrides for the base Faster R-CNN C4 model
+    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
+    roi_head=dict(
+        shared_head=dict(
+            type='ResLayerExtraNorm', norm_cfg=norm_cfg, norm_eval=False),
+        bbox_head=dict(num_classes=20)))
+
+# dataset settings: train on VOC2007 + VOC2012 trainval, evaluate on VOC2007 test
+dataset_type = 'VOCDataset'
+data_root = 'data/VOCdevkit/'  # trailing slash required: paths are concatenated
+img_norm_cfg = dict(  # shared by the train and test Normalize steps
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # training pipeline with eleven candidate resize scales
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(
+        type='Resize',
+        img_scale=[(1333, 480), (1333, 512), (1333, 544), (1333, 576),
+                   (1333, 608), (1333, 640), (1333, 672), (1333, 704),
+                   (1333, 736), (1333, 768), (1333, 800)],
+        multiscale_mode='value',  # presumably one listed scale per sample
+        keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [  # single scale (1333, 800), flip augmentation disabled
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(  # VOC2007 and VOC2012 trainval lists, one img_prefix per list
+        type=dataset_type,
+        ann_file=[
+            data_root + 'VOC2007/ImageSets/Main/trainval.txt',
+            data_root + 'VOC2012/ImageSets/Main/trainval.txt'
+        ],
+        img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
+        pipeline=train_pipeline),
+    val=dict(  # VOC2007 test list
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        pipeline=test_pipeline),
+    test=dict(  # identical to val
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        pipeline=test_pipeline))
+evaluation = dict(interval=2000, metric='mAP')  # same interval as checkpoints
+
+checkpoint_config = dict(by_epoch=False, interval=2000)  # not epoch-based
+
+log_config = dict(  # text log every 50 steps
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook', by_epoch=False),
+        # optional: dict(type='TensorboardLoggerHook')
+    ])
+
+custom_imports = dict(  # presumably registers 'ResLayerExtraNorm' - confirm
+    imports=['tools.benchmarks.mmdetection.res_layer_extra_norm'],
+    allow_failed_imports=False)
--- a/configs/benchmarks/mmsegmentation/_base_/datasets/cityscapes.py
+++ b/configs/benchmarks/mmsegmentation/_base_/datasets/cityscapes.py
@ -0,0 +1,54 @@
+# dataset settings: Cityscapes train/val splits read from data/cityscapes/
+dataset_type = 'CityscapesDataset'
+data_root = 'data/cityscapes/'
+img_norm_cfg = dict(  # shared by the train and test Normalize steps
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 1024)  # reused by RandomCrop and Pad below
+train_pipeline = [  # random rescale -> crop -> flip -> photometric -> pad
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [  # single scale (2048, 1024), flip augmentation disabled
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        # disabled: img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='leftImg8bit/train',
+        ann_dir='gtFine/train',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='leftImg8bit/val',
+        ann_dir='gtFine/val',
+        pipeline=test_pipeline),
+    test=dict(  # uses the same val directories as `val`
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='leftImg8bit/val',
+        ann_dir='gtFine/val',
+        pipeline=test_pipeline))
--- a/configs/benchmarks/mmsegmentation/_base_/datasets/cityscapes_769x769.py
+++ b/configs/benchmarks/mmsegmentation/_base_/datasets/cityscapes_769x769.py
@ -0,0 +1,35 @@
+_base_ = './cityscapes.py'  # only the three pipelines are overridden below
+img_norm_cfg = dict(  # shared by the train and test Normalize steps
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (769, 769)  # reused by RandomCrop and Pad below
+train_pipeline = [  # random rescale -> crop -> flip -> photometric -> pad
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [  # single scale (2049, 1025), flip augmentation disabled
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2049, 1025),
+        # disabled: img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(  # val and test both use test_pipeline
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
--- a/configs/benchmarks/mmsegmentation/_base_/datasets/pascal_voc12.py
+++ b/configs/benchmarks/mmsegmentation/_base_/datasets/pascal_voc12.py
@ -0,0 +1,57 @@
+# dataset settings: Pascal VOC 2012 segmentation train/val splits
+dataset_type = 'PascalVOCDataset'
+data_root = 'data/VOCdevkit/VOC2012'
+img_norm_cfg = dict(  # shared by the train and test Normalize steps
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)  # reused by RandomCrop and Pad below
+train_pipeline = [  # random rescale -> crop -> flip -> photometric -> pad
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [  # single scale (2048, 512), flip augmentation disabled
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 512),
+        # disabled: img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClass',
+        split='ImageSets/Segmentation/train.txt',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClass',
+        split='ImageSets/Segmentation/val.txt',
+        pipeline=test_pipeline),
+    test=dict(  # uses the same val.txt split as `val`
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClass',
+        split='ImageSets/Segmentation/val.txt',
+        pipeline=test_pipeline))
--- a/configs/benchmarks/mmsegmentation/_base_/datasets/pascal_voc12_aug.py
+++ b/configs/benchmarks/mmsegmentation/_base_/datasets/pascal_voc12_aug.py
@ -0,0 +1,9 @@
+_base_ = './pascal_voc12.py'  # val/test settings are inherited unchanged
+# dataset settings: add SegmentationClassAug annotations and aug.txt to train
+data = dict(
+    train=dict(
+        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
+        split=[
+            'ImageSets/Segmentation/train.txt',
+            'ImageSets/Segmentation/aug.txt'
+        ]))
--- a/configs/benchmarks/mmsegmentation/_base_/default_runtime.py
+++ b/configs/benchmarks/mmsegmentation/_base_/default_runtime.py
@ -0,0 +1,14 @@
+# yapf:disable
+log_config = dict(  # text log every 50 steps
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook', by_epoch=False),
+        # optional: dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None  # no checkpoint is loaded by default
+resume_from = None  # no run is resumed by default
+workflow = [('train', 1)]  # a single 'train' phase
+cudnn_benchmark = True
--- a/configs/benchmarks/mmsegmentation/_base_/models/fcn_r50-d8.py
+++ b/configs/benchmarks/mmsegmentation/_base_/models/fcn_r50-d8.py
@ -0,0 +1,45 @@
+# model settings: ResNet-50 backbone + FCN decode head + FCN auxiliary head
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # backbone and both heads
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),  # all four stage outputs are returned
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True,
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    decode_head=dict(
+        type='FCNHead',
+        in_channels=2048,
+        in_index=3,  # presumably indexes the last backbone output - confirm
+        channels=512,
+        num_convs=2,
+        concat_input=True,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,  # presumably indexes the third backbone output - confirm
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(  # weighted 0.4 versus 1.0 for the decode head
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
--- a/configs/benchmarks/mmsegmentation/_base_/schedules/schedule_20k.py
+++ b/configs/benchmarks/mmsegmentation/_base_/schedules/schedule_20k.py
@ -0,0 +1,9 @@
+# optimizer: SGD with momentum and weight decay
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy: 'poly' schedule, not epoch-based, floor of min_lr
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings: 20000 iterations, checkpoint and evaluate every 2000
+runner = dict(type='IterBasedRunner', max_iters=20000)
+checkpoint_config = dict(by_epoch=False, interval=2000)
+evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
--- a/configs/benchmarks/mmsegmentation/_base_/schedules/schedule_40k.py
+++ b/configs/benchmarks/mmsegmentation/_base_/schedules/schedule_40k.py
@ -0,0 +1,9 @@
+# optimizer: SGD with momentum and weight decay
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy: 'poly' schedule, not epoch-based, floor of min_lr
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings: 40000 iterations, checkpoint and evaluate every 4000
+runner = dict(type='IterBasedRunner', max_iters=40000)
+checkpoint_config = dict(by_epoch=False, interval=4000)
+evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
--- a/configs/benchmarks/mmsegmentation/cityscapes/fcn_r50-d8_769x769_40k_cityscapes.py
+++ b/configs/benchmarks/mmsegmentation/cityscapes/fcn_r50-d8_769x769_40k_cityscapes.py
@ -0,0 +1,9 @@
+_base_ = [  # FCN-R50-D8 model, 769x769 Cityscapes data, runtime, 40k schedule
+    '../_base_/models/fcn_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(  # align_corners on in both heads; 'slide' mode at test time
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
--- a/configs/benchmarks/mmsegmentation/voc12aug/fcn_r50-d8_512x512_20k_voc12aug.py
+++ b/configs/benchmarks/mmsegmentation/voc12aug/fcn_r50-d8_512x512_20k_voc12aug.py
@ -0,0 +1,6 @@
+_base_ = [  # FCN-R50-D8 model, augmented VOC12 data, runtime, 20k schedule
+    '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py'
+]
+model = dict(  # both heads use 21 classes instead of the base model's 19
+    decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))