[Improve] Beautify RTMDet config (#531)

* beautify rtmdet config

* format
Nioolek 2023-02-09 19:08:21 +08:00 committed by GitHub
parent 82d288f5d4
commit e966ce4e60
6 changed files with 137 additions and 60 deletions
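The gist of the change: literal values scattered through the RTMDet configs are promoted to named, commented module-level parameters, grouped under "Frequently modified parameters", "Possible modified parameters" and "Unmodified in most cases" headers, and derived configs reference them by name (e.g. `_base_.img_scale`, `_base_.num_epochs_stage2`) instead of repeating literals. Below is a minimal sketch of how a derived config consumes the refactored base under standard MMEngine config inheritance; the derived file itself and its override values are illustrative only, not part of this commit.

# Illustrative derived config (hypothetical file, not part of this PR).
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'

# Named values can be read straight from the base config.
img_scale = _base_.img_scale      # (640, 640)
max_epochs = _base_.max_epochs    # 300

# Nested dicts merge key-by-key with the base, so only these keys change.
model = dict(
    backbone=dict(deepen_factor=0.67, widen_factor=0.75),
    neck=dict(deepen_factor=0.67, widen_factor=0.75))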


@@ -1,30 +1,56 @@
_base_ = '../_base_/default_runtime.py'
+# ========================Frequently modified parameters======================
+# -----data related-----
data_root = 'data/coco/'
-dataset_type = 'YOLOv5CocoDataset'
-img_scale = (640, 640)  # width, height
-deepen_factor = 1.0
-widen_factor = 1.0
-max_epochs = 300
-stage2_num_epochs = 20
-interval = 10
-num_classes = 80
+# Path of train annotation file
+train_ann_file = 'annotations/instances_train2017.json'
+train_data_prefix = 'train2017/'  # Prefix of train image path
+# Path of val annotation file
+val_ann_file = 'annotations/instances_val2017.json'
+val_data_prefix = 'val2017/'  # Prefix of val image path
+num_classes = 80  # Number of classes for classification
+# Batch size of a single GPU during training
train_batch_size_per_gpu = 32
+# Worker to pre-fetch data for each single GPU during training
train_num_workers = 10
-val_batch_size_per_gpu = 32
-val_num_workers = 10
# persistent_workers must be False if num_workers is 0.
persistent_workers = True
-strides = [8, 16, 32]
+# -----train val related-----
+# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
base_lr = 0.004
-# single-scale training is recommended to
-# be turned on, which can speed up training.
-env_cfg = dict(cudnn_benchmark=True)
-# only on Val
+max_epochs = 300  # Maximum training epochs
+# Change train_pipeline for final 20 epochs (stage 2)
+num_epochs_stage2 = 20
+model_test_cfg = dict(
+    # The config of multi-label for multi-class prediction.
+    multi_label=True,
+    # The number of boxes before NMS
+    nms_pre=30000,
+    score_thr=0.001,  # Threshold to filter out boxes.
+    nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold
+    max_per_img=300)  # Max number of detections of each image
+# ========================Possible modified parameters========================
+# -----data related-----
+img_scale = (640, 640)  # width, height
+# ratio range for random resize
+random_resize_ratio_range = (0.1, 2.0)
+# Cached images number in mosaic
+mosaic_max_cached_images = 40
+# Number of cached images in mixup
+mixup_max_cached_images = 20
+# Dataset type, this will be used to define the dataset
+dataset_type = 'YOLOv5CocoDataset'
+# Batch size of a single GPU during validation
+val_batch_size_per_gpu = 32
+# Worker to pre-fetch data for each single GPU during validation
+val_num_workers = 10
+# Config of batch shapes. Only on val.
batch_shapes_cfg = dict(
    type='BatchShapePolicy',
    batch_size=val_batch_size_per_gpu,
@@ -32,6 +58,35 @@ batch_shapes_cfg = dict(
    size_divisor=32,
    extra_pad_ratio=0.5)
+# -----model related-----
+# The scaling factor that controls the depth of the network structure
+deepen_factor = 1.0
+# The scaling factor that controls the width of the network structure
+widen_factor = 1.0
+# Strides of multi-scale prior box
+strides = [8, 16, 32]
+norm_cfg = dict(type='BN')  # Normalization config
+# -----train val related-----
+lr_start_factor = 1.0e-5
+dsl_topk = 13  # Number of bbox selected in each level
+loss_cls_weight = 1.0
+loss_bbox_weight = 2.0
+qfl_beta = 2.0  # beta of QualityFocalLoss
+weight_decay = 0.05
+# Save model checkpoint and validation intervals
+save_checkpoint_intervals = 10
+# validation intervals in stage 2
+val_interval_stage2 = 1
+# The maximum checkpoints to keep.
+max_keep_ckpts = 3
+# single-scale training is recommended to
+# be turned on, which can speed up training.
+env_cfg = dict(cudnn_benchmark=True)
+# ===============================Unmodified in most cases====================
model = dict(
    type='YOLODetector',
    data_preprocessor=dict(
@@ -46,7 +101,7 @@ model = dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        channel_attention=True,
-        norm_cfg=dict(type='BN'),
+        norm_cfg=norm_cfg,
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='CSPNeXtPAFPN',
@@ -56,7 +111,7 @@ model = dict(
        out_channels=256,
        num_csp_blocks=3,
        expand_ratio=0.5,
-        norm_cfg=dict(type='BN'),
+        norm_cfg=norm_cfg,
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='RTMDetHead',
@@ -66,7 +121,7 @@ model = dict(
            in_channels=256,
            stacked_convs=2,
            feat_channels=256,
-            norm_cfg=dict(type='BN'),
+            norm_cfg=norm_cfg,
            act_cfg=dict(type='SiLU', inplace=True),
            share_conv=True,
            pred_kernel_size=1,
@@ -77,24 +132,19 @@ model = dict(
        loss_cls=dict(
            type='mmdet.QualityFocalLoss',
            use_sigmoid=True,
-            beta=2.0,
-            loss_weight=1.0),
-        loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)),
+            beta=qfl_beta,
+            loss_weight=loss_cls_weight),
+        loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=loss_bbox_weight)),
    train_cfg=dict(
        assigner=dict(
            type='BatchDynamicSoftLabelAssigner',
            num_classes=num_classes,
-            topk=13,
+            topk=dsl_topk,
            iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
-    test_cfg=dict(
-        multi_label=True,
-        nms_pre=30000,
-        score_thr=0.001,
-        nms=dict(type='nms', iou_threshold=0.65),
-        max_per_img=300),
+    test_cfg=model_test_cfg,
)
train_pipeline = [
@@ -104,20 +154,23 @@ train_pipeline = [
        type='Mosaic',
        img_scale=img_scale,
        use_cached=True,
-        max_cached_images=40,
+        max_cached_images=mosaic_max_cached_images,
        pad_val=114.0),
    dict(
        type='mmdet.RandomResize',
        # img_scale is (width, height)
        scale=(img_scale[0] * 2, img_scale[1] * 2),
-        ratio_range=(0.1, 2.0),
+        ratio_range=random_resize_ratio_range,
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
-    dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
+    dict(
+        type='YOLOv5MixUp',
+        use_cached=True,
+        max_cached_images=mixup_max_cached_images),
    dict(type='mmdet.PackDetInputs')
]
@@ -127,7 +180,7 @@ train_pipeline_stage2 = [
    dict(
        type='mmdet.RandomResize',
        scale=img_scale,
-        ratio_range=(0.1, 2.0),
+        ratio_range=random_resize_ratio_range,
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
@@ -162,8 +215,8 @@ train_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file='annotations/instances_train2017.json',
-        data_prefix=dict(img='train2017/'),
+        ann_file=train_ann_file,
+        data_prefix=dict(img=train_data_prefix),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline))
@@ -177,8 +230,8 @@ val_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file='annotations/instances_val2017.json',
-        data_prefix=dict(img='val2017/'),
+        ann_file=val_ann_file,
+        data_prefix=dict(img=val_data_prefix),
        test_mode=True,
        batch_shapes_cfg=batch_shapes_cfg,
        pipeline=test_pipeline))
@@ -189,14 +242,14 @@ test_dataloader = val_dataloader
val_evaluator = dict(
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
-    ann_file=data_root + 'annotations/instances_val2017.json',
+    ann_file=data_root + val_ann_file,
    metric='bbox')
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
-    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
@@ -204,7 +257,7 @@ optim_wrapper = dict(
param_scheduler = [
    dict(
        type='LinearLR',
-        start_factor=1.0e-5,
+        start_factor=lr_start_factor,
        by_epoch=False,
        begin=0,
        end=1000),
@@ -223,8 +276,8 @@ param_scheduler = [
default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook',
-        interval=interval,
-        max_keep_ckpts=3  # only keep latest 3 checkpoints
+        interval=save_checkpoint_intervals,
+        max_keep_ckpts=max_keep_ckpts  # only keep latest 3 checkpoints
    ))
custom_hooks = [
@@ -237,15 +290,15 @@ custom_hooks = [
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
-        switch_epoch=max_epochs - stage2_num_epochs,
+        switch_epoch=max_epochs - num_epochs_stage2,
        switch_pipeline=train_pipeline_stage2)
]
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
-    val_interval=interval,
-    dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
+    val_interval=save_checkpoint_intervals,
+    dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
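For reference, here is how the new training-loop parameters interact, as I read MMEngine's EpochBasedTrainLoop and PipelineSwitchHook with the defaults defined above; the worked numbers are an illustration, not part of the diff.

# Worked example with the values defined above (illustrative only).
max_epochs = 300
num_epochs_stage2 = 20
save_checkpoint_intervals = 10
val_interval_stage2 = 1

switch_epoch = max_epochs - num_epochs_stage2  # 280
# PipelineSwitchHook swaps train_pipeline for train_pipeline_stage2 at epoch 280.
# dynamic_intervals=[(280, 1)] means: validate (and checkpoint) every 10 epochs
# up to epoch 280, then validate every epoch for the final 20 epochs.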


@@ -1,8 +1,10 @@
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
+# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
+# =======================Unmodified in most cases==================
model = dict(
    backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
    neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),


@@ -1,10 +1,19 @@
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth'  # noqa
+# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.5
img_scale = _base_.img_scale
+# ratio range for random resize
+random_resize_ratio_range = (0.5, 2.0)
+# Number of cached images in mosaic
+mosaic_max_cached_images = 40
+# Number of cached images in mixup
+mixup_max_cached_images = 20
+# =======================Unmodified in most cases==================
model = dict(
    backbone=dict(
        deepen_factor=deepen_factor,
@@ -30,20 +39,23 @@ train_pipeline = [
        type='Mosaic',
        img_scale=img_scale,
        use_cached=True,
-        max_cached_images=40,
+        max_cached_images=mosaic_max_cached_images,
        pad_val=114.0),
    dict(
        type='mmdet.RandomResize',
        # img_scale is (width, height)
        scale=(img_scale[0] * 2, img_scale[1] * 2),
-        ratio_range=(0.5, 2.0),  # note
+        ratio_range=random_resize_ratio_range,  # note
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
-    dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
+    dict(
+        type='YOLOv5MixUp',
+        use_cached=True,
+        max_cached_images=mixup_max_cached_images),
    dict(type='mmdet.PackDetInputs')
]
@@ -53,7 +65,7 @@ train_pipeline_stage2 = [
    dict(
        type='mmdet.RandomResize',
        scale=img_scale,
-        ratio_range=(0.5, 2.0),  # note
+        ratio_range=random_resize_ratio_range,  # note
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
@@ -75,6 +87,6 @@ custom_hooks = [
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
-        switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs,
+        switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
        switch_pipeline=train_pipeline_stage2)
]
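A note on why this config repeats the pipelines instead of only redefining random_resize_ratio_range: with Python-style MMEngine configs, the base file is evaluated first, so its train_pipeline already contains the old literal (0.1, 2.0); redefining the variable in a child has no retroactive effect on dicts the base has already built. That is why the s (and tiny) configs declare the pipelines again with the new values, roughly as sketched below (assumed inheritance semantics, simplified).

# Sketch of the pitfall and the fix (simplified, illustrative).
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'

# Changing only the variable does NOT update the Mosaic/RandomResize dicts
# that the base train_pipeline was built from at parse time:
random_resize_ratio_range = (0.5, 2.0)

# So the pipeline must be rebuilt with the new value and re-attached, e.g.:
# train_pipeline = [...]  # rebuilt using random_resize_ratio_range
# train_dataloader = dict(dataset=dict(pipeline=train_pipeline))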


@@ -1,11 +1,19 @@
_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth'  # noqa
+# ========================modified parameters======================
deepen_factor = 0.167
widen_factor = 0.375
img_scale = _base_.img_scale
+# ratio range for random resize
+random_resize_ratio_range = (0.5, 2.0)
+# Number of cached images in mosaic
+mosaic_max_cached_images = 20
+# Number of cached images in mixup
+mixup_max_cached_images = 10
+# =======================Unmodified in most cases==================
model = dict(
    backbone=dict(
        deepen_factor=deepen_factor,
@@ -24,14 +32,14 @@ train_pipeline = [
        type='Mosaic',
        img_scale=img_scale,
        use_cached=True,
-        max_cached_images=20,  # note
+        max_cached_images=mosaic_max_cached_images,  # note
        random_pop=False,  # note
        pad_val=114.0),
    dict(
        type='mmdet.RandomResize',
        # img_scale is (width, height)
        scale=(img_scale[0] * 2, img_scale[1] * 2),
-        ratio_range=(0.5, 2.0),
+        ratio_range=random_resize_ratio_range,
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
@@ -42,7 +50,7 @@ train_pipeline = [
        type='YOLOv5MixUp',
        use_cached=True,
        random_pop=False,
-        max_cached_images=10,
+        max_cached_images=mixup_max_cached_images,
        prob=0.5),
    dict(type='mmdet.PackDetInputs')
]


@@ -1,8 +1,10 @@
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
+# ========================modified parameters======================
deepen_factor = 1.33
widen_factor = 1.25
+# =======================Unmodified in most cases==================
model = dict(
    backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
    neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),


@@ -27,7 +27,7 @@ anchors = [
]
# -----train val related-----
-# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
+# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
base_lr = 0.01
max_epochs = 300  # Maximum training epochs
@@ -77,12 +77,12 @@ loss_cls_weight = 0.5
loss_bbox_weight = 0.05
loss_obj_weight = 1.0
prior_match_thr = 4.  # Priori box matching threshold
-obj_level_weights = [4., 1.,
-                     0.4]  # The obj loss weights of the three output layers
+# The obj loss weights of the three output layers
+obj_level_weights = [4., 1., 0.4]
lr_factor = 0.01  # Learning rate scaling factor
weight_decay = 0.0005
# Save model checkpoint and validation intervals
-save_epoch_intervals = 10
+save_checkpoint_intervals = 10
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# Single-scale training is recommended to
@@ -263,7 +263,7 @@ default_hooks = dict(
        max_epochs=max_epochs),
    checkpoint=dict(
        type='CheckpointHook',
-        interval=save_epoch_intervals,
+        interval=save_checkpoint_intervals,
        save_best='auto',
        max_keep_ckpts=max_keep_ckpts))
@@ -287,6 +287,6 @@ test_evaluator = val_evaluator
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
-    val_interval=save_epoch_intervals)
+    val_interval=save_checkpoint_intervals)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')