diff --git a/README.md b/README.md index 10cfdd7d..99fbdb80 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ conda activate open-mmlab pip install openmim mim install "mmengine>=0.2.0" mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc1,<3.1.0" +mim install "mmdet>=3.0.0rc2,<3.1.0" git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations diff --git a/README_zh-CN.md b/README_zh-CN.md index 0884258e..16bbcc6d 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -87,7 +87,7 @@ conda activate open-mmlab pip install openmim mim install "mmengine>=0.2.0" mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc1,<3.1.0" +mim install "mmdet>=3.0.0rc2,<3.1.0" git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations diff --git a/configs/rtmdet/rtmdet_l_syncbn_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_l_syncbn_8xb32-300e_coco.py index 42440a07..72a9da70 100644 --- a/configs/rtmdet/rtmdet_l_syncbn_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_l_syncbn_8xb32-300e_coco.py @@ -218,6 +218,7 @@ custom_hooks = [ ema_type='ExpMomentumEMA', momentum=0.0002, update_buffers=True, + strict_load=False, priority=49), dict( type='mmdet.PipelineSwitchHook', diff --git a/configs/rtmdet/rtmdet_s_syncbn_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_s_syncbn_8xb32-300e_coco.py index 0be19357..23c5c2de 100644 --- a/configs/rtmdet/rtmdet_s_syncbn_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_s_syncbn_8xb32-300e_coco.py @@ -70,6 +70,7 @@ custom_hooks = [ ema_type='ExpMomentumEMA', momentum=0.0002, update_buffers=True, + strict_load=False, priority=49), dict( type='mmdet.PipelineSwitchHook', diff --git a/configs/yolov5/README.md b/configs/yolov5/README.md index be483552..247f681f 100644 --- a/configs/yolov5/README.md +++ b/configs/yolov5/README.md @@ -10,20 +10,25 @@ YOLOv5 is a family of object detection architectures and models pretrained on th ### COCO -| Backbone | size | SyncBN | AMP | Mem (GB) | box AP | 
Config | Download | -| :------: | :--: | :----: | :-: | :------: | :----: | :-------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv5-n | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | -| YOLOv5-s | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | -| YOLOv5-m | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | -| YOLOv5-l | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | +| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | +| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | +| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | +| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | +| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | +| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | +| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | 
[config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | +| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | +| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | **Note**: +In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details. 1. 
`fast` means that `YOLOv5DetDataPreprocessor` and `yolov5_collate` are used for data preprocessing, which is faster for training, but less flexible for multitasking. Recommended to use fast version config if you only care about object detection. 2. `detect` means that the network input is fixed to `640x640` and the post-processing thresholds is modified. 3. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision. 4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code. -5. The performance is unstable and may fluctuate by about 0.4 mAP. +5. The performance is unstable and may fluctuate by about 0.4 mAP, and in `YOLOv5` `COCO` training the best-performing weights may not come from the last epoch. 6. `balloon` means that this is a demo configuration. ### VOC @@ -37,8 +42,6 @@ YOLOv5 is a family of object detection architectures and models pretrained on th **Note**: -Please make sure `mmengine>=0.2.0`. - 1. Training on VOC dataset need pretrained model which trained on COCO. 2. The performance is unstable and may fluctuate by about 0.4 mAP. 3. Official YOLOv5 use COCO metric, while training VOC dataset. 
diff --git a/configs/yolov5/metafile.yml b/configs/yolov5/metafile.yml index cf270975..c64f38e5 100644 --- a/configs/yolov5/metafile.yml +++ b/configs/yolov5/metafile.yml @@ -80,6 +80,54 @@ Models: Metrics: box AP: 48.8 Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth + - Name: yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 5.8 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 35.9 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth + - Name: yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 10.5 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.4 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth + - Name: yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 19.1 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 51.3 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth + - Name: yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 30.5 + Epochs: 300 + Results: + - Task: 
Object Detection + Dataset: COCO + Metrics: + box AP: 53.7 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth - Name: yolov5_n-v61_fast_1xb64-50e_voc In Collection: YOLOv5_VOC Config: configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py diff --git a/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 00000000..6a84fdbe --- /dev/null +++ b/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,15 @@ +_base_ = './yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py' + +deepen_factor = 1.0 +widen_factor = 1.0 + +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 00000000..2974418a --- /dev/null +++ b/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,74 @@ +_base_ = './yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py' + +deepen_factor = 0.67 +widen_factor = 0.75 +lr_factor = 0.1 # lrf=0.1 +affine_scale = 0.9 + +num_classes = _base_.num_classes +num_det_layers = _base_.num_det_layers +img_scale = _base_.img_scale + +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict( + head_module=dict(widen_factor=widen_factor), + loss_cls=dict(loss_weight=0.3 * + (num_classes / 80 * 3 / num_det_layers)), + loss_obj=dict(loss_weight=0.7 * + ((img_scale[0] / 640)**2 * 3 / num_det_layers)))) + +pre_transform = _base_.pre_transform 
+albu_train_transforms = _base_.albu_train_transforms + +mosaic_affine_pipeline = [ + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114)) +] + +# enable mixup +train_pipeline = [ + *pre_transform, *mosaic_affine_pipeline, + dict( + type='YOLOv5MixUp', + prob=0.1, + pre_transform=[*pre_transform, *mosaic_affine_pipeline]), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 'image', + 'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py index 4339513c..dcd55ac4 100644 --- a/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py @@ -5,6 +5,10 @@ widen_factor = 0.75 lr_factor = 0.1 # lrf=0.1 affine_scale = 0.9 +num_classes = _base_.num_classes +num_det_layers = _base_.num_det_layers +img_scale = _base_.img_scale + model = dict( backbone=dict( deepen_factor=deepen_factor, @@ -16,12 +20,13 @@ model = dict( ), bbox_head=dict( head_module=dict(widen_factor=widen_factor), - loss_cls=dict(loss_weight=0.3), - loss_obj=dict(loss_weight=0.7))) + loss_cls=dict(loss_weight=0.3 * + (num_classes / 80 * 3 / num_det_layers)), + loss_obj=dict(loss_weight=0.7 
* + ((img_scale[0] / 640)**2 * 3 / num_det_layers)))) pre_transform = _base_.pre_transform albu_train_transforms = _base_.albu_train_transforms -img_scale = _base_.img_scale mosaic_affine_pipeline = [ dict( diff --git a/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 00000000..3cd2d6b7 --- /dev/null +++ b/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,15 @@ +_base_ = 'yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py' + +deepen_factor = 0.33 +widen_factor = 0.25 + +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 00000000..787317c8 --- /dev/null +++ b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,89 @@ +_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' + +img_scale = (1280, 1280) # height, width +num_classes = 80 +# only on Val +batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64) + +anchors = [ + [(19, 27), (44, 40), (38, 94)], # P3/8 + [(96, 68), (86, 152), (180, 137)], # P4/16 + [(140, 301), (303, 264), (238, 542)], # P5/32 + [(436, 615), (739, 380), (925, 792)] # P6/64 +] +strides = [8, 16, 32, 64] +num_det_layers = 4 + +model = dict( + backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)), + neck=dict( + in_channels=[256, 512, 768, 1024], out_channels=[256, 512, 768, 1024]), + bbox_head=dict( + head_module=dict( + in_channels=[256, 512, 768, 1024], featmap_strides=strides), + prior_generator=dict(base_sizes=anchors, strides=strides), + # scaled based on number of detection layers + loss_cls=dict(loss_weight=0.5 * + (num_classes / 80 * 3 / 
num_det_layers)), + loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)), + loss_obj=dict(loss_weight=1.0 * + ((img_scale[0] / 640)**2 * 3 / num_det_layers)), + obj_level_weights=[4.0, 1.0, 0.25, 0.06])) + +pre_transform = _base_.pre_transform +albu_train_transforms = _base_.albu_train_transforms + +train_pipeline = [ + *pre_transform, + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(0.5, 1.5), + border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114)), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 'image', + 'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict( + type='LetterResize', + scale=img_scale, + allow_scale_up=False, + pad_val=dict(img=114)), + dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param')) +] + +val_dataloader = dict( + dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) + +test_dataloader = val_dataloader diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index 2a2a0c89..4a56b492 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ 
b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -5,6 +5,7 @@ data_root = 'data/coco/' dataset_type = 'YOLOv5CocoDataset' # parameters that often need to be modified +num_classes = 80 img_scale = (640, 640) # height, width deepen_factor = 0.33 widen_factor = 0.5 @@ -26,9 +27,13 @@ batch_shapes_cfg = dict( size_divisor=32, extra_pad_ratio=0.5) -anchors = [[(10, 13), (16, 30), (33, 23)], [(30, 61), (62, 45), (59, 119)], - [(116, 90), (156, 198), (373, 326)]] +anchors = [ + [(10, 13), (16, 30), (33, 23)], # P3/8 + [(30, 61), (62, 45), (59, 119)], # P4/16 + [(116, 90), (156, 198), (373, 326)] # P5/32 +] strides = [8, 16, 32] +num_det_layers = 3 # single-scale training is recommended to # be turned on, which can speed up training. @@ -60,7 +65,7 @@ model = dict( type='YOLOv5Head', head_module=dict( type='YOLOv5HeadModule', - num_classes=80, + num_classes=num_classes, in_channels=[256, 512, 1024], widen_factor=widen_factor, featmap_strides=strides, @@ -69,24 +74,25 @@ model = dict( type='mmdet.YOLOAnchorGenerator', base_sizes=anchors, strides=strides), + # scaled based on number of detection layers loss_cls=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', - loss_weight=0.5), + loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)), loss_bbox=dict( type='IoULoss', iou_mode='ciou', bbox_format='xywh', eps=1e-7, reduction='mean', - loss_weight=0.05, + loss_weight=0.05 * (3 / num_det_layers), return_iou=True), loss_obj=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', - loss_weight=1.0), + loss_weight=1.0 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), prior_match_thr=4., obj_level_weights=[4., 1., 0.4]), test_cfg=dict( @@ -104,9 +110,7 @@ albu_train_transforms = [ ] pre_transform = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), dict(type='LoadAnnotations', with_bbox=True) ] @@ -158,9 +162,7 @@ 
train_dataloader = dict( pipeline=train_pipeline)) test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), dict(type='YOLOv5KeepRatioResize', scale=img_scale), dict( type='LetterResize', @@ -211,7 +213,9 @@ default_hooks = dict( lr_factor=0.01, max_epochs=max_epochs), checkpoint=dict( - type='CheckpointHook', interval=save_epoch_intervals, + type='CheckpointHook', + interval=save_epoch_intervals, + save_best='auto', max_keep_ckpts=3)) custom_hooks = [ @@ -220,6 +224,7 @@ custom_hooks = [ ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, + strict_load=False, priority=49) ] diff --git a/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 00000000..9fe5c010 --- /dev/null +++ b/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,14 @@ +_base_ = './yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py' +deepen_factor = 1.33 +widen_factor = 1.25 + +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolox/yolox_s_8xb8-300e_coco.py b/configs/yolox/yolox_s_8xb8-300e_coco.py index 323df078..c9f8aa5e 100644 --- a/configs/yolox/yolox_s_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_8xb8-300e_coco.py @@ -264,6 +264,7 @@ custom_hooks = [ ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, + strict_load=False, priority=49) ] diff --git a/docker/Dockerfile b/docker/Dockerfile index 4843261a..dbd92600 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,7 +26,7 @@ RUN apt-get update \ # Install MMEngine , MMCV and MMDet RUN pip install --no-cache-dir openmim && \ - mim install --no-cache-dir "mmengine>=0.2.0" 
"mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc1,<3.1.0" + mim install --no-cache-dir "mmengine>=0.2.0" "mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc2,<3.1.0" # Install MMYOLO RUN git clone https://github.com/open-mmlab/mmyolo.git /mmyolo && \ diff --git a/docs/en/get_started.md b/docs/en/get_started.md index 61d7f4a0..b475effb 100644 --- a/docs/en/get_started.md +++ b/docs/en/get_started.md @@ -6,10 +6,10 @@ Compatible MMEngine, MMCV and MMDetection versions are shown as below. Please in | MMYOLO version | MMDetection version | MMEngine version | MMCV version | | :------------: | :----------------------: | :----------------------: | :---------------------: | -| main | mmdet>=3.0.0rc1, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.2 | mmdet>=3.0.0rc1, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.1 | mmdet>=3.0.0rc1, \<3.1.0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.0 | mmdet>=3.0.0rc0, \<3.1.0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| main | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.1 | mmdet==3.0.0rc1 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.0 | mmdet==3.0.0rc0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | In this section, we demonstrate how to prepare an environment with PyTorch. 
diff --git a/docs/en/user_guides/yolov5_tutorial.md b/docs/en/user_guides/yolov5_tutorial.md index af594ba4..67c3e732 100644 --- a/docs/en/user_guides/yolov5_tutorial.md +++ b/docs/en/user_guides/yolov5_tutorial.md @@ -12,8 +12,7 @@ conda install pytorch torchvision -c pytorch pip install -U openmim mim install "mmengine>=0.2.0" mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc1,<3.1.0" -# for albumentations +mim install "mmdet>=3.0.0rc2,<3.1.0" git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md index dacf9f34..792a7e4e 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started.md @@ -6,10 +6,10 @@ | MMYOLO version | MMDetection version | MMEngine version | MMCV version | | :------------: | :----------------------: | :----------------------: | :---------------------: | -| main | mmdet>=3.0.0rc1, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.2 | mmdet>=3.0.0rc1, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.1 | mmdet>=3.0.0rc1, \<3.1.0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.0 | mmdet>=3.0.0rc0, \<3.1.0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| main | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.2.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.1 | mmdet==3.0.0rc1 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.0 | mmdet==3.0.0rc0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | 本节中,我们将演示如何用 PyTorch 准备一个环境。 diff --git a/docs/zh_cn/user_guides/yolov5_tutorial.md b/docs/zh_cn/user_guides/yolov5_tutorial.md index 8db69201..7fe44197 100644 --- a/docs/zh_cn/user_guides/yolov5_tutorial.md +++ b/docs/zh_cn/user_guides/yolov5_tutorial.md @@ -12,8 +12,7 @@ conda install pytorch torchvision -c pytorch pip install -U openmim mim install "mmengine>=0.2.0" mim install 
"mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc1,<3.1.0" -# for albumentations +mim install "mmdet>=3.0.0rc2,<3.1.0" git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations diff --git a/mmyolo/__init__.py b/mmyolo/__init__.py index 39ae5c4e..b20994b1 100644 --- a/mmyolo/__init__.py +++ b/mmyolo/__init__.py @@ -14,7 +14,7 @@ mmengine_minimum_version = '0.2.0' mmengine_maximum_version = '1.0.0' mmengine_version = digit_version(mmengine.__version__) -mmdet_minimum_version = '3.0.0rc1' +mmdet_minimum_version = '3.0.0rc2' mmdet_maximum_version = '3.1.0' mmdet_version = digit_version(mmdet.__version__) diff --git a/mmyolo/models/backbones/csp_darknet.py b/mmyolo/models/backbones/csp_darknet.py index 041a4b90..7aba5445 100644 --- a/mmyolo/models/backbones/csp_darknet.py +++ b/mmyolo/models/backbones/csp_darknet.py @@ -62,7 +62,10 @@ class YOLOv5CSPDarknet(BaseBackbone): # in_channels, out_channels, num_blocks, add_identity, use_spp arch_settings = { 'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False], - [256, 512, 9, True, False], [512, 1024, 3, True, True]] + [256, 512, 9, True, False], [512, 1024, 3, True, True]], + 'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False], + [256, 512, 9, True, False], [512, 768, 3, True, False], + [768, 1024, 3, True, True]] } def __init__(self, diff --git a/mmyolo/models/necks/yolov5_pafpn.py b/mmyolo/models/necks/yolov5_pafpn.py index 1f867f4c..cc7487e7 100644 --- a/mmyolo/models/necks/yolov5_pafpn.py +++ b/mmyolo/models/necks/yolov5_pafpn.py @@ -72,7 +72,7 @@ class YOLOv5PAFPN(BaseYOLONeck): Returns: nn.Module: The reduce layer. """ - if idx == 2: + if idx == len(self.in_channels) - 1: layer = ConvModule( make_divisible(self.in_channels[idx], self.widen_factor), make_divisible(self.in_channels[idx - 1], self.widen_factor), @@ -97,6 +97,7 @@ class YOLOv5PAFPN(BaseYOLONeck): Returns: nn.Module: The top down layer. 
""" + if idx == 1: return CSPLayer( make_divisible(self.in_channels[idx - 1] * 2, @@ -106,7 +107,7 @@ class YOLOv5PAFPN(BaseYOLONeck): add_identity=False, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) - elif idx == 2: + else: return nn.Sequential( CSPLayer( make_divisible(self.in_channels[idx - 1] * 2, diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt index eef76637..cdf7d390 100644 --- a/requirements/mminstall.txt +++ b/requirements/mminstall.txt @@ -1,3 +1,3 @@ mmcv>=2.0.0rc1,<2.1.0 -mmdet>=3.0.0rc1 +mmdet>=3.0.0rc2 mmengine>=0.2.0 diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt index d4cca9eb..2b17cbb8 100644 --- a/requirements/readthedocs.txt +++ b/requirements/readthedocs.txt @@ -1,5 +1,5 @@ mmcv>=2.0.0rc1,<2.1.0 -mmdet>=3.0.0rc1 +mmdet>=3.0.0rc2 mmengine>=0.2.0 torch torchvision diff --git a/tools/model_converters/yolov5_to_mmyolo.py b/tools/model_converters/yolov5_to_mmyolo.py index d97d5640..c1d4e41d 100644 --- a/tools/model_converters/yolov5_to_mmyolo.py +++ b/tools/model_converters/yolov5_to_mmyolo.py @@ -4,7 +4,7 @@ from collections import OrderedDict import torch -convert_dict = { +convert_dict_p5 = { 'model.0': 'backbone.stem', 'model.1': 'backbone.stage1.0', 'model.2': 'backbone.stage1.1', @@ -27,17 +27,62 @@ convert_dict = { 'model.24.m': 'bbox_head.head_module.convs_pred', } +convert_dict_p6 = { + 'model.0': 'backbone.stem', + 'model.1': 'backbone.stage1.0', + 'model.2': 'backbone.stage1.1', + 'model.3': 'backbone.stage2.0', + 'model.4': 'backbone.stage2.1', + 'model.5': 'backbone.stage3.0', + 'model.6': 'backbone.stage3.1', + 'model.7': 'backbone.stage4.0', + 'model.8': 'backbone.stage4.1', + 'model.9': 'backbone.stage5.0', + 'model.10': 'backbone.stage5.1', + 'model.11.cv1': 'backbone.stage5.2.conv1', + 'model.11.cv2': 'backbone.stage5.2.conv2', + 'model.12': 'neck.reduce_layers.3', + 'model.15': 'neck.top_down_layers.0.0', + 'model.16': 'neck.top_down_layers.0.1', + 'model.19': 
'neck.top_down_layers.1.0', + 'model.20': 'neck.top_down_layers.1.1', + 'model.23': 'neck.top_down_layers.2', + 'model.24': 'neck.downsample_layers.0', + 'model.26': 'neck.bottom_up_layers.0', + 'model.27': 'neck.downsample_layers.1', + 'model.29': 'neck.bottom_up_layers.1', + 'model.30': 'neck.downsample_layers.2', + 'model.32': 'neck.bottom_up_layers.2', + 'model.33.m': 'bbox_head.head_module.convs_pred', +} + def convert(src, dst): - """Convert keys in detectron pretrained YOLOV5 models to mmyolo style.""" - yolov5_model = torch.load(src)['model'] - blobs = yolov5_model.state_dict() + """Convert keys in pretrained YOLOv5 models to mmyolo style.""" + if src.endswith('6.pt'): + convert_dict = convert_dict_p6 + is_p6_model = True + print('Converting P6 model') + else: + convert_dict = convert_dict_p5 + is_p6_model = False + print('Converting P5 model') + try: + yolov5_model = torch.load(src)['model'] + blobs = yolov5_model.state_dict() + except ModuleNotFoundError: + raise RuntimeError( + 'This script must be placed under the ultralytics/yolov5 repo,' + ' because loading the official pretrained model need' + ' `model.py` to build model.') state_dict = OrderedDict() for key, weight in blobs.items(): num, module = key.split('.')[1:3] - if num == '9' or num == '24': + if (is_p6_model and + (num == '11' or num == '33')) or (not is_p6_model and + (num == '9' or num == '24')): if module == 'anchors': continue prefix = f'model.{num}.{module}' @@ -68,7 +113,7 @@ def main(): parser = argparse.ArgumentParser(description='Convert model keys') parser.add_argument( '--src', default='yolov5s.pt', help='src yolov5 model path') - parser.add_argument('--dst', default='mmyolov5.pt', help='save path') + parser.add_argument('--dst', default='mmyolov5s.pt', help='save path') args = parser.parse_args() convert(args.src, args.dst)