diff --git a/README.md b/README.md index 885e9a82..ecf32b4f 100644 --- a/README.md +++ b/README.md @@ -196,8 +196,9 @@ For different parts from MMDetection, we have also prepared user guides and adva - [Resume training](docs/en/common_usage/resume_training.md) - [Enabling and disabling SyncBatchNorm](docs/en/common_usage/syncbn.md) - [Enabling AMP](docs/en/common_usage/amp_training.md) +- [TTA Related Notes](docs/en/common_usage/tta.md) - [Add plugins to the backbone network](docs/en/common_usage/plugins.md) -- [Freeze layers](docs/en/common_usage/common_usage/freeze_layers.md) +- [Freeze layers](docs/en/common_usage/freeze_layers.md) - [Output model predictions](docs/en/common_usage/output_predictions.md) - [Set random seed](docs/en/common_usage/set_random_seed.md) - [Module combination](docs/en/common_usage/module_combination.md) diff --git a/README_zh-CN.md b/README_zh-CN.md index 767c3178..eae02d86 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -217,6 +217,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [恢复训练](docs/zh_cn/common_usage/resume_training.md) - [开启和关闭 SyncBatchNorm](docs/zh_cn/common_usage/syncbn.md) - [开启混合精度训练](docs/zh_cn/common_usage/amp_training.md) +- [测试时增强相关说明](docs/zh_cn/common_usage/tta.md) - [给主干网络增加插件](docs/zh_cn/common_usage/plugins.md) - [冻结指定网络层权重](docs/zh_cn/common_usage/common_usage/freeze_layers.md) - [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md) diff --git a/configs/_base_/det_p5_tta.py b/configs/_base_/det_p5_tta.py new file mode 100644 index 00000000..cbbaf2e6 --- /dev/null +++ b/configs/_base_/det_p5_tta.py @@ -0,0 +1,57 @@ +# TODO: Need to solve the problem of multiple file_client_args parameters +# _file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +_file_client_args = dict(backend='disk') + +tta_model = dict( + type='mmdet.DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', 
iou_threshold=0.65), max_per_img=300)) + +img_scales = [(640, 640), (320, 320), (960, 960)] + +# LoadImageFromFile +# / | \ +# (RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) # noqa +# / \ / \ / \ +# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa +# | | | | | | +# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn +# | | | | | | +# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa + +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py index 7c5ce298..e44dc34a 100644 --- a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py +++ b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # dataset settings data_root = 'data/coco/' diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index 3059a575..1089b71b 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -23,18 +23,19 @@ RTMDet-l model structure ## Object Detection -| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :----: | :-------: | 
:------: | :------------------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | -| RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | -| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | -| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | -| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | +| Model | size | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | +| :---------: | :--: | :-------: | :------: | :------------------: | :----: | :--------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | +| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | +| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | +| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | +| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | **Note**: 1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. 2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement. +3. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping).
You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. ## Citation diff --git a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py index 64ccc598..fecd0d31 100644 --- a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolov5/README.md b/configs/yolov5/README.md index 399de4f2..b22d880f 100644 --- a/configs/yolov5/README.md +++ b/configs/yolov5/README.md @@ -20,16 +20,16 @@ YOLOv5-l-P6 model structure ### COCO -| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | -| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | -| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | -| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | -| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | -| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | -| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | -| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | -| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | +| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | 
Download | +| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | +| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | +| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | +| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | 49.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | +| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | +| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | +| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | +| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | **Note**: In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details. @@ -39,7 +39,7 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO 3. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision. 4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code. 5. The performance is unstable and may fluctuate by about 0.4 mAP and the highest performance weight in `COCO` training in `YOLOv5` may not be the last epoch. -6. `balloon` means that this is a demo configuration. +6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping).
You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. ### VOC diff --git a/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py b/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py index 54f6cdeb..9585b51f 100644 --- a/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py +++ b/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py @@ -29,6 +29,8 @@ num_det_layers = 3 load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa +tta_img_scales = [img_scale, (416, 416), (640, 640)] + # Hyperparameter reference from: # https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml model = dict( @@ -232,3 +234,37 @@ val_evaluator = dict( test_evaluator = val_evaluator train_cfg = dict(max_epochs=max_epochs) + +# Config for Test Time Augmentation. (TTA) +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in tta_img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.)
+ ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py index 3d14484f..0af1fcb8 100644 --- a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -26,6 +26,7 @@ loss_obj_weight = 1.0 obj_level_weights = [4.0, 1.0, 0.25, 0.06] affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio +tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)] # =======================Unmodified in most cases================== model = dict( backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)), @@ -101,3 +102,37 @@ val_dataloader = dict( dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) test_dataloader = val_dataloader + +# Config for Test Time Augmentation. (TTA) +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in tta_img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index 77070b5d..30503413 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py index bda6562a..0b5fa560 100644 --- a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py +++ b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ======================= Frequently modified parameters ===================== # -----data related----- diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py index 1247774e..6712002c 100644 --- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py index 17cb84da..11164d21 100644 --- a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py +++ 
b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py @@ -10,6 +10,7 @@ batch_shapes_cfg = dict( img_size=img_scale[ 0], # The image scale of padding should be divided by pad_size_divisor size_divisor=64) # Additional paddings for pixel scale +tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)] # -----model related----- # Basic size of multi-scale prior box @@ -35,8 +36,16 @@ mixup_beta = 8.0 # YOLOv5MixUp loss_cls_weight = 0.3 loss_bbox_weight = 0.05 loss_obj_weight = 0.7 +obj_level_weights = [4.0, 1.0, 0.25, 0.06] +simota_candidate_topk = 20 + +# The only difference between P6 and P5 in terms of +# hyperparameters is lr_factor +lr_factor = 0.2 # ===============================Unmodified in most cases==================== +pre_transform = _base_.pre_transform + model = dict( backbone=dict(arch='W', out_indices=(2, 3, 4, 5)), neck=dict( @@ -52,16 +61,14 @@ model = dict( norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), prior_generator=dict(base_sizes=anchors, strides=strides), - simota_candidate_topk=20, # note + simota_candidate_topk=simota_candidate_topk, # note # scaled based on number of detection layers loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)), loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - obj_level_weights=[4.0, 1.0, 0.25, 0.06])) - -pre_transform = _base_.pre_transform + obj_level_weights=obj_level_weights)) mosiac4_pipeline = [ dict( @@ -138,6 +145,38 @@ val_dataloader = dict( dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) test_dataloader = val_dataloader -# The only difference between P6 and P5 in terms of -# hyperparameters is lr_factor -default_hooks = dict(param_scheduler=dict(lr_factor=0.2)) +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) + +# Config for Test Time Augmentation. 
(TTA) +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in tta_img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/yolov8/README.md b/configs/yolov8/README.md index 47075b6c..a284e237 100644 --- a/configs/yolov8/README.md +++ b/configs/yolov8/README.md @@ -20,18 +20,18 @@ YOLOv8-P5 model structure ### COCO -| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | Config | Download | -| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | -| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) | -| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | -| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) | -| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | -| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) | -| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) | -| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) | -| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) | -| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) | +| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download | +| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :--------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | +| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | 39.9 | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) | +| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | +| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | 46.8 | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) | +| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | +| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | 52.3 | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) | +| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) | +| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | 54.4 | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) | +| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) | +| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | 55.0 | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) | **Note** @@ -40,5 +40,6 @@ YOLOv8-P5 model structure 3. We provide [scripts](https://github.com/open-mmlab/mmyolo/tree/dev/tools/model_converters/yolov8_to_mmyolo.py) to convert official weights to MMYOLO. 4. `SyncBN` means using SyncBN, `AMP` indicates training with mixed precision. 5. The performance of `Mask Refine` training is for the weight performance officially released by YOLOv8. `Mask Refine` means refining bbox by mask while loading annotations and transforming after `YOLOv5RandomAffine`, and the L and X models use `Copy Paste`. +6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipped and not flipped). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. 
## Citation diff --git a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py index 58441a99..adb9c7fe 100644 --- a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolox/yolox_p5_tta.py b/configs/yolox/yolox_p5_tta.py new file mode 100644 index 00000000..3a5b4652 --- /dev/null +++ b/configs/yolox/yolox_p5_tta.py @@ -0,0 +1,55 @@ +# TODO: Need to solve the problem of multiple file_client_args parameters +# _file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +_file_client_args = dict(backend='disk') + +tta_model = dict( + type='mmdet.DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300)) + +img_scales = [(640, 640), (320, 320), (960, 960)] + +# LoadImageFromFile +# / | \ +# Resize Resize Resize # noqa +# / \ / \ / \ +# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa +# | | | | | | +# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn +# | | | | | | +# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_file_client_args), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.Resize', scale=s, keep_ratio=True) + for s in img_scales + ], + [ + # ``RandomFlip`` must be placed before ``Pad``, otherwise + # bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], + [ + dict( + type='mmdet.Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'flip', 'flip_direction')) + ] + ]) +] diff --git a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py index b51a1087..e751b0d6 100644 --- a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', 'yolox_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py index e8c822e0..14187342 100644 --- a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py @@ -9,6 +9,9 @@ scaling_ratio_range = (0.5, 1.5) img_scale = _base_.img_scale pre_transform = _base_.pre_transform +test_img_scale = (416, 416) +tta_img_scales = [test_img_scale, (320, 320), (640, 640)] + # model settings model = dict( data_preprocessor=dict(batch_augments=[ @@ -48,7 +51,7 @@ train_pipeline_stage1 = [ test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='mmdet.Resize', scale=(416, 416), keep_ratio=True), # note + dict(type='mmdet.Resize', scale=test_img_scale, keep_ratio=True), # note dict( type='mmdet.Pad', pad_to_square=True, @@ -63,3 +66,35 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = val_dataloader + +# Config for Test Time Augmentation. 
(TTA) +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.Resize', scale=s, keep_ratio=True) + for s in tta_img_scales + ], + [ + # ``RandomFlip`` must be placed before ``Pad``, otherwise + # bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], + [ + dict( + type='mmdet.Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'flip', 'flip_direction')) + ] + ]) +] diff --git a/docs/en/common_usage/tta.md b/docs/en/common_usage/tta.md new file mode 100644 index 00000000..517d34b8 --- /dev/null +++ b/docs/en/common_usage/tta.md @@ -0,0 +1,87 @@ +# TTA Related Notes + +## Test Time Augmentation (TTA) + +MMYOLO supports TTA in v0.5.0+; users can specify the `--tta` parameter to enable it during evaluation. Taking `YOLOv5-s` as an example, its single GPU TTA test command is as follows: + +```shell +python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta +``` + +For TTA to work properly, you must ensure that the variables `tta_model` and `tta_pipeline` are present in the configuration; see [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py) for details. + +The default TTA in MMYOLO performs 3 multi-scale augmentations, followed by 2 horizontal flip augmentations, for a total of 6 parallel pipelines. 
take `YOLOv5-s` as an example, its TTA configuration is as follows + +```python +img_scales = [(640, 640), (320, 320), (960, 960)] + +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` + +The schematic diagram is shown below. + +```text + LoadImageFromFile + / | \ +(RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) + / \ / \ / \ + RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip + | | | | | | + LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn + | | | | | | + PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn +``` + +You can modify `img_scales` to support different multi-scale enhancements, or you can insert a new pipeline to implement custom TTA requirements. Assuming you only want to do horizontal flip enhancements, the configuration should be modified as follows. + +```python +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` diff --git a/docs/en/index.rst b/docs/en/index.rst index 92837c47..004bf0e7 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -37,6 +37,7 @@ You can switch between Chinese and English documents in the top-right corner of common_usage/resume_training.md common_usage/syncbn.md common_usage/amp_training.md + common_usage/tta.md common_usage/plugins.md common_usage/freeze_layers.md common_usage/output_predictions.md diff --git a/docs/zh_cn/common_usage/tta.md b/docs/zh_cn/common_usage/tta.md new file mode 100644 index 00000000..9983665f --- /dev/null +++ b/docs/zh_cn/common_usage/tta.md @@ -0,0 +1,87 @@ +# 测试时增强相关说明 + +## 测试时增强 TTA + +MMYOLO 在 v0.5.0+ 版本中增加对 TTA 的支持,用户可以在进行评估时候指定 `--tta` 参数使能。 以 `YOLOv5-s` 为例,其单卡 TTA 测试命令为: + +```shell +python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta +``` + +TTA 功能的正常运行必须确保配置中存在 `tta_model` 和 `tta_pipeline` 两个变量,详情可以参考 [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py)。 + +MMYOLO 中默认的 TTA 会先执行 3 个多尺度增强,然后再增强两个水平翻转增强,一共 6 个并行的 pipeline。以 `YOLOv5-s` 为例,其 TTA 配置为: + +```python +img_scales = [(640, 640), (320, 320), (960, 960)] + +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + 
dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` + +其示意图如下所示: + +```text + LoadImageFromFile + / | \ +(RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) + / \ / \ / \ + RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip + | | | | | | + LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn + | | | | | | + PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn +``` + +你可以修改 `img_scales` 来支持不同的多尺度增强,也可以插入新的 pipeline 从而实现自定义 TTA 需求。 假设你只想进行水平翻转增强,则配置应该修改为如下: + +```python +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 5026c30e..80d7cbe1 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -37,6 +37,7 @@ common_usage/resume_training.md common_usage/syncbn.md common_usage/amp_training.md + common_usage/tta.md common_usage/plugins.md common_usage/freeze_layers.md common_usage/output_predictions.md diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py index 926af7cc..a58084f3 100644 --- a/mmyolo/datasets/transforms/transforms.py +++ b/mmyolo/datasets/transforms/transforms.py @@ -7,7 +7,7 @@ import cv2 import mmcv import numpy as np import torch -from mmcv.transforms import BaseTransform +from mmcv.transforms import BaseTransform, Compose from mmcv.transforms.utils import cache_randomness from mmdet.datasets.transforms 
import LoadAnnotations as MMDET_LoadAnnotations from mmdet.datasets.transforms import Resize as MMDET_Resize @@ -18,6 +18,9 @@ from numpy import random from mmyolo.registry import TRANSFORMS +# TODO: Waiting for MMCV support +TRANSFORMS.register_module(module=Compose, force=True) + @TRANSFORMS.register_module() class YOLOv5KeepRatioResize(MMDET_Resize): diff --git a/tools/misc/print_config.py b/tools/misc/print_config.py new file mode 100644 index 00000000..2c2efe33 --- /dev/null +++ b/tools/misc/print_config.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os + +from mmdet.utils import replace_cfg_vals, update_data_root +from mmengine import Config, DictAction + + +def parse_args(): + parser = argparse.ArgumentParser(description='Print the whole config') + parser.add_argument('config', help='config file path') + parser.add_argument( + '--save-path', + default=None, + help='save path of whole config, suffixed with .py, .json or .yml') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMDET_DATASETS + update_data_root(cfg) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + print(f'Config:\n{cfg.pretty_text}') + + if args.save_path is not None: + save_path = args.save_path + + suffix = os.path.splitext(save_path)[-1] + assert suffix in ['.py', '.json', '.yml'] + + if not os.path.exists(os.path.split(save_path)[0]): + os.makedirs(os.path.split(save_path)[0]) + cfg.dump(save_path) + print(f'Config saving at {save_path}') + + +if __name__ == '__main__': + main() diff --git a/tools/test.py b/tools/test.py index 53a617fd..7262234b 100644 --- a/tools/test.py +++ b/tools/test.py @@ -4,7 +4,7 @@ import os import os.path as osp from mmdet.engine.hooks.utils import trigger_visualization_hook -from mmengine.config import Config, DictAction +from mmengine.config import Config, ConfigDict, DictAction from mmengine.evaluator import DumpResults from mmengine.runner import Runner @@ -31,6 +31,10 @@ def parse_args(): help='the prefix of the output json file without perform evaluation, ' 'which is useful when you want to format the result to a specific ' 'format and submit it to the test server') + parser.add_argument( + '--tta', + action='store_true', + help='Whether to use test time augmentation') parser.add_argument( '--show', action='store_true', help='show prediction results') parser.add_argument( @@ -109,6 +113,23 @@ def main(): # Determine whether the custom metainfo fields are all lowercase is_metainfo_lower(cfg) + if args.tta: + assert 'tta_model' in cfg, 'Cannot find ``tta_model`` in config.' \ + " Can't use tta !" 
+ assert 'tta_pipeline' in cfg, 'Cannot find ``tta_pipeline`` ' \ + "in config. Can't use tta !" + + cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model) + test_data_cfg = cfg.test_dataloader.dataset + while 'dataset' in test_data_cfg: + test_data_cfg = test_data_cfg['dataset'] + + # batch_shapes_cfg will force control the size of the output image, + # it is not compatible with tta. + if 'batch_shapes_cfg' in test_data_cfg: + test_data_cfg.batch_shapes_cfg = None + test_data_cfg.pipeline = cfg.tta_pipeline + # build the runner from config if 'runner_type' not in cfg: # build the default runner