[Feature] Support TTA (#553)

* support TTA

* update note
pull/595/head
Haian Huang(深度眸) 2023-02-24 21:46:24 +08:00 committed by GitHub
parent 78dc0fde99
commit 1c833eb195
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 568 additions and 48 deletions

View File

@ -196,8 +196,9 @@ For different parts from MMDetection, we have also prepared user guides and adva
- [Resume training](docs/en/common_usage/resume_training.md)
- [Enabling and disabling SyncBatchNorm](docs/en/common_usage/syncbn.md)
- [Enabling AMP](docs/en/common_usage/amp_training.md)
- [TTA Related Notes](docs/en/common_usage/tta.md)
- [Add plugins to the backbone network](docs/en/common_usage/plugins.md)
- [Freeze layers](docs/en/common_usage/common_usage/freeze_layers.md)
- [Freeze layers](docs/en/common_usage/freeze_layers.md)
- [Output model predictions](docs/en/common_usage/output_predictions.md)
- [Set random seed](docs/en/common_usage/set_random_seed.md)
- [Module combination](docs/en/common_usage/module_combination.md)

View File

@ -217,6 +217,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
- [恢复训练](docs/zh_cn/common_usage/resume_training.md)
- [开启和关闭 SyncBatchNorm](docs/zh_cn/common_usage/syncbn.md)
- [开启混合精度训练](docs/zh_cn/common_usage/amp_training.md)
- [测试时增强相关说明](docs/zh_cn/common_usage/tta.md)
- [给主干网络增加插件](docs/zh_cn/common_usage/plugins.md)
- [冻结指定网络层权重](docs/zh_cn/common_usage/freeze_layers.md)
- [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md)

View File

@ -0,0 +1,57 @@
# TODO: Need to solve the problem of multiple file_client_args parameters
# _file_client_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))

# File-client settings handed to the image loading step of `tta_pipeline`.
_file_client_args = dict(backend='disk')

# Model wrapper selected for test time augmentation (TTA). `tta_cfg` carries
# an NMS setting and a per-image cap -- presumably used when the predictions
# of the augmented views are merged; confirm against mmdet.DetTTAModel.
tta_model = dict(
    type='mmdet.DetTTAModel',
    tta_cfg=dict(
        nms=dict(type='nms', iou_threshold=0.65),
        max_per_img=300,
    ),
)

# One resize branch is created for every scale listed here.
img_scales = [(640, 640), (320, 320), (960, 960)]

# Layout of the augmentation tree described by `tta_pipeline`
# (3 resize branches x 2 flip branches = 6 augmented views):
#
#                      LoadImageFromFile
#              /               |               \
#     (RatioResize,      (RatioResize,      (RatioResize,
#      LetterResize)      LetterResize)      LetterResize)
#        /     \            /     \            /     \
#   RandFlip RandFlip  RandFlip RandFlip  RandFlip RandFlip
#      |        |         |        |         |        |
#   LoadAnn  LoadAnn   LoadAnn  LoadAnn   LoadAnn  LoadAnn
#      |        |         |        |         |        |
#   PackDet  PackDet   PackDet  PackDet   PackDet  PackDet

# Resize branch per scale: keep-ratio resize followed by letterbox padding
# with the constant value 114, never scaling the image up.
_multiscale_resize_transforms = [
    dict(
        type='Compose',
        transforms=[
            dict(type='YOLOv5KeepRatioResize', scale=scale),
            dict(
                type='LetterResize',
                scale=scale,
                allow_scale_up=False,
                pad_val=dict(img=114)),
        ],
    ) for scale in img_scales
]

tta_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=_file_client_args),
    dict(
        type='TestTimeAug',
        # Each inner list holds the alternatives of one augmentation stage.
        transforms=[
            # Stage 1: one of the multi-scale resize branches.
            _multiscale_resize_transforms,
            # Stage 2: always flip (prob=1.) or never flip (prob=0.).
            [
                dict(type='mmdet.RandomFlip', prob=flip_prob)
                for flip_prob in (1., 0.)
            ],
            # Stage 3: load the box annotations.
            [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
            # Stage 4: pack the inputs together with the listed meta keys.
            [
                dict(
                    type='mmdet.PackDetInputs',
                    meta_keys=('img_id', 'img_path', 'ori_shape',
                               'img_shape', 'scale_factor', 'pad_param',
                               'flip', 'flip_direction')),
            ],
        ]),
]

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# dataset settings
data_root = 'data/coco/'

View File

@ -23,18 +23,19 @@ RTMDet-l model structure
## Object Detection
| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download |
| :---------: | :--: | :----: | :-------: | :------: | :------------------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
| RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) |
| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |
| Model | size | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download |
| :---------: | :--: | :-------: | :------: | :------------------: | :----: | :--------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) |
| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |
**Note**:
1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS.
2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement.
3. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
## Citation

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# ========================Frequently modified parameters======================
# -----data related-----

View File

@ -20,16 +20,16 @@ YOLOv5-l-P6 model structure
### COCO
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download |
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) |
| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) |
| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) |
| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) |
| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) |
| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) |
| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) |
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download |
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) |
| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) |
| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | 49.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) |
| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) |
| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) |
| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) |
| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) |
**Note**:
In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details.
@ -39,7 +39,7 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO
3. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision.
4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code.
5. The performance is unstable and may fluctuate by about 0.4 mAP and the highest performance weight in `COCO` training in `YOLOv5` may not be the last epoch.
6. `balloon` means that this is a demo configuration.
6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
### VOC

View File

@ -29,6 +29,8 @@ num_det_layers = 3
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
tta_img_scales = [img_scale, (416, 416), (640, 640)]
# Hyperparameter reference from:
# https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml
model = dict(
@ -232,3 +234,37 @@ val_evaluator = dict(
test_evaluator = val_evaluator
train_cfg = dict(max_epochs=max_epochs)
# Config for Test Time Augmentation. (TTA)
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in tta_img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]

View File

@ -26,6 +26,7 @@ loss_obj_weight = 1.0
obj_level_weights = [4.0, 1.0, 0.25, 0.06]
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)),
@ -101,3 +102,37 @@ val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
test_dataloader = val_dataloader
# Config for Test Time Augmentation. (TTA)
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in tta_img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# ========================Frequently modified parameters======================
# -----data related-----

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# ======================= Frequently modified parameters =====================
# -----data related-----

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# ========================Frequently modified parameters======================
# -----data related-----

View File

@ -10,6 +10,7 @@ batch_shapes_cfg = dict(
img_size=img_scale[
0], # The image scale of padding should be divided by pad_size_divisor
size_divisor=64) # Additional paddings for pixel scale
tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
# -----model related-----
# Basic size of multi-scale prior box
@ -35,8 +36,16 @@ mixup_beta = 8.0 # YOLOv5MixUp
loss_cls_weight = 0.3
loss_bbox_weight = 0.05
loss_obj_weight = 0.7
obj_level_weights = [4.0, 1.0, 0.25, 0.06]
simota_candidate_topk = 20
# The only difference between P6 and P5 in terms of
# hyperparameters is lr_factor
lr_factor = 0.2
# ===============================Unmodified in most cases====================
pre_transform = _base_.pre_transform
model = dict(
backbone=dict(arch='W', out_indices=(2, 3, 4, 5)),
neck=dict(
@ -52,16 +61,14 @@ model = dict(
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
prior_generator=dict(base_sizes=anchors, strides=strides),
simota_candidate_topk=20, # note
simota_candidate_topk=simota_candidate_topk, # note
# scaled based on number of detection layers
loss_cls=dict(loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
loss_obj=dict(loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
pre_transform = _base_.pre_transform
obj_level_weights=obj_level_weights))
mosiac4_pipeline = [
dict(
@ -138,6 +145,38 @@ val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
test_dataloader = val_dataloader
# The only difference between P6 and P5 in terms of
# hyperparameters is lr_factor
default_hooks = dict(param_scheduler=dict(lr_factor=0.2))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
# Config for Test Time Augmentation. (TTA)
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in tta_img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]

View File

@ -20,18 +20,18 @@ YOLOv8-P5 model structure
### COCO
| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | Config | Download |
| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) |
| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) |
| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) |
| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) |
| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) |
| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) |
| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) |
| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) |
| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) |
| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) |
| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download |
| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :--------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) |
| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | 39.9 | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) |
| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) |
| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | 46.8 | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) |
| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) |
| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | 52.3 | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) |
| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) |
| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | 54.4 | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) |
| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) |
| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | 55.0 | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) |
**Note**
@ -40,5 +40,6 @@ YOLOv8-P5 model structure
3. We provide [scripts](https://github.com/open-mmlab/mmyolo/tree/dev/tools/model_converters/yolov8_to_mmyolo.py) to convert official weights to MMYOLO.
4. `SyncBN` means using SyncBN, `AMP` indicates training with mixed precision.
5. The performance of `Mask Refine` training is for the weight performance officially released by YOLOv8. `Mask Refine` means refining bbox by mask while loading annotations and transforming after `YOLOv5RandomAffine`, and the L and X models use `Copy Paste`.
6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
## Citation

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# ========================Frequently modified parameters======================
# -----data related-----

View File

@ -0,0 +1,55 @@
# TODO: Need to solve the problem of multiple file_client_args parameters
# Example of a non-local (petrel) backend, kept for reference:
# _file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection/',
# 'data/': 's3://openmmlab/datasets/detection/'
# }))
# Images are read from the local disk by default.
_file_client_args = dict(backend='disk')
# Model config used for Test Time Augmentation; ``tools/test.py --tta``
# wraps the original model with it (``module=cfg.model``).
tta_model = dict(
type='mmdet.DetTTAModel',
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300))
# One resize branch is created per scale listed here.
img_scales = [(640, 640), (320, 320), (960, 960)]
# Layout of the pipeline below: 3 resize branches x 2 flip branches.
#
#                               LoadImageFromFile
#                    /                 |                     \
#                Resize              Resize                Resize  # noqa
#              /      \            /      \              /        \
#       RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip  # noqa
#           |          |          |         |           |          |
#          Pad        Pad        Pad       Pad         Pad        Pad
#           |          |          |         |           |          |
#      PackDetIn  PackDetIn  PackDetIn  PackDetIn  PackDetIn  PackDetIn  # noqa
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_file_client_args),
dict(
type='TestTimeAug',
transforms=[
# Stage 1: one keep-ratio resize per entry of ``img_scales``.
[
dict(type='mmdet.Resize', scale=s, keep_ratio=True)
for s in img_scales
],
# Stage 2: a flipped (prob=1.) and an unflipped (prob=0.) variant.
[
# ``RandomFlip`` must be placed before ``Pad``, otherwise
# bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
],
# Stage 3: pad to a square with the constant value 114.
[
dict(
type='mmdet.Pad',
pad_to_square=True,
pad_val=dict(img=(114.0, 114.0, 114.0))),
],
# Stage 4: pack the inputs together with the listed meta keys.
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]

View File

@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', 'yolox_p5_tta.py']
# ========================Frequently modified parameters======================
# -----data related-----

View File

@ -9,6 +9,9 @@ scaling_ratio_range = (0.5, 1.5)
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
test_img_scale = (416, 416)
tta_img_scales = [test_img_scale, (320, 320), (640, 640)]
# model settings
model = dict(
data_preprocessor=dict(batch_augments=[
@ -48,7 +51,7 @@ train_pipeline_stage1 = [
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='mmdet.Resize', scale=(416, 416), keep_ratio=True), # note
dict(type='mmdet.Resize', scale=test_img_scale, keep_ratio=True), # note
dict(
type='mmdet.Pad',
pad_to_square=True,
@ -63,3 +66,35 @@ test_pipeline = [
train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# Config for Test Time Augmentation (TTA).
# The pipeline combines every scale in ``tta_img_scales`` with a flipped and
# an unflipped variant, then pads and packs each branch.
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
# Stage 1: one keep-ratio resize per entry of ``tta_img_scales``.
[
dict(type='mmdet.Resize', scale=s, keep_ratio=True)
for s in tta_img_scales
],
# Stage 2: a flipped (prob=1.) and an unflipped (prob=0.) variant.
[
# ``RandomFlip`` must be placed before ``Pad``, otherwise
# bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
],
# Stage 3: pad to a square with the constant value 114.
[
dict(
type='mmdet.Pad',
pad_to_square=True,
pad_val=dict(img=(114.0, 114.0, 114.0))),
],
# Stage 4: pack the inputs together with the listed meta keys.
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]

View File

@ -0,0 +1,87 @@
# TTA Related Notes
## Test Time Augmentation (TTA)
MMYOLO supports TTA in v0.5.0+, so users can specify the `--tta` parameter to enable it during evaluation. Taking `YOLOv5-s` as an example, the single-GPU TTA test command is as follows:
```shell
python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta
```
For TTA to work properly, you must ensure that the variables `tta_model` and `tta_pipeline` are present in the configuration; see [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py) for details.
The default TTA in MMYOLO performs 3 multi-scale enhancements, followed by 2 horizontal flip enhancements, for a total of 6 parallel pipelines. Taking `YOLOv5-s` as an example, its TTA configuration is as follows:
```python
img_scales = [(640, 640), (320, 320), (960, 960)]
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
```
The schematic diagram is shown below.
```text
LoadImageFromFile
/ | \
(RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize)
/ \ / \ / \
RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip
| | | | | |
LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn
| | | | | |
PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn
```
You can modify `img_scales` to support different multi-scale enhancements, or you can insert a new pipeline to implement custom TTA requirements. Assuming you only want to do horizontal flip enhancements, the configuration should be modified as follows.
```python
tta_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
```

View File

@ -37,6 +37,7 @@ You can switch between Chinese and English documents in the top-right corner of
common_usage/resume_training.md
common_usage/syncbn.md
common_usage/amp_training.md
common_usage/tta.md
common_usage/plugins.md
common_usage/freeze_layers.md
common_usage/output_predictions.md

View File

@ -0,0 +1,87 @@
# 测试时增强相关说明
## 测试时增强 TTA
MMYOLO 在 v0.5.0+ 版本中增加对 TTA 的支持,用户可以在进行评估时候指定 `--tta` 参数使能。 以 `YOLOv5-s` 为例,其单卡 TTA 测试命令为:
```shell
python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta
```
TTA 功能的正常运行必须确保配置中存在 `tta_model` 和 `tta_pipeline` 两个变量,详情可以参考 [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py)。
MMYOLO 中默认的 TTA 会先执行 3 个多尺度增强,然后再执行 2 个水平翻转增强,一共 6 个并行的 pipeline。以 `YOLOv5-s` 为例,其 TTA 配置为:
```python
img_scales = [(640, 640), (320, 320), (960, 960)]
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
```
其示意图如下所示:
```text
LoadImageFromFile
/ | \
(RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize)
/ \ / \ / \
RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip
| | | | | |
LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn
| | | | | |
PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn
```
你可以修改 `img_scales` 来支持不同的多尺度增强,也可以插入新的 pipeline 从而实现自定义 TTA 需求。 假设你只想进行水平翻转增强,则配置应该修改为如下:
```python
tta_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
```

View File

@ -37,6 +37,7 @@
common_usage/resume_training.md
common_usage/syncbn.md
common_usage/amp_training.md
common_usage/tta.md
common_usage/plugins.md
common_usage/freeze_layers.md
common_usage/output_predictions.md

View File

@ -7,7 +7,7 @@ import cv2
import mmcv
import numpy as np
import torch
from mmcv.transforms import BaseTransform
from mmcv.transforms import BaseTransform, Compose
from mmcv.transforms.utils import cache_randomness
from mmdet.datasets.transforms import LoadAnnotations as MMDET_LoadAnnotations
from mmdet.datasets.transforms import Resize as MMDET_Resize
@ -18,6 +18,9 @@ from numpy import random
from mmyolo.registry import TRANSFORMS
# TODO: Waiting for MMCV support
TRANSFORMS.register_module(module=Compose, force=True)
@TRANSFORMS.register_module()
class YOLOv5KeepRatioResize(MMDET_Resize):

View File

@ -0,0 +1,59 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
from mmdet.utils import replace_cfg_vals, update_data_root
from mmengine import Config, DictAction
def parse_args():
    """Build the command-line interface of this script and parse it.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes
        ``config``, ``save_path`` and ``cfg_options``.
    """
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')

    cli = argparse.ArgumentParser(description='Print the whole config')
    # Positional: the config file to load and print.
    cli.add_argument('config', help='config file path')
    # Optional: where to dump the resolved config.
    cli.add_argument(
        '--save-path',
        default=None,
        help='save path of whole config, suffixed with .py, .json or .yml')
    # Optional: key=value overrides merged into the loaded config.
    cli.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help=cfg_options_help)
    return cli.parse_args()
def main():
    """Print the resolved config and optionally dump it to a file.

    The config given on the command line is loaded, passed through
    ``replace_cfg_vals`` and ``update_data_root``, merged with any
    ``--cfg-options`` overrides and printed. When ``--save-path`` is given,
    the config is also written to that path; missing parent directories are
    created first.

    Raises:
        AssertionError: If ``--save-path`` does not end with ``.py``,
            ``.json`` or ``.yml``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # replace the ${key} with the value of cfg.key
    cfg = replace_cfg_vals(cfg)

    # update data root according to MMDET_DATASETS
    update_data_root(cfg)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    print(f'Config:\n{cfg.pretty_text}')

    if args.save_path is not None:
        save_path = args.save_path

        suffix = os.path.splitext(save_path)[-1]
        assert suffix in ['.py', '.json', '.yml']

        # A bare file name (e.g. ``out.py``) has an empty directory part and
        # ``os.makedirs('')`` raises FileNotFoundError, so only create the
        # directory when there is one. ``exist_ok`` tolerates a directory
        # that already exists.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        cfg.dump(save_path)
        print(f'Config saving at {save_path}')
if __name__ == '__main__':
main()

View File

@ -4,7 +4,7 @@ import os
import os.path as osp
from mmdet.engine.hooks.utils import trigger_visualization_hook
from mmengine.config import Config, DictAction
from mmengine.config import Config, ConfigDict, DictAction
from mmengine.evaluator import DumpResults
from mmengine.runner import Runner
@ -31,6 +31,10 @@ def parse_args():
help='the prefix of the output json file without perform evaluation, '
'which is useful when you want to format the result to a specific '
'format and submit it to the test server')
parser.add_argument(
'--tta',
action='store_true',
help='Whether to use test time augmentation')
parser.add_argument(
'--show', action='store_true', help='show prediction results')
parser.add_argument(
@ -109,6 +113,23 @@ def main():
# Determine whether the custom metainfo fields are all lowercase
is_metainfo_lower(cfg)
if args.tta:
assert 'tta_model' in cfg, 'Cannot find ``tta_model`` in config.' \
" Can't use tta !"
assert 'tta_pipeline' in cfg, 'Cannot find ``tta_pipeline`` ' \
"in config. Can't use tta !"
cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
test_data_cfg = cfg.test_dataloader.dataset
while 'dataset' in test_data_cfg:
test_data_cfg = test_data_cfg['dataset']
# batch_shapes_cfg will force control the size of the output image,
# it is not compatible with tta.
if 'batch_shapes_cfg' in test_data_cfg:
test_data_cfg.batch_shapes_cfg = None
test_data_cfg.pipeline = cfg.tta_pipeline
# build the runner from config
if 'runner_type' not in cfg:
# build the default runner