Bump version to v0.5.0

pull/632/head v0.5.0
Haian Huang(深度眸) 2023-03-02 17:37:23 +08:00 committed by GitHub
commit dc85144fab
318 changed files with 18433 additions and 4780 deletions

View File

@@ -67,7 +67,7 @@ jobs:
command: |
pip install -U openmim
mim install git+https://github.com/open-mmlab/mmengine.git@main
mim install 'mmcv >= 2.0.0rc1'
mim install 'mmcv >= 2.0.0rc4'
mim install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
pip install -r requirements/albu.txt
pip install -r requirements/tests.txt
@@ -125,7 +125,7 @@ jobs:
command: |
docker exec mmyolo pip install -U openmim
docker exec mmyolo mim install -e /mmengine
docker exec mmyolo mim install 'mmcv >= 2.0.0rc1'
docker exec mmyolo mim install 'mmcv >= 2.0.0rc4'
docker exec mmyolo pip install -e /mmdetection
docker exec mmyolo pip install -r requirements/albu.txt
docker exec mmyolo pip install -r requirements/tests.txt

View File

@@ -110,6 +110,7 @@ def get_dataset_name(config):
CocoDataset='COCO',
YOLOv5CocoDataset='COCO',
CocoPanopticDataset='COCO',
YOLOv5DOTADataset='DOTA 1.0',
DeepFashionDataset='Deep Fashion',
LVISV05Dataset='LVIS v0.5',
LVISV1Dataset='LVIS v1',

View File

@@ -0,0 +1,448 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import importlib
import os
import os.path as osp
import pkgutil
import sys
import tempfile
from multiprocessing import Pool
from pathlib import Path
import numpy as np
import pandas as pd
# host_addr = 'https://gitee.com/open-mmlab'
host_addr = 'https://github.com/open-mmlab'
tools_list = ['tools', '.dev_scripts']
proxy_names = {
'mmdet': 'mmdetection',
'mmseg': 'mmsegmentation',
'mmcls': 'mmclassification'
}
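# modules whose registries are merged into the keyed repo's table when
# generating the markdown (here, mmengine is folded into the MMCV section)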
merge_module_keys = {'mmcv': ['mmengine']}
# exclude_prefix = {'mmcv': ['<class \'mmengine.model.']}
exclude_prefix = {}
markdown_title = '# MM 系列开源库注册表\n'
markdown_title += '(注意:本文档是通过 .dev_scripts/print_registers.py 脚本自动生成)'
def capitalize(repo_name):
lower = repo_name.lower()
if lower == 'mmcv':
return repo_name.upper()
elif lower.startswith('mm'):
return 'MM' + repo_name[2:]
return repo_name.capitalize()
def mkdir_or_exist(dir_name, mode=0o777):
if dir_name == '':
return
dir_name = osp.expanduser(dir_name)
os.makedirs(dir_name, mode=mode, exist_ok=True)
def parse_repo_name(repo_name):
proxy_names_rev = dict(zip(proxy_names.values(), proxy_names.keys()))
repo_name = proxy_names.get(repo_name, repo_name)
module_name = proxy_names_rev.get(repo_name, repo_name)
return repo_name, module_name
def git_pull_branch(repo_name, branch_name='', pulldir='.'):
mkdir_or_exist(pulldir)
exec_str = f'cd {pulldir};git init;git pull '
exec_str += f'{host_addr}/{repo_name}.git'
if branch_name:
exec_str += f' {branch_name}'
returncode = os.system(exec_str)
if returncode:
raise RuntimeError(
f'failed to get the remote repo, code: {returncode}')
def load_modules_from_dir(module_name, module_root, throw_error=False):
print(f'loading the {module_name} modules...')
# # install the dependencies
# if osp.exists(osp.join(pkg_dir, 'requirements.txt')):
# os.system('pip install -r requirements.txt')
# get all module list
module_list = []
error_dict = {}
module_root = osp.join(module_root, module_name)
assert osp.exists(module_root), \
f'cannot find the module root: {module_root}'
for _root, _dirs, _files in os.walk(module_root):
if (('__init__.py' not in _files)
and (osp.split(_root)[1] != '__pycache__')):
# add __init__.py file to the package
with open(osp.join(_root, '__init__.py'), 'w') as _:
pass
def _onerror(*args, **kwargs):
pass
for _finder, _name, _ispkg in pkgutil.walk_packages([module_root],
prefix=module_name +
'.',
onerror=_onerror):
try:
module = importlib.import_module(_name)
module_list.append(module)
except Exception as e:
if throw_error:
raise e
_error_msg = f'{type(e)}: {e}.'
print(f'cannot import the module: {_name} ({_error_msg})')
assert (_name not in error_dict), \
f'duplicate error name was found: {_name}'
error_dict[_name] = _error_msg
for module in module_list:
assert module.__file__.startswith(module_root), \
f'the importing path of package was wrong: {module.__file__}'
print('modules were loaded...')
return module_list, error_dict
def get_registries_from_modules(module_list):
registries = {}
objects_set = set()
    # import the Registry class here: importing at the top of the file is
    # not allowed, because mmengine must be loaded from the freshly pulled
    # temp package rather than any pre-installed one
from mmengine.registry import Registry
# only get the specific registries in module list
for module in module_list:
for obj_name in dir(module):
_obj = getattr(module, obj_name)
if isinstance(_obj, Registry):
objects_set.add(_obj)
for _obj in objects_set:
if _obj.scope not in registries:
registries[_obj.scope] = {}
registries_scope = registries[_obj.scope]
assert _obj.name not in registries_scope, \
f'multiple definition of {_obj.name} in registries'
registries_scope[_obj.name] = {
key: str(val)
for key, val in _obj.module_dict.items()
}
print('registries got...')
return registries
def merge_registries(src_dict, dst_dict):
assert type(src_dict) == type(dst_dict), \
(f'merge type is not supported: '
f'{type(dst_dict)} and {type(src_dict)}')
if isinstance(src_dict, str):
return
for _k, _v in dst_dict.items():
if (_k not in src_dict):
src_dict.update({_k: _v})
else:
assert isinstance(_v, (dict, str)) and \
isinstance(src_dict[_k], (dict, str)), \
'merge type is not supported: ' \
f'{type(_v)} and {type(src_dict[_k])}'
merge_registries(src_dict[_k], _v)
def exclude_registries(registries, exclude_key):
for _k in list(registries.keys()):
_v = registries[_k]
if isinstance(_v, str) and _v.startswith(exclude_key):
registries.pop(_k)
elif isinstance(_v, dict):
exclude_registries(_v, exclude_key)
def get_scripts_from_dir(root):
def _recurse(_dict, _chain):
if len(_chain) <= 1:
_dict[_chain[0]] = None
return
_key, *_chain = _chain
if _key not in _dict:
_dict[_key] = {}
_recurse(_dict[_key], _chain)
    # find all scripts under the root directory (any extension, not just
    # '.py' and '.sh'); mmengine's scandir cannot be used here because
    # importing mmengine is not allowed before the git pull
scripts = {}
for _subroot, _dirs, _files in os.walk(root):
for _file in _files:
_script = osp.join(osp.relpath(_subroot, root), _file)
_recurse(scripts, Path(_script).parts)
return scripts
def get_version_from_module_name(module_name, branch):
branch_str = str(branch) if branch is not None else ''
version_str = ''
try:
exec(f'import {module_name}')
_module = eval(f'{module_name}')
if hasattr(_module, '__version__'):
version_str = str(_module.__version__)
else:
version_str = branch_str
version_str = f' ({version_str})' if version_str else version_str
except (ImportError, AttributeError) as e:
print(f'can not get the version of module {module_name}: {e}')
return version_str
def print_tree(print_dict):
    # recursively print the dict tree
def _recurse(_dict, _connector='', n=0):
assert isinstance(_dict, dict), 'recursive type must be dict'
tree = ''
for idx, (_key, _val) in enumerate(_dict.items()):
sub_tree = ''
_last = (idx == (len(_dict) - 1))
if isinstance(_val, str):
_key += f' ({_val})'
elif isinstance(_val, dict):
sub_tree = _recurse(_val,
_connector + (' ' if _last else ''),
n + 1)
else:
assert (_val is None), f'unknown print type {_val}'
tree += ' ' + _connector + \
('└─' if _last else '├─') + f'({n}) {_key}' + '\n'
tree += sub_tree
return tree
for _pname, _pdict in print_dict.items():
print('-' * 100)
print(f'{_pname}\n' + _recurse(_pdict))
def divide_list_into_groups(_array, _maxsize_per_group):
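    # split _array into ceil(len(_array) / _maxsize_per_group) nearly
    # equal-sized numpy chunks, so no chunk exceeds the maximum size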
if not _array:
return _array
_groups = np.asarray(len(_array) / _maxsize_per_group)
if len(_array) % _maxsize_per_group:
_groups = np.floor(_groups) + 1
_groups = _groups.astype(int)
return np.array_split(_array, _groups)
def registries_to_html(registries, title=''):
max_col_per_row = 5
max_size_per_cell = 20
html = ''
table_data = []
# save repository registries
for registry_name, registry_dict in registries.items():
# filter the empty registries
if not registry_dict:
continue
registry_strings = []
if isinstance(registry_dict, dict):
registry_dict = list(registry_dict.keys())
elif isinstance(registry_dict, list):
pass
else:
raise TypeError(
f'unknown type of registry_dict {type(registry_dict)}')
for _k in registry_dict:
registry_strings.append(f'<li>{_k}</li>')
table_data.append((registry_name, registry_strings))
# sort the data list
table_data = sorted(table_data, key=lambda x: len(x[1]))
# split multi parts
table_data_multi_parts = []
for (registry_name, registry_strings) in table_data:
multi_parts = False
if len(registry_strings) > max_size_per_cell:
multi_parts = True
for cell_idx, registry_cell in enumerate(
divide_list_into_groups(registry_strings, max_size_per_cell)):
registry_str = ''.join(registry_cell.tolist())
registry_str = f'<ul>{registry_str}</ul>'
table_data_multi_parts.append([
registry_name if not multi_parts else
f'{registry_name} (part {cell_idx + 1})', registry_str
])
for table_data in divide_list_into_groups(table_data_multi_parts,
max_col_per_row):
table_data = list(zip(*table_data.tolist()))
html += dataframe_to_html(
pd.DataFrame([table_data[1]], columns=table_data[0]))
if html:
html = f'<div align=\'center\'><b>{title}</b></div>\n{html}'
html = f'<details open>{html}</details>\n'
return html
def tools_to_html(tools_dict, repo_name=''):
def _recurse(_dict, _connector, _result):
assert isinstance(_dict, dict), \
f'unknown recurse type: {_dict} ({type(_dict)})'
for _k, _v in _dict.items():
if _v is None:
if _connector not in _result:
_result[_connector] = []
_result[_connector].append(_k)
else:
_recurse(_v, osp.join(_connector, _k), _result)
table_data = {}
title = f'{capitalize(repo_name)} Tools'
_recurse(tools_dict, '', table_data)
return registries_to_html(table_data, title)
def dataframe_to_html(dataframe):
styler = dataframe.style
styler = styler.hide(axis='index')
styler = styler.format(na_rep='-')
styler = styler.set_properties(**{
'text-align': 'left',
'align': 'center',
'vertical-align': 'top'
})
styler = styler.set_table_styles([{
'selector':
'thead th',
'props':
'align:center;text-align:center;vertical-align:bottom'
}])
html = styler.to_html()
html = f'<div align=\'center\'>\n{html}</div>'
return html
def generate_markdown_by_repository(repo_name,
module_name,
branch,
pulldir,
throw_error=False):
# add the pull dir to the system path so that it can be found
if pulldir not in sys.path:
sys.path.insert(0, pulldir)
module_list, error_dict = load_modules_from_dir(
module_name, pulldir, throw_error=throw_error)
registries_tree = get_registries_from_modules(module_list)
if error_dict:
error_dict_name = 'error_modules'
assert (error_dict_name not in registries_tree), \
f'duplicate module name was found: {error_dict_name}'
registries_tree.update({error_dict_name: error_dict})
# get the tools files
for tools_name in tools_list:
assert (tools_name not in registries_tree), \
f'duplicate tools name was found: {tools_name}'
tools_tree = osp.join(pulldir, tools_name)
tools_tree = get_scripts_from_dir(tools_tree)
registries_tree.update({tools_name: tools_tree})
# print_tree(registries_tree)
# get registries markdown string
module_registries = registries_tree.get(module_name, {})
for merge_key in merge_module_keys.get(module_name, []):
merge_dict = registries_tree.get(merge_key, {})
merge_registries(module_registries, merge_dict)
for exclude_key in exclude_prefix.get(module_name, []):
exclude_registries(module_registries, exclude_key)
markdown_str = registries_to_html(
module_registries, title=f'{capitalize(repo_name)} Module Components')
# get tools markdown string
tools_registries = {}
for tools_name in tools_list:
tools_registries.update(
{tools_name: registries_tree.get(tools_name, {})})
markdown_str += tools_to_html(tools_registries, repo_name=repo_name)
version_str = get_version_from_module_name(module_name, branch)
title_str = f'\n\n## {capitalize(repo_name)}{version_str}\n'
# remove the pull dir from system path
if pulldir in sys.path:
sys.path.remove(pulldir)
return f'{title_str}{markdown_str}'
def parse_args():
parser = argparse.ArgumentParser(
description='print registries in openmmlab repositories')
parser.add_argument(
'-r',
'--repositories',
nargs='+',
default=['mmdet', 'mmcls', 'mmseg', 'mmengine', 'mmcv'],
type=str,
help='git repositories name in OpenMMLab')
parser.add_argument(
'-b',
'--branches',
nargs='+',
default=['3.x', '1.x', '1.x', 'main', '2.x'],
type=str,
help='the branch names of git repositories, the length of branches '
'must be same as the length of repositories')
parser.add_argument(
'-o', '--out', type=str, default='.', help='output path of the file')
parser.add_argument(
'--throw-error',
action='store_true',
default=False,
help='whether to throw error when trying to import modules')
args = parser.parse_args()
return args
# TODO: Refine
def main():
args = parse_args()
repositories = args.repositories
branches = args.branches
assert isinstance(repositories, list), \
'Type of repositories must be list'
if branches is None:
branches = [None] * len(repositories)
assert isinstance(branches, list) and \
len(branches) == len(repositories), \
'The length of branches must be same as ' \
'that of repositories'
assert isinstance(args.out, str), \
'The type of output path must be string'
# save path of file
mkdir_or_exist(args.out)
save_path = osp.join(args.out, 'registries_info.md')
with tempfile.TemporaryDirectory() as tmpdir:
# multi process init
pool = Pool(processes=len(repositories))
multi_proc_input_list = []
multi_proc_output_list = []
# get the git repositories
for branch, repository in zip(branches, repositories):
repo_name, module_name = parse_repo_name(repository)
pulldir = osp.join(tmpdir, f'tmp_{repo_name}')
git_pull_branch(
repo_name=repo_name, branch_name=branch, pulldir=pulldir)
multi_proc_input_list.append(
(repo_name, module_name, branch, pulldir, args.throw_error))
print('starting the multi process to get the registries')
for multi_proc_input in multi_proc_input_list:
multi_proc_output_list.append(
pool.apply_async(generate_markdown_by_repository,
multi_proc_input))
pool.close()
pool.join()
with open(save_path, 'w', encoding='utf-8') as fw:
fw.write(f'{markdown_title}\n')
for multi_proc_output in multi_proc_output_list:
markdown_str = multi_proc_output.get()
fw.write(f'{markdown_str}\n')
print(f'saved registries to the path: {save_path}')
if __name__ == '__main__':
main()
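
A rough usage sketch of the script above (the flags mirror `parse_args()`; the repo/branch selection and the output directory name are only examples):

```python
import subprocess

# Hypothetical invocation: scan two repos on their matching branches and
# write <out>/registries_info.md; the flags mirror parse_args() above.
subprocess.run(
    [
        'python', '.dev_scripts/print_registers.py',
        '-r', 'mmdet', 'mmengine',  # repositories to scan
        '-b', '3.x', 'main',        # one branch per repository
        '-o', 'out_dir',            # output dir for registries_info.md
    ],
    check=True)
```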

View File

@@ -5,7 +5,7 @@ repos:
hooks:
- id: flake8
- repo: https://gitee.com/openmmlab/mirrors-isort
rev: 5.10.1
rev: 5.11.5
hooks:
- id: isort
- repo: https://gitee.com/openmmlab/mirrors-yapf

View File

@@ -5,7 +5,7 @@ repos:
hooks:
- id: flake8
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
rev: 5.11.5
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf

View File

@@ -6,4 +6,3 @@ python:
version: 3.7
install:
- requirements: requirements/docs.txt
- requirements: requirements/readthedocs.txt

README.md (195 changes)
View File

@@ -1,5 +1,5 @@
<div align="center">
<img width="100%" src="https://user-images.githubusercontent.com/27466624/213130448-1f8529fd-2247-4ac4-851c-acd0148a49b9.png"/>
<img width="100%" src="https://user-images.githubusercontent.com/27466624/222385101-516e551c-49f5-480d-a135-4b24ee6dc308.png"/>
<div>&nbsp;</div>
<div align="center">
<b><font size="5">OpenMMLab website</font></b>
@@ -21,13 +21,13 @@
[![PyPI](https://img.shields.io/pypi/v/mmyolo)](https://pypi.org/project/mmyolo)
[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmyolo.readthedocs.io/en/latest/)
[![deploy](https://github.com/open-mmlab/mmyolo/workflows/deploy/badge.svg)](https://github.com/open-mmlab/mmyolo/actions)
[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo)
[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/master/LICENSE)
[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo)
[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/main/LICENSE)
[![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues)
[![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues)
[📘Documentation](https://mmyolo.readthedocs.io/en/latest/) |
[🛠Installation](https://mmyolo.readthedocs.io/en/latest/get_started.html) |
[🛠Installation](https://mmyolo.readthedocs.io/en/latest/get_started/installation.html) |
[👀Model Zoo](https://mmyolo.readthedocs.io/en/latest/model_zoo.html) |
[🆕Update News](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html) |
[🤔Reporting Issues](https://github.com/open-mmlab/mmyolo/issues/new/choose)
@@ -40,6 +40,26 @@ English | [简体中文](README_zh-CN.md)
</div>
<div align="center">
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
</div>
## 📄 Table of Contents
- [🥳 🚀 What's New](#--whats-new-)
@@ -57,10 +77,17 @@ English | [简体中文](README_zh-CN.md)
## 🥳 🚀 What's New [🔝](#-table-of-contents)
💎 **v0.4.0** was released on 18/1/2023:
💎 **v0.5.0** was released on 2/3/2023:
1. Implemented [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection model, and supports model deployment in [projects/easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy)
2. Added Chinese and English versions of [Algorithm principles and implementation with YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/algorithm_descriptions/yolov8_description.md)
1. Support [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) rotated object detection
2. Support for using mask annotation to improve [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection performance
3. Support [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) searchable NAS sub-networks as the backbone of YOLO series algorithms
4. Support calling [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) to distill the knowledge of RTMDet
5. Optimized the [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) documentation structure and comprehensively upgraded its content
6. Improve YOLOX mAP and training speed based on RTMDet training hyperparameters
7. Support calculation of model parameters and FLOPs, provide GPU latency data on T4 devices, and update [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/model_zoo.md)
8. Support test-time augmentation (TTA)
9. Support RTMDet, YOLOv8 and YOLOv7 assigner visualization
For release history and update details, please refer to [changelog](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html).
@@ -82,7 +109,7 @@ We are excited to announce our latest work on real-time object recognition tasks,
<img src="https://user-images.githubusercontent.com/12907710/208044554-1e8de6b5-48d8-44e4-a7b5-75076c7ebb71.png"/>
</div>
MMYOLO currently only implements the object detection algorithm, but it offers significant training acceleration compared to the MMDetection version. The training speed is 2.6 times faster than the previous version.
MMYOLO currently implements object detection and rotated object detection algorithms, but it offers significant training acceleration compared to the MMDetection version. The training speed is 2.6 times faster than the previous version.
## 📖 Introduction [🔝](#-table-of-contents)
@@ -109,21 +136,21 @@ The master branch works with **PyTorch 1.6+**.
<img src="https://user-images.githubusercontent.com/27466624/199999337-0544a4cb-3cbd-4f3e-be26-bcd9e74db7ff.jpg" alt="BaseModule-P5"/>
The figure above is contributed by RangeKing@GitHub, thank you very much!
And the figure of P6 model is in [model_design.md](docs/en/algorithm_descriptions/model_design.md).
And the figure of P6 model is in [model_design.md](docs/en/recommended_topics/model_design.md).
</details>
## 🛠️ Installation [🔝](#-table-of-contents)
MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started.md) for more detailed instructions.
MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started/installation.md) for more detailed instructions.
```shell
conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
conda activate open-mmlab
conda create -n mmyolo python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
conda activate mmyolo
pip install openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
mim install "mmdet>=3.0.0rc5,<3.1.0"
mim install "mmengine>=0.6.0"
mim install "mmcv>=2.0.0rc4,<2.1.0"
mim install "mmdet>=3.0.0rc6,<3.1.0"
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
# Install albumentations
@@ -140,49 +167,125 @@ The usage of MMYOLO is almost identical to MMDetection and all tutorials are str
For the parts that differ from MMDetection, we have also prepared user guides and advanced guides; please read our [documentation](https://mmyolo.readthedocs.io/en/latest/).
- User Guides
<details>
<summary>Get Started</summary>
- [Train & Test](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#train-test)
- [Learn about Configs with YOLOv5](docs/en/user_guides/config.md)
- [From getting started to deployment](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#get-started-to-deployment)
- [Custom Dataset](docs/en/user_guides/custom_dataset.md)
- [From getting started to deployment with YOLOv5](docs/en/user_guides/yolov5_tutorial.md)
- [Useful Tools](https://mmdetection.readthedocs.io/en/latest/user_guides/index.html#useful-tools)
- [Visualization](docs/en/user_guides/visualization.md)
- [Useful Tools](docs/en/user_guides/useful_tools.md)
- [Overview](docs/en/get_started/overview.md)
- [Dependencies](docs/en/get_started/dependencies.md)
- [Installation](docs/en/get_started/installation.md)
- [15 minutes object detection](docs/en/get_started/15_minutes_object_detection.md)
- [15 minutes rotated object detection](docs/en/get_started/15_minutes_rotated_object_detection.md)
- [15 minutes instance segmentation](docs/en/get_started/15_minutes_instance_segmentation.md)
- [Resources summary](docs/en/get_started/article.md)
- Algorithm description
</details>
- [Essential Basics](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#essential-basics)
- [Model design-related instructions](docs/en/algorithm_descriptions/model_design.md)
- [Algorithm principles and implementation](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#algorithm-principles-and-implementation)
- [Algorithm principles and implementation with YOLOv5](docs/en/algorithm_descriptions/yolov5_description.md)
- [Algorithm principles and implementation with RTMDet](docs/en/algorithm_descriptions/rtmdet_description.md)
- [Algorithm principles and implementation with YOLOv8](docs/en/algorithm_descriptions/yolov8_description.md)
<details>
<summary>Recommended Topics</summary>
- Deployment Guides
- [How to contribute code to MMYOLO](docs/en/recommended_topics/contributing.md)
- [MMYOLO model design](docs/en/recommended_topics/model_design.md)
- [Algorithm principles and implementation](docs/en/recommended_topics/algorithm_descriptions/)
- [Replace the backbone network](docs/en/recommended_topics/replace_backbone.md)
- [MMYOLO model complexity analysis](docs/en/recommended_topics/complexity_analysis.md)
- [Annotation-to-deployment workflow for custom dataset](docs/en/recommended_topics/labeling_to_deployment_tutorials.md)
- [Visualization](docs/en/recommended_topics/visualization.md)
- [Model deployment](docs/en/recommended_topics/deploy/)
- [Troubleshooting steps](docs/en/recommended_topics/troubleshooting_steps.md)
- [MMYOLO industry examples](docs/en/recommended_topics/industry_examples.md)
- [MM series repo essential basics](docs/en/recommended_topics/mm_basics.md)
- [Dataset preparation and description](docs/en/recommended_topics/dataset_preparation.md)
- [Basic Deployment Guide](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#basic-deployment-guide)
- [Basic Deployment Guide](docs/en/deploy/basic_deployment_guide.md)
- [Deployment Tutorial](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#deployment-tutorial)
- [YOLOv5 Deployment](docs/en/deploy/yolov5_deployment.md)
</details>
- Advanced Guides
<details>
<summary>Common Usage</summary>
- [Data flow](docs/en/advanced_guides/data_flow.md)
- [How to](docs/en/advanced_guides/how_to.md)
- [Plugins](docs/en/advanced_guides/plugins.md)
- [Resume training](docs/en/common_usage/resume_training.md)
- [Enabling and disabling SyncBatchNorm](docs/en/common_usage/syncbn.md)
- [Enabling AMP](docs/en/common_usage/amp_training.md)
- [TTA Related Notes](docs/en/common_usage/tta.md)
- [Add plugins to the backbone network](docs/en/common_usage/plugins.md)
- [Freeze layers](docs/en/common_usage/freeze_layers.md)
- [Output model predictions](docs/en/common_usage/output_predictions.md)
- [Set random seed](docs/en/common_usage/set_random_seed.md)
- [Module combination](docs/en/common_usage/module_combination.md)
- [Cross-library calls using mim](docs/en/common_usage/mim_usage.md)
- [Apply multiple Necks](docs/en/common_usage/multi_necks.md)
- [Specify specific device training or inference](docs/en/common_usage/specify_device.md)
- [Single and multi-channel application examples](docs/en/common_usage/single_multi_channel_applications.md)
</details>
<details>
<summary>Useful Tools</summary>
- [Browse coco json](docs/en/useful_tools/browse_coco_json.md)
- [Browse dataset](docs/en/useful_tools/browse_dataset.md)
- [Print config](docs/en/useful_tools/print_config.md)
- [Dataset analysis](docs/en/useful_tools/dataset_analysis.md)
- [Optimize anchors](docs/en/useful_tools/optimize_anchors.md)
- [Extract subcoco](docs/en/useful_tools/extract_subcoco.md)
- [Visualization scheduler](docs/en/useful_tools/vis_scheduler.md)
- [Dataset converters](docs/en/useful_tools/dataset_converters.md)
- [Download dataset](docs/en/useful_tools/download_dataset.md)
- [Log analysis](docs/en/useful_tools/log_analysis.md)
- [Model converters](docs/en/useful_tools/model_converters.md)
</details>
<details>
<summary>Basic Tutorials</summary>
- [Learn about configs with YOLOv5](docs/en/tutorials/config.md)
- [Data flow](docs/en/tutorials/data_flow.md)
- [Rotated detection](docs/en/tutorials/rotated_detection.md)
- [Custom Installation](docs/en/tutorials/custom_installation.md)
- [Common Warning Notes](docs/zh_cn/tutorials/warning_notes.md)
- [FAQ](docs/en/tutorials/faq.md)
</details>
<details>
<summary>Advanced Tutorials</summary>
- [MMYOLO cross-library application](docs/en/advanced_guides/cross-library_application.md)
</details>
<details>
<summary>Descriptions</summary>
- [Changelog](docs/en/notes/changelog.md)
- [Compatibility](docs/en/notes/compatibility.md)
- [Conventions](docs/en/notes/conventions.md)
- [Code Style](docs/en/notes/code_style.md)
</details>
## 📊 Overview of Benchmark and Model Zoo [🔝](#-table-of-contents)
<div align=center>
<img src="https://user-images.githubusercontent.com/17425982/222087414-168175cc-dae6-4c5c-a8e3-3109a152dd19.png"/>
</div>
Results and models are available in the [model zoo](docs/en/model_zoo.md).
<details open>
<summary><b>Supported Tasks</b></summary>
- [x] Object detection
- [x] Rotated object detection
</details>
<details open>
<summary><b>Supported Algorithms</b></summary>
- [x] [YOLOv5](configs/yolov5)
- [x] [YOLOX](configs/yolox)
- [x] [RTMDet](configs/rtmdet)
- [x] [RTMDet-Rotated](configs/rtmdet)
- [x] [YOLOv6](configs/yolov6)
- [x] [YOLOv7](configs/yolov7)
- [x] [PPYOLOE](configs/ppyoloe)
@@ -190,6 +293,16 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
</details>
<details open>
<summary><b>Supported Datasets</b></summary>
- [x] COCO Dataset
- [x] VOC Dataset
- [x] CrowdHuman Dataset
- [x] DOTA 1.0 Dataset
</details>
<details open>
<div align="center">
<b>Module Components</b>
@@ -256,7 +369,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
## ❓ FAQ [🔝](#-table-of-contents)
Please refer to the [FAQ](docs/en/notes/faq.md) for frequently asked questions.
Please refer to the [FAQ](docs/en/tutorials/faq.md) for frequently asked questions.
## 🙌 Contributing [🔝](#-table-of-contents)

View File

@@ -1,5 +1,5 @@
<div align="center">
<img src="https://user-images.githubusercontent.com/27466624/213156908-cef7cc50-97d1-4e0a-9e06-309bd0a49173.png" width="100%"/>
<img src="https://user-images.githubusercontent.com/27466624/222385182-1247251c-8fac-4e77-94f5-57580e0ce3bd.png" width="100%"/>
<div>&nbsp;</div>
<div align="center">
<b><font size="5">OpenMMLab 官网</font></b>
@@ -19,17 +19,17 @@
<div>&nbsp;</div>
[![PyPI](https://img.shields.io/pypi/v/mmyolo)](https://pypi.org/project/mmyolo)
[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmyolo.readthedocs.io/en/latest/)
[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmyolo.readthedocs.io/zh_CN/latest/)
[![deploy](https://github.com/open-mmlab/mmyolo/workflows/deploy/badge.svg)](https://github.com/open-mmlab/mmyolo/actions)
[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo)
[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/master/LICENSE)
[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo)
[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/main/LICENSE)
[![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues)
[![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues)
[📘使用文档](https://mmyolo.readthedocs.io/zh_CN/latest/) |
[🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started.html) |
[🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started/installation.html) |
[👀模型库](https://mmyolo.readthedocs.io/zh_CN/latest/model_zoo.html) |
[🆕更新日志](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html) |
[🆕更新日志](https://mmyolo.readthedocs.io/zh_CN/latest/notes/changelog.html) |
[🤔报告问题](https://github.com/open-mmlab/mmyolo/issues/new/choose)
</div>
@@ -40,6 +40,26 @@
</div>
<div align="center">
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
</div>
## 📄 Table of Contents
- [🥳 🚀 最新进展](#--最新进展-)
@@ -58,10 +78,17 @@
## 🥳 🚀 最新进展 [🔝](#-table-of-contents)
💎 **v0.4.0** 版本已经在 2023.1.18 发布:
💎 **v0.5.0** 版本已经在 2023.3.2 发布:
1. 实现了 [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 目标检测模型,并通过 [projects/easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy) 支持了模型部署
2. 新增了中英文版本的 [YOLOv8 原理和实现全解析文档](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/algorithm_descriptions/yolov8_description.md)
1. 支持了 [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) 旋转框目标检测任务和算法
2. [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 支持使用 mask 标注提升目标检测模型性能
3. 支持 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) 搜索的 NAS 子网络作为 YOLO 系列算法的 backbone
4. 支持调用 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) 对 RTMDet 进行知识蒸馏
5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) 文档结构优化,内容全面升级
6. 基于 RTMDet 训练超参提升 YOLOX 精度和训练速度
7. 支持模型参数量、FLOPs 计算和提供 T4 设备上 GPU 延时数据,并更新了 [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/model_zoo.md)
8. 支持测试时增强 TTA
9. 支持 RTMDet、YOLOv8 和 YOLOv7 assigner 可视化
我们提供了实用的**脚本命令速查表**
@@ -103,7 +130,7 @@
<img src="https://user-images.githubusercontent.com/12907710/208044554-1e8de6b5-48d8-44e4-a7b5-75076c7ebb71.png"/>
</div>
MMYOLO 中目前仅仅实现了目标检测算法,但是相比 MMDetection 版本有显著训练加速,训练速度相比原先版本提升 2.6 倍。
MMYOLO 中目前实现了目标检测和旋转框目标检测算法,但是相比 MMDetection 版本有显著训练加速,训练速度相比原先版本提升 2.6 倍。
## 📖 简介 [🔝](#-table-of-contents)
@@ -130,21 +157,21 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具
<img src="https://user-images.githubusercontent.com/27466624/199999337-0544a4cb-3cbd-4f3e-be26-bcd9e74db7ff.jpg" alt="基类-P5"/>
图为 RangeKing@GitHub 提供,非常感谢!
P6 模型图详见 [model_design.md](docs/zh_CN/algorithm_descriptions/model_design.md)。
P6 模型图详见 [model_design.md](docs/zh_cn/recommended_topics/model_design.md)。
</details>
## 🛠️ 安装 [🔝](#-table-of-contents)
MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started.md)。
MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started/installation.md)。
```shell
conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
conda activate open-mmlab
conda create -n mmyolo python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
conda activate mmyolo
pip install openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
mim install "mmdet>=3.0.0rc5,<3.1.0"
mim install "mmengine>=0.6.0"
mim install "mmcv>=2.0.0rc4,<2.1.0"
mim install "mmdet>=3.0.0rc6,<3.1.0"
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
# Install albumentations
@@ -161,53 +188,126 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
针对和 MMDetection 不同的部分,我们也准备了用户指南和进阶指南,请阅读我们的 [文档](https://mmyolo.readthedocs.io/zh_CN/latest/) 。
- 用户指南
<details>
<summary>开启 MMYOLO 之旅</summary>
- [训练 & 测试](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#训练-测试)
- [学习 YOLOv5 配置文件](docs/zh_cn/user_guides/config.md)
- [从入门到部署全流程](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#从入门到部署全流程)
- [自定义数据集](docs/zh_cn/user_guides/custom_dataset.md)
- [YOLOv5 从入门到部署全流程](docs/zh_cn/user_guides/yolov5_tutorial.md)
- [实用工具](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#实用工具)
- [可视化教程](docs/zh_cn/user_guides/visualization.md)
- [实用工具](docs/zh_cn/user_guides/useful_tools.md)
- [概述](docs/zh_cn/get_started/overview.md)
- [依赖](docs/zh_cn/get_started/dependencies.md)
- [安装和验证](docs/zh_cn/get_started/installation.md)
- [15 分钟上手 MMYOLO 目标检测](docs/zh_cn/get_started/15_minutes_object_detection.md)
- [15 分钟上手 MMYOLO 旋转框目标检测](docs/zh_cn/get_started/15_minutes_rotated_object_detection.md)
- [15 分钟上手 MMYOLO 实例分割](docs/zh_cn/get_started/15_minutes_instance_segmentation.md)
- [中文解读资源汇总](docs/zh_cn/get_started/article.md)
- 算法描述
</details>
- [必备基础](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#基础内容)
- [模型设计相关说明](docs/zh_cn/algorithm_descriptions/model_design.md)
- [算法原理和实现全解析](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#算法原理和实现全解析)
- [YOLOv5 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov5_description.md)
- [YOLOv6 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov6_description.md)
- [RTMDet 原理和实现全解析](docs/zh_cn/algorithm_descriptions/rtmdet_description.md)
- [YOLOv8 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov8_description.md)
<details>
<summary>推荐专题</summary>
- 算法部署
- [如何给 MMYOLO 贡献代码](docs/zh_cn/recommended_topics/contributing.md)
- [MMYOLO 模型结构设计](docs/zh_cn/recommended_topics/model_design.md)
- [原理和实现全解析](docs/zh_cn/recommended_topics/algorithm_descriptions/)
- [轻松更换主干网络](docs/zh_cn/recommended_topics/replace_backbone.md)
- [MMYOLO 模型复杂度分析](docs/zh_cn/recommended_topics/complexity_analysis.md)
- [标注+训练+测试+部署全流程](docs/zh_cn/recommended_topics/labeling_to_deployment_tutorials.md)
- [关于可视化的一切](docs/zh_cn/recommended_topics/visualization.md)
- [模型部署流程](docs/zh_cn/recommended_topics/deploy/)
- [常见错误排查步骤](docs/zh_cn/recommended_topics/troubleshooting_steps.md)
- [MMYOLO 产业范例介绍](docs/zh_cn/recommended_topics/industry_examples.md)
- [MM 系列 Repo 必备基础](docs/zh_cn/recommended_topics/mm_basics.md)
- [数据集准备和说明](docs/zh_cn/recommended_topics/dataset_preparation.md)
- [部署必备教程](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id1)
- [部署必备教程](docs/zh_cn/deploy/basic_deployment_guide.md)
- [部署全流程说明](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id2)
- [YOLOv5 部署全流程说明](docs/zh_cn/deploy/yolov5_deployment.md)
</details>
- 进阶指南
<details>
<summary>常用功能</summary>
- [模块组合](docs/zh_cn/advanced_guides/module_combination.md)
- [数据流](docs/zh_cn/advanced_guides/data_flow.md)
- [How to](docs/zh_cn/advanced_guides/how_to.md)
- [插件](docs/zh_cn/advanced_guides/plugins.md)
- [恢复训练](docs/zh_cn/common_usage/resume_training.md)
- [开启和关闭 SyncBatchNorm](docs/zh_cn/common_usage/syncbn.md)
- [开启混合精度训练](docs/zh_cn/common_usage/amp_training.md)
- [测试时增强相关说明](docs/zh_cn/common_usage/tta.md)
- [给主干网络增加插件](docs/zh_cn/common_usage/plugins.md)
- [冻结指定网络层权重](docs/zh_cn/common_usage/freeze_layers.md)
- [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md)
- [设置随机种子](docs/zh_cn/common_usage/set_random_seed.md)
- [算法组合替换教程](docs/zh_cn/common_usage/module_combination.md)
- [使用 mim 跨库调用其他 OpenMMLab 仓库的脚本](docs/zh_cn/common_usage/mim_usage.md)
- [应用多个 Neck](docs/zh_cn/common_usage/multi_necks.md)
- [指定特定设备训练或推理](docs/zh_cn/common_usage/specify_device.md)
- [单通道和多通道应用案例](docs/zh_cn/common_usage/single_multi_channel_applications.md)
- [MM 系列开源库注册表](docs/zh_cn/common_usage/registries_info.md)
- [解读文章和资源汇总](docs/zh_cn/article.md)
</details>
<details>
<summary>实用工具</summary>
- [可视化 COCO 标签](docs/zh_cn/useful_tools/browse_coco_json.md)
- [可视化数据集](docs/zh_cn/useful_tools/browse_dataset.md)
- [打印完整配置文件](docs/zh_cn/useful_tools/print_config.md)
- [可视化数据集分析结果](docs/zh_cn/useful_tools/dataset_analysis.md)
- [优化锚框尺寸](docs/zh_cn/useful_tools/optimize_anchors.md)
- [提取 COCO 子集](docs/zh_cn/useful_tools/extract_subcoco.md)
- [可视化优化器参数策略](docs/zh_cn/useful_tools/vis_scheduler.md)
- [数据集转换](docs/zh_cn/useful_tools/dataset_converters.md)
- [数据集下载](docs/zh_cn/useful_tools/download_dataset.md)
- [日志分析](docs/zh_cn/useful_tools/log_analysis.md)
- [模型转换](docs/zh_cn/useful_tools/model_converters.md)
</details>
<details>
<summary>基础教程</summary>
- [学习 YOLOv5 配置文件](docs/zh_cn/tutorials/config.md)
- [数据流](docs/zh_cn/tutorials/data_flow.md)
- [旋转目标检测](docs/zh_cn/tutorials/rotated_detection.md)
- [自定义安装](docs/zh_cn/tutorials/custom_installation.md)
- [常见警告说明](docs/zh_cn/tutorials/warning_notes.md)
- [常见问题](docs/zh_cn/tutorials/faq.md)
</details>
<details>
<summary>进阶教程</summary>
- [MMYOLO 跨库应用解析](docs/zh_cn/advanced_guides/cross-library_application.md)
</details>
<details>
<summary>说明</summary>
- [更新日志](docs/zh_cn/notes/changelog.md)
- [兼容性说明](docs/zh_cn/notes/compatibility.md)
- [默认约定](docs/zh_cn/notes/conventions.md)
- [代码规范](docs/zh_cn/notes/code_style.md)
</details>
## 📊 基准测试和模型库 [🔝](#-table-of-contents)
<div align=center>
<img src="https://user-images.githubusercontent.com/17425982/222087414-168175cc-dae6-4c5c-a8e3-3109a152dd19.png"/>
</div>
测试结果和模型可以在 [模型库](docs/zh_cn/model_zoo.md) 中找到。
<details open>
<summary><b>支持的任务</b></summary>
- [x] 目标检测
- [x] 旋转框目标检测
</details>
<details open>
<summary><b>支持的算法</b></summary>
- [x] [YOLOv5](configs/yolov5)
- [x] [YOLOX](configs/yolox)
- [x] [RTMDet](configs/rtmdet)
- [x] [RTMDet-Rotated](configs/rtmdet)
- [x] [YOLOv6](configs/yolov6)
- [x] [YOLOv7](configs/yolov7)
- [x] [PPYOLOE](configs/ppyoloe)
@@ -215,6 +315,16 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
</details>
<details open>
<summary><b>支持的数据集</b></summary>
- [x] COCO Dataset
- [x] VOC Dataset
- [x] CrowdHuman Dataset
- [x] DOTA 1.0 Dataset
</details>
<details open>
<div align="center">
<b>模块组件</b>
@@ -281,7 +391,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
## ❓ 常见问题 [🔝](#-table-of-contents)
请参考 [FAQ](docs/zh_cn/notes/faq.md) 了解其他用户的常见问题。
请参考 [FAQ](docs/zh_cn/tutorials/faq.md) 了解其他用户的常见问题。
## 🙌 贡献指南 [🔝](#-table-of-contents)

View File

@@ -0,0 +1,57 @@
# TODO: Need to solve the problem of multiple file_client_args parameters
# _file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection/',
# 'data/': 's3://openmmlab/datasets/detection/'
# }))
_file_client_args = dict(backend='disk')
tta_model = dict(
type='mmdet.DetTTAModel',
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300))
img_scales = [(640, 640), (320, 320), (960, 960)]
# LoadImageFromFile
# / | \
# (RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) # noqa
# / \ / \ / \
# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa
# | | | | | |
# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn
# | | | | | |
# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
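
A minimal sketch of consuming this TTA base config (it assumes mmengine is installed and the file is saved as `configs/_base_/det_p5_tta.py`, the path the next diff inherits from):

```python
from mmengine.config import Config

# Load the base config and inspect the TTA pieces defined above;
# the file path is an assumption based on the _base_ reference below.
cfg = Config.fromfile('configs/_base_/det_p5_tta.py')
print(cfg.tta_model['type'])  # 'mmdet.DetTTAModel'
print(cfg.img_scales)         # [(640, 640), (320, 320), (960, 960)]
```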

View File

@@ -0,0 +1,56 @@
# Compared to other models of the same scale, this configuration consumes
# too much GPU memory and has not been validated yet
_base_ = 'ppyoloe_plus_s_fast_8xb8-80e_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
num_last_epochs = 5
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 2
load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco/ppyoloe_plus_s_fast_8xb8-80e_coco_20230101_154052-9fee7619.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(head_module=dict(num_classes=num_classes)),
train_cfg=dict(
initial_assigner=dict(num_classes=num_classes),
assigner=dict(num_classes=num_classes)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
default_hooks = dict(
param_scheduler=dict(
warmup_min_iter=10,
warmup_epochs=3,
total_epochs=int(max_epochs * 1.2)))
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
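
A hedged sketch of launching fine-tuning with this config (the config file name is an assumption inferred from the batch size, schedule, and dataset, since the diff does not show it):

```python
import subprocess

# Single-GPU fine-tuning on the cat dataset; the config path is hypothetical.
subprocess.run(
    ['python', 'tools/train.py',
     'configs/ppyoloe/ppyoloe_plus_s_fast_1xb12-40e_cat.py'],
    check=True)
```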

View File

@@ -1,4 +1,4 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# dataset settings
data_root = 'data/coco/'

View File

@@ -0,0 +1,79 @@
# Projects Based on MMRazor
There are many research works and pre-trained models built on MMRazor. We list some of them as examples of how to use MMRazor slimmable models in downstream frameworks. As this page may be incomplete, please feel free to contribute more efficient MMRazor models to keep it up to date.
## Description
This is an implementation of MMRazor's searchable backbone application: we provide detection configs and models for MMRazor in MMYOLO.
### Backbone support
Here are the Neural Architecture Search (NAS) backbones from MMRazor that support the YOLO series; a sketch of how they are wired in follows the list below. If you are looking for MMRazor models for the backbone only, you can refer to the MMRazor [ModelZoo](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/get_started/model_zoo.md) and the corresponding repository.
- [x] [AttentiveMobileNetV3](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py)
- [x] [SearchableShuffleNetV2](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/spos_shufflenet_supernet.py)
- [x] [SearchableMobileNetV2](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/spos_mobilenet_supernet.py)
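
A condensed sketch of how one of these backbones is wired into a YOLO detector (it mirrors the `yolov5_s_spos_shufflenetv2` config added later in this commit; the subnet YAML path is a placeholder):

```python
_base_ = [
    'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py',
    '../../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
]

# Freeze the searched architecture and swap it in as the detector backbone.
nas_backbone = dict(
    type='mmrazor.sub_model',
    fix_subnet='<searched-subnet>.yaml',  # placeholder: subnet description
    cfg=_base_.nas_backbone,
    extra_prefix='architecture.backbone.')
_base_.model.backbone = nas_backbone
```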
## Usage
### Prerequisites
- [MMRazor v1.0.0rc2](https://github.com/open-mmlab/mmrazor/tree/v1.0.0rc2) or higher (dev-1.x)
Install MMRazor using MIM.
```shell
mim install mmengine
mim install "mmrazor>=1.0.0rc2"
```
Install MMRazor from source
```shell
git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git
cd mmrazor
# Install MMRazor
mim install -v -e .
```
### Training commands
In MMYOLO's root directory, if you want to use a single GPU for training, run the following command to train the model:
```bash
CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_train.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py
```
If you want to use several GPUs to train in parallel, you can use the following command:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py
```
### Testing commands
In MMYOLO's root directory, run the following command to test the model:
```bash
CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py ${CHECKPOINT_PATH}
```
## Results and Models
Here we provide the baseline versions of the YOLO series with NAS backbones.
| Model | size | box AP | Params(M) | FLOPs(G) | Config | Download |
| :------------------------: | :--: | :----: | :----------: | :------: | :---------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| yolov5-s | 640 | 37.7 | 7.235 | 8.265 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
| yolov5_s_spos_shufflenetv2 | 640 | 38.0 | 7.04(-2.7%) | 7.03 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco_20230211_220635-578be9a9.pth) \| log |
| yolov6-s | 640 | 44.0 | 18.869 | 24.253 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035.log.json) |
| yolov6_l_attentivenas_a6 | 640 | 45.3 | 18.38(-2.6%) | 8.49 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco_20230211_222409-dcc72668.pth) \| log |
| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
| rtmdet_tiny_ofa_lat31 | 960 | 41.3 | 3.91(-18.5%) | 6.09 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco_20230214_210623-449bb2a0.pth) \| log |
**Note**:
1. For a fair comparison, the training configuration is kept consistent with the original configuration, which yields an improvement of about 0.2-0.5% AP.
2. `yolov5_s_spos_shufflenetv2` achieves 38.0% AP with only 7.042M parameters by directly replacing the backbone, and outperforms `yolov5_s`, which has a similar size, by more than 0.3% AP.
3. With the efficient `yolov6_l_attentivenas_a6` backbone, the input channels of `YOLOv6RepPAFPN` are reduced. Meanwhile, the **deepen_factor** is modified and the neck is made deeper to restore the AP.
4. With the `rtmdet_tiny_ofa_lat31` backbone, which has only 3.315M parameters and 3.634G FLOPs, we can raise the input resolution to 960 and, at a model size similar to `rtmdet_tiny`, exceed `rtmdet_tiny` by 0.4% AP while reducing the whole model to 3.91M parameters.

View File

@@ -0,0 +1,124 @@
_base_ = [
'mmrazor::_base_/nas_backbones/ofa_mobilenetv3_supernet.py',
'../../rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
]
checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/ofa/ofa_mobilenet_subnet_8xb256_in1k_note8_lat%4031ms_top1%4072.8_finetune%4025.py_20221214_0939-981a8b2a.pth' # noqa
fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/OFA_SUBNET_NOTE8_LAT31.yaml' # noqa
deepen_factor = 0.167
widen_factor = 1.0
channels = [40, 112, 160]
train_batch_size_per_gpu = 16
img_scale = (960, 960)
_base_.nas_backbone.out_indices = (2, 4, 5)
_base_.nas_backbone.conv_cfg = dict(type='mmrazor.OFAConv2d')
_base_.nas_backbone.init_cfg = dict(
type='Pretrained',
checkpoint=checkpoint_file,
prefix='architecture.backbone.')
nas_backbone = dict(
type='mmrazor.sub_model',
fix_subnet=fix_subnet,
cfg=_base_.nas_backbone,
extra_prefix='backbone.')
_base_.model.backbone = nas_backbone
_base_.model.neck.widen_factor = widen_factor
_base_.model.neck.deepen_factor = deepen_factor
_base_.model.neck.in_channels = channels
_base_.model.neck.out_channels = channels[0]
_base_.model.bbox_head.head_module.in_channels = channels[0]
_base_.model.bbox_head.head_module.feat_channels = channels[0]
_base_.model.bbox_head.head_module.widen_factor = widen_factor
_base_.model.test_cfg = dict(
multi_label=True,
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100)
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Mosaic',
img_scale=img_scale,
use_cached=True,
max_cached_images=20,
random_pop=False,
pad_val=114.0),
dict(
type='mmdet.RandomResize',
scale=(1280, 1280),
ratio_range=(0.5, 2.0), # note
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(
type='YOLOXMixUp',
img_scale=(960, 960),
ratio_range=(1.0, 1.0),
max_cached_images=10,
use_cached=True,
random_pop=False,
pad_val=(114, 114, 114),
prob=0.5),
dict(type='mmdet.PackDetInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='mmdet.RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0), # note
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(type='mmdet.PackDetInputs')
]
train_dataloader = dict(
batch_size=train_batch_size_per_gpu, dataset=dict(pipeline=train_pipeline))
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='mmdet.Resize', scale=(960, 960), keep_ratio=True),
dict(type='mmdet.Pad', size=(960, 960), pad_val=dict(img=(114, 114, 114))),
dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
test_dataloader = val_dataloader
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
switch_pipeline=train_pipeline_stage2)
]
find_unused_parameters = True
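Since this config pulls its backbone from MMRazor (the `mmrazor::` base file and the `mmrazor.sub_model` wrapper), MMRazor needs to be installed before training. A minimal sketch of the workflow, with the config path left as a placeholder:

```bash
# install MMRazor from source (dev-1.x), same as in the distillation README below
git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git
cd mmrazor && mim install -v -e . && cd ..
# then train as usual; ${RAZOR_CONFIG} stands for this config file's path
python tools/train.py ${RAZOR_CONFIG}
```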

View File

@ -0,0 +1,29 @@
_base_ = [
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py',
'../../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
]
checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_v3.pth' # noqa
fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_subnet_cfg_v3.yaml' # noqa
widen_factor = 1.0
channels = [160, 320, 640]
_base_.nas_backbone.out_indices = (1, 2, 3)
_base_.nas_backbone.init_cfg = dict(
type='Pretrained',
checkpoint=checkpoint_file,
prefix='architecture.backbone.')
nas_backbone = dict(
type='mmrazor.sub_model',
fix_subnet=fix_subnet,
cfg=_base_.nas_backbone,
extra_prefix='architecture.backbone.')
_base_.model.backbone = nas_backbone
_base_.model.neck.widen_factor = widen_factor
_base_.model.neck.in_channels = channels
_base_.model.neck.out_channels = channels
_base_.model.bbox_head.head_module.in_channels = channels
_base_.model.bbox_head.head_module.widen_factor = widen_factor
find_unused_parameters = True

View File

@ -0,0 +1,35 @@
_base_ = [
'mmrazor::_base_/nas_backbones/attentive_mobilenetv3_supernet.py',
'../../yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py'
]
checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth' # noqa
fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml' # noqa
deepen_factor = 1.2
widen_factor = 1
channels = [40, 128, 224]
mid_channels = [40, 128, 224]
_base_.train_dataloader.batch_size = 16
_base_.nas_backbone.out_indices = (2, 4, 6)
_base_.nas_backbone.conv_cfg = dict(type='mmrazor.BigNasConv2d')
_base_.nas_backbone.norm_cfg = dict(type='mmrazor.DynamicBatchNorm2d')
_base_.nas_backbone.init_cfg = dict(
type='Pretrained',
checkpoint=checkpoint_file,
prefix='architecture.backbone.')
nas_backbone = dict(
type='mmrazor.sub_model',
fix_subnet=fix_subnet,
cfg=_base_.nas_backbone,
extra_prefix='backbone.')
_base_.model.backbone = nas_backbone
_base_.model.neck.widen_factor = widen_factor
_base_.model.neck.deepen_factor = deepen_factor
_base_.model.neck.in_channels = channels
_base_.model.neck.out_channels = mid_channels
_base_.model.bbox_head.head_module.in_channels = mid_channels
_base_.model.bbox_head.head_module.widen_factor = widen_factor
find_unused_parameters = True

View File

@ -21,20 +21,53 @@ RTMDet-l model structure
## Results and Models
## Object Detection
### Object Detection
| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download |
| :---------: | :--: | :----: | :-------: | :------: | :------------------: | :-------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
| RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) |
| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |
| Model | size | Params(M) | FLOPs(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download |
| :------------: | :--: | :-------: | :------: | :------------------: | :---------: | :---------: | :---------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
| RTMDet-tiny \* | 640 | 4.8 | 8.1 | 0.98 | 41.8 (+0.8) | 43.2 (+0.5) | [config](./distillation/kd_tiny_rtmdet_s_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) |
| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) |
| RTMDet-s \* | 640 | 8.89 | 14.8 | 1.22 | 45.7 (+1.1) | 47.3 (+1.5) | [config](./distillation/kd_s_rtmdet_m_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) |
| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
| RTMDet-m \* | 640 | 24.71 | 39.27 | 1.62 | 50.2 (+0.9) | 51.9 (+1.0) | [config](./distillation/kd_m_rtmdet_l_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) |
| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
| RTMDet-l \* | 640 | 52.3 | 80.23 | 2.44 | 52.3 (+0.9) | 53.7 (+0.6) | [config](./distillation/kd_l_rtmdet_x_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) |
| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |
**Note**:
1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS.
2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement.
3. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flip transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it, as sketched after this list. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
4. \* means checkpoints are trained with knowledge distillation. More details can be found in [RTMDet distillation](./distillation).
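A minimal sketch of enabling TTA at test time, assuming the standard MMYOLO test entry point (substitute your own config and checkpoint paths):

```bash
python tools/test.py \
    configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \
    ${CHECKPOINT_PATH} \
    --tta
```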
### Rotated Object Detection
RTMDet-R achieves state-of-the-art performance on various remote sensing datasets.
| Backbone | pretrain | Epoch | Batch Size | Aug | mmAP | mAP50 | mAP75 | Mem (GB) | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download |
| :---------: | :------: | :---: | :--------: | :-------------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------------------: | :--------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| RTMDet-tiny | IN | 36 | 1xb8 | RR | 46.94 | 75.07 | 50.11 | 12.7 | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210.log.json) |
| RTMDet-s | IN | 36 | 1xb8 | RR | 48.99 | 77.33 | 52.65 | 16.6 | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307.log.json) |
| RTMDet-m | IN | 36 | 2xb4 | RR | 50.38 | 78.43 | 54.28 | 10.9 | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237.log.json) |
| RTMDet-l | IN | 36 | 2xb4 | RR | 50.61 | 78.66 | 54.95 | 16.1 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544.log.json) |
| RTMDet-tiny | IN | 36 | 1xb8 | MS+RR | - | - | - | | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py) | \| |
| RTMDet-s | IN | 36 | 1xb8 | MS+RR | - | - | - | | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py) | \| |
| RTMDet-m | IN | 36 | 2xb4 | MS+RR | - | - | - | | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py) | \| |
| RTMDet-l | IN | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py) | \| |
| RTMDet-l | COCO | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py) | \| |
| RTMDet-l | IN | 100 | 2xb4 | Mixup+Mosaic+RR | 55.05 | 80.14 | 61.32 | 19.6 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735.log.json) |
**Note**:
1. Please follow the [Rotated Object Detection](../../docs/zh_cn/tutorials/rotated_detection.md) doc to get started with rotated detection; a minimal training command is sketched after this list.
2. We follow the latest metrics from the DOTA evaluation server; the original VOC-format mAP is now mAP50.
3. All models are trained with an image size of 1024\*1024.
4. `IN` means ImageNet pretraining, `COCO` means COCO pretraining.
5. For Aug, RR means `RandomRotate` and MS means the multi-scale augmentation used in data preparation.
6. The inference speed here is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and with NMS.
7. Currently, the training process of RTMDet-R tiny is unstable and may fluctuate by about 1% AP; we will continue to investigate why.
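As referenced in note 1, a minimal single-GPU training sketch for the rotated configs (assuming MMRotate is installed, since these configs rely on `mmrotate.*` components):

```bash
python tools/train.py configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py
```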
## Citation

View File

@ -0,0 +1,146 @@
# Distill RTM Detectors Based on MMRazor
## Description
To further improve the model accuracy while not introducing much additional
computation cost, we apply feature-based distillation to the training phase
of these RTM detectors. In summary, our distillation strategy is threefold:
(1) Inspired by [PKD](https://arxiv.org/abs/2207.02039), we first normalize
the intermediate feature maps to have zero mean and unit variance before
calculating the distillation loss.
(2) Inspired by [CWD](https://arxiv.org/abs/2011.13256), we adopt the channel-wise
distillation paradigm, which pays more attention to the most salient regions
of each channel.
(3) Inspired by [DAMO-YOLO](https://arxiv.org/abs/2211.15444), the distillation
process is split into two stages. 1) The teacher distills the student during the
first stage (280 epochs) on the strong mosaic domain. 2) The student finetunes
itself on the no-mosaic domain during the second stage (20 epochs).
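For intuition, here is a standalone PyTorch sketch of strategies (1) and (2): normalize the feature maps, then compute a channel-wise KL divergence. This is an illustrative approximation, not MMRazor's actual `ChannelWiseDivergence`/PKD implementation:

```python
import torch
import torch.nn.functional as F


def pkd_normalize(feat: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Normalize each channel to zero mean and unit variance (strategy 1)."""
    mean = feat.mean(dim=(2, 3), keepdim=True)
    std = feat.std(dim=(2, 3), keepdim=True)
    return (feat - mean) / (std + eps)


def channel_wise_divergence(feat_s: torch.Tensor,
                            feat_t: torch.Tensor,
                            tau: float = 1.0) -> torch.Tensor:
    """KL divergence between per-channel spatial distributions (strategy 2)."""
    n, c, h, w = feat_s.shape
    # softmax over spatial locations turns each channel into a distribution
    p_t = F.softmax(feat_t.reshape(n, c, -1) / tau, dim=-1)
    log_p_s = F.log_softmax(feat_s.reshape(n, c, -1) / tau, dim=-1)
    # KL(teacher || student): sum over positions, mean over batch and channels
    kl = F.kl_div(log_p_s, p_t, reduction='none').sum(-1).mean()
    return kl * tau ** 2


# toy usage on one FPN level
feat_s = torch.randn(2, 128, 80, 80)
feat_t = torch.randn(2, 128, 80, 80)
loss = channel_wise_divergence(pkd_normalize(feat_s), pkd_normalize(feat_t))
```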
## Results and Models
| Location | Dataset | Teacher | Student | mAP | mAP(T) | mAP(S) | Config | Download |
| :------: | :-----: | :---------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: | :---------: | :----: | :----: | :------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| FPN | COCO | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-tiny](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | 41.8 (+0.8) | 44.6 | 41.0 | [config](kd_tiny_rtmdet_s_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) |
| FPN | COCO | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | 45.7 (+1.1) | 49.3 | 44.6 | [config](kd_s_rtmdet_m_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) |
| FPN | COCO | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | 50.2 (+0.9) | 51.4 | 49.3 | [config](kd_m_rtmdet_l_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) |
| FPN | COCO | [RTMDet-x](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | 52.3 (+0.9) | 52.8 | 51.4 | [config](kd_l_rtmdet_x_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) |
## Usage
### Prerequisites
- [MMRazor dev-1.x](https://github.com/open-mmlab/mmrazor/tree/dev-1.x)
Install MMRazor from source
```
git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git
cd mmrazor
# Install MMRazor
mim install -v -e .
```
### Training commands
In MMYOLO's root directory, run the following command to train the RTMDet-tiny
with 8 GPUs, using RTMDet-s as the teacher:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py
```
### Testing commands
In MMYOLO's root directory, run the following command to test the model:
```bash
CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py ${CHECKPOINT_PATH}
```
### Getting student-only checkpoint
After training, the checkpoint contains parameters for both student and teacher models.
Run the following command to convert it to a student-only checkpoint:
```bash
python ./tools/model_converters/convert_kd_ckpt_to_student.py ${CHECKPOINT_PATH} --out-path ${OUTPUT_CHECKPOINT_PATH}
```
## Configs
Here we provide detection configs and models for MMRazor in MMYOLO. For clarity,
we take `./kd_tiny_rtmdet_s_neck_300e_coco.py` as an example to show how to
distill an RTM detector based on MMRazor.
Here is the main part of `./kd_tiny_rtmdet_s_neck_300e_coco.py`.
```python
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
distiller=dict(
type='ConfigurableDistiller',
student_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
),
teacher_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
connectors=dict(
fpn0_s=dict(type='ConvModuleConnector', in_channel=96,
out_channel=128, bias=False, norm_cfg=norm_cfg,
act_cfg=None),
fpn0_t=dict(
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
fpn1_s=dict(
type='ConvModuleConnector', in_channel=96,
out_channel=128, bias=False, norm_cfg=norm_cfg,
act_cfg=None),
fpn1_t=dict(
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
fpn2_s=dict(
type='ConvModuleConnector', in_channel=96,
out_channel=128, bias=False, norm_cfg=norm_cfg,
act_cfg=None),
fpn2_t=dict(
type='NormConnector', in_channels=128, norm_cfg=norm_cfg)),
distill_losses=dict(
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
loss_forward_mappings=dict(
loss_fpn0=dict(
preds_S=dict(from_student=True, recorder='fpn0', connector='fpn0_s'),
preds_T=dict(from_student=False, recorder='fpn0', connector='fpn0_t')),
loss_fpn1=dict(
preds_S=dict(from_student=True, recorder='fpn1', connector='fpn1_s'),
preds_T=dict(from_student=False, recorder='fpn1', connector='fpn1_t')),
loss_fpn2=dict(
preds_S=dict(from_student=True, recorder='fpn2', connector='fpn2_s'),
preds_T=dict(from_student=False, recorder='fpn2', connector='fpn2_t'))))
```
`recorders` are used to record various intermediate results during the model forward pass.
In this example, they help record the outputs of 3 `nn.Module`s of the teacher
and the student. Details are listed in [Recorder](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/advanced_guides/recorder.md) and [MMRazor Distillation](https://zhuanlan.zhihu.com/p/596582609) (in Chinese).
`connectors` are adaptive layers which usually map the teacher's and student's features
to the same dimension, as sketched below.
`distill_losses` are configs for multiple distill losses.
`loss_forward_mappings` are mappings between distill loss forward arguments and records.
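For intuition, a standalone PyTorch sketch (not MMRazor's actual `ConvModuleConnector` class) of what the student-side connectors above amount to, a 1x1 conv followed by a parameter-free norm:

```python
import torch
import torch.nn as nn


class StudentConnector(nn.Module):
    """1x1 conv + parameter-free BN, lifting the student's 96-channel FPN
    feature to the teacher's 128 channels (cf. the `fpn*_s` entries above)."""

    def __init__(self, in_channels: int = 96, out_channels: int = 128):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                              bias=False)
        # affine=False / track_running_stats=False mirrors norm_cfg above
        self.norm = nn.BatchNorm2d(out_channels, affine=False,
                                   track_running_stats=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.norm(self.conv(x))


feat_s = torch.randn(2, 96, 80, 80)   # recorded student FPN output
aligned = StudentConnector()(feat_s)  # (2, 128, 80, 80), same shape as teacher
```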
In addition, the student finetunes itself on the no-mosaic domain during the last 20 epochs,
so we add a new hook named `StopDistillHook` to stop distillation on time.
We need to add this hook to the `custom_hooks` list like this:
```python
custom_hooks = [..., dict(type='mmrazor.StopDistillHook', stop_epoch=280)]
```

View File

@ -0,0 +1,99 @@
_base_ = '../rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth' # noqa: E501
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
model = dict(
_delete_=True,
_scope_='mmrazor',
type='FpnTeacherDistill',
architecture=dict(
cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'),
teacher=dict(
cfg_path='mmyolo::rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py'),
teacher_ckpt=teacher_ckpt,
distiller=dict(
type='ConfigurableDistiller',
# `recorders` are used to record various intermediate results during
# the model forward.
student_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
),
teacher_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
# `connectors` are adaptive layers which usually map teacher's and
# students features to the same dimension.
connectors=dict(
fpn0_s=dict(
type='ConvModuleConnector',
in_channel=256,
out_channel=320,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn0_t=dict(
type='NormConnector', in_channels=320, norm_cfg=norm_cfg),
fpn1_s=dict(
type='ConvModuleConnector',
in_channel=256,
out_channel=320,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn1_t=dict(
type='NormConnector', in_channels=320, norm_cfg=norm_cfg),
fpn2_s=dict(
type='ConvModuleConnector',
in_channel=256,
out_channel=320,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn2_t=dict(
type='NormConnector', in_channels=320, norm_cfg=norm_cfg)),
distill_losses=dict(
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
# `loss_forward_mappings` are mappings between distill loss forward
# arguments and records.
loss_forward_mappings=dict(
loss_fpn0=dict(
preds_S=dict(
from_student=True, recorder='fpn0', connector='fpn0_s'),
preds_T=dict(
from_student=False, recorder='fpn0', connector='fpn0_t')),
loss_fpn1=dict(
preds_S=dict(
from_student=True, recorder='fpn1', connector='fpn1_s'),
preds_T=dict(
from_student=False, recorder='fpn1', connector='fpn1_t')),
loss_fpn2=dict(
preds_S=dict(
from_student=True, recorder='fpn2', connector='fpn2_s'),
preds_T=dict(
from_student=False, recorder='fpn2',
connector='fpn2_t')))))
find_unused_parameters = True
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
switch_pipeline=_base_.train_pipeline_stage2),
# stop distillation after the 280th epoch
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
]

View File

@ -0,0 +1,99 @@
_base_ = '../rtmdet_m_syncbn_fast_8xb32-300e_coco.py'
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa: E501
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
model = dict(
_delete_=True,
_scope_='mmrazor',
type='FpnTeacherDistill',
architecture=dict(
cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'),
teacher=dict(
cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'),
teacher_ckpt=teacher_ckpt,
distiller=dict(
type='ConfigurableDistiller',
# `recorders` are used to record various intermediate results during
# the model forward.
student_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
),
teacher_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
# `connectors` are adaptive layers which usually map teacher's and
# students features to the same dimension.
connectors=dict(
fpn0_s=dict(
type='ConvModuleConnector',
in_channel=192,
out_channel=256,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn0_t=dict(
type='NormConnector', in_channels=256, norm_cfg=norm_cfg),
fpn1_s=dict(
type='ConvModuleConnector',
in_channel=192,
out_channel=256,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn1_t=dict(
type='NormConnector', in_channels=256, norm_cfg=norm_cfg),
fpn2_s=dict(
type='ConvModuleConnector',
in_channel=192,
out_channel=256,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn2_t=dict(
type='NormConnector', in_channels=256, norm_cfg=norm_cfg)),
distill_losses=dict(
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
# `loss_forward_mappings` are mappings between distill loss forward
# arguments and records.
loss_forward_mappings=dict(
loss_fpn0=dict(
preds_S=dict(
from_student=True, recorder='fpn0', connector='fpn0_s'),
preds_T=dict(
from_student=False, recorder='fpn0', connector='fpn0_t')),
loss_fpn1=dict(
preds_S=dict(
from_student=True, recorder='fpn1', connector='fpn1_s'),
preds_T=dict(
from_student=False, recorder='fpn1', connector='fpn1_t')),
loss_fpn2=dict(
preds_S=dict(
from_student=True, recorder='fpn2', connector='fpn2_s'),
preds_T=dict(
from_student=False, recorder='fpn2',
connector='fpn2_t')))))
find_unused_parameters = True
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
switch_pipeline=_base_.train_pipeline_stage2),
# stop distillation after the 280th epoch
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
]

View File

@ -0,0 +1,99 @@
_base_ = '../rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth' # noqa: E501
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
model = dict(
_delete_=True,
_scope_='mmrazor',
type='FpnTeacherDistill',
architecture=dict(
cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'),
teacher=dict(
cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'),
teacher_ckpt=teacher_ckpt,
distiller=dict(
type='ConfigurableDistiller',
# `recorders` are used to record various intermediate results during
# the model forward.
student_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
),
teacher_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
# `connectors` are adaptive layers which usually map teacher's and
# students features to the same dimension.
connectors=dict(
fpn0_s=dict(
type='ConvModuleConnector',
in_channel=128,
out_channel=192,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn0_t=dict(
type='NormConnector', in_channels=192, norm_cfg=norm_cfg),
fpn1_s=dict(
type='ConvModuleConnector',
in_channel=128,
out_channel=192,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn1_t=dict(
type='NormConnector', in_channels=192, norm_cfg=norm_cfg),
fpn2_s=dict(
type='ConvModuleConnector',
in_channel=128,
out_channel=192,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn2_t=dict(
type='NormConnector', in_channels=192, norm_cfg=norm_cfg)),
distill_losses=dict(
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
# `loss_forward_mappings` are mappings between distill loss forward
# arguments and records.
loss_forward_mappings=dict(
loss_fpn0=dict(
preds_S=dict(
from_student=True, recorder='fpn0', connector='fpn0_s'),
preds_T=dict(
from_student=False, recorder='fpn0', connector='fpn0_t')),
loss_fpn1=dict(
preds_S=dict(
from_student=True, recorder='fpn1', connector='fpn1_s'),
preds_T=dict(
from_student=False, recorder='fpn1', connector='fpn1_t')),
loss_fpn2=dict(
preds_S=dict(
from_student=True, recorder='fpn2', connector='fpn2_s'),
preds_T=dict(
from_student=False, recorder='fpn2',
connector='fpn2_t')))))
find_unused_parameters = True
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
switch_pipeline=_base_.train_pipeline_stage2),
# stop distillation after the 280th epoch
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
]

View File

@ -0,0 +1,99 @@
_base_ = '../rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth' # noqa: E501
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
model = dict(
_delete_=True,
_scope_='mmrazor',
type='FpnTeacherDistill',
architecture=dict(
cfg_path='mmyolo::rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'),
teacher=dict(
cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'),
teacher_ckpt=teacher_ckpt,
distiller=dict(
type='ConfigurableDistiller',
# `recorders` are used to record various intermediate results during
# the model forward.
student_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
),
teacher_recorders=dict(
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
# `connectors` are adaptive layers which usually map teacher's and
# students features to the same dimension.
connectors=dict(
fpn0_s=dict(
type='ConvModuleConnector',
in_channel=96,
out_channel=128,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn0_t=dict(
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
fpn1_s=dict(
type='ConvModuleConnector',
in_channel=96,
out_channel=128,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn1_t=dict(
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
fpn2_s=dict(
type='ConvModuleConnector',
in_channel=96,
out_channel=128,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None),
fpn2_t=dict(
type='NormConnector', in_channels=128, norm_cfg=norm_cfg)),
distill_losses=dict(
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
# `loss_forward_mappings` are mappings between distill loss forward
# arguments and records.
loss_forward_mappings=dict(
loss_fpn0=dict(
preds_S=dict(
from_student=True, recorder='fpn0', connector='fpn0_s'),
preds_T=dict(
from_student=False, recorder='fpn0', connector='fpn0_t')),
loss_fpn1=dict(
preds_S=dict(
from_student=True, recorder='fpn1', connector='fpn1_s'),
preds_T=dict(
from_student=False, recorder='fpn1', connector='fpn1_t')),
loss_fpn2=dict(
preds_S=dict(
from_student=True, recorder='fpn2', connector='fpn2_s'),
preds_T=dict(
from_student=False, recorder='fpn2',
connector='fpn2_t')))))
find_unused_parameters = True
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
switch_pipeline=_base_.train_pipeline_stage2),
# stop distillation after the 280th epoch
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
]

View File

@ -13,6 +13,20 @@ Collections:
Code:
URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12
Version: v0.1.1
- Name: Rotated_RTMDet
Metadata:
Training Data: DOTAv1.0
Training Techniques:
- AdamW
- Flat Cosine Annealing
Training Resources: 1x A100 GPUs
Architecture:
- CSPNeXt
- CSPNeXtPAFPN
README: configs/rtmdet/README.md
Code:
URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12
Version: v0.1.1
Models:
- Name: rtmdet_tiny_syncbn_fast_8xb32-300e_coco
@ -28,6 +42,19 @@ Models:
box AP: 41.0
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth
- Name: kd_tiny_rtmdet_s_neck_300e_coco
In Collection: RTMDet
Config: configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py
Metadata:
Training Memory (GB): 11.9
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.8
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth
- Name: rtmdet_s_syncbn_fast_8xb32-300e_coco
In Collection: RTMDet
Config: configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py
@ -41,6 +68,19 @@ Models:
box AP: 44.6
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth
- Name: kd_s_rtmdet_m_neck_300e_coco
In Collection: RTMDet
Config: configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py
Metadata:
Training Memory (GB): 16.3
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 45.7
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth
- Name: rtmdet_m_syncbn_fast_8xb32-300e_coco
In Collection: RTMDet
Config: configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py
@ -54,6 +94,19 @@ Models:
box AP: 49.3
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth
- Name: kd_m_rtmdet_l_neck_300e_coco
In Collection: RTMDet
Config: configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py
Metadata:
Training Memory (GB): 29.0
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 50.2
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth
- Name: rtmdet_l_syncbn_fast_8xb32-300e_coco
In Collection: RTMDet
Config: configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
@ -67,6 +120,19 @@ Models:
box AP: 51.4
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth
- Name: kd_l_rtmdet_x_neck_300e_coco
In Collection: RTMDet
Config: configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py
Metadata:
Training Memory (GB): 45.2
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 52.3
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth
- Name: rtmdet_x_syncbn_fast_8xb32-300e_coco
In Collection: RTMDet
Config: configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py
@ -79,3 +145,71 @@ Models:
Metrics:
box AP: 52.8
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth
- Name: rtmdet-r_tiny_fast_1xb8-36e_dota
In Collection: Rotated_RTMDet
Config: configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py
Metadata:
Training Memory (GB): 12.7
Epochs: 36
Results:
- Task: Oriented Object Detection
Dataset: DOTAv1.0
Metrics:
mAP: 75.07
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth
- Name: rtmdet-r_s_fast_1xb8-36e_dota
In Collection: Rotated_RTMDet
Config: configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py
Metadata:
Training Memory (GB): 16.6
Epochs: 36
Results:
- Task: Oriented Object Detection
Dataset: DOTAv1.0
Metrics:
mAP: 77.33
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth
- Name: rtmdet-r_m_syncbn_fast_2xb4-36e_dota
In Collection: Rotated_RTMDet
Config: configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py
Metadata:
Training Resources: 2x A100 GPUs
Training Memory (GB): 10.9
Epochs: 36
Results:
- Task: Oriented Object Detection
Dataset: DOTAv1.0
Metrics:
mAP: 78.43
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth
- Name: rtmdet-r_l_syncbn_fast_2xb4-36e_dota
In Collection: Rotated_RTMDet
Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py
Metadata:
Training Resources: 2x A100 GPUs
Training Memory (GB): 16.1
Epochs: 36
Results:
- Task: Oriented Object Detection
Dataset: DOTAv1.0
Metrics:
mAP: 78.66
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth
- Name: rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota
In Collection: Rotated_RTMDet
Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py
Metadata:
Training Resources: 2x A100 GPUs
Training Memory (GB): 19.6
Epochs: 100
Results:
- Task: Oriented Object Detection
Dataset: DOTAv1.0
Metrics:
mAP: 80.14
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth

View File

@ -0,0 +1,30 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
# ========================modified parameters======================
data_root = 'data/split_ms_dota/'
# Path of test images folder
test_data_prefix = 'test/images/'
# Submission dir for result submit
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
train_dataloader = dict(dataset=dict(data_root=data_root))
val_dataloader = dict(dataset=dict(data_root=data_root))
# Inference on val dataset
test_dataloader = val_dataloader
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)
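A sketch of how a submission run would look once the commented `test_dataloader`/`test_evaluator` above are enabled (standard MMYOLO test entry point; the checkpoint path is a placeholder):

```bash
python tools/test.py \
    configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py \
    ${CHECKPOINT_PATH}
```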

View File

@ -0,0 +1,331 @@
_base_ = '../../_base_/default_runtime.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa
# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/split_ss_dota/'
# Path of train annotation folder
train_ann_file = 'trainval/annfiles/'
train_data_prefix = 'trainval/images/' # Prefix of train image path
# Path of val annotation folder
val_ann_file = 'trainval/annfiles/'
val_data_prefix = 'trainval/images/' # Prefix of val image path
# Path of test images folder
test_data_prefix = 'test/images/'
# Submission dir for result submit
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
num_classes = 15 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 4
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
# persistent_workers must be False if num_workers is 0.
persistent_workers = True
# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs
base_lr = 0.00025 # 0.004 / 16
max_epochs = 36 # Maximum training epochs
model_test_cfg = dict(
# The config of multi-label for multi-class prediction.
multi_label=True,
    # Decode rbox with angle. For RTMDet-R, defaults to True.
    # When set to True, use an rbox coder such as DistanceAnglePointCoder;
    # when set to False, use an hbox coder such as DistancePointBBoxCoder.
    # Different settings lead to different AP.
decode_with_angle=True,
# The number of boxes before NMS
nms_pre=30000,
score_thr=0.05, # Threshold to filter out boxes.
nms=dict(type='nms_rotated', iou_threshold=0.1), # NMS type and threshold
max_per_img=2000) # Max number of detections of each image
# ========================Possible modified parameters========================
# -----data related-----
img_scale = (1024, 1024) # width, height
# ratio for random rotate
random_rotate_ratio = 0.5
# label ids for rect objs
rotate_rect_obj_labels = [9, 11]
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5DOTADataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 8
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 8
# Config of batch shapes. Only used on val; not used in RTMDet-R.
batch_shapes_cfg = None
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 1.0
# The scaling factor that controls the width of the network structure
widen_factor = 1.0
# Strides of multi-scale prior box
strides = [8, 16, 32]
# The angle definition for model
angle_version = 'le90' # le90, le135, oc are available options
norm_cfg = dict(type='BN') # Normalization config
# -----train val related-----
lr_start_factor = 1.0e-5
dsl_topk = 13 # Number of bbox selected in each level
loss_cls_weight = 1.0
loss_bbox_weight = 2.0
qfl_beta = 2.0 # beta of QualityFocalLoss
weight_decay = 0.05
# Save model checkpoint and validation intervals
save_checkpoint_intervals = 1
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
# ===============================Unmodified in most cases====================
model = dict(
type='YOLODetector',
data_preprocessor=dict(
type='YOLOv5DetDataPreprocessor',
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
bgr_to_rgb=False),
backbone=dict(
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=deepen_factor,
widen_factor=widen_factor,
channel_attention=True,
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True),
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
type='CSPNeXtPAFPN',
deepen_factor=deepen_factor,
widen_factor=widen_factor,
in_channels=[256, 512, 1024],
out_channels=256,
num_csp_blocks=3,
expand_ratio=0.5,
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
bbox_head=dict(
type='RTMDetRotatedHead',
head_module=dict(
type='RTMDetRotatedSepBNHeadModule',
num_classes=num_classes,
widen_factor=widen_factor,
in_channels=256,
stacked_convs=2,
feat_channels=256,
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True),
share_conv=True,
pred_kernel_size=1,
featmap_strides=strides),
prior_generator=dict(
type='mmdet.MlvlPointGenerator', offset=0, strides=strides),
bbox_coder=dict(
type='DistanceAnglePointCoder', angle_version=angle_version),
loss_cls=dict(
type='mmdet.QualityFocalLoss',
use_sigmoid=True,
beta=qfl_beta,
loss_weight=loss_cls_weight),
loss_bbox=dict(
type='mmrotate.RotatedIoULoss',
mode='linear',
loss_weight=loss_bbox_weight),
angle_version=angle_version,
# Used for angle encode and decode, similar to bbox coder
angle_coder=dict(type='mmrotate.PseudoAngleCoder'),
# If true, it will apply loss_bbox on horizontal box, and angle_loss
# needs to be specified. In this case the loss_bbox should use
# horizontal box loss e.g. IoULoss. Arg details can be seen in
# `docs/zh_cn/tutorials/rotated_detection.md`
use_hbbox_loss=False,
loss_angle=None),
train_cfg=dict(
assigner=dict(
type='BatchDynamicSoftLabelAssigner',
num_classes=num_classes,
topk=dsl_topk,
iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'),
# RBboxOverlaps2D doesn't support batch input, use loop instead.
batch_iou=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=model_test_cfg,
)
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'),
dict(
type='mmrotate.ConvertBoxType',
box_type_mapping=dict(gt_bboxes='rbox')),
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
dict(
type='mmdet.RandomFlip',
prob=0.75,
direction=['horizontal', 'vertical', 'diagonal']),
dict(
type='mmrotate.RandomRotate',
prob=random_rotate_ratio,
angle_range=180,
rotate_type='mmrotate.Rotate',
rect_obj_labels=rotate_rect_obj_labels),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(type='RegularizeRotatedBox', angle_version=angle_version),
dict(type='mmdet.PackDetInputs')
]
val_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(
type='LoadAnnotations',
with_bbox=True,
box_type='qbox',
_scope_='mmdet'),
dict(
type='mmrotate.ConvertBoxType',
box_type_mapping=dict(gt_bboxes='rbox')),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
persistent_workers=persistent_workers,
pin_memory=True,
collate_fn=dict(type='yolov5_collate'),
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=train_ann_file,
data_prefix=dict(img_path=train_data_prefix),
filter_cfg=dict(filter_empty_gt=True),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=val_batch_size_per_gpu,
num_workers=val_num_workers,
persistent_workers=persistent_workers,
pin_memory=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=val_ann_file,
data_prefix=dict(img_path=val_data_prefix),
test_mode=True,
batch_shapes_cfg=batch_shapes_cfg,
pipeline=val_pipeline))
val_evaluator = dict(type='mmrotate.DOTAMetric', metric='mAP')
# Inference on val dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# batch_size=val_batch_size_per_gpu,
# num_workers=val_num_workers,
# persistent_workers=True,
# drop_last=False,
# sampler=dict(type='DefaultSampler', shuffle=False),
# dataset=dict(
# type=dataset_type,
# data_root=data_root,
# data_prefix=dict(img_path=test_data_prefix),
# test_mode=True,
# batch_shapes_cfg=batch_shapes_cfg,
# pipeline=test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=lr_start_factor,
by_epoch=False,
begin=0,
end=1000),
dict(
        # use cosine lr from epoch 18 to 36 (max_epochs // 2 to max_epochs)
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# hooks
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
interval=save_checkpoint_intervals,
max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints
save_best='auto'))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49)
]
train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=save_checkpoint_intervals)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
visualizer = dict(type='mmrotate.RotLocalVisualizer')

View File

@ -0,0 +1,168 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
# This config uses a longer schedule with Mixup, Mosaic and Random Rotate.
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa
# ========================modified parameters======================
# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs
base_lr = 0.00025 # 0.004 / 16
lr_start_factor = 1.0e-5
max_epochs = 100 # Maximum training epochs
# Change train_pipeline for final 10 epochs (stage 2)
num_epochs_stage2 = 10
img_scale = (1024, 1024) # width, height
# ratio range for random resize
random_resize_ratio_range = (0.1, 2.0)
# Cached images number in mosaic
mosaic_max_cached_images = 40
# Number of cached images in mixup
mixup_max_cached_images = 20
# ratio for random rotate
random_rotate_ratio = 0.5
# label ids for rect objs
rotate_rect_obj_labels = [9, 11]
# Save model checkpoint and validation intervals
save_checkpoint_intervals = 1
# validation intervals in stage 2
val_interval_stage2 = 1
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# Submission dir for result submit
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'),
dict(
type='mmrotate.ConvertBoxType',
box_type_mapping=dict(gt_bboxes='rbox')),
dict(
type='Mosaic',
img_scale=img_scale,
use_cached=True,
max_cached_images=mosaic_max_cached_images,
pad_val=114.0),
dict(
type='mmdet.RandomResize',
# img_scale is (width, height)
scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=random_resize_ratio_range,
resize_type='mmdet.Resize',
keep_ratio=True),
dict(
type='mmrotate.RandomRotate',
prob=random_rotate_ratio,
angle_range=180,
rotate_type='mmrotate.Rotate',
rect_obj_labels=rotate_rect_obj_labels),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='mmdet.RandomFlip',
prob=0.75,
direction=['horizontal', 'vertical', 'diagonal']),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(
type='YOLOv5MixUp',
use_cached=True,
max_cached_images=mixup_max_cached_images),
dict(type='mmdet.PackDetInputs')
]
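`Mosaic` and `YOLOv5MixUp` with `use_cached=True` avoid re-loading partner images from disk: each call pushes the current sample into a fixed-size cache (`max_cached_images`) and draws its mix partners from that cache. A minimal sketch of the idea (illustrative, not the MMYOLO implementation; `random_pop` mirrors the option used in the tiny configs later in this diff):

import random

class CachedMixSampler:
    def __init__(self, max_cached_images=40, random_pop=True):
        self.cache, self.max_len = [], max_cached_images
        self.random_pop = random_pop

    def sample(self, current, num_partners=3):
        self.cache.append(current)          # newest sample enters the cache
        if len(self.cache) > self.max_len:  # evict once the cache is full
            idx = random.randrange(len(self.cache)) if self.random_pop else 0
            self.cache.pop(idx)
        return random.choices(self.cache, k=num_partners)  # mix partners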
train_pipeline_stage2 = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'),
dict(
type='mmrotate.ConvertBoxType',
box_type_mapping=dict(gt_bboxes='rbox')),
dict(
type='mmdet.RandomResize',
scale=img_scale,
ratio_range=random_resize_ratio_range,
resize_type='mmdet.Resize',
keep_ratio=True),
dict(
type='mmrotate.RandomRotate',
prob=random_rotate_ratio,
angle_range=180,
rotate_type='mmrotate.Rotate',
rect_obj_labels=rotate_rect_obj_labels),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='mmdet.RandomFlip',
prob=0.75,
direction=['horizontal', 'vertical', 'diagonal']),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(type='mmdet.PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=lr_start_factor,
by_epoch=False,
begin=0,
end=1000),
dict(
        # use cosine lr from 50 to 100 epoch
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# hooks
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
interval=save_checkpoint_intervals,
max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints
save_best='auto'))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - num_epochs_stage2,
switch_pipeline=train_pipeline_stage2)
]
train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=save_checkpoint_intervals,
dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)])
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,20 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,33 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
init_cfg=dict(checkpoint=checkpoint)),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,33 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
init_cfg=dict(checkpoint=checkpoint)),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,38 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.5
# Batch size of a single GPU during training
train_batch_size_per_gpu = 8
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
init_cfg=dict(checkpoint=checkpoint)),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,38 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.5
# Batch size of a single GPU during training
train_batch_size_per_gpu = 8
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
init_cfg=dict(checkpoint=checkpoint)),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,38 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.167
widen_factor = 0.375
# Batch size of a single GPU during training
train_batch_size_per_gpu = 8
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
init_cfg=dict(checkpoint=checkpoint)),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,38 @@
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.167
widen_factor = 0.375
# Batch size of a single GPU during training
train_batch_size_per_gpu = 8
# Submission dir for the formatted results
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
init_cfg=dict(checkpoint=checkpoint)),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
# Inference on test dataset and format the output results
# for submission. Note: the test set has no annotation.
# test_dataloader = dict(
# dataset=dict(
# data_root=_base_.data_root,
# ann_file='', # test set has no annotation
# data_prefix=dict(img_path=_base_.test_data_prefix),
# pipeline=_base_.test_pipeline))
# test_evaluator = dict(
# type='mmrotate.DOTAMetric',
# format_only=True,
# merge_patches=True,
# outfile_prefix=submission_dir)

View File

@ -0,0 +1,31 @@
_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
widen_factor = 0.5
model = dict(
bbox_head=dict(
type='RTMDetInsSepBNHead',
head_module=dict(
type='RTMDetInsSepBNHeadModule',
use_sigmoid_cls=True,
widen_factor=widen_factor),
loss_mask=dict(
type='mmdet.DiceLoss', loss_weight=2.0, eps=5e-6,
reduction='mean')),
test_cfg=dict(
multi_label=True,
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100,
mask_thr_binary=0.5))
_base_.test_pipeline[-2] = dict(
type='LoadAnnotations', with_bbox=True, with_mask=True, _scope_='mmdet')
val_dataloader = dict(dataset=dict(pipeline=_base_.test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(metric=['bbox', 'segm'])
test_evaluator = val_evaluator
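The `_base_.test_pipeline[-2] = ...` line above patches the inherited pipeline in place so that mask annotations are loaded for the `segm` metric. Index `-2` works because the base `test_pipeline` is assumed to end with `LoadAnnotations` followed by `PackDetInputs`, as in the YOLOv5 pipelines shown later in this diff; a minimal sketch with placeholder scales:

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='YOLOv5KeepRatioResize', scale=(640, 640)),
    dict(type='LetterResize', scale=(640, 640), allow_scale_up=False),
    dict(type='mmdet.LoadAnnotations', with_bbox=True),
    dict(type='mmdet.PackDetInputs'),
]
# Replace the second-to-last transform to also decode instance masks.
test_pipeline[-2] = dict(
    type='LoadAnnotations', with_bbox=True, with_mask=True, _scope_='mmdet')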

View File

@ -1,30 +1,56 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
img_scale = (640, 640) # width, height
deepen_factor = 1.0
widen_factor = 1.0
max_epochs = 300
stage2_num_epochs = 20
interval = 10
num_classes = 80
# Path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/' # Prefix of train image path
# Path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/' # Prefix of val image path
num_classes = 80 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 32
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 10
val_batch_size_per_gpu = 32
val_num_workers = 10
# persistent_workers must be False if num_workers is 0.
persistent_workers = True
strides = [8, 16, 32]
# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb32=256 bs
base_lr = 0.004
max_epochs = 300 # Maximum training epochs
# Change train_pipeline for final 20 epochs (stage 2)
num_epochs_stage2 = 20
# single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
model_test_cfg = dict(
# The config of multi-label for multi-class prediction.
multi_label=True,
# The number of boxes before NMS
nms_pre=30000,
score_thr=0.001, # Threshold to filter out boxes.
nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
max_per_img=300) # Max number of detections of each image
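`model_test_cfg` pins down the test-time post-processing order: predictions below `score_thr` are dropped, at most `nms_pre` boxes enter NMS, overlaps above `iou_threshold=0.65` are suppressed, and the top `max_per_img=300` detections survive. A minimal, class-agnostic sketch of that order (the real head additionally handles `multi_label` per-class scoring):

import torch
from torchvision.ops import nms

def postprocess(boxes, scores, score_thr=0.001, nms_pre=30000,
                iou_threshold=0.65, max_per_img=300):
    keep = scores > score_thr                      # drop low-confidence boxes
    boxes, scores = boxes[keep], scores[keep]
    topk = scores.topk(min(nms_pre, len(scores)))  # cap boxes entering NMS
    boxes, scores = boxes[topk.indices], topk.values
    kept = nms(boxes, scores, iou_threshold)       # class-agnostic NMS
    return boxes[kept][:max_per_img], scores[kept][:max_per_img]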
# only on Val
# ========================Possible modified parameters========================
# -----data related-----
img_scale = (640, 640) # width, height
# ratio range for random resize
random_resize_ratio_range = (0.1, 2.0)
# Number of cached images in mosaic
mosaic_max_cached_images = 40
# Number of cached images in mixup
mixup_max_cached_images = 20
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 32
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 10
# Config of batch shapes. Only on val.
batch_shapes_cfg = dict(
type='BatchShapePolicy',
batch_size=val_batch_size_per_gpu,
@ -32,6 +58,35 @@ batch_shapes_cfg = dict(
size_divisor=32,
extra_pad_ratio=0.5)
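`BatchShapePolicy` speeds up validation by batching images of similar aspect ratio and padding each batch to a shared shape divisible by `size_divisor`, instead of padding every image to a full square. A rough sketch of how one batch shape could be derived, an assumption based on the YOLOv5 rectangular-inference logic rather than the MMYOLO code:

import numpy as np

def batch_shape(aspect_ratios, img_size=640, size_divisor=32,
                extra_pad_ratio=0.5):
    ars = np.asarray(aspect_ratios)  # h / w of every image in one batch
    mini, maxi = ars.min(), ars.max()
    if maxi < 1:            # all wide images: shrink the height side
        shape = [maxi, 1.0]
    elif mini > 1:          # all tall images: shrink the width side
        shape = [1.0, 1.0 / mini]
    else:                   # mixed batch: fall back to a square shape
        shape = [1.0, 1.0]
    hw = np.ceil(np.asarray(shape) * img_size / size_divisor
                 + extra_pad_ratio).astype(int) * size_divisor
    return tuple(hw)        # (height, width) shared by the whole batch

print(batch_shape([0.75, 0.8]))  # (544, 672): far less padding than a square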
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 1.0
# The scaling factor that controls the width of the network structure
widen_factor = 1.0
# Strides of multi-scale prior box
strides = [8, 16, 32]
norm_cfg = dict(type='BN') # Normalization config
# -----train val related-----
lr_start_factor = 1.0e-5
dsl_topk = 13 # Number of bbox selected in each level
loss_cls_weight = 1.0
loss_bbox_weight = 2.0
qfl_beta = 2.0 # beta of QualityFocalLoss
weight_decay = 0.05
# Save model checkpoint and validation intervals
save_checkpoint_intervals = 10
# validation intervals in stage 2
val_interval_stage2 = 1
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
# ===============================Unmodified in most cases====================
model = dict(
type='YOLODetector',
data_preprocessor=dict(
@ -46,7 +101,7 @@ model = dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
channel_attention=True,
norm_cfg=dict(type='BN'),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
neck=dict(
type='CSPNeXtPAFPN',
@ -56,7 +111,7 @@ model = dict(
out_channels=256,
num_csp_blocks=3,
expand_ratio=0.5,
norm_cfg=dict(type='BN'),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
bbox_head=dict(
type='RTMDetHead',
@ -66,7 +121,7 @@ model = dict(
in_channels=256,
stacked_convs=2,
feat_channels=256,
norm_cfg=dict(type='BN'),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True),
share_conv=True,
pred_kernel_size=1,
@ -77,24 +132,19 @@ model = dict(
loss_cls=dict(
type='mmdet.QualityFocalLoss',
use_sigmoid=True,
beta=2.0,
loss_weight=1.0),
loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)),
beta=qfl_beta,
loss_weight=loss_cls_weight),
loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=loss_bbox_weight)),
train_cfg=dict(
assigner=dict(
type='BatchDynamicSoftLabelAssigner',
num_classes=num_classes,
topk=13,
topk=dsl_topk,
iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
multi_label=True,
nms_pre=30000,
score_thr=0.001,
nms=dict(type='nms', iou_threshold=0.65),
max_per_img=300),
test_cfg=model_test_cfg,
)
train_pipeline = [
@ -104,20 +154,23 @@ train_pipeline = [
type='Mosaic',
img_scale=img_scale,
use_cached=True,
max_cached_images=40,
max_cached_images=mosaic_max_cached_images,
pad_val=114.0),
dict(
type='mmdet.RandomResize',
# img_scale is (width, height)
scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=(0.1, 2.0),
ratio_range=random_resize_ratio_range,
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
dict(
type='YOLOv5MixUp',
use_cached=True,
max_cached_images=mixup_max_cached_images),
dict(type='mmdet.PackDetInputs')
]
@ -127,7 +180,7 @@ train_pipeline_stage2 = [
dict(
type='mmdet.RandomResize',
scale=img_scale,
ratio_range=(0.1, 2.0),
ratio_range=random_resize_ratio_range,
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
@ -162,8 +215,8 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
ann_file=train_ann_file,
data_prefix=dict(img=train_data_prefix),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline))
@ -177,8 +230,8 @@ val_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img='val2017/'),
ann_file=val_ann_file,
data_prefix=dict(img=val_data_prefix),
test_mode=True,
batch_shapes_cfg=batch_shapes_cfg,
pipeline=test_pipeline))
@ -189,14 +242,14 @@ test_dataloader = val_dataloader
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
ann_file=data_root + 'annotations/instances_val2017.json',
ann_file=data_root + val_ann_file,
metric='bbox')
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
@ -204,7 +257,7 @@ optim_wrapper = dict(
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
start_factor=lr_start_factor,
by_epoch=False,
begin=0,
end=1000),
@ -223,8 +276,8 @@ param_scheduler = [
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
interval=interval,
max_keep_ckpts=3 # only keep latest 3 checkpoints
interval=save_checkpoint_intervals,
max_keep_ckpts=max_keep_ckpts # only keep latest 3 checkpoints
))
custom_hooks = [
@ -237,15 +290,15 @@ custom_hooks = [
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_epoch=max_epochs - num_epochs_stage2,
switch_pipeline=train_pipeline_stage2)
]
train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=interval,
dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
val_interval=save_checkpoint_intervals,
dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
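With `max_epochs=300`, `save_checkpoint_intervals=10` and `num_epochs_stage2=20` above, `dynamic_intervals=[(280, 1)]` means validation runs every 10 epochs until epoch 280 and every epoch during stage 2. A minimal sketch of the milestone lookup (illustrative, not the MMEngine implementation):

def current_val_interval(epoch, base_interval=10,
                         dynamic_intervals=((280, 1),)):
    interval = base_interval
    for milestone, new_interval in dynamic_intervals:
        if epoch >= milestone:  # the last reached milestone wins
            interval = new_interval
    return interval

print(current_val_interval(100))  # 10: validate every 10 epochs
print(current_val_interval(285))  # 1: validate every epoch in stage 2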

View File

@ -1,8 +1,10 @@
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -1,10 +1,19 @@
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.5
img_scale = _base_.img_scale
# ratio range for random resize
random_resize_ratio_range = (0.5, 2.0)
# Number of cached images in mosaic
mosaic_max_cached_images = 40
# Number of cached images in mixup
mixup_max_cached_images = 20
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
@ -30,20 +39,23 @@ train_pipeline = [
type='Mosaic',
img_scale=img_scale,
use_cached=True,
max_cached_images=40,
max_cached_images=mosaic_max_cached_images,
pad_val=114.0),
dict(
type='mmdet.RandomResize',
# img_scale is (width, height)
scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=(0.5, 2.0), # note
ratio_range=random_resize_ratio_range, # note
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
dict(
type='YOLOv5MixUp',
use_cached=True,
max_cached_images=mixup_max_cached_images),
dict(type='mmdet.PackDetInputs')
]
@ -53,7 +65,7 @@ train_pipeline_stage2 = [
dict(
type='mmdet.RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0), # note
ratio_range=random_resize_ratio_range, # note
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
@ -75,6 +87,6 @@ custom_hooks = [
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs,
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
switch_pipeline=train_pipeline_stage2)
]

View File

@ -0,0 +1,70 @@
_base_ = 'rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
num_epochs_stage2 = 5
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4
val_batch_size_per_gpu = 1
val_num_workers = 2
load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(head_module=dict(num_classes=num_classes)),
train_cfg=dict(assigner=dict(num_classes=num_classes)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
batch_size=val_batch_size_per_gpu,
num_workers=val_num_workers,
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
param_scheduler = [
dict(
type='LinearLR',
start_factor=_base_.lr_start_factor,
by_epoch=False,
begin=0,
end=30),
dict(
        # use cosine lr from 20 to 40 epoch
type='CosineAnnealingLR',
eta_min=_base_.base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
_base_.custom_hooks[1].switch_epoch = max_epochs - num_epochs_stage2
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa

View File

@ -1,11 +1,19 @@
_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
# ========================modified parameters======================
deepen_factor = 0.167
widen_factor = 0.375
img_scale = _base_.img_scale
# ratio range for random resize
random_resize_ratio_range = (0.5, 2.0)
# Number of cached images in mosaic
mosaic_max_cached_images = 20
# Number of cached images in mixup
mixup_max_cached_images = 10
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
@ -24,14 +32,14 @@ train_pipeline = [
type='Mosaic',
img_scale=img_scale,
use_cached=True,
max_cached_images=20, # note
max_cached_images=mosaic_max_cached_images, # note
random_pop=False, # note
pad_val=114.0),
dict(
type='mmdet.RandomResize',
# img_scale is (width, height)
scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=(0.5, 2.0),
ratio_range=random_resize_ratio_range,
resize_type='mmdet.Resize',
keep_ratio=True),
dict(type='mmdet.RandomCrop', crop_size=img_scale),
@ -42,7 +50,7 @@ train_pipeline = [
type='YOLOv5MixUp',
use_cached=True,
random_pop=False,
max_cached_images=10,
max_cached_images=mixup_max_cached_images,
prob=0.5),
dict(type='mmdet.PackDetInputs')
]

View File

@ -1,8 +1,10 @@
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 1.33
widen_factor = 1.25
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -20,16 +20,16 @@ YOLOv5-l-P6 model structure
### COCO
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download |
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) |
| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) |
| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) |
| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) |
| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) |
| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) |
| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) |
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download |
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) |
| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) |
| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | 49.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) |
| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) |
| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) |
| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) |
| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) |
**Note**:
In the official YOLOv5 code, the `random_perspective` data augmentation used in COCO object detection training exploits mask annotation information, which leads to higher performance. Object detection should not use mask annotations, so `MMYOLO` uses only box annotation information; the mask annotations will be used in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details.
@ -39,16 +39,16 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO
3. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision.
4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code.
5. The performance is unstable and may fluctuate by about 0.4 mAP, and the best-performing checkpoint in `YOLOv5` `COCO` training may not be from the last epoch.
6. `balloon` means that this is a demo configuration.
6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flip transformations (flipped and not flipped). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
### VOC
| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | Config | Download |
| :------: | :--: | :-------: | :-: | :------: | :-----------------: | :--------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254-f1493430.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254.log.json) |
| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156-0009b33e.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156.log.json) |
| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138-815c143a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138.log.json) |
| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500.log.json) |
| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | Config | Download |
| :------: | :--: | :-------: | :-: | :------: | :-----------------: | :------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254-f1493430.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254.log.json) |
| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156-0009b33e.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156.log.json) |
| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138-815c143a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138.log.json) |
| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500.log.json) |
**Note**:
@ -62,10 +62,10 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO
Since the `iscrowd` annotation of the COCO dataset is not equivalent to `ignore`, we use the CrowdHuman dataset to verify that the YOLOv5 ignore logic is correct.
| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | Config | Download |
| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | :-------------------------------------------------------------------------------------------------------------------------------: | :------: |
| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py) | |
| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) | |
| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | Config | Download |
| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | :-----------------------------------------------------------------------------------------------------------------------------: | :------: |
| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py) | |
| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) | |
**Note**:

View File

@ -29,6 +29,8 @@ num_det_layers = 3
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
tta_img_scales = [img_scale, (416, 416), (640, 640)]
# Hyperparameter reference from:
# https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml
model = dict(
@ -232,3 +234,37 @@ val_evaluator = dict(
test_evaluator = val_evaluator
train_cfg = dict(max_epochs=max_epochs)
# Config for Test Time Augmentation (TTA)
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in tta_img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
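The `TestTimeAug` wrapper above runs one forward pass per combination of the 3 resize transforms and the 2 flip settings, then merges the detections. A quick sketch of the enumeration; the first scale reuses the config's `img_scale`, assumed to be (512, 512) for this VOC setup:

from itertools import product

scales = [(512, 512), (416, 416), (640, 640)]  # tta_img_scales
flips = [False, True]
views = list(product(scales, flips))  # 3 resizes x 2 flips = 6 views
for scale, flip in views:
    print(f'forward pass at {scale}, horizontal flip: {flip}')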

View File

@ -1,10 +1,15 @@
_base_ = './yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
lr_factor = 0.1 # lrf=0.1
lr_factor = 0.1
affine_scale = 0.9
loss_cls_weight = 0.3
loss_obj_weight = 0.7
mixup_prob = 0.1
# =======================Unmodified in most cases==================
num_classes = _base_.num_classes
num_det_layers = _base_.num_det_layers
img_scale = _base_.img_scale
@ -20,9 +25,9 @@ model = dict(
),
bbox_head=dict(
head_module=dict(widen_factor=widen_factor),
loss_cls=dict(loss_weight=0.3 *
loss_cls=dict(loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_obj=dict(loss_weight=0.7 *
loss_obj=dict(loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers))))
pre_transform = _base_.pre_transform
@ -49,7 +54,7 @@ train_pipeline = [
*pre_transform, *mosaic_affine_pipeline,
dict(
type='YOLOv5MixUp',
prob=0.1,
prob=mixup_prob,
pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
dict(
type='mmdet.Albu',
@ -71,5 +76,4 @@ train_pipeline = [
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))

View File

@ -1,10 +1,15 @@
_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
lr_factor = 0.1 # lrf=0.1
lr_factor = 0.1
affine_scale = 0.9
loss_cls_weight = 0.3
loss_obj_weight = 0.7
mixup_prob = 0.1
# =======================Unmodified in most cases==================
num_classes = _base_.num_classes
num_det_layers = _base_.num_det_layers
img_scale = _base_.img_scale
@ -20,9 +25,9 @@ model = dict(
),
bbox_head=dict(
head_module=dict(widen_factor=widen_factor),
loss_cls=dict(loss_weight=0.3 *
loss_cls=dict(loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_obj=dict(loss_weight=0.7 *
loss_obj=dict(loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers))))
pre_transform = _base_.pre_transform
@ -49,7 +54,7 @@ train_pipeline = [
*pre_transform, *mosaic_affine_pipeline,
dict(
type='YOLOv5MixUp',
prob=0.1,
prob=mixup_prob,
pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
dict(
type='mmdet.Albu',
@ -71,5 +76,4 @@ train_pipeline = [
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))

View File

@ -1,19 +1,33 @@
_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
# ========================modified parameters======================
img_scale = (1280, 1280) # width, height
num_classes = 80
# only on Val
batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64)
num_classes = 80 # Number of classes for classification
# Config of batch shapes. Only on val.
# Set batch_shapes_cfg to None to disable it.
batch_shapes_cfg = dict(
img_size=img_scale[0],
# The image scale of padding should be divided by pad_size_divisor
size_divisor=64)
# Basic size of multi-scale prior box
anchors = [
[(19, 27), (44, 40), (38, 94)], # P3/8
[(96, 68), (86, 152), (180, 137)], # P4/16
[(140, 301), (303, 264), (238, 542)], # P5/32
[(436, 615), (739, 380), (925, 792)] # P6/64
]
# Strides of multi-scale prior box
strides = [8, 16, 32, 64]
num_det_layers = 4
num_det_layers = 4 # The number of model output scales
loss_cls_weight = 0.5
loss_bbox_weight = 0.05
loss_obj_weight = 1.0
# The obj loss weights of the four output layers
obj_level_weights = [4.0, 1.0, 0.25, 0.06]
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
# =======================Unmodified in most cases==================
model = dict(
backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)),
neck=dict(
@ -23,12 +37,12 @@ model = dict(
in_channels=[256, 512, 768, 1024], featmap_strides=strides),
prior_generator=dict(base_sizes=anchors, strides=strides),
# scaled based on number of detection layers
loss_cls=dict(loss_weight=0.5 *
loss_cls=dict(loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)),
loss_obj=dict(loss_weight=1.0 *
loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
loss_obj=dict(loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
obj_level_weights=obj_level_weights))
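With the values above (`num_classes=80`, `num_det_layers=4`, `img_scale=(1280, 1280)`), the effective loss weights can be worked out directly; the base weights are calibrated for 80 classes, 3 detection layers and a 640-pixel input, so each term is rescaled for the P6 setup:

num_classes, num_det_layers = 80, 4
img_scale = (1280, 1280)
loss_cls_weight, loss_bbox_weight, loss_obj_weight = 0.5, 0.05, 1.0

print(loss_cls_weight * (num_classes / 80 * 3 / num_det_layers))  # 0.375
print(loss_bbox_weight * (3 / num_det_layers))                    # 0.0375
print(loss_obj_weight * ((img_scale[0] / 640) ** 2
                         * 3 / num_det_layers))                   # 3.0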
pre_transform = _base_.pre_transform
albu_train_transforms = _base_.albu_train_transforms
@ -44,7 +58,7 @@ train_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -88,3 +102,37 @@ val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
test_dataloader = val_dataloader
# Config for Test Time Augmentation (TTA)
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in tta_img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]

View File

@ -0,0 +1,70 @@
_base_ = 'yolov5_s-v61_fast_1xb12-40e_cat.py'
# This configuration is used to provide non-square training examples
# Each side of img_scale must be a multiple of 32
img_scale = (608, 352)  # width, height
anchors = [
[(65, 35), (159, 45), (119, 80)], # P3/8
[(215, 77), (224, 116), (170, 166)], # P4/16
[(376, 108), (339, 176), (483, 190)] # P5/32
]
# ===============================Unmodified in most cases====================
_base_.model.bbox_head.loss_obj.loss_weight = 1.0 * ((img_scale[1] / 640)**2)
_base_.model.bbox_head.prior_generator.base_sizes = anchors
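Because the input is no longer 640 square, the obj loss weight inherited from the base config is rescaled from the image height; `num_det_layers` stays 3, so the usual `3 / num_det_layers` factor is 1 and drops out:

img_scale = (608, 352)  # width, height
print(1.0 * ((img_scale[1] / 640) ** 2))  # 0.3025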
train_pipeline = [
*_base_.pre_transform,
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=_base_.pre_transform),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
dict(
type='mmdet.Albu',
transforms=_base_.albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={
'img': 'image',
'gt_bboxes': 'bboxes'
}),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
_base_.train_dataloader.dataset.pipeline = train_pipeline
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='YOLOv5KeepRatioResize', scale=img_scale),
dict(
type='LetterResize',
scale=img_scale,
allow_scale_up=False,
pad_val=dict(img=114)),
dict(type='mmdet.LoadAnnotations', with_bbox=True),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param'))
]
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
test_dataloader = val_dataloader

View File

@ -0,0 +1,56 @@
_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
anchors = [
[(68, 69), (154, 91), (143, 162)], # P3/8
[(242, 160), (189, 287), (391, 207)], # P4/16
[(353, 337), (539, 341), (443, 432)] # P5/32
]
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(
head_module=dict(num_classes=num_classes),
prior_generator=dict(base_sizes=anchors)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
# The warmup_mim_iter parameter is critical.
# The default value is 1000, which is not suitable for small datasets such as cat.
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa

View File

@ -1,9 +1,7 @@
_base_ = 'yolov5_s-v61_syncbn_8xb16-300e_coco.py'
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args={{_base_.file_client_args}}),
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='LetterResize',
scale=_base_.img_scale,

View File

@ -1,47 +1,95 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# dataset settings
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/coco/' # Root path of data
# Path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/' # Prefix of train image path
# Path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/' # Prefix of val image path
# parameters that often need to be modified
num_classes = 80
img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
max_epochs = 300
save_epoch_intervals = 10
num_classes = 80 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 16
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
val_batch_size_per_gpu = 1
val_num_workers = 2
# persistent_workers must be False if num_workers is 0.
# persistent_workers must be False if num_workers is 0
persistent_workers = True
# Base learning rate for optim_wrapper
base_lr = 0.01
# only on Val
batch_shapes_cfg = dict(
type='BatchShapePolicy',
batch_size=val_batch_size_per_gpu,
img_size=img_scale[0],
size_divisor=32,
extra_pad_ratio=0.5)
# -----model related-----
# Basic size of multi-scale prior box
anchors = [
[(10, 13), (16, 30), (33, 23)], # P3/8
[(30, 61), (62, 45), (59, 119)], # P4/16
[(116, 90), (156, 198), (373, 326)] # P5/32
]
strides = [8, 16, 32]
num_det_layers = 3
# single-scale training is recommended to
# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
base_lr = 0.01
max_epochs = 300 # Maximum training epochs
model_test_cfg = dict(
# The config of multi-label for multi-class prediction.
multi_label=True,
# The number of boxes before NMS
nms_pre=30000,
score_thr=0.001, # Threshold to filter out boxes.
nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
max_per_img=300) # Max number of detections of each image
# ========================Possible modified parameters========================
# -----data related-----
img_scale = (640, 640) # width, height
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 1
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 2
# Config of batch shapes. Only on val.
# It means not used if batch_shapes_cfg is None.
batch_shapes_cfg = dict(
type='BatchShapePolicy',
batch_size=val_batch_size_per_gpu,
img_size=img_scale[0],
# The image scale of padding should be divided by pad_size_divisor
size_divisor=32,
# Additional paddings for pixel scale
extra_pad_ratio=0.5)
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.5
# Strides of multi-scale prior box
strides = [8, 16, 32]
num_det_layers = 3 # The number of model output scales
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config
# -----train val related-----
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
loss_cls_weight = 0.5
loss_bbox_weight = 0.05
loss_obj_weight = 1.0
prior_match_thr = 4. # Priori box matching threshold
# The obj loss weights of the three output layers
obj_level_weights = [4., 1., 0.4]
lr_factor = 0.01 # Learning rate scaling factor
weight_decay = 0.0005
# Save model checkpoint and validation intervals
save_checkpoint_intervals = 10
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# Single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
# ===============================Unmodified in most cases====================
model = dict(
type='YOLODetector',
data_preprocessor=dict(
@ -53,7 +101,7 @@ model = dict(
type='YOLOv5CSPDarknet',
deepen_factor=deepen_factor,
widen_factor=widen_factor,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
neck=dict(
type='YOLOv5PAFPN',
@ -62,7 +110,7 @@ model = dict(
in_channels=[256, 512, 1024],
out_channels=[256, 512, 1024],
num_csp_blocks=3,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
bbox_head=dict(
type='YOLOv5Head',
@ -82,28 +130,25 @@ model = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)),
loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_bbox=dict(
type='IoULoss',
iou_mode='ciou',
bbox_format='xywh',
eps=1e-7,
reduction='mean',
loss_weight=0.05 * (3 / num_det_layers),
loss_weight=loss_bbox_weight * (3 / num_det_layers),
return_iou=True),
loss_obj=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
prior_match_thr=4.,
obj_level_weights=[4., 1., 0.4]),
test_cfg=dict(
multi_label=True,
nms_pre=30000,
score_thr=0.001,
nms=dict(type='nms', iou_threshold=0.65),
max_per_img=300))
loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
prior_match_thr=prior_match_thr,
obj_level_weights=obj_level_weights),
test_cfg=model_test_cfg)
albu_train_transforms = [
dict(type='Blur', p=0.01),
@ -128,7 +173,7 @@ train_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -160,8 +205,8 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
ann_file=train_ann_file,
data_prefix=dict(img=train_data_prefix),
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=train_pipeline))
@ -191,8 +236,8 @@ val_dataloader = dict(
type=dataset_type,
data_root=data_root,
test_mode=True,
data_prefix=dict(img='val2017/'),
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img=val_data_prefix),
ann_file=val_ann_file,
pipeline=test_pipeline,
batch_shapes_cfg=batch_shapes_cfg))
@ -205,7 +250,7 @@ optim_wrapper = dict(
type='SGD',
lr=base_lr,
momentum=0.937,
weight_decay=0.0005,
weight_decay=weight_decay,
nesterov=True,
batch_size_per_gpu=train_batch_size_per_gpu),
constructor='YOLOv5OptimizerConstructor')
@ -214,13 +259,13 @@ default_hooks = dict(
param_scheduler=dict(
type='YOLOv5ParamSchedulerHook',
scheduler_type='linear',
lr_factor=0.01,
lr_factor=lr_factor,
max_epochs=max_epochs),
checkpoint=dict(
type='CheckpointHook',
interval=save_epoch_intervals,
interval=save_checkpoint_intervals,
save_best='auto',
max_keep_ckpts=3))
max_keep_ckpts=max_keep_ckpts))
custom_hooks = [
dict(
@ -235,13 +280,13 @@ custom_hooks = [
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
ann_file=data_root + 'annotations/instances_val2017.json',
ann_file=data_root + val_ann_file,
metric='bbox')
test_evaluator = val_evaluator
train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=save_epoch_intervals)
val_interval=save_checkpoint_intervals)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
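None of the `loss_*_weight` values above are used verbatim: each is rescaled by the class count, the number of detection layers, and the input scale, as the expressions in the head config show. A short sketch of that arithmetic with this config's values:

# Sketch of the loss-weight rescaling from the head config above.
num_classes, num_det_layers = 80, 3
img_scale = (640, 640)
loss_cls_weight, loss_bbox_weight, loss_obj_weight = 0.5, 0.05, 1.0

cls_w = loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)           # 0.5 on COCO
bbox_w = loss_bbox_weight * (3 / num_det_layers)                            # 0.05
obj_w = loss_obj_weight * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers)  # 1.0
print(cls_w, bbox_w, obj_w)

On COCO at 640x640 with three detection layers the factors cancel to the raw weights; they only bite when num_classes, img_scale, or num_det_layers change, e.g. when fine-tuning on a single class.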

View File

@ -1,39 +1,42 @@
_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
# ========================modified parameters======================
data_root = 'data/balloon/'
train_batch_size_per_gpu = 4
train_num_workers = 2
# Path of train annotation file
train_ann_file = 'train.json'
train_data_prefix = 'train/' # Prefix of train image path
# Path of val annotation file
val_ann_file = 'val.json'
val_data_prefix = 'val/' # Prefix of val image path
metainfo = {
'classes': ('balloon', ),
'palette': [
(220, 20, 60),
]
}
num_classes = 1
train_batch_size_per_gpu = 4
train_num_workers = 2
log_interval = 1
# =======================Unmodified in most cases==================
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
data_prefix=dict(img='train/'),
ann_file='train.json'))
data_prefix=dict(img=train_data_prefix),
ann_file=train_ann_file))
val_dataloader = dict(
dataset=dict(
data_root=data_root,
metainfo=metainfo,
data_prefix=dict(img='val/'),
ann_file='val.json'))
data_prefix=dict(img=val_data_prefix),
ann_file=val_ann_file))
test_dataloader = val_dataloader
val_evaluator = dict(ann_file=data_root + 'val.json')
val_evaluator = dict(ann_file=data_root + val_ann_file)
test_evaluator = val_evaluator
model = dict(bbox_head=dict(head_module=dict(num_classes=1)))
default_hooks = dict(logger=dict(interval=1))
model = dict(bbox_head=dict(head_module=dict(num_classes=num_classes)))
default_hooks = dict(logger=dict(interval=log_interval))
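When adapting this single-class pattern to another dataset, the length of `metainfo['classes']` must match `num_classes`, otherwise the head and the evaluator disagree. A minimal consistency check, as a sketch:

# Sketch: keep metainfo and the head's num_classes in sync.
metainfo = {'classes': ('balloon', ), 'palette': [(220, 20, 60)]}
num_classes = 1

assert len(metainfo['classes']) == num_classes, 'class list / head mismatch'
assert len(metainfo['palette']) >= num_classes, 'one palette color per class'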

View File

@ -31,7 +31,7 @@ YOLOv6-l model structure
| YOLOv6-n | P5 | 640 | 400 | Yes | Yes | 6.04 | 36.2 | [config](../yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco/yolov6_n_syncbn_fast_8xb32-400e_coco_20221030_202726-d99b2e82.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco/yolov6_n_syncbn_fast_8xb32-400e_coco_20221030_202726.log.json) |
| YOLOv6-t | P5 | 640 | 400 | Yes | Yes | 8.13 | 41.0 | [config](../yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco/yolov6_t_syncbn_fast_8xb32-400e_coco_20221030_143755-cf0d278f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco/yolov6_t_syncbn_fast_8xb32-400e_coco_20221030_143755.log.json) |
| YOLOv6-s | P5 | 640 | 400 | Yes | Yes | 8.88 | 44.0 | [config](../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035.log.json) |
| YOLOv6-m | P5 | 640 | 300 | Yes | Yes | 16.69 | 48.4 | [config](../yolov6/yolov6_m_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658-85bda3f4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658.log.json) |
| YOLOv6-m | P5 | 640 | 300 | Yes | Yes | 16.69 | 48.4 | [config](../yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658-85bda3f4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658.log.json) |
| YOLOv6-l | P5 | 640 | 300 | Yes | Yes | 20.86 | 51.0 | [config](../yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco/yolov6_l_syncbn_fast_8xb32-300e_coco_20221109_183156-91e3c447.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco/yolov6_l_syncbn_fast_8xb32-300e_coco_20221109_183156.log.json) |
**Note**:

View File

@ -1,8 +1,13 @@
_base_ = './yolov6_m_syncbn_fast_8xb32-300e_coco.py'
# ======================= Possible modified parameters =======================
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 1
# The scaling factor that controls the width of the network structure
widen_factor = 1
# ============================== Unmodified in most cases ===================
model = dict(
backbone=dict(
deepen_factor=deepen_factor,

View File

@ -1,9 +1,16 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py'
# ======================= Possible modified parameters =======================
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.6
# The scaling factor that controls the width of the network structure
widen_factor = 0.75
affine_scale = 0.9
# -----train val related-----
affine_scale = 0.9 # YOLOv5RandomAffine scaling ratio
# ============================== Unmodified in most cases ===================
model = dict(
backbone=dict(
type='YOLOv6CSPBep',

View File

@ -1,8 +1,16 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py'
# ======================= Possible modified parameters =======================
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.25
# -----train val related-----
lr_factor = 0.02 # Learning rate scaling factor
# ============================== Unmodified in most cases ===================
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
@ -10,4 +18,4 @@ model = dict(
head_module=dict(widen_factor=widen_factor),
loss_bbox=dict(iou_mode='siou')))
default_hooks = dict(param_scheduler=dict(lr_factor=0.02))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
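`lr_factor` is the fraction of the base learning rate that remains at the end of a linear decay. Conceptually (a sketch of the schedule's shape, not YOLOv5ParamSchedulerHook's actual code):

# Conceptual sketch of linear LR decay controlled by lr_factor
# (illustrative; not the hook's implementation).
base_lr, lr_factor, max_epochs = 0.01, 0.02, 300

def lr_at(epoch: int) -> float:
    frac = (1 - epoch / max_epochs) * (1.0 - lr_factor) + lr_factor
    return base_lr * frac

print(lr_at(0))           # 0.01: starts at the base LR
print(lr_at(max_epochs))  # 0.0002 = base_lr * lr_factor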

View File

@ -1,8 +1,16 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
# ======================= Possible modified parameters =======================
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.25
# -----train val related-----
lr_factor = 0.02 # Learning rate scaling factor
# ============================== Unmodified in most cases ===================
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
@ -10,4 +18,4 @@ model = dict(
head_module=dict(widen_factor=widen_factor),
loss_bbox=dict(iou_mode='siou')))
default_hooks = dict(param_scheduler=dict(lr_factor=0.02))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))

View File

@ -0,0 +1,56 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4
num_last_epochs = 5
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(head_module=dict(num_classes=num_classes)),
train_cfg=dict(
initial_assigner=dict(num_classes=num_classes),
assigner=dict(num_classes=num_classes)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
_base_.custom_hooks[1].switch_epoch = max_epochs - num_last_epochs
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
# The warmup_mim_iter parameter is critical.
# The default value is 1000, which is not suitable for this small cat dataset.
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(
max_epochs=max_epochs,
val_interval=10,
dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
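`dynamic_intervals=[(max_epochs - num_last_epochs, 1)]` tightens validation from every 10 epochs to every epoch once epoch 35 is reached. A sketch of the intent (not `EpochBasedTrainLoop`'s exact logic):

# Sketch: validation epochs under val_interval=10 with a dynamic
# switch to interval 1 at epoch max_epochs - num_last_epochs.
max_epochs, num_last_epochs, val_interval = 40, 5, 10
switch_epoch = max_epochs - num_last_epochs  # 35

val_epochs = [
    e for e in range(1, max_epochs + 1)
    if e % (1 if e >= switch_epoch else val_interval) == 0
]
print(val_epochs)  # [10, 20, 30, 35, 36, 37, 38, 39, 40]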

View File

@ -1,8 +1,12 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
max_epochs = 300
num_last_epochs = 15
# ======================= Frequently modified parameters =====================
# -----train val related-----
max_epochs = 300 # Maximum training epochs
num_last_epochs = 15 # Last epoch number to switch training pipeline
# ============================== Unmodified in most cases ===================
default_hooks = dict(
param_scheduler=dict(
type='YOLOv5ParamSchedulerHook',

View File

@ -1,31 +1,41 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# dataset settings
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
# ======================= Frequently modified parameters =====================
# -----data related-----
data_root = 'data/coco/' # Root path of data
# Path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/' # Prefix of train image path
# Path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/' # Prefix of val image path
num_last_epochs = 15
max_epochs = 400
num_classes = 80
# parameters that often need to be modified
img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
affine_scale = 0.5
save_epoch_intervals = 10
num_classes = 80 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 32
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
val_batch_size_per_gpu = 1
val_num_workers = 2
# persistent_workers must be False if num_workers is 0.
# persistent_workers must be False if num_workers is 0
persistent_workers = True
# -----train val related-----
# Base learning rate for optim_wrapper
base_lr = 0.01
max_epochs = 400 # Maximum training epochs
num_last_epochs = 15 # Last epoch number to switch training pipeline
# only on Val
# ======================= Possible modified parameters =======================
# -----data related-----
img_scale = (640, 640) # width, height
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 1
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 2
# Config of batch shapes. Only on val.
# It means not used if batch_shapes_cfg is None.
batch_shapes_cfg = dict(
type='BatchShapePolicy',
batch_size=val_batch_size_per_gpu,
@ -33,10 +43,25 @@ batch_shapes_cfg = dict(
size_divisor=32,
extra_pad_ratio=0.5)
# single-scale training is recommended to
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.5
# -----train val related-----
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
lr_factor = 0.01 # Learning rate scaling factor
weight_decay = 0.0005
# Save model checkpoint and validation intervals
save_epoch_intervals = 10
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# Single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
# ============================== Unmodified in most cases ===================
model = dict(
type='YOLODetector',
data_preprocessor=dict(
@ -162,8 +187,8 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
ann_file=train_ann_file,
data_prefix=dict(img=train_data_prefix),
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=train_pipeline))
@ -193,8 +218,8 @@ val_dataloader = dict(
type=dataset_type,
data_root=data_root,
test_mode=True,
data_prefix=dict(img='val2017/'),
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img=val_data_prefix),
ann_file=val_ann_file,
pipeline=test_pipeline,
batch_shapes_cfg=batch_shapes_cfg))
@ -208,7 +233,7 @@ optim_wrapper = dict(
type='SGD',
lr=base_lr,
momentum=0.937,
weight_decay=0.0005,
weight_decay=weight_decay,
nesterov=True,
batch_size_per_gpu=train_batch_size_per_gpu),
constructor='YOLOv5OptimizerConstructor')
@ -217,12 +242,12 @@ default_hooks = dict(
param_scheduler=dict(
type='YOLOv5ParamSchedulerHook',
scheduler_type='cosine',
lr_factor=0.01,
lr_factor=lr_factor,
max_epochs=max_epochs),
checkpoint=dict(
type='CheckpointHook',
interval=save_epoch_intervals,
max_keep_ckpts=3,
max_keep_ckpts=max_keep_ckpts,
save_best='auto'))
custom_hooks = [
@ -242,7 +267,7 @@ custom_hooks = [
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
ann_file=data_root + 'annotations/instances_val2017.json',
ann_file=data_root + val_ann_file,
metric='bbox')
test_evaluator = val_evaluator
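`BatchShapePolicy` exists so that validation batches are padded only as far as their aspect ratios require, instead of always to a fixed square. A rough sketch of the idea under the settings above (illustrative, not the policy's code):

# Rough sketch of aspect-ratio-aware batch shapes (illustrative only).
import math

img_size, size_divisor, extra_pad_ratio = 640, 32, 0.5

def batch_shape(aspect_ratios):  # aspect ratio = height / width
    ar_min, ar_max = min(aspect_ratios), max(aspect_ratios)
    h = w = 1.0
    if ar_max < 1:        # all images wider than tall: shrink height
        h = ar_max
    elif ar_min > 1:      # all images taller than wide: shrink width
        w = 1 / ar_min
    pad = lambda s: math.ceil(s * img_size / size_divisor + extra_pad_ratio) * size_divisor
    return pad(h), pad(w)

print(batch_shape([480 / 640]))  # (512, 672) rather than a fixed (672, 672)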

View File

@ -1,8 +1,13 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py'
# ======================= Possible modified parameters =======================
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.375
# ============================== Unmodified in most cases ===================
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -1,8 +1,13 @@
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
# ======================= Possible modified parameters =======================
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.375
# ============================== Unmodified in most cases ===================
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -12,17 +12,22 @@ YOLOv7 surpasses all known object detectors in both speed and accuracy in the ra
<img src="https://user-images.githubusercontent.com/17425982/204231759-cc5c77a9-38c6-4a41-85be-eb97e4b2bcbb.png"/>
</div>
<div align=center>
<img alt="YOLOv7-l" src="https://user-images.githubusercontent.com/68552295/216335336-963bd03a-71f3-4556-97af-18b20d69e065.png" width = 95.5%/>
YOLOv7-l-P5 model structure
</div>
## Results and models
### COCO
| Backbone | Arch | Size | SyncBN | AMP | Mem (GB) | Box AP | Config | Download |
| :---------: | :--: | :--: | :----: | :-: | :------: | :----: | :------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv7-tiny | P5 | 640 | Yes | Yes | 2.7 | 37.5 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719.log.json) |
| YOLOv7-l | P5 | 640 | Yes | Yes | 10.3 | 50.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601-8113c0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601.log.json) |
| YOLOv7-x | P5 | 640 | Yes | Yes | 13.7 | 52.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331-ef949a68.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331.log.json) |
| YOLOv7-w | P6 | 1280 | Yes | Yes | 27.0 | 54.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031-a68ef9d2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031.log.json) |
| YOLOv7-e | P6 | 1280 | Yes | Yes | 42.5 | 55.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636-34425033.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636.log.json) |
| Backbone | Arch | Size | SyncBN | AMP | Mem (GB) | Box AP | Config | Download |
| :---------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv7-tiny | P5 | 640 | Yes | Yes | 2.7 | 37.5 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719.log.json) |
| YOLOv7-l | P5 | 640 | Yes | Yes | 10.3 | 50.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601-8113c0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601.log.json) |
| YOLOv7-x | P5 | 640 | Yes | Yes | 13.7 | 52.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331-ef949a68.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331.log.json) |
| YOLOv7-w | P6 | 1280 | Yes | Yes | 27.0 | 54.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031-a68ef9d2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031.log.json) |
| YOLOv7-e | P6 | 1280 | Yes | Yes | 42.5 | 55.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636-34425033.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636.log.json) |
**Note**:
In the official YOLOv7 code, the `random_perspective` data augmentation used in COCO object detection training relies on mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task.

View File

@ -1,42 +1,103 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# dataset settings
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/coco/' # Root path of data
# Path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/' # Prefix of train image path
# Path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/' # Prefix of val image path
# parameters that often need to be modified
img_scale = (640, 640) # width, height
max_epochs = 300
save_epoch_intervals = 10
num_classes = 80 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 16
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
# persistent_workers must be False if num_workers is 0.
# persistent_workers must be False if num_workers is 0
persistent_workers = True
val_batch_size_per_gpu = 1
val_num_workers = 2
# only on Val
batch_shapes_cfg = dict(
type='BatchShapePolicy',
batch_size=val_batch_size_per_gpu,
img_size=img_scale[0],
size_divisor=32,
extra_pad_ratio=0.5)
# different from yolov5
# -----model related-----
# Basic size of multi-scale prior box
anchors = [
[(12, 16), (19, 36), (40, 28)], # P3/8
[(36, 75), (76, 55), (72, 146)], # P4/16
[(142, 110), (192, 243), (459, 401)] # P5/32
]
strides = [8, 16, 32]
num_det_layers = 3
num_classes = 80
# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
base_lr = 0.01
max_epochs = 300 # Maximum training epochs
# single-scale training is recommended to
num_epoch_stage2 = 30  # Switch to the stage-2 evaluation interval for the last 30 epochs
val_interval_stage2 = 1  # Evaluation interval during stage 2
model_test_cfg = dict(
# The config of multi-label for multi-class prediction.
multi_label=True,
# The number of boxes before NMS.
nms_pre=30000,
score_thr=0.001, # Threshold to filter out boxes.
nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
max_per_img=300) # Max number of detections of each image
# ========================Possible modified parameters========================
# -----data related-----
img_scale = (640, 640) # width, height
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 1
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 2
# Config of batch shapes. Only on val.
# It means not used if batch_shapes_cfg is None.
batch_shapes_cfg = dict(
type='BatchShapePolicy',
batch_size=val_batch_size_per_gpu,
img_size=img_scale[0],
# The image scale of padding should be divided by pad_size_divisor
size_divisor=32,
# Additional paddings for pixel scale
extra_pad_ratio=0.5)
# -----model related-----
strides = [8, 16, 32] # Strides of multi-scale prior box
num_det_layers = 3 # The number of model output scales
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
# Data augmentation
max_translate_ratio = 0.2 # YOLOv5RandomAffine
scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine
mixup_prob = 0.15 # YOLOv5MixUp
randchoice_mosaic_prob = [0.8, 0.2]
mixup_alpha = 8.0 # YOLOv5MixUp
mixup_beta = 8.0 # YOLOv5MixUp
# -----train val related-----
loss_cls_weight = 0.3
loss_bbox_weight = 0.05
loss_obj_weight = 0.7
# BatchYOLOv7Assigner params
simota_candidate_topk = 10
simota_iou_weight = 3.0
simota_cls_weight = 1.0
prior_match_thr = 4. # Priori box matching threshold
obj_level_weights = [4., 1.,
0.4] # The obj loss weights of the three output layers
lr_factor = 0.1 # Learning rate scaling factor
weight_decay = 0.0005
save_epoch_intervals = 1 # Save model checkpoint and validation intervals
max_keep_ckpts = 3 # The maximum checkpoints to keep.
# Single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
# ===============================Unmodified in most cases====================
model = dict(
type='YOLODetector',
data_preprocessor=dict(
@ -47,7 +108,7 @@ model = dict(
backbone=dict(
type='YOLOv7Backbone',
arch='L',
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
neck=dict(
type='YOLOv7PAFPN',
@ -61,7 +122,7 @@ model = dict(
in_channels=[512, 1024, 1024],
# The real output channel will be multiplied by 2
out_channels=[128, 256, 512],
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
bbox_head=dict(
type='YOLOv7Head',
@ -80,31 +141,28 @@ model = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)),
loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_bbox=dict(
type='IoULoss',
iou_mode='ciou',
bbox_format='xywh',
reduction='mean',
loss_weight=0.05 * (3 / num_det_layers),
loss_weight=loss_bbox_weight * (3 / num_det_layers),
return_iou=True),
loss_obj=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=0.7 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
obj_level_weights=[4., 1., 0.4],
loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
prior_match_thr=prior_match_thr,
obj_level_weights=obj_level_weights,
# BatchYOLOv7Assigner params
prior_match_thr=4.,
simota_candidate_topk=10,
simota_iou_weight=3.0,
simota_cls_weight=1.0),
test_cfg=dict(
multi_label=True,
nms_pre=30000,
score_thr=0.001,
nms=dict(type='nms', iou_threshold=0.65),
max_per_img=300))
simota_candidate_topk=simota_candidate_topk,
simota_iou_weight=simota_iou_weight,
simota_cls_weight=simota_cls_weight),
test_cfg=model_test_cfg)
pre_transform = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
@ -121,8 +179,8 @@ mosiac4_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
max_translate_ratio=max_translate_ratio, # note
scaling_ratio_range=scaling_ratio_range, # note
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -138,8 +196,8 @@ mosiac9_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
max_translate_ratio=max_translate_ratio, # note
scaling_ratio_range=scaling_ratio_range, # note
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -148,16 +206,16 @@ mosiac9_pipeline = [
randchoice_mosaic_pipeline = dict(
type='RandomChoice',
transforms=[mosiac4_pipeline, mosiac9_pipeline],
prob=[0.8, 0.2])
prob=randchoice_mosaic_prob)
train_pipeline = [
*pre_transform,
randchoice_mosaic_pipeline,
dict(
type='YOLOv5MixUp',
alpha=8.0, # note
beta=8.0, # note
prob=0.15,
alpha=mixup_alpha, # note
beta=mixup_beta, # note
prob=mixup_prob,
pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
@ -177,8 +235,8 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
ann_file=train_ann_file,
data_prefix=dict(img=train_data_prefix),
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=train_pipeline))
@ -208,8 +266,8 @@ val_dataloader = dict(
type=dataset_type,
data_root=data_root,
test_mode=True,
data_prefix=dict(img='val2017/'),
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img=val_data_prefix),
ann_file=val_ann_file,
pipeline=test_pipeline,
batch_shapes_cfg=batch_shapes_cfg))
@ -220,9 +278,9 @@ optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
type='SGD',
lr=0.01,
lr=base_lr,
momentum=0.937,
weight_decay=0.0005,
weight_decay=weight_decay,
nesterov=True,
batch_size_per_gpu=train_batch_size_per_gpu),
constructor='YOLOv7OptimWrapperConstructor')
@ -231,27 +289,14 @@ default_hooks = dict(
param_scheduler=dict(
type='YOLOv5ParamSchedulerHook',
scheduler_type='cosine',
lr_factor=0.1, # note
lr_factor=lr_factor, # note
max_epochs=max_epochs),
checkpoint=dict(
type='CheckpointHook',
save_param_scheduler=False,
interval=1,
interval=save_epoch_intervals,
save_best='auto',
max_keep_ckpts=3))
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10), # Can be accelerated
ann_file=data_root + 'annotations/instances_val2017.json',
metric='bbox')
test_evaluator = val_evaluator
train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=save_epoch_intervals,
dynamic_intervals=[(270, 1)])
max_keep_ckpts=max_keep_ckpts))
custom_hooks = [
dict(
@ -263,7 +308,17 @@ custom_hooks = [
priority=49)
]
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10), # Can be accelerated
ann_file=data_root + val_ann_file,
metric='bbox')
test_evaluator = val_evaluator
train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=save_epoch_intervals,
dynamic_intervals=[(max_epochs - num_epoch_stage2, val_interval_stage2)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# randomness = dict(seed=1, deterministic=True)
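`mixup_alpha` and `mixup_beta` parameterize the Beta distribution that YOLOv5-style MixUp draws its blending ratio from; with alpha = beta = 8 the ratio concentrates near 0.5. A sketch of the blend, with hypothetical image arrays (the real transform also merges the two images' boxes and labels):

# Sketch of YOLOv5-style MixUp blending (illustrative only).
import numpy as np

mixup_alpha, mixup_beta = 8.0, 8.0
rng = np.random.default_rng(0)

img_a = rng.integers(0, 256, (640, 640, 3)).astype(np.float32)  # hypothetical
img_b = rng.integers(0, 256, (640, 640, 3)).astype(np.float32)  # hypothetical

ratio = rng.beta(mixup_alpha, mixup_beta)  # concentrated around 0.5
mixed = ratio * img_a + (1 - ratio) * img_b
print(round(float(ratio), 3), mixed.shape)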

View File

@ -0,0 +1,56 @@
_base_ = 'yolov7_tiny_syncbn_fast_8x16b-300e_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
anchors = [
[(68, 69), (154, 91), (143, 162)], # P3/8
[(242, 160), (189, 287), (391, 207)], # P4/16
[(353, 337), (539, 341), (443, 432)] # P5/32
]
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(
head_module=dict(num_classes=num_classes),
prior_generator=dict(base_sizes=anchors)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
# The warmup_mim_iter parameter is critical.
# The default value is 1000, which is not suitable for this small cat dataset.
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
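The custom `anchors` above are sized for the cat dataset; COCO-fit anchors rarely match a new dataset's box statistics, so re-estimating them is common. A minimal k-means sketch over (width, height) pairs, with the box array as a hypothetical stand-in for real annotations:

# Minimal k-means sketch for re-estimating anchors from box sizes.
import numpy as np

rng = np.random.default_rng(0)
wh = rng.uniform(20, 600, size=(500, 2))  # hypothetical (width, height) pairs

k = 9  # 3 anchors per layer x 3 detection layers
centers = wh[rng.choice(len(wh), size=k, replace=False)].copy()
for _ in range(50):  # Lloyd iterations
    labels = np.argmin(((wh[:, None] - centers[None]) ** 2).sum(-1), axis=1)
    for i in range(k):
        if (labels == i).any():
            centers[i] = wh[labels == i].mean(axis=0)

anchors = centers[np.argsort(centers.prod(axis=1))]  # sorted small to large
print(np.round(anchors).astype(int))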

View File

@ -1,10 +1,26 @@
_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'
# ========================modified parameters========================
# -----model related-----
# Data augmentation
max_translate_ratio = 0.1 # YOLOv5RandomAffine
scaling_ratio_range = (0.5, 1.6) # YOLOv5RandomAffine
mixup_prob = 0.05 # YOLOv5MixUp
randchoice_mosaic_prob = [0.8, 0.2]
mixup_alpha = 8.0 # YOLOv5MixUp
mixup_beta = 8.0 # YOLOv5MixUp
# -----train val related-----
loss_cls_weight = 0.5
loss_obj_weight = 1.0
lr_factor = 0.01 # Learning rate scaling factor
# ===============================Unmodified in most cases====================
num_classes = _base_.num_classes
num_det_layers = _base_.num_det_layers
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
model = dict(
backbone=dict(
arch='Tiny', act_cfg=dict(type='LeakyReLU', negative_slope=0.1)),
@ -18,9 +34,9 @@ model = dict(
use_repconv_outs=False),
bbox_head=dict(
head_module=dict(in_channels=[128, 256, 512]),
loss_cls=dict(loss_weight=0.5 *
loss_cls=dict(loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_obj=dict(loss_weight=1.0 *
loss_obj=dict(loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers))))
mosiac4_pipeline = [
@ -33,8 +49,8 @@ mosiac4_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_translate_ratio=0.1, # change
scaling_ratio_range=(0.5, 1.6), # change
max_translate_ratio=max_translate_ratio, # change
scaling_ratio_range=scaling_ratio_range, # change
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -50,8 +66,8 @@ mosiac9_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_translate_ratio=0.1, # change
scaling_ratio_range=(0.5, 1.6), # change
max_translate_ratio=max_translate_ratio, # change
scaling_ratio_range=scaling_ratio_range, # change
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
]
@ -59,16 +75,16 @@ mosiac9_pipeline = [
randchoice_mosaic_pipeline = dict(
type='RandomChoice',
transforms=[mosiac4_pipeline, mosiac9_pipeline],
prob=[0.8, 0.2])
prob=randchoice_mosaic_prob)
train_pipeline = [
*pre_transform,
randchoice_mosaic_pipeline,
dict(
type='YOLOv5MixUp',
alpha=8.0,
beta=8.0,
prob=0.05, # change
alpha=mixup_alpha,
beta=mixup_beta,
prob=mixup_prob, # change
pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
@ -79,4 +95,4 @@ train_pipeline = [
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
default_hooks = dict(param_scheduler=dict(lr_factor=0.01))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
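`randchoice_mosaic_prob` is the sampling weight that `RandomChoice` uses to pick one of the two mosaic pipelines per iteration. A sketch of the selection, with the pipelines reduced to placeholder names:

# Sketch of RandomChoice-style weighted pipeline selection.
import random

randchoice_mosaic_prob = [0.8, 0.2]
pipelines = ['mosaic4', 'mosaic9']  # placeholders for the transform lists

random.seed(0)
picks = random.choices(pipelines, weights=randchoice_mosaic_prob, k=10)
print(picks)  # roughly an 8:2 split over many draws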

View File

@ -1,18 +1,50 @@
_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'
# ========================modified parameters========================
# -----data related-----
img_scale = (1280, 1280)  # width, height
num_classes = 80
# only on Val
batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64)
num_classes = 80 # Number of classes for classification
# Config of batch shapes. Only on val
# It means not used if batch_shapes_cfg is None.
batch_shapes_cfg = dict(
    img_size=img_scale[0],
    # The image scale of padding should be divided by pad_size_divisor
    size_divisor=64)
tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
# -----model related-----
# Basic size of multi-scale prior box
anchors = [
[(19, 27), (44, 40), (38, 94)], # P3/8
[(96, 68), (86, 152), (180, 137)], # P4/16
[(140, 301), (303, 264), (238, 542)], # P5/32
[(436, 615), (739, 380), (925, 792)] # P6/64
]
strides = [8, 16, 32, 64]
num_det_layers = 4
strides = [8, 16, 32, 64] # Strides of multi-scale prior box
num_det_layers = 4  # The number of model output scales
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
# Data augmentation
max_translate_ratio = 0.2 # YOLOv5RandomAffine
scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine
mixup_prob = 0.15 # YOLOv5MixUp
randchoice_mosaic_prob = [0.8, 0.2]
mixup_alpha = 8.0 # YOLOv5MixUp
mixup_beta = 8.0 # YOLOv5MixUp
# -----train val related-----
loss_cls_weight = 0.3
loss_bbox_weight = 0.05
loss_obj_weight = 0.7
obj_level_weights = [4.0, 1.0, 0.25, 0.06]
simota_candidate_topk = 20
# The only difference between P6 and P5 in terms of
# hyperparameters is lr_factor
lr_factor = 0.2
# ===============================Unmodified in most cases====================
pre_transform = _base_.pre_transform
model = dict(
backbone=dict(arch='W', out_indices=(2, 3, 4, 5)),
@ -26,19 +58,17 @@ model = dict(
type='YOLOv7p6HeadModule',
in_channels=[128, 256, 384, 512],
featmap_strides=strides,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
prior_generator=dict(base_sizes=anchors, strides=strides),
simota_candidate_topk=20, # note
simota_candidate_topk=simota_candidate_topk, # note
# scaled based on number of detection layers
loss_cls=dict(loss_weight=0.3 *
loss_cls=dict(loss_weight=loss_cls_weight *
(num_classes / 80 * 3 / num_det_layers)),
loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)),
loss_obj=dict(loss_weight=0.7 *
loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
loss_obj=dict(loss_weight=loss_obj_weight *
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
pre_transform = _base_.pre_transform
obj_level_weights=obj_level_weights))
mosiac4_pipeline = [
dict(
@ -50,8 +80,8 @@ mosiac4_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
max_translate_ratio=max_translate_ratio, # note
scaling_ratio_range=scaling_ratio_range, # note
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -67,8 +97,8 @@ mosiac9_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
max_translate_ratio=max_translate_ratio, # note
scaling_ratio_range=scaling_ratio_range, # note
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -77,16 +107,16 @@ mosiac9_pipeline = [
randchoice_mosaic_pipeline = dict(
type='RandomChoice',
transforms=[mosiac4_pipeline, mosiac9_pipeline],
prob=[0.8, 0.2])
prob=randchoice_mosaic_prob)
train_pipeline = [
*pre_transform,
randchoice_mosaic_pipeline,
dict(
type='YOLOv5MixUp',
alpha=8.0, # note
beta=8.0, # note
prob=0.15,
alpha=mixup_alpha, # note
beta=mixup_beta, # note
prob=mixup_prob,
pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
@ -115,6 +145,38 @@ val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
test_dataloader = val_dataloader
# The only difference between P6 and P5 in terms of
# hyperparameters is lr_factor
default_hooks = dict(param_scheduler=dict(lr_factor=0.2))
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
# Config for Test Time Augmentation. (TTA)
_multiscale_resize_transforms = [
dict(
type='Compose',
transforms=[
dict(type='YOLOv5KeepRatioResize', scale=s),
dict(
type='LetterResize',
scale=s,
allow_scale_up=False,
pad_val=dict(img=114))
]) for s in tta_img_scales
]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
_multiscale_resize_transforms,
[
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'pad_param', 'flip',
'flip_direction'))
]
])
]
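The TTA pipeline above fans each test image out into 3 scales x 2 flips = 6 views whose predictions are merged afterwards. A sketch of the view enumeration:

# Sketch: the 6 views produced by the TTA pipeline above.
from itertools import product

tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
flips = [True, False]

views = list(product(tta_img_scales, flips))
for scale, flip in views:
    print(f'scale={scale}, flip={flip}')
print(len(views), 'views per test image')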

View File

@ -20,19 +20,26 @@ YOLOv8-P5 model structure
### COCO
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download |
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv8-n | P5 | 640 | Yes | Yes | 2.8 | 37.2 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_n_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) |
| YOLOv8-s | P5 | 640 | Yes | Yes | 4.0 | 44.2 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_s_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) |
| YOLOv8-m | P5 | 640 | Yes | Yes | 7.2 | 49.8 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_m_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) |
| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download |
| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :--------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) |
| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | 39.9 | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) |
| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) |
| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | 46.8 | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) |
| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) |
| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | 52.3 | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) |
| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) |
| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | 54.4 | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) |
| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) |
| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | 55.0 | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) |
**Note**
In the official YOLOv8 code, the [bbox annotation](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd340a2611148fbf2a0af59b544bd7b1b/ultralytics/yolo/data/dataloaders/v5loader.py#L1011), [`random_perspective`](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd3/ultralytics/yolo/data/dataloaders/v5augmentations.py#L208) and [`copy_paste`](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd3/ultralytics/yolo/data/dataloaders/v5augmentations.py#L208) data augmentations used in COCO object detection training rely on mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We trained the official YOLOv8-s code with the `8xb16` configuration, and its best performance is also 44.2. We will support mask annotations in object detection tasks in the next version.
1. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code, but has no effect on performance.
2. The performance is unstable and may fluctuate by about 0.3 mAP; the best-performing checkpoint in `COCO` training of `YOLOv8` may not be from the last epoch. The performance shown above is from the best model.
3. We provide [scripts](https://github.com/open-mmlab/mmyolo/tree/dev/tools/model_converters/yolov8_to_mmyolo.py) to convert official weights to MMYOLO.
4. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision.
4. `SyncBN` means using SyncBN, `AMP` indicates training with mixed precision.
5. The `Mask Refine` performance corresponds to the weights officially released by YOLOv8. `Mask Refine` means refining the bbox by mask while loading annotations and transforming after `YOLOv5RandomAffine`; the L and X models additionally use `Copy Paste`.
6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flip transformations (flipped and not flipped). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
## Citation

View File

@ -54,3 +54,87 @@ Models:
Metrics:
box AP: 49.8
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth
- Name: yolov8_l_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 9.8
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 52.1
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth
- Name: yolov8_x_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 12.2
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 52.7
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth
- Name: yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 2.5
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 37.4
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth
- Name: yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 4.0
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 45.1
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth
- Name: yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 7.0
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 50.6
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth
- Name: yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 9.1
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 53.0
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth
- Name: yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco
In Collection: YOLOv8
Config: configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py
Metadata:
Training Memory (GB): 12.4
Epochs: 500
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 54.0
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth

View File

@ -0,0 +1,65 @@
_base_ = './yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py'
# This config use refining bbox and `YOLOv5CopyPaste`.
# Refining bbox means refining bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`
# ========================modified parameters======================
deepen_factor = 1.00
widen_factor = 1.00
last_stage_out_channels = 512
mixup_prob = 0.15
copypaste_prob = 0.3
# =======================Unmodified in most cases==================
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform
affine_scale = _base_.affine_scale
model = dict(
backbone=dict(
last_stage_out_channels=last_stage_out_channels,
deepen_factor=deepen_factor,
widen_factor=widen_factor),
neck=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
in_channels=[256, 512, last_stage_out_channels],
out_channels=[256, 512, last_stage_out_channels]),
bbox_head=dict(
head_module=dict(
widen_factor=widen_factor,
in_channels=[256, 512, last_stage_out_channels])))
mosaic_affine_transform = [
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(type='YOLOv5CopyPaste', prob=copypaste_prob),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_aspect_ratio=100.,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114),
min_area_ratio=_base_.min_area_ratio,
use_mask_refine=_base_.use_mask2refine)
]
train_pipeline = [
*pre_transform, *mosaic_affine_transform,
dict(
type='YOLOv5MixUp',
prob=mixup_prob,
pre_transform=[*pre_transform, *mosaic_affine_transform]),
*last_transform
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
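A quick worked check of the `border` arithmetic used by `Mosaic` + `YOLOv5RandomAffine` above (plain arithmetic, shown only for illustration): the mosaic stitches four images on a double-size canvas, and the negative border crops it back to `img_scale`.

img_scale = (640, 640)  # width, height
border = (-img_scale[0] // 2, -img_scale[1] // 2)
print(border)  # (-320, -320): the 1280x1280 mosaic canvas is cropped back to 640x640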

View File

@ -1,9 +1,16 @@
_base_ = './yolov8_m_syncbn_fast_8xb16-500e_coco.py'
# ========================modified parameters======================
deepen_factor = 1.00
widen_factor = 1.00
last_stage_out_channels = 512
mixup_ratio = 0.15
mixup_prob = 0.15
# =======================Unmodified in most cases==================
pre_transform = _base_.pre_transform
mosaic_affine_transform = _base_.mosaic_affine_transform
last_transform = _base_.last_transform
model = dict(
backbone=dict(
@ -20,16 +27,11 @@ model = dict(
widen_factor=widen_factor,
in_channels=[256, 512, last_stage_out_channels])))
pre_transform = _base_.pre_transform
albu_train_transform = _base_.albu_train_transform
mosaic_affine_transform = _base_.mosaic_affine_transform
last_transform = _base_.last_transform
train_pipeline = [
*pre_transform, *mosaic_affine_transform,
dict(
type='YOLOv5MixUp',
prob=mixup_ratio,
prob=mixup_prob,
pre_transform=[*pre_transform, *mosaic_affine_transform]),
*last_transform
]

View File

@ -0,0 +1,85 @@
_base_ = './yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py'
# This config use refining bbox and `YOLOv5CopyPaste`.
# Refining bbox means refining bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
last_stage_out_channels = 768
affine_scale = 0.9
mixup_prob = 0.1
copypaste_prob = 0.1
# ===============================Unmodified in most cases====================
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform
model = dict(
backbone=dict(
last_stage_out_channels=last_stage_out_channels,
deepen_factor=deepen_factor,
widen_factor=widen_factor),
neck=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
in_channels=[256, 512, last_stage_out_channels],
out_channels=[256, 512, last_stage_out_channels]),
bbox_head=dict(
head_module=dict(
widen_factor=widen_factor,
in_channels=[256, 512, last_stage_out_channels])))
mosaic_affine_transform = [
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(type='YOLOv5CopyPaste', prob=copypaste_prob),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
max_aspect_ratio=100.,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114),
min_area_ratio=_base_.min_area_ratio,
use_mask_refine=_base_.use_mask2refine)
]
train_pipeline = [
*pre_transform, *mosaic_affine_transform,
dict(
type='YOLOv5MixUp',
prob=mixup_prob,
pre_transform=[*pre_transform, *mosaic_affine_transform]),
*last_transform
]
train_pipeline_stage2 = [
*pre_transform,
dict(type='YOLOv5KeepRatioResize', scale=img_scale),
dict(
type='LetterResize',
scale=img_scale,
allow_scale_up=True,
pad_val=dict(img=114.0)),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
max_aspect_ratio=_base_.max_aspect_ratio,
border_val=(114, 114, 114),
min_area_ratio=_base_.min_area_ratio,
use_mask_refine=_base_.use_mask2refine), *last_transform
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2

View File

@ -1,15 +1,17 @@
_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
last_stage_out_channels = 768
affine_scale = 0.9
mixup_ratio = 0.1
mixup_prob = 0.1
num_classes = _base_.num_classes
num_det_layers = _base_.num_det_layers
# =======================Unmodified in most cases==================
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform
model = dict(
backbone=dict(
@ -26,10 +28,6 @@ model = dict(
widen_factor=widen_factor,
in_channels=[256, 512, last_stage_out_channels])))
pre_transform = _base_.pre_transform
albu_train_transform = _base_.albu_train_transform
last_transform = _base_.last_transform
mosaic_affine_transform = [
dict(
type='Mosaic',
@ -47,17 +45,16 @@ mosaic_affine_transform = [
border_val=(114, 114, 114))
]
# enable mixup
train_pipeline = [
*pre_transform, *mosaic_affine_transform,
dict(
type='YOLOv5MixUp',
prob=mixup_ratio,
prob=mixup_prob,
pre_transform=[*pre_transform, *mosaic_affine_transform]),
*last_transform
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
train_pipeline_stage2 = [
*pre_transform,
dict(type='YOLOv5KeepRatioResize', scale=img_scale),
@ -75,16 +72,5 @@ train_pipeline_stage2 = [
border_val=(114, 114, 114)), *last_transform
]
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0001,
update_buffers=True,
strict_load=False,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=_base_.max_epochs - 10,
switch_pipeline=train_pipeline_stage2)
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2

View File

@ -0,0 +1,12 @@
_base_ = './yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py'
# This config will refine bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`
deepen_factor = 0.33
widen_factor = 0.25
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))

View File

@ -0,0 +1,52 @@
_base_ = 'yolov8_s_syncbn_fast_8xb16-500e_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
close_mosaic_epochs = 5
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(head_module=dict(num_classes=num_classes)),
train_cfg=dict(assigner=dict(num_classes=num_classes)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
_base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
    # The warmup_mim_iter parameter is critical.
    # The default value is 1000, which is not suitable for the small cat
    # dataset (see the worked example after this config).
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
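A worked example of why `warmup_mim_iter` is lowered to 10 here (the image count is an assumption for illustration; the cat dataset is on the order of a hundred images):

num_images, batch_size = 150, 12  # assumed dataset size; batch size from above
iters_per_epoch = num_images // batch_size  # ~12 iterations per epoch
default_warmup_iters = 1000  # the default warmup_mim_iter value
print(default_warmup_iters / iters_per_epoch)  # ~83 epochs of warmup,
# far longer than the whole 40-epoch schedule -- hence warmup_mim_iter=10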

View File

@ -0,0 +1,83 @@
_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'
# This config will refine bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`
# ========================modified parameters======================
use_mask2refine = True
min_area_ratio = 0.01 # Minimum area ratio for YOLOv5RandomAffine to filter bboxes
# ===============================Unmodified in most cases====================
pre_transform = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=True,
mask2bbox=use_mask2refine)
]
last_transform = [
# Delete gt_masks to avoid more computation
dict(type='RemoveDataElement', keys=['gt_masks']),
dict(
type='mmdet.Albu',
transforms=_base_.albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={
'img': 'image',
'gt_bboxes': 'bboxes'
}),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
train_pipeline = [
*pre_transform,
dict(
type='Mosaic',
img_scale=_base_.img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
max_aspect_ratio=_base_.max_aspect_ratio,
# img_scale is (width, height)
border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
border_val=(114, 114, 114),
min_area_ratio=min_area_ratio,
use_mask_refine=use_mask2refine),
*last_transform
]
train_pipeline_stage2 = [
*pre_transform,
dict(type='YOLOv5KeepRatioResize', scale=_base_.img_scale),
dict(
type='LetterResize',
scale=_base_.img_scale,
allow_scale_up=True,
pad_val=dict(img=114.0)),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
max_aspect_ratio=_base_.max_aspect_ratio,
border_val=(114, 114, 114),
min_area_ratio=min_area_ratio,
use_mask_refine=use_mask2refine), *last_transform
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
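A minimal sketch of the mask2bbox idea behind `use_mask2refine` (an illustration, not MMYOLO's implementation): after geometric transforms, a tight box is re-derived from the instance mask instead of transforming the original box corners.

import numpy as np

def mask2bbox(mask: np.ndarray) -> np.ndarray:
    """Binary (H, W) mask -> tight [x1, y1, x2, y2] box."""
    ys, xs = np.nonzero(mask)
    return np.array([xs.min(), ys.min(), xs.max() + 1, ys.max() + 1])

mask = np.zeros((8, 8), dtype=bool)
mask[2:5, 3:7] = True  # a 3-row by 4-column instance region
print(mask2bbox(mask))  # [3 2 7 5]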

View File

@ -1,37 +1,100 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
# dataset settings
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/coco/' # Root path of data
# Path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/' # Prefix of train image path
# Path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/' # Prefix of val image path
# parameters that often need to be modified
num_classes = 80
img_scale = (640, 640) # height, width
deepen_factor = 0.33
widen_factor = 0.5
max_epochs = 500
save_epoch_intervals = 10
num_classes = 80 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 16
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
val_batch_size_per_gpu = 1
val_num_workers = 2
# persistent_workers must be False if num_workers is 0
persistent_workers = True
strides = [8, 16, 32]
num_det_layers = 3
last_stage_out_channels = 1024
# Base learning rate for optim_wrapper
# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
base_lr = 0.01
lr_factor = 0.01
max_epochs = 500 # Maximum training epochs
# Disable mosaic augmentation for final 10 epochs (stage 2)
close_mosaic_epochs = 10
model_test_cfg = dict(
# The config of multi-label for multi-class prediction.
multi_label=True,
# The number of boxes before NMS
nms_pre=30000,
score_thr=0.001, # Threshold to filter out boxes.
nms=dict(type='nms', iou_threshold=0.7), # NMS type and threshold
max_per_img=300) # Max number of detections of each image
# ========================Possible modified parameters========================
# -----data related-----
img_scale = (640, 640) # width, height
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 1
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 2
# Config of batch shapes. Only on val.
# We tested that YOLOv8-m gets 0.02 higher mAP with it than without.
batch_shapes_cfg = None
# You can turn on `batch_shapes_cfg` by uncommenting the following lines.
# batch_shapes_cfg = dict(
# type='BatchShapePolicy',
# batch_size=val_batch_size_per_gpu,
# img_size=img_scale[0],
# # The image scale of padding should be divided by pad_size_divisor
# size_divisor=32,
# # Additional paddings for pixel scale
# extra_pad_ratio=0.5)
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.5
# Strides of multi-scale prior box
strides = [8, 16, 32]
# The output channel of the last stage
last_stage_out_channels = 1024
num_det_layers = 3 # The number of model output scales
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config
# -----train val related-----
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
# Aspect ratio (width/height) threshold used by YOLOv5RandomAffine to filter bboxes
max_aspect_ratio = 100
tal_topk = 10 # Number of bboxes selected at each level
tal_alpha = 0.5 # A hyper-parameter related to alignment_metrics
tal_beta = 6.0 # A hyper-parameter related to alignment_metrics
# TODO: Automatically scale loss_weight based on number of detection layers
loss_cls_weight = 0.5
loss_bbox_weight = 7.5
# Since the DFL loss is implemented differently in the official repo
# and in mmdet, we divide loss_weight by 4.
loss_dfl_weight = 1.5 / 4
lr_factor = 0.01 # Learning rate scaling factor
weight_decay = 0.0005
# Save model checkpoint and validation intervals in stage 1
save_epoch_intervals = 10
# Validation intervals in stage 2
val_interval_stage2 = 1
# The maximum checkpoints to keep.
max_keep_ckpts = 2
# Single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
# ===============================Unmodified in most cases====================
model = dict(
type='YOLODetector',
data_preprocessor=dict(
@ -45,7 +108,7 @@ model = dict(
last_stage_out_channels=last_stage_out_channels,
deepen_factor=deepen_factor,
widen_factor=widen_factor,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
neck=dict(
type='YOLOv8PAFPN',
@ -54,7 +117,7 @@ model = dict(
in_channels=[256, 512, last_stage_out_channels],
out_channels=[256, 512, last_stage_out_channels],
num_csp_blocks=3,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
bbox_head=dict(
type='YOLOv8Head',
@ -64,47 +127,41 @@ model = dict(
in_channels=[256, 512, last_stage_out_channels],
widen_factor=widen_factor,
reg_max=16,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True),
featmap_strides=[8, 16, 32]),
featmap_strides=strides),
prior_generator=dict(
type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]),
type='mmdet.MlvlPointGenerator', offset=0.5, strides=strides),
bbox_coder=dict(type='DistancePointBBoxCoder'),
# scaled based on number of detection layers
loss_cls=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='none',
loss_weight=0.5),
loss_weight=loss_cls_weight),
loss_bbox=dict(
type='IoULoss',
iou_mode='ciou',
bbox_format='xyxy',
reduction='sum',
loss_weight=7.5,
loss_weight=loss_bbox_weight,
return_iou=False),
# Since the dfloss is implemented differently in the official
# and mmdet, we're going to divide loss_weight by 4.
loss_dfl=dict(
type='mmdet.DistributionFocalLoss',
reduction='mean',
loss_weight=1.5 / 4)),
loss_weight=loss_dfl_weight)),
train_cfg=dict(
assigner=dict(
type='BatchTaskAlignedAssigner',
num_classes=num_classes,
use_ciou=True,
topk=10,
alpha=0.5,
beta=6.0,
topk=tal_topk,
alpha=tal_alpha,
beta=tal_beta,
eps=1e-9)),
test_cfg=dict(
multi_label=True,
nms_pre=30000,
score_thr=0.001,
nms=dict(type='nms', iou_threshold=0.7),
max_per_img=300))
test_cfg=model_test_cfg)
albu_train_transform = [
albu_train_transforms = [
dict(type='Blur', p=0.01),
dict(type='MedianBlur', p=0.01),
dict(type='ToGray', p=0.01),
@ -119,7 +176,7 @@ pre_transform = [
last_transform = [
dict(
type='mmdet.Albu',
transforms=albu_train_transform,
transforms=albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
@ -135,6 +192,7 @@ last_transform = [
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
train_pipeline = [
*pre_transform,
dict(
@ -146,8 +204,8 @@ train_pipeline = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
max_aspect_ratio=100,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
max_aspect_ratio=max_aspect_ratio,
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
@ -166,8 +224,8 @@ train_pipeline_stage2 = [
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
max_aspect_ratio=100,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
max_aspect_ratio=max_aspect_ratio,
border_val=(114, 114, 114)), *last_transform
]
@ -181,8 +239,8 @@ train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
ann_file=train_ann_file,
data_prefix=dict(img=train_data_prefix),
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=train_pipeline))
@ -201,17 +259,6 @@ test_pipeline = [
'scale_factor', 'pad_param'))
]
# only on Val
# you can turn on `batch_shapes_cfg`,
# we tested YOLOv8-m will get 0.02 higher than not using it.
batch_shapes_cfg = None
# batch_shapes_cfg = dict(
# type='BatchShapePolicy',
# batch_size=val_batch_size_per_gpu,
# img_size=img_scale[0],
# size_divisor=32,
# extra_pad_ratio=0.5)
val_dataloader = dict(
batch_size=val_batch_size_per_gpu,
num_workers=val_num_workers,
@ -223,8 +270,8 @@ val_dataloader = dict(
type=dataset_type,
data_root=data_root,
test_mode=True,
data_prefix=dict(img='val2017/'),
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img=val_data_prefix),
ann_file=val_ann_file,
pipeline=test_pipeline,
batch_shapes_cfg=batch_shapes_cfg))
@ -238,7 +285,7 @@ optim_wrapper = dict(
type='SGD',
lr=base_lr,
momentum=0.937,
weight_decay=0.0005,
weight_decay=weight_decay,
nesterov=True,
batch_size_per_gpu=train_batch_size_per_gpu),
constructor='YOLOv5OptimizerConstructor')
@ -253,7 +300,7 @@ default_hooks = dict(
type='CheckpointHook',
interval=save_epoch_intervals,
save_best='auto',
max_keep_ckpts=2))
max_keep_ckpts=max_keep_ckpts))
custom_hooks = [
dict(
@ -265,14 +312,14 @@ custom_hooks = [
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - 10,
switch_epoch=max_epochs - close_mosaic_epochs,
switch_pipeline=train_pipeline_stage2)
]
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
ann_file=data_root + 'annotations/instances_val2017.json',
ann_file=data_root + val_ann_file,
metric='bbox')
test_evaluator = val_evaluator
@ -280,7 +327,8 @@ train_cfg = dict(
type='EpochBasedTrainLoop',
max_epochs=max_epochs,
val_interval=save_epoch_intervals,
dynamic_intervals=[(max_epochs - 10, 1)])
dynamic_intervals=[((max_epochs - close_mosaic_epochs),
val_interval_stage2)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
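A hedged sketch of the alignment metric behind `BatchTaskAlignedAssigner`, using the `tal_*` values defined above (it follows the task-aligned formulation `cls_score**alpha * iou**beta`; the full assigner involves more steps such as candidate filtering):

tal_alpha, tal_beta, tal_topk = 0.5, 6.0, 10

def alignment_metric(cls_score: float, iou: float) -> float:
    return cls_score ** tal_alpha * iou ** tal_beta

# A confident, well-localized prediction dominates a confident but poorly
# localized one; the tal_topk highest-scoring candidates per gt are kept.
print(alignment_metric(0.9, 0.9))  # ~0.504
print(alignment_metric(0.9, 0.5))  # ~0.015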

View File

@ -0,0 +1,13 @@
_base_ = './yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py'
# This config use refining bbox and `YOLOv5CopyPaste`.
# Refining bbox means refining bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`
deepen_factor = 1.00
widen_factor = 1.25
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))

View File

@ -13,16 +13,32 @@ In this report, we present some experienced improvements to YOLO series, forming
</div>
<div align=center>
<img src="https://user-images.githubusercontent.com/27466624/211143387-004c6718-3d61-44c8-9406-f56b9238452a.jpg"/>
<img src="https://user-images.githubusercontent.com/71306851/218628641-6c0101e6-e40e-4b16-a696-c0f55b8d335c.png"/>
YOLOX-l model structure
</div>
## Results and Models
## 🥳 🚀 Results and Models
| Backbone | size | Mem (GB) | box AP | Config | Download |
| :--------: | :--: | :------: | :----: | :---------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_tiny_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) |
| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_s_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) |
| Backbone | Size | Batch Size | AMP | RTMDet-Hyp | Mem (GB) | Box AP | Config | Download |
| :--------: | :--: | :--------: | :-: | :--------: | :------: | :---------: | :-----------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| YOLOX-tiny | 416 | 8xb8 | No | No | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) |
| YOLOX-tiny | 416 | 8xb32 | Yes | Yes | 4.9 | 34.3 (+1.6) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637-4c338102.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637.log.json) |
| YOLOX-s | 640 | 8xb8 | Yes | No | 2.9 | 40.7 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600-2b224d8b.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600.log.json) |
| YOLOX-s | 640 | 8xb32 | Yes | Yes | 9.8 | 41.9 (+1.2) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645.log.json) |
| YOLOX-m | 640 | 8xb8 | Yes | No | 4.9 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_m_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218-a71a6b25.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218.log.json) |
| YOLOX-m | 640 | 8xb32 | Yes | Yes | 17.6 | 47.5 (+0.6) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328-e657e182.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328.log.json) |
| YOLOX-l | 640 | 8xb8 | Yes | No | 8.0 | 50.1 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_l_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast__8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715-c731eb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast_8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715.log.json) |
| YOLOX-x | 640 | 8xb8 | Yes | No | 9.8 | 51.4 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_x_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950-1d509fab.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950.log.json) |
YOLOX's default training configuration of `8xb8` results in a long training time, so we expected `8xb32` to speed up training without decreasing mAP. Under the YOLOX-s default configuration we modified `train_batch_size_per_gpu` from 8 to 32, `batch_augments_interval` from 10 to 1 and `base_lr` from 0.01 to 0.04 based on the linear scaling rule (see the worked example after this list), but this resulted in mAP degradation. Finally, we found that using RTMDet's training hyperparameters improves performance for YOLOX Tiny/S/M, which also validates the superiority of RTMDet's training hyperparameters.
The modified training parameters are as follows:
1. train_batch_size_per_gpu: 8 -> 32
2. batch_augments_interval: 10 -> 1
3. num_last_epochs: 15 -> 20
4. optim cfg: SGD -> AdamW, base_lr 0.01 -> 0.004, weight_decay 0.0005 -> 0.05
5. ema momentum: 0.0001 -> 0.0002
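The worked example referenced above: the linear scaling rule applied to the batch-size change, as plain arithmetic.

old_total_bs = 8 * 8  # 8 GPUs x 8 images per GPU
new_total_bs = 8 * 32  # 8 GPUs x 32 images per GPU
base_lr = 0.01
scaled_lr = base_lr * new_total_bs / old_total_bs
print(scaled_lr)  # 0.04 -- the linearly scaled value that degraded mAP;
# the RTMDet hyperparameters instead switch to AdamW with base_lr=0.004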
**Note**:

View File

@ -20,9 +20,9 @@ Collections:
Models:
- Name: yolox_tiny_8xb8-300e_coco
- Name: yolox_tiny_fast_8xb8-300e_coco
In Collection: YOLOX
Config: configs/yolox/yolox_tiny_8xb8-300e_coco.py
Config: configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py
Metadata:
Training Memory (GB): 2.8
Epochs: 300
@ -32,15 +32,87 @@ Models:
Metrics:
box AP: 32.7
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth
- Name: yolox_s_8xb8-300e_coco
- Name: yolox_s_fast_8xb8-300e_coco
In Collection: YOLOX
Config: configs/yolox/yolox_s_8xb8-300e_coco.py
Config: configs/yolox/yolox_s_fast_8xb8-300e_coco.py
Metadata:
Training Memory (GB): 5.6
Training Memory (GB): 2.9
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.8
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth
box AP: 40.7
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600-2b224d8b.pth
- Name: yolox_m_fast_8xb8-300e_coco
In Collection: YOLOX
Config: configs/yolox/yolox_m_fast_8xb8-300e_coco.py
Metadata:
Training Memory (GB): 4.9
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 46.9
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218-a71a6b25.pth
- Name: yolox_l_fast_8xb8-300e_coco
In Collection: YOLOX
Config: configs/yolox/yolox_l_fast_8xb8-300e_coco.py
Metadata:
Training Memory (GB): 8.0
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 50.1
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast_8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715-c731eb1c.pth
- Name: yolox_x_fast_8xb8-300e_coco
In Collection: YOLOX
Config: configs/yolox/yolox_x_fast_8xb8-300e_coco.py
Metadata:
Training Memory (GB): 9.8
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 51.4
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950-1d509fab.pth
- Name: yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco
In Collection: YOLOX
Config: configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py
Metadata:
Training Memory (GB): 4.9
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 34.3
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637-4c338102.pth
- Name: yolox_s_fast_8xb32-300e-rtmdet-hyp_coco
In Collection: YOLOX
Config: configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py
Metadata:
Training Memory (GB): 9.8
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.9
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth
- Name: yolox_m_fast_8xb32-300e-rtmdet-hyp_coco
In Collection: YOLOX
Config: configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py
Metadata:
Training Memory (GB): 17.6
Epochs: 300
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 47.5
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328-e657e182.pth

View File

@ -1,8 +1,10 @@
_base_ = './yolox_s_8xb8-300e_coco.py'
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 1.0
widen_factor = 1.0
# =======================Unmodified in most cases==================
# model settings
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -0,0 +1,12 @@
_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py'
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
# =======================Unmodified in most cases==================
# model settings
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))

View File

@ -1,8 +1,10 @@
_base_ = './yolox_s_8xb8-300e_coco.py'
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.67
widen_factor = 0.75
# =======================Unmodified in most cases==================
# model settings
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -0,0 +1,21 @@
_base_ = './yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py'
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.25
use_depthwise = True
# =======================Unmodified in most cases==================
# model settings
model = dict(
backbone=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
use_depthwise=use_depthwise),
neck=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
use_depthwise=use_depthwise),
bbox_head=dict(
head_module=dict(
widen_factor=widen_factor, use_depthwise=use_depthwise)))

View File

@ -1,9 +1,11 @@
_base_ = './yolox_tiny_8xb8-300e_coco.py'
_base_ = './yolox_tiny_fast_8xb8-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.25
use_depthwise = True
# =======================Unmodified in most cases==================
# model settings
model = dict(
backbone=dict(

View File

@ -0,0 +1,55 @@
# TODO: Need to solve the problem of multiple file_client_args parameters
# _file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection/',
# 'data/': 's3://openmmlab/datasets/detection/'
# }))
_file_client_args = dict(backend='disk')
tta_model = dict(
type='mmdet.DetTTAModel',
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300))
img_scales = [(640, 640), (320, 320), (960, 960)]
# LoadImageFromFile
# / | \
# Resize Resize Resize # noqa
# / \ / \ / \
# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa
# | | | | | |
# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn
# | | | | | |
# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_file_client_args),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='mmdet.Resize', scale=s, keep_ratio=True)
for s in img_scales
],
[
# ``RandomFlip`` must be placed before ``Pad``, otherwise
# bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
],
[
dict(
type='mmdet.Pad',
pad_to_square=True,
pad_val=dict(img=(114.0, 114.0, 114.0))),
],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]

View File

@ -0,0 +1,76 @@
_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py'
data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
num_last_epochs = 5
max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth' # noqa
model = dict(
backbone=dict(frozen_stages=4),
bbox_head=dict(head_module=dict(num_classes=num_classes)))
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
ann_file='annotations/trainval.json',
data_prefix=dict(img='images/')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
ann_file='annotations/test.json',
data_prefix=dict(img='images/')))
test_dataloader = val_dataloader
param_scheduler = [
dict(
# use quadratic formula to warm up 3 epochs
# and lr is updated by iteration
# TODO: fix default scope in get function
type='mmdet.QuadraticWarmupLR',
by_epoch=True,
begin=0,
end=3,
convert_to_iter_based=True),
dict(
        # use cosine lr from epoch 5 to 35
type='CosineAnnealingLR',
eta_min=_base_.base_lr * 0.05,
begin=5,
T_max=max_epochs - num_last_epochs,
end=max_epochs - num_last_epochs,
by_epoch=True,
convert_to_iter_based=True),
dict(
# use fixed lr during last num_last_epochs epochs
type='ConstantLR',
by_epoch=True,
factor=1,
begin=max_epochs - num_last_epochs,
end=max_epochs,
)
]
_base_.custom_hooks[0].num_last_epochs = num_last_epochs
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator
default_hooks = dict(
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
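A hedged sketch of the cosine stage defined in `param_scheduler` above (assuming the standard cosine-annealing formula; mmengine's iteration-based step accounting may differ slightly):

import math

base_lr = 0.004  # inherited from the rtmdet-hyp base config
eta_min = base_lr * 0.05
begin, end = 5, 35  # max_epochs=40, num_last_epochs=5

def cosine_lr(epoch: int) -> float:
    t = min(max(epoch - begin, 0), end - begin)
    return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * t / (end - begin)))

print(round(cosine_lr(5), 6))  # 0.004  (start of the cosine decay)
print(round(cosine_lr(35), 6))  # 0.0002 (eta_min, then held for the last epochs)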

View File

@ -0,0 +1,87 @@
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
# ========================modified parameters======================
# Batch size of a single GPU during training
# 8 -> 32
train_batch_size_per_gpu = 32
# Multi-scale training interval
# 10 -> 1
batch_augments_interval = 1
# Last epoch number to switch training pipeline
# 15 -> 20
num_last_epochs = 20
# Base learning rate for optim_wrapper. Corresponding to 8xb32=256 bs
base_lr = 0.004
# SGD -> AdamW
optim_wrapper = dict(
_delete_=True,
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# 0.0001 -> 0.0002
ema_momentum = 0.0002
# ============================== Unmodified in most cases ===================
model = dict(
data_preprocessor=dict(batch_augments=[
dict(
type='YOLOXBatchSyncRandomResize',
random_size_range=(480, 800),
size_divisor=32,
interval=batch_augments_interval)
]))
param_scheduler = [
dict(
# use quadratic formula to warm up 5 epochs
# and lr is updated by iteration
# TODO: fix default scope in get function
type='mmdet.QuadraticWarmupLR',
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(
        # use cosine lr from epoch 5 to 280 (max_epochs - num_last_epochs)
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=5,
T_max=_base_.max_epochs - num_last_epochs,
end=_base_.max_epochs - num_last_epochs,
by_epoch=True,
convert_to_iter_based=True),
dict(
# use fixed lr during last num_last_epochs epochs
type='ConstantLR',
by_epoch=True,
factor=1,
begin=_base_.max_epochs - num_last_epochs,
end=_base_.max_epochs,
)
]
custom_hooks = [
dict(
type='YOLOXModeSwitchHook',
num_last_epochs=num_last_epochs,
new_train_pipeline=_base_.train_pipeline_stage2,
priority=48),
dict(type='mmdet.SyncNormHook', priority=48),
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=ema_momentum,
update_buffers=True,
strict_load=False,
priority=49)
]
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
train_cfg = dict(dynamic_intervals=[(_base_.max_epochs - num_last_epochs, 1)])
auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu)
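A short sketch of what the `auto_scale_lr` setting above implies when automatic LR scaling is enabled at launch (linear scaling against `base_batch_size`, shown here as plain arithmetic):

base_batch_size = 8 * 32  # 256, as configured above
base_lr = 0.004

def scaled_lr(num_gpus: int, batch_per_gpu: int) -> float:
    return base_lr * (num_gpus * batch_per_gpu) / base_batch_size

print(scaled_lr(8, 32))  # 0.004 -- unchanged at the reference setup
print(scaled_lr(4, 32))  # 0.002 -- halved when the total batch size halves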

View File

@ -1,21 +1,73 @@
_base_ = '../_base_/default_runtime.py'
_base_ = ['../_base_/default_runtime.py', 'yolox_p5_tta.py']
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/coco/' # Root path of data
# path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/' # Prefix of train image path
# path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/' # Prefix of val image path
img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
save_epoch_intervals = 10
num_classes = 80 # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 8
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
# persistent_workers must be False if num_workers is 0
persistent_workers = True
# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
base_lr = 0.01
max_epochs = 300 # Maximum training epochs
model_test_cfg = dict(
yolox_style=True, # better
# The config of multi-label for multi-class prediction
multi_label=True, # 40.5 -> 40.7
score_thr=0.001, # Threshold to filter out boxes
max_per_img=300, # Max number of detections of each image
nms=dict(type='nms', iou_threshold=0.65)) # NMS type and threshold
# ========================Possible modified parameters========================
# -----data related-----
img_scale = (640, 640) # width, height
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 1
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 2
max_epochs = 300
num_last_epochs = 15
# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.5
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
# Interval for generating a new random resize shape
batch_augments_interval = 10
# -----train val related-----
weight_decay = 0.0005
loss_cls_weight = 1.0
loss_bbox_weight = 5.0
loss_obj_weight = 1.0
loss_bbox_aux_weight = 1.0
center_radius = 2.5 # SimOTAAssigner
num_last_epochs = 15
random_affine_scaling_ratio_range = (0.1, 2)
mixup_ratio_range = (0.8, 1.6)
# Save model checkpoint and validation intervals
save_epoch_intervals = 10
# The maximum checkpoints to keep.
max_keep_ckpts = 3
ema_momentum = 0.0001
# ===============================Unmodified in most cases====================
# model settings
model = dict(
type='YOLODetector',
@ -29,14 +81,14 @@ model = dict(
# TODO: Waiting for mmengine support
use_syncbn=False,
data_preprocessor=dict(
type='mmdet.DetDataPreprocessor',
type='YOLOv5DetDataPreprocessor',
pad_size_divisor=32,
batch_augments=[
dict(
type='mmdet.BatchSyncRandomResize',
type='YOLOXBatchSyncRandomResize',
random_size_range=(480, 800),
size_divisor=32,
interval=10)
interval=batch_augments_interval)
]),
backbone=dict(
type='YOLOXCSPDarknet',
@ -44,7 +96,7 @@ model = dict(
widen_factor=widen_factor,
out_indices=(2, 3, 4),
spp_kernal_sizes=(5, 9, 13),
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True),
),
neck=dict(
@ -53,51 +105,48 @@ model = dict(
widen_factor=widen_factor,
in_channels=[256, 512, 1024],
out_channels=256,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True)),
bbox_head=dict(
type='YOLOXHead',
head_module=dict(
type='YOLOXHeadModule',
num_classes=80,
num_classes=num_classes,
in_channels=256,
feat_channels=256,
widen_factor=widen_factor,
stacked_convs=2,
featmap_strides=(8, 16, 32),
use_depthwise=False,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
norm_cfg=norm_cfg,
act_cfg=dict(type='SiLU', inplace=True),
),
loss_cls=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_weight=loss_cls_weight),
loss_bbox=dict(
type='mmdet.IoULoss',
mode='square',
eps=1e-16,
reduction='sum',
loss_weight=5.0),
loss_weight=loss_bbox_weight),
loss_obj=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_weight=loss_obj_weight),
loss_bbox_aux=dict(
type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)),
type='mmdet.L1Loss',
reduction='sum',
loss_weight=loss_bbox_aux_weight)),
train_cfg=dict(
assigner=dict(
type='mmdet.SimOTAAssigner',
center_radius=2.5,
center_radius=center_radius,
iou_calculator=dict(type='mmdet.BboxOverlaps2D'))),
test_cfg=dict(
yolox_style=True, # better
multi_label=True, # 40.5 -> 40.7
score_thr=0.001,
max_per_img=300,
nms=dict(type='nms', iou_threshold=0.65)))
test_cfg=model_test_cfg)
pre_transform = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
@ -113,13 +162,13 @@ train_pipeline_stage1 = [
pre_transform=pre_transform),
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=(0.1, 2),
scaling_ratio_range=random_affine_scaling_ratio_range,
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='YOLOXMixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
ratio_range=mixup_ratio_range,
pad_val=114.0,
pre_transform=pre_transform),
dict(type='mmdet.YOLOXHSVRandomAug'),
@ -155,14 +204,15 @@ train_pipeline_stage2 = [
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
persistent_workers=True,
persistent_workers=persistent_workers,
pin_memory=True,
collate_fn=dict(type='yolov5_collate'),
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
ann_file=train_ann_file,
data_prefix=dict(img=train_data_prefix),
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=train_pipeline_stage1))
@ -183,15 +233,15 @@ test_pipeline = [
val_dataloader = dict(
batch_size=val_batch_size_per_gpu,
num_workers=val_num_workers,
persistent_workers=True,
persistent_workers=persistent_workers,
pin_memory=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img='val2017/'),
ann_file=val_ann_file,
data_prefix=dict(img=val_data_prefix),
test_mode=True,
pipeline=test_pipeline))
test_dataloader = val_dataloader
@ -200,18 +250,20 @@ test_dataloader = val_dataloader
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
ann_file=data_root + 'annotations/instances_val2017.json',
ann_file=data_root + val_ann_file,
metric='bbox')
test_evaluator = val_evaluator
# optimizer
# default 8 gpu
base_lr = 0.01
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
type='SGD',
lr=base_lr,
momentum=0.9,
weight_decay=weight_decay,
nesterov=True),
paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
@ -247,7 +299,10 @@ param_scheduler = [
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='auto'))
type='CheckpointHook',
interval=save_epoch_intervals,
max_keep_ckpts=max_keep_ckpts,
save_best='auto'))
custom_hooks = [
dict(
@ -259,7 +314,7 @@ custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0001,
momentum=ema_momentum,
update_buffers=True,
strict_load=False,
priority=49)
@ -271,6 +326,6 @@ train_cfg = dict(
val_interval=save_epoch_intervals,
dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
auto_scale_lr = dict(base_batch_size=64)
auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
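A conceptual sketch of what `YOLOXBatchSyncRandomResize` does with the values above (illustrative only; the real transform also synchronizes the sampled size across GPUs):

import random

random_size_range = (480, 800)
size_divisor, interval = 32, 10

def sample_size() -> int:
    # Every `interval` iterations a new target size is drawn from the
    # range, rounded down to a multiple of size_divisor.
    lo = random_size_range[0] // size_divisor
    hi = random_size_range[1] // size_divisor
    return random.randint(lo, hi) * size_divisor

sizes = {sample_size() for _ in range(1000)}
assert all(s % size_divisor == 0 and 480 <= s <= 800 for s in sizes)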

View File

@ -1,24 +1,32 @@
_base_ = './yolox_s_8xb8-300e_coco.py'
_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py'
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.375
# Multi-scale training interval
# 10 -> 1
batch_augments_interval = 1
scaling_ratio_range = (0.5, 1.5)
# =======================Unmodified in most cases==================
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
# model settings
model = dict(
data_preprocessor=dict(batch_augments=[
dict(
type='mmdet.BatchSyncRandomResize',
random_size_range=(320, 640), # note
type='YOLOXBatchSyncRandomResize',
random_size_range=(320, 640),
size_divisor=32,
interval=10)
interval=batch_augments_interval)
]),
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
train_pipeline_stage1 = [
*pre_transform,
dict(
@ -28,7 +36,7 @@ train_pipeline_stage1 = [
pre_transform=pre_transform),
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=(0.5, 1.5), # note
scaling_ratio_range=scaling_ratio_range, # note
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(type='mmdet.YOLOXHSVRandomAug'),

View File

@ -0,0 +1,100 @@
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 0.33
widen_factor = 0.375
scaling_ratio_range = (0.5, 1.5)
# =======================Unmodified in most cases==================
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
test_img_scale = (416, 416)
tta_img_scales = [test_img_scale, (320, 320), (640, 640)]
# model settings
model = dict(
data_preprocessor=dict(batch_augments=[
dict(
type='YOLOXBatchSyncRandomResize',
random_size_range=(320, 640),
size_divisor=32,
interval=10)
]),
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
train_pipeline_stage1 = [
*pre_transform,
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=scaling_ratio_range, # note
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.FilterAnnotations',
min_gt_bbox_wh=(1, 1),
keep_empty=False),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='mmdet.Resize', scale=test_img_scale, keep_ratio=True), # note
dict(
type='mmdet.Pad',
pad_to_square=True,
pad_val=dict(img=(114.0, 114.0, 114.0))),
dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# Config for Test Time Augmentation. (TTA)
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='mmdet.Resize', scale=s, keep_ratio=True)
for s in tta_img_scales
],
[
# ``RandomFlip`` must be placed before ``Pad``, otherwise
# bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type='mmdet.RandomFlip', prob=1.),
dict(type='mmdet.RandomFlip', prob=0.)
],
[
dict(
type='mmdet.Pad',
pad_to_square=True,
pad_val=dict(img=(114.0, 114.0, 114.0))),
],
[
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction'))
]
])
]

View File

@ -1,8 +1,10 @@
_base_ = './yolox_s_8xb8-300e_coco.py'
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
# ========================modified parameters======================
deepen_factor = 1.33
widen_factor = 1.25
# =======================Unmodified in most cases==================
# model settings
model = dict(
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),

View File

@ -18,7 +18,6 @@ import mmcv
from mmengine import Config, DictAction, MessageHub
from mmengine.utils import ProgressBar
from mmyolo.utils import register_all_modules
from mmyolo.utils.boxam_utils import (BoxAMDetectorVisualizer,
BoxAMDetectorWrapper, DetAblationLayer,
DetBoxScoreTarget, GradCAM,
@ -47,6 +46,9 @@ IGNORE_LOSS_PARAMS = {
'yolov6': ['loss_cls'],
'yolox': ['loss_obj'],
'rtmdet': ['loss_cls'],
'yolov7': ['loss_obj'],
'yolov8': ['loss_cls'],
'ppyoloe': ['loss_cls'],
}
# This parameter is required in some algorithms
@ -177,8 +179,6 @@ def init_detector_and_visualizer(args, cfg):
def main():
register_all_modules()
args = parse_args()
# hard code

View File

@ -6,10 +6,10 @@ from typing import Sequence
import mmcv
from mmdet.apis import inference_detector, init_detector
from mmengine import Config, DictAction
from mmengine.registry import init_default_scope
from mmengine.utils import ProgressBar
from mmyolo.registry import VISUALIZERS
from mmyolo.utils import register_all_modules
from mmyolo.utils.misc import auto_arrange_images, get_file_list
@ -96,13 +96,12 @@ class ActivationsWrapper:
def main():
args = parse_args()
# register all modules in mmdet into the registries
register_all_modules()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
init_default_scope(cfg.get('default_scope', 'mmyolo'))
channel_reduction = args.channel_reduction
if channel_reduction == 'None':
channel_reduction = None

View File

@ -1,14 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
from argparse import ArgumentParser
from pathlib import Path
import mmcv
from mmdet.apis import inference_detector, init_detector
from mmengine.config import Config, ConfigDict
from mmengine.logging import print_log
from mmengine.utils import ProgressBar, path
from mmyolo.registry import VISUALIZERS
from mmyolo.utils import register_all_modules, switch_to_deploy
from mmyolo.utils import switch_to_deploy
from mmyolo.utils.labelme_utils import LabelmeFormat
from mmyolo.utils.misc import get_file_list, show_data_classes
@ -29,6 +31,10 @@ def parse_args():
'--deploy',
action='store_true',
help='Switch model to deployment mode')
parser.add_argument(
'--tta',
action='store_true',
help='Whether to use test time augmentation')
parser.add_argument(
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
parser.add_argument(
@ -50,12 +56,37 @@ def main():
if args.to_labelme and args.show:
raise RuntimeError('`--to-labelme` or `--show` only '
'can choose one at the same time.')
config = args.config
# register all modules in mmdet into the registries
register_all_modules()
if isinstance(config, (str, Path)):
config = Config.fromfile(config)
elif not isinstance(config, Config):
raise TypeError('config must be a filename or Config object, '
f'but got {type(config)}')
if 'init_cfg' in config.model.backbone:
config.model.backbone.init_cfg = None
if args.tta:
assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \
" Can't use tta !"
assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \
"in config. Can't use tta !"
config.model = ConfigDict(**config.tta_model, module=config.model)
test_data_cfg = config.test_dataloader.dataset
while 'dataset' in test_data_cfg:
test_data_cfg = test_data_cfg['dataset']
# batch_shapes_cfg will forcibly control the size of the output image;
# it is not compatible with tta.
if 'batch_shapes_cfg' in test_data_cfg:
test_data_cfg.batch_shapes_cfg = None
test_data_cfg.pipeline = config.tta_pipeline
# TODO: TTA mode will error if cfg_options is not set.
# This is an mmdet issue and needs to be fixed later.
# build the model from a config file and a checkpoint file
model = init_detector(args.config, args.checkpoint, device=args.device)
model = init_detector(
config, args.checkpoint, device=args.device, cfg_options={})
if args.deploy:
switch_to_deploy(model)

View File

@ -14,10 +14,12 @@ python demo/large_image_demo.py \
import os
import random
from argparse import ArgumentParser
from pathlib import Path
import mmcv
import numpy as np
from mmdet.apis import inference_detector, init_detector
from mmengine.config import Config, ConfigDict
from mmengine.logging import print_log
from mmengine.utils import ProgressBar
@ -28,7 +30,7 @@ except ImportError:
'to install sahi first for large image inference.')
from mmyolo.registry import VISUALIZERS
from mmyolo.utils import register_all_modules, switch_to_deploy
from mmyolo.utils import switch_to_deploy
from mmyolo.utils.large_image import merge_results_by_nms, shift_predictions
from mmyolo.utils.misc import get_file_list
@ -50,13 +52,17 @@ def parse_args():
'--deploy',
action='store_true',
help='Switch model to deployment mode')
parser.add_argument(
'--tta',
action='store_true',
help='Whether to use test time augmentation')
parser.add_argument(
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
parser.add_argument(
'--patch-size', type=int, default=640, help='The size of patches')
parser.add_argument(
'--patch-overlap-ratio',
type=int,
type=float,
default=0.25,
help='Ratio of overlap between two patches')
parser.add_argument(
@ -90,11 +96,37 @@ def parse_args():
def main():
args = parse_args()
# register all modules in mmdet into the registries
register_all_modules()
config = args.config
if isinstance(config, (str, Path)):
config = Config.fromfile(config)
elif not isinstance(config, Config):
raise TypeError('config must be a filename or Config object, '
f'but got {type(config)}')
if 'init_cfg' in config.model.backbone:
config.model.backbone.init_cfg = None
if args.tta:
assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \
" Can't use tta !"
assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \
"in config. Can't use tta !"
config.model = ConfigDict(**config.tta_model, module=config.model)
test_data_cfg = config.test_dataloader.dataset
while 'dataset' in test_data_cfg:
test_data_cfg = test_data_cfg['dataset']
# batch_shapes_cfg will forcibly control the size of the output image;
# it is not compatible with tta.
if 'batch_shapes_cfg' in test_data_cfg:
test_data_cfg.batch_shapes_cfg = None
test_data_cfg.pipeline = config.tta_pipeline
# TODO: TTA mode will error if cfg_options is not set.
# This is an mmdet issue and needs to be fixed later.
# build the model from a config file and a checkpoint file
model = init_detector(args.config, args.checkpoint, device=args.device)
model = init_detector(
config, args.checkpoint, device=args.device, cfg_options={})
if args.deploy:
switch_to_deploy(model)
@ -238,7 +270,7 @@ def main():
src_image_shape=(height, width),
nms_cfg={
'type': args.merge_nms_type,
'iou_thr': args.merge_iou_thr
'iou_threshold': args.merge_iou_thr
})
visualizer.add_datasample(

View File

@ -20,7 +20,6 @@ from mmdet.apis import inference_detector, init_detector
from mmengine.utils import track_iter_progress
from mmyolo.registry import VISUALIZERS
from mmyolo.utils import register_all_modules
def parse_args():
@ -49,9 +48,6 @@ def main():
('Please specify at least one operation (save/show the '
'video) with the argument "--out" or "--show"')
# register all modules in mmdet into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_detector(args.config, args.checkpoint, device=args.device)

View File

@ -26,7 +26,7 @@ RUN apt-get update \
# Install MMEngine , MMCV and MMDet
RUN pip install --no-cache-dir openmim && \
mim install --no-cache-dir "mmengine>=0.3.1" "mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc5,<3.1.0"
mim install --no-cache-dir "mmengine>=0.6.0" "mmcv>=2.0.0rc4,<2.1.0" "mmdet>=3.0.0rc6,<3.1.0"
# Install MMYOLO
RUN git clone https://github.com/open-mmlab/mmyolo.git /mmyolo && \

View File

@ -30,7 +30,7 @@ RUN wget -q https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRU
# Install OPENMIM MMENGINE MMDET
RUN pip install --no-cache-dir openmim \
&& mim install --no-cache-dir "mmengine>=0.3.1" "mmdet>=3.0.0rc5,<3.1.0" \
&& mim install --no-cache-dir "mmengine>=0.6.0" "mmdet>=3.0.0rc6,<3.1.0" \
&& mim install --no-cache-dir opencv-python==4.5.5.64 opencv-python-headless==4.5.5.64
RUN git clone https://github.com/open-mmlab/mmcv.git -b 2.x mmcv \

docs/README.md
View File

@ -0,0 +1,28 @@
## Build Documentation
1. Clone MMYOLO
```bash
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
```
2. Install the build dependencies of the documentation
```bash
pip install -r requirements/docs.txt
```
3. Change directory to `docs/en` or `docs/zh_cn`
```bash
cd docs/en # or docs/zh_cn
```
4. Build documentation
```bash
make html
```
5. Open `_build/html/index.html` in your browser

View File

@ -0,0 +1 @@
# MMYOLO cross-library application

View File

@ -1,548 +0,0 @@
# How to xxx
This tutorial collects answers to common questions about `How to xxx with MMYOLO`. Feel free to update this doc if you meet new `How to` questions and find the answers!
## Add plugins to the backbone network
Please see [Plugins](plugins.md).
## Apply multiple Necks
If you want to stack multiple Necks, you can directly set the Neck parameters in the config. MMYOLO supports concatenating multiple Necks in the form of `List`. You need to ensure that the output channel of the previous Neck matches the input channel of the next Neck. If you need to adjust the number of channels, you can insert the `mmdet.ChannelMapper` module to align the number of channels between multiple Necks. The specific configuration is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
deepen_factor = _base_.deepen_factor
widen_factor = _base_.widen_factor
model = dict(
    type='YOLODetector',
    neck=[
        dict(
            type='YOLOv5PAFPN',
            deepen_factor=deepen_factor,
            widen_factor=widen_factor,
            in_channels=[256, 512, 1024],
            out_channels=[256, 512, 1024],  # The out_channels is controlled by widen_factor, so the YOLOv5PAFPN's out_channels equals out_channels * widen_factor
            num_csp_blocks=3,
            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
            act_cfg=dict(type='SiLU', inplace=True)),
        dict(
            type='mmdet.ChannelMapper',
            in_channels=[128, 256, 512],
            out_channels=128,
        ),
        dict(
            type='mmdet.DyHead',
            in_channels=128,
            out_channels=256,
            num_blocks=2,
            # disable zero_init_offset to follow official implementation
            zero_init_offset=False)
    ],
    bbox_head=dict(head_module=dict(in_channels=[512, 512, 512]))  # The out_channels is controlled by widen_factor, so the YOLOv5HeadModule's in_channels * widen_factor must equal the last neck's out_channels
)
```
## Replace the backbone network
```{note}
1. When using other backbone networks, you need to ensure that the output channels of the backbone network match the input channels of the neck network.
2. The configuration files given below only ensure that training will run correctly; their training performance may not be optimal, because some backbones require specific learning rates, optimizers, and other hyperparameters. Related content will be added in the "Training Tips" section later.
```
### Use backbone network implemented in MMYOLO
Suppose you want to use `YOLOv6EfficientRep` as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
model = dict(
    backbone=dict(
        type='YOLOv6EfficientRep',
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='ReLU', inplace=True))
)
```
### Use backbone network implemented in other OpenMMLab repositories
The model registries in MMYOLO, MMDetection, MMClassification, and MMSegmentation all inherit from the root registry in MMEngine in the OpenMMLab 2.0 system, allowing these repositories to directly use modules already implemented by each other. Therefore, in MMYOLO, users can use backbone networks from MMDetection and MMClassification without reimplementing them.
#### Use backbone network implemented in MMDetection
1. Suppose you want to use `ResNet-50` as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [512, 1024, 2048]
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmdet.ResNet',  # Using ResNet from mmdet
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='YOLOv5PAFPN',
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
2. Suppose you want to use `SwinTransformer-Tiny` as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [192, 384, 768]
checkpoint_file = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmdet.SwinTransformer',  # Using SwinTransformer from mmdet
        embed_dims=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        patch_norm=True,
        out_indices=(1, 2, 3),
        with_cp=False,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)),
    neck=dict(
        type='YOLOv5PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of SwinTransformer-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
#### Use backbone network implemented in MMClassification
1. Suppose you want to use `ConvNeXt-Tiny` as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
# please run `mim install "mmcls>=1.0.0rc2"` to install mmcls
# import mmcls.models to trigger register_module in mmcls
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [192, 384, 768]
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmcls.ConvNeXt',  # Using ConvNeXt from mmcls
        arch='tiny',
        out_indices=(1, 2, 3),
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')),  # The pre-trained weights of the backbone network in MMCls have prefix='backbone.'. The prefix will be removed from the keys so that the weights can be loaded normally.
    neck=dict(
        type='YOLOv5PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of ConvNeXt-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
2. Suppose you want to use `MobileNetV3-small` as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
# please run `mim install "mmcls>=1.0.0rc2"` to install mmcls
# import mmcls.models to trigger register_module in mmcls
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth' # noqa
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [24, 48, 96]
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmcls.MobileNetV3',  # Using MobileNetV3 from mmcls
        arch='small',
        out_indices=(3, 8, 11),  # Modify out_indices
        init_cfg=dict(
            type='Pretrained',
            checkpoint=checkpoint_file,
            prefix='backbone.')),  # The pre-trained weights of the backbone network in MMCls have prefix='backbone.'. The prefix will be removed from the keys so that the weights can be loaded normally.
    neck=dict(
        type='YOLOv5PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of MobileNetV3 output are [24, 48, 96], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
#### Use backbone network in `timm` through MMClassification
MMClassification also provides a wrapper for the Py**T**orch **Im**age **M**odels (`timm`) backbone networks, so users can directly use backbone networks from `timm` through MMClassification. Suppose you want to use `EfficientNet-B1` as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
# please run `mim install "mmcls>=1.0.0rc2"` to install mmcls
# and `pip install timm` to install timm
# import mmcls.models to trigger register_module in mmcls
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [40, 112, 320]
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmcls.TIMMBackbone',  # Using timm from mmcls
        model_name='efficientnet_b1',  # Using efficientnet_b1 in timm
        features_only=True,
        pretrained=True,
        out_indices=(2, 3, 4)),
    neck=dict(
        type='YOLOv5PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of EfficientNet-B1 output are [40, 112, 320], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
#### Use backbone network implemented in MMSelfSup
Suppose you want to use a `ResNet-50` that was self-supervised-trained with `MoCo v3` in MMSelfSup as the backbone network of `YOLOv5`. The example config is as follows:
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
# please run `mim install "mmselfsup>=1.0.0rc3"` to install mmselfsup
# import mmselfsup.models to trigger register_module in mmselfsup
custom_imports = dict(imports=['mmselfsup.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-800e_in1k/mocov3_resnet50_8xb512-amp-coslr-800e_in1k_20220927-e043f51a.pth' # noqa
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [512, 1024, 2048]
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmselfsup.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3, 4),  # Note: out_indices of ResNet in MMSelfSup are 1 larger than those in MMDet and MMCls
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)),
    neck=dict(
        type='YOLOv5PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
#### Don't use pre-trained weights
When we replace the backbone network, the model is initialized by loading the backbone's pre-trained weights by default. If you want to train the model from scratch instead of using the pre-trained backbone weights,
you can set the `init_cfg` in `backbone` to `None`. In this case, the backbone network will be initialized with the default initialization method, instead of the pre-trained weights.
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
deepen_factor = _base_.deepen_factor
widen_factor = 1.0
channels = [512, 1024, 2048]
model = dict(
    backbone=dict(
        _delete_=True,  # Delete the backbone field in _base_
        type='mmdet.ResNet',  # Using ResNet from mmdet
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=None  # If init_cfg is set to None, backbone will not be initialized with pre-trained weights
    ),
    neck=dict(
        type='YOLOv5PAFPN',
        widen_factor=widen_factor,
        in_channels=channels,  # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed.
        out_channels=channels),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            in_channels=channels,  # input channels of head need to be changed accordingly
            widen_factor=widen_factor))
)
```
#### Freeze the weight of backbone or neck
In MMYOLO, we can freeze some `stages` of the backbone network by setting `frozen_stages` parameters, so that these `stage` parameters do not participate in model updating.
It should be noted that `frozen_stages = i` means that all parameters from the initial `stage` to the `i`<sup>th</sup> `stage` will be frozen. The following is an example of `YOLOv5`. Other algorithms are the same logic.
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
model = dict(
    backbone=dict(
        frozen_stages=1  # Indicates that the parameters in the first stage and all stages before it are frozen
    ))
```
In addition, you can freeze the whole `neck` with the `freeze_all` parameter in MMYOLO. The following is an example for `YOLOv5`; other algorithms follow the same logic.
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
model = dict(
    neck=dict(
        freeze_all=True  # If freeze_all=True, all parameters of the neck will be frozen
    ))
```
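If you want to double-check which parameters are actually frozen, a minimal sketch like the following can be run after the model is built. It relies on the fact that frozen modules have `requires_grad=False`; `init_detector` is the same API used by the demo scripts above, and the config path is only an illustration:
```python
from mmdet.apis import init_detector
from mmyolo.utils import register_all_modules

register_all_modules()
# Build the model without loading a checkpoint.
model = init_detector(
    'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py', device='cpu')
# Frozen parameters do not require gradients.
for name, param in model.named_parameters():
    if not param.requires_grad:
        print('frozen:', name)
```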
## Output prediction results
If you want to save the prediction results as a specific file for offline evaluation, MMYOLO currently supports both json and pkl formats.
```{note}
The json file only saves `image_id`, `bbox`, `score` and `category_id`, and can be read using the json library.
The pkl file holds more content than the json file, such as the file name and size of the predicted image, and can be read using the pickle library.
```
### Output into json file
If you want to output the prediction results as a json file, the command is as follows.
```shell
python tools/test.py {path_to_config} {path_to_checkpoint} --json-prefix {json_prefix}
```
The argument after `--json-prefix` should be a filename prefix (no need to enter the `.json` suffix) and can also contain a path. For a concrete example:
```shell
python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --json-prefix work_dirs/demo/json_demo
```
Running the above command will output the `json_demo.bbox.json` file in the `work_dirs/demo` folder.
### Output into pkl file
If you want to output the prediction results as a pkl file, the command is as follows.
```shell
python tools/test.py {path_to_config} {path_to_checkpoint} --out {path_to_output_file}
```
The argument after `--out` should be a full filename (**must be** with a `.pkl` or `.pickle` suffix) and can also contain a path. For a concrete example:
```shell
python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --out work_dirs/demo/pkl_demo.pkl
```
Running the above command will output the `pkl_demo.pkl` file in the `work_dirs/demo` folder.
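Both output files can then be loaded back with the standard library for offline analysis. A minimal sketch, assuming the output paths from the examples above:
```python
import json
import pickle

# json results: a list of dicts with image_id, bbox ([x, y, w, h]),
# score and category_id.
with open('work_dirs/demo/json_demo.bbox.json') as f:
    json_results = json.load(f)
print(len(json_results), json_results[0]['category_id'])

# pkl results: a list of per-image predictions that additionally keeps
# meta information such as the file name and size of the predicted image.
with open('work_dirs/demo/pkl_demo.pkl', 'rb') as f:
    pkl_results = pickle.load(f)
print(len(pkl_results))
```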
## Use mim to run scripts from other OpenMMLab repositories
```{note}
1. All script calls across libraries are currently not supported and are being fixed. More examples will be added to this document when the fix is complete.
2. mAP plotting and average training speed calculation are fixed in the MMDetection dev-3.x branch, which currently needs to be installed via the source code to be run successfully.
```
### Log Analysis
#### Curve plotting
`tools/analysis_tools/analyze_logs.py` plots loss/mAP curves given a training log file. Run `pip install seaborn` first to install the dependency.
```shell
mim run mmdet analyze_logs plot_curve \
${LOG} \ # path of train log in json format
[--keys ${KEYS}] \ # the metric that you want to plot, default to 'bbox_mAP'
[--start-epoch ${START_EPOCH}] # the epoch that you want to start, default to 1
[--eval-interval ${EVALUATION_INTERVAL}] \ # the evaluation interval when training, default to 1
[--title ${TITLE}] \ # title of figure
[--legend ${LEGEND}] \ # legend of each plot, default to None
[--backend ${BACKEND}] \ # backend of plt, default to None
[--style ${STYLE}] \ # style of plt, default to 'dark'
[--out ${OUT_FILE}] # the path of output file
# [] stands for optional parameters, when actually entering the command line, you do not need to enter []
```
Examples:
- Plot the classification loss of some run.
```shell
mim run mmdet analyze_logs plot_curve \
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \
--keys loss_cls \
--legend loss_cls
```
<img src="https://user-images.githubusercontent.com/27466624/204747359-754555df-1f97-4d5c-87ca-9ad3a0badcce.png" width="600"/>
- Plot the classification and regression loss of some run, and save the figure to a pdf.
```shell
mim run mmdet analyze_logs plot_curve \
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \
--keys loss_cls loss_bbox \
--legend loss_cls loss_bbox \
--out losses_yolov5_s.pdf
```
<img src="https://user-images.githubusercontent.com/27466624/204748560-2d17ce4b-fb5f-4732-a962-329109e73aad.png" width="600"/>
- Compare the bbox mAP of two runs in the same figure.
```shell
mim run mmdet analyze_logs plot_curve \
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \
yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \
--keys bbox_mAP \
--legend yolov5_s yolov5_n \
--eval-interval 10 # Note that the evaluation interval must be the same as during training. Otherwise, it will raise an error.
```
<img src="https://user-images.githubusercontent.com/27466624/204748704-21db9f9e-386e-449c-91c7-2ce3f8b51f24.png" width="600"/>
#### Compute the average training speed
```shell
mim run mmdet analyze_logs cal_train_time \
${LOG} \ # path of train log in json format
[--include-outliers] # include the first value of every epoch when computing the average time
```
Examples:
```shell
mim run mmdet analyze_logs cal_train_time \
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json
```
The output is expected to be like the following.
```text
-----Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json-----
slowest epoch 278, average time is 0.1705 s/iter
fastest epoch 300, average time is 0.1510 s/iter
time std over epochs is 0.0026
average iter time: 0.1556 s/iter
```
### Print the whole config
`print_config.py` in MMDetection prints the whole config verbatim, expanding all its imports. The command is as follows.
```shell
mim run mmdet print_config \
${CONFIG} \ # path of the config file
[--save-path] \ # save path of whole config, suffixed with .py, .json or .yml
[--cfg-options ${OPTIONS [OPTIONS...]}] # override some settings in the used config
```
Examples:
```shell
mim run mmdet print_config \
configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \
--save-path ./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py
```
Running the above command will save the `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` config file with the inheritance relationship expanded to `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py` in the `./work_dirs` folder.
## Set the random seed
If you want to set the random seed during training, you can use the following command.
```shell
python ./tools/train.py \
${CONFIG} \ # path of the config file
--cfg-options randomness.seed=2023 \ # set seed to 2023
[randomness.diff_rank_seed=True] \ # set different seeds according to global rank
[randomness.deterministic=True] # set the deterministic option for CUDNN backend
# [] stands for optional parameters, when actually entering the command line, you do not need to enter []
```
`randomness` has three parameters that can be set, with the following meanings.
- `randomness.seed=2023`, set the random seed to 2023.
- `randomness.diff_rank_seed=True`, set different seeds according to global rank. Defaults to False.
- `randomness.deterministic=True`, set the deterministic option for cuDNN backend, i.e., set `torch.backends.cudnn.deterministic` to True and `torch.backends.cudnn.benchmark` to False. Defaults to False. See https://pytorch.org/docs/stable/notes/randomness.html for more details.
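For example, a fully reproducible, deterministic run could be launched as follows (the config path is only an illustration):
```shell
python ./tools/train.py \
    configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
    --cfg-options randomness.seed=2023 randomness.deterministic=True
```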

View File

@ -1,25 +0,0 @@
Data flow
************************
.. toctree::
:maxdepth: 1
data_flow.md
How to
************************
.. toctree::
:maxdepth: 1
how_to.md
Plugins
************************
.. toctree::
:maxdepth: 1
plugins.md

Some files were not shown because too many files have changed in this diff.