mirror of https://github.com/open-mmlab/mmyolo.git
commit
dc85144fab
.circleci
.dev_scripts
configs
_base_
rtmdet
rotated
yolov5
yolov6
yolov8
yolox
docs
en/advanced_guides
|
@ -67,7 +67,7 @@ jobs:
|
|||
command: |
|
||||
pip install -U openmim
|
||||
mim install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
mim install 'mmcv >= 2.0.0rc4'
|
||||
mim install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/albu.txt
|
||||
pip install -r requirements/tests.txt
|
||||
|
@ -125,7 +125,7 @@ jobs:
|
|||
command: |
|
||||
docker exec mmyolo pip install -U openmim
|
||||
docker exec mmyolo mim install -e /mmengine
|
||||
docker exec mmyolo mim install 'mmcv >= 2.0.0rc1'
|
||||
docker exec mmyolo mim install 'mmcv >= 2.0.0rc4'
|
||||
docker exec mmyolo pip install -e /mmdetection
|
||||
docker exec mmyolo pip install -r requirements/albu.txt
|
||||
docker exec mmyolo pip install -r requirements/tests.txt
|
||||
|
|
|
@ -110,6 +110,7 @@ def get_dataset_name(config):
|
|||
CocoDataset='COCO',
|
||||
YOLOv5CocoDataset='COCO',
|
||||
CocoPanopticDataset='COCO',
|
||||
YOLOv5DOTADataset='DOTA 1.0',
|
||||
DeepFashionDataset='Deep Fashion',
|
||||
LVISV05Dataset='LVIS v0.5',
|
||||
LVISV1Dataset='LVIS v1',
|
||||
|
|
|
@ -0,0 +1,448 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import importlib
import os
import os.path as osp
import pkgutil
import subprocess
import sys
import tempfile
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
|
||||
|
||||
# Remote git host; the gitee mirror below can be used when github is slow.
# host_addr = 'https://gitee.com/open-mmlab'
host_addr = 'https://github.com/open-mmlab'
# Directories whose file trees are rendered into the "tools" HTML tables.
tools_list = ['tools', '.dev_scripts']
# Mapping from import (module) name to git repository name.
proxy_names = {
    'mmdet': 'mmdetection',
    'mmseg': 'mmsegmentation',
    'mmcls': 'mmclassification'
}
# Registries collected under these extra scopes are merged into the keyed
# module's registries (mmengine registries show up under mmcv).
merge_module_keys = {'mmcv': ['mmengine']}
# String entries starting with any of these prefixes are dropped from the
# keyed module's registries; currently disabled.
# exclude_prefix = {'mmcv': ['<class \'mmengine.model.']}
exclude_prefix = {}
# Title of the generated markdown document (intentionally in Chinese).
markdown_title = '# MM 系列开源库注册表\n'
markdown_title += '(注意:本文档是通过 .dev_scripts/print_registers.py 脚本自动生成)'
|
||||
|
||||
def capitalize(repo_name):
    """Return the display-cased form of an OpenMMLab repository name.

    'mmcv' (any case) becomes fully upper-case, any other name starting
    with 'mm' gets an upper-case 'MM' prefix, and everything else is
    plain-capitalized.
    """
    name_lower = repo_name.lower()
    if name_lower == 'mmcv':
        return repo_name.upper()
    if not name_lower.startswith('mm'):
        return repo_name.capitalize()
    return 'MM' + repo_name[2:]
|
||||
|
||||
|
||||
def mkdir_or_exist(dir_name, mode=0o777):
    """Create directory *dir_name* (including parents) if it is missing.

    An empty string is silently ignored; a leading '~' is expanded to the
    user's home directory.  Existing directories are left untouched.
    """
    if dir_name == '':
        return
    expanded = osp.expanduser(dir_name)
    os.makedirs(expanded, mode=mode, exist_ok=True)
|
||||
|
||||
|
||||
def parse_repo_name(repo_name):
    """Resolve a (repository, module) name pair from either alias.

    Accepts either the module alias (e.g. 'mmdet') or the full repository
    name (e.g. 'mmdetection') and returns both forms, using the
    module-level ``proxy_names`` table in both directions.
    """
    repo_by_module = proxy_names
    module_by_repo = {repo: mod for mod, repo in proxy_names.items()}
    full_repo = repo_by_module.get(repo_name, repo_name)
    module = module_by_repo.get(full_repo, full_repo)
    return full_repo, module
|
||||
|
||||
|
||||
def git_pull_branch(repo_name, branch_name='', pulldir='.'):
    """Initialise *pulldir* as a git repository and pull *repo_name*.

    Args:
        repo_name (str): Repository name under ``host_addr``.
        branch_name (str): Optional branch to pull; the remote default
            branch is used when empty.
        pulldir (str): Target directory, created if missing.

    Raises:
        RuntimeError: If any git command exits with a non-zero status.
    """
    mkdir_or_exist(pulldir)
    # Run git with an explicit working directory instead of the previous
    # shell string 'cd {dir};git init;git pull ...': the old form broke on
    # paths containing spaces and, if `cd` failed, silently ran git in the
    # caller's cwd.
    pull_cmd = ['git', 'pull', f'{host_addr}/{repo_name}.git']
    if branch_name:
        pull_cmd.append(branch_name)
    for cmd in (['git', 'init'], pull_cmd):
        returncode = subprocess.call(cmd, cwd=pulldir)
        if returncode:
            raise RuntimeError(
                f'failed to get the remote repo, code: {returncode}')
|
||||
|
||||
|
||||
def load_modules_from_dir(module_name, module_root, throw_error=False):
    """Import every submodule of *module_name* found under *module_root*.

    Walks the package directory, force-creates missing ``__init__.py``
    files so every sub-directory is importable, then imports each
    discovered module.

    Args:
        module_name (str): Top-level package name (e.g. 'mmdet').
        module_root (str): Directory that contains the package directory.
        throw_error (bool): Re-raise import errors instead of recording
            them.

    Returns:
        tuple: ``(module_list, error_dict)`` where *module_list* holds the
        successfully imported module objects and *error_dict* maps failed
        module names to their error message.
    """
    print(f'loading the {module_name} modules...')
    # # install the dependencies
    # if osp.exists(osp.join(pkg_dir, 'requirements.txt')):
    #     os.system('pip install -r requirements.txt')
    # get all module list
    module_list = []
    error_dict = {}
    module_root = osp.join(module_root, module_name)
    assert osp.exists(module_root), \
        f'cannot find the module root: {module_root}'
    for _root, _dirs, _files in os.walk(module_root):
        if (('__init__.py' not in _files)
                and (osp.split(_root)[1] != '__pycache__')):
            # add __init__.py file to the package
            # NOTE: this mutates the checked-out tree on disk so that
            # pkgutil can descend into directories shipped without one.
            with open(osp.join(_root, '__init__.py'), 'w') as _:
                pass

    def _onerror(*args, **kwargs):
        # Swallow pkgutil walk errors; individual import failures are
        # captured per-module in the loop below.
        pass

    for _finder, _name, _ispkg in pkgutil.walk_packages([module_root],
                                                        prefix=module_name +
                                                        '.',
                                                        onerror=_onerror):
        try:
            module = importlib.import_module(_name)
            module_list.append(module)
        except Exception as e:
            if throw_error:
                raise e
            _error_msg = f'{type(e)}: {e}.'
            print(f'cannot import the module: {_name} ({_error_msg})')
            assert (_name not in error_dict), \
                f'duplicate error name was found: {_name}'
            error_dict[_name] = _error_msg
    for module in module_list:
        # Sanity check: make sure imports resolved to the pulled tree and
        # not to an already-installed copy elsewhere on sys.path.
        assert module.__file__.startswith(module_root), \
            f'the importing path of package was wrong: {module.__file__}'
    print('modules were loaded...')
    return module_list, error_dict
|
||||
|
||||
|
||||
def get_registries_from_modules(module_list):
    """Collect every mmengine ``Registry`` object exposed by *module_list*.

    Returns:
        dict: Nested mapping ``{scope: {registry name: {module key:
        stringified entry}}}``.
    """
    # Registry must be imported lazily: the freshly pulled package tree is
    # not importable at file-load time.
    from mmengine.registry import Registry

    found = set()
    for mod in module_list:
        for attr_name in dir(mod):
            candidate = getattr(mod, attr_name)
            if isinstance(candidate, Registry):
                found.add(candidate)

    registries = {}
    for reg in found:
        scope_dict = registries.setdefault(reg.scope, {})
        assert reg.name not in scope_dict, \
            f'multiple definition of {reg.name} in registries'
        scope_dict[reg.name] = {
            key: str(val)
            for key, val in reg.module_dict.items()
        }
    print('registries got...')
    return registries
|
||||
|
||||
|
||||
def merge_registries(src_dict, dst_dict):
    """Recursively merge *dst_dict* into *src_dict* in place.

    Keys missing from *src_dict* are copied over; keys present in both
    must map to dict or str values and are merged recursively.  String
    leaves are left untouched.
    """
    assert type(src_dict) == type(dst_dict), \
        (f'merge type is not supported: '
         f'{type(dst_dict)} and {type(src_dict)}')
    if isinstance(src_dict, str):
        # String leaves carry no children; nothing to merge.
        return
    for key, value in dst_dict.items():
        if key not in src_dict:
            src_dict[key] = value
        else:
            assert isinstance(value, (dict, str)) and \
                isinstance(src_dict[key], (dict, str)), \
                'merge type is not supported: ' \
                f'{type(value)} and {type(src_dict[key])}'
            merge_registries(src_dict[key], value)
|
||||
|
||||
|
||||
def exclude_registries(registries, exclude_key):
    """Recursively drop string entries whose value starts with *exclude_key*.

    Mutates *registries* in place; nested dicts are pruned with the same
    rule.
    """
    # Snapshot the keys because entries may be popped while iterating.
    for key in list(registries.keys()):
        value = registries[key]
        if isinstance(value, dict):
            exclude_registries(value, exclude_key)
        elif isinstance(value, str) and value.startswith(exclude_key):
            registries.pop(key)
|
||||
|
||||
|
||||
def get_scripts_from_dir(root):
    """Build a nested dict mirroring every file found under *root*.

    Leaves (files) map to ``None``; intermediate keys are directory
    names.  All files are included, not only script extensions.
    """

    def _insert(tree, parts):
        # Descend into the tree creating dicts per directory component,
        # marking the final file component with None.
        if len(parts) <= 1:
            tree[parts[0]] = None
            return
        head, *tail = parts
        _insert(tree.setdefault(head, {}), tail)

    # find all scripts in the root directory. (not just ('.py', '.sh'))
    # os.walk is used directly because mmengine cannot be imported before
    # the repositories are pulled.
    scripts = {}
    for subroot, _dirs, files in os.walk(root):
        for filename in files:
            rel = osp.join(osp.relpath(subroot, root), filename)
            _insert(scripts, Path(rel).parts)
    return scripts
|
||||
|
||||
|
||||
def get_version_from_module_name(module_name, branch):
    """Return a display string ``' (<version>)'`` for *module_name*.

    Falls back to the branch name when the imported module defines no
    ``__version__``, and returns '' when nothing is known or the module
    cannot be imported.

    Args:
        module_name (str): Importable module name.
        branch: Branch identifier used as fallback version text; may be
            ``None``.

    Returns:
        str: ``' (<version>)'`` or the empty string.
    """
    branch_str = str(branch) if branch is not None else ''
    version_str = ''
    try:
        # importlib.import_module replaces the original exec/eval pair:
        # identical effect without executing dynamically-built code.
        _module = importlib.import_module(module_name)
        version_str = str(getattr(_module, '__version__', branch_str))
        version_str = f' ({version_str})' if version_str else version_str
    except (ImportError, AttributeError) as e:
        print(f'can not get the version of module {module_name}: {e}')
    return version_str
|
||||
|
||||
|
||||
def print_tree(print_dict):
    """Pretty-print a nested dict as an ASCII tree, one tree per top key.

    String leaves are shown as ``name (value)``; ``None`` leaves show only
    the name; dict values recurse one level deeper.  Output goes to
    stdout; nothing is returned.
    """

    # recursive print the dict tree
    def _recurse(_dict, _connector='', n=0):
        assert isinstance(_dict, dict), 'recursive type must be dict'
        tree = ''
        for idx, (_key, _val) in enumerate(_dict.items()):
            sub_tree = ''
            # _last controls both the branch glyph and the continuation
            # of the vertical connector for children.
            _last = (idx == (len(_dict) - 1))
            if isinstance(_val, str):
                _key += f' ({_val})'
            elif isinstance(_val, dict):
                sub_tree = _recurse(_val,
                                    _connector + (' ' if _last else '│ '),
                                    n + 1)
            else:
                assert (_val is None), f'unknown print type {_val}'
            # n is the depth, printed in parentheses before each entry.
            tree += ' ' + _connector + \
                ('└─' if _last else '├─') + f'({n}) {_key}' + '\n'
            tree += sub_tree
        return tree

    for _pname, _pdict in print_dict.items():
        print('-' * 100)
        print(f'{_pname}\n' + _recurse(_pdict))
|
||||
|
||||
|
||||
def divide_list_into_groups(_array, _maxsize_per_group):
    """Split *_array* into roughly equal groups of bounded size.

    Args:
        _array: Sequence to split; returned unchanged when empty/falsy.
        _maxsize_per_group (int): Maximum number of items per group.

    Returns:
        The original (falsy) value, or the list of numpy sub-arrays
        produced by ``np.array_split``.
    """
    if not _array:
        return _array
    # Integer ceiling division replaces the original float division +
    # np.floor + astype round-trip; same group count, no float rounding.
    num_groups = -(-len(_array) // _maxsize_per_group)
    return np.array_split(_array, num_groups)
|
||||
|
||||
|
||||
def registries_to_html(registries, title=''):
    """Render a registry mapping as collapsible HTML tables.

    Args:
        registries (dict): Mapping of registry name to either a dict
            (its keys are listed) or a list of entry names.
        title (str): Caption shown above the tables.

    Returns:
        str: ``<details open>`` HTML block, or '' if every registry was
        empty.
    """
    # Layout limits: at most 5 table columns per row, and registries with
    # more than 20 entries are split into "(part N)" cells.
    max_col_per_row = 5
    max_size_per_cell = 20
    html = ''
    table_data = []
    # save repository registries
    for registry_name, registry_dict in registries.items():
        # filter the empty registries
        if not registry_dict:
            continue
        registry_strings = []
        if isinstance(registry_dict, dict):
            registry_dict = list(registry_dict.keys())
        elif isinstance(registry_dict, list):
            pass
        else:
            raise TypeError(
                f'unknown type of registry_dict {type(registry_dict)}')
        for _k in registry_dict:
            registry_strings.append(f'<li>{_k}</li>')
        table_data.append((registry_name, registry_strings))

    # sort the data list (ascending by entry count)
    table_data = sorted(table_data, key=lambda x: len(x[1]))
    # split multi parts: oversized registries become several cells
    table_data_multi_parts = []
    for (registry_name, registry_strings) in table_data:
        multi_parts = False
        if len(registry_strings) > max_size_per_cell:
            multi_parts = True
        for cell_idx, registry_cell in enumerate(
                divide_list_into_groups(registry_strings, max_size_per_cell)):
            registry_str = ''.join(registry_cell.tolist())
            registry_str = f'<ul>{registry_str}</ul>'
            table_data_multi_parts.append([
                registry_name if not multi_parts else
                f'{registry_name} (part {cell_idx + 1})', registry_str
            ])

    # NOTE: table_data is intentionally re-bound to each row-group here.
    for table_data in divide_list_into_groups(table_data_multi_parts,
                                              max_col_per_row):
        # Transpose [(name, cell), ...] into ([names...], [cells...]).
        table_data = list(zip(*table_data.tolist()))
        html += dataframe_to_html(
            pd.DataFrame([table_data[1]], columns=table_data[0]))
    if html:
        html = f'<div align=\'center\'><b>{title}</b></div>\n{html}'
        html = f'<details open>{html}</details>\n'
    return html
|
||||
|
||||
|
||||
def tools_to_html(tools_dict, repo_name=''):
    """Render the nested tools-file tree as HTML tables.

    The tree produced by ``get_scripts_from_dir`` is flattened into a
    mapping of directory path -> list of file names, then handed to
    ``registries_to_html``.
    """

    def _flatten(subtree, prefix, collected):
        assert isinstance(subtree, dict), \
            f'unknown recurse type: {subtree} ({type(subtree)})'
        for name, child in subtree.items():
            if child is None:
                # File leaf: group it under its containing directory.
                collected.setdefault(prefix, []).append(name)
            else:
                _flatten(child, osp.join(prefix, name), collected)

    table_data = {}
    title = f'{capitalize(repo_name)} Tools'
    _flatten(tools_dict, '', table_data)
    return registries_to_html(table_data, title)
|
||||
|
||||
|
||||
def dataframe_to_html(dataframe):
|
||||
styler = dataframe.style
|
||||
styler = styler.hide(axis='index')
|
||||
styler = styler.format(na_rep='-')
|
||||
styler = styler.set_properties(**{
|
||||
'text-align': 'left',
|
||||
'align': 'center',
|
||||
'vertical-align': 'top'
|
||||
})
|
||||
styler = styler.set_table_styles([{
|
||||
'selector':
|
||||
'thead th',
|
||||
'props':
|
||||
'align:center;text-align:center;vertical-align:bottom'
|
||||
}])
|
||||
html = styler.to_html()
|
||||
html = f'<div align=\'center\'>\n{html}</div>'
|
||||
return html
|
||||
|
||||
|
||||
def generate_markdown_by_repository(repo_name,
                                    module_name,
                                    branch,
                                    pulldir,
                                    throw_error=False):
    """Produce the markdown section for one pulled repository.

    Imports the pulled package, gathers its registries and tools trees,
    and renders everything as an HTML-in-markdown section headed by the
    repository name and version.

    Args:
        repo_name (str): Full repository name (e.g. 'mmdetection').
        module_name (str): Import name of the package (e.g. 'mmdet').
        branch: Branch identifier used as version fallback; may be None.
        pulldir (str): Directory the repository was pulled into.
        throw_error (bool): Propagate import errors instead of recording
            them.

    Returns:
        str: The markdown section text.
    """
    # add the pull dir to the system path so that it can be found
    if pulldir not in sys.path:
        sys.path.insert(0, pulldir)
    module_list, error_dict = load_modules_from_dir(
        module_name, pulldir, throw_error=throw_error)
    registries_tree = get_registries_from_modules(module_list)
    if error_dict:
        # Record failed imports as a pseudo-registry so they appear in
        # the generated document.
        error_dict_name = 'error_modules'
        assert (error_dict_name not in registries_tree), \
            f'duplicate module name was found: {error_dict_name}'
        registries_tree.update({error_dict_name: error_dict})
    # get the tools files
    for tools_name in tools_list:
        assert (tools_name not in registries_tree), \
            f'duplicate tools name was found: {tools_name}'
        tools_tree = osp.join(pulldir, tools_name)
        tools_tree = get_scripts_from_dir(tools_tree)
        registries_tree.update({tools_name: tools_tree})
    # print_tree(registries_tree)
    # get registries markdown string
    module_registries = registries_tree.get(module_name, {})
    # Fold in registries from companion scopes (e.g. mmengine into mmcv).
    for merge_key in merge_module_keys.get(module_name, []):
        merge_dict = registries_tree.get(merge_key, {})
        merge_registries(module_registries, merge_dict)
    for exclude_key in exclude_prefix.get(module_name, []):
        exclude_registries(module_registries, exclude_key)
    markdown_str = registries_to_html(
        module_registries, title=f'{capitalize(repo_name)} Module Components')
    # get tools markdown string
    tools_registries = {}
    for tools_name in tools_list:
        tools_registries.update(
            {tools_name: registries_tree.get(tools_name, {})})
    markdown_str += tools_to_html(tools_registries, repo_name=repo_name)
    version_str = get_version_from_module_name(module_name, branch)
    title_str = f'\n\n## {capitalize(repo_name)}{version_str}\n'
    # remove the pull dir from system path
    if pulldir in sys.path:
        sys.path.remove(pulldir)
    return f'{title_str}{markdown_str}'
|
||||
|
||||
|
||||
def parse_args():
    """Build and parse the command line interface of this script.

    Returns:
        argparse.Namespace: Parsed options — ``repositories``,
        ``branches``, ``out`` and ``throw_error``.
    """
    parser = argparse.ArgumentParser(
        description='print registries in openmmlab repositories')
    parser.add_argument(
        '-r',
        '--repositories',
        nargs='+',
        default=['mmdet', 'mmcls', 'mmseg', 'mmengine', 'mmcv'],
        type=str,
        help='git repositories name in OpenMMLab')
    # Branch defaults are positionally matched with the repository
    # defaults above.
    parser.add_argument(
        '-b',
        '--branches',
        nargs='+',
        default=['3.x', '1.x', '1.x', 'main', '2.x'],
        type=str,
        help='the branch names of git repositories, the length of branches '
        'must be same as the length of repositories')
    parser.add_argument(
        '-o', '--out', type=str, default='.', help='output path of the file')
    parser.add_argument(
        '--throw-error',
        action='store_true',
        default=False,
        help='whether to throw error when trying to import modules')
    return parser.parse_args()
|
||||
|
||||
|
||||
# TODO: Refine
def main():
    """Pull each requested repository, harvest its registries in a worker
    pool, and write the combined markdown to ``<out>/registries_info.md``.
    """
    args = parse_args()
    repositories = args.repositories
    branches = args.branches
    assert isinstance(repositories, list), \
        'Type of repositories must be list'
    if branches is None:
        branches = [None] * len(repositories)
    assert isinstance(branches, list) and \
        len(branches) == len(repositories), \
        'The length of branches must be same as ' \
        'that of repositories'
    assert isinstance(args.out, str), \
        'The type of output path must be string'
    # save path of file
    mkdir_or_exist(args.out)
    save_path = osp.join(args.out, 'registries_info.md')
    # Repositories are pulled into a temp dir that is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        # multi process init: one worker per repository so each package
        # is imported in an isolated interpreter.
        pool = Pool(processes=len(repositories))
        multi_proc_input_list = []
        multi_proc_output_list = []
        # get the git repositories (sequentially; only the import/scan
        # step below is parallelised)
        for branch, repository in zip(branches, repositories):
            repo_name, module_name = parse_repo_name(repository)
            pulldir = osp.join(tmpdir, f'tmp_{repo_name}')
            git_pull_branch(
                repo_name=repo_name, branch_name=branch, pulldir=pulldir)
            multi_proc_input_list.append(
                (repo_name, module_name, branch, pulldir, args.throw_error))
        print('starting the multi process to get the registries')
        for multi_proc_input in multi_proc_input_list:
            multi_proc_output_list.append(
                pool.apply_async(generate_markdown_by_repository,
                                 multi_proc_input))
        pool.close()
        pool.join()
        # Collect results in submission order so sections keep the CLI
        # repository order.
        with open(save_path, 'w', encoding='utf-8') as fw:
            fw.write(f'{markdown_title}\n')
            for multi_proc_output in multi_proc_output_list:
                markdown_str = multi_proc_output.get()
                fw.write(f'{markdown_str}\n')
        print(f'saved registries to the path: {save_path}')


if __name__ == '__main__':
    main()
|
|
@ -5,7 +5,7 @@ repos:
|
|||
hooks:
|
||||
- id: flake8
|
||||
- repo: https://gitee.com/openmmlab/mirrors-isort
|
||||
rev: 5.10.1
|
||||
rev: 5.11.5
|
||||
hooks:
|
||||
- id: isort
|
||||
- repo: https://gitee.com/openmmlab/mirrors-yapf
|
||||
|
|
|
@ -5,7 +5,7 @@ repos:
|
|||
hooks:
|
||||
- id: flake8
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.10.1
|
||||
rev: 5.11.5
|
||||
hooks:
|
||||
- id: isort
|
||||
- repo: https://github.com/pre-commit/mirrors-yapf
|
||||
|
|
|
@ -6,4 +6,3 @@ python:
|
|||
version: 3.7
|
||||
install:
|
||||
- requirements: requirements/docs.txt
|
||||
- requirements: requirements/readthedocs.txt
|
||||
|
|
195
README.md
195
README.md
|
@ -1,5 +1,5 @@
|
|||
<div align="center">
|
||||
<img width="100%" src="https://user-images.githubusercontent.com/27466624/213130448-1f8529fd-2247-4ac4-851c-acd0148a49b9.png"/>
|
||||
<img width="100%" src="https://user-images.githubusercontent.com/27466624/222385101-516e551c-49f5-480d-a135-4b24ee6dc308.png"/>
|
||||
<div> </div>
|
||||
<div align="center">
|
||||
<b><font size="5">OpenMMLab website</font></b>
|
||||
|
@ -21,13 +21,13 @@
|
|||
[](https://pypi.org/project/mmyolo)
|
||||
[](https://mmyolo.readthedocs.io/en/latest/)
|
||||
[](https://github.com/open-mmlab/mmyolo/actions)
|
||||
[](https://codecov.io/gh/open-mmlab/mmyolo)
|
||||
[](https://github.com/open-mmlab/mmyolo/blob/master/LICENSE)
|
||||
[](https://codecov.io/gh/open-mmlab/mmyolo)
|
||||
[](https://github.com/open-mmlab/mmyolo/blob/main/LICENSE)
|
||||
[](https://github.com/open-mmlab/mmyolo/issues)
|
||||
[](https://github.com/open-mmlab/mmyolo/issues)
|
||||
|
||||
[📘Documentation](https://mmyolo.readthedocs.io/en/latest/) |
|
||||
[🛠️Installation](https://mmyolo.readthedocs.io/en/latest/get_started.html) |
|
||||
[🛠️Installation](https://mmyolo.readthedocs.io/en/latest/get_started/installation.html) |
|
||||
[👀Model Zoo](https://mmyolo.readthedocs.io/en/latest/model_zoo.html) |
|
||||
[🆕Update News](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html) |
|
||||
[🤔Reporting Issues](https://github.com/open-mmlab/mmyolo/issues/new/choose)
|
||||
|
@ -40,6 +40,26 @@ English | [简体中文](README_zh-CN.md)
|
|||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## 📄 Table of Contents
|
||||
|
||||
- [🥳 🚀 What's New](#--whats-new-)
|
||||
|
@ -57,10 +77,17 @@ English | [简体中文](README_zh-CN.md)
|
|||
|
||||
## 🥳 🚀 What's New [🔝](#-table-of-contents)
|
||||
|
||||
💎 **v0.4.0** was released on 18/1/2023:
|
||||
💎 **v0.5.0** was released on 2/3/2023:
|
||||
|
||||
1. Implemented [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection model, and supports model deployment in [projects/easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy)
|
||||
2. Added Chinese and English versions of [Algorithm principles and implementation with YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/algorithm_descriptions/yolov8_description.md)
|
||||
1. Support [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) rotated object detection
|
||||
2. Support for using mask annotation to improve [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection performance
|
||||
3. Support [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) searchable NAS sub-network as the backbone of YOLO series algorithm
|
||||
4. Support calling [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) to distill the knowledge of RTMDet
|
||||
5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) document structure optimization, comprehensive content upgrade
|
||||
6. Improve YOLOX mAP and training speed based on RTMDet training hyperparameters
|
||||
7. Support calculation of model parameters and FLOPs, provide GPU latency data on T4 devices, and update [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/model_zoo.md)
|
||||
8. Support test-time augmentation (TTA)
|
||||
9. Support RTMDet, YOLOv8 and YOLOv7 assigner visualization
|
||||
|
||||
For release history and update details, please refer to [changelog](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html).
|
||||
|
||||
|
@ -82,7 +109,7 @@ We are excited to announce our latest work on real-time object recognition tasks
|
|||
<img src="https://user-images.githubusercontent.com/12907710/208044554-1e8de6b5-48d8-44e4-a7b5-75076c7ebb71.png"/>
|
||||
</div>
|
||||
|
||||
MMYOLO currently only implements the object detection algorithm, but it has a significant training acceleration compared to the MMDeteciton version. The training speed is 2.6 times faster than the previous version.
|
||||
MMYOLO currently implements the object detection and rotated object detection algorithm, but it has a significant training acceleration compared to the MMDetection version. The training speed is 2.6 times faster than the previous version.
|
||||
|
||||
## 📖 Introduction [🔝](#-table-of-contents)
|
||||
|
||||
|
@ -109,21 +136,21 @@ The master branch works with **PyTorch 1.6+**.
|
|||
<img src="https://user-images.githubusercontent.com/27466624/199999337-0544a4cb-3cbd-4f3e-be26-bcd9e74db7ff.jpg" alt="BaseModule-P5"/>
|
||||
The figure above is contributed by RangeKing@GitHub, thank you very much!
|
||||
|
||||
And the figure of P6 model is in [model_design.md](docs/en/algorithm_descriptions/model_design.md).
|
||||
And the figure of P6 model is in [model_design.md](docs/en/recommended_topics/model_design.md).
|
||||
|
||||
</details>
|
||||
|
||||
## 🛠️ Installation [🔝](#-table-of-contents)
|
||||
|
||||
MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started.md) for more detailed instructions.
|
||||
MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started/installation.md) for more detailed instructions.
|
||||
|
||||
```shell
|
||||
conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
conda create -n mmyolo python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
|
||||
conda activate mmyolo
|
||||
pip install openmim
|
||||
mim install "mmengine>=0.3.1"
|
||||
mim install "mmcv>=2.0.0rc1,<2.1.0"
|
||||
mim install "mmdet>=3.0.0rc5,<3.1.0"
|
||||
mim install "mmengine>=0.6.0"
|
||||
mim install "mmcv>=2.0.0rc4,<2.1.0"
|
||||
mim install "mmdet>=3.0.0rc6,<3.1.0"
|
||||
git clone https://github.com/open-mmlab/mmyolo.git
|
||||
cd mmyolo
|
||||
# Install albumentations
|
||||
|
@ -140,49 +167,125 @@ The usage of MMYOLO is almost identical to MMDetection and all tutorials are str
|
|||
|
||||
For different parts from MMDetection, we have also prepared user guides and advanced guides, please read our [documentation](https://mmyolo.readthedocs.io/zh_CN/latest/).
|
||||
|
||||
- User Guides
|
||||
<details>
|
||||
<summary>Get Started</summary>
|
||||
|
||||
- [Train & Test](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#train-test)
|
||||
- [Learn about Configs with YOLOv5](docs/en/user_guides/config.md)
|
||||
- [From getting started to deployment](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#get-started-to-deployment)
|
||||
- [Custom Dataset](docs/en/user_guides/custom_dataset.md)
|
||||
- [From getting started to deployment with YOLOv5](docs/en/user_guides/yolov5_tutorial.md)
|
||||
- [Useful Tools](https://mmdetection.readthedocs.io/en/latest/user_guides/index.html#useful-tools)
|
||||
- [Visualization](docs/en/user_guides/visualization.md)
|
||||
- [Useful Tools](docs/en/user_guides/useful_tools.md)
|
||||
- [Overview](docs/en/get_started/overview.md)
|
||||
- [Dependencies](docs/en/get_started/dependencies.md)
|
||||
- [Installation](docs/en/get_started/installation.md)
|
||||
- [15 minutes object detection](docs/en/get_started/15_minutes_object_detection.md)
|
||||
- [15 minutes rotated object detection](docs/en/get_started/15_minutes_rotated_object_detection.md)
|
||||
- [15 minutes instance segmentation](docs/en/get_started/15_minutes_instance_segmentation.md)
|
||||
- [Resources summary](docs/en/get_started/article.md)
|
||||
|
||||
- Algorithm description
|
||||
</details>
|
||||
|
||||
- [Essential Basics](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#essential-basics)
|
||||
- [Model design-related instructions](docs/en/algorithm_descriptions/model_design.md)
|
||||
- [Algorithm principles and implementation](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#algorithm-principles-and-implementation)
|
||||
- [Algorithm principles and implementation with YOLOv5](docs/en/algorithm_descriptions/yolov5_description.md)
|
||||
- [Algorithm principles and implementation with RTMDet](docs/en/algorithm_descriptions/rtmdet_description.md)
|
||||
- [Algorithm principles and implementation with YOLOv8](docs/en/algorithm_descriptions/yolov8_description.md)
|
||||
<details>
|
||||
<summary>Recommended Topics</summary>
|
||||
|
||||
- Deployment Guides
|
||||
- [How to contribute code to MMYOLO](docs/en/recommended_topics/contributing.md)
|
||||
- [MMYOLO model design](docs/en/recommended_topics/model_design.md)
|
||||
- [Algorithm principles and implementation](docs/en/recommended_topics/algorithm_descriptions/)
|
||||
- [Replace the backbone network](docs/en/recommended_topics/replace_backbone.md)
|
||||
- [MMYOLO model complexity analysis](docs/en/recommended_topics/complexity_analysis.md)
|
||||
- [Annotation-to-deployment workflow for custom dataset](docs/en/recommended_topics/labeling_to_deployment_tutorials.md)
|
||||
- [Visualization](docs/en/recommended_topics/visualization.md)
|
||||
- [Model deployment](docs/en/recommended_topics/deploy/)
|
||||
- [Troubleshooting steps](docs/en/recommended_topics/troubleshooting_steps.md)
|
||||
- [MMYOLO industry examples](docs/en/recommended_topics/industry_examples.md)
|
||||
- [MM series repo essential basics](docs/en/recommended_topics/mm_basics.md)
|
||||
- [Dataset preparation and description](docs/en/recommended_topics/dataset_preparation.md)
|
||||
|
||||
- [Basic Deployment Guide](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#basic-deployment-guide)
|
||||
- [Basic Deployment Guide](docs/en/deploy/basic_deployment_guide.md)
|
||||
- [Deployment Tutorial](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#deployment-tutorial)
|
||||
- [YOLOv5 Deployment](docs/en/deploy/yolov5_deployment.md)
|
||||
</details>
|
||||
|
||||
- Advanced Guides
|
||||
<details>
|
||||
<summary>Common Usage</summary>
|
||||
|
||||
- [Data flow](docs/en/advanced_guides/data_flow.md)
|
||||
- [How to](docs/en/advanced_guides/how_to.md)
|
||||
- [Plugins](docs/en/advanced_guides/plugins.md)
|
||||
- [Resume training](docs/en/common_usage/resume_training.md)
|
||||
- [Enabling and disabling SyncBatchNorm](docs/en/common_usage/syncbn.md)
|
||||
- [Enabling AMP](docs/en/common_usage/amp_training.md)
|
||||
- [TTA Related Notes](docs/en/common_usage/tta.md)
|
||||
- [Add plugins to the backbone network](docs/en/common_usage/plugins.md)
|
||||
- [Freeze layers](docs/en/common_usage/freeze_layers.md)
|
||||
- [Output model predictions](docs/en/common_usage/output_predictions.md)
|
||||
- [Set random seed](docs/en/common_usage/set_random_seed.md)
|
||||
- [Module combination](docs/en/common_usage/module_combination.md)
|
||||
- [Cross-library calls using mim](docs/en/common_usage/mim_usage.md)
|
||||
- [Apply multiple Necks](docs/en/common_usage/multi_necks.md)
|
||||
- [Specify specific device training or inference](docs/en/common_usage/specify_device.md)
|
||||
- [Single and multi-channel application examples](docs/en/common_usage/single_multi_channel_applications.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Useful Tools</summary>
|
||||
|
||||
- [Browse coco json](docs/en/useful_tools/browse_coco_json.md)
|
||||
- [Browse dataset](docs/en/useful_tools/browse_dataset.md)
|
||||
- [Print config](docs/en/useful_tools/print_config.md)
|
||||
- [Dataset analysis](docs/en/useful_tools/dataset_analysis.md)
|
||||
- [Optimize anchors](docs/en/useful_tools/optimize_anchors.md)
|
||||
- [Extract subcoco](docs/en/useful_tools/extract_subcoco.md)
|
||||
- [Visualization scheduler](docs/en/useful_tools/vis_scheduler.md)
|
||||
- [Dataset converters](docs/en/useful_tools/dataset_converters.md)
|
||||
- [Download dataset](docs/en/useful_tools/download_dataset.md)
|
||||
- [Log analysis](docs/en/useful_tools/log_analysis.md)
|
||||
- [Model converters](docs/en/useful_tools/model_converters.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Basic Tutorials</summary>
|
||||
|
||||
- [Learn about configs with YOLOv5](docs/en/tutorials/config.md)
|
||||
- [Data flow](docs/en/tutorials/data_flow.md)
|
||||
- [Rotated detection](docs/en/tutorials/rotated_detection.md)
|
||||
- [Custom Installation](docs/en/tutorials/custom_installation.md)
|
||||
- [Common Warning Notes](docs/zh_cn/tutorials/warning_notes.md)
|
||||
- [FAQ](docs/en/tutorials/faq.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Advanced Tutorials</summary>
|
||||
|
||||
- [MMYOLO cross-library application](docs/en/advanced_guides/cross-library_application.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Descriptions</summary>
|
||||
|
||||
- [Changelog](docs/en/notes/changelog.md)
|
||||
- [Compatibility](docs/en/notes/compatibility.md)
|
||||
- [Conventions](docs/en/notes/conventions.md)
|
||||
- [Code Style](docs/en/notes/code_style.md)
|
||||
|
||||
</details>
|
||||
|
||||
## 📊 Overview of Benchmark and Model Zoo [🔝](#-table-of-contents)
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/17425982/222087414-168175cc-dae6-4c5c-a8e3-3109a152dd19.png"/>
|
||||
</div>
|
||||
|
||||
Results and models are available in the [model zoo](docs/en/model_zoo.md).
|
||||
|
||||
<details open>
|
||||
<summary><b>Supported Tasks</b></summary>
|
||||
|
||||
- [x] Object detection
|
||||
- [x] Rotated object detection
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary><b>Supported Algorithms</b></summary>
|
||||
|
||||
- [x] [YOLOv5](configs/yolov5)
|
||||
- [x] [YOLOX](configs/yolox)
|
||||
- [x] [RTMDet](configs/rtmdet)
|
||||
- [x] [RTMDet-Rotated](configs/rtmdet)
|
||||
- [x] [YOLOv6](configs/yolov6)
|
||||
- [x] [YOLOv7](configs/yolov7)
|
||||
- [x] [PPYOLOE](configs/ppyoloe)
|
||||
|
@ -190,6 +293,16 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
|
|||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary><b>Supported Datasets</b></summary>
|
||||
|
||||
- [x] COCO Dataset
|
||||
- [x] VOC Dataset
|
||||
- [x] CrowdHuman Dataset
|
||||
- [x] DOTA 1.0 Dataset
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<div align="center">
|
||||
<b>Module Components</b>
|
||||
|
@ -256,7 +369,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
|
|||
|
||||
## ❓ FAQ [🔝](#-table-of-contents)
|
||||
|
||||
Please refer to the [FAQ](docs/en/notes/faq.md) for frequently asked questions.
|
||||
Please refer to the [FAQ](docs/en/tutorials/faq.md) for frequently asked questions.
|
||||
|
||||
## 🙌 Contributing [🔝](#-table-of-contents)
|
||||
|
||||
|
|
202
README_zh-CN.md
202
README_zh-CN.md
|
@ -1,5 +1,5 @@
|
|||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/27466624/213156908-cef7cc50-97d1-4e0a-9e06-309bd0a49173.png" width="100%"/>
|
||||
<img src="https://user-images.githubusercontent.com/27466624/222385182-1247251c-8fac-4e77-94f5-57580e0ce3bd.png" width="100%"/>
|
||||
<div> </div>
|
||||
<div align="center">
|
||||
<b><font size="5">OpenMMLab 官网</font></b>
|
||||
|
@ -19,17 +19,17 @@
|
|||
<div> </div>
|
||||
|
||||
[](https://pypi.org/project/mmyolo)
|
||||
[](https://mmyolo.readthedocs.io/en/latest/)
|
||||
[](https://mmyolo.readthedocs.io/zh_CN/latest/)
|
||||
[](https://github.com/open-mmlab/mmyolo/actions)
|
||||
[](https://codecov.io/gh/open-mmlab/mmyolo)
|
||||
[](https://github.com/open-mmlab/mmyolo/blob/master/LICENSE)
|
||||
[](https://codecov.io/gh/open-mmlab/mmyolo)
|
||||
[](https://github.com/open-mmlab/mmyolo/blob/main/LICENSE)
|
||||
[](https://github.com/open-mmlab/mmyolo/issues)
|
||||
[](https://github.com/open-mmlab/mmyolo/issues)
|
||||
|
||||
[📘使用文档](https://mmyolo.readthedocs.io/zh_CN/latest/) |
|
||||
[🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started.html) |
|
||||
[🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started/installation.html) |
|
||||
[👀模型库](https://mmyolo.readthedocs.io/zh_CN/latest/model_zoo.html) |
|
||||
[🆕更新日志](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html) |
|
||||
[🆕更新日志](https://mmyolo.readthedocs.io/zh_CN/latest/notes/changelog.html) |
|
||||
[🤔报告问题](https://github.com/open-mmlab/mmyolo/issues/new/choose)
|
||||
|
||||
</div>
|
||||
|
@ -40,6 +40,26 @@
|
|||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## 📄 Table of Contents
|
||||
|
||||
- [🥳 🚀 最新进展](#--最新进展-)
|
||||
|
@ -58,10 +78,17 @@
|
|||
|
||||
## 🥳 🚀 最新进展 [🔝](#-table-of-contents)
|
||||
|
||||
💎 **v0.4.0** 版本已经在 2023.1.18 发布:
|
||||
💎 **v0.5.0** 版本已经在 2023.3.2 发布:
|
||||
|
||||
1. 实现了 [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 目标检测模型,并通过 [projects/easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy) 支持了模型部署
|
||||
2. 新增了中英文版本的 [YOLOv8 原理和实现全解析文档](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/algorithm_descriptions/yolov8_description.md)
|
||||
1. 支持了 [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) 旋转框目标检测任务和算法
|
||||
2. [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 支持使用 mask 标注提升目标检测模型性能
|
||||
3. 支持 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) 搜索的 NAS 子网络作为 YOLO 系列算法的 backbone
|
||||
4. 支持调用 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) 对 RTMDet 进行知识蒸馏
|
||||
5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) 文档结构优化,内容全面升级
|
||||
6. 基于 RTMDet 训练超参提升 YOLOX 精度和训练速度
|
||||
7. 支持模型参数量、FLOPs 计算和提供 T4 设备上 GPU 延时数据,并更新了 [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/model_zoo.md)
|
||||
8. 支持测试时增强 TTA
|
||||
9. 支持 RTMDet、YOLOv8 和 YOLOv7 assigner 可视化
|
||||
|
||||
我们提供了实用的**脚本命令速查表**
|
||||
|
||||
|
@ -103,7 +130,7 @@
|
|||
<img src="https://user-images.githubusercontent.com/12907710/208044554-1e8de6b5-48d8-44e4-a7b5-75076c7ebb71.png"/>
|
||||
</div>
|
||||
|
||||
MMYOLO 中目前仅仅实现了目标检测算法,但是相比 MMDetection 版本有显著训练加速,训练速度相比原先版本提升 2.6 倍。
|
||||
MMYOLO 中目前实现了目标检测和旋转框目标检测算法,但是相比 MMDetection 版本有显著训练加速,训练速度相比原先版本提升 2.6 倍。
|
||||
|
||||
## 📖 简介 [🔝](#-table-of-contents)
|
||||
|
||||
|
@ -130,21 +157,21 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具
|
|||
<img src="https://user-images.githubusercontent.com/27466624/199999337-0544a4cb-3cbd-4f3e-be26-bcd9e74db7ff.jpg" alt="基类-P5"/>
|
||||
图为 RangeKing@GitHub 提供,非常感谢!
|
||||
|
||||
P6 模型图详见 [model_design.md](docs/zh_CN/algorithm_descriptions/model_design.md)。
|
||||
P6 模型图详见 [model_design.md](docs/zh_cn/recommended_topics/model_design.md)。
|
||||
|
||||
</details>
|
||||
|
||||
## 🛠️ 安装 [🔝](#-table-of-contents)
|
||||
|
||||
MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection,以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started.md)。
|
||||
MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection,以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started/installation.md)。
|
||||
|
||||
```shell
|
||||
conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
conda create -n mmyolo python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
|
||||
conda activate mmyolo
|
||||
pip install openmim
|
||||
mim install "mmengine>=0.3.1"
|
||||
mim install "mmcv>=2.0.0rc1,<2.1.0"
|
||||
mim install "mmdet>=3.0.0rc5,<3.1.0"
|
||||
mim install "mmengine>=0.6.0"
|
||||
mim install "mmcv>=2.0.0rc4,<2.1.0"
|
||||
mim install "mmdet>=3.0.0rc6,<3.1.0"
|
||||
git clone https://github.com/open-mmlab/mmyolo.git
|
||||
cd mmyolo
|
||||
# Install albumentations
|
||||
|
@ -161,53 +188,126 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
|
|||
|
||||
针对和 MMDetection 不同的部分,我们也准备了用户指南和进阶指南,请阅读我们的 [文档](https://mmyolo.readthedocs.io/zh_CN/latest/) 。
|
||||
|
||||
- 用户指南
|
||||
<details>
|
||||
<summary>开启 MMYOLO 之旅</summary>
|
||||
|
||||
- [训练 & 测试](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#训练-测试)
|
||||
- [学习 YOLOv5 配置文件](docs/zh_cn/user_guides/config.md)
|
||||
- [从入门到部署全流程](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#从入门到部署全流程)
|
||||
- [自定义数据集](docs/zh_cn/user_guides/custom_dataset.md)
|
||||
- [YOLOv5 从入门到部署全流程](docs/zh_cn/user_guides/yolov5_tutorial.md)
|
||||
- [实用工具](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#实用工具)
|
||||
- [可视化教程](docs/zh_cn/user_guides/visualization.md)
|
||||
- [实用工具](docs/zh_cn/user_guides/useful_tools.md)
|
||||
- [概述](docs/zh_cn/get_started/overview.md)
|
||||
- [依赖](docs/zh_cn/get_started/dependencies.md)
|
||||
- [安装和验证](docs/zh_cn/get_started/installation.md)
|
||||
- [15 分钟上手 MMYOLO 目标检测](docs/zh_cn/get_started/15_minutes_object_detection.md)
|
||||
- [15 分钟上手 MMYOLO 旋转框目标检测](docs/zh_cn/get_started/15_minutes_rotated_object_detection.md)
|
||||
- [15 分钟上手 MMYOLO 实例分割](docs/zh_cn/get_started/15_minutes_instance_segmentation.md)
|
||||
- [中文解读资源汇总](docs/zh_cn/get_started/article.md)
|
||||
|
||||
- 算法描述
|
||||
</details>
|
||||
|
||||
- [必备基础](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#基础内容)
|
||||
- [模型设计相关说明](docs/zh_cn/algorithm_descriptions/model_design.md)
|
||||
- [算法原理和实现全解析](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#算法原理和实现全解析)
|
||||
- [YOLOv5 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov5_description.md)
|
||||
- [YOLOv6 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov6_description.md)
|
||||
- [RTMDet 原理和实现全解析](docs/zh_cn/algorithm_descriptions/rtmdet_description.md)
|
||||
- [YOLOv8 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov8_description.md)
|
||||
<details>
|
||||
<summary>推荐专题</summary>
|
||||
|
||||
- 算法部署
|
||||
- [如何给 MMYOLO 贡献代码](docs/zh_cn/recommended_topics/contributing.md)
|
||||
- [MMYOLO 模型结构设计](docs/zh_cn/recommended_topics/model_design.md)
|
||||
- [原理和实现全解析](docs/zh_cn/recommended_topics/algorithm_descriptions/)
|
||||
- [轻松更换主干网络](docs/zh_cn/recommended_topics/replace_backbone.md)
|
||||
- [MMYOLO 模型复杂度分析](docs/zh_cn/recommended_topics/complexity_analysis.md)
|
||||
- [标注+训练+测试+部署全流程](docs/zh_cn/recommended_topics/labeling_to_deployment_tutorials.md)
|
||||
- [关于可视化的一切](docs/zh_cn/recommended_topics/visualization.md)
|
||||
- [模型部署流程](docs/zh_cn/recommended_topics/deploy/)
|
||||
- [常见错误排查步骤](docs/zh_cn/recommended_topics/troubleshooting_steps.md)
|
||||
- [MMYOLO 产业范例介绍](docs/zh_cn/recommended_topics/industry_examples.md)
|
||||
- [MM 系列 Repo 必备基础](docs/zh_cn/recommended_topics/mm_basics.md)
|
||||
- [数据集准备和说明](docs/zh_cn/recommended_topics/dataset_preparation.md)
|
||||
|
||||
- [部署必备教程](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id1)
|
||||
- [部署必备教程](docs/zh_cn/deploy/basic_deployment_guide.md)
|
||||
- [部署全流程说明](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id2)
|
||||
- [YOLOv5 部署全流程说明](docs/zh_cn/deploy/yolov5_deployment.md)
|
||||
</details>
|
||||
|
||||
- 进阶指南
|
||||
<details>
|
||||
<summary>常用功能</summary>
|
||||
|
||||
- [模块组合](docs/zh_cn/advanced_guides/module_combination.md)
|
||||
- [数据流](docs/zh_cn/advanced_guides/data_flow.md)
|
||||
- [How to](docs/zh_cn/advanced_guides/how_to.md)
|
||||
- [插件](docs/zh_cn/advanced_guides/plugins.md)
|
||||
- [恢复训练](docs/zh_cn/common_usage/resume_training.md)
|
||||
- [开启和关闭 SyncBatchNorm](docs/zh_cn/common_usage/syncbn.md)
|
||||
- [开启混合精度训练](docs/zh_cn/common_usage/amp_training.md)
|
||||
- [测试时增强相关说明](docs/zh_cn/common_usage/tta.md)
|
||||
- [给主干网络增加插件](docs/zh_cn/common_usage/plugins.md)
|
||||
- [冻结指定网络层权重](docs/zh_cn/common_usage/freeze_layers.md)
|
||||
- [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md)
|
||||
- [设置随机种子](docs/zh_cn/common_usage/set_random_seed.md)
|
||||
- [算法组合替换教程](docs/zh_cn/common_usage/module_combination.md)
|
||||
- [使用 mim 跨库调用其他 OpenMMLab 仓库的脚本](docs/zh_cn/common_usage/mim_usage.md)
|
||||
- [应用多个 Neck](docs/zh_cn/common_usage/multi_necks.md)
|
||||
- [指定特定设备训练或推理](docs/zh_cn/common_usage/specify_device.md)
|
||||
- [单通道和多通道应用案例](docs/zh_cn/common_usage/single_multi_channel_applications.md)
|
||||
- [MM 系列开源库注册表](docs/zh_cn/common_usage/registries_info.md)
|
||||
|
||||
- [解读文章和资源汇总](docs/zh_cn/article.md)
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>实用工具</summary>
|
||||
|
||||
- [可视化 COCO 标签](docs/zh_cn/useful_tools/browse_coco_json.md)
|
||||
- [可视化数据集](docs/zh_cn/useful_tools/browse_dataset.md)
|
||||
- [打印完整配置文件](docs/zh_cn/useful_tools/print_config.md)
|
||||
- [可视化数据集分析结果](docs/zh_cn/useful_tools/dataset_analysis.md)
|
||||
- [优化锚框尺寸](docs/zh_cn/useful_tools/optimize_anchors.md)
|
||||
- [提取 COCO 子集](docs/zh_cn/useful_tools/extract_subcoco.md)
|
||||
- [可视化优化器参数策略](docs/zh_cn/useful_tools/vis_scheduler.md)
|
||||
- [数据集转换](docs/zh_cn/useful_tools/dataset_converters.md)
|
||||
- [数据集下载](docs/zh_cn/useful_tools/download_dataset.md)
|
||||
- [日志分析](docs/zh_cn/useful_tools/log_analysis.md)
|
||||
- [模型转换](docs/zh_cn/useful_tools/model_converters.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>基础教程</summary>
|
||||
|
||||
- [学习 YOLOv5 配置文件](docs/zh_cn/tutorials/config.md)
|
||||
- [数据流](docs/zh_cn/tutorials/data_flow.md)
|
||||
- [旋转目标检测](docs/zh_cn/tutorials/rotated_detection.md)
|
||||
- [自定义安装](docs/zh_cn/tutorials/custom_installation.md)
|
||||
- [常见警告说明](docs/zh_cn/tutorials/warning_notes.md)
|
||||
- [常见问题](docs/zh_cn/tutorials/faq.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>进阶教程</summary>
|
||||
|
||||
- [MMYOLO 跨库应用解析](docs/zh_cn/advanced_guides/cross-library_application.md)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>说明</summary>
|
||||
|
||||
- [更新日志](docs/zh_cn/notes/changelog.md)
|
||||
- [兼容性说明](docs/zh_cn/notes/compatibility.md)
|
||||
- [默认约定](docs/zh_cn/notes/conventions.md)
|
||||
- [代码规范](docs/zh_cn/notes/code_style.md)
|
||||
|
||||
</details>
|
||||
|
||||
## 📊 基准测试和模型库 [🔝](#-table-of-contents)
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/17425982/222087414-168175cc-dae6-4c5c-a8e3-3109a152dd19.png"/>
|
||||
</div>
|
||||
|
||||
测试结果和模型可以在 [模型库](docs/zh_cn/model_zoo.md) 中找到。
|
||||
|
||||
<details open>
|
||||
<summary><b>支持的任务</b></summary>
|
||||
|
||||
- [x] 目标检测
|
||||
- [x] 旋转框目标检测
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary><b>支持的算法</b></summary>
|
||||
|
||||
- [x] [YOLOv5](configs/yolov5)
|
||||
- [x] [YOLOX](configs/yolox)
|
||||
- [x] [RTMDet](configs/rtmdet)
|
||||
- [x] [RTMDet-Rotated](configs/rtmdet)
|
||||
- [x] [YOLOv6](configs/yolov6)
|
||||
- [x] [YOLOv7](configs/yolov7)
|
||||
- [x] [PPYOLOE](configs/ppyoloe)
|
||||
|
@ -215,6 +315,16 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
|
|||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary><b>支持的数据集</b></summary>
|
||||
|
||||
- [x] COCO Dataset
|
||||
- [x] VOC Dataset
|
||||
- [x] CrowdHuman Dataset
|
||||
- [x] DOTA 1.0 Dataset
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<div align="center">
|
||||
<b>模块组件</b>
|
||||
|
@ -281,7 +391,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
|
|||
|
||||
## ❓ 常见问题 [🔝](#-table-of-contents)
|
||||
|
||||
请参考 [FAQ](docs/zh_cn/notes/faq.md) 了解其他用户的常见问题。
|
||||
请参考 [FAQ](docs/zh_cn/tutorials/faq.md) 了解其他用户的常见问题。
|
||||
|
||||
## 🙌 贡献指南 [🔝](#-table-of-contents)
|
||||
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
# TODO: Need to solve the problem of multiple file_client_args parameters
# _file_client_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
_file_client_args = dict(backend='disk')

# Test-time augmentation (TTA) wrapper: merges the predictions of the
# augmented views with NMS (IoU threshold 0.65), keeping at most 300
# detections per image.
tta_model = dict(
    type='mmdet.DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300))

# Three test scales: the base 640x640 plus one smaller and one larger view.
img_scales = [(640, 640), (320, 320), (960, 960)]

#                               LoadImageFromFile
#                     /                  |                  \
# (RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) # noqa
#        /      \                 /      \                  /      \
# RandomFlip RandomFlip    RandomFlip RandomFlip     RandomFlip RandomFlip # noqa
#     |          |             |          |              |          |
#  LoadAnn    LoadAnn       LoadAnn    LoadAnn        LoadAnn    LoadAnn
#     |          |             |          |              |          |
#  PackDetIn  PackDetIn     PackDetIn  PackDetIn      PackDetIn  PackDetIn # noqa

# One keep-ratio resize + letterbox (pad value 114) pipeline per test scale.
_multiscale_resize_transforms = [
    dict(
        type='Compose',
        transforms=[
            dict(type='YOLOv5KeepRatioResize', scale=s),
            dict(
                type='LetterResize',
                scale=s,
                allow_scale_up=False,
                pad_val=dict(img=114))
        ]) for s in img_scales
]

# Full TTA pipeline: 3 scales x 2 flips = 6 augmented views per image.
tta_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=_file_client_args),
    dict(
        type='TestTimeAug',
        transforms=[
            _multiscale_resize_transforms,
            [
                dict(type='mmdet.RandomFlip', prob=1.),
                dict(type='mmdet.RandomFlip', prob=0.)
            ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
            [
                dict(
                    type='mmdet.PackDetInputs',
                    meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                               'scale_factor', 'pad_param', 'flip',
                               'flip_direction'))
            ]
        ])
]
|
|
@ -0,0 +1,56 @@
|
|||
# Compared to other same scale models, this configuration consumes too much
# GPU memory and is not validated for now

# Fine-tune PPYOLOE+ small on a single-class ("cat") custom dataset.
_base_ = 'ppyoloe_plus_s_fast_8xb8-80e_coco.py'

data_root = './data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

num_last_epochs = 5

max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 2

# COCO-pretrained checkpoint used as the fine-tuning starting point.
load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco/ppyoloe_plus_s_fast_8xb8-80e_coco_20230101_154052-9fee7619.pth'  # noqa

model = dict(
    # Freeze the whole backbone to cut memory use during fine-tuning.
    backbone=dict(frozen_stages=4),
    bbox_head=dict(head_module=dict(num_classes=num_classes)),
    train_cfg=dict(
        initial_assigner=dict(num_classes=num_classes),
        assigner=dict(num_classes=num_classes)))

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='annotations/trainval.json',
        data_prefix=dict(img='images/')))

val_dataloader = dict(
    dataset=dict(
        metainfo=metainfo,
        data_root=data_root,
        ann_file='annotations/test.json',
        data_prefix=dict(img='images/')))

test_dataloader = val_dataloader

val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator

# NOTE(fix): the original file assigned ``default_hooks`` twice; because a
# config file is executed as plain Python, the second assignment silently
# discarded the param_scheduler warmup overrides. The two dicts are merged
# here so every hook override takes effect.
default_hooks = dict(
    param_scheduler=dict(
        warmup_min_iter=10,
        warmup_epochs=3,
        total_epochs=int(max_epochs * 1.2)),
    checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
    logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -1,4 +1,4 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
|
||||
|
||||
# dataset settings
|
||||
data_root = 'data/coco/'
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
# Projects Based on MMRazor
|
||||
|
||||
There are many research works and pre-trained models built on MMRazor. We list some of them as examples of how to use MMRazor slimmable models for downstream frameworks. As the page might not be completed, please feel free to contribute more efficient mmrazor-models to update this page.
|
||||
|
||||
## Description
|
||||
|
||||
This is an implementation of MMRazor Searchable Backbone Application, we provide detection configs and models for MMRazor in MMYOLO.
|
||||
|
||||
### Backbone support
|
||||
|
||||
Here are the Neural Architecture Search(NAS) Models that come from MMRazor which support YOLO Series. If you are looking for MMRazor models only for Backbone, you could refer to MMRazor [ModelZoo](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/get_started/model_zoo.md) and corresponding repository.
|
||||
|
||||
- [x] [AttentiveMobileNetV3](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py)
|
||||
- [x] [SearchableShuffleNetV2](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/spos_shufflenet_supernet.py)
|
||||
- [x] [SearchableMobileNetV2](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/spos_mobilenet_supernet.py)
|
||||
|
||||
## Usage
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- [MMRazor v1.0.0rc2](https://github.com/open-mmlab/mmrazor/tree/v1.0.0rc2) or higher (dev-1.x)
|
||||
|
||||
Install MMRazor using MIM.
|
||||
|
||||
```shell
|
||||
mim install mmengine
|
||||
mim install "mmrazor>=1.0.0rc2"
|
||||
```
|
||||
|
||||
Install MMRazor from source
|
||||
|
||||
```
|
||||
git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git
|
||||
cd mmrazor
|
||||
# Install MMRazor
|
||||
mim install -v -e .
|
||||
```
|
||||
|
||||
### Training commands
|
||||
|
||||
In MMYOLO's root directory, if you want to use single GPU for training, run the following command to train the model:
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_train.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py
|
||||
```
|
||||
|
||||
If you want to use several of these GPUs to train in parallel, you can use the following command:
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py
|
||||
```
|
||||
|
||||
### Testing commands
|
||||
|
||||
In MMYOLO's root directory, run the following command to test the model:
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py ${CHECKPOINT_PATH}
|
||||
```
|
||||
|
||||
## Results and Models
|
||||
|
||||
Here we provide the baseline version of YOLO Series with NAS backbone.
|
||||
|
||||
| Model | size | box AP | Params(M) | FLOPs(G) | Config | Download |
|
||||
| :------------------------: | :--: | :----: | :----------: | :------: | :---------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| yolov5-s | 640 | 37.7 | 7.235 | 8.265 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
|
||||
| yolov5_s_spos_shufflenetv2 | 640 | 38.0 | 7.04(-2.7%) | 7.03 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco_20230211_220635-578be9a9.pth) \| log |
|
||||
| yolov6-s | 640 | 44.0 | 18.869 | 24.253 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035.log.json) |
|
||||
| yolov6_l_attentivenas_a6 | 640 | 45.3 | 18.38(-2.6%) | 8.49 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco_20230211_222409-dcc72668.pth) \| log |
|
||||
| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
|
||||
| rtmdet_tiny_ofa_lat31 | 960 | 41.3 | 3.91(-18.5%) | 6.09 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco_20230214_210623-449bb2a0.pth) \| log |
|
||||
|
||||
**Note**:
|
||||
|
||||
1. For fair comparison, the training configuration is consistent with the original configuration and results in an improvement of about 0.2-0.5% AP.
|
||||
2. `yolov5_s_spos_shufflenetv2` achieves 38.0% AP with only 7.042M parameters, directly instead of the backbone, and outperforms `yolov5_s` with a similar size by more than 0.3% AP.
|
||||
3. With the efficient backbone of `yolov6_l_attentivenas_a6`, the input channels of `YOLOv6RepPAFPN` are reduced. Meanwhile, the **deepen_factor** is modified so that the neck is made deeper to restore the AP.
|
||||
4. With the `rtmdet_tiny_ofa_lat31` backbone, which has only 3.315M parameters and 3.634G FLOPs, we can raise the input resolution to 960 while keeping a model size similar to `rtmdet_tiny`; it exceeds `rtmdet_tiny` by 0.4% AP and reduces the size of the whole model to 3.91 MB.
|
|
@ -0,0 +1,124 @@
|
|||
# RTMDet-s with an MMRazor OFA MobileNetV3 NAS backbone (note8-lat31
# subnet), trained at 960x960 input resolution.
# NOTE: ``_base_.<attr>`` mutation below only works when this file is
# loaded through the mmengine Config machinery.
_base_ = [
    'mmrazor::_base_/nas_backbones/ofa_mobilenetv3_supernet.py',
    '../../rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
]

checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/ofa/ofa_mobilenet_subnet_8xb256_in1k_note8_lat%4031ms_top1%4072.8_finetune%4025.py_20221214_0939-981a8b2a.pth'  # noqa
fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/OFA_SUBNET_NOTE8_LAT31.yaml'  # noqa
deepen_factor = 0.167
widen_factor = 1.0
channels = [40, 112, 160]
train_batch_size_per_gpu = 16
img_scale = (960, 960)

# Wire the searched backbone into the detector: take features from stages
# (2, 4, 5) and load the OFA-pretrained weights.
_base_.nas_backbone.out_indices = (2, 4, 5)
_base_.nas_backbone.conv_cfg = dict(type='mmrazor.OFAConv2d')
_base_.nas_backbone.init_cfg = dict(
    type='Pretrained',
    checkpoint=checkpoint_file,
    prefix='architecture.backbone.')
nas_backbone = dict(
    type='mmrazor.sub_model',
    fix_subnet=fix_subnet,
    cfg=_base_.nas_backbone,
    extra_prefix='backbone.')

# The backbone's output channels differ from the default CSPNeXt, so the
# neck and head channel settings are overridden to match.
_base_.model.backbone = nas_backbone
_base_.model.neck.widen_factor = widen_factor
_base_.model.neck.deepen_factor = deepen_factor
_base_.model.neck.in_channels = channels
_base_.model.neck.out_channels = channels[0]
_base_.model.bbox_head.head_module.in_channels = channels[0]
_base_.model.bbox_head.head_module.feat_channels = channels[0]
_base_.model.bbox_head.head_module.widen_factor = widen_factor

_base_.model.test_cfg = dict(
    multi_label=True,
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_threshold=0.6),
    max_per_img=100)

# Stage-1 training pipeline: cached Mosaic + random resize/crop + HSV aug
# + flip + cached MixUp, at the enlarged 960x960 scale.
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Mosaic',
        img_scale=img_scale,
        use_cached=True,
        max_cached_images=20,
        random_pop=False,
        pad_val=114.0),
    dict(
        type='mmdet.RandomResize',
        scale=(1280, 1280),
        ratio_range=(0.5, 2.0),  # note
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
    dict(
        type='YOLOXMixUp',
        img_scale=(960, 960),
        ratio_range=(1.0, 1.0),
        max_cached_images=10,
        use_cached=True,
        random_pop=False,
        pad_val=(114, 114, 114),
        prob=0.5),
    dict(type='mmdet.PackDetInputs')
]

# Stage-2 pipeline (last epochs): same as stage 1 but without Mosaic/MixUp.
train_pipeline_stage2 = [
    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='mmdet.RandomResize',
        scale=img_scale,
        ratio_range=(0.5, 2.0),  # note
        resize_type='mmdet.Resize',
        keep_ratio=True),
    dict(type='mmdet.RandomCrop', crop_size=img_scale),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
    dict(type='mmdet.PackDetInputs')
]

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu, dataset=dict(pipeline=train_pipeline))

test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
    dict(type='mmdet.Resize', scale=(960, 960), keep_ratio=True),
    dict(type='mmdet.Pad', size=(960, 960), pad_val=dict(img=(114, 114, 114))),
    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

val_dataloader = dict(
    dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))

test_dataloader = val_dataloader

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        strict_load=False,
        priority=49),
    # Swap to the weaker stage-2 pipeline for the final epochs.
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
        switch_pipeline=train_pipeline_stage2)
]

# Frozen/unsampled supernet branches leave some parameters unused in DDP.
find_unused_parameters = True
|
|
@ -0,0 +1,29 @@
|
|||
_base_ = [
|
||||
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py',
|
||||
'../../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
|
||||
]
|
||||
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_v3.pth' # noqa
|
||||
fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_subnet_cfg_v3.yaml' # noqa
|
||||
widen_factor = 1.0
|
||||
channels = [160, 320, 640]
|
||||
|
||||
_base_.nas_backbone.out_indices = (1, 2, 3)
|
||||
_base_.nas_backbone.init_cfg = dict(
|
||||
type='Pretrained',
|
||||
checkpoint=checkpoint_file,
|
||||
prefix='architecture.backbone.')
|
||||
nas_backbone = dict(
|
||||
type='mmrazor.sub_model',
|
||||
fix_subnet=fix_subnet,
|
||||
cfg=_base_.nas_backbone,
|
||||
extra_prefix='architecture.backbone.')
|
||||
|
||||
_base_.model.backbone = nas_backbone
|
||||
_base_.model.neck.widen_factor = widen_factor
|
||||
_base_.model.neck.in_channels = channels
|
||||
_base_.model.neck.out_channels = channels
|
||||
_base_.model.bbox_head.head_module.in_channels = channels
|
||||
_base_.model.bbox_head.head_module.widen_factor = widen_factor
|
||||
|
||||
find_unused_parameters = True
|
|
@ -0,0 +1,35 @@
|
|||
_base_ = [
|
||||
'mmrazor::_base_/nas_backbones/attentive_mobilenetv3_supernet.py',
|
||||
'../../yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py'
|
||||
]
|
||||
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth' # noqa
|
||||
fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml' # noqa
|
||||
deepen_factor = 1.2
|
||||
widen_factor = 1
|
||||
channels = [40, 128, 224]
|
||||
mid_channels = [40, 128, 224]
|
||||
|
||||
_base_.train_dataloader.batch_size = 16
|
||||
_base_.nas_backbone.out_indices = (2, 4, 6)
|
||||
_base_.nas_backbone.conv_cfg = dict(type='mmrazor.BigNasConv2d')
|
||||
_base_.nas_backbone.norm_cfg = dict(type='mmrazor.DynamicBatchNorm2d')
|
||||
_base_.nas_backbone.init_cfg = dict(
|
||||
type='Pretrained',
|
||||
checkpoint=checkpoint_file,
|
||||
prefix='architecture.backbone.')
|
||||
nas_backbone = dict(
|
||||
type='mmrazor.sub_model',
|
||||
fix_subnet=fix_subnet,
|
||||
cfg=_base_.nas_backbone,
|
||||
extra_prefix='backbone.')
|
||||
|
||||
_base_.model.backbone = nas_backbone
|
||||
_base_.model.neck.widen_factor = widen_factor
|
||||
_base_.model.neck.deepen_factor = deepen_factor
|
||||
_base_.model.neck.in_channels = channels
|
||||
_base_.model.neck.out_channels = mid_channels
|
||||
_base_.model.bbox_head.head_module.in_channels = mid_channels
|
||||
_base_.model.bbox_head.head_module.widen_factor = widen_factor
|
||||
|
||||
find_unused_parameters = True
|
|
@ -21,20 +21,53 @@ RTMDet-l model structure
|
|||
|
||||
## Results and Models
|
||||
|
||||
## Object Detection
|
||||
### Object Detection
|
||||
|
||||
| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download |
|
||||
| :---------: | :--: | :----: | :-------: | :------: | :------------------: | :-------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
|
||||
| RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) |
|
||||
| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
|
||||
| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
|
||||
| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |
|
||||
| Model | size | Params(M) | FLOPs(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download |
|
||||
| :------------: | :--: | :-------: | :------: | :------------------: | :---------: | :---------: | :---------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) |
|
||||
| RTMDet-tiny \* | 640 | 4.8 | 8.1 | 0.98 | 41.8 (+0.8) | 43.2 (+0.5) | [config](./distillation/kd_tiny_rtmdet_s_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) |
|
||||
| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) |
|
||||
| RTMDet-s \* | 640 | 8.89 | 14.8 | 1.22 | 45.7 (+1.1) | 47.3 (+1.5) | [config](./distillation/kd_s_rtmdet_m_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) |
|
||||
| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
|
||||
| RTMDet-m \* | 640 | 24.71 | 39.27 | 1.62 | 50.2 (+0.9) | 51.9 (+1.0) | [config](./distillation/kd_m_rtmdet_l_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) |
|
||||
| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
|
||||
| RTMDet-l \* | 640 | 52.3 | 80.23 | 2.44 | 52.3 (+0.9) | 53.7 (+0.6) | [config](./distillation/kd_l_rtmdet_x_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) |
|
||||
| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |
|
||||
|
||||
**Note**:
|
||||
|
||||
1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS.
|
||||
2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement.
|
||||
3. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
|
||||
4. \* means checkpoints are trained with knowledge distillation. More details can be found in [RTMDet distillation](./distillation).
|
||||
|
||||
### Rotated Object Detection
|
||||
|
||||
RTMDet-R achieves state-of-the-art on various remote sensing datasets.
|
||||
|
||||
| Backbone | pretrain | Epoch | Batch Size | Aug | mmAP | mAP50 | mAP75 | Mem (GB) | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download |
|
||||
| :---------: | :------: | :---: | :--------: | :-------------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------------------: | :--------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| RTMDet-tiny | IN | 36 | 1xb8 | RR | 46.94 | 75.07 | 50.11 | 12.7 | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210.log.json) |
|
||||
| RTMDet-s | IN | 36 | 1xb8 | RR | 48.99 | 77.33 | 52.65 | 16.6 | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307.log.json) |
|
||||
| RTMDet-m | IN | 36 | 2xb4 | RR | 50.38 | 78.43 | 54.28 | 10.9 | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237.log.json) |
|
||||
| RTMDet-l | IN | 36 | 2xb4 | RR | 50.61 | 78.66 | 54.95 | 16.1 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544.log.json) |
|
||||
| RTMDet-tiny | IN | 36 | 1xb8 | MS+RR | - | - | - | | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py) | \| |
|
||||
| RTMDet-s | IN | 36 | 1xb8 | MS+RR | - | - | - | | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py) | \| |
|
||||
| RTMDet-m | IN | 36 | 2xb4 | MS+RR | - | - | - | | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py) | \| |
|
||||
| RTMDet-l | IN | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py) | \| |
|
||||
| RTMDet-l | COCO | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py) | \| |
|
||||
| RTMDet-l | IN | 100 | 2xb4 | Mixup+Mosaic+RR | 55.05 | 80.14 | 61.32 | 19.6 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735.log.json) |
|
||||
|
||||
**Note**:
|
||||
|
||||
1. Please follow the doc to get started with rotated detection: [Rotated Object Detection](../../docs/zh_cn/tutorials/rotated_detection.md)
|
||||
2. We follow the latest metrics from the DOTA evaluation server; the original VOC-format mAP is now mAP50.
|
||||
3. All models trained with image size 1024\*1024.
|
||||
4. `IN` means ImageNet pretrain, `COCO` means COCO pretrain.
|
||||
5. For Aug, RR means `RandomRotate`, MS means multi-scale augmentation in data prepare.
|
||||
6. The inference speed here is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and with NMS.
|
||||
7. Currently, the training process of RTMDet-R tiny is unstable and may have 1% accuracy fluctuation, we will continue to investigate why.
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
# Distill RTM Detectors Based on MMRazor
|
||||
|
||||
## Description
|
||||
|
||||
To further improve the model accuracy while not introducing much additional
|
||||
computation cost, we apply the feature-based distillation to the training phase
|
||||
of these RTM detectors. In summary, our distillation strategy is threefold:
|
||||
|
||||
(1) Inspired by [PKD](https://arxiv.org/abs/2207.02039), we first normalize
|
||||
the intermediate feature maps to have zero mean and unit variances before calculating
|
||||
the distillation loss.
|
||||
|
||||
(2) Inspired by [CWD](https://arxiv.org/abs/2011.13256), we adopt the channel-wise
|
||||
distillation paradigm, which can pay more attention to the most salient regions
|
||||
of each channel.
|
||||
|
||||
(3) Inspired by [DAMO-YOLO](https://arxiv.org/abs/2211.15444), the distillation
|
||||
process is split into two stages. 1) The teacher distills the student at the
|
||||
first stage (280 epochs) on strong mosaic domain. 2) The student finetunes itself
|
||||
on the no-mosaic domain at the second stage (20 epochs).
|
||||
|
||||
## Results and Models
|
||||
|
||||
| Location | Dataset | Teacher | Student | mAP | mAP(T) | mAP(S) | Config | Download |
|
||||
| :------: | :-----: | :---------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: | :---------: | :----: | :----: | :------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| FPN | COCO | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-tiny](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | 41.8 (+0.8) | 44.6 | 41.0 | [config](kd_tiny_rtmdet_s_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) |
|
||||
| FPN | COCO | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | 45.7 (+1.1) | 49.3 | 44.6 | [config](kd_s_rtmdet_m_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) |
|
||||
| FPN | COCO | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | 50.2 (+0.9) | 51.4 | 49.3 | [config](kd_m_rtmdet_l_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) |
|
||||
| FPN | COCO | [RTMDet-x](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | 52.3 (+0.9) | 52.8 | 51.4 | [config](kd_l_rtmdet_x_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) |
|
||||
|
||||
## Usage
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- [MMRazor dev-1.x](https://github.com/open-mmlab/mmrazor/tree/dev-1.x)
|
||||
|
||||
Install MMRazor from source
|
||||
|
||||
```
|
||||
git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git
|
||||
cd mmrazor
|
||||
# Install MMRazor
|
||||
mim install -v -e .
|
||||
```
|
||||
|
||||
### Training commands
|
||||
|
||||
In MMYOLO's root directory, run the following command to train the RTMDet-tiny
|
||||
with 8 GPUs, using RTMDet-s as the teacher:
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py
|
||||
```
|
||||
|
||||
### Testing commands
|
||||
|
||||
In MMYOLO's root directory, run the following command to test the model:
|
||||
|
||||
```bash
|
||||
CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py ${CHECKPOINT_PATH}
|
||||
```
|
||||
|
||||
### Getting student-only checkpoint
|
||||
|
||||
After training, the checkpoint contains parameters for both student and teacher models.
|
||||
Run the following command to convert it to student-only checkpoint:
|
||||
|
||||
```bash
|
||||
python ./tools/model_converters/convert_kd_ckpt_to_student.py ${CHECKPOINT_PATH} --out-path ${OUTPUT_CHECKPOINT_PATH}
|
||||
```
|
||||
|
||||
## Configs
|
||||
|
||||
Here we provide detection configs and models for MMRazor in MMYOLO. For clarity,
|
||||
we take `./kd_tiny_rtmdet_s_neck_300e_coco.py` as an example to show how to
|
||||
distill an RTM detector based on MMRazor.
|
||||
|
||||
Here is the main part of `./kd_tiny_rtmdet_s_neck_300e_coco.py`.
|
||||
|
||||
```shell
|
||||
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
|
||||
|
||||
distiller=dict(
|
||||
type='ConfigurableDistiller',
|
||||
student_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
|
||||
),
|
||||
teacher_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
|
||||
connectors=dict(
|
||||
fpn0_s=dict(type='ConvModuleConnector', in_channel=96,
|
||||
out_channel=128, bias=False, norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn0_t=dict(
|
||||
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
|
||||
fpn1_s=dict(
|
||||
type='ConvModuleConnector', in_channel=96,
|
||||
out_channel=128, bias=False, norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn1_t=dict(
|
||||
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
|
||||
fpn2_s=dict(
|
||||
type='ConvModuleConnector', in_channel=96,
|
||||
out_channel=128, bias=False, norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn2_t=dict(
|
||||
type='NormConnector', in_channels=128, norm_cfg=norm_cfg)),
|
||||
distill_losses=dict(
|
||||
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
|
||||
loss_forward_mappings=dict(
|
||||
loss_fpn0=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn0', connector='fpn0_s'),
|
||||
preds_T=dict(from_student=False, recorder='fpn0', connector='fpn0_t')),
|
||||
loss_fpn1=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn1', connector='fpn1_s'),
|
||||
preds_T=dict(from_student=False, recorder='fpn1', connector='fpn1_t')),
|
||||
loss_fpn2=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn2', connector='fpn2_s'),
|
||||
preds_T=dict(from_student=False, recorder='fpn2', connector='fpn2_t'))))
|
||||
|
||||
```
|
||||
|
||||
`recorders` are used to record various intermediate results during the model forward.
|
||||
In this example, they can help record the output of 3 `nn.Module` of the teacher
|
||||
and the student. Details are listed in [Recorder](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/advanced_guides/recorder.md) and [MMRazor Distillation](https://zhuanlan.zhihu.com/p/596582609) (in Chinese).
|
||||
|
||||
`connectors` are adaptive layers which usually map the teacher's and student's features
|
||||
to the same dimension.
|
||||
|
||||
`distill_losses` are configs for multiple distill losses.
|
||||
|
||||
`loss_forward_mappings` are mappings between distill loss forward arguments and records.
|
||||
|
||||
In addition, the student finetunes itself on the no-mosaic domain during the last 20 epochs,
|
||||
so we add a new hook named `StopDistillHook` to stop distillation on time.
|
||||
We need to add this hook to the `custom_hooks` list like this:
|
||||
|
||||
```shell
|
||||
custom_hooks = [..., dict(type='mmrazor.StopDistillHook', detach_epoch=280)]
|
||||
```
|
|
@ -0,0 +1,99 @@
|
|||
_base_ = '../rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth' # noqa: E501
|
||||
|
||||
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
_scope_='mmrazor',
|
||||
type='FpnTeacherDistill',
|
||||
architecture=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher_ckpt=teacher_ckpt,
|
||||
distiller=dict(
|
||||
type='ConfigurableDistiller',
|
||||
# `recorders` are used to record various intermediate results during
|
||||
# the model forward.
|
||||
student_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
|
||||
),
|
||||
teacher_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
|
||||
# `connectors` are adaptive layers which usually map teacher's and
|
||||
# students features to the same dimension.
|
||||
connectors=dict(
|
||||
fpn0_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=256,
|
||||
out_channel=320,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn0_t=dict(
|
||||
type='NormConnector', in_channels=320, norm_cfg=norm_cfg),
|
||||
fpn1_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=256,
|
||||
out_channel=320,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn1_t=dict(
|
||||
type='NormConnector', in_channels=320, norm_cfg=norm_cfg),
|
||||
fpn2_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=256,
|
||||
out_channel=320,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn2_t=dict(
|
||||
type='NormConnector', in_channels=320, norm_cfg=norm_cfg)),
|
||||
distill_losses=dict(
|
||||
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
|
||||
# `loss_forward_mappings` are mappings between distill loss forward
|
||||
# arguments and records.
|
||||
loss_forward_mappings=dict(
|
||||
loss_fpn0=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn0', connector='fpn0_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn0', connector='fpn0_t')),
|
||||
loss_fpn1=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn1', connector='fpn1_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn1', connector='fpn1_t')),
|
||||
loss_fpn2=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn2', connector='fpn2_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn2',
|
||||
connector='fpn2_t')))))
|
||||
|
||||
find_unused_parameters = True
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0002,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
|
||||
switch_pipeline=_base_.train_pipeline_stage2),
|
||||
# stop distillation after the 280th epoch
|
||||
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
|
||||
]
|
|
@ -0,0 +1,99 @@
|
|||
_base_ = '../rtmdet_m_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa: E501
|
||||
|
||||
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
_scope_='mmrazor',
|
||||
type='FpnTeacherDistill',
|
||||
architecture=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher_ckpt=teacher_ckpt,
|
||||
distiller=dict(
|
||||
type='ConfigurableDistiller',
|
||||
# `recorders` are used to record various intermediate results during
|
||||
# the model forward.
|
||||
student_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
|
||||
),
|
||||
teacher_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
|
||||
# `connectors` are adaptive layers which usually map teacher's and
|
||||
# students features to the same dimension.
|
||||
connectors=dict(
|
||||
fpn0_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=192,
|
||||
out_channel=256,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn0_t=dict(
|
||||
type='NormConnector', in_channels=256, norm_cfg=norm_cfg),
|
||||
fpn1_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=192,
|
||||
out_channel=256,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn1_t=dict(
|
||||
type='NormConnector', in_channels=256, norm_cfg=norm_cfg),
|
||||
fpn2_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=192,
|
||||
out_channel=256,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn2_t=dict(
|
||||
type='NormConnector', in_channels=256, norm_cfg=norm_cfg)),
|
||||
distill_losses=dict(
|
||||
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
|
||||
# `loss_forward_mappings` are mappings between distill loss forward
|
||||
# arguments and records.
|
||||
loss_forward_mappings=dict(
|
||||
loss_fpn0=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn0', connector='fpn0_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn0', connector='fpn0_t')),
|
||||
loss_fpn1=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn1', connector='fpn1_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn1', connector='fpn1_t')),
|
||||
loss_fpn2=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn2', connector='fpn2_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn2',
|
||||
connector='fpn2_t')))))
|
||||
|
||||
find_unused_parameters = True
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0002,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
|
||||
switch_pipeline=_base_.train_pipeline_stage2),
|
||||
# stop distillation after the 280th epoch
|
||||
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
|
||||
]
|
|
@ -0,0 +1,99 @@
|
|||
_base_ = '../rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth' # noqa: E501
|
||||
|
||||
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
_scope_='mmrazor',
|
||||
type='FpnTeacherDistill',
|
||||
architecture=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher_ckpt=teacher_ckpt,
|
||||
distiller=dict(
|
||||
type='ConfigurableDistiller',
|
||||
# `recorders` are used to record various intermediate results during
|
||||
# the model forward.
|
||||
student_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
|
||||
),
|
||||
teacher_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
|
||||
# `connectors` are adaptive layers which usually map teacher's and
|
||||
# students features to the same dimension.
|
||||
connectors=dict(
|
||||
fpn0_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=128,
|
||||
out_channel=192,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn0_t=dict(
|
||||
type='NormConnector', in_channels=192, norm_cfg=norm_cfg),
|
||||
fpn1_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=128,
|
||||
out_channel=192,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn1_t=dict(
|
||||
type='NormConnector', in_channels=192, norm_cfg=norm_cfg),
|
||||
fpn2_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=128,
|
||||
out_channel=192,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn2_t=dict(
|
||||
type='NormConnector', in_channels=192, norm_cfg=norm_cfg)),
|
||||
distill_losses=dict(
|
||||
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
|
||||
# `loss_forward_mappings` are mappings between distill loss forward
|
||||
# arguments and records.
|
||||
loss_forward_mappings=dict(
|
||||
loss_fpn0=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn0', connector='fpn0_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn0', connector='fpn0_t')),
|
||||
loss_fpn1=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn1', connector='fpn1_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn1', connector='fpn1_t')),
|
||||
loss_fpn2=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn2', connector='fpn2_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn2',
|
||||
connector='fpn2_t')))))
|
||||
|
||||
find_unused_parameters = True
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0002,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
|
||||
switch_pipeline=_base_.train_pipeline_stage2),
|
||||
# stop distillation after the 280th epoch
|
||||
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
|
||||
]
|
|
@ -0,0 +1,99 @@
|
|||
_base_ = '../rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth' # noqa: E501
|
||||
|
||||
norm_cfg = dict(type='BN', affine=False, track_running_stats=False)
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
_scope_='mmrazor',
|
||||
type='FpnTeacherDistill',
|
||||
architecture=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher=dict(
|
||||
cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'),
|
||||
teacher_ckpt=teacher_ckpt,
|
||||
distiller=dict(
|
||||
type='ConfigurableDistiller',
|
||||
# `recorders` are used to record various intermediate results during
|
||||
# the model forward.
|
||||
student_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'),
|
||||
),
|
||||
teacher_recorders=dict(
|
||||
fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'),
|
||||
fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'),
|
||||
fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')),
|
||||
# `connectors` are adaptive layers which usually map teacher's and
|
||||
# students features to the same dimension.
|
||||
connectors=dict(
|
||||
fpn0_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=96,
|
||||
out_channel=128,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn0_t=dict(
|
||||
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
|
||||
fpn1_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=96,
|
||||
out_channel=128,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn1_t=dict(
|
||||
type='NormConnector', in_channels=128, norm_cfg=norm_cfg),
|
||||
fpn2_s=dict(
|
||||
type='ConvModuleConnector',
|
||||
in_channel=96,
|
||||
out_channel=128,
|
||||
bias=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None),
|
||||
fpn2_t=dict(
|
||||
type='NormConnector', in_channels=128, norm_cfg=norm_cfg)),
|
||||
distill_losses=dict(
|
||||
loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1),
|
||||
loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)),
|
||||
# `loss_forward_mappings` are mappings between distill loss forward
|
||||
# arguments and records.
|
||||
loss_forward_mappings=dict(
|
||||
loss_fpn0=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn0', connector='fpn0_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn0', connector='fpn0_t')),
|
||||
loss_fpn1=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn1', connector='fpn1_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn1', connector='fpn1_t')),
|
||||
loss_fpn2=dict(
|
||||
preds_S=dict(
|
||||
from_student=True, recorder='fpn2', connector='fpn2_s'),
|
||||
preds_T=dict(
|
||||
from_student=False, recorder='fpn2',
|
||||
connector='fpn2_t')))))
|
||||
|
||||
find_unused_parameters = True
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0002,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
|
||||
switch_pipeline=_base_.train_pipeline_stage2),
|
||||
# stop distillation after the 280th epoch
|
||||
dict(type='mmrazor.StopDistillHook', stop_epoch=280)
|
||||
]
|
|
@ -13,6 +13,20 @@ Collections:
|
|||
Code:
|
||||
URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12
|
||||
Version: v0.1.1
|
||||
- Name: Rotated_RTMDet
|
||||
Metadata:
|
||||
Training Data: DOTAv1.0
|
||||
Training Techniques:
|
||||
- AdamW
|
||||
- Flat Cosine Annealing
|
||||
Training Resources: 1x A100 GPUs
|
||||
Architecture:
|
||||
- CSPNeXt
|
||||
- CSPNeXtPAFPN
|
||||
README: configs/rtmdet/README.md
|
||||
Code:
|
||||
URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12
|
||||
Version: v0.1.1
|
||||
|
||||
Models:
|
||||
- Name: rtmdet_tiny_syncbn_fast_8xb32-300e_coco
|
||||
|
@ -28,6 +42,19 @@ Models:
|
|||
box AP: 41.0
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth
|
||||
|
||||
- Name: kd_tiny_rtmdet_s_neck_300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 11.9
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 41.8
|
||||
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth
|
||||
|
||||
- Name: rtmdet_s_syncbn_fast_8xb32-300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py
|
||||
|
@ -41,6 +68,19 @@ Models:
|
|||
box AP: 44.6
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth
|
||||
|
||||
- Name: kd_s_rtmdet_m_neck_300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 16.3
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 45.7
|
||||
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth
|
||||
|
||||
- Name: rtmdet_m_syncbn_fast_8xb32-300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py
|
||||
|
@ -54,6 +94,19 @@ Models:
|
|||
box AP: 49.3
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth
|
||||
|
||||
- Name: kd_m_rtmdet_l_neck_300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 29.0
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 50.2
|
||||
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth
|
||||
|
||||
- Name: rtmdet_l_syncbn_fast_8xb32-300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
|
||||
|
@ -67,6 +120,19 @@ Models:
|
|||
box AP: 51.4
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth
|
||||
|
||||
- Name: kd_l_rtmdet_x_neck_300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 45.2
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 52.3
|
||||
Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth
|
||||
|
||||
- Name: rtmdet_x_syncbn_fast_8xb32-300e_coco
|
||||
In Collection: RTMDet
|
||||
Config: configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py
|
||||
|
@ -79,3 +145,71 @@ Models:
|
|||
Metrics:
|
||||
box AP: 52.8
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth
|
||||
|
||||
- Name: rtmdet-r_tiny_fast_1xb8-36e_dota
|
||||
In Collection: Rotated_RTMDet
|
||||
Config: configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py
|
||||
Metadata:
|
||||
Training Memory (GB): 12.7
|
||||
Epochs: 36
|
||||
Results:
|
||||
- Task: Oriented Object Detection
|
||||
Dataset: DOTAv1.0
|
||||
Metrics:
|
||||
mAP: 75.07
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth
|
||||
|
||||
- Name: rtmdet-r_s_fast_1xb8-36e_dota
|
||||
In Collection: Rotated_RTMDet
|
||||
Config: configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py
|
||||
Metadata:
|
||||
Training Memory (GB): 16.6
|
||||
Epochs: 36
|
||||
Results:
|
||||
- Task: Oriented Object Detection
|
||||
Dataset: DOTAv1.0
|
||||
Metrics:
|
||||
mAP: 77.33
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth
|
||||
|
||||
- Name: rtmdet-r_m_syncbn_fast_2xb4-36e_dota
|
||||
In Collection: Rotated_RTMDet
|
||||
Config: configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py
|
||||
Metadata:
|
||||
Training Resources: 2x A100 GPUs
|
||||
Training Memory (GB): 10.9
|
||||
Epochs: 36
|
||||
Results:
|
||||
- Task: Oriented Object Detection
|
||||
Dataset: DOTAv1.0
|
||||
Metrics:
|
||||
mAP: 78.43
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth
|
||||
|
||||
- Name: rtmdet-r_l_syncbn_fast_2xb4-36e_dota
|
||||
In Collection: Rotated_RTMDet
|
||||
Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py
|
||||
Metadata:
|
||||
Training Resources: 2x A100 GPUs
|
||||
Training Memory (GB): 16.1
|
||||
Epochs: 36
|
||||
Results:
|
||||
- Task: Oriented Object Detection
|
||||
Dataset: DOTAv1.0
|
||||
Metrics:
|
||||
mAP: 78.66
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth
|
||||
|
||||
- Name: rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota
|
||||
In Collection: Rotated_RTMDet
|
||||
Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py
|
||||
Metadata:
|
||||
Training Resources: 2x A100 GPUs
|
||||
Training Memory (GB): 19.6
|
||||
Epochs: 100
|
||||
Results:
|
||||
- Task: Oriented Object Detection
|
||||
Dataset: DOTAv1.0
|
||||
Metrics:
|
||||
mAP: 80.14
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
data_root = 'data/split_ms_dota/'
|
||||
# Path of test images folder
|
||||
test_data_prefix = 'test/images/'
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
train_dataloader = dict(dataset=dict(data_root=data_root))
|
||||
|
||||
val_dataloader = dict(dataset=dict(data_root=data_root))
|
||||
|
||||
# Inference on val dataset
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,331 @@
|
|||
_base_ = '../../_base_/default_runtime.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa
|
||||
|
||||
# ========================Frequently modified parameters======================
|
||||
# -----data related-----
|
||||
data_root = 'data/split_ss_dota/'
|
||||
# Path of train annotation folder
|
||||
train_ann_file = 'trainval/annfiles/'
|
||||
train_data_prefix = 'trainval/images/' # Prefix of train image path
|
||||
# Path of val annotation folder
|
||||
val_ann_file = 'trainval/annfiles/'
|
||||
val_data_prefix = 'trainval/images/' # Prefix of val image path
|
||||
# Path of test images folder
|
||||
test_data_prefix = 'test/images/'
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
num_classes = 15 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 4
|
||||
# Worker to pre-fetch data for each single GPU during training
|
||||
train_num_workers = 8
|
||||
# persistent_workers must be False if num_workers is 0.
|
||||
persistent_workers = True
|
||||
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs
|
||||
base_lr = 0.00025 # 0.004 / 16
|
||||
max_epochs = 36 # Maximum training epochs
|
||||
|
||||
model_test_cfg = dict(
|
||||
# The config of multi-label for multi-class prediction.
|
||||
multi_label=True,
|
||||
# Decode rbox with angle, For RTMDet-R, Defaults to True.
|
||||
# When set to True, use rbox coder such as DistanceAnglePointCoder
|
||||
# When set to False, use hbox coder such as DistancePointBBoxCoder
|
||||
# different setting lead to different AP.
|
||||
decode_with_angle=True,
|
||||
# The number of boxes before NMS
|
||||
nms_pre=30000,
|
||||
score_thr=0.05, # Threshold to filter out boxes.
|
||||
nms=dict(type='nms_rotated', iou_threshold=0.1), # NMS type and threshold
|
||||
max_per_img=2000) # Max number of detections of each image
|
||||
|
||||
# ========================Possible modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (1024, 1024) # width, height
|
||||
# ratio for random rotate
|
||||
random_rotate_ratio = 0.5
|
||||
# label ids for rect objs
|
||||
rotate_rect_obj_labels = [9, 11]
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5DOTADataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 8
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 8
|
||||
|
||||
# Config of batch shapes. Only on val. Not use in RTMDet-R
|
||||
batch_shapes_cfg = None
|
||||
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 1.0
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 1.0
|
||||
# Strides of multi-scale prior box
|
||||
strides = [8, 16, 32]
|
||||
# The angle definition for model
|
||||
angle_version = 'le90' # le90, le135, oc are available options
|
||||
|
||||
norm_cfg = dict(type='BN') # Normalization config
|
||||
|
||||
# -----train val related-----
|
||||
lr_start_factor = 1.0e-5
|
||||
dsl_topk = 13 # Number of bbox selected in each level
|
||||
loss_cls_weight = 1.0
|
||||
loss_bbox_weight = 2.0
|
||||
qfl_beta = 2.0 # beta of QualityFocalLoss
|
||||
weight_decay = 0.05
|
||||
|
||||
# Save model checkpoint and validation intervals
|
||||
save_checkpoint_intervals = 1
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 3
|
||||
# single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
data_preprocessor=dict(
|
||||
type='YOLOv5DetDataPreprocessor',
|
||||
mean=[103.53, 116.28, 123.675],
|
||||
std=[57.375, 57.12, 58.395],
|
||||
bgr_to_rgb=False),
|
||||
backbone=dict(
|
||||
type='CSPNeXt',
|
||||
arch='P5',
|
||||
expand_ratio=0.5,
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
channel_attention=True,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True),
|
||||
init_cfg=dict(
|
||||
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
|
||||
neck=dict(
|
||||
type='CSPNeXtPAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=[256, 512, 1024],
|
||||
out_channels=256,
|
||||
num_csp_blocks=3,
|
||||
expand_ratio=0.5,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
bbox_head=dict(
|
||||
type='RTMDetRotatedHead',
|
||||
head_module=dict(
|
||||
type='RTMDetRotatedSepBNHeadModule',
|
||||
num_classes=num_classes,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=256,
|
||||
stacked_convs=2,
|
||||
feat_channels=256,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True),
|
||||
share_conv=True,
|
||||
pred_kernel_size=1,
|
||||
featmap_strides=strides),
|
||||
prior_generator=dict(
|
||||
type='mmdet.MlvlPointGenerator', offset=0, strides=strides),
|
||||
bbox_coder=dict(
|
||||
type='DistanceAnglePointCoder', angle_version=angle_version),
|
||||
loss_cls=dict(
|
||||
type='mmdet.QualityFocalLoss',
|
||||
use_sigmoid=True,
|
||||
beta=qfl_beta,
|
||||
loss_weight=loss_cls_weight),
|
||||
loss_bbox=dict(
|
||||
type='mmrotate.RotatedIoULoss',
|
||||
mode='linear',
|
||||
loss_weight=loss_bbox_weight),
|
||||
angle_version=angle_version,
|
||||
# Used for angle encode and decode, similar to bbox coder
|
||||
angle_coder=dict(type='mmrotate.PseudoAngleCoder'),
|
||||
# If true, it will apply loss_bbox on horizontal box, and angle_loss
|
||||
# needs to be specified. In this case the loss_bbox should use
|
||||
# horizontal box loss e.g. IoULoss. Arg details can be seen in
|
||||
# `docs/zh_cn/tutorials/rotated_detection.md`
|
||||
use_hbbox_loss=False,
|
||||
loss_angle=None),
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='BatchDynamicSoftLabelAssigner',
|
||||
num_classes=num_classes,
|
||||
topk=dsl_topk,
|
||||
iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'),
|
||||
# RBboxOverlaps2D doesn't support batch input, use loop instead.
|
||||
batch_iou=False),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
test_cfg=model_test_cfg,
|
||||
)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'),
|
||||
dict(
|
||||
type='mmrotate.ConvertBoxType',
|
||||
box_type_mapping=dict(gt_bboxes='rbox')),
|
||||
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
|
||||
dict(
|
||||
type='mmdet.RandomFlip',
|
||||
prob=0.75,
|
||||
direction=['horizontal', 'vertical', 'diagonal']),
|
||||
dict(
|
||||
type='mmrotate.RandomRotate',
|
||||
prob=random_rotate_ratio,
|
||||
angle_range=180,
|
||||
rotate_type='mmrotate.Rotate',
|
||||
rect_obj_labels=rotate_rect_obj_labels),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(type='RegularizeRotatedBox', angle_version=angle_version),
|
||||
dict(type='mmdet.PackDetInputs')
|
||||
]
|
||||
|
||||
val_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(
|
||||
type='LoadAnnotations',
|
||||
with_bbox=True,
|
||||
box_type='qbox',
|
||||
_scope_='mmdet'),
|
||||
dict(
|
||||
type='mmrotate.ConvertBoxType',
|
||||
box_type_mapping=dict(gt_bboxes='rbox')),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
persistent_workers=persistent_workers,
|
||||
pin_memory=True,
|
||||
collate_fn=dict(type='yolov5_collate'),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img_path=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=True),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
num_workers=val_num_workers,
|
||||
persistent_workers=persistent_workers,
|
||||
pin_memory=True,
|
||||
drop_last=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file=val_ann_file,
|
||||
data_prefix=dict(img_path=val_data_prefix),
|
||||
test_mode=True,
|
||||
batch_shapes_cfg=batch_shapes_cfg,
|
||||
pipeline=val_pipeline))
|
||||
|
||||
val_evaluator = dict(type='mmrotate.DOTAMetric', metric='mAP')
|
||||
|
||||
# Inference on val dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# batch_size=val_batch_size_per_gpu,
|
||||
# num_workers=val_num_workers,
|
||||
# persistent_workers=True,
|
||||
# drop_last=False,
|
||||
# sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
# dataset=dict(
|
||||
# type=dataset_type,
|
||||
# data_root=data_root,
|
||||
# data_prefix=dict(img_path=test_data_prefix),
|
||||
# test_mode=True,
|
||||
# batch_shapes_cfg=batch_shapes_cfg,
|
||||
# pipeline=test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay),
|
||||
paramwise_cfg=dict(
|
||||
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
|
||||
|
||||
# learning rate
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='LinearLR',
|
||||
start_factor=lr_start_factor,
|
||||
by_epoch=False,
|
||||
begin=0,
|
||||
end=1000),
|
||||
dict(
|
||||
# use cosine lr from 150 to 300 epoch
|
||||
type='CosineAnnealingLR',
|
||||
eta_min=base_lr * 0.05,
|
||||
begin=max_epochs // 2,
|
||||
end=max_epochs,
|
||||
T_max=max_epochs // 2,
|
||||
by_epoch=True,
|
||||
convert_to_iter_based=True),
|
||||
]
|
||||
|
||||
# hooks
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=save_checkpoint_intervals,
|
||||
max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints
|
||||
save_best='auto'))
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0002,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49)
|
||||
]
|
||||
|
||||
train_cfg = dict(
|
||||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=save_checkpoint_intervals)
|
||||
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
|
||||
visualizer = dict(type='mmrotate.RotLocalVisualizer')
|
|
@ -0,0 +1,168 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
|
||||
|
||||
# This config use longer schedule with Mixup, Mosaic and Random Rotate.
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
|
||||
# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs
|
||||
base_lr = 0.00025 # 0.004 / 16
|
||||
lr_start_factor = 1.0e-5
|
||||
max_epochs = 100 # Maximum training epochs
|
||||
# Change train_pipeline for final 10 epochs (stage 2)
|
||||
num_epochs_stage2 = 10
|
||||
|
||||
img_scale = (1024, 1024) # width, height
|
||||
# ratio range for random resize
|
||||
random_resize_ratio_range = (0.1, 2.0)
|
||||
# Cached images number in mosaic
|
||||
mosaic_max_cached_images = 40
|
||||
# Number of cached images in mixup
|
||||
mixup_max_cached_images = 20
|
||||
# ratio for random rotate
|
||||
random_rotate_ratio = 0.5
|
||||
# label ids for rect objs
|
||||
rotate_rect_obj_labels = [9, 11]
|
||||
|
||||
# Save model checkpoint and validation intervals
|
||||
save_checkpoint_intervals = 1
|
||||
# validation intervals in stage 2
|
||||
val_interval_stage2 = 1
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 3
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'),
|
||||
dict(
|
||||
type='mmrotate.ConvertBoxType',
|
||||
box_type_mapping=dict(gt_bboxes='rbox')),
|
||||
dict(
|
||||
type='Mosaic',
|
||||
img_scale=img_scale,
|
||||
use_cached=True,
|
||||
max_cached_images=mosaic_max_cached_images,
|
||||
pad_val=114.0),
|
||||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
# img_scale is (width, height)
|
||||
scale=(img_scale[0] * 2, img_scale[1] * 2),
|
||||
ratio_range=random_resize_ratio_range,
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(
|
||||
type='mmrotate.RandomRotate',
|
||||
prob=random_rotate_ratio,
|
||||
angle_range=180,
|
||||
rotate_type='mmrotate.Rotate',
|
||||
rect_obj_labels=rotate_rect_obj_labels),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
||||
dict(
|
||||
type='mmdet.RandomFlip',
|
||||
prob=0.75,
|
||||
direction=['horizontal', 'vertical', 'diagonal']),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
use_cached=True,
|
||||
max_cached_images=mixup_max_cached_images),
|
||||
dict(type='mmdet.PackDetInputs')
|
||||
]
|
||||
|
||||
train_pipeline_stage2 = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'),
|
||||
dict(
|
||||
type='mmrotate.ConvertBoxType',
|
||||
box_type_mapping=dict(gt_bboxes='rbox')),
|
||||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=random_resize_ratio_range,
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(
|
||||
type='mmrotate.RandomRotate',
|
||||
prob=random_rotate_ratio,
|
||||
angle_range=180,
|
||||
rotate_type='mmrotate.Rotate',
|
||||
rect_obj_labels=rotate_rect_obj_labels),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
||||
dict(
|
||||
type='mmdet.RandomFlip',
|
||||
prob=0.75,
|
||||
direction=['horizontal', 'vertical', 'diagonal']),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(type='mmdet.PackDetInputs')
|
||||
]
|
||||
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
|
||||
# learning rate
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='LinearLR',
|
||||
start_factor=lr_start_factor,
|
||||
by_epoch=False,
|
||||
begin=0,
|
||||
end=1000),
|
||||
dict(
|
||||
# use cosine lr from 150 to 300 epoch
|
||||
type='CosineAnnealingLR',
|
||||
eta_min=base_lr * 0.05,
|
||||
begin=max_epochs // 2,
|
||||
end=max_epochs,
|
||||
T_max=max_epochs // 2,
|
||||
by_epoch=True,
|
||||
convert_to_iter_based=True),
|
||||
]
|
||||
|
||||
# hooks
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=save_checkpoint_intervals,
|
||||
max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints
|
||||
save_best='auto'))
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0002,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=max_epochs - num_epochs_stage2,
|
||||
switch_pipeline=train_pipeline_stage2)
|
||||
]
|
||||
|
||||
train_cfg = dict(
|
||||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=save_checkpoint_intervals,
|
||||
dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)])
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,20 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
|
||||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,33 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
init_cfg=dict(checkpoint=checkpoint)),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,33 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
init_cfg=dict(checkpoint=checkpoint)),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,38 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 8
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
init_cfg=dict(checkpoint=checkpoint)),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,38 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 8
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
init_cfg=dict(checkpoint=checkpoint)),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,38 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.167
|
||||
widen_factor = 0.375
|
||||
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 8
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
init_cfg=dict(checkpoint=checkpoint)),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,38 @@
|
|||
_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.167
|
||||
widen_factor = 0.375
|
||||
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 8
|
||||
|
||||
# Submission dir for result submit
|
||||
submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission'
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
init_cfg=dict(checkpoint=checkpoint)),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
|
||||
|
||||
# Inference on test dataset and format the output results
|
||||
# for submission. Note: the test set has no annotation.
|
||||
# test_dataloader = dict(
|
||||
# dataset=dict(
|
||||
# data_root=_base_.data_root,
|
||||
# ann_file='', # test set has no annotation
|
||||
# data_prefix=dict(img_path=_base_.test_data_prefix),
|
||||
# pipeline=_base_.test_pipeline))
|
||||
# test_evaluator = dict(
|
||||
# type='mmrotate.DOTAMetric',
|
||||
# format_only=True,
|
||||
# merge_patches=True,
|
||||
# outfile_prefix=submission_dir)
|
|
@ -0,0 +1,31 @@
|
|||
_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
widen_factor = 0.5
|
||||
|
||||
model = dict(
|
||||
bbox_head=dict(
|
||||
type='RTMDetInsSepBNHead',
|
||||
head_module=dict(
|
||||
type='RTMDetInsSepBNHeadModule',
|
||||
use_sigmoid_cls=True,
|
||||
widen_factor=widen_factor),
|
||||
loss_mask=dict(
|
||||
type='mmdet.DiceLoss', loss_weight=2.0, eps=5e-6,
|
||||
reduction='mean')),
|
||||
test_cfg=dict(
|
||||
multi_label=True,
|
||||
nms_pre=1000,
|
||||
min_bbox_size=0,
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.6),
|
||||
max_per_img=100,
|
||||
mask_thr_binary=0.5))
|
||||
|
||||
_base_.test_pipeline[-2] = dict(
|
||||
type='LoadAnnotations', with_bbox=True, with_mask=True, _scope_='mmdet')
|
||||
|
||||
val_dataloader = dict(dataset=dict(pipeline=_base_.test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(metric=['bbox', 'segm'])
|
||||
test_evaluator = val_evaluator
|
|
@ -1,30 +1,56 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
|
||||
|
||||
# ========================Frequently modified parameters======================
|
||||
# -----data related-----
|
||||
data_root = 'data/coco/'
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
|
||||
img_scale = (640, 640) # width, height
|
||||
deepen_factor = 1.0
|
||||
widen_factor = 1.0
|
||||
max_epochs = 300
|
||||
stage2_num_epochs = 20
|
||||
interval = 10
|
||||
num_classes = 80
|
||||
# Path of train annotation file
|
||||
train_ann_file = 'annotations/instances_train2017.json'
|
||||
train_data_prefix = 'train2017/' # Prefix of train image path
|
||||
# Path of val annotation file
|
||||
val_ann_file = 'annotations/instances_val2017.json'
|
||||
val_data_prefix = 'val2017/' # Prefix of val image path
|
||||
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 32
|
||||
# Worker to pre-fetch data for each single GPU during training
|
||||
train_num_workers = 10
|
||||
val_batch_size_per_gpu = 32
|
||||
val_num_workers = 10
|
||||
# persistent_workers must be False if num_workers is 0.
|
||||
persistent_workers = True
|
||||
strides = [8, 16, 32]
|
||||
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
|
||||
base_lr = 0.004
|
||||
max_epochs = 300 # Maximum training epochs
|
||||
# Change train_pipeline for final 20 epochs (stage 2)
|
||||
num_epochs_stage2 = 20
|
||||
|
||||
# single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
model_test_cfg = dict(
|
||||
# The config of multi-label for multi-class prediction.
|
||||
multi_label=True,
|
||||
# The number of boxes before NMS
|
||||
nms_pre=30000,
|
||||
score_thr=0.001, # Threshold to filter out boxes.
|
||||
nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
|
||||
max_per_img=300) # Max number of detections of each image
|
||||
|
||||
# only on Val
|
||||
# ========================Possible modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (640, 640) # width, height
|
||||
# ratio range for random resize
|
||||
random_resize_ratio_range = (0.1, 2.0)
|
||||
# Cached images number in mosaic
|
||||
mosaic_max_cached_images = 40
|
||||
# Number of cached images in mixup
|
||||
mixup_max_cached_images = 20
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 32
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 10
|
||||
|
||||
# Config of batch shapes. Only on val.
|
||||
batch_shapes_cfg = dict(
|
||||
type='BatchShapePolicy',
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
|
@ -32,6 +58,35 @@ batch_shapes_cfg = dict(
|
|||
size_divisor=32,
|
||||
extra_pad_ratio=0.5)
|
||||
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 1.0
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 1.0
|
||||
# Strides of multi-scale prior box
|
||||
strides = [8, 16, 32]
|
||||
|
||||
norm_cfg = dict(type='BN') # Normalization config
|
||||
|
||||
# -----train val related-----
|
||||
lr_start_factor = 1.0e-5
|
||||
dsl_topk = 13 # Number of bbox selected in each level
|
||||
loss_cls_weight = 1.0
|
||||
loss_bbox_weight = 2.0
|
||||
qfl_beta = 2.0 # beta of QualityFocalLoss
|
||||
weight_decay = 0.05
|
||||
|
||||
# Save model checkpoint and validation intervals
|
||||
save_checkpoint_intervals = 10
|
||||
# validation intervals in stage 2
|
||||
val_interval_stage2 = 1
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 3
|
||||
# single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
data_preprocessor=dict(
|
||||
|
@ -46,7 +101,7 @@ model = dict(
|
|||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
channel_attention=True,
|
||||
norm_cfg=dict(type='BN'),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
neck=dict(
|
||||
type='CSPNeXtPAFPN',
|
||||
|
@ -56,7 +111,7 @@ model = dict(
|
|||
out_channels=256,
|
||||
num_csp_blocks=3,
|
||||
expand_ratio=0.5,
|
||||
norm_cfg=dict(type='BN'),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
bbox_head=dict(
|
||||
type='RTMDetHead',
|
||||
|
@ -66,7 +121,7 @@ model = dict(
|
|||
in_channels=256,
|
||||
stacked_convs=2,
|
||||
feat_channels=256,
|
||||
norm_cfg=dict(type='BN'),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True),
|
||||
share_conv=True,
|
||||
pred_kernel_size=1,
|
||||
|
@ -77,24 +132,19 @@ model = dict(
|
|||
loss_cls=dict(
|
||||
type='mmdet.QualityFocalLoss',
|
||||
use_sigmoid=True,
|
||||
beta=2.0,
|
||||
loss_weight=1.0),
|
||||
loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)),
|
||||
beta=qfl_beta,
|
||||
loss_weight=loss_cls_weight),
|
||||
loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=loss_bbox_weight)),
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='BatchDynamicSoftLabelAssigner',
|
||||
num_classes=num_classes,
|
||||
topk=13,
|
||||
topk=dsl_topk,
|
||||
iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
test_cfg=dict(
|
||||
multi_label=True,
|
||||
nms_pre=30000,
|
||||
score_thr=0.001,
|
||||
nms=dict(type='nms', iou_threshold=0.65),
|
||||
max_per_img=300),
|
||||
test_cfg=model_test_cfg,
|
||||
)
|
||||
|
||||
train_pipeline = [
|
||||
|
@ -104,20 +154,23 @@ train_pipeline = [
|
|||
type='Mosaic',
|
||||
img_scale=img_scale,
|
||||
use_cached=True,
|
||||
max_cached_images=40,
|
||||
max_cached_images=mosaic_max_cached_images,
|
||||
pad_val=114.0),
|
||||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
# img_scale is (width, height)
|
||||
scale=(img_scale[0] * 2, img_scale[1] * 2),
|
||||
ratio_range=(0.1, 2.0),
|
||||
ratio_range=random_resize_ratio_range,
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
use_cached=True,
|
||||
max_cached_images=mixup_max_cached_images),
|
||||
dict(type='mmdet.PackDetInputs')
|
||||
]
|
||||
|
||||
|
@ -127,7 +180,7 @@ train_pipeline_stage2 = [
|
|||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.1, 2.0),
|
||||
ratio_range=random_resize_ratio_range,
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
|
@ -162,8 +215,8 @@ train_dataloader = dict(
|
|||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/'),
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
|
@ -177,8 +230,8 @@ val_dataloader = dict(
|
|||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_val2017.json',
|
||||
data_prefix=dict(img='val2017/'),
|
||||
ann_file=val_ann_file,
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
test_mode=True,
|
||||
batch_shapes_cfg=batch_shapes_cfg,
|
||||
pipeline=test_pipeline))
|
||||
|
@ -189,14 +242,14 @@ test_dataloader = val_dataloader
|
|||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10),
|
||||
ann_file=data_root + 'annotations/instances_val2017.json',
|
||||
ann_file=data_root + val_ann_file,
|
||||
metric='bbox')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
|
||||
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay),
|
||||
paramwise_cfg=dict(
|
||||
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
|
||||
|
||||
|
@ -204,7 +257,7 @@ optim_wrapper = dict(
|
|||
param_scheduler = [
|
||||
dict(
|
||||
type='LinearLR',
|
||||
start_factor=1.0e-5,
|
||||
start_factor=lr_start_factor,
|
||||
by_epoch=False,
|
||||
begin=0,
|
||||
end=1000),
|
||||
|
@ -223,8 +276,8 @@ param_scheduler = [
|
|||
default_hooks = dict(
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=interval,
|
||||
max_keep_ckpts=3 # only keep latest 3 checkpoints
|
||||
interval=save_checkpoint_intervals,
|
||||
max_keep_ckpts=max_keep_ckpts # only keep latest 3 checkpoints
|
||||
))
|
||||
|
||||
custom_hooks = [
|
||||
|
@ -237,15 +290,15 @@ custom_hooks = [
|
|||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=max_epochs - stage2_num_epochs,
|
||||
switch_epoch=max_epochs - num_epochs_stage2,
|
||||
switch_pipeline=train_pipeline_stage2)
|
||||
]
|
||||
|
||||
train_cfg = dict(
|
||||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=interval,
|
||||
dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
|
||||
val_interval=save_checkpoint_intervals,
|
||||
dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)])
|
||||
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
|
|
|
@ -1,10 +1,19 @@
|
|||
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
img_scale = _base_.img_scale
|
||||
|
||||
# ratio range for random resize
|
||||
random_resize_ratio_range = (0.5, 2.0)
|
||||
# Number of cached images in mosaic
|
||||
mosaic_max_cached_images = 40
|
||||
# Number of cached images in mixup
|
||||
mixup_max_cached_images = 20
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
|
@ -30,20 +39,23 @@ train_pipeline = [
|
|||
type='Mosaic',
|
||||
img_scale=img_scale,
|
||||
use_cached=True,
|
||||
max_cached_images=40,
|
||||
max_cached_images=mosaic_max_cached_images,
|
||||
pad_val=114.0),
|
||||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
# img_scale is (width, height)
|
||||
scale=(img_scale[0] * 2, img_scale[1] * 2),
|
||||
ratio_range=(0.5, 2.0), # note
|
||||
ratio_range=random_resize_ratio_range, # note
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
|
||||
dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
use_cached=True,
|
||||
max_cached_images=mixup_max_cached_images),
|
||||
dict(type='mmdet.PackDetInputs')
|
||||
]
|
||||
|
||||
|
@ -53,7 +65,7 @@ train_pipeline_stage2 = [
|
|||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0), # note
|
||||
ratio_range=random_resize_ratio_range, # note
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
|
@ -75,6 +87,6 @@ custom_hooks = [
|
|||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs,
|
||||
switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2,
|
||||
switch_pipeline=train_pipeline_stage2)
|
||||
]
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
_base_ = 'rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
data_root = './data/cat/'
|
||||
class_name = ('cat', )
|
||||
num_classes = len(class_name)
|
||||
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
|
||||
|
||||
num_epochs_stage2 = 5
|
||||
|
||||
max_epochs = 40
|
||||
train_batch_size_per_gpu = 12
|
||||
train_num_workers = 4
|
||||
val_batch_size_per_gpu = 1
|
||||
val_num_workers = 2
|
||||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth' # noqa
|
||||
|
||||
model = dict(
|
||||
backbone=dict(frozen_stages=4),
|
||||
bbox_head=dict(head_module=dict(num_classes=num_classes)),
|
||||
train_cfg=dict(assigner=dict(num_classes=num_classes)))
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
ann_file='annotations/trainval.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
num_workers=val_num_workers,
|
||||
dataset=dict(
|
||||
metainfo=metainfo,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/test.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='LinearLR',
|
||||
start_factor=_base_.lr_start_factor,
|
||||
by_epoch=False,
|
||||
begin=0,
|
||||
end=30),
|
||||
dict(
|
||||
# use cosine lr from 150 to 300 epoch
|
||||
type='CosineAnnealingLR',
|
||||
eta_min=_base_.base_lr * 0.05,
|
||||
begin=max_epochs // 2,
|
||||
end=max_epochs,
|
||||
T_max=max_epochs // 2,
|
||||
by_epoch=True,
|
||||
convert_to_iter_based=True),
|
||||
]
|
||||
|
||||
_base_.custom_hooks[1].switch_epoch = max_epochs - num_epochs_stage2
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
|
||||
logger=dict(type='LoggerHook', interval=5))
|
||||
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
|
||||
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -1,11 +1,19 @@
|
|||
_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.167
|
||||
widen_factor = 0.375
|
||||
img_scale = _base_.img_scale
|
||||
|
||||
# ratio range for random resize
|
||||
random_resize_ratio_range = (0.5, 2.0)
|
||||
# Number of cached images in mosaic
|
||||
mosaic_max_cached_images = 20
|
||||
# Number of cached images in mixup
|
||||
mixup_max_cached_images = 10
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
|
@ -24,14 +32,14 @@ train_pipeline = [
|
|||
type='Mosaic',
|
||||
img_scale=img_scale,
|
||||
use_cached=True,
|
||||
max_cached_images=20, # note
|
||||
max_cached_images=mosaic_max_cached_images, # note
|
||||
random_pop=False, # note
|
||||
pad_val=114.0),
|
||||
dict(
|
||||
type='mmdet.RandomResize',
|
||||
# img_scale is (width, height)
|
||||
scale=(img_scale[0] * 2, img_scale[1] * 2),
|
||||
ratio_range=(0.5, 2.0),
|
||||
ratio_range=random_resize_ratio_range,
|
||||
resize_type='mmdet.Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='mmdet.RandomCrop', crop_size=img_scale),
|
||||
|
@ -42,7 +50,7 @@ train_pipeline = [
|
|||
type='YOLOv5MixUp',
|
||||
use_cached=True,
|
||||
random_pop=False,
|
||||
max_cached_images=10,
|
||||
max_cached_images=mixup_max_cached_images,
|
||||
prob=0.5),
|
||||
dict(type='mmdet.PackDetInputs')
|
||||
]
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 1.33
|
||||
widen_factor = 1.25
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
|
|
|
@ -20,16 +20,16 @@ YOLOv5-l-P6 model structure
|
|||
|
||||
### COCO
|
||||
|
||||
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download |
|
||||
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) |
|
||||
| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
|
||||
| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) |
|
||||
| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) |
|
||||
| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) |
|
||||
| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) |
|
||||
| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) |
|
||||
| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) |
|
||||
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download |
|
||||
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) |
|
||||
| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) |
|
||||
| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) |
|
||||
| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | 49.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) |
|
||||
| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) |
|
||||
| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) |
|
||||
| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) |
|
||||
| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) |
|
||||
|
||||
**Note**:
|
||||
In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details.
|
||||
|
@ -39,16 +39,16 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO
|
|||
3. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision.
|
||||
4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code.
|
||||
5. The performance is unstable and may fluctuate by about 0.4 mAP and the highest performance weight in `COCO` training in `YOLOv5` may not be the last epoch.
|
||||
6. `balloon` means that this is a demo configuration.
|
||||
6. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
|
||||
|
||||
### VOC
|
||||
|
||||
| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | Config | Download |
|
||||
| :------: | :--: | :-------: | :-: | :------: | :-----------------: | :--------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254-f1493430.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254.log.json) |
|
||||
| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156-0009b33e.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156.log.json) |
|
||||
| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138-815c143a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138.log.json) |
|
||||
| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500.log.json) |
|
||||
| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | Config | Download |
|
||||
| :------: | :--: | :-------: | :-: | :------: | :-----------------: | :------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254-f1493430.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254.log.json) |
|
||||
| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156-0009b33e.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156.log.json) |
|
||||
| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138-815c143a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138.log.json) |
|
||||
| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500.log.json) |
|
||||
|
||||
**Note**:
|
||||
|
||||
|
@ -62,10 +62,10 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO
|
|||
|
||||
Since the `iscrowd` annotation of the COCO dataset is not equivalent to `ignore`, we use the CrowdHuman dataset to verify that the YOLOv5 ignore logic is correct.
|
||||
|
||||
| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | Config | Download |
|
||||
| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | :-------------------------------------------------------------------------------------------------------------------------------: | :------: |
|
||||
| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py) | |
|
||||
| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) | |
|
||||
| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | Config | Download |
|
||||
| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | :-----------------------------------------------------------------------------------------------------------------------------: | :------: |
|
||||
| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py) | |
|
||||
| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) | |
|
||||
|
||||
**Note**:
|
||||
|
||||
|
|
|
@ -29,6 +29,8 @@ num_det_layers = 3
|
|||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
|
||||
|
||||
tta_img_scales = [img_scale, (416, 416), (640, 640)]
|
||||
|
||||
# Hyperparameter reference from:
|
||||
# https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml
|
||||
model = dict(
|
||||
|
@ -232,3 +234,37 @@ val_evaluator = dict(
|
|||
test_evaluator = val_evaluator
|
||||
|
||||
train_cfg = dict(max_epochs=max_epochs)
|
||||
|
||||
# Config for Test Time Augmentation. (TTA)
|
||||
_multiscale_resize_transforms = [
|
||||
dict(
|
||||
type='Compose',
|
||||
transforms=[
|
||||
dict(type='YOLOv5KeepRatioResize', scale=s),
|
||||
dict(
|
||||
type='LetterResize',
|
||||
scale=s,
|
||||
allow_scale_up=False,
|
||||
pad_val=dict(img=114))
|
||||
]) for s in tta_img_scales
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
_multiscale_resize_transforms,
|
||||
[
|
||||
dict(type='mmdet.RandomFlip', prob=1.),
|
||||
dict(type='mmdet.RandomFlip', prob=0.)
|
||||
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor', 'pad_param', 'flip',
|
||||
'flip_direction'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -1,10 +1,15 @@
|
|||
_base_ = './yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
lr_factor = 0.1 # lrf=0.1
|
||||
lr_factor = 0.1
|
||||
affine_scale = 0.9
|
||||
loss_cls_weight = 0.3
|
||||
loss_obj_weight = 0.7
|
||||
mixup_prob = 0.1
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
num_classes = _base_.num_classes
|
||||
num_det_layers = _base_.num_det_layers
|
||||
img_scale = _base_.img_scale
|
||||
|
@ -20,9 +25,9 @@ model = dict(
|
|||
),
|
||||
bbox_head=dict(
|
||||
head_module=dict(widen_factor=widen_factor),
|
||||
loss_cls=dict(loss_weight=0.3 *
|
||||
loss_cls=dict(loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=0.7 *
|
||||
loss_obj=dict(loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers))))
|
||||
|
||||
pre_transform = _base_.pre_transform
|
||||
|
@ -49,7 +54,7 @@ train_pipeline = [
|
|||
*pre_transform, *mosaic_affine_pipeline,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
prob=0.1,
|
||||
prob=mixup_prob,
|
||||
pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
|
||||
dict(
|
||||
type='mmdet.Albu',
|
||||
|
@ -71,5 +76,4 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
|
||||
|
|
|
@ -1,10 +1,15 @@
|
|||
_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
lr_factor = 0.1 # lrf=0.1
|
||||
lr_factor = 0.1
|
||||
affine_scale = 0.9
|
||||
loss_cls_weight = 0.3
|
||||
loss_obj_weight = 0.7
|
||||
mixup_prob = 0.1
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
num_classes = _base_.num_classes
|
||||
num_det_layers = _base_.num_det_layers
|
||||
img_scale = _base_.img_scale
|
||||
|
@ -20,9 +25,9 @@ model = dict(
|
|||
),
|
||||
bbox_head=dict(
|
||||
head_module=dict(widen_factor=widen_factor),
|
||||
loss_cls=dict(loss_weight=0.3 *
|
||||
loss_cls=dict(loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=0.7 *
|
||||
loss_obj=dict(loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers))))
|
||||
|
||||
pre_transform = _base_.pre_transform
|
||||
|
@ -49,7 +54,7 @@ train_pipeline = [
|
|||
*pre_transform, *mosaic_affine_pipeline,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
prob=0.1,
|
||||
prob=mixup_prob,
|
||||
pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
|
||||
dict(
|
||||
type='mmdet.Albu',
|
||||
|
@ -71,5 +76,4 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
|
||||
|
|
|
@ -1,19 +1,33 @@
|
|||
_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
img_scale = (1280, 1280) # width, height
|
||||
num_classes = 80
|
||||
# only on Val
|
||||
batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64)
|
||||
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Config of batch shapes. Only on val.
|
||||
# It means not used if batch_shapes_cfg is None.
|
||||
batch_shapes_cfg = dict(
|
||||
img_size=img_scale[0],
|
||||
# The image scale of padding should be divided by pad_size_divisor
|
||||
size_divisor=64)
|
||||
# Basic size of multi-scale prior box
|
||||
anchors = [
|
||||
[(19, 27), (44, 40), (38, 94)], # P3/8
|
||||
[(96, 68), (86, 152), (180, 137)], # P4/16
|
||||
[(140, 301), (303, 264), (238, 542)], # P5/32
|
||||
[(436, 615), (739, 380), (925, 792)] # P6/64
|
||||
]
|
||||
# Strides of multi-scale prior box
|
||||
strides = [8, 16, 32, 64]
|
||||
num_det_layers = 4
|
||||
num_det_layers = 4 # The number of model output scales
|
||||
loss_cls_weight = 0.5
|
||||
loss_bbox_weight = 0.05
|
||||
loss_obj_weight = 1.0
|
||||
# The obj loss weights of the three output layers
|
||||
obj_level_weights = [4.0, 1.0, 0.25, 0.06]
|
||||
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
|
||||
|
||||
tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
|
||||
# =======================Unmodified in most cases==================
|
||||
model = dict(
|
||||
backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)),
|
||||
neck=dict(
|
||||
|
@ -23,12 +37,12 @@ model = dict(
|
|||
in_channels=[256, 512, 768, 1024], featmap_strides=strides),
|
||||
prior_generator=dict(base_sizes=anchors, strides=strides),
|
||||
# scaled based on number of detection layers
|
||||
loss_cls=dict(loss_weight=0.5 *
|
||||
loss_cls=dict(loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=1.0 *
|
||||
loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
|
||||
obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
|
||||
obj_level_weights=obj_level_weights))
|
||||
|
||||
pre_transform = _base_.pre_transform
|
||||
albu_train_transforms = _base_.albu_train_transforms
|
||||
|
@ -44,7 +58,7 @@ train_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
scaling_ratio_range=(0.5, 1.5),
|
||||
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -88,3 +102,37 @@ val_dataloader = dict(
|
|||
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# Config for Test Time Augmentation. (TTA)
|
||||
_multiscale_resize_transforms = [
|
||||
dict(
|
||||
type='Compose',
|
||||
transforms=[
|
||||
dict(type='YOLOv5KeepRatioResize', scale=s),
|
||||
dict(
|
||||
type='LetterResize',
|
||||
scale=s,
|
||||
allow_scale_up=False,
|
||||
pad_val=dict(img=114))
|
||||
]) for s in tta_img_scales
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
_multiscale_resize_transforms,
|
||||
[
|
||||
dict(type='mmdet.RandomFlip', prob=1.),
|
||||
dict(type='mmdet.RandomFlip', prob=0.)
|
||||
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor', 'pad_param', 'flip',
|
||||
'flip_direction'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
_base_ = 'yolov5_s-v61_fast_1xb12-40e_cat.py'
|
||||
|
||||
# This configuration is used to provide non-square training examples
|
||||
# Must be a multiple of 32
|
||||
img_scale = (608, 352) # w h
|
||||
|
||||
anchors = [
|
||||
[(65, 35), (159, 45), (119, 80)], # P3/8
|
||||
[(215, 77), (224, 116), (170, 166)], # P4/16
|
||||
[(376, 108), (339, 176), (483, 190)] # P5/32
|
||||
]
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
_base_.model.bbox_head.loss_obj.loss_weight = 1.0 * ((img_scale[1] / 640)**2)
|
||||
_base_.model.bbox_head.prior_generator.base_sizes = anchors
|
||||
|
||||
train_pipeline = [
|
||||
*_base_.pre_transform,
|
||||
dict(
|
||||
type='Mosaic',
|
||||
img_scale=img_scale,
|
||||
pad_val=114.0,
|
||||
pre_transform=_base_.pre_transform),
|
||||
dict(
|
||||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
dict(
|
||||
type='mmdet.Albu',
|
||||
transforms=_base_.albu_train_transforms,
|
||||
bbox_params=dict(
|
||||
type='BboxParams',
|
||||
format='pascal_voc',
|
||||
label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
|
||||
keymap={
|
||||
'img': 'image',
|
||||
'gt_bboxes': 'bboxes'
|
||||
}),
|
||||
dict(type='YOLOv5HSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'flip_direction'))
|
||||
]
|
||||
|
||||
_base_.train_dataloader.dataset.pipeline = train_pipeline
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='YOLOv5KeepRatioResize', scale=img_scale),
|
||||
dict(
|
||||
type='LetterResize',
|
||||
scale=img_scale,
|
||||
allow_scale_up=False,
|
||||
pad_val=dict(img=114)),
|
||||
dict(type='mmdet.LoadAnnotations', with_bbox=True),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor', 'pad_param'))
|
||||
]
|
||||
|
||||
val_dataloader = dict(
|
||||
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
|
||||
test_dataloader = val_dataloader
|
|
@ -0,0 +1,56 @@
|
|||
_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
|
||||
|
||||
data_root = './data/cat/'
|
||||
class_name = ('cat', )
|
||||
num_classes = len(class_name)
|
||||
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
|
||||
|
||||
anchors = [
|
||||
[(68, 69), (154, 91), (143, 162)], # P3/8
|
||||
[(242, 160), (189, 287), (391, 207)], # P4/16
|
||||
[(353, 337), (539, 341), (443, 432)] # P5/32
|
||||
]
|
||||
|
||||
max_epochs = 40
|
||||
train_batch_size_per_gpu = 12
|
||||
train_num_workers = 4
|
||||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
|
||||
|
||||
model = dict(
|
||||
backbone=dict(frozen_stages=4),
|
||||
bbox_head=dict(
|
||||
head_module=dict(num_classes=num_classes),
|
||||
prior_generator=dict(base_sizes=anchors)))
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
ann_file='annotations/trainval.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
metainfo=metainfo,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/test.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
|
||||
# The warmup_mim_iter parameter is critical.
|
||||
# The default value is 1000 which is not suitable for cat datasets.
|
||||
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
|
||||
logger=dict(type='LoggerHook', interval=5))
|
||||
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
|
||||
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -1,9 +1,7 @@
|
|||
_base_ = 'yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args={{_base_.file_client_args}}),
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(
|
||||
type='LetterResize',
|
||||
scale=_base_.img_scale,
|
||||
|
|
|
@ -1,47 +1,95 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
|
||||
|
||||
# dataset settings
|
||||
data_root = 'data/coco/'
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# ========================Frequently modified parameters======================
|
||||
# -----data related-----
|
||||
data_root = 'data/coco/' # Root path of data
|
||||
# Path of train annotation file
|
||||
train_ann_file = 'annotations/instances_train2017.json'
|
||||
train_data_prefix = 'train2017/' # Prefix of train image path
|
||||
# Path of val annotation file
|
||||
val_ann_file = 'annotations/instances_val2017.json'
|
||||
val_data_prefix = 'val2017/' # Prefix of val image path
|
||||
|
||||
# parameters that often need to be modified
|
||||
num_classes = 80
|
||||
img_scale = (640, 640) # width, height
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
max_epochs = 300
|
||||
save_epoch_intervals = 10
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 16
|
||||
# Worker to pre-fetch data for each single GPU during training
|
||||
train_num_workers = 8
|
||||
val_batch_size_per_gpu = 1
|
||||
val_num_workers = 2
|
||||
|
||||
# persistent_workers must be False if num_workers is 0.
|
||||
# persistent_workers must be False if num_workers is 0
|
||||
persistent_workers = True
|
||||
|
||||
# Base learning rate for optim_wrapper
|
||||
base_lr = 0.01
|
||||
|
||||
# only on Val
|
||||
batch_shapes_cfg = dict(
|
||||
type='BatchShapePolicy',
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
img_size=img_scale[0],
|
||||
size_divisor=32,
|
||||
extra_pad_ratio=0.5)
|
||||
|
||||
# -----model related-----
|
||||
# Basic size of multi-scale prior box
|
||||
anchors = [
|
||||
[(10, 13), (16, 30), (33, 23)], # P3/8
|
||||
[(30, 61), (62, 45), (59, 119)], # P4/16
|
||||
[(116, 90), (156, 198), (373, 326)] # P5/32
|
||||
]
|
||||
strides = [8, 16, 32]
|
||||
num_det_layers = 3
|
||||
|
||||
# single-scale training is recommended to
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
|
||||
base_lr = 0.01
|
||||
max_epochs = 300 # Maximum training epochs
|
||||
|
||||
model_test_cfg = dict(
|
||||
# The config of multi-label for multi-class prediction.
|
||||
multi_label=True,
|
||||
# The number of boxes before NMS
|
||||
nms_pre=30000,
|
||||
score_thr=0.001, # Threshold to filter out boxes.
|
||||
nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
|
||||
max_per_img=300) # Max number of detections of each image
|
||||
|
||||
# ========================Possible modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (640, 640) # width, height
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 1
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 2
|
||||
|
||||
# Config of batch shapes. Only on val.
|
||||
# It means not used if batch_shapes_cfg is None.
|
||||
batch_shapes_cfg = dict(
|
||||
type='BatchShapePolicy',
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
img_size=img_scale[0],
|
||||
# The image scale of padding should be divided by pad_size_divisor
|
||||
size_divisor=32,
|
||||
# Additional paddings for pixel scale
|
||||
extra_pad_ratio=0.5)
|
||||
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.5
|
||||
# Strides of multi-scale prior box
|
||||
strides = [8, 16, 32]
|
||||
num_det_layers = 3 # The number of model output scales
|
||||
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config
|
||||
|
||||
# -----train val related-----
|
||||
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
|
||||
loss_cls_weight = 0.5
|
||||
loss_bbox_weight = 0.05
|
||||
loss_obj_weight = 1.0
|
||||
prior_match_thr = 4. # Priori box matching threshold
|
||||
# The obj loss weights of the three output layers
|
||||
obj_level_weights = [4., 1., 0.4]
|
||||
lr_factor = 0.01 # Learning rate scaling factor
|
||||
weight_decay = 0.0005
|
||||
# Save model checkpoint and validation intervals
|
||||
save_checkpoint_intervals = 10
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 3
|
||||
# Single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
data_preprocessor=dict(
|
||||
|
@ -53,7 +101,7 @@ model = dict(
|
|||
type='YOLOv5CSPDarknet',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
|
@ -62,7 +110,7 @@ model = dict(
|
|||
in_channels=[256, 512, 1024],
|
||||
out_channels=[256, 512, 1024],
|
||||
num_csp_blocks=3,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
|
@ -82,28 +130,25 @@ model = dict(
|
|||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='mean',
|
||||
loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_bbox=dict(
|
||||
type='IoULoss',
|
||||
iou_mode='ciou',
|
||||
bbox_format='xywh',
|
||||
eps=1e-7,
|
||||
reduction='mean',
|
||||
loss_weight=0.05 * (3 / num_det_layers),
|
||||
loss_weight=loss_bbox_weight * (3 / num_det_layers),
|
||||
return_iou=True),
|
||||
loss_obj=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='mean',
|
||||
loss_weight=1.0 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
|
||||
prior_match_thr=4.,
|
||||
obj_level_weights=[4., 1., 0.4]),
|
||||
test_cfg=dict(
|
||||
multi_label=True,
|
||||
nms_pre=30000,
|
||||
score_thr=0.001,
|
||||
nms=dict(type='nms', iou_threshold=0.65),
|
||||
max_per_img=300))
|
||||
loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
|
||||
prior_match_thr=prior_match_thr,
|
||||
obj_level_weights=obj_level_weights),
|
||||
test_cfg=model_test_cfg)
|
||||
|
||||
albu_train_transforms = [
|
||||
dict(type='Blur', p=0.01),
|
||||
|
@ -128,7 +173,7 @@ train_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
scaling_ratio_range=(0.5, 1.5),
|
||||
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -160,8 +205,8 @@ train_dataloader = dict(
|
|||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/'),
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=False, min_size=32),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
|
@ -191,8 +236,8 @@ val_dataloader = dict(
|
|||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
test_mode=True,
|
||||
data_prefix=dict(img='val2017/'),
|
||||
ann_file='annotations/instances_val2017.json',
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
ann_file=val_ann_file,
|
||||
pipeline=test_pipeline,
|
||||
batch_shapes_cfg=batch_shapes_cfg))
|
||||
|
||||
|
@ -205,7 +250,7 @@ optim_wrapper = dict(
|
|||
type='SGD',
|
||||
lr=base_lr,
|
||||
momentum=0.937,
|
||||
weight_decay=0.0005,
|
||||
weight_decay=weight_decay,
|
||||
nesterov=True,
|
||||
batch_size_per_gpu=train_batch_size_per_gpu),
|
||||
constructor='YOLOv5OptimizerConstructor')
|
||||
|
@ -214,13 +259,13 @@ default_hooks = dict(
|
|||
param_scheduler=dict(
|
||||
type='YOLOv5ParamSchedulerHook',
|
||||
scheduler_type='linear',
|
||||
lr_factor=0.01,
|
||||
lr_factor=lr_factor,
|
||||
max_epochs=max_epochs),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=save_epoch_intervals,
|
||||
interval=save_checkpoint_intervals,
|
||||
save_best='auto',
|
||||
max_keep_ckpts=3))
|
||||
max_keep_ckpts=max_keep_ckpts))
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
|
@ -235,13 +280,13 @@ custom_hooks = [
|
|||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10),
|
||||
ann_file=data_root + 'annotations/instances_val2017.json',
|
||||
ann_file=data_root + val_ann_file,
|
||||
metric='bbox')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
train_cfg = dict(
|
||||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=save_epoch_intervals)
|
||||
val_interval=save_checkpoint_intervals)
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
|
|
|
@ -1,39 +1,42 @@
|
|||
_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
data_root = 'data/balloon/'
|
||||
|
||||
train_batch_size_per_gpu = 4
|
||||
train_num_workers = 2
|
||||
|
||||
# Path of train annotation file
|
||||
train_ann_file = 'train.json'
|
||||
train_data_prefix = 'train/' # Prefix of train image path
|
||||
# Path of val annotation file
|
||||
val_ann_file = 'val.json'
|
||||
val_data_prefix = 'val/' # Prefix of val image path
|
||||
metainfo = {
|
||||
'classes': ('balloon', ),
|
||||
'palette': [
|
||||
(220, 20, 60),
|
||||
]
|
||||
}
|
||||
num_classes = 1
|
||||
|
||||
train_batch_size_per_gpu = 4
|
||||
train_num_workers = 2
|
||||
log_interval = 1
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
data_prefix=dict(img='train/'),
|
||||
ann_file='train.json'))
|
||||
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
ann_file=train_ann_file))
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
data_prefix=dict(img='val/'),
|
||||
ann_file='val.json'))
|
||||
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
ann_file=val_ann_file))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + 'val.json')
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + val_ann_file)
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
model = dict(bbox_head=dict(head_module=dict(num_classes=1)))
|
||||
|
||||
default_hooks = dict(logger=dict(interval=1))
|
||||
model = dict(bbox_head=dict(head_module=dict(num_classes=num_classes)))
|
||||
default_hooks = dict(logger=dict(interval=log_interval))
|
||||
|
|
|
@ -31,7 +31,7 @@ YOLOv6-l model structure
|
|||
| YOLOv6-n | P5 | 640 | 400 | Yes | Yes | 6.04 | 36.2 | [config](../yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco/yolov6_n_syncbn_fast_8xb32-400e_coco_20221030_202726-d99b2e82.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco/yolov6_n_syncbn_fast_8xb32-400e_coco_20221030_202726.log.json) |
|
||||
| YOLOv6-t | P5 | 640 | 400 | Yes | Yes | 8.13 | 41.0 | [config](../yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco/yolov6_t_syncbn_fast_8xb32-400e_coco_20221030_143755-cf0d278f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco/yolov6_t_syncbn_fast_8xb32-400e_coco_20221030_143755.log.json) |
|
||||
| YOLOv6-s | P5 | 640 | 400 | Yes | Yes | 8.88 | 44.0 | [config](../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035.log.json) |
|
||||
| YOLOv6-m | P5 | 640 | 300 | Yes | Yes | 16.69 | 48.4 | [config](../yolov6/yolov6_m_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658-85bda3f4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658.log.json) |
|
||||
| YOLOv6-m | P5 | 640 | 300 | Yes | Yes | 16.69 | 48.4 | [config](../yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658-85bda3f4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658.log.json) |
|
||||
| YOLOv6-l | P5 | 640 | 300 | Yes | Yes | 20.86 | 51.0 | [config](../yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco/yolov6_l_syncbn_fast_8xb32-300e_coco_20221109_183156-91e3c447.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco/yolov6_l_syncbn_fast_8xb32-300e_coco_20221109_183156.log.json) |
|
||||
|
||||
**Note**:
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
_base_ = './yolov6_m_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 1
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 1
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
|
|
|
@ -1,9 +1,16 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.6
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.75
|
||||
affine_scale = 0.9
|
||||
|
||||
# -----train val related-----
|
||||
affine_scale = 0.9 # YOLOv5RandomAffine scaling ratio
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
type='YOLOv6CSPBep',
|
||||
|
|
|
@ -1,8 +1,16 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.25
|
||||
|
||||
# -----train val related-----
|
||||
lr_factor = 0.02 # Learning rate scaling factor
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
|
@ -10,4 +18,4 @@ model = dict(
|
|||
head_module=dict(widen_factor=widen_factor),
|
||||
loss_bbox=dict(iou_mode='siou')))
|
||||
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=0.02))
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
|
||||
|
|
|
@ -1,8 +1,16 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
|
||||
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.25
|
||||
|
||||
# -----train val related-----
|
||||
lr_factor = 0.02 # Learning rate scaling factor
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
|
@ -10,4 +18,4 @@ model = dict(
|
|||
head_module=dict(widen_factor=widen_factor),
|
||||
loss_bbox=dict(iou_mode='siou')))
|
||||
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=0.02))
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
|
||||
|
||||
data_root = './data/cat/'
|
||||
class_name = ('cat', )
|
||||
num_classes = len(class_name)
|
||||
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
|
||||
|
||||
max_epochs = 40
|
||||
train_batch_size_per_gpu = 12
|
||||
train_num_workers = 4
|
||||
num_last_epochs = 5
|
||||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth' # noqa
|
||||
|
||||
model = dict(
|
||||
backbone=dict(frozen_stages=4),
|
||||
bbox_head=dict(head_module=dict(num_classes=num_classes)),
|
||||
train_cfg=dict(
|
||||
initial_assigner=dict(num_classes=num_classes),
|
||||
assigner=dict(num_classes=num_classes)))
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
ann_file='annotations/trainval.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
metainfo=metainfo,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/test.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
|
||||
_base_.custom_hooks[1].switch_epoch = max_epochs - num_last_epochs
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
|
||||
# The warmup_mim_iter parameter is critical.
|
||||
# The default value is 1000 which is not suitable for cat datasets.
|
||||
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
|
||||
logger=dict(type='LoggerHook', interval=5))
|
||||
train_cfg = dict(
|
||||
max_epochs=max_epochs,
|
||||
val_interval=10,
|
||||
dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
|
||||
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -1,8 +1,12 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
|
||||
|
||||
max_epochs = 300
|
||||
num_last_epochs = 15
|
||||
# ======================= Frequently modified parameters =====================
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper
|
||||
max_epochs = 300 # Maximum training epochs
|
||||
num_last_epochs = 15 # Last epoch number to switch training pipeline
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
default_hooks = dict(
|
||||
param_scheduler=dict(
|
||||
type='YOLOv5ParamSchedulerHook',
|
||||
|
|
|
@ -1,31 +1,41 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
|
||||
|
||||
# dataset settings
|
||||
data_root = 'data/coco/'
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# ======================= Frequently modified parameters =====================
|
||||
# -----data related-----
|
||||
data_root = 'data/coco/' # Root path of data
|
||||
# Path of train annotation file
|
||||
train_ann_file = 'annotations/instances_train2017.json'
|
||||
train_data_prefix = 'train2017/' # Prefix of train image path
|
||||
# Path of val annotation file
|
||||
val_ann_file = 'annotations/instances_val2017.json'
|
||||
val_data_prefix = 'val2017/' # Prefix of val image path
|
||||
|
||||
num_last_epochs = 15
|
||||
max_epochs = 400
|
||||
num_classes = 80
|
||||
|
||||
# parameters that often need to be modified
|
||||
img_scale = (640, 640) # width, height
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
affine_scale = 0.5
|
||||
save_epoch_intervals = 10
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 32
|
||||
# Worker to pre-fetch data for each single GPU during training
|
||||
train_num_workers = 8
|
||||
val_batch_size_per_gpu = 1
|
||||
val_num_workers = 2
|
||||
|
||||
# persistent_workers must be False if num_workers is 0.
|
||||
# persistent_workers must be False if num_workers is 0
|
||||
persistent_workers = True
|
||||
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper
|
||||
base_lr = 0.01
|
||||
max_epochs = 400 # Maximum training epochs
|
||||
num_last_epochs = 15 # Last epoch number to switch training pipeline
|
||||
|
||||
# only on Val
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----data related-----
|
||||
img_scale = (640, 640) # width, height
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 1
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 2
|
||||
|
||||
# Config of batch shapes. Only on val.
|
||||
# It means not used if batch_shapes_cfg is None.
|
||||
batch_shapes_cfg = dict(
|
||||
type='BatchShapePolicy',
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
|
@ -33,10 +43,25 @@ batch_shapes_cfg = dict(
|
|||
size_divisor=32,
|
||||
extra_pad_ratio=0.5)
|
||||
|
||||
# single-scale training is recommended to
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.5
|
||||
|
||||
# -----train val related-----
|
||||
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
|
||||
lr_factor = 0.01 # Learning rate scaling factor
|
||||
weight_decay = 0.0005
|
||||
# Save model checkpoint and validation intervals
|
||||
save_epoch_intervals = 10
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 3
|
||||
# Single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
data_preprocessor=dict(
|
||||
|
@ -162,8 +187,8 @@ train_dataloader = dict(
|
|||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/'),
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=False, min_size=32),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
|
@ -193,8 +218,8 @@ val_dataloader = dict(
|
|||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
test_mode=True,
|
||||
data_prefix=dict(img='val2017/'),
|
||||
ann_file='annotations/instances_val2017.json',
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
ann_file=val_ann_file,
|
||||
pipeline=test_pipeline,
|
||||
batch_shapes_cfg=batch_shapes_cfg))
|
||||
|
||||
|
@ -208,7 +233,7 @@ optim_wrapper = dict(
|
|||
type='SGD',
|
||||
lr=base_lr,
|
||||
momentum=0.937,
|
||||
weight_decay=0.0005,
|
||||
weight_decay=weight_decay,
|
||||
nesterov=True,
|
||||
batch_size_per_gpu=train_batch_size_per_gpu),
|
||||
constructor='YOLOv5OptimizerConstructor')
|
||||
|
@ -217,12 +242,12 @@ default_hooks = dict(
|
|||
param_scheduler=dict(
|
||||
type='YOLOv5ParamSchedulerHook',
|
||||
scheduler_type='cosine',
|
||||
lr_factor=0.01,
|
||||
lr_factor=lr_factor,
|
||||
max_epochs=max_epochs),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=save_epoch_intervals,
|
||||
max_keep_ckpts=3,
|
||||
max_keep_ckpts=max_keep_ckpts,
|
||||
save_best='auto'))
|
||||
|
||||
custom_hooks = [
|
||||
|
@ -242,7 +267,7 @@ custom_hooks = [
|
|||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10),
|
||||
ann_file=data_root + 'annotations/instances_val2017.json',
|
||||
ann_file=data_root + val_ann_file,
|
||||
metric='bbox')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py'
|
||||
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.375
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
|
||||
|
||||
# ======================= Possible modified parameters =======================
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.375
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
|
|
|
@ -12,17 +12,22 @@ YOLOv7 surpasses all known object detectors in both speed and accuracy in the ra
|
|||
<img src="https://user-images.githubusercontent.com/17425982/204231759-cc5c77a9-38c6-4a41-85be-eb97e4b2bcbb.png"/>
|
||||
</div>
|
||||
|
||||
<div align=center>
|
||||
<img alt="YOLOv7-l" src="https://user-images.githubusercontent.com/68552295/216335336-963bd03a-71f3-4556-97af-18b20d69e065.png" width = 95.5%/>
|
||||
YOLOv7-l-P5 model structure
|
||||
</div>
|
||||
|
||||
## Results and models
|
||||
|
||||
### COCO
|
||||
|
||||
| Backbone | Arch | Size | SyncBN | AMP | Mem (GB) | Box AP | Config | Download |
|
||||
| :---------: | :--: | :--: | :----: | :-: | :------: | :----: | :------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv7-tiny | P5 | 640 | Yes | Yes | 2.7 | 37.5 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719.log.json) |
|
||||
| YOLOv7-l | P5 | 640 | Yes | Yes | 10.3 | 50.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601-8113c0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601.log.json) |
|
||||
| YOLOv7-x | P5 | 640 | Yes | Yes | 13.7 | 52.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331-ef949a68.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331.log.json) |
|
||||
| YOLOv7-w | P6 | 1280 | Yes | Yes | 27.0 | 54.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031-a68ef9d2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031.log.json) |
|
||||
| YOLOv7-e | P6 | 1280 | Yes | Yes | 42.5 | 55.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636-34425033.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636.log.json) |
|
||||
| Backbone | Arch | Size | SyncBN | AMP | Mem (GB) | Box AP | Config | Download |
|
||||
| :---------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv7-tiny | P5 | 640 | Yes | Yes | 2.7 | 37.5 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719.log.json) |
|
||||
| YOLOv7-l | P5 | 640 | Yes | Yes | 10.3 | 50.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601-8113c0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601.log.json) |
|
||||
| YOLOv7-x | P5 | 640 | Yes | Yes | 13.7 | 52.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331-ef949a68.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331.log.json) |
|
||||
| YOLOv7-w | P6 | 1280 | Yes | Yes | 27.0 | 54.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031-a68ef9d2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031.log.json) |
|
||||
| YOLOv7-e | P6 | 1280 | Yes | Yes | 42.5 | 55.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636-34425033.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636.log.json) |
|
||||
|
||||
**Note**:
|
||||
In the official YOLOv7 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task.
|
||||
|
|
|
@ -1,42 +1,103 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
|
||||
|
||||
# dataset settings
|
||||
data_root = 'data/coco/'
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# ========================Frequently modified parameters======================
|
||||
# -----data related-----
|
||||
data_root = 'data/coco/' # Root path of data
|
||||
# Path of train annotation file
|
||||
train_ann_file = 'annotations/instances_train2017.json'
|
||||
train_data_prefix = 'train2017/' # Prefix of train image path
|
||||
# Path of val annotation file
|
||||
val_ann_file = 'annotations/instances_val2017.json'
|
||||
val_data_prefix = 'val2017/' # Prefix of val image path
|
||||
|
||||
# parameters that often need to be modified
|
||||
img_scale = (640, 640) # width, height
|
||||
max_epochs = 300
|
||||
save_epoch_intervals = 10
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 16
|
||||
# Worker to pre-fetch data for each single GPU during training
|
||||
train_num_workers = 8
|
||||
# persistent_workers must be False if num_workers is 0.
|
||||
# persistent_workers must be False if num_workers is 0
|
||||
persistent_workers = True
|
||||
val_batch_size_per_gpu = 1
|
||||
val_num_workers = 2
|
||||
|
||||
# only on Val
|
||||
batch_shapes_cfg = dict(
|
||||
type='BatchShapePolicy',
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
img_size=img_scale[0],
|
||||
size_divisor=32,
|
||||
extra_pad_ratio=0.5)
|
||||
|
||||
# different from yolov5
|
||||
# -----model related-----
|
||||
# Basic size of multi-scale prior box
|
||||
anchors = [
|
||||
[(12, 16), (19, 36), (40, 28)], # P3/8
|
||||
[(36, 75), (76, 55), (72, 146)], # P4/16
|
||||
[(142, 110), (192, 243), (459, 401)] # P5/32
|
||||
]
|
||||
strides = [8, 16, 32]
|
||||
num_det_layers = 3
|
||||
num_classes = 80
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
|
||||
base_lr = 0.01
|
||||
max_epochs = 300 # Maximum training epochs
|
||||
|
||||
# single-scale training is recommended to
|
||||
num_epoch_stage2 = 30 # The last 30 epochs switch evaluation interval
|
||||
val_interval_stage2 = 1 # Evaluation interval
|
||||
|
||||
model_test_cfg = dict(
|
||||
# The config of multi-label for multi-class prediction.
|
||||
multi_label=True,
|
||||
# The number of boxes before NMS.
|
||||
nms_pre=30000,
|
||||
score_thr=0.001, # Threshold to filter out boxes.
|
||||
nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold
|
||||
max_per_img=300) # Max number of detections of each image
|
||||
|
||||
# ========================Possible modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (640, 640) # width, height
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 1
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 2
|
||||
|
||||
# Config of batch shapes. Only on val.
|
||||
# It means not used if batch_shapes_cfg is None.
|
||||
batch_shapes_cfg = dict(
|
||||
type='BatchShapePolicy',
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
img_size=img_scale[0],
|
||||
# The image scale of padding should be divided by pad_size_divisor
|
||||
size_divisor=32,
|
||||
# Additional paddings for pixel scale
|
||||
extra_pad_ratio=0.5)
|
||||
|
||||
# -----model related-----
|
||||
strides = [8, 16, 32] # Strides of multi-scale prior box
|
||||
num_det_layers = 3 # The number of model output scales
|
||||
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
|
||||
|
||||
# Data augmentation
|
||||
max_translate_ratio = 0.2 # YOLOv5RandomAffine
|
||||
scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine
|
||||
mixup_prob = 0.15 # YOLOv5MixUp
|
||||
randchoice_mosaic_prob = [0.8, 0.2]
|
||||
mixup_alpha = 8.0 # YOLOv5MixUp
|
||||
mixup_beta = 8.0 # YOLOv5MixUp
|
||||
|
||||
# -----train val related-----
|
||||
loss_cls_weight = 0.3
|
||||
loss_bbox_weight = 0.05
|
||||
loss_obj_weight = 0.7
|
||||
# BatchYOLOv7Assigner params
|
||||
simota_candidate_topk = 10
|
||||
simota_iou_weight = 3.0
|
||||
simota_cls_weight = 1.0
|
||||
prior_match_thr = 4. # Priori box matching threshold
|
||||
obj_level_weights = [4., 1.,
|
||||
0.4] # The obj loss weights of the three output layers
|
||||
|
||||
lr_factor = 0.1 # Learning rate scaling factor
|
||||
weight_decay = 0.0005
|
||||
save_epoch_intervals = 1 # Save model checkpoint and validation intervals
|
||||
max_keep_ckpts = 3 # The maximum checkpoints to keep.
|
||||
|
||||
# Single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
data_preprocessor=dict(
|
||||
|
@ -47,7 +108,7 @@ model = dict(
|
|||
backbone=dict(
|
||||
type='YOLOv7Backbone',
|
||||
arch='L',
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
neck=dict(
|
||||
type='YOLOv7PAFPN',
|
||||
|
@ -61,7 +122,7 @@ model = dict(
|
|||
in_channels=[512, 1024, 1024],
|
||||
# The real output channel will be multiplied by 2
|
||||
out_channels=[128, 256, 512],
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
bbox_head=dict(
|
||||
type='YOLOv7Head',
|
||||
|
@ -80,31 +141,28 @@ model = dict(
|
|||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='mean',
|
||||
loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_bbox=dict(
|
||||
type='IoULoss',
|
||||
iou_mode='ciou',
|
||||
bbox_format='xywh',
|
||||
reduction='mean',
|
||||
loss_weight=0.05 * (3 / num_det_layers),
|
||||
loss_weight=loss_bbox_weight * (3 / num_det_layers),
|
||||
return_iou=True),
|
||||
loss_obj=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='mean',
|
||||
loss_weight=0.7 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
|
||||
obj_level_weights=[4., 1., 0.4],
|
||||
loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
|
||||
prior_match_thr=prior_match_thr,
|
||||
obj_level_weights=obj_level_weights,
|
||||
# BatchYOLOv7Assigner params
|
||||
prior_match_thr=4.,
|
||||
simota_candidate_topk=10,
|
||||
simota_iou_weight=3.0,
|
||||
simota_cls_weight=1.0),
|
||||
test_cfg=dict(
|
||||
multi_label=True,
|
||||
nms_pre=30000,
|
||||
score_thr=0.001,
|
||||
nms=dict(type='nms', iou_threshold=0.65),
|
||||
max_per_img=300))
|
||||
simota_candidate_topk=simota_candidate_topk,
|
||||
simota_iou_weight=simota_iou_weight,
|
||||
simota_cls_weight=simota_cls_weight),
|
||||
test_cfg=model_test_cfg)
|
||||
|
||||
pre_transform = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
|
@ -121,8 +179,8 @@ mosiac4_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
max_translate_ratio=0.2, # note
|
||||
scaling_ratio_range=(0.1, 2.0), # note
|
||||
max_translate_ratio=max_translate_ratio, # note
|
||||
scaling_ratio_range=scaling_ratio_range, # note
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -138,8 +196,8 @@ mosiac9_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
max_translate_ratio=0.2, # note
|
||||
scaling_ratio_range=(0.1, 2.0), # note
|
||||
max_translate_ratio=max_translate_ratio, # note
|
||||
scaling_ratio_range=scaling_ratio_range, # note
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -148,16 +206,16 @@ mosiac9_pipeline = [
|
|||
randchoice_mosaic_pipeline = dict(
|
||||
type='RandomChoice',
|
||||
transforms=[mosiac4_pipeline, mosiac9_pipeline],
|
||||
prob=[0.8, 0.2])
|
||||
prob=randchoice_mosaic_prob)
|
||||
|
||||
train_pipeline = [
|
||||
*pre_transform,
|
||||
randchoice_mosaic_pipeline,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
alpha=8.0, # note
|
||||
beta=8.0, # note
|
||||
prob=0.15,
|
||||
alpha=mixup_alpha, # note
|
||||
beta=mixup_beta, # note
|
||||
prob=mixup_prob,
|
||||
pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
|
||||
dict(type='YOLOv5HSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
|
@ -177,8 +235,8 @@ train_dataloader = dict(
|
|||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/'),
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=False, min_size=32),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
|
@ -208,8 +266,8 @@ val_dataloader = dict(
|
|||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
test_mode=True,
|
||||
data_prefix=dict(img='val2017/'),
|
||||
ann_file='annotations/instances_val2017.json',
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
ann_file=val_ann_file,
|
||||
pipeline=test_pipeline,
|
||||
batch_shapes_cfg=batch_shapes_cfg))
|
||||
|
||||
|
@ -220,9 +278,9 @@ optim_wrapper = dict(
|
|||
type='OptimWrapper',
|
||||
optimizer=dict(
|
||||
type='SGD',
|
||||
lr=0.01,
|
||||
lr=base_lr,
|
||||
momentum=0.937,
|
||||
weight_decay=0.0005,
|
||||
weight_decay=weight_decay,
|
||||
nesterov=True,
|
||||
batch_size_per_gpu=train_batch_size_per_gpu),
|
||||
constructor='YOLOv7OptimWrapperConstructor')
|
||||
|
@ -231,27 +289,14 @@ default_hooks = dict(
|
|||
param_scheduler=dict(
|
||||
type='YOLOv5ParamSchedulerHook',
|
||||
scheduler_type='cosine',
|
||||
lr_factor=0.1, # note
|
||||
lr_factor=lr_factor, # note
|
||||
max_epochs=max_epochs),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
save_param_scheduler=False,
|
||||
interval=1,
|
||||
interval=save_epoch_intervals,
|
||||
save_best='auto',
|
||||
max_keep_ckpts=3))
|
||||
|
||||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10), # Can be accelerated
|
||||
ann_file=data_root + 'annotations/instances_val2017.json',
|
||||
metric='bbox')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
train_cfg = dict(
|
||||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=save_epoch_intervals,
|
||||
dynamic_intervals=[(270, 1)])
|
||||
max_keep_ckpts=max_keep_ckpts))
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
|
@ -263,7 +308,17 @@ custom_hooks = [
|
|||
priority=49)
|
||||
]
|
||||
|
||||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10), # Can be accelerated
|
||||
ann_file=data_root + val_ann_file,
|
||||
metric='bbox')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
train_cfg = dict(
|
||||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=save_epoch_intervals,
|
||||
dynamic_intervals=[(max_epochs - num_epoch_stage2, val_interval_stage2)])
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
|
||||
# randomness = dict(seed=1, deterministic=True)
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
_base_ = 'yolov7_tiny_syncbn_fast_8x16b-300e_coco.py'
|
||||
|
||||
data_root = './data/cat/'
|
||||
class_name = ('cat', )
|
||||
num_classes = len(class_name)
|
||||
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
|
||||
|
||||
anchors = [
|
||||
[(68, 69), (154, 91), (143, 162)], # P3/8
|
||||
[(242, 160), (189, 287), (391, 207)], # P4/16
|
||||
[(353, 337), (539, 341), (443, 432)] # P5/32
|
||||
]
|
||||
|
||||
max_epochs = 40
|
||||
train_batch_size_per_gpu = 12
|
||||
train_num_workers = 4
|
||||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth' # noqa
|
||||
|
||||
model = dict(
|
||||
backbone=dict(frozen_stages=4),
|
||||
bbox_head=dict(
|
||||
head_module=dict(num_classes=num_classes),
|
||||
prior_generator=dict(base_sizes=anchors)))
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
ann_file='annotations/trainval.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
metainfo=metainfo,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/test.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
|
||||
# The warmup_mim_iter parameter is critical.
|
||||
# The default value is 1000 which is not suitable for cat datasets.
|
||||
param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
|
||||
logger=dict(type='LoggerHook', interval=5))
|
||||
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
|
||||
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -1,10 +1,26 @@
|
|||
_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'
|
||||
|
||||
# ========================modified parameters========================
|
||||
|
||||
# -----model related-----
|
||||
# Data augmentation
|
||||
max_translate_ratio = 0.1 # YOLOv5RandomAffine
|
||||
scaling_ratio_range = (0.5, 1.6) # YOLOv5RandomAffine
|
||||
mixup_prob = 0.05 # YOLOv5MixUp
|
||||
randchoice_mosaic_prob = [0.8, 0.2]
|
||||
mixup_alpha = 8.0 # YOLOv5MixUp
|
||||
mixup_beta = 8.0 # YOLOv5MixUp
|
||||
|
||||
# -----train val related-----
|
||||
loss_cls_weight = 0.5
|
||||
loss_obj_weight = 1.0
|
||||
|
||||
lr_factor = 0.01 # Learning rate scaling factor
|
||||
# ===============================Unmodified in most cases====================
|
||||
num_classes = _base_.num_classes
|
||||
num_det_layers = _base_.num_det_layers
|
||||
img_scale = _base_.img_scale
|
||||
pre_transform = _base_.pre_transform
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
arch='Tiny', act_cfg=dict(type='LeakyReLU', negative_slope=0.1)),
|
||||
|
@ -18,9 +34,9 @@ model = dict(
|
|||
use_repconv_outs=False),
|
||||
bbox_head=dict(
|
||||
head_module=dict(in_channels=[128, 256, 512]),
|
||||
loss_cls=dict(loss_weight=0.5 *
|
||||
loss_cls=dict(loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=1.0 *
|
||||
loss_obj=dict(loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers))))
|
||||
|
||||
mosiac4_pipeline = [
|
||||
|
@ -33,8 +49,8 @@ mosiac4_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
max_translate_ratio=0.1, # change
|
||||
scaling_ratio_range=(0.5, 1.6), # change
|
||||
max_translate_ratio=max_translate_ratio, # change
|
||||
scaling_ratio_range=scaling_ratio_range, # change
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -50,8 +66,8 @@ mosiac9_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
max_translate_ratio=0.1, # change
|
||||
scaling_ratio_range=(0.5, 1.6), # change
|
||||
max_translate_ratio=max_translate_ratio, # change
|
||||
scaling_ratio_range=scaling_ratio_range, # change
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
]
|
||||
|
@ -59,16 +75,16 @@ mosiac9_pipeline = [
|
|||
randchoice_mosaic_pipeline = dict(
|
||||
type='RandomChoice',
|
||||
transforms=[mosiac4_pipeline, mosiac9_pipeline],
|
||||
prob=[0.8, 0.2])
|
||||
prob=randchoice_mosaic_prob)
|
||||
|
||||
train_pipeline = [
|
||||
*pre_transform,
|
||||
randchoice_mosaic_pipeline,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
alpha=8.0,
|
||||
beta=8.0,
|
||||
prob=0.05, # change
|
||||
alpha=mixup_alpha,
|
||||
beta=mixup_beta,
|
||||
prob=mixup_prob, # change
|
||||
pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
|
||||
dict(type='YOLOv5HSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
|
@ -79,4 +95,4 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=0.01))
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
|
||||
|
|
|
@ -1,18 +1,50 @@
|
|||
_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py'
|
||||
|
||||
# ========================modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (1280, 1280) # height, width
|
||||
num_classes = 80
|
||||
# only on Val
|
||||
batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64)
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Config of batch shapes. Only on val
|
||||
# It means not used if batch_shapes_cfg is None.
|
||||
batch_shapes_cfg = dict(
|
||||
img_size=img_scale[
|
||||
0], # The image scale of padding should be divided by pad_size_divisor
|
||||
size_divisor=64) # Additional paddings for pixel scale
|
||||
tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
|
||||
|
||||
# -----model related-----
|
||||
# Basic size of multi-scale prior box
|
||||
anchors = [
|
||||
[(19, 27), (44, 40), (38, 94)], # P3/8
|
||||
[(96, 68), (86, 152), (180, 137)], # P4/16
|
||||
[(140, 301), (303, 264), (238, 542)], # P5/32
|
||||
[(436, 615), (739, 380), (925, 792)] # P6/64
|
||||
]
|
||||
strides = [8, 16, 32, 64]
|
||||
num_det_layers = 4
|
||||
strides = [8, 16, 32, 64] # Strides of multi-scale prior box
|
||||
num_det_layers = 4 # # The number of model output scales
|
||||
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
|
||||
|
||||
# Data augmentation
|
||||
max_translate_ratio = 0.2 # YOLOv5RandomAffine
|
||||
scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine
|
||||
mixup_prob = 0.15 # YOLOv5MixUp
|
||||
randchoice_mosaic_prob = [0.8, 0.2]
|
||||
mixup_alpha = 8.0 # YOLOv5MixUp
|
||||
mixup_beta = 8.0 # YOLOv5MixUp
|
||||
|
||||
# -----train val related-----
|
||||
loss_cls_weight = 0.3
|
||||
loss_bbox_weight = 0.05
|
||||
loss_obj_weight = 0.7
|
||||
obj_level_weights = [4.0, 1.0, 0.25, 0.06]
|
||||
simota_candidate_topk = 20
|
||||
|
||||
# The only difference between P6 and P5 in terms of
|
||||
# hyperparameters is lr_factor
|
||||
lr_factor = 0.2
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
pre_transform = _base_.pre_transform
|
||||
|
||||
model = dict(
|
||||
backbone=dict(arch='W', out_indices=(2, 3, 4, 5)),
|
||||
|
@ -26,19 +58,17 @@ model = dict(
|
|||
type='YOLOv7p6HeadModule',
|
||||
in_channels=[128, 256, 384, 512],
|
||||
featmap_strides=strides,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
prior_generator=dict(base_sizes=anchors, strides=strides),
|
||||
simota_candidate_topk=20, # note
|
||||
simota_candidate_topk=simota_candidate_topk, # note
|
||||
# scaled based on number of detection layers
|
||||
loss_cls=dict(loss_weight=0.3 *
|
||||
loss_cls=dict(loss_weight=loss_cls_weight *
|
||||
(num_classes / 80 * 3 / num_det_layers)),
|
||||
loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=0.7 *
|
||||
loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
|
||||
loss_obj=dict(loss_weight=loss_obj_weight *
|
||||
((img_scale[0] / 640)**2 * 3 / num_det_layers)),
|
||||
obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
|
||||
|
||||
pre_transform = _base_.pre_transform
|
||||
obj_level_weights=obj_level_weights))
|
||||
|
||||
mosiac4_pipeline = [
|
||||
dict(
|
||||
|
@ -50,8 +80,8 @@ mosiac4_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
max_translate_ratio=0.2, # note
|
||||
scaling_ratio_range=(0.1, 2.0), # note
|
||||
max_translate_ratio=max_translate_ratio, # note
|
||||
scaling_ratio_range=scaling_ratio_range, # note
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -67,8 +97,8 @@ mosiac9_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
max_translate_ratio=0.2, # note
|
||||
scaling_ratio_range=(0.1, 2.0), # note
|
||||
max_translate_ratio=max_translate_ratio, # note
|
||||
scaling_ratio_range=scaling_ratio_range, # note
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -77,16 +107,16 @@ mosiac9_pipeline = [
|
|||
randchoice_mosaic_pipeline = dict(
|
||||
type='RandomChoice',
|
||||
transforms=[mosiac4_pipeline, mosiac9_pipeline],
|
||||
prob=[0.8, 0.2])
|
||||
prob=randchoice_mosaic_prob)
|
||||
|
||||
train_pipeline = [
|
||||
*pre_transform,
|
||||
randchoice_mosaic_pipeline,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
alpha=8.0, # note
|
||||
beta=8.0, # note
|
||||
prob=0.15,
|
||||
alpha=mixup_alpha, # note
|
||||
beta=mixup_beta, # note
|
||||
prob=mixup_prob,
|
||||
pre_transform=[*pre_transform, randchoice_mosaic_pipeline]),
|
||||
dict(type='YOLOv5HSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
|
@ -115,6 +145,38 @@ val_dataloader = dict(
|
|||
dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# The only difference between P6 and P5 in terms of
|
||||
# hyperparameters is lr_factor
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=0.2))
|
||||
default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
|
||||
|
||||
# Config for Test Time Augmentation. (TTA)
|
||||
_multiscale_resize_transforms = [
|
||||
dict(
|
||||
type='Compose',
|
||||
transforms=[
|
||||
dict(type='YOLOv5KeepRatioResize', scale=s),
|
||||
dict(
|
||||
type='LetterResize',
|
||||
scale=s,
|
||||
allow_scale_up=False,
|
||||
pad_val=dict(img=114))
|
||||
]) for s in tta_img_scales
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
_multiscale_resize_transforms,
|
||||
[
|
||||
dict(type='mmdet.RandomFlip', prob=1.),
|
||||
dict(type='mmdet.RandomFlip', prob=0.)
|
||||
], [dict(type='mmdet.LoadAnnotations', with_bbox=True)],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor', 'pad_param', 'flip',
|
||||
'flip_direction'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -20,19 +20,26 @@ YOLOv8-P5 model structure
|
|||
|
||||
### COCO
|
||||
|
||||
| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download |
|
||||
| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv8-n | P5 | 640 | Yes | Yes | 2.8 | 37.2 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_n_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) |
|
||||
| YOLOv8-s | P5 | 640 | Yes | Yes | 4.0 | 44.2 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_s_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) |
|
||||
| YOLOv8-m | P5 | 640 | Yes | Yes | 7.2 | 49.8 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_m_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) |
|
||||
| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download |
|
||||
| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :--------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) |
|
||||
| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | 39.9 | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) |
|
||||
| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) |
|
||||
| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | 46.8 | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) |
|
||||
| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) |
|
||||
| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | 52.3 | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) |
|
||||
| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) |
|
||||
| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | 54.4 | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) |
|
||||
| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) |
|
||||
| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | 55.0 | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) |
|
||||
|
||||
**Note**
|
||||
|
||||
In the official YOLOv8 code, the [bbox annotation](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd340a2611148fbf2a0af59b544bd7b1b/ultralytics/yolo/data/dataloaders/v5loader.py#L1011), [`random_perspective`](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd3/ultralytics/yolo/data/dataloaders/v5augmentations.py#L208) and [`copy_paste`](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd3/ultralytics/yolo/data/dataloaders/v5augmentations.py#L208) data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We trained the official YOLOv8s code with `8xb16` configuration and its best performance is also 44.2. We will support mask annotations in object detection tasks in the next version.
|
||||
|
||||
1. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code, but has no effect on performance.
|
||||
2. The performance is unstable and may fluctuate by about 0.3 mAP; the best-performing checkpoint of `YOLOv8` during `COCO` training may not come from the last epoch. The performance shown above is that of the best model.
|
||||
3. We provide [scripts](https://github.com/open-mmlab/mmyolo/tree/dev/tools/model_converters/yolov8_to_mmyolo.py) to convert official weights to MMYOLO.
|
||||
4. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision.
|
||||
4. `SyncBN` means using SyncBN, `AMP` indicates training with mixed precision.
|
||||
5. The performance of `Mask Refine` training is for the weight performance officially released by YOLOv8. `Mask Refine` means refining bbox by mask while loading annotations and transforming after `YOLOv5RandomAffine`, and the L and X models use `Copy Paste`.
|
||||
6. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.
|
||||
|
||||
## Citation
|
||||
|
|
|
@ -54,3 +54,87 @@ Models:
|
|||
Metrics:
|
||||
box AP: 49.8
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth
|
||||
- Name: yolov8_l_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 9.8
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 52.1
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth
|
||||
- Name: yolov8_x_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 12.2
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 52.7
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth
|
||||
- Name: yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 2.5
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 37.4
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth
|
||||
- Name: yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 4.0
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 45.1
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth
|
||||
- Name: yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 7.0
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 50.6
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth
|
||||
- Name: yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 9.1
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 53.0
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth
|
||||
- Name: yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco
|
||||
In Collection: YOLOv8
|
||||
Config: configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 12.4
|
||||
Epochs: 500
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 54.0
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
# YOLOv8-L config with mask-refined bboxes and `YOLOv5CopyPaste`.
# Inherits from the YOLOv8-M mask-refine config and overrides model scale
# and augmentation probabilities.
_base_ = './yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py'

# This config use refining bbox and `YOLOv5CopyPaste`.
# Refining bbox means refining bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`

# ========================modified parameters======================
# Depth/width multipliers and last backbone stage channels for the L variant.
deepen_factor = 1.00
widen_factor = 1.00
last_stage_out_channels = 512

# Augmentation probabilities (override the base config's values).
mixup_prob = 0.15
copypaste_prob = 0.3

# =======================Unmodified in most cases==================
# Values re-exported from the base config so they can be referenced below.
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform
affine_scale = _base_.affine_scale

# Backbone, neck and head are scaled together so channel counts stay
# consistent across the three components.
model = dict(
    backbone=dict(
        last_stage_out_channels=last_stage_out_channels,
        deepen_factor=deepen_factor,
        widen_factor=widen_factor),
    neck=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=[256, 512, last_stage_out_channels],
        out_channels=[256, 512, last_stage_out_channels]),
    bbox_head=dict(
        head_module=dict(
            widen_factor=widen_factor,
            in_channels=[256, 512, last_stage_out_channels])))

# Mosaic -> CopyPaste -> RandomAffine; reused both in the main train
# pipeline and as the pre_transform of `YOLOv5MixUp` below.
mosaic_affine_transform = [
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(type='YOLOv5CopyPaste', prob=copypaste_prob),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100.,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        border_val=(114, 114, 114),
        min_area_ratio=_base_.min_area_ratio,
        use_mask_refine=_base_.use_mask2refine)
]

# MixUp re-runs the full mosaic/affine chain on the image being mixed in
# (see its pre_transform), so both mixed images get identical augmentation.
train_pipeline = [
    *pre_transform, *mosaic_affine_transform,
    dict(
        type='YOLOv5MixUp',
        prob=mixup_prob,
        pre_transform=[*pre_transform, *mosaic_affine_transform]),
    *last_transform
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
|
@ -1,9 +1,16 @@
|
|||
_base_ = './yolov8_m_syncbn_fast_8xb16-500e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 1.00
|
||||
widen_factor = 1.00
|
||||
last_stage_out_channels = 512
|
||||
mixup_ratio = 0.15
|
||||
|
||||
mixup_prob = 0.15
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
pre_transform = _base_.pre_transform
|
||||
mosaic_affine_transform = _base_.mosaic_affine_transform
|
||||
last_transform = _base_.last_transform
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
|
@ -20,16 +27,11 @@ model = dict(
|
|||
widen_factor=widen_factor,
|
||||
in_channels=[256, 512, last_stage_out_channels])))
|
||||
|
||||
pre_transform = _base_.pre_transform
|
||||
albu_train_transform = _base_.albu_train_transform
|
||||
mosaic_affine_transform = _base_.mosaic_affine_transform
|
||||
last_transform = _base_.last_transform
|
||||
|
||||
train_pipeline = [
|
||||
*pre_transform, *mosaic_affine_transform,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
prob=mixup_ratio,
|
||||
prob=mixup_prob,
|
||||
pre_transform=[*pre_transform, *mosaic_affine_transform]),
|
||||
*last_transform
|
||||
]
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
# YOLOv8-M config with mask-refined bboxes and `YOLOv5CopyPaste`.
# Inherits from the YOLOv8-S mask-refine config and overrides model scale
# and augmentation strength.
_base_ = './yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py'

# This config use refining bbox and `YOLOv5CopyPaste`.
# Refining bbox means refining bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`

# ========================modified parameters======================
# Depth/width multipliers and last backbone stage channels for the M variant.
deepen_factor = 0.67
widen_factor = 0.75
last_stage_out_channels = 768

# Augmentation hyper-parameters (override the base config's values).
affine_scale = 0.9
mixup_prob = 0.1
copypaste_prob = 0.1

# ===============================Unmodified in most cases====================
# Values re-exported from the base config so they can be referenced below.
img_scale = _base_.img_scale
pre_transform = _base_.pre_transform
last_transform = _base_.last_transform

# Backbone, neck and head are scaled together so channel counts stay
# consistent across the three components.
model = dict(
    backbone=dict(
        last_stage_out_channels=last_stage_out_channels,
        deepen_factor=deepen_factor,
        widen_factor=widen_factor),
    neck=dict(
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=[256, 512, last_stage_out_channels],
        out_channels=[256, 512, last_stage_out_channels]),
    bbox_head=dict(
        head_module=dict(
            widen_factor=widen_factor,
            in_channels=[256, 512, last_stage_out_channels])))

# Mosaic -> CopyPaste -> RandomAffine; reused both in the main train
# pipeline and as the pre_transform of `YOLOv5MixUp` below.
mosaic_affine_transform = [
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(type='YOLOv5CopyPaste', prob=copypaste_prob),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100.,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        border_val=(114, 114, 114),
        min_area_ratio=_base_.min_area_ratio,
        use_mask_refine=_base_.use_mask2refine)
]

# Stage-1 pipeline: mosaic/copy-paste/affine plus MixUp.
train_pipeline = [
    *pre_transform, *mosaic_affine_transform,
    dict(
        type='YOLOv5MixUp',
        prob=mixup_prob,
        pre_transform=[*pre_transform, *mosaic_affine_transform]),
    *last_transform
]

# Stage-2 pipeline (final epochs): mosaic disabled, only resize/letterbox
# plus a milder random affine. Must be rebuilt here because affine_scale
# was overridden above.
train_pipeline_stage2 = [
    *pre_transform,
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=True,
        pad_val=dict(img=114.0)),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        max_aspect_ratio=_base_.max_aspect_ratio,
        border_val=(114, 114, 114),
        min_area_ratio=_base_.min_area_ratio,
        use_mask_refine=_base_.use_mask2refine), *last_transform
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
# NOTE(review): presumably custom_hooks[1] is the pipeline-switch hook
# declared in the base config — confirm its index before reordering hooks.
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
|
|
@ -1,15 +1,17 @@
|
|||
_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
last_stage_out_channels = 768
|
||||
|
||||
affine_scale = 0.9
|
||||
mixup_ratio = 0.1
|
||||
mixup_prob = 0.1
|
||||
|
||||
num_classes = _base_.num_classes
|
||||
num_det_layers = _base_.num_det_layers
|
||||
# =======================Unmodified in most cases==================
|
||||
img_scale = _base_.img_scale
|
||||
pre_transform = _base_.pre_transform
|
||||
last_transform = _base_.last_transform
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
|
@ -26,10 +28,6 @@ model = dict(
|
|||
widen_factor=widen_factor,
|
||||
in_channels=[256, 512, last_stage_out_channels])))
|
||||
|
||||
pre_transform = _base_.pre_transform
|
||||
albu_train_transform = _base_.albu_train_transform
|
||||
last_transform = _base_.last_transform
|
||||
|
||||
mosaic_affine_transform = [
|
||||
dict(
|
||||
type='Mosaic',
|
||||
|
@ -47,17 +45,16 @@ mosaic_affine_transform = [
|
|||
border_val=(114, 114, 114))
|
||||
]
|
||||
|
||||
# enable mixup
|
||||
train_pipeline = [
|
||||
*pre_transform, *mosaic_affine_transform,
|
||||
dict(
|
||||
type='YOLOv5MixUp',
|
||||
prob=mixup_ratio,
|
||||
prob=mixup_prob,
|
||||
pre_transform=[*pre_transform, *mosaic_affine_transform]),
|
||||
*last_transform
|
||||
]
|
||||
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
|
||||
train_pipeline_stage2 = [
|
||||
*pre_transform,
|
||||
dict(type='YOLOv5KeepRatioResize', scale=img_scale),
|
||||
|
@ -75,16 +72,5 @@ train_pipeline_stage2 = [
|
|||
border_val=(114, 114, 114)), *last_transform
|
||||
]
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0001,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=_base_.max_epochs - 10,
|
||||
switch_pipeline=train_pipeline_stage2)
|
||||
]
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# YOLOv8-N config with mask-refined bboxes. Inherits the full S mask-refine
# setup and only shrinks the model scale.
_base_ = './yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py'

# This config will refine bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`

# Depth/width multipliers for the N variant.
deepen_factor = 0.33
widen_factor = 0.25

# Backbone, neck and head widths are scaled together.
model = dict(
    backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
    neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
|
@ -0,0 +1,52 @@
|
|||
# Fine-tune YOLOv8-S on a small single-class ("cat") dataset, starting from
# the COCO-pretrained checkpoint.
_base_ = 'yolov8_s_syncbn_fast_8xb16-500e_coco.py'

# -----data related-----
data_root = './data/cat/'
class_name = ('cat', )  # single-class dataset
num_classes = len(class_name)
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])

# Disable mosaic augmentation for the final epochs (stage 2).
close_mosaic_epochs = 5

max_epochs = 40
train_batch_size_per_gpu = 12
train_num_workers = 4

# COCO-pretrained weights used as the fine-tuning starting point.
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth'  # noqa

model = dict(
    # Freeze the backbone for transfer learning.
    # NOTE(review): frozen_stages=4 assumes a 4-stage backbone — confirm.
    backbone=dict(frozen_stages=4),
    # Head and assigner must both know the new class count.
    bbox_head=dict(head_module=dict(num_classes=num_classes)),
    train_cfg=dict(assigner=dict(num_classes=num_classes)))

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='annotations/trainval.json',
        data_prefix=dict(img='images/')))

val_dataloader = dict(
    dataset=dict(
        metainfo=metainfo,
        data_root=data_root,
        ann_file='annotations/test.json',
        data_prefix=dict(img='images/')))

test_dataloader = val_dataloader

# Keep the optimizer's effective-LR scaling in sync with the new batch size.
_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu
# NOTE(review): presumably custom_hooks[1] is the pipeline-switch hook in
# the base config — confirm its index.
_base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs

val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
test_evaluator = val_evaluator

default_hooks = dict(
    checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
    # The warmup_mim_iter parameter is critical.
    # The default value is 1000 which is not suitable for cat datasets.
    # NOTE(review): 'warmup_mim_iter' spelling matches the hook's actual
    # parameter name upstream — do not "fix" it to 'min'.
    param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10),
    logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -0,0 +1,83 @@
|
|||
# YOLOv8-S config that refines gt bboxes from instance masks: masks are
# loaded alongside boxes, used to re-derive tight boxes after geometric
# augmentation, then dropped before batching.
_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py'

# This config will refine bbox by mask while loading annotations and
# transforming after `YOLOv5RandomAffine`

# ========================modified parameters======================
use_mask2refine = True
min_area_ratio = 0.01  # YOLOv5RandomAffine

# ===============================Unmodified in most cases====================
# Load masks as well as boxes; mask2bbox re-derives the boxes from masks.
pre_transform = [
    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        mask2bbox=use_mask2refine)
]

last_transform = [
    # Delete gt_masks to avoid more computation
    dict(type='RemoveDataElement', keys=['gt_masks']),
    dict(
        type='mmdet.Albu',
        transforms=_base_.albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
        # Map mmdet field names to the names Albumentations expects.
        keymap={
            'img': 'image',
            'gt_bboxes': 'bboxes'
        }),
    dict(type='YOLOv5HSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                   'flip_direction'))
]

# Stage-1 pipeline: mosaic plus random affine with mask-based bbox refine.
train_pipeline = [
    *pre_transform,
    dict(
        type='Mosaic',
        img_scale=_base_.img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
        max_aspect_ratio=_base_.max_aspect_ratio,
        # img_scale is (width, height)
        border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
        border_val=(114, 114, 114),
        min_area_ratio=min_area_ratio,
        use_mask_refine=use_mask2refine),
    *last_transform
]

# Stage-2 pipeline (final epochs): mosaic disabled, resize/letterbox plus a
# milder random affine, still refining bboxes from masks.
train_pipeline_stage2 = [
    *pre_transform,
    dict(type='YOLOv5KeepRatioResize', scale=_base_.img_scale),
    dict(
        type='LetterResize',
        scale=_base_.img_scale,
        allow_scale_up=True,
        pad_val=dict(img=114.0)),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
        max_aspect_ratio=_base_.max_aspect_ratio,
        border_val=(114, 114, 114),
        min_area_ratio=min_area_ratio,
        use_mask_refine=use_mask2refine), *last_transform
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
# NOTE(review): presumably custom_hooks[1] is the pipeline-switch hook in
# the base config — confirm its index before reordering hooks.
_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2
|
|
@ -1,37 +1,100 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
|
||||
|
||||
# dataset settings
|
||||
data_root = 'data/coco/'
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# ========================Frequently modified parameters======================
|
||||
# -----data related-----
|
||||
data_root = 'data/coco/' # Root path of data
|
||||
# Path of train annotation file
|
||||
train_ann_file = 'annotations/instances_train2017.json'
|
||||
train_data_prefix = 'train2017/' # Prefix of train image path
|
||||
# Path of val annotation file
|
||||
val_ann_file = 'annotations/instances_val2017.json'
|
||||
val_data_prefix = 'val2017/' # Prefix of val image path
|
||||
|
||||
# parameters that often need to be modified
|
||||
num_classes = 80
|
||||
img_scale = (640, 640) # height, width
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
max_epochs = 500
|
||||
save_epoch_intervals = 10
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 16
|
||||
# Worker to pre-fetch data for each single GPU during training
|
||||
train_num_workers = 8
|
||||
val_batch_size_per_gpu = 1
|
||||
val_num_workers = 2
|
||||
|
||||
# persistent_workers must be False if num_workers is 0.
|
||||
# persistent_workers must be False if num_workers is 0
|
||||
persistent_workers = True
|
||||
|
||||
strides = [8, 16, 32]
|
||||
num_det_layers = 3
|
||||
|
||||
last_stage_out_channels = 1024
|
||||
|
||||
# Base learning rate for optim_wrapper
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
|
||||
base_lr = 0.01
|
||||
lr_factor = 0.01
|
||||
max_epochs = 500 # Maximum training epochs
|
||||
# Disable mosaic augmentation for final 10 epochs (stage 2)
|
||||
close_mosaic_epochs = 10
|
||||
|
||||
# single-scale training is recommended to
|
||||
model_test_cfg = dict(
|
||||
# The config of multi-label for multi-class prediction.
|
||||
multi_label=True,
|
||||
# The number of boxes before NMS
|
||||
nms_pre=30000,
|
||||
score_thr=0.001, # Threshold to filter out boxes.
|
||||
nms=dict(type='nms', iou_threshold=0.7), # NMS type and threshold
|
||||
max_per_img=300) # Max number of detections of each image
|
||||
|
||||
# ========================Possible modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (640, 640) # width, height
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 1
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 2
|
||||
|
||||
# Config of batch shapes. Only on val.
|
||||
# We tested YOLOv8-m will get 0.02 higher than not using it.
|
||||
batch_shapes_cfg = None
|
||||
# You can turn on `batch_shapes_cfg` by uncommenting the following lines.
|
||||
# batch_shapes_cfg = dict(
|
||||
# type='BatchShapePolicy',
|
||||
# batch_size=val_batch_size_per_gpu,
|
||||
# img_size=img_scale[0],
|
||||
# # The image scale of padding should be divided by pad_size_divisor
|
||||
# size_divisor=32,
|
||||
# # Additional paddings for pixel scale
|
||||
# extra_pad_ratio=0.5)
|
||||
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.5
|
||||
# Strides of multi-scale prior box
|
||||
strides = [8, 16, 32]
|
||||
# The output channel of the last stage
|
||||
last_stage_out_channels = 1024
|
||||
num_det_layers = 3 # The number of model output scales
|
||||
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config
|
||||
|
||||
# -----train val related-----
|
||||
affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio
|
||||
# YOLOv5RandomAffine aspect ratio of width and height thres to filter bboxes
|
||||
max_aspect_ratio = 100
|
||||
tal_topk = 10 # Number of bbox selected in each level
|
||||
tal_alpha = 0.5 # A Hyper-parameter related to alignment_metrics
|
||||
tal_beta = 6.0 # A Hyper-parameter related to alignment_metrics
|
||||
# TODO: Automatically scale loss_weight based on number of detection layers
|
||||
loss_cls_weight = 0.5
|
||||
loss_bbox_weight = 7.5
|
||||
# Since the dfloss is implemented differently in the official
|
||||
# and mmdet, we're going to divide loss_weight by 4.
|
||||
loss_dfl_weight = 1.5 / 4
|
||||
lr_factor = 0.01 # Learning rate scaling factor
|
||||
weight_decay = 0.0005
|
||||
# Save model checkpoint and validation intervals in stage 1
|
||||
save_epoch_intervals = 10
|
||||
# validation intervals in stage 2
|
||||
val_interval_stage2 = 1
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 2
|
||||
# Single-scale training is recommended to
|
||||
# be turned on, which can speed up training.
|
||||
env_cfg = dict(cudnn_benchmark=True)
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
data_preprocessor=dict(
|
||||
|
@ -45,7 +108,7 @@ model = dict(
|
|||
last_stage_out_channels=last_stage_out_channels,
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
neck=dict(
|
||||
type='YOLOv8PAFPN',
|
||||
|
@ -54,7 +117,7 @@ model = dict(
|
|||
in_channels=[256, 512, last_stage_out_channels],
|
||||
out_channels=[256, 512, last_stage_out_channels],
|
||||
num_csp_blocks=3,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
bbox_head=dict(
|
||||
type='YOLOv8Head',
|
||||
|
@ -64,47 +127,41 @@ model = dict(
|
|||
in_channels=[256, 512, last_stage_out_channels],
|
||||
widen_factor=widen_factor,
|
||||
reg_max=16,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True),
|
||||
featmap_strides=[8, 16, 32]),
|
||||
featmap_strides=strides),
|
||||
prior_generator=dict(
|
||||
type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]),
|
||||
type='mmdet.MlvlPointGenerator', offset=0.5, strides=strides),
|
||||
bbox_coder=dict(type='DistancePointBBoxCoder'),
|
||||
# scaled based on number of detection layers
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='none',
|
||||
loss_weight=0.5),
|
||||
loss_weight=loss_cls_weight),
|
||||
loss_bbox=dict(
|
||||
type='IoULoss',
|
||||
iou_mode='ciou',
|
||||
bbox_format='xyxy',
|
||||
reduction='sum',
|
||||
loss_weight=7.5,
|
||||
loss_weight=loss_bbox_weight,
|
||||
return_iou=False),
|
||||
# Since the dfloss is implemented differently in the official
|
||||
# and mmdet, we're going to divide loss_weight by 4.
|
||||
loss_dfl=dict(
|
||||
type='mmdet.DistributionFocalLoss',
|
||||
reduction='mean',
|
||||
loss_weight=1.5 / 4)),
|
||||
loss_weight=loss_dfl_weight)),
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='BatchTaskAlignedAssigner',
|
||||
num_classes=num_classes,
|
||||
use_ciou=True,
|
||||
topk=10,
|
||||
alpha=0.5,
|
||||
beta=6.0,
|
||||
topk=tal_topk,
|
||||
alpha=tal_alpha,
|
||||
beta=tal_beta,
|
||||
eps=1e-9)),
|
||||
test_cfg=dict(
|
||||
multi_label=True,
|
||||
nms_pre=30000,
|
||||
score_thr=0.001,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
max_per_img=300))
|
||||
test_cfg=model_test_cfg)
|
||||
|
||||
albu_train_transform = [
|
||||
albu_train_transforms = [
|
||||
dict(type='Blur', p=0.01),
|
||||
dict(type='MedianBlur', p=0.01),
|
||||
dict(type='ToGray', p=0.01),
|
||||
|
@ -119,7 +176,7 @@ pre_transform = [
|
|||
last_transform = [
|
||||
dict(
|
||||
type='mmdet.Albu',
|
||||
transforms=albu_train_transform,
|
||||
transforms=albu_train_transforms,
|
||||
bbox_params=dict(
|
||||
type='BboxParams',
|
||||
format='pascal_voc',
|
||||
|
@ -135,6 +192,7 @@ last_transform = [
|
|||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'flip_direction'))
|
||||
]
|
||||
|
||||
train_pipeline = [
|
||||
*pre_transform,
|
||||
dict(
|
||||
|
@ -146,8 +204,8 @@ train_pipeline = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
scaling_ratio_range=(0.5, 1.5),
|
||||
max_aspect_ratio=100,
|
||||
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
|
||||
max_aspect_ratio=max_aspect_ratio,
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2),
|
||||
border_val=(114, 114, 114)),
|
||||
|
@ -166,8 +224,8 @@ train_pipeline_stage2 = [
|
|||
type='YOLOv5RandomAffine',
|
||||
max_rotate_degree=0.0,
|
||||
max_shear_degree=0.0,
|
||||
scaling_ratio_range=(0.5, 1.5),
|
||||
max_aspect_ratio=100,
|
||||
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
|
||||
max_aspect_ratio=max_aspect_ratio,
|
||||
border_val=(114, 114, 114)), *last_transform
|
||||
]
|
||||
|
||||
|
@ -181,8 +239,8 @@ train_dataloader = dict(
|
|||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/'),
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=False, min_size=32),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
|
@ -201,17 +259,6 @@ test_pipeline = [
|
|||
'scale_factor', 'pad_param'))
|
||||
]
|
||||
|
||||
# only on Val
|
||||
# you can turn on `batch_shapes_cfg`,
|
||||
# we tested YOLOv8-m will get 0.02 higher than not using it.
|
||||
batch_shapes_cfg = None
|
||||
# batch_shapes_cfg = dict(
|
||||
# type='BatchShapePolicy',
|
||||
# batch_size=val_batch_size_per_gpu,
|
||||
# img_size=img_scale[0],
|
||||
# size_divisor=32,
|
||||
# extra_pad_ratio=0.5)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
num_workers=val_num_workers,
|
||||
|
@ -223,8 +270,8 @@ val_dataloader = dict(
|
|||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
test_mode=True,
|
||||
data_prefix=dict(img='val2017/'),
|
||||
ann_file='annotations/instances_val2017.json',
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
ann_file=val_ann_file,
|
||||
pipeline=test_pipeline,
|
||||
batch_shapes_cfg=batch_shapes_cfg))
|
||||
|
||||
|
@ -238,7 +285,7 @@ optim_wrapper = dict(
|
|||
type='SGD',
|
||||
lr=base_lr,
|
||||
momentum=0.937,
|
||||
weight_decay=0.0005,
|
||||
weight_decay=weight_decay,
|
||||
nesterov=True,
|
||||
batch_size_per_gpu=train_batch_size_per_gpu),
|
||||
constructor='YOLOv5OptimizerConstructor')
|
||||
|
@ -253,7 +300,7 @@ default_hooks = dict(
|
|||
type='CheckpointHook',
|
||||
interval=save_epoch_intervals,
|
||||
save_best='auto',
|
||||
max_keep_ckpts=2))
|
||||
max_keep_ckpts=max_keep_ckpts))
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
|
@ -265,14 +312,14 @@ custom_hooks = [
|
|||
priority=49),
|
||||
dict(
|
||||
type='mmdet.PipelineSwitchHook',
|
||||
switch_epoch=max_epochs - 10,
|
||||
switch_epoch=max_epochs - close_mosaic_epochs,
|
||||
switch_pipeline=train_pipeline_stage2)
|
||||
]
|
||||
|
||||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10),
|
||||
ann_file=data_root + 'annotations/instances_val2017.json',
|
||||
ann_file=data_root + val_ann_file,
|
||||
metric='bbox')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
|
@ -280,7 +327,8 @@ train_cfg = dict(
|
|||
type='EpochBasedTrainLoop',
|
||||
max_epochs=max_epochs,
|
||||
val_interval=save_epoch_intervals,
|
||||
dynamic_intervals=[(max_epochs - 10, 1)])
|
||||
dynamic_intervals=[((max_epochs - close_mosaic_epochs),
|
||||
val_interval_stage2)])
|
||||
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
_base_ = './yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py'
|
||||
|
||||
# This config use refining bbox and `YOLOv5CopyPaste`.
|
||||
# Refining bbox means refining bbox by mask while loading annotations and
|
||||
# transforming after `YOLOv5RandomAffine`
|
||||
|
||||
deepen_factor = 1.00
|
||||
widen_factor = 1.25
|
||||
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
|
@ -13,16 +13,32 @@ In this report, we present some experienced improvements to YOLO series, forming
|
|||
</div>
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/27466624/211143387-004c6718-3d61-44c8-9406-f56b9238452a.jpg"/>
|
||||
<img src="https://user-images.githubusercontent.com/71306851/218628641-6c0101e6-e40e-4b16-a696-c0f55b8d335c.png"/>
|
||||
YOLOX-l model structure
|
||||
</div>
|
||||
|
||||
## Results and Models
|
||||
## 🥳 🚀 Results and Models
|
||||
|
||||
| Backbone | size | Mem (GB) | box AP | Config | Download |
|
||||
| :--------: | :--: | :------: | :----: | :---------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_tiny_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) |
|
||||
| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_s_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) |
|
||||
| Backbone | Size | Batch Size | AMP | RTMDet-Hyp | Mem (GB) | Box AP | Config | Download |
|
||||
| :--------: | :--: | :--------: | :-: | :--------: | :------: | :---------: | :-----------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| YOLOX-tiny | 416 | 8xb8 | No | No | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) |
|
||||
| YOLOX-tiny | 416 | 8xb32 | Yes | Yes | 4.9 | 34.3 (+1.6) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637-4c338102.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637.log.json) |
|
||||
| YOLOX-s | 640 | 8xb8 | Yes | No | 2.9 | 40.7 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600-2b224d8b.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600.log.json) |
|
||||
| YOLOX-s | 640 | 8xb32 | Yes | Yes | 9.8 | 41.9 (+1.2) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645.log.json) |
|
||||
| YOLOX-m | 640 | 8xb8 | Yes | No | 4.9 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_m_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218-a71a6b25.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218.log.json) |
|
||||
| YOLOX-m | 640 | 8xb32 | Yes | Yes | 17.6 | 47.5 (+0.6) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328-e657e182.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328.log.json) |
|
||||
| YOLOX-l | 640 | 8xb8 | Yes | No | 8.0 | 50.1 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_l_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast__8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715-c731eb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast_8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715.log.json) |
|
||||
| YOLOX-x | 640 | 8xb8 | Yes | No | 9.8 | 51.4 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_x_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950-1d509fab.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950.log.json) |
|
||||
|
||||
YOLOX uses a default training configuration of `8xbs8` which results in a long training time, we expect it to use `8xbs32` to speed up the training and not cause a decrease in mAP. We modified `train_batch_size_per_gpu` from 8 to 32, `batch_augments_interval` from 10 to 1 and `base_lr` from 0.01 to 0.04 under YOLOX-s default configuration based on the linear scaling rule, which resulted in mAP degradation. Finally, I found that using RTMDet's training hyperparameter can improve performance in YOLOX Tiny/S/M, which also validates the superiority of RTMDet's training hyperparameter.
|
||||
|
||||
The modified training parameters are as follows:
|
||||
|
||||
1. train_batch_size_per_gpu: 8 -> 32
|
||||
2. batch_augments_interval: 10 -> 1
|
||||
3. num_last_epochs: 15 -> 20
|
||||
4. optim cfg: SGD -> AdamW, base_lr 0.01 -> 0.004, weight_decay 0.0005 -> 0.05
|
||||
5. ema momentum: 0.0001 -> 0.0002
|
||||
|
||||
**Note**:
|
||||
|
||||
|
|
|
@ -20,9 +20,9 @@ Collections:
|
|||
|
||||
|
||||
Models:
|
||||
- Name: yolox_tiny_8xb8-300e_coco
|
||||
- Name: yolox_tiny_fast_8xb8-300e_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_tiny_8xb8-300e_coco.py
|
||||
Config: configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 2.8
|
||||
Epochs: 300
|
||||
|
@ -32,15 +32,87 @@ Models:
|
|||
Metrics:
|
||||
box AP: 32.7
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth
|
||||
- Name: yolox_s_8xb8-300e_coco
|
||||
- Name: yolox_s_fast_8xb8-300e_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_s_8xb8-300e_coco.py
|
||||
Config: configs/yolox/yolox_s_fast_8xb8-300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 5.6
|
||||
Training Memory (GB): 2.9
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 40.8
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth
|
||||
box AP: 40.7
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600-2b224d8b.pth
|
||||
- Name: yolox_m_fast_8xb8-300e_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_m_fast_8xb8-300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 4.9
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 46.9
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218-a71a6b25.pth
|
||||
- Name: yolox_l_fast_8xb8-300e_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_l_fast_8xb8-300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 8.0
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 50.1
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast_8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715-c731eb1c.pth
|
||||
- Name: yolox_x_fast_8xb8-300e_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_x_fast_8xb8-300e_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 9.8
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 51.4
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950-1d509fab.pth
|
||||
- Name: yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 4.9
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 34.3
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637-4c338102.pth
|
||||
- Name: yolox_s_fast_8xb32-300e-rtmdet-hyp_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 9.8
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 41.9
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth
|
||||
- Name: yolox_m_fast_8xb32-300e-rtmdet-hyp_coco
|
||||
In Collection: YOLOX
|
||||
Config: configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py
|
||||
Metadata:
|
||||
Training Memory (GB): 17.6
|
||||
Epochs: 300
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 47.5
|
||||
Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328-e657e182.pth
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
_base_ = './yolox_s_8xb8-300e_coco.py'
|
||||
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 1.0
|
||||
widen_factor = 1.0
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
# model settings
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
|
@ -0,0 +1,12 @@
|
|||
_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
# model settings
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
|
@ -1,8 +1,10 @@
|
|||
_base_ = './yolox_s_8xb8-300e_coco.py'
|
||||
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.67
|
||||
widen_factor = 0.75
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
# model settings
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
|
@ -0,0 +1,21 @@
|
|||
_base_ = './yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.25
|
||||
use_depthwise = True
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
# model settings
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
use_depthwise=use_depthwise),
|
||||
neck=dict(
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
use_depthwise=use_depthwise),
|
||||
bbox_head=dict(
|
||||
head_module=dict(
|
||||
widen_factor=widen_factor, use_depthwise=use_depthwise)))
|
|
@ -1,9 +1,11 @@
|
|||
_base_ = './yolox_tiny_8xb8-300e_coco.py'
|
||||
_base_ = './yolox_tiny_fast_8xb8-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.25
|
||||
use_depthwise = True
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
# model settings
|
||||
model = dict(
|
||||
backbone=dict(
|
|
@ -0,0 +1,55 @@
|
|||
# TODO: Need to solve the problem of multiple file_client_args parameters
|
||||
# _file_client_args = dict(
|
||||
# backend='petrel',
|
||||
# path_mapping=dict({
|
||||
# './data/': 's3://openmmlab/datasets/detection/',
|
||||
# 'data/': 's3://openmmlab/datasets/detection/'
|
||||
# }))
|
||||
_file_client_args = dict(backend='disk')
|
||||
|
||||
tta_model = dict(
|
||||
type='mmdet.DetTTAModel',
|
||||
tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300))
|
||||
|
||||
img_scales = [(640, 640), (320, 320), (960, 960)]
|
||||
|
||||
# LoadImageFromFile
|
||||
# / | \
|
||||
# Resize Resize Resize # noqa
|
||||
# / \ / \ / \
|
||||
# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa
|
||||
# | | | | | |
|
||||
# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn
|
||||
# | | | | | |
|
||||
# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_file_client_args),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='mmdet.Resize', scale=s, keep_ratio=True)
|
||||
for s in img_scales
|
||||
],
|
||||
[
|
||||
# ``RandomFlip`` must be placed before ``Pad``, otherwise
|
||||
# bounding box coordinates after flipping cannot be
|
||||
# recovered correctly.
|
||||
dict(type='mmdet.RandomFlip', prob=1.),
|
||||
dict(type='mmdet.RandomFlip', prob=0.)
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.Pad',
|
||||
pad_to_square=True,
|
||||
pad_val=dict(img=(114.0, 114.0, 114.0))),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor', 'flip', 'flip_direction'))
|
||||
]
|
||||
])
|
||||
]
|
|
@ -0,0 +1,76 @@
|
|||
_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py'
|
||||
|
||||
data_root = './data/cat/'
|
||||
class_name = ('cat', )
|
||||
num_classes = len(class_name)
|
||||
metainfo = dict(classes=class_name, palette=[(20, 220, 60)])
|
||||
|
||||
num_last_epochs = 5
|
||||
|
||||
max_epochs = 40
|
||||
train_batch_size_per_gpu = 12
|
||||
train_num_workers = 4
|
||||
|
||||
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth' # noqa
|
||||
|
||||
model = dict(
|
||||
backbone=dict(frozen_stages=4),
|
||||
bbox_head=dict(head_module=dict(num_classes=num_classes)))
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
dataset=dict(
|
||||
data_root=data_root,
|
||||
metainfo=metainfo,
|
||||
ann_file='annotations/trainval.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
metainfo=metainfo,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/test.json',
|
||||
data_prefix=dict(img='images/')))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
param_scheduler = [
|
||||
dict(
|
||||
# use quadratic formula to warm up 3 epochs
|
||||
# and lr is updated by iteration
|
||||
# TODO: fix default scope in get function
|
||||
type='mmdet.QuadraticWarmupLR',
|
||||
by_epoch=True,
|
||||
begin=0,
|
||||
end=3,
|
||||
convert_to_iter_based=True),
|
||||
dict(
|
||||
# use cosine lr from 5 to 35 epoch
|
||||
type='CosineAnnealingLR',
|
||||
eta_min=_base_.base_lr * 0.05,
|
||||
begin=5,
|
||||
T_max=max_epochs - num_last_epochs,
|
||||
end=max_epochs - num_last_epochs,
|
||||
by_epoch=True,
|
||||
convert_to_iter_based=True),
|
||||
dict(
|
||||
# use fixed lr during last num_last_epochs epochs
|
||||
type='ConstantLR',
|
||||
by_epoch=True,
|
||||
factor=1,
|
||||
begin=max_epochs - num_last_epochs,
|
||||
end=max_epochs,
|
||||
)
|
||||
]
|
||||
|
||||
_base_.custom_hooks[0].num_last_epochs = num_last_epochs
|
||||
|
||||
val_evaluator = dict(ann_file=data_root + 'annotations/test.json')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
|
||||
logger=dict(type='LoggerHook', interval=5))
|
||||
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
|
||||
# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa
|
|
@ -0,0 +1,87 @@
|
|||
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
# Batch size of a single GPU during training
|
||||
# 8 -> 32
|
||||
train_batch_size_per_gpu = 32
|
||||
|
||||
# Multi-scale training intervals
|
||||
# 10 -> 1
|
||||
batch_augments_interval = 1
|
||||
|
||||
# Last epoch number to switch training pipeline
|
||||
# 15 -> 20
|
||||
num_last_epochs = 20
|
||||
|
||||
# Base learning rate for optim_wrapper. Corresponding to 8xb32=256 bs
|
||||
base_lr = 0.004
|
||||
|
||||
# SGD -> AdamW
|
||||
optim_wrapper = dict(
|
||||
_delete_=True,
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
|
||||
paramwise_cfg=dict(
|
||||
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
|
||||
|
||||
# 0.0001 -> 0.0002
|
||||
ema_momentum = 0.0002
|
||||
|
||||
# ============================== Unmodified in most cases ===================
|
||||
model = dict(
|
||||
data_preprocessor=dict(batch_augments=[
|
||||
dict(
|
||||
type='YOLOXBatchSyncRandomResize',
|
||||
random_size_range=(480, 800),
|
||||
size_divisor=32,
|
||||
interval=batch_augments_interval)
|
||||
]))
|
||||
|
||||
param_scheduler = [
|
||||
dict(
|
||||
# use quadratic formula to warm up 5 epochs
|
||||
# and lr is updated by iteration
|
||||
# TODO: fix default scope in get function
|
||||
type='mmdet.QuadraticWarmupLR',
|
||||
by_epoch=True,
|
||||
begin=0,
|
||||
end=5,
|
||||
convert_to_iter_based=True),
|
||||
dict(
|
||||
# use cosine lr from 5 to 285 epoch
|
||||
type='CosineAnnealingLR',
|
||||
eta_min=base_lr * 0.05,
|
||||
begin=5,
|
||||
T_max=_base_.max_epochs - num_last_epochs,
|
||||
end=_base_.max_epochs - num_last_epochs,
|
||||
by_epoch=True,
|
||||
convert_to_iter_based=True),
|
||||
dict(
|
||||
# use fixed lr during last num_last_epochs epochs
|
||||
type='ConstantLR',
|
||||
by_epoch=True,
|
||||
factor=1,
|
||||
begin=_base_.max_epochs - num_last_epochs,
|
||||
end=_base_.max_epochs,
|
||||
)
|
||||
]
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
type='YOLOXModeSwitchHook',
|
||||
num_last_epochs=num_last_epochs,
|
||||
new_train_pipeline=_base_.train_pipeline_stage2,
|
||||
priority=48),
|
||||
dict(type='mmdet.SyncNormHook', priority=48),
|
||||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=ema_momentum,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49)
|
||||
]
|
||||
|
||||
train_dataloader = dict(batch_size=train_batch_size_per_gpu)
|
||||
train_cfg = dict(dynamic_intervals=[(_base_.max_epochs - num_last_epochs, 1)])
|
||||
auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu)
|
|
@ -1,21 +1,73 @@
|
|||
_base_ = '../_base_/default_runtime.py'
|
||||
_base_ = ['../_base_/default_runtime.py', 'yolox_p5_tta.py']
|
||||
|
||||
data_root = 'data/coco/'
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# ========================Frequently modified parameters======================
|
||||
# -----data related-----
|
||||
data_root = 'data/coco/' # Root path of data
|
||||
# path of train annotation file
|
||||
train_ann_file = 'annotations/instances_train2017.json'
|
||||
train_data_prefix = 'train2017/' # Prefix of train image path
|
||||
# path of val annotation file
|
||||
val_ann_file = 'annotations/instances_val2017.json'
|
||||
val_data_prefix = 'val2017/' # Prefix of train image path
|
||||
|
||||
img_scale = (640, 640) # width, height
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.5
|
||||
|
||||
save_epoch_intervals = 10
|
||||
num_classes = 80 # Number of classes for classification
|
||||
# Batch size of a single GPU during training
|
||||
train_batch_size_per_gpu = 8
|
||||
# Worker to pre-fetch data for each single GPU during tarining
|
||||
train_num_workers = 8
|
||||
# Presistent_workers must be False if num_workers is 0
|
||||
persistent_workers = True
|
||||
|
||||
# -----train val related-----
|
||||
# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs
|
||||
base_lr = 0.01
|
||||
max_epochs = 300 # Maximum training epochs
|
||||
|
||||
model_test_cfg = dict(
|
||||
yolox_style=True, # better
|
||||
# The config of multi-label for multi-class prediction
|
||||
multi_label=True, # 40.5 -> 40.7
|
||||
score_thr=0.001, # Threshold to filter out boxes
|
||||
max_per_img=300, # Max number of detections of each image
|
||||
nms=dict(type='nms', iou_threshold=0.65)) # NMS type and threshold
|
||||
|
||||
# ========================Possible modified parameters========================
|
||||
# -----data related-----
|
||||
img_scale = (640, 640) # width, height
|
||||
# Dataset type, this will be used to define the dataset
|
||||
dataset_type = 'YOLOv5CocoDataset'
|
||||
# Batch size of a single GPU during validation
|
||||
val_batch_size_per_gpu = 1
|
||||
# Worker to pre-fetch data for each single GPU during validation
|
||||
val_num_workers = 2
|
||||
|
||||
max_epochs = 300
|
||||
num_last_epochs = 15
|
||||
# -----model related-----
|
||||
# The scaling factor that controls the depth of the network structure
|
||||
deepen_factor = 0.33
|
||||
# The scaling factor that controls the width of the network structure
|
||||
widen_factor = 0.5
|
||||
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
|
||||
# generate new random resize shape interval
|
||||
batch_augments_interval = 10
|
||||
|
||||
# -----train val related-----
|
||||
weight_decay = 0.0005
|
||||
loss_cls_weight = 1.0
|
||||
loss_bbox_weight = 5.0
|
||||
loss_obj_weight = 1.0
|
||||
loss_bbox_aux_weight = 1.0
|
||||
center_radius = 2.5 # SimOTAAssigner
|
||||
num_last_epochs = 15
|
||||
random_affine_scaling_ratio_range = (0.1, 2)
|
||||
mixup_ratio_range = (0.8, 1.6)
|
||||
# Save model checkpoint and validation intervals
|
||||
save_epoch_intervals = 10
|
||||
# The maximum checkpoints to keep.
|
||||
max_keep_ckpts = 3
|
||||
|
||||
ema_momentum = 0.0001
|
||||
|
||||
# ===============================Unmodified in most cases====================
|
||||
# model settings
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
|
@ -29,14 +81,14 @@ model = dict(
|
|||
# TODO: Waiting for mmengine support
|
||||
use_syncbn=False,
|
||||
data_preprocessor=dict(
|
||||
type='mmdet.DetDataPreprocessor',
|
||||
type='YOLOv5DetDataPreprocessor',
|
||||
pad_size_divisor=32,
|
||||
batch_augments=[
|
||||
dict(
|
||||
type='mmdet.BatchSyncRandomResize',
|
||||
type='YOLOXBatchSyncRandomResize',
|
||||
random_size_range=(480, 800),
|
||||
size_divisor=32,
|
||||
interval=10)
|
||||
interval=batch_augments_interval)
|
||||
]),
|
||||
backbone=dict(
|
||||
type='YOLOXCSPDarknet',
|
||||
|
@ -44,7 +96,7 @@ model = dict(
|
|||
widen_factor=widen_factor,
|
||||
out_indices=(2, 3, 4),
|
||||
spp_kernal_sizes=(5, 9, 13),
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True),
|
||||
),
|
||||
neck=dict(
|
||||
|
@ -53,51 +105,48 @@ model = dict(
|
|||
widen_factor=widen_factor,
|
||||
in_channels=[256, 512, 1024],
|
||||
out_channels=256,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
bbox_head=dict(
|
||||
type='YOLOXHead',
|
||||
head_module=dict(
|
||||
type='YOLOXHeadModule',
|
||||
num_classes=80,
|
||||
num_classes=num_classes,
|
||||
in_channels=256,
|
||||
feat_channels=256,
|
||||
widen_factor=widen_factor,
|
||||
stacked_convs=2,
|
||||
featmap_strides=(8, 16, 32),
|
||||
use_depthwise=False,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='SiLU', inplace=True),
|
||||
),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='sum',
|
||||
loss_weight=1.0),
|
||||
loss_weight=loss_cls_weight),
|
||||
loss_bbox=dict(
|
||||
type='mmdet.IoULoss',
|
||||
mode='square',
|
||||
eps=1e-16,
|
||||
reduction='sum',
|
||||
loss_weight=5.0),
|
||||
loss_weight=loss_bbox_weight),
|
||||
loss_obj=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='sum',
|
||||
loss_weight=1.0),
|
||||
loss_weight=loss_obj_weight),
|
||||
loss_bbox_aux=dict(
|
||||
type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)),
|
||||
type='mmdet.L1Loss',
|
||||
reduction='sum',
|
||||
loss_weight=loss_bbox_aux_weight)),
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='mmdet.SimOTAAssigner',
|
||||
center_radius=2.5,
|
||||
center_radius=center_radius,
|
||||
iou_calculator=dict(type='mmdet.BboxOverlaps2D'))),
|
||||
test_cfg=dict(
|
||||
yolox_style=True, # better
|
||||
multi_label=True, # 40.5 -> 40.7
|
||||
score_thr=0.001,
|
||||
max_per_img=300,
|
||||
nms=dict(type='nms', iou_threshold=0.65)))
|
||||
test_cfg=model_test_cfg)
|
||||
|
||||
pre_transform = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
|
@ -113,13 +162,13 @@ train_pipeline_stage1 = [
|
|||
pre_transform=pre_transform),
|
||||
dict(
|
||||
type='mmdet.RandomAffine',
|
||||
scaling_ratio_range=(0.1, 2),
|
||||
scaling_ratio_range=random_affine_scaling_ratio_range,
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
|
||||
dict(
|
||||
type='YOLOXMixUp',
|
||||
img_scale=img_scale,
|
||||
ratio_range=(0.8, 1.6),
|
||||
ratio_range=mixup_ratio_range,
|
||||
pad_val=114.0,
|
||||
pre_transform=pre_transform),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
||||
|
@ -155,14 +204,15 @@ train_pipeline_stage2 = [
|
|||
train_dataloader = dict(
|
||||
batch_size=train_batch_size_per_gpu,
|
||||
num_workers=train_num_workers,
|
||||
persistent_workers=True,
|
||||
persistent_workers=persistent_workers,
|
||||
pin_memory=True,
|
||||
collate_fn=dict(type='yolov5_collate'),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/'),
|
||||
ann_file=train_ann_file,
|
||||
data_prefix=dict(img=train_data_prefix),
|
||||
filter_cfg=dict(filter_empty_gt=False, min_size=32),
|
||||
pipeline=train_pipeline_stage1))
|
||||
|
||||
|
@ -183,15 +233,15 @@ test_pipeline = [
|
|||
val_dataloader = dict(
|
||||
batch_size=val_batch_size_per_gpu,
|
||||
num_workers=val_num_workers,
|
||||
persistent_workers=True,
|
||||
persistent_workers=persistent_workers,
|
||||
pin_memory=True,
|
||||
drop_last=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_val2017.json',
|
||||
data_prefix=dict(img='val2017/'),
|
||||
ann_file=val_ann_file,
|
||||
data_prefix=dict(img=val_data_prefix),
|
||||
test_mode=True,
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
@ -200,18 +250,20 @@ test_dataloader = val_dataloader
|
|||
val_evaluator = dict(
|
||||
type='mmdet.CocoMetric',
|
||||
proposal_nums=(100, 1, 10),
|
||||
ann_file=data_root + 'annotations/instances_val2017.json',
|
||||
ann_file=data_root + val_ann_file,
|
||||
metric='bbox')
|
||||
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# optimizer
|
||||
# default 8 gpu
|
||||
base_lr = 0.01
|
||||
optim_wrapper = dict(
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(
|
||||
type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
|
||||
type='SGD',
|
||||
lr=base_lr,
|
||||
momentum=0.9,
|
||||
weight_decay=weight_decay,
|
||||
nesterov=True),
|
||||
paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
|
||||
|
||||
|
@ -247,7 +299,10 @@ param_scheduler = [
|
|||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='auto'))
|
||||
type='CheckpointHook',
|
||||
interval=save_epoch_intervals,
|
||||
max_keep_ckpts=max_keep_ckpts,
|
||||
save_best='auto'))
|
||||
|
||||
custom_hooks = [
|
||||
dict(
|
||||
|
@ -259,7 +314,7 @@ custom_hooks = [
|
|||
dict(
|
||||
type='EMAHook',
|
||||
ema_type='ExpMomentumEMA',
|
||||
momentum=0.0001,
|
||||
momentum=ema_momentum,
|
||||
update_buffers=True,
|
||||
strict_load=False,
|
||||
priority=49)
|
||||
|
@ -271,6 +326,6 @@ train_cfg = dict(
|
|||
val_interval=save_epoch_intervals,
|
||||
dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=64)
|
||||
auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu)
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
|
@ -1,24 +1,32 @@
|
|||
_base_ = './yolox_s_8xb8-300e_coco.py'
|
||||
_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.375
|
||||
|
||||
# Multi-scale training intervals
|
||||
# 10 -> 1
|
||||
batch_augments_interval = 1
|
||||
|
||||
scaling_ratio_range = (0.5, 1.5)
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
img_scale = _base_.img_scale
|
||||
pre_transform = _base_.pre_transform
|
||||
|
||||
# model settings
|
||||
model = dict(
|
||||
data_preprocessor=dict(batch_augments=[
|
||||
dict(
|
||||
type='mmdet.BatchSyncRandomResize',
|
||||
random_size_range=(320, 640), # note
|
||||
type='YOLOXBatchSyncRandomResize',
|
||||
random_size_range=(320, 640),
|
||||
size_divisor=32,
|
||||
interval=10)
|
||||
interval=batch_augments_interval)
|
||||
]),
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
img_scale = _base_.img_scale
|
||||
pre_transform = _base_.pre_transform
|
||||
|
||||
train_pipeline_stage1 = [
|
||||
*pre_transform,
|
||||
dict(
|
||||
|
@ -28,7 +36,7 @@ train_pipeline_stage1 = [
|
|||
pre_transform=pre_transform),
|
||||
dict(
|
||||
type='mmdet.RandomAffine',
|
||||
scaling_ratio_range=(0.5, 1.5), # note
|
||||
scaling_ratio_range=scaling_ratio_range, # note
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
|
@ -0,0 +1,100 @@
|
|||
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 0.33
|
||||
widen_factor = 0.375
|
||||
scaling_ratio_range = (0.5, 1.5)
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
img_scale = _base_.img_scale
|
||||
pre_transform = _base_.pre_transform
|
||||
|
||||
test_img_scale = (416, 416)
|
||||
tta_img_scales = [test_img_scale, (320, 320), (640, 640)]
|
||||
|
||||
# model settings
|
||||
model = dict(
|
||||
data_preprocessor=dict(batch_augments=[
|
||||
dict(
|
||||
type='YOLOXBatchSyncRandomResize',
|
||||
random_size_range=(320, 640),
|
||||
size_divisor=32,
|
||||
interval=10)
|
||||
]),
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
||||
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
|
||||
|
||||
train_pipeline_stage1 = [
|
||||
*pre_transform,
|
||||
dict(
|
||||
type='Mosaic',
|
||||
img_scale=img_scale,
|
||||
pad_val=114.0,
|
||||
pre_transform=pre_transform),
|
||||
dict(
|
||||
type='mmdet.RandomAffine',
|
||||
scaling_ratio_range=scaling_ratio_range, # note
|
||||
# img_scale is (width, height)
|
||||
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
|
||||
dict(type='mmdet.YOLOXHSVRandomAug'),
|
||||
dict(type='mmdet.RandomFlip', prob=0.5),
|
||||
dict(
|
||||
type='mmdet.FilterAnnotations',
|
||||
min_gt_bbox_wh=(1, 1),
|
||||
keep_empty=False),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'flip_direction'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(type='mmdet.Resize', scale=test_img_scale, keep_ratio=True), # note
|
||||
dict(
|
||||
type='mmdet.Pad',
|
||||
pad_to_square=True,
|
||||
pad_val=dict(img=(114.0, 114.0, 114.0))),
|
||||
dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# Config for Test Time Augmentation. (TTA)
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='mmdet.Resize', scale=s, keep_ratio=True)
|
||||
for s in tta_img_scales
|
||||
],
|
||||
[
|
||||
# ``RandomFlip`` must be placed before ``Pad``, otherwise
|
||||
# bounding box coordinates after flipping cannot be
|
||||
# recovered correctly.
|
||||
dict(type='mmdet.RandomFlip', prob=1.),
|
||||
dict(type='mmdet.RandomFlip', prob=0.)
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.Pad',
|
||||
pad_to_square=True,
|
||||
pad_val=dict(img=(114.0, 114.0, 114.0))),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
|
||||
'scale_factor', 'flip', 'flip_direction'))
|
||||
]
|
||||
])
|
||||
]
|
|
@ -1,8 +1,10 @@
|
|||
_base_ = './yolox_s_8xb8-300e_coco.py'
|
||||
_base_ = './yolox_s_fast_8xb8-300e_coco.py'
|
||||
|
||||
# ========================modified parameters======================
|
||||
deepen_factor = 1.33
|
||||
widen_factor = 1.25
|
||||
|
||||
# =======================Unmodified in most cases==================
|
||||
# model settings
|
||||
model = dict(
|
||||
backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
|
|
@ -18,7 +18,6 @@ import mmcv
|
|||
from mmengine import Config, DictAction, MessageHub
|
||||
from mmengine.utils import ProgressBar
|
||||
|
||||
from mmyolo.utils import register_all_modules
|
||||
from mmyolo.utils.boxam_utils import (BoxAMDetectorVisualizer,
|
||||
BoxAMDetectorWrapper, DetAblationLayer,
|
||||
DetBoxScoreTarget, GradCAM,
|
||||
|
@ -47,6 +46,9 @@ IGNORE_LOSS_PARAMS = {
|
|||
'yolov6': ['loss_cls'],
|
||||
'yolox': ['loss_obj'],
|
||||
'rtmdet': ['loss_cls'],
|
||||
'yolov7': ['loss_obj'],
|
||||
'yolov8': ['loss_cls'],
|
||||
'ppyoloe': ['loss_cls'],
|
||||
}
|
||||
|
||||
# This parameter is required in some algorithms
|
||||
|
@ -177,8 +179,6 @@ def init_detector_and_visualizer(args, cfg):
|
|||
|
||||
|
||||
def main():
|
||||
register_all_modules()
|
||||
|
||||
args = parse_args()
|
||||
|
||||
# hard code
|
||||
|
|
|
@ -6,10 +6,10 @@ from typing import Sequence
|
|||
import mmcv
|
||||
from mmdet.apis import inference_detector, init_detector
|
||||
from mmengine import Config, DictAction
|
||||
from mmengine.registry import init_default_scope
|
||||
from mmengine.utils import ProgressBar
|
||||
|
||||
from mmyolo.registry import VISUALIZERS
|
||||
from mmyolo.utils import register_all_modules
|
||||
from mmyolo.utils.misc import auto_arrange_images, get_file_list
|
||||
|
||||
|
||||
|
@ -96,13 +96,12 @@ class ActivationsWrapper:
|
|||
def main():
|
||||
args = parse_args()
|
||||
|
||||
# register all modules in mmdet into the registries
|
||||
register_all_modules()
|
||||
|
||||
cfg = Config.fromfile(args.config)
|
||||
if args.cfg_options is not None:
|
||||
cfg.merge_from_dict(args.cfg_options)
|
||||
|
||||
init_default_scope(cfg.get('default_scope', 'mmyolo'))
|
||||
|
||||
channel_reduction = args.channel_reduction
|
||||
if channel_reduction == 'None':
|
||||
channel_reduction = None
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
import mmcv
|
||||
from mmdet.apis import inference_detector, init_detector
|
||||
from mmengine.config import Config, ConfigDict
|
||||
from mmengine.logging import print_log
|
||||
from mmengine.utils import ProgressBar, path
|
||||
|
||||
from mmyolo.registry import VISUALIZERS
|
||||
from mmyolo.utils import register_all_modules, switch_to_deploy
|
||||
from mmyolo.utils import switch_to_deploy
|
||||
from mmyolo.utils.labelme_utils import LabelmeFormat
|
||||
from mmyolo.utils.misc import get_file_list, show_data_classes
|
||||
|
||||
|
@ -29,6 +31,10 @@ def parse_args():
|
|||
'--deploy',
|
||||
action='store_true',
|
||||
help='Switch model to deployment mode')
|
||||
parser.add_argument(
|
||||
'--tta',
|
||||
action='store_true',
|
||||
help='Whether to use test time augmentation')
|
||||
parser.add_argument(
|
||||
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
|
||||
parser.add_argument(
|
||||
|
@ -50,12 +56,37 @@ def main():
|
|||
if args.to_labelme and args.show:
|
||||
raise RuntimeError('`--to-labelme` or `--show` only '
|
||||
'can choose one at the same time.')
|
||||
config = args.config
|
||||
|
||||
# register all modules in mmdet into the registries
|
||||
register_all_modules()
|
||||
if isinstance(config, (str, Path)):
|
||||
config = Config.fromfile(config)
|
||||
elif not isinstance(config, Config):
|
||||
raise TypeError('config must be a filename or Config object, '
|
||||
f'but got {type(config)}')
|
||||
if 'init_cfg' in config.model.backbone:
|
||||
config.model.backbone.init_cfg = None
|
||||
|
||||
if args.tta:
|
||||
assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \
|
||||
" Can't use tta !"
|
||||
assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \
|
||||
"in config. Can't use tta !"
|
||||
config.model = ConfigDict(**config.tta_model, module=config.model)
|
||||
test_data_cfg = config.test_dataloader.dataset
|
||||
while 'dataset' in test_data_cfg:
|
||||
test_data_cfg = test_data_cfg['dataset']
|
||||
|
||||
# batch_shapes_cfg will force control the size of the output image,
|
||||
# it is not compatible with tta.
|
||||
if 'batch_shapes_cfg' in test_data_cfg:
|
||||
test_data_cfg.batch_shapes_cfg = None
|
||||
test_data_cfg.pipeline = config.tta_pipeline
|
||||
|
||||
# TODO: TTA mode will error if cfg_options is not set.
|
||||
# This is an mmdet issue and needs to be fixed later.
|
||||
# build the model from a config file and a checkpoint file
|
||||
model = init_detector(args.config, args.checkpoint, device=args.device)
|
||||
model = init_detector(
|
||||
config, args.checkpoint, device=args.device, cfg_options={})
|
||||
|
||||
if args.deploy:
|
||||
switch_to_deploy(model)
|
||||
|
|
|
@ -14,10 +14,12 @@ python demo/large_image_demo.py \
|
|||
import os
|
||||
import random
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
import mmcv
|
||||
import numpy as np
|
||||
from mmdet.apis import inference_detector, init_detector
|
||||
from mmengine.config import Config, ConfigDict
|
||||
from mmengine.logging import print_log
|
||||
from mmengine.utils import ProgressBar
|
||||
|
||||
|
@ -28,7 +30,7 @@ except ImportError:
|
|||
'to install sahi first for large image inference.')
|
||||
|
||||
from mmyolo.registry import VISUALIZERS
|
||||
from mmyolo.utils import register_all_modules, switch_to_deploy
|
||||
from mmyolo.utils import switch_to_deploy
|
||||
from mmyolo.utils.large_image import merge_results_by_nms, shift_predictions
|
||||
from mmyolo.utils.misc import get_file_list
|
||||
|
||||
|
@ -50,13 +52,17 @@ def parse_args():
|
|||
'--deploy',
|
||||
action='store_true',
|
||||
help='Switch model to deployment mode')
|
||||
parser.add_argument(
|
||||
'--tta',
|
||||
action='store_true',
|
||||
help='Whether to use test time augmentation')
|
||||
parser.add_argument(
|
||||
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
|
||||
parser.add_argument(
|
||||
'--patch-size', type=int, default=640, help='The size of patches')
|
||||
parser.add_argument(
|
||||
'--patch-overlap-ratio',
|
||||
type=int,
|
||||
type=float,
|
||||
default=0.25,
|
||||
help='Ratio of overlap between two patches')
|
||||
parser.add_argument(
|
||||
|
@ -90,11 +96,37 @@ def parse_args():
|
|||
def main():
|
||||
args = parse_args()
|
||||
|
||||
# register all modules in mmdet into the registries
|
||||
register_all_modules()
|
||||
config = args.config
|
||||
|
||||
if isinstance(config, (str, Path)):
|
||||
config = Config.fromfile(config)
|
||||
elif not isinstance(config, Config):
|
||||
raise TypeError('config must be a filename or Config object, '
|
||||
f'but got {type(config)}')
|
||||
if 'init_cfg' in config.model.backbone:
|
||||
config.model.backbone.init_cfg = None
|
||||
|
||||
if args.tta:
|
||||
assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \
|
||||
" Can't use tta !"
|
||||
assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \
|
||||
"in config. Can't use tta !"
|
||||
config.model = ConfigDict(**config.tta_model, module=config.model)
|
||||
test_data_cfg = config.test_dataloader.dataset
|
||||
while 'dataset' in test_data_cfg:
|
||||
test_data_cfg = test_data_cfg['dataset']
|
||||
|
||||
# batch_shapes_cfg will force control the size of the output image,
|
||||
# it is not compatible with tta.
|
||||
if 'batch_shapes_cfg' in test_data_cfg:
|
||||
test_data_cfg.batch_shapes_cfg = None
|
||||
test_data_cfg.pipeline = config.tta_pipeline
|
||||
|
||||
# TODO: TTA mode will error if cfg_options is not set.
|
||||
# This is an mmdet issue and needs to be fixed later.
|
||||
# build the model from a config file and a checkpoint file
|
||||
model = init_detector(args.config, args.checkpoint, device=args.device)
|
||||
model = init_detector(
|
||||
config, args.checkpoint, device=args.device, cfg_options={})
|
||||
|
||||
if args.deploy:
|
||||
switch_to_deploy(model)
|
||||
|
@ -238,7 +270,7 @@ def main():
|
|||
src_image_shape=(height, width),
|
||||
nms_cfg={
|
||||
'type': args.merge_nms_type,
|
||||
'iou_thr': args.merge_iou_thr
|
||||
'iou_threshold': args.merge_iou_thr
|
||||
})
|
||||
|
||||
visualizer.add_datasample(
|
||||
|
|
|
@ -20,7 +20,6 @@ from mmdet.apis import inference_detector, init_detector
|
|||
from mmengine.utils import track_iter_progress
|
||||
|
||||
from mmyolo.registry import VISUALIZERS
|
||||
from mmyolo.utils import register_all_modules
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
@ -49,9 +48,6 @@ def main():
|
|||
('Please specify at least one operation (save/show the '
|
||||
'video) with the argument "--out" or "--show"')
|
||||
|
||||
# register all modules in mmdet into the registries
|
||||
register_all_modules()
|
||||
|
||||
# build the model from a config file and a checkpoint file
|
||||
model = init_detector(args.config, args.checkpoint, device=args.device)
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ RUN apt-get update \
|
|||
|
||||
# Install MMEngine , MMCV and MMDet
|
||||
RUN pip install --no-cache-dir openmim && \
|
||||
mim install --no-cache-dir "mmengine>=0.3.1" "mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc5,<3.1.0"
|
||||
mim install --no-cache-dir "mmengine>=0.6.0" "mmcv>=2.0.0rc4,<2.1.0" "mmdet>=3.0.0rc6,<3.1.0"
|
||||
|
||||
# Install MMYOLO
|
||||
RUN git clone https://github.com/open-mmlab/mmyolo.git /mmyolo && \
|
||||
|
|
|
@ -30,7 +30,7 @@ RUN wget -q https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRU
|
|||
|
||||
# Install OPENMIM MMENGINE MMDET
|
||||
RUN pip install --no-cache-dir openmim \
|
||||
&& mim install --no-cache-dir "mmengine>=0.3.1" "mmdet>=3.0.0rc5,<3.1.0" \
|
||||
&& mim install --no-cache-dir "mmengine>=0.6.0" "mmdet>=3.0.0rc6,<3.1.0" \
|
||||
&& mim install --no-cache-dir opencv-python==4.5.5.64 opencv-python-headless==4.5.5.64
|
||||
|
||||
RUN git clone https://github.com/open-mmlab/mmcv.git -b 2.x mmcv \
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
## Build Documentation
|
||||
|
||||
1. Clone MMYOLO
|
||||
|
||||
```bash
|
||||
git clone https://github.com/open-mmlab/mmyolo.git
|
||||
cd mmyolo
|
||||
```
|
||||
|
||||
2. Install the building dependencies of documentation
|
||||
|
||||
```bash
|
||||
pip install -r requirements/docs.txt
|
||||
```
|
||||
|
||||
3. Change directory to `docs/en` or `docs/zh_cn`
|
||||
|
||||
```bash
|
||||
cd docs/en # or docs/zh_cn
|
||||
```
|
||||
|
||||
4. Build documentation
|
||||
|
||||
```bash
|
||||
make html
|
||||
```
|
||||
|
||||
5. Open `_build/html/index.html` with browser
|
|
@ -0,0 +1 @@
|
|||
# MMYOLO cross-library application
|
|
@ -1,548 +0,0 @@
|
|||
# How to xxx
|
||||
|
||||
This tutorial collects answers to any `How to xxx with MMYOLO`. Feel free to update this doc if you meet new questions about `How to` and find the answers!
|
||||
|
||||
## Add plugins to the backbone network
|
||||
|
||||
Please see [Plugins](plugins.md).
|
||||
|
||||
## Apply multiple Necks
|
||||
|
||||
If you want to stack multiple Necks, you can directly set the Neck parameters in the config. MMYOLO supports concatenating multiple Necks in the form of `List`. You need to ensure that the output channel of the previous Neck matches the input channel of the next Neck. If you need to adjust the number of channels, you can insert the `mmdet.ChannelMapper` module to align the number of channels between multiple Necks. The specific configuration is as follows:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = _base_.widen_factor
|
||||
model = dict(
|
||||
type='YOLODetector',
|
||||
neck=[
|
||||
dict(
|
||||
type='YOLOv5PAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=[256, 512, 1024],
|
||||
out_channels=[256, 512, 1024], # The out_channels is controlled by widen_factor,so the YOLOv5PAFPN's out_channels equls to out_channels * widen_factor
|
||||
num_csp_blocks=3,
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
act_cfg=dict(type='SiLU', inplace=True)),
|
||||
dict(
|
||||
type='mmdet.ChannelMapper',
|
||||
in_channels=[128, 256, 512],
|
||||
out_channels=128,
|
||||
),
|
||||
dict(
|
||||
type='mmdet.DyHead',
|
||||
in_channels=128,
|
||||
out_channels=256,
|
||||
num_blocks=2,
|
||||
# disable zero_init_offset to follow official implementation
|
||||
zero_init_offset=False)
|
||||
]
|
||||
bbox_head=dict(head_module=dict(in_channels=[512,512,512])) # The out_channels is controlled by widen_factor,so the YOLOv5HeadModuled in_channels * widen_factor equals to the last neck's out_channels
|
||||
)
|
||||
```
|
||||
|
||||
## Replace the backbone network
|
||||
|
||||
```{note}
|
||||
1. When using other backbone networks, you need to ensure that the output channels of the backbone network match the input channels of the neck network.
|
||||
2. The configuration files given below only ensure that the training will work correctly, and their training performance may not be optimal. Because some backbones require specific learning rates, optimizers, and other hyperparameters. Related contents will be added in the "Training Tips" section later.
|
||||
```
|
||||
|
||||
### Use backbone network implemented in MMYOLO
|
||||
|
||||
Suppose you want to use `YOLOv6EfficientRep` as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
type='YOLOv6EfficientRep',
|
||||
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
|
||||
act_cfg=dict(type='ReLU', inplace=True))
|
||||
)
|
||||
```
|
||||
|
||||
### Use backbone network implemented in other OpenMMLab repositories
|
||||
|
||||
The model registry in MMYOLO, MMDetection, MMClassification, and MMSegmentation all inherit from the root registry in MMEngine in the OpenMMLab 2.0 system, allowing these repositories to directly use modules already implemented by each other. Therefore, in MMYOLO, users can use backbone networks from MMDetection and MMClassification without reimplementation.
|
||||
|
||||
#### Use backbone network implemented in MMDetection
|
||||
|
||||
1. Suppose you want to use `ResNet-50` as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [512, 1024, 2048]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmdet.ResNet', # Using ResNet from mmdet
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
2. Suppose you want to use `SwinTransformer-Tiny` as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [192, 384, 768]
|
||||
checkpoint_file = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmdet.SwinTransformer', # Using SwinTransformer from mmdet
|
||||
embed_dims=96,
|
||||
depths=[2, 2, 6, 2],
|
||||
num_heads=[3, 6, 12, 24],
|
||||
window_size=7,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.2,
|
||||
patch_norm=True,
|
||||
out_indices=(1, 2, 3),
|
||||
with_cp=False,
|
||||
convert_weights=True,
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)),
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of SwinTransformer-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
#### Use backbone network implemented in MMClassification
|
||||
|
||||
1. Suppose you want to use `ConvNeXt-Tiny` as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls
|
||||
# import mmcls.models to trigger register_module in mmcls
|
||||
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [192, 384, 768]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmcls.ConvNeXt', # Using ConvNeXt from mmcls
|
||||
arch='tiny',
|
||||
out_indices=(1, 2, 3),
|
||||
drop_path_rate=0.4,
|
||||
layer_scale_init_value=1.0,
|
||||
gap_before_final_norm=False,
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint=checkpoint_file,
|
||||
prefix='backbone.')), # The pre-trained weights of backbone network in MMCls have prefix='backbone.'. The prefix in the keys will be removed so that these weights can be normally loaded.
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of ConvNeXt-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
2. Suppose you want to use `MobileNetV3-small` as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls
|
||||
# import mmcls.models to trigger register_module in mmcls
|
||||
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth' # noqa
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [24, 48, 96]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmcls.MobileNetV3', # Using MobileNetV3 from mmcls
|
||||
arch='small',
|
||||
out_indices=(3, 8, 11), # Modify out_indices
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint=checkpoint_file,
|
||||
prefix='backbone.')), # The pre-trained weights of backbone network in MMCls have prefix='backbone.'. The prefix in the keys will be removed so that these weights can be normally loaded.
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of MobileNetV3 output are [24, 48, 96], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
#### Use backbone network in `timm` through MMClassification
|
||||
|
||||
MMClassification also provides a wrapper for the Py**T**orch **Im**age **M**odels (`timm`) backbone network, users can directly use the backbone network in `timm` through MMClassification. Suppose you want to use `EfficientNet-B1` as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls
|
||||
# and the command, pip install timm, to install timm
|
||||
# import mmcls.models to trigger register_module in mmcls
|
||||
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
|
||||
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [40, 112, 320]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmcls.TIMMBackbone', # Using timm from mmcls
|
||||
model_name='efficientnet_b1', # Using efficientnet_b1 in timm
|
||||
features_only=True,
|
||||
pretrained=True,
|
||||
out_indices=(2, 3, 4)),
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of EfficientNet-B1 output are [40, 112, 320], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
#### Use backbone network implemented in MMSelfSup
|
||||
|
||||
Suppose you want to use `ResNet-50` which is self-supervised trained by `MoCo v3` in MMSelfSup as the backbone network of `YOLOv5`, the example config is as the following:
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
# please run the command, mim install "mmselfsup>=1.0.0rc3", to install mmselfsup
|
||||
# import mmselfsup.models to trigger register_module in mmselfsup
|
||||
custom_imports = dict(imports=['mmselfsup.models'], allow_failed_imports=False)
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-800e_in1k/mocov3_resnet50_8xb512-amp-coslr-800e_in1k_20220927-e043f51a.pth' # noqa
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [512, 1024, 2048]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmselfsup.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(2, 3, 4), # Note: out_indices of ResNet in MMSelfSup are 1 larger than those in MMdet and MMCls
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)),
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
deepen_factor=deepen_factor,
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
#### Don't use pre-trained weights
|
||||
|
||||
When we replace the backbone network, the model initialization by default loads the pre-trained weights of the backbone network. If you want to train the model from scratch instead of using the pre-trained weights of the backbone network,
|
||||
you can set `init_cfg` in `backbone` to `None`. In this case, the backbone network will be initialized with the default initialization method, instead of using the pre-trained weights.
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
deepen_factor = _base_.deepen_factor
|
||||
widen_factor = 1.0
|
||||
channels = [512, 1024, 2048]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
_delete_=True, # Delete the backbone field in _base_
|
||||
type='mmdet.ResNet', # Using ResNet from mmdet
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
init_cfg=None # If init_cfg is set to None, backbone will not be initialized with pre-trained weights
|
||||
),
|
||||
neck=dict(
|
||||
type='YOLOv5PAFPN',
|
||||
widen_factor=widen_factor,
|
||||
in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed.
|
||||
out_channels=channels),
|
||||
bbox_head=dict(
|
||||
type='YOLOv5Head',
|
||||
head_module=dict(
|
||||
type='YOLOv5HeadModule',
|
||||
in_channels=channels, # input channels of head need to be changed accordingly
|
||||
widen_factor=widen_factor))
|
||||
)
|
||||
```
|
||||
|
||||
#### Freeze the weight of backbone or neck
|
||||
|
||||
In MMYOLO, we can freeze some `stages` of the backbone network by setting `frozen_stages` parameters, so that these `stage` parameters do not participate in model updating.
|
||||
It should be noted that `frozen_stages = i` means that all parameters from the initial `stage` to the `i`<sup>th</sup> `stage` will be frozen. The following is an example of `YOLOv5`. Other algorithms are the same logic.
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=1 # Indicates that the parameters in the first stage and all stages before it are frozen
|
||||
))
|
||||
```
|
||||
|
||||
In addition, it is also possible to freeze the whole `neck` with the parameter `freeze_all` in MMYOLO. The following is an example of `YOLOv5`. Other algorithms follow the same logic.
|
||||
|
||||
```python
|
||||
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
|
||||
|
||||
model = dict(
|
||||
neck=dict(
|
||||
freeze_all=True # If freeze_all=True, all parameters of the neck will be frozen
|
||||
))
|
||||
```
|
||||
|
||||
## Output prediction results
|
||||
|
||||
If you want to save the prediction results as a specific file for offline evaluation, MMYOLO currently supports both json and pkl formats.
|
||||
|
||||
```{note}
|
||||
The json file only saves `image_id`, `bbox`, `score` and `category_id`. The json file can be read using the json library.
|
||||
The pkl file holds more content than the json file, and also holds information such as the file name and size of the predicted image; the pkl file can be read using the pickle library.
|
||||
```
|
||||
|
||||
### Output into json file
|
||||
|
||||
If you want to output the prediction results as a json file, the command is as follows.
|
||||
|
||||
```shell
|
||||
python tools/test.py {path_to_config} {path_to_checkpoint} --json-prefix {json_prefix}
|
||||
```
|
||||
|
||||
The argument after `--json-prefix` should be a filename prefix (no need to enter the `.json` suffix) and can also contain a path. For a concrete example:
|
||||
|
||||
```shell
|
||||
python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --json-prefix work_dirs/demo/json_demo
|
||||
```
|
||||
|
||||
Running the above command will output the `json_demo.bbox.json` file in the `work_dirs/demo` folder.
|
||||
|
||||
### Output into pkl file
|
||||
|
||||
If you want to output the prediction results as a pkl file, the command is as follows.
|
||||
|
||||
```shell
|
||||
python tools/test.py {path_to_config} {path_to_checkpoint} --out {path_to_output_file}
|
||||
```
|
||||
|
||||
The argument after `--out` should be a full filename (**must be** with a `.pkl` or `.pickle` suffix) and can also contain a path. For a concrete example:
|
||||
|
||||
```shell
|
||||
python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --out work_dirs/demo/pkl_demo.pkl
|
||||
```
|
||||
|
||||
Running the above command will output the `pkl_demo.pkl` file in the `work_dirs/demo` folder.
|
||||
|
||||
## Use mim to run scripts from other OpenMMLab repositories
|
||||
|
||||
```{note}
|
||||
1. All script calls across libraries are currently not supported and are being fixed. More examples will be added to this document when the fix is complete.
|
||||
2. mAP plotting and average training speed calculation are fixed in the MMDetection dev-3.x branch, which currently needs to be installed via the source code to be run successfully.
|
||||
```
|
||||
|
||||
## Log Analysis
|
||||
|
||||
#### Curve plotting
|
||||
|
||||
`tools/analysis_tools/analyze_logs.py` plots loss/mAP curves given a training log file. Run `pip install seaborn` first to install the dependency.
|
||||
|
||||
```shell
|
||||
mim run mmdet analyze_logs plot_curve \
|
||||
${LOG} \ # path of train log in json format
|
||||
[--keys ${KEYS}] \ # the metric that you want to plot, default to 'bbox_mAP'
|
||||
[--start-epoch ${START_EPOCH}] # the epoch that you want to start, default to 1
|
||||
[--eval-interval ${EVALUATION_INTERVAL}] \ # the evaluation interval when training, default to 1
|
||||
[--title ${TITLE}] \ # title of figure
|
||||
[--legend ${LEGEND}] \ # legend of each plot, default to None
|
||||
[--backend ${BACKEND}] \ # backend of plt, default to None
|
||||
[--style ${STYLE}] \ # style of plt, default to 'dark'
|
||||
[--out ${OUT_FILE}] # the path of output file
|
||||
# [] stands for optional parameters, when actually entering the command line, you do not need to enter []
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
- Plot the classification loss of some run.
|
||||
|
||||
```shell
|
||||
mim run mmdet analyze_logs plot_curve \
|
||||
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \
|
||||
--keys loss_cls \
|
||||
--legend loss_cls
|
||||
```
|
||||
|
||||
<img src="https://user-images.githubusercontent.com/27466624/204747359-754555df-1f97-4d5c-87ca-9ad3a0badcce.png" width="600"/>
|
||||
|
||||
- Plot the classification and regression loss of some run, and save the figure to a pdf.
|
||||
|
||||
```shell
|
||||
mim run mmdet analyze_logs plot_curve \
|
||||
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \
|
||||
--keys loss_cls loss_bbox \
|
||||
--legend loss_cls loss_bbox \
|
||||
--out losses_yolov5_s.pdf
|
||||
```
|
||||
|
||||
<img src="https://user-images.githubusercontent.com/27466624/204748560-2d17ce4b-fb5f-4732-a962-329109e73aad.png" width="600"/>
|
||||
|
||||
- Compare the bbox mAP of two runs in the same figure.
|
||||
|
||||
```shell
|
||||
mim run mmdet analyze_logs plot_curve \
|
||||
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \
|
||||
yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \
|
||||
--keys bbox_mAP \
|
||||
--legend yolov5_s yolov5_n \
|
||||
--eval-interval 10 # Note that the evaluation interval must be the same as during training. Otherwise, it will raise an error.
|
||||
```
|
||||
|
||||
<img src="https://user-images.githubusercontent.com/27466624/204748704-21db9f9e-386e-449c-91c7-2ce3f8b51f24.png" width="600"/>
|
||||
|
||||
#### Compute the average training speed
|
||||
|
||||
```shell
|
||||
mim run mmdet analyze_logs cal_train_time \
|
||||
${LOG} \ # path of train log in json format
|
||||
[--include-outliers] # include the first value of every epoch when computing the average time
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
```shell
|
||||
mim run mmdet analyze_logs cal_train_time \
|
||||
yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json
|
||||
```
|
||||
|
||||
The output is expected to be like the following.
|
||||
|
||||
```text
|
||||
-----Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json-----
|
||||
slowest epoch 278, average time is 0.1705 s/iter
|
||||
fastest epoch 300, average time is 0.1510 s/iter
|
||||
time std over epochs is 0.0026
|
||||
average iter time: 0.1556 s/iter
|
||||
```
|
||||
|
||||
### Print the whole config
|
||||
|
||||
`print_config.py` in MMDetection prints the whole config verbatim, expanding all its imports. The command is as following.
|
||||
|
||||
```shell
|
||||
mim run mmdet print_config \
|
||||
${CONFIG} \ # path of the config file
|
||||
[--save-path] \ # save path of whole config, suffixed with .py, .json or .yml
|
||||
[--cfg-options ${OPTIONS [OPTIONS...]}] # override some settings in the used config
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
```shell
|
||||
mim run mmdet print_config \
|
||||
configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \
|
||||
--save-path ./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py
|
||||
```
|
||||
|
||||
Running the above command will save the `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` config file, with its inheritance relationship expanded, as `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py` in the `./work_dirs` folder.
|
||||
|
||||
## Set the random seed
|
||||
|
||||
If you want to set the random seed during training, you can use the following command.
|
||||
|
||||
```shell
|
||||
python ./tools/train.py \
|
||||
${CONFIG} \ # path of the config file
|
||||
--cfg-options randomness.seed=2023 \ # set seed to 2023
|
||||
[randomness.diff_rank_seed=True] \ # set different seeds according to global rank
|
||||
[randomness.deterministic=True] # set the deterministic option for CUDNN backend
|
||||
# [] stands for optional parameters, when actually entering the command line, you do not need to enter []
|
||||
```
|
||||
|
||||
`randomness` has three parameters that can be set, with the following meanings.
|
||||
|
||||
- `randomness.seed=2023`, set the random seed to 2023.
|
||||
- `randomness.diff_rank_seed=True`, set different seeds according to global rank. Defaults to False.
|
||||
- `randomness.deterministic=True`, set the deterministic option for cuDNN backend, i.e., set `torch.backends.cudnn.deterministic` to True and `torch.backends.cudnn.benchmark` to False. Defaults to False. See https://pytorch.org/docs/stable/notes/randomness.html for more details.
|
|
@ -1,25 +0,0 @@
|
|||
Data flow
|
||||
************************
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
data_flow.md
|
||||
|
||||
|
||||
How to
|
||||
************************
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
how_to.md
|
||||
|
||||
|
||||
Plugins
|
||||
************************
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
plugins.md
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue