Add benchmark tools & Reorganize configs
parent 5ddfed5040
commit 5bf1eca4e4
.dev_scripts
configs
_base_
models/arch_settings/mobilenet
distill
nas
mmcls
pruning
autoslim
tests
tools
model_converters
@@ -0,0 +1,250 @@
import argparse
import os
import os.path as osp
import re
from collections import OrderedDict
from pathlib import Path

import mmcv
import wget
from modelindex.load_model_index import load
from rich.console import Console
from rich.syntax import Syntax
from rich.table import Table

console = Console()
MMRAZOR_ROOT = Path(__file__).absolute().parents[1]

METRIC_MAPPINGS = {
    'accuracy/top1': 'Top 1 Accuracy',
    'accuracy/top5': 'Top 5 Accuracy'
}


def parse_args():
    parser = argparse.ArgumentParser(
        description="Test all models' accuracy in model-index.yml")
    parser.add_argument(
        'partition', type=str, help='Cluster partition to use.')
    parser.add_argument('checkpoint_root', help='Checkpoint file root path.')
    parser.add_argument(
        '--job-name',
        type=str,
        default='razor-test-benchmark',
        help='Slurm job name prefix')
    parser.add_argument('--port', type=int, default=29666, help='dist port')
    parser.add_argument(
        '--models', nargs='+', type=str, help='Specify model names to run.')
    parser.add_argument('--gpus', type=int, default=8, help='num gpus')
    parser.add_argument(
        '--work-dir',
        default='work_dirs/benchmark_test',
        help='the dir to save metric')
    parser.add_argument(
        '--run', action='store_true', help='run script directly')
    parser.add_argument(
        '--summary', action='store_true', help='collect results')
    parser.add_argument(
        '--local',
        action='store_true',
        help='run at local instead of cluster.')
    parser.add_argument(
        '--mail', type=str, help='Mail address to watch test status.')
    parser.add_argument(
        '--mail-type',
        nargs='+',
        default=['BEGIN'],
        choices=['NONE', 'BEGIN', 'END', 'FAIL', 'REQUEUE', 'ALL'],
        help='Mail types to watch test status.')
    parser.add_argument(
        '--quotatype',
        default=None,
        choices=['reserved', 'auto', 'spot'],
        help='Quota type, only available for phoenix-slurm>=0.2')

    args = parser.parse_args()
    return args


def create_test_job_batch(commands, model_info, args, port):

    fname = model_info.name

    config = Path(model_info.config)
    # assert config.exists(), f'{fname}: {config} not found.'

    http_prefix = 'https://download.openmmlab.com/mmrazor/'
    if 's3://' in args.checkpoint_root:
        from mmcv.fileio import FileClient
        from petrel_client.common.exception import AccessDeniedError
        file_client = FileClient.infer_client(uri=args.checkpoint_root)
        checkpoint = file_client.join_path(
            args.checkpoint_root, model_info.weights[len(http_prefix):])

        try:
            exists = file_client.exists(checkpoint)
        except AccessDeniedError:
            exists = False
    else:
        checkpoint_root = Path(args.checkpoint_root)
        checkpoint = checkpoint_root / model_info.weights[len(http_prefix):]
        checkpoint.parent.mkdir(parents=True, exist_ok=True)
        exists = checkpoint.exists()
    if exists:
        print(f'{checkpoint} already exists.')
    else:
        wget.download(model_info.weights, str(checkpoint))
        print(f'\nSaved in {checkpoint}.')

    job_name = f'{args.job_name}_{fname}'
    work_dir = Path(args.work_dir) / fname
    work_dir.mkdir(parents=True, exist_ok=True)

    if args.quotatype is not None:
        quota_cfg = f'#SBATCH --quotatype {args.quotatype}\n'
    else:
        quota_cfg = ''

    launcher = 'none' if args.local else 'slurm'
    runner = 'python' if args.local else 'srun python'
    master_port = f'MASTER_PORT={port}'

    script_name = osp.join('tools', 'test.py')
    job_script = (
        f'#!/bin/bash\n'
        f'#SBATCH --output {work_dir}/job.%j.out\n'
        f'#SBATCH --partition={args.partition}\n'
        f'#SBATCH --job-name {job_name}\n'
        f'#SBATCH --gres=gpu:{args.gpus}\n'
        f'{quota_cfg}'
        f'#SBATCH --ntasks-per-node={args.gpus}\n'
        f'#SBATCH --ntasks={args.gpus}\n'
        f'#SBATCH --cpus-per-task=5\n\n'
        f'{master_port} {runner} -u {script_name} {config} {checkpoint} '
        f'--work-dir {work_dir} '
        f'--launcher={launcher}\n')

    with open(work_dir / 'job.sh', 'w') as f:
        f.write(job_script)

    commands.append(f'echo "{config}"')
    if args.local:
        commands.append(f'bash {work_dir}/job.sh')
    else:
        commands.append(f'sbatch {work_dir}/job.sh')

    return work_dir / 'job.sh'


def summary(args):
    # parse model-index.yml
    model_index_file = MMRAZOR_ROOT / 'model-index.yml'
    model_index = load(str(model_index_file))
    model_index.build_models_with_collections()
    models = OrderedDict({model.name: model for model in model_index.models})

    if args.models:
        patterns = [re.compile(pattern) for pattern in args.models]
        filter_models = {}
        for k, v in models.items():
            if any([re.match(pattern, k) for pattern in patterns]):
                filter_models[k] = v
        if len(filter_models) == 0:
            print('No model found, please specify models in:')
            print('\n'.join(models.keys()))
            return
        models = filter_models

    model_results = dict()
    for model_info in models.values():
        model_name = model_info.name
        work_dir = Path(args.work_dir) / model_name
        sub_dirs = [p.name for p in work_dir.iterdir() if p.is_dir()]

        if len(sub_dirs) == 0:
            print(f'{model_name} has no results.')
            continue

        latest_time = sub_dirs[-1]
        latest_json = work_dir / latest_time / f'{latest_time}.json'

        if not latest_json.exists():
            print(f'{model_name} has no results.')
            continue
        latest_result = mmcv.load(latest_json, 'json')

        expect_result = model_info.results[0].metrics
        summary_result = {
            'expect': expect_result,
            'actual':
            {METRIC_MAPPINGS[k]: v
             for k, v in latest_result.items()}
        }
        model_results[model_name] = summary_result

    mmcv.fileio.dump(model_results,
                     Path(args.work_dir) / 'summary.yml', 'yaml')
    print(f'Summary results saved in {Path(args.work_dir)}/summary.yml')


def test(args):
    # parse model-index.yml
    model_index_file = MMRAZOR_ROOT / 'model-index.yml'
    model_index = load(str(model_index_file))
    model_index.build_models_with_collections()
    models = OrderedDict({model.name: model for model in model_index.models})

    commands = []
    if args.models:
        patterns = [re.compile(pattern) for pattern in args.models]
        filter_models = {}
        for k, v in models.items():
            if any([re.match(pattern, k) for pattern in patterns]):
                filter_models[k] = v
        if len(filter_models) == 0:
            print('No model found, please specify models in:')
            print('\n'.join(models.keys()))
            return
        models = filter_models

    preview_script = ''
    port = args.port
    for model_info in models.values():
        script_path = create_test_job_batch(commands, model_info, args, port)
        preview_script = script_path or preview_script
        port += 1
    command_str = '\n'.join(commands)

    preview = Table()
    preview.add_column(str(preview_script))
    preview.add_column('Shell command preview')
    preview.add_row(
        Syntax.from_path(
            preview_script,
            background_color='default',
            line_numbers=True,
            word_wrap=True),
        Syntax(
            command_str,
            'bash',
            background_color='default',
            line_numbers=True,
            word_wrap=True))
    console.print(preview)

    if args.run:
        os.system(command_str)
    else:
        console.print('Please set "--run" to start the job')


def main():
    args = parse_args()
    if args.summary:
        summary(args)
    else:
        test(args)


if __name__ == '__main__':
    main()
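The `--models` option of both benchmark scripts is matched against model names with `re.match`, so a pattern selects every model whose name starts with it. A minimal, self-contained sketch of that filtering behaviour (the model names below are hypothetical examples, not entries from model-index.yml):

```python
# Sketch of the --models regex filtering used by the benchmark scripts.
import re
from collections import OrderedDict

models = OrderedDict({
    'spos_shufflenet_subnet_8xb128_in1k': 'cfg-a',   # hypothetical names
    'detnas_frcnn_shufflenet_subnet_coco_1x': 'cfg-b',
    'wsld_logits_resnet34_resnet18_8xb32_in1k': 'cfg-c',
})

patterns = [re.compile(p) for p in ['spos', 'wsld']]
filtered = {k: v for k, v in models.items()
            if any(re.match(p, k) for p in patterns)}
print(sorted(filtered))  # the spos_* and wsld_* entries only
```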
@@ -0,0 +1,224 @@
import argparse
import os
import os.path as osp
import re
from collections import OrderedDict
from pathlib import Path

import mmcv
from modelindex.load_model_index import load
from rich.console import Console
from rich.syntax import Syntax
from rich.table import Table

console = Console()
MMRAZOR_ROOT = Path(__file__).absolute().parents[1]

METRIC_MAPPINGS = {
    'accuracy/top1': 'Top 1 Accuracy',
    'accuracy/top5': 'Top 5 Accuracy'
}


def parse_args():
    parser = argparse.ArgumentParser(
        description='Train all models in model-index.yml')
    parser.add_argument(
        'partition', type=str, help='Cluster partition to use.')
    parser.add_argument(
        '--job-name',
        type=str,
        default='razor-train-benchmark',
        help='Slurm job name prefix')
    parser.add_argument('--port', type=int, default=29666, help='dist port')
    parser.add_argument(
        '--models', nargs='+', type=str, help='Specify model names to run.')
    parser.add_argument('--gpus', type=int, default=8, help='num gpus')
    parser.add_argument(
        '--work-dir',
        default='work_dirs/benchmark_test',
        help='the dir to save metric')
    parser.add_argument(
        '--run', action='store_true', help='run script directly')
    parser.add_argument(
        '--summary', action='store_true', help='collect results')
    parser.add_argument(
        '--local',
        action='store_true',
        help='run at local instead of cluster.')
    parser.add_argument(
        '--mail', type=str, help='Mail address to watch test status.')
    parser.add_argument(
        '--mail-type',
        nargs='+',
        default=['BEGIN'],
        choices=['NONE', 'BEGIN', 'END', 'FAIL', 'REQUEUE', 'ALL'],
        help='Mail types to watch test status.')
    parser.add_argument(
        '--quotatype',
        default=None,
        choices=['reserved', 'auto', 'spot'],
        help='Quota type, only available for phoenix-slurm>=0.2')

    args = parser.parse_args()
    return args


def create_train_job_batch(commands, model_info, args, port):

    fname = model_info.name

    config = Path(model_info.config)
    # assert config.exists(), f'{fname}: {config} not found.'

    job_name = f'{args.job_name}_{fname}'
    work_dir = Path(args.work_dir) / fname
    work_dir.mkdir(parents=True, exist_ok=True)

    if args.quotatype is not None:
        quota_cfg = f'#SBATCH --quotatype {args.quotatype}\n'
    else:
        quota_cfg = ''

    launcher = 'none' if args.local else 'slurm'
    runner = 'python' if args.local else 'srun python'
    master_port = f'MASTER_PORT={port}'

    script_name = osp.join('tools', 'train.py')
    job_script = (f'#!/bin/bash\n'
                  f'#SBATCH --output {work_dir}/job.%j.out\n'
                  f'#SBATCH --partition={args.partition}\n'
                  f'#SBATCH --job-name {job_name}\n'
                  f'#SBATCH --gres=gpu:{args.gpus}\n'
                  f'{quota_cfg}'
                  f'#SBATCH --ntasks-per-node={args.gpus}\n'
                  f'#SBATCH --ntasks={args.gpus}\n'
                  f'#SBATCH --cpus-per-task=5\n\n'
                  f'{master_port} {runner} -u {script_name} {config} '
                  f'--work-dir {work_dir} '
                  f'--launcher={launcher}\n')

    with open(work_dir / 'job.sh', 'w') as f:
        f.write(job_script)

    commands.append(f'echo "{config}"')
    if args.local:
        commands.append(f'bash {work_dir}/job.sh')
    else:
        commands.append(f'sbatch {work_dir}/job.sh')

    return work_dir / 'job.sh'


def summary(args):
    # parse model-index.yml
    model_index_file = MMRAZOR_ROOT / 'model-index.yml'
    model_index = load(str(model_index_file))
    model_index.build_models_with_collections()
    models = OrderedDict({model.name: model for model in model_index.models})

    if args.models:
        patterns = [re.compile(pattern) for pattern in args.models]
        filter_models = {}
        for k, v in models.items():
            if any([re.match(pattern, k) for pattern in patterns]):
                filter_models[k] = v
        if len(filter_models) == 0:
            print('No model found, please specify models in:')
            print('\n'.join(models.keys()))
            return
        models = filter_models

    model_results = dict()
    for model_info in models.values():
        model_name = model_info.name
        work_dir = Path(args.work_dir) / model_name
        sub_dirs = [p.name for p in work_dir.iterdir() if p.is_dir()]

        if len(sub_dirs) == 0:
            print(f'{model_name} has no results.')
            continue

        latest_time = sub_dirs[-1]
        latest_json = work_dir / latest_time / f'{latest_time}.json'

        if not latest_json.exists():
            print(f'{model_name} has no results.')
            continue
        latest_result = mmcv.load(latest_json, 'json')

        expect_result = model_info.results[0].metrics
        summary_result = {
            'expect': expect_result,
            'actual':
            {METRIC_MAPPINGS[k]: v
             for k, v in latest_result.items()}
        }
        model_results[model_name] = summary_result

    mmcv.fileio.dump(model_results,
                     Path(args.work_dir) / 'summary.yml', 'yaml')
    print(f'Summary results saved in {Path(args.work_dir)}/summary.yml')


def train(args):
    # parse model-index.yml
    model_index_file = MMRAZOR_ROOT / 'model-index.yml'
    model_index = load(str(model_index_file))
    model_index.build_models_with_collections()
    models = OrderedDict({model.name: model for model in model_index.models})

    commands = []
    if args.models:
        patterns = [re.compile(pattern) for pattern in args.models]
        filter_models = {}
        for k, v in models.items():
            if any([re.match(pattern, k) for pattern in patterns]):
                filter_models[k] = v
        if len(filter_models) == 0:
            print('No model found, please specify models in:')
            print('\n'.join(models.keys()))
            return
        models = filter_models

    preview_script = ''
    port = args.port
    for model_info in models.values():
        script_path = create_train_job_batch(commands, model_info, args, port)
        preview_script = script_path or preview_script
        port += 1
    command_str = '\n'.join(commands)

    preview = Table()
    preview.add_column(str(preview_script))
    preview.add_column('Shell command preview')
    preview.add_row(
        Syntax.from_path(
            preview_script,
            background_color='default',
            line_numbers=True,
            word_wrap=True),
        Syntax(
            command_str,
            'bash',
            background_color='default',
            line_numbers=True,
            word_wrap=True))
    console.print(preview)

    if args.run:
        os.system(command_str)
    else:
        console.print('Please set "--run" to start the job')


def main():
    args = parse_args()
    if args.summary:
        summary(args)
    else:
        train(args)


if __name__ == '__main__':
    main()
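Both scripts' `--summary` mode dumps an `expect`/`actual` metric pair per model into `summary.yml`. As a hedged illustration of how that file could be post-processed (the file path follows the defaults above; the regression threshold is invented for the example):

```python
# Flag models whose measured top-1 falls noticeably below the expected value.
import mmcv

results = mmcv.load('work_dirs/benchmark_test/summary.yml', 'yaml')
for name, res in results.items():
    expect = res['expect'].get('Top 1 Accuracy')
    actual = res['actual'].get('Top 1 Accuracy')
    if expect is not None and actual is not None and actual < expect - 0.2:
        print(f'{name}: top-1 dropped from {expect} to {actual}')
```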
@@ -1,76 +0,0 @@
se_cfg = dict(
    ratio=4,
    divisor=1,
    act_cfg=(dict(type='HSwish'),
             dict(
                 type='HSigmoid', bias=3, divisor=6, min_value=0,
                 max_value=1)))

_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k3e6_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e4_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e6_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e4_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e6_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
    [16, 1, 1, _FIRST_STAGE_MUTABLE],
    [24, 1, 2, _OTHER_STAGE_MUTABLE],
    [40, 2, 2, _OTHER_STAGE_MUTABLE],
    [80, 2, 2, _OTHER_STAGE_MUTABLE],
    [96, 3, 1, _OTHER_STAGE_MUTABLE],
    [192, 2, 2, _OTHER_STAGE_MUTABLE],
    [320, 1, 1, _OTHER_STAGE_MUTABLE]
]
@@ -1,76 +0,0 @@
se_cfg = dict(
    ratio=4,
    divisor=1,
    act_cfg=(dict(type='HSwish'),
             dict(
                 type='HSigmoid', bias=3, divisor=6, min_value=0,
                 max_value=1)))

_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k3e6_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e4_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e6_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e4_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e6_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
    [16, 1, 1, _FIRST_STAGE_MUTABLE],
    [24, 1, 2, _OTHER_STAGE_MUTABLE],
    [40, 2, 2, _OTHER_STAGE_MUTABLE],
    [80, 2, 2, _OTHER_STAGE_MUTABLE],
    [96, 1, 1, _OTHER_STAGE_MUTABLE],
    [192, 1, 2, _OTHER_STAGE_MUTABLE],
    [320, 1, 1, _OTHER_STAGE_MUTABLE]
]
@@ -1,76 +0,0 @@
se_cfg = dict(
    ratio=4,
    divisor=1,
    act_cfg=(dict(type='HSwish'),
             dict(
                 type='HSigmoid', bias=3, divisor=6, min_value=0,
                 max_value=1)))

_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k3e6_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e4_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e6_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e4_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e6_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
    [16, 1, 1, _FIRST_STAGE_MUTABLE],
    [24, 1, 2, _OTHER_STAGE_MUTABLE],
    [40, 2, 2, _OTHER_STAGE_MUTABLE],
    [80, 3, 2, _OTHER_STAGE_MUTABLE],
    [96, 4, 1, _OTHER_STAGE_MUTABLE],
    [192, 3, 2, _OTHER_STAGE_MUTABLE],
    [320, 1, 1, _OTHER_STAGE_MUTABLE]
]
@@ -1,76 +0,0 @@
se_cfg = dict(
    ratio=4,
    divisor=1,
    act_cfg=(dict(type='HSwish'),
             dict(
                 type='HSigmoid', bias=3, divisor=6, min_value=0,
                 max_value=1)))

_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k3e6_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e4_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e6_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e4_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e6_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
    [16, 1, 1, _FIRST_STAGE_MUTABLE],
    [24, 1, 2, _OTHER_STAGE_MUTABLE],
    [40, 2, 2, _OTHER_STAGE_MUTABLE],
    [80, 2, 2, _OTHER_STAGE_MUTABLE],
    [96, 3, 1, _OTHER_STAGE_MUTABLE],
    [192, 2, 2, _OTHER_STAGE_MUTABLE],
    [320, 1, 1, _OTHER_STAGE_MUTABLE]
]
@@ -1,76 +0,0 @@
se_cfg = dict(
    ratio=4,
    divisor=1,
    act_cfg=(dict(type='HSwish'),
             dict(
                 type='HSigmoid', bias=3, divisor=6, min_value=0,
                 max_value=1)))

_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k3e6_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e4_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e6_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e4_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e6_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
    [16, 1, 1, _FIRST_STAGE_MUTABLE],
    [24, 4, 2, _OTHER_STAGE_MUTABLE],
    [40, 4, 2, _OTHER_STAGE_MUTABLE],
    [80, 5, 2, _OTHER_STAGE_MUTABLE],
    [96, 4, 1, _OTHER_STAGE_MUTABLE],
    [192, 4, 2, _OTHER_STAGE_MUTABLE],
    [320, 1, 1, _OTHER_STAGE_MUTABLE]
]
@@ -1,76 +0,0 @@
se_cfg = dict(
    ratio=4,
    divisor=1,
    act_cfg=(dict(type='HSwish'),
             dict(
                 type='HSigmoid', bias=3, divisor=6, min_value=0,
                 max_value=1)))

_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e4_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k3e6_se=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e4_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k5e6_se=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e4_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=4,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish')),
        mb_k7e6_se=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            se_cfg=se_cfg,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='HSwish'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
    [16, 1, 1, _FIRST_STAGE_MUTABLE],
    [24, 5, 2, _OTHER_STAGE_MUTABLE],
    [40, 5, 2, _OTHER_STAGE_MUTABLE],
    [80, 5, 2, _OTHER_STAGE_MUTABLE],
    [96, 6, 1, _OTHER_STAGE_MUTABLE],
    [192, 6, 2, _OTHER_STAGE_MUTABLE],
    [320, 1, 1, _OTHER_STAGE_MUTABLE]
]
@@ -0,0 +1,65 @@
_STAGE_MUTABLE = dict(
    _scope_='mmrazor',
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e3=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=3,
            act_cfg=dict(type='ReLU6')),
        mb_k5e3=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=3,
            act_cfg=dict(type='ReLU6')),
        mb_k7e3=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=3,
            act_cfg=dict(type='ReLU6')),
        mb_k3e6=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=6,
            act_cfg=dict(type='ReLU6')),
        mb_k5e6=dict(
            type='MBBlock',
            kernel_size=5,
            expand_ratio=6,
            act_cfg=dict(type='ReLU6')),
        mb_k7e6=dict(
            type='MBBlock',
            kernel_size=7,
            expand_ratio=6,
            act_cfg=dict(type='ReLU6')),
        identity=dict(type='Identity')))

_FIRST_MUTABLE = dict(
    _scope_='mmrazor',
    type='OneShotMutableOP',
    candidates=dict(
        mb_k3e1=dict(
            type='MBBlock',
            kernel_size=3,
            expand_ratio=1,
            act_cfg=dict(type='ReLU6'))))

arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable_cfg.
    [24, 1, 1, _FIRST_MUTABLE],
    [32, 4, 2, _STAGE_MUTABLE],
    [56, 4, 2, _STAGE_MUTABLE],
    [112, 4, 2, _STAGE_MUTABLE],
    [128, 4, 1, _STAGE_MUTABLE],
    [256, 4, 2, _STAGE_MUTABLE],
    [432, 1, 1, _STAGE_MUTABLE]
]

nas_backbone = dict(
    _scope_='mmrazor',
    type='SearchableMobileNet',
    first_channels=40,
    last_channels=1728,
    widen_factor=1.0,
    arch_setting=arch_setting)
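For orientation, this is one way a backbone could expand the `arch_setting` rows above into per-block settings; it mirrors the (channel, num_blocks, stride, mutable cfg) row layout but is only an illustration, not the actual `SearchableMobileNet` implementation:

```python
# Illustrative expansion of arch_setting rows into per-block build settings.
def expand_stages(arch_setting, widen_factor=1.0):
    blocks = []
    for channel, num_blocks, stride, mutable_cfg in arch_setting:
        out_channels = int(channel * widen_factor)
        for i in range(num_blocks):
            blocks.append({
                'out_channels': out_channels,
                # only the first block of a stage downsamples
                'stride': stride if i == 0 else 1,
                'mutable_cfg': mutable_cfg,
            })
    return blocks
```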
@@ -0,0 +1,23 @@
_STAGE_MUTABLE = dict(
    _scope_='mmrazor',
    type='OneShotMutableOP',
    candidates=dict(
        shuffle_3x3=dict(type='ShuffleBlock', kernel_size=3),
        shuffle_5x5=dict(type='ShuffleBlock', kernel_size=5),
        shuffle_7x7=dict(type='ShuffleBlock', kernel_size=7),
        shuffle_xception=dict(type='ShuffleXception')))

arch_setting = [
    # Parameters to build layers. 3 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, mutable_cfg.
    [64, 4, _STAGE_MUTABLE],
    [160, 4, _STAGE_MUTABLE],
    [320, 8, _STAGE_MUTABLE],
    [640, 4, _STAGE_MUTABLE]
]

nas_backbone = dict(
    _scope_='mmrazor',
    type='SearchableShuffleNetV2',
    widen_factor=1.0,
    arch_setting=arch_setting)
@@ -0,0 +1,80 @@
# dataset settings
dataset_type = 'mmcls.ImageNet'
preprocess_cfg = dict(
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type='mmcls.LoadImageFromFile'),
    dict(type='mmcls.RandomResizedCrop', scale=224),
    dict(
        type='mmcls.ColorJitter', brightness=0.4, contrast=0.4,
        saturation=0.4),
    dict(type='mmcls.RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='mmcls.PackClsInputs'),
]

test_pipeline = [
    dict(type='mmcls.LoadImageFromFile'),
    dict(type='mmcls.ResizeEdge', scale=256, edge='short'),
    dict(type='mmcls.CenterCrop', crop_size=224),
    dict(type='mmcls.PackClsInputs'),
]

train_dataloader = dict(
    batch_size=128,
    num_workers=4,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        ann_file='meta/train.txt',
        data_prefix='train',
        pipeline=train_pipeline),
    sampler=dict(type='mmcls.DefaultSampler', shuffle=True),
    persistent_workers=True,
)

val_dataloader = dict(
    batch_size=128,
    num_workers=4,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        ann_file='meta/val.txt',
        data_prefix='val',
        pipeline=test_pipeline),
    sampler=dict(type='mmcls.DefaultSampler', shuffle=False),
    persistent_workers=True,
)
val_evaluator = dict(type='mmcls.Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

# optimizer
paramwise_cfg = dict(
    bias_decay_mult=0.0, norm_decay_mult=0.0, dwconv_decay_mult=0.0)

optim_wrapper = dict(
    optimizer=dict(type='mmcls.SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
    paramwise_cfg=paramwise_cfg,
    clip_grad=None)

# learning policy
param_scheduler = dict(
    type='mmcls.PolyLR',
    power=1.0,
    eta_min=0.0,
    by_epoch=True,
    end=300,
    convert_to_iter_based=True)

# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict()
test_cfg = dict()
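With `power=1.0`, the `PolyLR` schedule above is a linear decay from the base learning rate 0.5 down to `eta_min=0.0` over 300 epochs (applied per iteration when `convert_to_iter_based=True`). A quick sanity check of the resulting values, using the simplified closed form rather than MMEngine's step-wise implementation:

```python
# Closed-form poly schedule: lr(t) = (lr0 - eta_min) * (1 - t/T)**power + eta_min
def poly_lr(base_lr, epoch, max_epochs=300, power=1.0, eta_min=0.0):
    factor = (1 - epoch / max_epochs) ** power
    return (base_lr - eta_min) * factor + eta_min

for e in (0, 150, 299):
    print(e, round(poly_lr(0.5, e), 4))  # 0.5, 0.25, ~0.0017
```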
@@ -0,0 +1,9 @@
_base_ = [
    './imagenet_bs1024_spos.py',
]

train_dataloader = dict(batch_size=256)

val_dataloader = dict(batch_size=256)

test_dataloader = dict(batch_size=256)
@@ -1,144 +0,0 @@
_base_ = [
    '../../_base_/datasets/mmdet/coco_detection.py',
    '../../_base_/schedules/mmdet/schedule_1x.py',
    '../../_base_/mmdet_runtime.py'
]

# model settings
student = dict(
    type='mmdet.GFL',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='GFLHead',
        num_classes=80,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0),
        loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),
        reg_max=16,
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))

checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth'  # noqa: E501

teacher = dict(
    type='mmdet.GFL',
    init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=None),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='GFLHead',
        num_classes=80,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0),
        loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),
        reg_max=16,
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))

# algorithm setting
algorithm = dict(
    type='GeneralDistill',
    architecture=dict(
        type='MMDetArchitecture',
        model=student,
    ),
    distiller=dict(
        type='SingleTeacherDistiller',
        teacher=teacher,
        teacher_trainable=False,
        components=[
            dict(
                student_module='bbox_head.gfl_cls',
                teacher_module='bbox_head.gfl_cls',
                losses=[
                    dict(
                        type='ChannelWiseDivergence',
                        name='loss_cwd_cls_head',
                        tau=1,
                        loss_weight=5,
                    )
                ])
        ]),
)

find_unused_parameters = True

# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
@@ -1,110 +0,0 @@
_base_ = [
    '../../_base_/datasets/mmseg/cityscapes.py',
    '../../_base_/mmseg_runtime.py',
    '../../_base_/schedules/mmseg/schedule_80k.py'
]

norm_cfg = dict(type='SyncBN', requires_grad=True)

# pspnet r18
student = dict(
    type='mmseg.EncoderDecoder',
    backbone=dict(
        type='ResNetV1c',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'),
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=512,
        in_index=3,
        channels=128,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=256,
        in_index=2,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth'  # noqa: E501

# pspnet r101
teacher = dict(
    type='mmseg.EncoderDecoder',
    init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
    backbone=dict(
        type='ResNetV1c',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
)

# algorithm setting
algorithm = dict(
    type='GeneralDistill',
    architecture=dict(
        type='MMSegArchitecture',
        model=student,
    ),
    distiller=dict(
        type='SingleTeacherDistiller',
        teacher=teacher,
        teacher_trainable=False,
        components=[
            dict(
                student_module='decode_head.conv_seg',
                teacher_module='decode_head.conv_seg',
                losses=[
                    dict(
                        type='ChannelWiseDivergence',
                        name='loss_cwd_logits',
                        tau=1,
                        loss_weight=5,
                    )
                ])
        ]),
)

find_unused_parameters = True
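Both removed configs above distill dense predictions with `ChannelWiseDivergence`. As a rough sketch of that loss, following the CWD paper (each channel's spatial map is softened into a distribution at temperature `tau`, the student is pulled toward the teacher with a KL term scaled by `tau**2`); this is illustrative only, not MMRazor's implementation:

```python
import torch
import torch.nn.functional as F

def cwd_loss(preds_S, preds_T, tau=1.0, loss_weight=5.0):
    # Treat each (sample, channel) spatial map as one distribution.
    N, C, H, W = preds_S.shape
    s = F.log_softmax(preds_S.view(N * C, H * W) / tau, dim=1)
    t = F.softmax(preds_T.view(N * C, H * W) / tau, dim=1)
    loss = F.kl_div(s, t, reduction='sum') * (tau ** 2) / (N * C)
    return loss_weight * loss

loss = cwd_loss(torch.randn(2, 19, 64, 128), torch.randn(2, 19, 64, 128))
```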
@@ -1,50 +0,0 @@
Collections:
  - Name: CWD
    Metadata:
      Training Data:
        - Cityscapes
        - COCO
    Paper:
      URL: https://arxiv.org/abs/2011.13256
      Title: Channel-wise Knowledge Distillation for Dense Prediction
    README: configs/distill/cwd/README.md
    Code:
      URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/cwd.py#L10
      Version: v0.1.0
    Converted From:
      Code:
        - https://github.com/pppppM/mmsegmentation-distiller
        - https://github.com/pppppM/mmdetection-distiller
Models:
  - Name: cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k
    In Collection: CWD
    Metadata:
      Location: cls head
      Student: pspnet-r18-d8
      Teacher: pspnet-r101-d8
      Teacher Checkpoint: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth
    Results:
      - Task: Semantic Segmentation
        Dataset: Cityscapes
        Metrics:
          mIoU: 75.54
          mIoU(S): 74.87
          mIoU(T): 79.76
    Config: configs/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k.py
    Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_mIoU-75.54_20211222-3a26ee1c.pth
  - Name: cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco
    In Collection: CWD
    Metadata:
      Location: cls head
      Student: gfl-r50-fpn
      Teacher: gfl-r101-fpn
      Teacher Checkpoint: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth
    Results:
      - Task: Object Detection
        Dataset: COCO
        Metrics:
          box AP: 41.9
          box AP(S): 40.2
          box AP(T): 44.7
    Config: configs/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco.py
    Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211222-655dff39.pth
@@ -0,0 +1,32 @@
# KD

> [Distilling the Knowledge in a Neural Network](https://arxiv.org/abs/1503.02531)

<!-- [ALGORITHM] -->

## Abstract

A very simple way to improve the performance of almost any machine learning algorithm is to train many different models on the same data and then to average their predictions. Unfortunately, making predictions using a whole ensemble of models is cumbersome and may be too computationally expensive to allow deployment to a large number of users, especially if the individual models are large neural nets. Caruana and his collaborators have shown that it is possible to compress the knowledge in an ensemble into a single model which is much easier to deploy and we develop this approach further using a different compression technique. We achieve some surprising results on MNIST and we show that we can significantly improve the acoustic model of a heavily used commercial system by distilling the knowledge in an ensemble of models into a single model. We also introduce a new type of ensemble composed of one or more full models and many specialist models which learn to distinguish fine-grained classes that the full models confuse. Unlike a mixture of experts, these specialist models can be trained rapidly and in parallel.

![]()

## Results and models

### Classification

| Location | Dataset | Teacher | Student | Acc | Acc(T) | Acc(S) | Config | Download |
| :------: | :------: | :------: | :------: | :---: | :----: | :----: | :------: | :------: |
| logits | ImageNet | [resnet34](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [resnet18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | 71.54 | 73.62 | 69.90 | [config](./kd_logits_resnet34_resnet18_8xb32_in1k.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \|[model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth?versionId=CAEQHxiBgMC6memK7xciIGMzMDFlYTA4YzhlYTRiMTNiZWU0YTVhY2I5NjVkMjY2) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_20211221_181516.log.json?versionId=CAEQHxiBgIDLmemK7xciIGNkM2FiN2Y4N2E5YjRhNDE4NDVlNmExNDczZDIxN2E5) |

## Citation

```latex
@article{hinton2015distilling,
  title={Distilling the knowledge in a neural network},
  author={Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff and others},
  journal={arXiv preprint arXiv:1503.02531},
  volume={2},
  number={7},
  year={2015}
}
```
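The `KLDivergence(tau=..., loss_weight=...)` loss referenced by the KD configs follows the standard Hinton temperature-scaled formulation. A minimal sketch of that loss (not MMRazor's exact implementation):

```python
import torch
import torch.nn.functional as F

def kd_kl_loss(student_logits, teacher_logits, tau=1.0, loss_weight=5.0):
    s = F.log_softmax(student_logits / tau, dim=1)
    t = F.softmax(teacher_logits / tau, dim=1)
    # tau**2 keeps gradient magnitudes comparable across temperatures
    return loss_weight * (tau ** 2) * F.kl_div(s, t, reduction='batchmean')

loss = kd_kl_loss(torch.randn(8, 1000), torch.randn(8, 1000), tau=1.0)
```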
@@ -19,20 +19,18 @@ model = dict(
    teacher=dict(
        cfg_path='mmcls::resnet/resnet34_8xb32_in1k.py', pretrained=True),
    teacher_ckpt='resnet34_8xb32_in1k_20210831-f257d4e6.pth',
    student_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')),
    teacher_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')),
    distill_losses=dict(
        loss_kl=dict(type='KLDivergence', tau=1, loss_weight=5)),
    loss_forward_mappings=dict(
        loss_kl=dict(
            preds_S=dict(
                from_student=True,
                recorder='fc',
            ),
            preds_T=dict(
                from_student=False,
                recorder='fc',
            ))))
    distiller=dict(
        type='ConfigurableDistiller',
        student_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        teacher_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        distill_losses=dict(
            loss_kl=dict(type='KLDivergence', tau=1, loss_weight=5)),
        loss_forward_mappings=dict(
            loss_kl=dict(
                preds_S=dict(from_student=True, recorder='fc'),
                preds_T=dict(from_student=False, recorder='fc')))))

find_unused_parameters = True

@@ -0,0 +1,36 @@
Collections:
  - Name: KD
    Metadata:
      Training Data:
        - ImageNet-1k
    Paper:
      URL: https://arxiv.org/abs/1503.02531
      Title: Distilling the Knowledge in a Neural Network
    README: configs/distill/mmcls/kd/README.md
    Code:
      URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/weighted_soft_label_distillation.py
      Version: v0.1.0
Models:
  - Name: kd_logits_resnet34_resnet18_8xb32_in1k
    In Collection: KD
    Metadata:
      Location: logits
      Student:
        Config: mmcls::resnet/resnet18_8xb32_in1k.py
        Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
        Metrics:
          Top 1 Accuracy: 69.90
          Top 5 Accuracy: 89.43
      Teacher:
        Config: mmcls::resnet/resnet34_8xb32_in1k.py
        Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
        Metrics:
          Top 1 Accuracy: 73.62
          Top 5 Accuracy: 91.59
    Results:
      - Task: Image Classification
        Dataset: ImageNet-1k
        Metrics:
          Top 1 Accuracy: 71.54
    Config: configs/distill/mmcls/kd/kd_logits_resnet34_resnet18_8xb32_in1k.py
    Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth
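The reorganized distillation configs in this commit move recorders, losses and mappings under `distiller=dict(type='ConfigurableDistiller', ...)`. A rough mental model of how `loss_forward_mappings` wires recorded tensors into each loss; this only illustrates the config semantics (the real distiller also handles details such as `data_idx`) and is not MMRazor's code:

```python
def compute_distill_losses(distill_losses, loss_forward_mappings,
                           student_records, teacher_records):
    losses = {}
    for loss_name, loss_fn in distill_losses.items():
        kwargs = {}
        for arg_name, mapping in loss_forward_mappings[loss_name].items():
            # pick the recorded tensor from the student or teacher side
            records = (student_records if mapping['from_student']
                       else teacher_records)
            kwargs[arg_name] = records[mapping['recorder']]
        losses[loss_name] = loss_fn(**kwargs)
    return losses
```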
@@ -6,7 +6,7 @@ Collections:
    Paper:
      URL: https://arxiv.org/abs/1904.05068
      Title: Relational Knowledge Distillation
    README: configs/distill/rkd/README.md
    README: configs/distill/mmcls/rkd/README.md
    Code:
      URL: https://github.com/open-mmlab/mmrazor/blob/v0.3.0/mmrazor/models/losses/relation_kd.py
      Version: v0.3.0
@@ -17,15 +17,22 @@ Models:
    In Collection: RKD
    Metadata:
      Location: neck
      Student: R-18
      Teacher: R-34
      Teacher Checkpoint: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
      Student:
        Config: mmcls::resnet/resnet18_8xb32_in1k.py
        Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
        Metrics:
          Top 1 Accuracy: 69.90
          Top 5 Accuracy: 89.43
      Teacher:
        Config: mmcls::resnet/resnet34_8xb32_in1k.py
        Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
        Metrics:
          Top 1 Accuracy: 73.62
          Top 5 Accuracy: 91.59
    Results:
      - Task: Image Classification
        Dataset: ImageNet-1k
        Metrics:
          Top 1 Accuracy: 70.23
          Top 1 Accuracy:(S): 69.90
          Top 1 Accuracy:(T): 73.62
    Config: configs/distill/rkd/rkd_neck_resnet34_resnet18_8xb32_in1k.py
    Config: configs/distill/mmcls/rkd/rkd_neck_resnet34_resnet18_8xb32_in1k.py
    Weights: https://download.openmmlab.com/mmrazor/v0.3/distill/rkd/rkd_neck_resnet34_resnet18_8xb32_in1k_acc-70.23_20220401-f25700ac.pth
@@ -0,0 +1,43 @@
_base_ = [
    'mmcls::_base_/datasets/imagenet_bs32.py',
    'mmcls::_base_/schedules/imagenet_bs256.py',
    'mmcls::_base_/default_runtime.py'
]

model = dict(
    _scope_='mmrazor',
    type='SingleTeacherDistill',
    data_preprocessor=dict(
        type='ImgDataPreprocessor',
        # RGB format normalization parameters
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        # convert image from BGR to RGB
        bgr_to_rgb=True),
    architecture=dict(
        cfg_path='mmcls::resnet/resnet18_8xb32_in1k.py', pretrained=False),
    teacher=dict(
        cfg_path='mmcls::resnet/resnet34_8xb32_in1k.py', pretrained=True),
    teacher_ckpt='resnet34_8xb32_in1k_20210831-f257d4e6.pth',
    distiller=dict(
        type='ConfigurableDistiller',
        student_recorders=dict(
            feat=dict(type='ModuleOutputs', source='neck.gap')),
        teacher_recorders=dict(
            feat=dict(type='ModuleOutputs', source='neck.gap')),
        distill_losses=dict(
            loss_dw=dict(
                type='DistanceWiseRKD', with_l2_norm=True, loss_weight=25),
            loss_aw=dict(
                type='AngleWiseRKD', with_l2_norm=True, loss_weight=50)),
        loss_forward_mappings=dict(
            loss_dw=dict(
                preds_S=dict(from_student=True, recorder='feat'),
                preds_T=dict(from_student=False, recorder='feat')),
            loss_aw=dict(
                preds_S=dict(from_student=True, recorder='feat'),
                preds_T=dict(from_student=False, recorder='feat')))))

find_unused_parameters = True

val_cfg = dict(_delete_=True, type='mmrazor.SingleTeacherDistillValLoop')
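`DistanceWiseRKD` in the config above penalizes mismatched pairwise-distance structure between student and teacher embeddings recorded at `neck.gap`. A hedged sketch of that relation, following the RKD paper rather than MMRazor's exact code:

```python
import torch
import torch.nn.functional as F

def distance_wise_rkd(feat_s, feat_t, loss_weight=25.0):
    def pdist(feats):
        # pairwise Euclidean distances, normalized by their mean
        d = torch.cdist(feats, feats, p=2)
        mean_d = d[d > 0].mean()
        return d / (mean_d + 1e-12)

    return loss_weight * F.smooth_l1_loss(pdist(feat_s), pdist(feat_t))

loss = distance_wise_rkd(torch.randn(16, 512), torch.randn(16, 512))
```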
@@ -27,9 +27,9 @@ effectiveness of our method.

### Classification

| Location | Dataset | Teacher | Student | Acc | Acc(T) | Acc(S) | Config | Download |
| :------: | :------: | :------: | :------: | :---: | :----: | :----: | :------: | :------: |
| cls head | ImageNet | [resnet34](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [resnet18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | 71.54 | 73.62 | 69.90 | [config](./wsld_cls_head_resnet34_resnet18_8xb32_in1k.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \|[model](https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth) \| [log](https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_20211221_181516.log.json) |
| Location | Dataset | Teacher | Student | Acc | Acc(T) | Acc(S) | Config | Download |
| :------: | :------: | :------: | :------: | :---: | :----: | :----: | :------: | :------: |
| cls head | ImageNet | [resnet34](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [resnet18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | 71.54 | 73.62 | 69.90 | [config](./wsld_cls_head_resnet34_resnet18_8xb32_in1k.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \|[model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth?versionId=CAEQHxiBgMC6memK7xciIGMzMDFlYTA4YzhlYTRiMTNiZWU0YTVhY2I5NjVkMjY2) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_20211221_181516.log.json?versionId=CAEQHxiBgIDLmemK7xciIGNkM2FiN2Y4N2E5YjRhNDE4NDVlNmExNDczZDIxN2E5) |

## Citation

@ -6,26 +6,33 @@ Collections:
|
|||
Paper:
|
||||
URL: https://arxiv.org/abs/2102.00650
|
||||
Title: 'Rethinking Soft Labels for Knowledge Distillation: A Bias-Variance Tradeoff Perspective'
|
||||
README: configs/distill/wsld/README.md
|
||||
README: configs/distill/mmcls/wsld/README.md
|
||||
Code:
|
||||
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/weighted_soft_label_distillation.py
|
||||
Version: v0.1.0
|
||||
Converted From:
|
||||
Code: https://github.com/bellymonster/Weighted-Soft-Label-Distillation
|
||||
Models:
|
||||
- Name: wsld_cls_head_resnet34_resnet18_8xb32_in1k
|
||||
- Name: wsld_logits_resnet34_resnet18_8xb32_in1k
|
||||
In Collection: WSLD
|
||||
Metadata:
|
||||
Location: cls head
|
||||
Student: R-18
|
||||
Teacher: R-34
|
||||
Teacher Checkpoint: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth
|
||||
Location: logits
|
||||
Student:
|
||||
Config: mmcls::resnet/resnet18_8xb32_in1k.py
|
||||
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
|
||||
Metrics:
|
||||
Top 1 Accuracy: 69.90
|
||||
Top 5 Accuracy: 89.43
|
||||
Teacher:
|
||||
Config: mmcls::resnet/resnet34_8xb32_in1k.py
|
||||
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
|
||||
Metrics:
|
||||
Top 1 Accuracy: 73.62
|
||||
Top 5 Accuracy: 91.59
|
||||
Results:
|
||||
- Task: Image Classification
|
||||
Dataset: ImageNet-1k
|
||||
Metrics:
|
||||
Top 1 Accuracy: 71.54
|
||||
Top 1 Accuracy (S): 69.90
|
||||
Top 1 Accuracy (T): 73.62
|
||||
Config: configs/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k.py
|
||||
Config: configs/distill/mmcls/wsld/wsld_logits_resnet34_resnet18_8xb32_in1k.py
|
||||
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth
|
|
@ -19,17 +19,20 @@ model = dict(
    teacher=dict(
        cfg_path='mmcls::resnet/resnet34_8xb32_in1k.py', pretrained=True),
    teacher_ckpt='resnet34_8xb32_in1k_20210831-f257d4e6.pth',
    student_recorders=dict(
        fc=dict(type='ModuleOutputs', source='head.fc'),
        data_samples=dict(type='ModuleInputs', source='')),
    teacher_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')),
    distill_losses=dict(loss_wsld=dict(type='WSLD', tau=2, loss_weight=2.5)),
    loss_forward_mappings=dict(
        loss_wsld=dict(
            student=dict(recorder='fc', from_student=True),
            teacher=dict(recorder='fc', from_student=False),
            data_samples=dict(
                recorder='data_samples', from_student=True, data_idx=1))))
    distiller=dict(
        student_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc'),
            data_samples=dict(type='ModuleInputs', source='')),
        teacher_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        distill_losses=dict(
            loss_wsld=dict(type='WSLD', tau=2, loss_weight=2.5)),
        loss_forward_mappings=dict(
            loss_wsld=dict(
                student=dict(recorder='fc', from_student=True),
                teacher=dict(recorder='fc', from_student=False),
                data_samples=dict(
                    recorder='data_samples', from_student=True, data_idx=1)))))

find_unused_parameters = True
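# Note on the new `distiller` block above: each recorder caches one module's
# inputs/outputs during the forward pass, and `loss_forward_mappings` names
# which cached tensor feeds which keyword argument of each distillation loss.
# The sketch below only illustrates that data flow under assumed names; it is
# not MMRazor's ConfigurableDistiller implementation.
def compute_distill_losses(distill_losses, loss_forward_mappings,
                           student_records, teacher_records):
    """`distill_losses` maps loss names to built loss modules (callables);
    `*_records` map recorder names to the tensors cached this iteration."""
    losses = {}
    for loss_name, mappings in loss_forward_mappings.items():
        kwargs = {}
        for arg_name, spec in mappings.items():
            records = (student_records
                       if spec['from_student'] else teacher_records)
            value = records[spec['recorder']]
            if 'data_idx' in spec:  # e.g. pick one tensor of a ModuleInputs record
                value = value[spec['data_idx']]
            kwargs[arg_name] = value
        losses[loss_name] = distill_losses[loss_name](**kwargs)
    return losses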
@ -1,4 +1,4 @@
|
|||
_base_ = ['./cwd_fpn_gfl_r101_gfl_r50_1x_coco.py']
|
||||
_base_ = ['./cwd_fpn_retina_r101_retina_r50_1x_coco.py']
|
||||
|
||||
model = dict(
|
||||
architecture=dict(
|
|
@ -16,42 +16,38 @@ model = dict(
|
|||
cfg_path='mmdet::faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py',
|
||||
pretrained=False),
|
||||
teacher_ckpt=teacher_ckpt,
|
||||
distill_losses=dict(
|
||||
loss_cwd_fpn0=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn1=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn2=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn3=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn4=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10)),
|
||||
student_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
|
||||
teacher_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
|
||||
loss_forward_mappings=dict(
|
||||
loss_cwd_fpn0=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=0),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=0),
|
||||
),
|
||||
loss_cwd_fpn1=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=1),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=1),
|
||||
),
|
||||
loss_cwd_fpn2=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=2),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=2),
|
||||
),
|
||||
loss_cwd_fpn3=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=3),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=3),
|
||||
),
|
||||
loss_cwd_fpn4=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=4),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=4),
|
||||
),
|
||||
),
|
||||
)
|
||||
distiller=dict(
|
||||
type='ConfigurableDistiller',
|
||||
student_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
|
||||
teacher_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
|
||||
distill_losses=dict(
|
||||
loss_cwd_fpn0=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn1=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn2=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn3=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10),
|
||||
loss_cwd_fpn4=dict(
|
||||
type='ChannelWiseDivergence', tau=1, loss_weight=10)),
|
||||
loss_forward_mappings=dict(
|
||||
loss_cwd_fpn0=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=0),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=0)),
|
||||
loss_cwd_fpn1=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=1),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=1)),
|
||||
loss_cwd_fpn2=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=2),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=2)),
|
||||
loss_cwd_fpn3=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=3),
|
||||
preds_T=dict(from_student=False, recorder='fpn', data_idx=3)),
|
||||
loss_cwd_fpn4=dict(
|
||||
preds_S=dict(from_student=True, recorder='fpn', data_idx=4),
|
||||
preds_T=dict(from_student=False, recorder='fpn',
|
||||
data_idx=4)))))
|
||||
|
||||
find_unused_parameters = True
|
||||
|
||||
|
|
|
@ -7,3 +7,7 @@ model = dict(
|
|||
teacher=dict(
|
||||
cfg_path='mmdet::retinanet/retinanet_r101_fpn_2x_coco.py',
|
||||
pretrained=True))
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
Models:
|
||||
- Name: cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco
|
||||
In Collection: CWD
|
||||
Metadata:
|
||||
Location: cls head
|
||||
Student:
|
||||
Metrics:
|
||||
box AP: 40.2
|
||||
Config: mmdet::gfl/gfl_r50_fpn_1x_coco.py
|
||||
Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth
|
||||
Teacher:
|
||||
Metrics:
|
||||
box AP: 44.7
|
||||
Config: mmdet::gfl/gfl_r101_fpn_mstrain_2x_coco.py
|
||||
Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth
|
||||
Results:
|
||||
- Task: Object Detection
|
||||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 41.9
|
||||
Config: configs/distill/mmdet/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco.py
|
||||
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211222-655dff39.pth
|
|
@ -0,0 +1,37 @@
# CWD

> [Channel-wise Knowledge Distillation for Dense Prediction](https://arxiv.org/abs/2011.13256)

<!-- [ALGORITHM] -->

## Abstract

Knowledge distillation (KD) has been proven to be a simple and effective tool for training compact models. Almost all KD variants for dense prediction tasks align the student and teacher networks' feature maps in the spatial domain, typically by minimizing point-wise and/or pair-wise discrepancy. Observing that in semantic segmentation, some layers' feature activations of each channel tend to encode saliency of scene categories (analogue to class activation mapping), we propose to align features channel-wise between the student and teacher networks. To this end, we first transform the feature map of each channel into a probability map using softmax normalization, and then minimize the Kullback-Leibler (KL) divergence of the corresponding channels of the two networks. By doing so, our method focuses on mimicking the soft distributions of channels between networks. In particular, the KL divergence enables learning to pay more attention to the most salient regions of the channel-wise maps, presumably corresponding to the most useful signals for semantic segmentation. Experiments demonstrate that our channel-wise distillation outperforms almost all existing spatial distillation methods for semantic segmentation considerably, and requires less computational cost during training. We consistently achieve superior performance on three benchmarks with various network structures.

![pipeline](/docs/en/imgs/model_zoo/cwd/pipeline.png)
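The core operation described above is easy to state concretely: flatten each channel's spatial map, softmax-normalize it into a probability distribution, and penalize the KL divergence between matching student and teacher channels. Below is a minimal, illustrative PyTorch sketch of that loss; it is not MMRazor's `ChannelWiseDivergence` module, and the `tau`/`loss_weight` defaults are simply taken from the configs in this commit.

```python
import torch.nn.functional as F


def channel_wise_divergence(preds_S, preds_T, tau=1.0, loss_weight=5.0):
    """Sketch of channel-wise KD on feature maps of shape (N, C, H, W)."""
    N, C, H, W = preds_S.shape
    # Treat every channel as one distribution over its H*W locations.
    logits_S = preds_S.reshape(N * C, H * W) / tau
    logits_T = preds_T.reshape(N * C, H * W) / tau
    # KL(teacher || student), summed over locations, averaged per channel.
    loss = F.kl_div(
        F.log_softmax(logits_S, dim=1),
        F.softmax(logits_T, dim=1),
        reduction='sum') * (tau**2) / (N * C)
    return loss_weight * loss
```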
## Results and models

### Segmentation

| Location | Dataset | Teacher | Student | mIoU | mIoU(T) | mIoU(S) | Config | Download |
| :------: | :--------: | :-----: | :-----: | :---: | :-----: | :-----: | :----------: | :------: |
| logits | cityscapes | [pspnet_r101](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py) | [pspnet_r18](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py) | 75.54 | 79.76 | 74.87 | [config](<>) | [teacher](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) \| [model](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_mIoU-75.54_20211222-3a26ee1c.pth) \| [log](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_20211212_205711.log.json) |

### Detection

| Location | Dataset | Teacher | Student | mAP | mAP(T) | mAP(S) | Config | Download |
| :------: | :-----: | :-----: | :-----: | :--: | :----: | :----: | :----------: | :------: |
| cls head | COCO | [gfl_r101_2x](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py) | [gfl_r50_1x](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_1x_coco.py) | 41.9 | 44.7 | 40.2 | [config](<>) | [teacher](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth) \| [model](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211222-655dff39.pth) \| [log](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211212_205444.log.json) |

## Citation

```latex
@inproceedings{shu2021channel,
  title={Channel-Wise Knowledge Distillation for Dense Prediction},
  author={Shu, Changyong and Liu, Yifan and Gao, Jianfei and Yan, Zheng and Shen, Chunhua},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={5311--5320},
  year={2021}
}
```
@ -0,0 +1,33 @@
_base_ = [
    'mmseg::_base_/datasets/cityscapes.py',
    'mmseg::_base_/schedules/schedule_80k.py',
    'mmseg::_base_/default_runtime.py'
]

teacher_ckpt = 'https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth'  # noqa: E501
model = dict(
    _scope_='mmrazor',
    type='SingleTeacherDistill',
    architecture=dict(
        cfg_path='mmseg::pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py',
        pretrained=False),
    teacher=dict(
        cfg_path='mmseg::pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py',
        pretrained=False),
    teacher_ckpt=teacher_ckpt,
    distiller=dict(
        type='ConfigurableDistiller',
        distill_losses=dict(
            loss_cwd=dict(type='ChannelWiseDivergence', tau=1, loss_weight=5)),
        student_recorders=dict(
            logits=dict(type='ModuleOutputs', source='decode_head.conv_seg')),
        teacher_recorders=dict(
            logits=dict(type='ModuleOutputs', source='decode_head.conv_seg')),
        loss_forward_mappings=dict(
            loss_cwd=dict(
                preds_S=dict(from_student=True, recorder='logits'),
                preds_T=dict(from_student=False, recorder='logits')))))

find_unused_parameters = True

val_cfg = dict(_delete_=True, type='mmrazor.SingleTeacherDistillValLoop')
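# The `val_cfg` above uses `_delete_=True` so that the distillation val loop
# fully replaces the `val_cfg` inherited from the mmseg base configs instead
# of being merged into it. The helper below sketches that merge convention
# under assumed semantics; it is an illustration, not mmengine's actual
# config-merging code.
def merge_cfg(base: dict, override: dict) -> dict:
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict):
            value = dict(value)
            if value.pop('_delete_', False):
                merged[key] = value  # drop the inherited dict entirely
            elif isinstance(merged.get(key), dict):
                merged[key] = merge_cfg(merged[key], value)
            else:
                merged[key] = value
        else:
            merged[key] = value
    return merged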
@ -0,0 +1,41 @@
|
|||
Collections:
|
||||
- Name: CWD
|
||||
Metadata:
|
||||
Training Data:
|
||||
- Cityscapes
|
||||
- COCO
|
||||
Paper:
|
||||
URL: https://arxiv.org/abs/2011.13256
|
||||
Title: Channel-wise Knowledge Distillation for Dense Prediction
|
||||
README: configs/distill/mmseg/cwd/README.md
|
||||
Code:
|
||||
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/cwd.py#L10
|
||||
Version: v0.1.0
|
||||
Converted From:
|
||||
Code:
|
||||
- https://github.com/pppppM/mmsegmentation-distiller
|
||||
- https://github.com/pppppM/mmdetection-distiller
|
||||
Models:
|
||||
- Name: cwd_logits_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k
|
||||
In Collection: CWD
|
||||
Metadata:
|
||||
Location: logits
|
||||
Student:
|
||||
Metrics:
|
||||
mIoU: 74.87
|
||||
mIoU(ms+flip): 76.04
|
||||
Config: mmseg::pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth
|
||||
Teacher:
|
||||
Metrics:
|
||||
mIoU: 79.76
|
||||
mIoU(ms+flip): 81.01
|
||||
Config: mmseg::pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py
|
||||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth
|
||||
Results:
|
||||
- Task: Semantic Segmentation
|
||||
Dataset: Cityscapes
|
||||
Metrics:
|
||||
mIoU: 75.54
|
||||
Config: configs/distill/mmseg/cwd/cwd_logits_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k.py
|
||||
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_mIoU-75.54_20211222-3a26ee1c.pth
|
|
@ -1,79 +0,0 @@
|
|||
_base_ = [
|
||||
'../../_base_/datasets/mmcls/imagenet_bs32.py',
|
||||
'../../_base_/schedules/mmcls/imagenet_bs256.py',
|
||||
'../../_base_/mmcls_runtime.py'
|
||||
]
|
||||
|
||||
# model settings
|
||||
student = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(3, ),
|
||||
style='pytorch'),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=512,
|
||||
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
||||
|
||||
# teacher settings
|
||||
teacher_ckpt = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth' # noqa: E501
|
||||
|
||||
teacher = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
init_cfg=dict(type='Pretrained', checkpoint=teacher_ckpt),
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
depth=34,
|
||||
num_stages=4,
|
||||
out_indices=(3, ),
|
||||
style='pytorch'),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=512,
|
||||
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
||||
|
||||
# algorithm setting
|
||||
algorithm = dict(
|
||||
type='GeneralDistill',
|
||||
architecture=dict(
|
||||
type='MMClsArchitecture',
|
||||
model=student,
|
||||
),
|
||||
with_student_loss=True,
|
||||
with_teacher_loss=False,
|
||||
distiller=dict(
|
||||
type='SingleTeacherDistiller',
|
||||
teacher=teacher,
|
||||
teacher_trainable=False,
|
||||
teacher_norm_eval=True,
|
||||
components=[
|
||||
dict(
|
||||
student_module='neck.gap',
|
||||
teacher_module='neck.gap',
|
||||
losses=[
|
||||
dict(
|
||||
type='DistanceWiseRKD',
|
||||
name='distance_wise_loss',
|
||||
loss_weight=25.0,
|
||||
with_l2_norm=True),
|
||||
dict(
|
||||
type='AngleWiseRKD',
|
||||
name='angle_wise_loss',
|
||||
loss_weight=50.0,
|
||||
with_l2_norm=True),
|
||||
])
|
||||
]),
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
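# The (now removed) config above combines DistanceWiseRKD and AngleWiseRKD on
# the pooled `neck.gap` features. For reference, the distance-wise term of
# relational KD compares batch-internal distance structures; the function
# below is an illustrative sketch of that idea, not MMRazor's DistanceWiseRKD
# module.
import torch
import torch.nn.functional as F


def distance_wise_rkd(feat_s, feat_t, loss_weight=25.0, with_l2_norm=True):
    """Sketch of distance-wise RKD on (N, C) pooled features."""
    if with_l2_norm:
        feat_s = F.normalize(feat_s, p=2, dim=1)
        feat_t = F.normalize(feat_t, p=2, dim=1)

    def rel_dist(feat):
        dist = torch.cdist(feat, feat, p=2)  # pairwise Euclidean distances
        return dist / (dist[dist > 0].mean() + 1e-12)  # normalize by mean

    return loss_weight * F.smooth_l1_loss(rel_dist(feat_s), rel_dist(feat_t))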
@ -1,78 +0,0 @@
|
|||
_base_ = [
|
||||
'../../_base_/datasets/mmcls/imagenet_bs32.py',
|
||||
'../../_base_/schedules/mmcls/imagenet_bs256.py',
|
||||
'../../_base_/mmcls_runtime.py'
|
||||
]
|
||||
|
||||
# model settings
|
||||
student = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(3, ),
|
||||
style='pytorch'),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=512,
|
||||
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
||||
|
||||
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth' # noqa: E501
|
||||
|
||||
# teacher settings
|
||||
teacher = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
depth=34,
|
||||
num_stages=4,
|
||||
out_indices=(3, ),
|
||||
style='pytorch'),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=512,
|
||||
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
||||
|
||||
# algorithm setting
|
||||
algorithm = dict(
|
||||
type='GeneralDistill',
|
||||
architecture=dict(
|
||||
type='MMClsArchitecture',
|
||||
model=student,
|
||||
),
|
||||
with_student_loss=True,
|
||||
# teacher_trainable and with_teacher_loss have a dependency
|
||||
# relationship, if teacher_trainable is false, then
|
||||
# with_teacher_loss must be false.
|
||||
with_teacher_loss=False,
|
||||
distiller=dict(
|
||||
type='SingleTeacherDistiller',
|
||||
teacher=teacher,
|
||||
teacher_trainable=False,
|
||||
teacher_norm_eval=True,
|
||||
components=[
|
||||
dict(
|
||||
student_module='head.fc',
|
||||
teacher_module='head.fc',
|
||||
losses=[
|
||||
dict(
|
||||
type='WSLD',
|
||||
name='loss_wsld',
|
||||
tau=2,
|
||||
loss_weight=2.5,
|
||||
num_classes=1000)
|
||||
])
|
||||
]),
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -1,11 +0,0 @@
|
|||
modules:
|
||||
backbone.layer1.0: depthsepconv
|
||||
backbone.layer2.0: mb_k3e4_se
|
||||
backbone.layer3.0: mb_k5e6_se
|
||||
backbone.layer3.1: mb_k5e6_se
|
||||
backbone.layer4.0: mb_k5e6_se
|
||||
backbone.layer4.1: mb_k5e6_se
|
||||
backbone.layer5.0: mb_k3e6_se
|
||||
backbone.layer6.0: mb_k5e6_se
|
||||
backbone.layer7.0: convbnact
|
||||
channels:
|
|
@ -1,8 +0,0 @@
|
|||
_base_ = ['./cream_14_supernet_mobilenet.py']
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
fix_subnet = 'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(fix_subnet=fix_subnet)
|
||||
|
||||
find_unused_parameters = False
|
|
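# The `fix_subnet` YAMLs referenced above simply map every mutable module path
# to its chosen candidate (see the `modules:` listings in this diff). A rough
# sketch of consuming such a file is given below; the `fix_chosen` call is an
# assumed interface used only for illustration, not necessarily MMRazor's
# exact API.
import yaml


def load_subnet_choices(path):
    """Return {module_path: chosen_candidate_name} from a fix_subnet YAML."""
    with open(path) as f:
        subnet = yaml.safe_load(f)
    return dict(subnet.get('modules') or {})


# choices = load_subnet_choices(
#     'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml')
# for name, module in supernet.named_modules():
#     if name in choices:
#         module.fix_chosen(choices[name])  # assumed mutable interface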
@ -1,241 +0,0 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
# convert image from BGR to RGB
|
||||
to_rgb=True,
|
||||
)
|
||||
|
||||
# file_client_args = dict(
|
||||
# backend='petrel',
|
||||
# path_mapping=dict({
|
||||
# './data/imagenet': 's3://openmmlab/datasets/classification/imagenet',
|
||||
# 'data/imagenet': 's3://openmmlab/datasets/classification/imagenet'
|
||||
# }))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='RandomResizedCrop', scale=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeEdge',
|
||||
scale=73,
|
||||
edge='short',
|
||||
backend='pillow',
|
||||
interpolation='bicubic'),
|
||||
dict(type='CenterCrop', crop_size=64),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# scheduler
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False),
|
||||
]
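# With power=1.0, eta_min=0.0 and by_epoch=False, the PolyLR entry above decays
# the learning rate linearly over the 300k iterations set in train_cfg:
#     lr(i) = eta_min + (base_lr - eta_min) * (1 - i / max_iters) ** power
# Illustrative helper (the usual poly-schedule formula is assumed here;
# mmengine's PolyLR class is the authoritative implementation):
def poly_lr(i, base_lr=0.5, eta_min=0.0, power=1.0, max_iters=300000):
    return eta_min + (base_lr - eta_min) * (1 - i / max_iters)**power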
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||
val_cfg = dict()
|
||||
test_cfg = dict()
|
||||
|
||||
# runtime
|
||||
|
||||
# defaults to use registries in mmrazor
|
||||
default_scope = 'mmcls'
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
vis_backends = [dict(type='LocalVisBackend')]
|
||||
visualizer = dict(
|
||||
type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=8,
|
||||
act_cfg=(dict(type='ReLU'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
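# The second SE activation above is a hard sigmoid gate. With bias=3,
# divisor=6 and the [0, 1] clamp it evaluates to clip((x + 3) / 6, 0, 1); a
# tiny reference implementation, assuming the usual mmcv HSigmoid definition:
def hsigmoid(x, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0):
    return min(max((x + bias) / divisor, min_value), max_value)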
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict( # DepthwiseSep
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
depthsepconv=dict(
|
||||
type='DepthwiseSeparableConv',
|
||||
dw_kernel_size=3,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish'))))
|
||||
|
||||
_MIDDLE_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 1, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
[40, 2, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
[80, 2, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
[96, 1, 1, _MIDDLE_STAGE_MUTABLE],
|
||||
[192, 1, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
]
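# Each arch_setting row above is expanded into one searchable stage when the
# backbone is built. The loop below is only a sketch of that expansion under
# the documented (channel, num_blocks, stride, mutable_cfg) convention; the
# real SearchableMobileNet constructor differs in detail.
def expand_arch_setting(arch_setting, widen_factor=1.0):
    stages = []
    for channel, num_blocks, stride, mutable_cfg in arch_setting:
        out_channels = int(channel * widen_factor)
        blocks = []
        for i in range(num_blocks):
            block_stride = stride if i == 0 else 1  # downsample only once
            blocks.append(
                dict(
                    out_channels=out_channels,
                    stride=block_stride,
                    mutable_cfg=mutable_cfg))
        stages.append(blocks)
    return stages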
|
||||
|
||||
norm_cfg = dict(type='BN')
|
||||
supernet = dict(
|
||||
_scope_='mmcls',
|
||||
type='ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
_scope_='mmrazor',
|
||||
type='SearchableMobileNet',
|
||||
arch_setting=arch_setting,
|
||||
first_channels=16,
|
||||
last_channels=320,
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='Swish'),
|
||||
out_indices=(6, ),
|
||||
),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='mmrazor.CreamClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=320,
|
||||
num_features=1280,
|
||||
act_cfg=dict(type='Swish'),
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -1,22 +0,0 @@
|
|||
modules:
|
||||
backbone.layers.0.0: shuffle_5x5
|
||||
backbone.layers.0.1: shuffle_3x3
|
||||
backbone.layers.0.2: shuffle_3x3
|
||||
backbone.layers.0.3: shuffle_3x3
|
||||
backbone.layers.1.0: shuffle_xception
|
||||
backbone.layers.1.1: shuffle_3x3
|
||||
backbone.layers.1.2: shuffle_xception
|
||||
backbone.layers.1.3: shuffle_7x7
|
||||
backbone.layers.2.0: shuffle_7x7
|
||||
backbone.layers.2.1: shuffle_7x7
|
||||
backbone.layers.2.2: shuffle_xception
|
||||
backbone.layers.2.3: shuffle_xception
|
||||
backbone.layers.2.4: shuffle_3x3
|
||||
backbone.layers.2.5: shuffle_7x7
|
||||
backbone.layers.2.6: shuffle_5x5
|
||||
backbone.layers.2.7: shuffle_xception
|
||||
backbone.layers.3.0: shuffle_7x7
|
||||
backbone.layers.3.1: shuffle_7x7
|
||||
backbone.layers.3.2: shuffle_7x7
|
||||
backbone.layers.3.3: shuffle_5x5
|
||||
channels:
|
|
@ -1,22 +0,0 @@
|
|||
modules:
|
||||
backbone.layers.0.0: shuffle_5x5
|
||||
backbone.layers.0.1: shuffle_3x3
|
||||
backbone.layers.0.2: shuffle_3x3
|
||||
backbone.layers.0.3: shuffle_3x3
|
||||
backbone.layers.1.0: shuffle_xception
|
||||
backbone.layers.1.1: shuffle_3x3
|
||||
backbone.layers.1.2: shuffle_xception
|
||||
backbone.layers.1.3: shuffle_7x7
|
||||
backbone.layers.2.0: shuffle_7x7
|
||||
backbone.layers.2.1: shuffle_7x7
|
||||
backbone.layers.2.2: shuffle_xception
|
||||
backbone.layers.2.3: shuffle_xception
|
||||
backbone.layers.2.4: shuffle_3x3
|
||||
backbone.layers.2.5: shuffle_7x7
|
||||
backbone.layers.2.6: shuffle_5x5
|
||||
backbone.layers.2.7: shuffle_xception
|
||||
backbone.layers.3.0: shuffle_7x7
|
||||
backbone.layers.3.1: shuffle_7x7
|
||||
backbone.layers.3.2: shuffle_7x7
|
||||
backbone.layers.3.3: shuffle_5x5
|
||||
channels:
|
|
@ -1,8 +0,0 @@
|
|||
_base_ = ['./detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py']
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(fix_subnet=fix_subnet)
|
||||
|
||||
find_unused_parameters = False
|
|
@ -1,87 +0,0 @@
|
|||
_base_ = [
|
||||
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
|
||||
'mmdet::_base_/datasets/coco_detection.py',
|
||||
'mmdet::_base_/schedules/schedule_1x.py',
|
||||
'mmdet::_base_/default_runtime.py'
|
||||
]
|
||||
|
||||
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
|
||||
|
||||
_base_.train_dataloader.dataset.data_root = data_root
|
||||
|
||||
visualizer = None
|
||||
|
||||
log_level = 'INFO'
|
||||
load_from = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
|
||||
resume = False
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
# model settings
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='mmrazor.OneShotMutableOP',
|
||||
candidates=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||
shuffle_5x5=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||
shuffle_7x7=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||
shuffle_xception=dict(
|
||||
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
|
||||
))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 3 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||
[64, 4, _STAGE_MUTABLE],
|
||||
[160, 4, _STAGE_MUTABLE],
|
||||
[320, 8, _STAGE_MUTABLE],
|
||||
[640, 4, _STAGE_MUTABLE],
|
||||
]
|
||||
|
||||
supernet = _base_.model
|
||||
|
||||
supernet.backbone = dict(
|
||||
type='mmrazor.SearchableShuffleNetV2',
|
||||
arch_setting=arch_setting,
|
||||
norm_cfg=norm_cfg,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
widen_factor=1.0,
|
||||
with_last_layer=False)
|
||||
|
||||
supernet.neck = dict(
|
||||
type='FPN',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=[64, 160, 320, 640],
|
||||
out_channels=256,
|
||||
num_outs=5)
|
||||
|
||||
supernet.roi_head.bbox_head = dict(
|
||||
type='Shared4Conv1FCBBoxHead',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=256,
|
||||
fc_out_channels=1024,
|
||||
roi_feat_size=7,
|
||||
num_classes=80,
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[0., 0., 0., 0.],
|
||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||
reg_class_agnostic=False,
|
||||
loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0))
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
fix_subnet=fix_subnet,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -1,114 +0,0 @@
|
|||
_base_ = [
|
||||
'mmdet::faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
|
||||
'mmdet::datasets/coco_detection.py', 'mmdet::schedules/schedule_1x.py',
|
||||
'mmdet::default_runtime.py'
|
||||
]
|
||||
|
||||
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
|
||||
|
||||
train_dataloader = dict(dataset=dict(data_root=data_root, ))
|
||||
|
||||
visualizer = None
|
||||
# custom_hooks = [dict(type='DetVisualizationHook', interval=10)]
|
||||
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume = False
|
||||
|
||||
# TODO: support auto scaling lr
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
# model settings
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='mmrazor.OneShotMutableOP',
|
||||
candidates=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||
shuffle_5x5=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||
shuffle_7x7=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||
shuffle_xception=dict(
|
||||
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
|
||||
))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 3 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||
[64, 4, _STAGE_MUTABLE],
|
||||
[160, 4, _STAGE_MUTABLE],
|
||||
[320, 8, _STAGE_MUTABLE],
|
||||
[640, 4, _STAGE_MUTABLE],
|
||||
]
|
||||
|
||||
supernet = dict(
|
||||
type='RetinaNet',
|
||||
data_preprocessor=dict(
|
||||
type='DetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32),
|
||||
backbone=dict(
|
||||
type='mmrazor.SearchableShuffleNetV2',
|
||||
arch_setting=arch_setting,
|
||||
norm_cfg=norm_cfg,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
widen_factor=1.0,
|
||||
with_last_layer=False),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[64, 160, 320, 640],
|
||||
out_channels=256,
|
||||
num_outs=5),
|
||||
bbox_head=dict(
|
||||
type='RetinaHead',
|
||||
num_classes=80,
|
||||
in_channels=256,
|
||||
stacked_convs=4,
|
||||
feat_channels=256,
|
||||
anchor_generator=dict(
|
||||
type='AnchorGenerator',
|
||||
octave_base_scale=4,
|
||||
scales_per_octave=3,
|
||||
ratios=[0.5, 1.0, 2.0],
|
||||
strides=[8, 16, 32, 64, 128]),
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[.0, .0, .0, .0],
|
||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||
loss_cls=dict(
|
||||
type='FocalLoss',
|
||||
use_sigmoid=True,
|
||||
gamma=2.0,
|
||||
alpha=0.25,
|
||||
loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.5,
|
||||
neg_iou_thr=0.4,
|
||||
min_pos_iou=0,
|
||||
ignore_iof_thr=-1),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
test_cfg=dict(
|
||||
nms_pre=1000,
|
||||
min_bbox_size=0,
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100))
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -6,24 +6,23 @@ Collections:
|
|||
Paper:
|
||||
URL: https://arxiv.org/abs/1904.00420
|
||||
Title: Single Path One-Shot Neural Architecture Search with Uniform Sampling
|
||||
README: configs/nas/spos/README.md
|
||||
README: configs/nas/mmcls/spos/README.md
|
||||
Code:
|
||||
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/algorithms/spos.py
|
||||
Version: v0.1.0
|
||||
Converted From:
|
||||
Code: https://github.com/megvii-model/SinglePathOneShot
|
||||
Models:
|
||||
- Name: spos_subnet_shufflenetv2_8xb128_in1k
|
||||
- Name: spos_shufflenet_subnet_8xb128_in1k
|
||||
In Collection: SPOS
|
||||
Metadata:
|
||||
FLOPs: 330 M
|
||||
Supernet: ShuffleNetV2
|
||||
Mutable: https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml
|
||||
Subnet: https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml
|
||||
Results:
|
||||
- Task: Image Classification
|
||||
Dataset: ImageNet-1k
|
||||
Metrics:
|
||||
Top 1 Accuracy: 73.87
|
||||
Top 5 Accuracy: 91.60
|
||||
Config: configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py
|
||||
Config: configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py
|
||||
Weights: https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth
|
|
@ -0,0 +1,30 @@
|
|||
_base_ = [
|
||||
'mmrazor::_base_/settings/imagenet_bs1024_spos.py',
|
||||
'mmrazor::_base_/nas_backbones/spos_mobilenet_supernet.py',
|
||||
'mmcls::_base_/default_runtime.py',
|
||||
]
|
||||
|
||||
# model
|
||||
supernet = dict(
|
||||
type='ImageClassifier',
|
||||
# data_preprocessor=_base_.preprocess_cfg,
|
||||
backbone=_base_.nas_backbone,
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5)))
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=dict(type='mmrazor.OneShotModuleMutator'))
|
||||
|
||||
find_unused_parameters = True
|
|
@ -0,0 +1,30 @@
|
|||
_base_ = [
|
||||
'mmrazor::_base_/settings/imagenet_bs1024_spos.py',
|
||||
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py',
|
||||
'mmcls::_base_/default_runtime.py',
|
||||
]
|
||||
|
||||
# model
|
||||
supernet = dict(
|
||||
type='ImageClassifier',
|
||||
# data_preprocessor=_base_.preprocess_cfg,
|
||||
backbone=_base_.nas_backbone,
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5)))
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=dict(type='mmrazor.OneShotModuleMutator'))
|
||||
|
||||
find_unused_parameters = True
|
|
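# SPOS trains the supernet above with uniform single-path sampling: at every
# iteration one candidate is drawn for each OneShotMutableOP and only that
# path is executed and updated. The snippet below sketches the sampling step;
# in MMRazor this is the job of the configured OneShotModuleMutator, not of
# user code.
import random


def sample_single_path(search_space):
    """search_space: {mutable_name: [candidate_names]} -> random choice each."""
    return {name: random.choice(cands) for name, cands in search_space.items()}


# Example with the ShuffleNetV2 candidates used throughout these configs:
# sample_single_path({
#     'backbone.layers.0.0':
#     ['shuffle_3x3', 'shuffle_5x5', 'shuffle_7x7', 'shuffle_xception']
# })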
@ -0,0 +1,9 @@
|
|||
_base_ = ['./spos_supernet_frcnn_shufflenet_coco_1x.py']
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(fix_subnet=fix_subnet)
|
||||
|
||||
find_unused_parameters = False
|
|
@ -0,0 +1,29 @@
|
|||
_base_ = [
|
||||
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
|
||||
'mmdet::_base_/datasets/coco_detection.py',
|
||||
'mmdet::_base_/schedules/schedule_1x.py',
|
||||
'mmdet::_base_/default_runtime.py',
|
||||
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py'
|
||||
]
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
|
||||
supernet = _base_.model
|
||||
|
||||
supernet.backbone = _base_.nas_backbone
|
||||
supernet.backbone.norm_cfg = norm_cfg
|
||||
supernet.backbone.out_indices = (0, 1, 2, 3)
|
||||
supernet.backbone.with_last_layer = False
|
||||
|
||||
supernet.neck.norm_cfg = norm_cfg
|
||||
supernet.neck.in_channels = [64, 160, 320, 640]
|
||||
|
||||
supernet.roi_head.bbox_head.norm_cfg = norm_cfg
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=dict(type='mmrazor.OneShotModuleMutator'))
|
||||
|
||||
find_unused_parameters = True
|
|
@ -0,0 +1,27 @@
|
|||
_base_ = [
|
||||
'mmdet::_base_/models/retinanet_r50_fpn.py',
|
||||
'mmdet::_base_/datasets/coco_detection.py',
|
||||
'mmdet::_base_/schedules/schedule_1x.py',
|
||||
'mmdet::_base_/default_runtime.py',
|
||||
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py'
|
||||
]
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
|
||||
supernet = _base_.model
|
||||
|
||||
supernet.backbone = _base_.nas_backbone
|
||||
supernet.backbone.norm_cfg = norm_cfg
|
||||
supernet.backbone.out_indices = (0, 1, 2, 3)
|
||||
supernet.backbone.with_last_layer = False
|
||||
|
||||
supernet.neck.norm_cfg = norm_cfg
|
||||
supernet.neck.in_channels = [64, 160, 320, 640]
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=dict(type='mmrazor.OneShotModuleMutator'))
|
||||
|
||||
find_unused_parameters = True
|
|
@ -0,0 +1,9 @@
|
|||
_base_ = './detnas_shufflenet_supernet_8xb128_in1k.py'
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(fix_subnet=fix_subnet)
|
||||
|
||||
find_unused_parameters = False
|
|
@ -0,0 +1 @@
|
|||
_base_ = 'mmrazor::nas/mmcls/spos/shufflenet/spos_shufflenet_supernet_8xb128_in1k.py' # noqa: E501
|
|
@ -7,14 +7,14 @@ Collections:
|
|||
Paper:
|
||||
URL: https://arxiv.org/abs/1903.10979
|
||||
Title: 'DetNAS: Backbone Search for Object Detection'
|
||||
README: configs/nas/detnas/README.md
|
||||
README: configs/nas/mmdet/detnas/README.md
|
||||
Code:
|
||||
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/algorithms/detnas.py
|
||||
Version: v0.1.0
|
||||
Converted From:
|
||||
Code: https://github.com/megvii-model/DetNAS
|
||||
Models:
|
||||
- Name: detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco
|
||||
- Name: detnas_frcnn_shufflenet_subnet_coco_1x
|
||||
In Collection: DetNAS
|
||||
Metadata:
|
||||
FLOPs(Backbone): 340 M
|
||||
|
@ -26,5 +26,5 @@ Models:
|
|||
Dataset: COCO
|
||||
Metrics:
|
||||
box AP: 37.5
|
||||
Config: configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py
|
||||
Config: configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py
|
||||
Weights: https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml
|
|
@ -1,24 +0,0 @@
|
|||
modules:
|
||||
backbone.layer1.0: mb_k3e1
|
||||
backbone.layer2.0: mb_k5e3
|
||||
backbone.layer2.1: mb_k5e3
|
||||
backbone.layer2.2: identity
|
||||
backbone.layer2.3: mb_k3e3
|
||||
backbone.layer3.0: mb_k3e3
|
||||
backbone.layer3.1: identity
|
||||
backbone.layer3.2: identity
|
||||
backbone.layer3.3: mb_k3e3
|
||||
backbone.layer4.0: mb_k7e6
|
||||
backbone.layer4.1: identity
|
||||
backbone.layer4.2: mb_k7e3
|
||||
backbone.layer4.3: mb_k7e3
|
||||
backbone.layer5.0: mb_k3e3
|
||||
backbone.layer5.1: mb_k3e3
|
||||
backbone.layer5.2: mb_k7e3
|
||||
backbone.layer5.3: mb_k5e3
|
||||
backbone.layer6.0: mb_k5e6
|
||||
backbone.layer6.1: mb_k7e3
|
||||
backbone.layer6.2: mb_k7e3
|
||||
backbone.layer6.3: mb_k7e3
|
||||
backbone.layer7.0: mb_k5e6
|
||||
channels:
|
|
@ -1,22 +0,0 @@
|
|||
modules:
|
||||
backbone.layers.0.0: shuffle_7x7
|
||||
backbone.layers.0.1: shuffle_3x3
|
||||
backbone.layers.0.2: shuffle_7x7
|
||||
backbone.layers.0.3: shuffle_3x3
|
||||
backbone.layers.1.0: shuffle_xception
|
||||
backbone.layers.1.1: shuffle_5x5
|
||||
backbone.layers.1.2: shuffle_5x5
|
||||
backbone.layers.1.3: shuffle_3x3
|
||||
backbone.layers.2.0: shuffle_3x3
|
||||
backbone.layers.2.1: shuffle_5x5
|
||||
backbone.layers.2.2: shuffle_3x3
|
||||
backbone.layers.2.3: shuffle_5x5
|
||||
backbone.layers.2.4: shuffle_3x3
|
||||
backbone.layers.2.5: shuffle_xception
|
||||
backbone.layers.2.6: shuffle_5x5
|
||||
backbone.layers.2.7: shuffle_7x7
|
||||
backbone.layers.3.0: shuffle_7x7
|
||||
backbone.layers.3.1: shuffle_3x3
|
||||
backbone.layers.3.2: shuffle_5x5
|
||||
backbone.layers.3.3: shuffle_xception
|
||||
channels:
|
|
@ -1,245 +0,0 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[0., 0., 0.],
|
||||
std=[1., 1., 1.],
|
||||
# convert image from BGR to RGB
|
||||
to_rgb=False,
|
||||
)
|
||||
|
||||
file_client_args = dict(
|
||||
backend='petrel',
|
||||
path_mapping=dict({
|
||||
'./data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||
'data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||
}))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='RandomResizedCrop', scale=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(
|
||||
type='ResizeEdge',
|
||||
scale=256,
|
||||
edge='short',
|
||||
backend='pillow',
|
||||
interpolation='bicubic'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=8,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=8,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# scheduler
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
|
||||
]
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||
val_cfg = dict()
|
||||
test_cfg = dict()
|
||||
|
||||
# runtime
|
||||
|
||||
# defaults to use registries in mmrazor
|
||||
default_scope = 'mmcls'
|
||||
|
||||
log_processor = dict(
|
||||
window_size=100,
|
||||
by_epoch=False,
|
||||
custom_cfg=[
|
||||
dict(
|
||||
data_src='loss',
|
||||
log_name='loss_large_window',
|
||||
method_name='mean',
|
||||
window_size=100)
|
||||
])
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
by_epoch=False,
|
||||
interval=10000,
|
||||
save_last=True,
|
||||
max_keep_ckpts=3),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
visualizer = None
|
||||
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
# vis_backends = [dict(type='LocalVisBackend')]
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
# model
|
||||
norm_cfg = dict(type='BN')
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k5e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k7e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k3e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k5e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k7e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
identity=dict(type='Identity'),
|
||||
))
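# The candidate keys above follow an `mb_k<kernel>e<expand>` naming scheme
# (plus an `identity` skip connection). A tiny helper makes the convention
# explicit; it is only illustrative and not part of the config itself.
import re


def parse_mb_candidate(name):
    """'mb_k5e3' -> {'kernel_size': 5, 'expand_ratio': 3}; None for others."""
    match = re.fullmatch(r'mb_k(\d+)e(\d+)', name)
    if match is None:
        return None
    return dict(kernel_size=int(match[1]), expand_ratio=int(match[2]))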
|
||||
|
||||
_FIRST_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e1=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=1,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')), ))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable_cfg.
|
||||
[24, 1, 1, _FIRST_MUTABLE],
|
||||
[32, 4, 2, _STAGE_MUTABLE],
|
||||
[56, 4, 2, _STAGE_MUTABLE],
|
||||
[112, 4, 2, _STAGE_MUTABLE],
|
||||
[128, 4, 1, _STAGE_MUTABLE],
|
||||
[256, 4, 2, _STAGE_MUTABLE],
|
||||
[432, 1, 1, _STAGE_MUTABLE]
|
||||
]
|
||||
|
||||
norm_cfg = dict(type='BN')
|
||||
supernet = dict(
|
||||
type='ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
_scope_='mmrazor',
|
||||
type='SearchableMobileNet',
|
||||
first_channels=40,
|
||||
last_channels=1728,
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
arch_setting=arch_setting),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1728,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -1,214 +0,0 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
# convert image from BGR to RGB
|
||||
to_rgb=True,
|
||||
)
|
||||
|
||||
file_client_args = dict(
|
||||
backend='petrel',
|
||||
path_mapping=dict({
|
||||
'./data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||
'data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||
}))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='RandomResizedCrop', scale=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='ResizeEdge', scale=256, edge='short', backend='cv2'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# scheduler
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
|
||||
]
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||
val_cfg = dict()
|
||||
test_cfg = dict()
|
||||
|
||||
# runtime
|
||||
|
||||
# defaults to use registries in mmrazor
|
||||
default_scope = 'mmcls'
|
||||
|
||||
log_processor = dict(
|
||||
window_size=100,
|
||||
by_epoch=False,
|
||||
custom_cfg=[
|
||||
dict(
|
||||
data_src='loss',
|
||||
log_name='loss_large_window',
|
||||
method_name='mean',
|
||||
window_size=100)
|
||||
])
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
# record the time of every iteration.
|
||||
timer=dict(type='IterTimerHook'),
|
||||
|
||||
# print log every 100 iterations.
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
|
||||
# enable the parameter scheduler.
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
|
||||
# save checkpoint every 10000 iterations.
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
by_epoch=False,
|
||||
interval=10000,
|
||||
save_last=True,
|
||||
max_keep_ckpts=3),
|
||||
|
||||
# set sampler seed in distributed environment.
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
|
||||
# validation results visualization, set True to enable it.
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
# whether to enable cudnn benchmark
|
||||
cudnn_benchmark=False,
|
||||
|
||||
# set multi process parameters
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
|
||||
# set distributed parameters
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
visualizer = None
|
||||
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
# vis_backends = [dict(type='LocalVisBackend')]
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth"
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
# model
|
||||
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='ShuffleBlock', kernel_size=3, norm_cfg=dict(type='BN')),
|
||||
shuffle_5x5=dict(
|
||||
type='ShuffleBlock', kernel_size=5, norm_cfg=dict(type='BN')),
|
||||
shuffle_7x7=dict(
|
||||
type='ShuffleBlock', kernel_size=7, norm_cfg=dict(type='BN')),
|
||||
shuffle_xception=dict(
|
||||
type='ShuffleXception', norm_cfg=dict(type='BN')),
|
||||
))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg
# (see the sketch after this list).
|
||||
[64, 4, _STAGE_MUTABLE],
|
||||
[160, 4, _STAGE_MUTABLE],
|
||||
[320, 8, _STAGE_MUTABLE],
|
||||
[640, 4, _STAGE_MUTABLE],
|
||||
]
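# A quick illustration of how arch_setting is consumed (a sketch only, not
# mmrazor code): every row expands into `num_blocks` searchable blocks, and
# each block picks one of the four candidate ops in _STAGE_MUTABLE, so this
# SPOS supernet spans 4 ** 20 candidate subnets.
_blocks_per_stage = [num_blocks for _, num_blocks, _ in arch_setting]
_num_searchable_blocks = sum(_blocks_per_stage)    # 4 + 4 + 8 + 4 = 20
_search_space_size = 4**_num_searchable_blocks     # about 1.1e12 subnets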
|
||||
|
||||
norm_cfg = dict(type='BN')
|
||||
supernet = dict(
|
||||
type='ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
_scope_='mmrazor',
|
||||
type='SearchableShuffleNetV2',
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
arch_setting=arch_setting),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
# fix_subnet='configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml'
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -1,421 +0,0 @@
|
|||
backbone.conv1.bn:
|
||||
out_channels: 8
|
||||
raw_out_channels: 48
|
||||
backbone.conv1.conv:
|
||||
in_channels: 3
|
||||
out_channels: 8
|
||||
raw_in_channels: 3
|
||||
raw_out_channels: 48
|
||||
backbone.conv2.bn:
|
||||
out_channels: 1920
|
||||
raw_out_channels: 1920
|
||||
backbone.conv2.conv:
|
||||
in_channels: 280
|
||||
out_channels: 1920
|
||||
raw_in_channels: 480
|
||||
raw_out_channels: 1920
|
||||
backbone.layer1.0.conv.0.bn:
|
||||
out_channels: 8
|
||||
raw_out_channels: 48
|
||||
backbone.layer1.0.conv.0.conv:
|
||||
in_channels: 8
|
||||
out_channels: 8
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 48
|
||||
backbone.layer1.0.conv.1.bn:
|
||||
out_channels: 8
|
||||
raw_out_channels: 24
|
||||
backbone.layer1.0.conv.1.conv:
|
||||
in_channels: 8
|
||||
out_channels: 8
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 24
|
||||
backbone.layer2.0.conv.0.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.0.conv:
|
||||
in_channels: 8
|
||||
out_channels: 96
|
||||
raw_in_channels: 24
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.1.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.1.conv:
|
||||
in_channels: 96
|
||||
out_channels: 96
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.2.bn:
|
||||
out_channels: 16
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.0.conv.2.conv:
|
||||
in_channels: 96
|
||||
out_channels: 16
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.1.conv.0.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.0.conv:
|
||||
in_channels: 16
|
||||
out_channels: 96
|
||||
raw_in_channels: 40
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.1.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.1.conv:
|
||||
in_channels: 96
|
||||
out_channels: 96
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.2.bn:
|
||||
out_channels: 16
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.1.conv.2.conv:
|
||||
in_channels: 96
|
||||
out_channels: 16
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 40
|
||||
backbone.layer3.0.conv.0.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.0.conv:
|
||||
in_channels: 16
|
||||
out_channels: 96
|
||||
raw_in_channels: 40
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.1.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.1.conv:
|
||||
in_channels: 96
|
||||
out_channels: 96
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.0.conv.2.conv:
|
||||
in_channels: 96
|
||||
out_channels: 24
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.1.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.1.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 24
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.2.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.2.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 24
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 48
|
||||
backbone.layer4.0.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.0.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 48
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.1.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.1.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 48
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.2.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.2.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 48
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.3.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.3.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 48
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer5.0.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.2.bn:
|
||||
out_channels: 64
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.0.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 64
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.1.conv.0.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.0.conv:
|
||||
in_channels: 64
|
||||
out_channels: 432
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.1.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.1.conv:
|
||||
in_channels: 432
|
||||
out_channels: 432
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.2.bn:
|
||||
out_channels: 64
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.1.conv.2.conv:
|
||||
in_channels: 432
|
||||
out_channels: 64
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.2.conv.0.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.0.conv:
|
||||
in_channels: 64
|
||||
out_channels: 432
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.1.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.1.conv:
|
||||
in_channels: 432
|
||||
out_channels: 432
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.2.bn:
|
||||
out_channels: 64
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.2.conv.2.conv:
|
||||
in_channels: 432
|
||||
out_channels: 64
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 144
|
||||
backbone.layer6.0.conv.0.bn:
|
||||
out_channels: 648
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.0.conv:
|
||||
in_channels: 64
|
||||
out_channels: 648
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.1.bn:
|
||||
out_channels: 648
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.1.conv:
|
||||
in_channels: 648
|
||||
out_channels: 648
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.2.bn:
|
||||
out_channels: 176
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.0.conv.2.conv:
|
||||
in_channels: 648
|
||||
out_channels: 176
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.1.conv.0.bn:
|
||||
out_channels: 720
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.0.conv:
|
||||
in_channels: 176
|
||||
out_channels: 720
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.1.bn:
|
||||
out_channels: 720
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.1.conv:
|
||||
in_channels: 720
|
||||
out_channels: 720
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.2.bn:
|
||||
out_channels: 176
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.1.conv.2.conv:
|
||||
in_channels: 720
|
||||
out_channels: 176
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.2.conv.0.bn:
|
||||
out_channels: 720
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.0.conv:
|
||||
in_channels: 176
|
||||
out_channels: 720
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.1.bn:
|
||||
out_channels: 720
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.1.conv:
|
||||
in_channels: 720
|
||||
out_channels: 720
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.2.bn:
|
||||
out_channels: 176
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.2.conv.2.conv:
|
||||
in_channels: 720
|
||||
out_channels: 176
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 240
|
||||
backbone.layer7.0.conv.0.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.0.conv:
|
||||
in_channels: 176
|
||||
out_channels: 1440
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.1.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.1.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 1440
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.2.bn:
|
||||
out_channels: 280
|
||||
raw_out_channels: 480
|
||||
backbone.layer7.0.conv.2.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 280
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 480
|
||||
head.fc:
|
||||
in_channels: 1920
|
||||
out_channels: 1000
|
||||
raw_in_channels: 1920
|
||||
raw_out_channels: 1000
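The YAML block above (and the two similar blocks that follow) is an AutoSlim channel config: for every prunable conv/bn layer it records the retained width (`in_channels`, `out_channels`) next to the unpruned width (`raw_in_channels`, `raw_out_channels`). A minimal sketch of reading one of these files into per-layer pruning ratios (assuming a local copy such as `tests/data/MBV2_530M.yaml`):

import yaml

with open('tests/data/MBV2_530M.yaml') as f:  # any of the channel cfg files
    channel_cfg = yaml.safe_load(f)

for name, spec in channel_cfg.items():
    ratio = spec['out_channels'] / spec['raw_out_channels']
    print(f'{name}: keeps {ratio:.2f} of the original output channels')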
|
|
@ -1,421 +0,0 @@
|
|||
backbone.conv1.bn:
|
||||
out_channels: 8
|
||||
raw_out_channels: 48
|
||||
backbone.conv1.conv:
|
||||
in_channels: 3
|
||||
out_channels: 8
|
||||
raw_in_channels: 3
|
||||
raw_out_channels: 48
|
||||
backbone.conv2.bn:
|
||||
out_channels: 1920
|
||||
raw_out_channels: 1920
|
||||
backbone.conv2.conv:
|
||||
in_channels: 480
|
||||
out_channels: 1920
|
||||
raw_in_channels: 480
|
||||
raw_out_channels: 1920
|
||||
backbone.layer1.0.conv.0.bn:
|
||||
out_channels: 8
|
||||
raw_out_channels: 48
|
||||
backbone.layer1.0.conv.0.conv:
|
||||
in_channels: 8
|
||||
out_channels: 8
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 48
|
||||
backbone.layer1.0.conv.1.bn:
|
||||
out_channels: 8
|
||||
raw_out_channels: 24
|
||||
backbone.layer1.0.conv.1.conv:
|
||||
in_channels: 8
|
||||
out_channels: 8
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 24
|
||||
backbone.layer2.0.conv.0.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.0.conv:
|
||||
in_channels: 8
|
||||
out_channels: 96
|
||||
raw_in_channels: 24
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.1.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.1.conv:
|
||||
in_channels: 96
|
||||
out_channels: 96
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.2.bn:
|
||||
out_channels: 16
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.0.conv.2.conv:
|
||||
in_channels: 96
|
||||
out_channels: 16
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.1.conv.0.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.0.conv:
|
||||
in_channels: 16
|
||||
out_channels: 96
|
||||
raw_in_channels: 40
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.1.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.1.conv:
|
||||
in_channels: 96
|
||||
out_channels: 96
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.2.bn:
|
||||
out_channels: 16
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.1.conv.2.conv:
|
||||
in_channels: 96
|
||||
out_channels: 16
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 40
|
||||
backbone.layer3.0.conv.0.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.0.conv:
|
||||
in_channels: 16
|
||||
out_channels: 96
|
||||
raw_in_channels: 40
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.1.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.1.conv:
|
||||
in_channels: 96
|
||||
out_channels: 96
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.0.conv.2.conv:
|
||||
in_channels: 96
|
||||
out_channels: 24
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.1.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.1.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 24
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.2.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.2.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 24
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 48
|
||||
backbone.layer4.0.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.2.bn:
|
||||
out_channels: 56
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.0.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 56
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.1.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.0.conv:
|
||||
in_channels: 56
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.2.bn:
|
||||
out_channels: 56
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.1.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 56
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.2.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.0.conv:
|
||||
in_channels: 56
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.2.bn:
|
||||
out_channels: 56
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.2.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 56
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.3.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.0.conv:
|
||||
in_channels: 56
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.2.bn:
|
||||
out_channels: 56
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.3.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 56
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer5.0.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.0.conv:
|
||||
in_channels: 56
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.2.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.0.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 96
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.1.conv.0.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.0.conv:
|
||||
in_channels: 96
|
||||
out_channels: 432
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.1.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.1.conv:
|
||||
in_channels: 432
|
||||
out_channels: 432
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.2.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.1.conv.2.conv:
|
||||
in_channels: 432
|
||||
out_channels: 96
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.2.conv.0.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.0.conv:
|
||||
in_channels: 96
|
||||
out_channels: 432
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.1.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.1.conv:
|
||||
in_channels: 432
|
||||
out_channels: 432
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.2.bn:
|
||||
out_channels: 96
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.2.conv.2.conv:
|
||||
in_channels: 432
|
||||
out_channels: 96
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 144
|
||||
backbone.layer6.0.conv.0.bn:
|
||||
out_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.0.conv:
|
||||
in_channels: 96
|
||||
out_channels: 864
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.1.bn:
|
||||
out_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.1.conv:
|
||||
in_channels: 864
|
||||
out_channels: 864
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.2.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.0.conv.2.conv:
|
||||
in_channels: 864
|
||||
out_channels: 240
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.1.conv.0.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.0.conv:
|
||||
in_channels: 240
|
||||
out_channels: 1440
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.1.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.1.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 1440
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.2.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.1.conv.2.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 240
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.2.conv.0.bn:
|
||||
out_channels: 960
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.0.conv:
|
||||
in_channels: 240
|
||||
out_channels: 960
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.1.bn:
|
||||
out_channels: 960
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.1.conv:
|
||||
in_channels: 960
|
||||
out_channels: 960
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.2.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.2.conv.2.conv:
|
||||
in_channels: 960
|
||||
out_channels: 240
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 240
|
||||
backbone.layer7.0.conv.0.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.0.conv:
|
||||
in_channels: 240
|
||||
out_channels: 1440
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.1.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.1.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 1440
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.2.bn:
|
||||
out_channels: 480
|
||||
raw_out_channels: 480
|
||||
backbone.layer7.0.conv.2.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 480
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 480
|
||||
head.fc:
|
||||
in_channels: 1920
|
||||
out_channels: 1000
|
||||
raw_in_channels: 1920
|
||||
raw_out_channels: 1000
|
|
@ -1,421 +0,0 @@
|
|||
backbone.conv1.bn:
|
||||
out_channels: 32
|
||||
raw_out_channels: 48
|
||||
backbone.conv1.conv:
|
||||
in_channels: 3
|
||||
out_channels: 32
|
||||
raw_in_channels: 3
|
||||
raw_out_channels: 48
|
||||
backbone.conv2.bn:
|
||||
out_channels: 1920
|
||||
raw_out_channels: 1920
|
||||
backbone.conv2.conv:
|
||||
in_channels: 480
|
||||
out_channels: 1920
|
||||
raw_in_channels: 480
|
||||
raw_out_channels: 1920
|
||||
backbone.layer1.0.conv.0.bn:
|
||||
out_channels: 32
|
||||
raw_out_channels: 48
|
||||
backbone.layer1.0.conv.0.conv:
|
||||
in_channels: 32
|
||||
out_channels: 32
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 48
|
||||
backbone.layer1.0.conv.1.bn:
|
||||
out_channels: 16
|
||||
raw_out_channels: 24
|
||||
backbone.layer1.0.conv.1.conv:
|
||||
in_channels: 32
|
||||
out_channels: 16
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 24
|
||||
backbone.layer2.0.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.0.conv:
|
||||
in_channels: 16
|
||||
out_channels: 144
|
||||
raw_in_channels: 24
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer2.0.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.0.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 24
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.1.conv.0.bn:
|
||||
out_channels: 176
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 176
|
||||
raw_in_channels: 40
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.1.bn:
|
||||
out_channels: 176
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.1.conv:
|
||||
in_channels: 176
|
||||
out_channels: 176
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer2.1.conv.2.bn:
|
||||
out_channels: 24
|
||||
raw_out_channels: 40
|
||||
backbone.layer2.1.conv.2.conv:
|
||||
in_channels: 176
|
||||
out_channels: 24
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 40
|
||||
backbone.layer3.0.conv.0.bn:
|
||||
out_channels: 192
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.0.conv:
|
||||
in_channels: 24
|
||||
out_channels: 192
|
||||
raw_in_channels: 40
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.1.bn:
|
||||
out_channels: 192
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.1.conv:
|
||||
in_channels: 192
|
||||
out_channels: 192
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer3.0.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.0.conv.2.conv:
|
||||
in_channels: 192
|
||||
out_channels: 48
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.1.conv.0.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 240
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.1.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.1.conv:
|
||||
in_channels: 240
|
||||
out_channels: 240
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.1.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.1.conv.2.conv:
|
||||
in_channels: 240
|
||||
out_channels: 48
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.2.conv.0.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 144
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.1.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.1.conv:
|
||||
in_channels: 144
|
||||
out_channels: 144
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer3.2.conv.2.bn:
|
||||
out_channels: 48
|
||||
raw_out_channels: 48
|
||||
backbone.layer3.2.conv.2.conv:
|
||||
in_channels: 144
|
||||
out_channels: 48
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 48
|
||||
backbone.layer4.0.conv.0.bn:
|
||||
out_channels: 264
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.0.conv:
|
||||
in_channels: 48
|
||||
out_channels: 264
|
||||
raw_in_channels: 48
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.1.bn:
|
||||
out_channels: 264
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.1.conv:
|
||||
in_channels: 264
|
||||
out_channels: 264
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 288
|
||||
backbone.layer4.0.conv.2.bn:
|
||||
out_channels: 88
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.0.conv.2.conv:
|
||||
in_channels: 264
|
||||
out_channels: 88
|
||||
raw_in_channels: 288
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.1.conv.0.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.0.conv:
|
||||
in_channels: 88
|
||||
out_channels: 288
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.1.bn:
|
||||
out_channels: 288
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.1.conv:
|
||||
in_channels: 288
|
||||
out_channels: 288
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.1.conv.2.bn:
|
||||
out_channels: 88
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.1.conv.2.conv:
|
||||
in_channels: 288
|
||||
out_channels: 88
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.2.conv.0.bn:
|
||||
out_channels: 336
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.0.conv:
|
||||
in_channels: 88
|
||||
out_channels: 336
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.1.bn:
|
||||
out_channels: 336
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.1.conv:
|
||||
in_channels: 336
|
||||
out_channels: 336
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.2.conv.2.bn:
|
||||
out_channels: 88
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.2.conv.2.conv:
|
||||
in_channels: 336
|
||||
out_channels: 88
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.3.conv.0.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.0.conv:
|
||||
in_channels: 88
|
||||
out_channels: 432
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.1.bn:
|
||||
out_channels: 432
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.1.conv:
|
||||
in_channels: 432
|
||||
out_channels: 432
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer4.3.conv.2.bn:
|
||||
out_channels: 88
|
||||
raw_out_channels: 96
|
||||
backbone.layer4.3.conv.2.conv:
|
||||
in_channels: 432
|
||||
out_channels: 88
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 96
|
||||
backbone.layer5.0.conv.0.bn:
|
||||
out_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.0.conv:
|
||||
in_channels: 88
|
||||
out_channels: 576
|
||||
raw_in_channels: 96
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.1.bn:
|
||||
out_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.1.conv:
|
||||
in_channels: 576
|
||||
out_channels: 576
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 576
|
||||
backbone.layer5.0.conv.2.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.0.conv.2.conv:
|
||||
in_channels: 576
|
||||
out_channels: 144
|
||||
raw_in_channels: 576
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.1.conv.0.bn:
|
||||
out_channels: 576
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.0.conv:
|
||||
in_channels: 144
|
||||
out_channels: 576
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.1.bn:
|
||||
out_channels: 576
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.1.conv:
|
||||
in_channels: 576
|
||||
out_channels: 576
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.1.conv.2.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.1.conv.2.conv:
|
||||
in_channels: 576
|
||||
out_channels: 144
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.2.conv.0.bn:
|
||||
out_channels: 648
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.0.conv:
|
||||
in_channels: 144
|
||||
out_channels: 648
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.1.bn:
|
||||
out_channels: 648
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.1.conv:
|
||||
in_channels: 648
|
||||
out_channels: 648
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer5.2.conv.2.bn:
|
||||
out_channels: 144
|
||||
raw_out_channels: 144
|
||||
backbone.layer5.2.conv.2.conv:
|
||||
in_channels: 648
|
||||
out_channels: 144
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 144
|
||||
backbone.layer6.0.conv.0.bn:
|
||||
out_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.0.conv:
|
||||
in_channels: 144
|
||||
out_channels: 864
|
||||
raw_in_channels: 144
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.1.bn:
|
||||
out_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.1.conv:
|
||||
in_channels: 864
|
||||
out_channels: 864
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 864
|
||||
backbone.layer6.0.conv.2.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.0.conv.2.conv:
|
||||
in_channels: 864
|
||||
out_channels: 240
|
||||
raw_in_channels: 864
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.1.conv.0.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.0.conv:
|
||||
in_channels: 240
|
||||
out_channels: 1440
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.1.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.1.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 1440
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.1.conv.2.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.1.conv.2.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 240
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.2.conv.0.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.0.conv:
|
||||
in_channels: 240
|
||||
out_channels: 1440
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.1.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.1.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 1440
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer6.2.conv.2.bn:
|
||||
out_channels: 240
|
||||
raw_out_channels: 240
|
||||
backbone.layer6.2.conv.2.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 240
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 240
|
||||
backbone.layer7.0.conv.0.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.0.conv:
|
||||
in_channels: 240
|
||||
out_channels: 1440
|
||||
raw_in_channels: 240
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.1.bn:
|
||||
out_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.1.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 1440
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 1440
|
||||
backbone.layer7.0.conv.2.bn:
|
||||
out_channels: 480
|
||||
raw_out_channels: 480
|
||||
backbone.layer7.0.conv.2.conv:
|
||||
in_channels: 1440
|
||||
out_channels: 480
|
||||
raw_in_channels: 1440
|
||||
raw_out_channels: 480
|
||||
head.fc:
|
||||
in_channels: 1920
|
||||
out_channels: 1000
|
||||
raw_in_channels: 1920
|
||||
raw_out_channels: 1000
|
|
@ -1,13 +0,0 @@
_base_ = [
    './autoslim_mbv2_supernet_8xb256_in1k.py',
]

algorithm = dict(distiller=None, input_shape=(3, 224, 224))

searcher = dict(
    type='GreedySearcher',
    target_flops=[500000000, 300000000, 200000000],
    max_channel_bins=12,
    metrics='accuracy')

data = dict(samples_per_gpu=1024, workers_per_gpu=4)
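As a rough reading of the searcher settings above: `target_flops` is given in raw multiply-add counts (500M/300M/200M), and `max_channel_bins=12` quantizes each layer's width into twelfths, the same 2/12 ... 12/12 grid that the RatioPruner in the supernet config samples from. A small sketch of that grid (illustrative only):

max_channel_bins = 12
width_grid = tuple(i / max_channel_bins for i in range(2, max_channel_bins + 1))
print(width_grid)                       # 2/12, 3/12, ..., 1.0

for flops in (500000000, 300000000, 200000000):
    print(f'{flops / 1e6:.0f} MFLOPs')  # 500, 300, 200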
|
|
@ -1,29 +0,0 @@
_base_ = [
    './autoslim_mbv2_supernet_8xb256_in1k.py',
]

model = dict(
    head=dict(
        loss=dict(
            type='LabelSmoothLoss',
            mode='original',
            label_smooth_val=0.1,
            loss_weight=1.0)))

# FIXME: you may replace this with the channel_cfg searched by yourself
channel_cfg = [
    'https://download.openmmlab.com/mmrazor/v0.1/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k/autoslim_mbv2_subnet_8xb256_in1k_flops-0.53M_acc-74.23_20211222-e5208bbd_channel_cfg.yaml',  # noqa: E501
    'https://download.openmmlab.com/mmrazor/v0.1/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k/autoslim_mbv2_subnet_8xb256_in1k_flops-0.32M_acc-72.73_20211222-b5b0b33c_channel_cfg.yaml',  # noqa: E501
    'https://download.openmmlab.com/mmrazor/v0.1/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k/autoslim_mbv2_subnet_8xb256_in1k_flops-0.22M_acc-71.39_20211222-43117c7b_channel_cfg.yaml'  # noqa: E501
]

algorithm = dict(
    architecture=dict(type='MMClsArchitecture', model=model),
    distiller=None,
    retraining=True,
    bn_training_mode=False,
    channel_cfg=channel_cfg)

runner = dict(type='EpochBasedRunner', max_epochs=300)

find_unused_parameters = True
|
|
@ -1,51 +0,0 @@
_base_ = [
    '../../_base_/datasets/mmcls/imagenet_bs256_autoslim.py',
    '../../_base_/schedules/mmcls/imagenet_bs2048_autoslim.py',
    '../../_base_/mmcls_runtime.py'
]

model = dict(
    type='mmcls.ImageClassifier',
    backbone=dict(type='MobileNetV2', widen_factor=1.5),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        in_channels=1920,
        loss=dict(
            type='LabelSmoothLoss',
            mode='original',
            label_smooth_val=0.1,
            loss_weight=1.0),
        topk=(1, 5),
    ))

algorithm = dict(
    type='AutoSlim',
    architecture=dict(type='MMClsArchitecture', model=model),
    distiller=dict(
        type='SelfDistiller',
        components=[
            dict(
                student_module='head.fc',
                teacher_module='head.fc',
                losses=[
                    dict(
                        type='KLDivergence',
                        name='loss_kd',
                        tau=1,
                        loss_weight=1,
                    )
                ]),
        ]),
    pruner=dict(
        type='RatioPruner',
        ratios=(2 / 12, 3 / 12, 4 / 12, 5 / 12, 6 / 12, 7 / 12, 8 / 12, 9 / 12,
                10 / 12, 11 / 12, 1.0)),
    retraining=False,
    bn_training_mode=True,
    input_shape=None)

runner = dict(type='EpochBasedRunner', max_epochs=50)

use_ddp_wrapper = True
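The `SelfDistiller` block above configures AutoSlim-style inplace distillation: the full-width model's `head.fc` output acts as the teacher for every sampled sub-network, using a temperature-scaled KL divergence. A minimal sketch of such a loss (a generic KD loss, not the mmrazor implementation; the function name is illustrative):

import torch.nn.functional as F


def kd_loss(student_logits, teacher_logits, tau=1.0, loss_weight=1.0):
    """Temperature-scaled KL divergence between student and teacher logits."""
    p_teacher = F.softmax(teacher_logits.detach() / tau, dim=1)
    log_p_student = F.log_softmax(student_logits / tau, dim=1)
    return loss_weight * (tau**2) * F.kl_div(
        log_p_student, p_teacher, reduction='batchmean')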
|
|
@ -1,216 +0,0 @@
|
|||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
# !architecture config
|
||||
# ==========================================================================
|
||||
architecture = dict(
|
||||
_scope_='mmcls',
|
||||
type='ImageClassifier',
|
||||
backbone=dict(type='MobileNetV2', widen_factor=1.5),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1920,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
mode='original',
|
||||
label_smooth_val=0.1,
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
||||
# ==========================================================================
|
||||
|
||||
# !dataset config
|
||||
# ==========================================================================
|
||||
# data preprocessor
|
||||
data_preprocessor = dict(
|
||||
type='ImgDataPreprocessor',
|
||||
# RGB format normalization parameters
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
# convert image from BGR to RGB
|
||||
bgr_to_rgb=True,
|
||||
)
|
||||
|
||||
dataset_type = 'ImageNet'
|
||||
|
||||
# ceph config
|
||||
use_ceph = True
|
||||
|
||||
ceph_file_client_args = dict(
|
||||
backend='petrel',
|
||||
path_mapping=dict({
|
||||
'./data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||
'data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||
}))
|
||||
disk_file_client_args = dict(backend='disk')
|
||||
|
||||
if use_ceph:
|
||||
file_client_args = ceph_file_client_args
|
||||
else:
|
||||
file_client_args = disk_file_client_args
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(
|
||||
type='RandomResizedCrop',
|
||||
scale=224,
|
||||
crop_ratio_range=(0.25, 1.0),
|
||||
backend='pillow'),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
_batch_size_per_gpu = 256
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=_batch_size_per_gpu,
|
||||
num_workers=16,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='data/imagenet',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=_batch_size_per_gpu,
|
||||
num_workers=16,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='data/imagenet',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# !runtime config
|
||||
# ==========================================================================
|
||||
# configure log processor
|
||||
log_processor = dict(window_size=100)
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
# record the time of every iteration.
|
||||
timer=dict(type='IterTimerHook'),
|
||||
|
||||
# print log every 100 iterations.
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
|
||||
# enable the parameter scheduler.
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
|
||||
# save checkpoint per epoch.
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook', max_keep_ckpts=50, save_best='auto',
|
||||
interval=1),
|
||||
|
||||
# set sampler seed in distributed environment.
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
|
||||
# validation results visualization, set True to enable it.
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
# whether to enable cudnn benchmark
|
||||
cudnn_benchmark=False,
|
||||
|
||||
# set multi process parameters
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
|
||||
# set distributed parameters
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
vis_backends = [dict(type='LocalVisBackend')]
|
||||
visualizer = dict(type='ClsVisualizer', vis_backends=vis_backends)
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
# ==========================================================================
|
||||
|
||||
# !autoslim algorithm config
|
||||
# ==========================================================================
|
||||
channel_cfg_paths = [
|
||||
'tests/data/MBV2_220M.yaml', 'tests/data/MBV2_320M.yaml',
|
||||
'tests/data/MBV2_530M.yaml'
|
||||
]
|
||||
model = dict(
|
||||
_scope_='mmrazor',
|
||||
type='SlimmableNetwork',
|
||||
architecture=architecture,
|
||||
data_preprocessor=data_preprocessor,
|
||||
channel_cfg_paths=channel_cfg_paths,
|
||||
mutator=dict(
|
||||
type='SlimmableChannelMutator',
|
||||
mutable_cfg=dict(type='SlimmableMutableChannel'),
|
||||
tracer_cfg=dict(
|
||||
type='BackwardTracer',
|
||||
loss_calculator=dict(type='ImageClassifierPseudoLoss'))))
|
||||
# ==========================================================================
|
||||
|
||||
# !model wrapper config
|
||||
# ==========================================================================
|
||||
model_wrapper_cfg = dict(
|
||||
type='mmrazor.SlimmableNetworkDDP',
|
||||
broadcast_buffers=False,
|
||||
find_unused_parameters=True)
|
||||
# ==========================================================================
|
||||
|
||||
# !scheduler config
|
||||
# ==========================================================================
|
||||
paramwise_cfg = dict(
|
||||
bias_decay_mult=0.0, norm_decay_mult=0.0, dwconv_decay_mult=0.0)
|
||||
optimizer = dict(
|
||||
type='SGD', lr=0.5, momentum=0.9, nesterov=True, weight_decay=0.0001)
|
||||
optim_wrapper = dict(
|
||||
optimizer=optimizer,
|
||||
paramwise_cfg=paramwise_cfg,
|
||||
accumulative_counts=len(channel_cfg_paths))
|
||||
|
||||
# learning policy
|
||||
max_epochs = 300
|
||||
|
||||
param_scheduler = dict(
|
||||
type='PolyLR',
|
||||
power=1.0,
|
||||
eta_min=0.0,
|
||||
by_epoch=True,
|
||||
end=max_epochs,
|
||||
convert_to_iter_based=True)
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1)
|
||||
val_cfg = dict(type='mmrazor.SlimmableValLoop')
|
||||
test_cfg = dict()
|
||||
# ==========================================================================
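One detail worth spelling out from the schedule above: `accumulative_counts=len(channel_cfg_paths)` means each data batch is run once per slimmable width and the optimizer steps only after all three backward passes. A rough sketch of that pattern with a stand-in model (not mmengine's OptimWrapper; all names below are placeholders):

import torch
import torch.nn as nn

net = nn.Linear(8, 2)                      # stand-in for the slimmable network
opt = torch.optim.SGD(net.parameters(), lr=0.5, momentum=0.9)
widths = ['220M', '320M', '530M']          # stand-ins for the three channel cfgs

x = torch.randn(4, 8)
opt.zero_grad()
for _ in widths:                           # one backward pass per width
    loss = net(x).sum() / len(widths)
    loss.backward()                        # gradients accumulate across widths
opt.step()                                 # a single step per batch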
|
|
@ -1,228 +0,0 @@
|
|||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
# !architecture config
|
||||
# ==========================================================================
|
||||
architecture = dict(
|
||||
_scope_='mmcls',
|
||||
type='ImageClassifier',
|
||||
backbone=dict(type='MobileNetV2', widen_factor=1.5),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1920,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
mode='original',
|
||||
label_smooth_val=0.1,
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
))
|
||||
# ==========================================================================
|
||||
|
||||
# !dataset config
|
||||
# ==========================================================================
|
||||
# data preprocessor
|
||||
data_preprocessor = dict(
|
||||
type='ImgDataPreprocessor',
|
||||
# RGB format normalization parameters
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
# convert image from BGR to RGB
|
||||
bgr_to_rgb=True,
|
||||
)
|
||||
|
||||
dataset_type = 'ImageNet'
|
||||
|
||||
# ceph config
|
||||
use_ceph = True
|
||||
|
||||
ceph_file_client_args = dict(
|
||||
backend='petrel',
|
||||
path_mapping=dict({
|
||||
'./data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||
'data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||
}))
|
||||
disk_file_client_args = dict(backend='disk')
|
||||
|
||||
if use_ceph:
|
||||
file_client_args = ceph_file_client_args
|
||||
else:
|
||||
file_client_args = disk_file_client_args
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(
|
||||
type='RandomResizedCrop',
|
||||
scale=224,
|
||||
crop_ratio_range=(0.25, 1.0),
|
||||
backend='pillow'),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
_batch_size_per_gpu = 256
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=_batch_size_per_gpu,
|
||||
num_workers=16,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='data/imagenet',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=_batch_size_per_gpu,
|
||||
num_workers=16,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='data/imagenet',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# !runtime config
|
||||
# ==========================================================================
|
||||
# configure log processor
|
||||
log_processor = dict(window_size=100)
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
# record the time of every iteration.
|
||||
timer=dict(type='IterTimerHook'),
|
||||
|
||||
# print log every 100 iterations.
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
|
||||
# enable the parameter scheduler.
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
|
||||
# save checkpoint per epoch.
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook', max_keep_ckpts=50, save_best='auto',
|
||||
interval=1),
|
||||
|
||||
# set sampler seed in distributed environment.
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),

    # validation results visualization, set True to enable it.
    visualization=dict(type='VisualizationHook', enable=False),
)

# configure environment
env_cfg = dict(
    # whether to enable cudnn benchmark
    cudnn_benchmark=False,

    # set multi process parameters
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),

    # set distributed parameters
    dist_cfg=dict(backend='nccl'),
)

# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='ClsVisualizer', vis_backends=vis_backends)

# set log level
log_level = 'INFO'

# load from which checkpoint
load_from = None

# whether to resume training from the loaded checkpoint
resume = False
# ==========================================================================

# !autoslim algorithm config
# ==========================================================================
num_samples = 2
model = dict(
    _scope_='mmrazor',
    type='AutoSlim',
    num_samples=num_samples,
    architecture=architecture,
    data_preprocessor=data_preprocessor,
    distiller=dict(
        type='ConfigurableDistiller',
        teacher_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        student_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        distill_losses=dict(
            loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)),
        loss_forward_mappings=dict(
            loss_kl=dict(
                preds_S=dict(recorder='fc', from_student=True),
                preds_T=dict(recorder='fc', from_student=False)))),
    mutator=dict(
        type='OneShotChannelMutator',
        mutable_cfg=dict(
            type='OneShotMutableChannel',
            candidate_choices=list(i / 12 for i in range(2, 13)),
            candidate_mode='ratio'),
        tracer_cfg=dict(
            type='BackwardTracer',
            loss_calculator=dict(type='ImageClassifierPseudoLoss'))))
# ==========================================================================

# !model wrapper config
# ==========================================================================
model_wrapper_cfg = dict(
    type='mmrazor.AutoSlimDDP',
    broadcast_buffers=False,
    find_unused_parameters=False)
# ==========================================================================

# !scheduler config
# ==========================================================================
paramwise_cfg = dict(
    bias_decay_mult=0.0, norm_decay_mult=0.0, dwconv_decay_mult=0.0)
optimizer = dict(
    type='SGD', lr=0.5, momentum=0.9, nesterov=True, weight_decay=0.0001)
optim_wrapper = dict(
    optimizer=optimizer,
    paramwise_cfg=paramwise_cfg,
    accumulative_counts=num_samples + 2)

# learning policy
max_epochs = 50

param_scheduler = dict(
    type='PolyLR',
    power=1.0,
    eta_min=0.0,
    by_epoch=True,
    end=max_epochs,
    convert_to_iter_based=True)

# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='mmrazor.AutoSlimValLoop')
test_cfg = dict()
# ==========================================================================
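As a quick sanity check on the search space and the gradient-accumulation setting this supernet config encodes, a minimal sketch in plain Python (no mmrazor imports; the printed values are approximate and the variable names are illustrative only):

# Width ratios available to each OneShotMutableChannel: 2/12 ... 12/12.
candidate_choices = [i / 12 for i in range(2, 13)]
print(len(candidate_choices))  # 11 candidate widths
print(candidate_choices[0], candidate_choices[-1])  # ~0.1667 and 1.0

# accumulative_counts = num_samples + 2 corresponds to one backward pass for
# the widest subnet, one for the narrowest and `num_samples` random subnets
# before a single optimizer step (the usual AutoSlim sandwich rule).
num_samples = 2
accumulative_counts = num_samples + 2
print(accumulative_counts)  # 4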

@ -0,0 +1,50 @@
_base_ = [
    'mmrazor::_base_/settings/imagenet_bs2048_autoslim.py',
    'mmcls::_base_/models/mobilenet_v2_1x.py',
    'mmcls::_base_/default_runtime.py',
]

supernet = _base_.model
supernet.backbone.widen_factor = 1.5
supernet.head.in_channels = 1920

# !dataset config
# ==========================================================================
# data preprocessor
data_preprocessor = dict(
    type='ImgDataPreprocessor',
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    bgr_to_rgb=True)

# !autoslim algorithm config
# ==========================================================================
channel_cfg_paths = [
    'tests/data/MBV2_220M.yaml', 'tests/data/MBV2_320M.yaml',
    'tests/data/MBV2_530M.yaml'
]

model = dict(
    _delete_=True,
    _scope_='mmrazor',
    type='SlimmableNetwork',
    architecture=supernet,
    data_preprocessor=data_preprocessor,
    channel_cfg_paths=channel_cfg_paths,
    mutator=dict(
        type='SlimmableChannelMutator',
        mutable_cfg=dict(type='SlimmableMutableChannel'),
        tracer_cfg=dict(
            type='BackwardTracer',
            loss_calculator=dict(type='ImageClassifierPseudoLoss'))))

model_wrapper_cfg = dict(
    type='mmrazor.SlimmableNetworkDDP',
    broadcast_buffers=False,
    find_unused_parameters=True)

optim_wrapper = dict(accumulative_counts=3)

val_cfg = dict(type='mmrazor.SlimmableValLoop')
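A minimal sketch of inspecting this slimmable config once it is saved to disk, assuming mmengine, mmcls and mmrazor are installed; the file path below is hypothetical:

from mmengine.config import Config

# Hypothetical location of the config above inside the repo.
cfg = Config.fromfile('configs/pruning/autoslim/autoslim_slimmable.py')

print(cfg.model.type)                         # SlimmableNetwork
print(cfg.model.channel_cfg_paths)            # the three MBV2_*.yaml widths
print(cfg.optim_wrapper.accumulative_counts)  # 3, one pass per subnet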

@ -1,3 +1,3 @@
-_base_ = 'autoslim_slimmable.py'
+_base_ = 'autoslim_mbv2_1.5x_supernet_8xb256_in1k.py'

model = dict(channel_cfg_paths='tests/data/MBV2_530M.yaml')
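The override above relies on mmengine-style config inheritance: nested dicts from `_base_` are merged key by key, so only `channel_cfg_paths` changes while the rest of the `model` dict is inherited. A simplified, dependency-free sketch of that merge behaviour (real mmengine merging also handles `_delete_`, lists and scopes):

def merge(base, override):
    # Recursively merge `override` into a copy of `base`, key by key.
    out = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(out.get(key), dict):
            out[key] = merge(out[key], value)
        else:
            out[key] = value
    return out

base_model = {
    'type': 'SlimmableNetwork',
    'channel_cfg_paths': ['tests/data/MBV2_220M.yaml',
                          'tests/data/MBV2_320M.yaml',
                          'tests/data/MBV2_530M.yaml'],
}
child_model = {'channel_cfg_paths': 'tests/data/MBV2_530M.yaml'}
print(merge(base_model, child_model))
# {'type': 'SlimmableNetwork',
#  'channel_cfg_paths': 'tests/data/MBV2_530M.yaml'}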

@ -0,0 +1,66 @@
_base_ = [
    'mmrazor::_base_/settings/imagenet_bs2048_autoslim.py',
    'mmcls::_base_/models/mobilenet_v2_1x.py',
    'mmcls::_base_/default_runtime.py',
]

supernet = _base_.model
supernet.backbone.widen_factor = 1.5
supernet.head.in_channels = 1920

# !dataset config
# ==========================================================================
# data preprocessor
data_preprocessor = dict(
    type='ImgDataPreprocessor',
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    bgr_to_rgb=True,
)

# !autoslim algorithm config
num_samples = 2
model = dict(
    _scope_='mmrazor',
    type='AutoSlim',
    num_samples=num_samples,
    architecture=supernet,
    data_preprocessor=data_preprocessor,
    distiller=dict(
        type='ConfigurableDistiller',
        teacher_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        student_recorders=dict(
            fc=dict(type='ModuleOutputs', source='head.fc')),
        distill_losses=dict(
            loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)),
        loss_forward_mappings=dict(
            loss_kl=dict(
                preds_S=dict(recorder='fc', from_student=True),
                preds_T=dict(recorder='fc', from_student=False)))),
    mutator=dict(
        type='OneShotChannelMutator',
        mutable_cfg=dict(
            type='OneShotMutableChannel',
            candidate_choices=list(i / 12 for i in range(2, 13)),
            candidate_mode='ratio'),
        tracer_cfg=dict(
            type='BackwardTracer',
            loss_calculator=dict(type='ImageClassifierPseudoLoss'))))

model_wrapper_cfg = dict(
    type='mmrazor.AutoSlimDDP',
    broadcast_buffers=False,
    find_unused_parameters=False)

optim_wrapper = dict(accumulative_counts=num_samples + 2)

# learning policy
max_epochs = 50
param_scheduler = dict(end=max_epochs)

# train, val, test setting
train_cfg = dict(max_epochs=max_epochs)
val_cfg = dict(type='mmrazor.AutoSlimValLoop')
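A minimal sketch of launching this supernet training from Python through mmengine's Runner, assuming mmengine, mmcls and mmrazor are installed; the config path is hypothetical and a single, non-distributed process is used for brevity:

from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'configs/pruning/autoslim/autoslim_mbv2_1.5x_supernet_8xb256_in1k.py')
cfg.work_dir = 'work_dirs/autoslim_supernet'  # where logs and ckpts are saved

runner = Runner.from_cfg(cfg)
runner.train()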

@ -1,8 +1,9 @@
 Import:
-  - configs/distill/cwd/metafile.yml
-  - configs/distill/wsld/metafile.yml
-  - configs/distill/rkd/metafile.yml
-  - configs/nas/darts/metafile.yml
-  - configs/nas/detnas/metafile.yml
-  - configs/nas/spos/metafile.yml
-  - configs/pruning/autoslim/metafile.yml
+  - configs/distill/mmseg/cwd/metafile.yml
+  - configs/distill/mmdet/cwd/metafile.yml
+  - configs/distill/mmcls/wsld/metafile.yml
+  - configs/distill/mmcls/rkd/metafile.yml
+  # - configs/nas/darts/metafile.yml
+  - configs/nas/mmdet/detnas/metafile.yml
+  - configs/nas/mmcls/spos/metafile.yml
+  # - configs/pruning/autoslim/metafile.yml

@ -0,0 +1,53 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
from pathlib import Path

import requests
import yaml

MMRAZOR_ROOT = Path(__file__).absolute().parents[1]


class TestMetafiles:

    def get_metafiles(self, code_path):
        """
        Function: get the metafile of all configs from model-index.yml
        """
        metafile = os.path.join(code_path, 'model-index.yml')
        with open(metafile, 'r') as f:
            meta = yaml.safe_load(f)
        return meta['Import']

    def test_metafiles(self):
        metafiles = self.get_metafiles(MMRAZOR_ROOT)
        for mf in metafiles:
            metafile = os.path.abspath(os.path.join(MMRAZOR_ROOT, mf))
            with open(metafile, 'r') as f:
                meta = yaml.safe_load(f)
            for model in meta['Models']:
                # 1. weights url check
                r = requests.head(model['Weights'], timeout=4)
                assert r.status_code != 404, \
                    f"can't connect url {model['Weights']} in " \
                    f'metafile {metafile}'

                # 2. config check
                dir_path = os.path.abspath(os.path.join(metafile, '../'))
                # list all files which are in the same directory of
                # current metafile
                config_files = os.listdir(dir_path)

                if isinstance(model['Config'], list):
                    # TODO: 3. log error
                    continue

                assert (model['Config'].split('/')[-1] in config_files), \
                    f"config error in {metafile} model {model['Name']}"

                # 4. name check
                # erase '.py'
                correct_name = model['Config'].split('/')[-1][:-3]
                assert model['Name'] == correct_name, \
                    f'name error in {metafile}, correct name should ' \
                    f'be {correct_name}'
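A minimal sketch of running these checks outside the CI, assuming the file above is saved as tests/test_metafiles.py (a hypothetical path) and that the machine has network access, since every `Weights` URL is probed with an HTTP HEAD request:

import pytest

# -q keeps the output short; pytest.main returns the usual exit code.
raise SystemExit(pytest.main(['-q', 'tests/test_metafiles.py']))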

@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from pathlib import Path

import torch


def parse_args():
    parser = argparse.ArgumentParser(
        description='Process a checkpoint to be published')
    parser.add_argument('checkpoint', help='input checkpoint filename')
    parser.add_argument(
        '--inplace', action='store_true', help='replace origin ckpt')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    new_state_dict = dict()

    for key, value in checkpoint['state_dict'].items():
        if key.startswith('architecture.model.distiller.teacher'):
            new_key = key.replace('architecture.model.distiller.teacher',
                                  'architecture.teacher')
        elif key.startswith('architecture.model'):
            new_key = key.replace('architecture.model', 'architecture')
        else:
            new_key = key

        new_state_dict[new_key] = value

    checkpoint['state_dict'] = new_state_dict

    if args.inplace:
        torch.save(checkpoint, args.checkpoint)
    else:
        ckpt_path = Path(args.checkpoint)
        ckpt_name = ckpt_path.stem
        ckpt_dir = ckpt_path.parent
        new_ckpt_path = ckpt_dir / f'{ckpt_name}_latest.pth'
        torch.save(checkpoint, new_ckpt_path)


if __name__ == '__main__':
    main()
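To make the renaming rule in main() concrete, a small self-contained illustration on a toy state dict (the keys below are made up for the demo):

toy_state_dict = {
    'architecture.model.distiller.teacher.head.fc.weight': 0,
    'architecture.model.backbone.conv1.weight': 1,
    'data_preprocessor.mean': 2,
}

renamed = {}
for key, value in toy_state_dict.items():
    if key.startswith('architecture.model.distiller.teacher'):
        # map the old nested teacher prefix to the new flat one
        new_key = key.replace('architecture.model.distiller.teacher',
                              'architecture.teacher')
    elif key.startswith('architecture.model'):
        # drop the extra `.model` level for all other architecture keys
        new_key = key.replace('architecture.model', 'architecture')
    else:
        new_key = key
    renamed[new_key] = value

print(list(renamed))
# ['architecture.teacher.head.fc.weight',
#  'architecture.backbone.conv1.weight',
#  'data_preprocessor.mean']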

@ -45,7 +45,8 @@ def main():
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None