Add benchmark tools & Reorganize configs

pull/198/head
pppppM 2022-07-14 16:11:38 +00:00 committed by pppppM
parent 5ddfed5040
commit 5bf1eca4e4
83 changed files with 1361 additions and 3833 deletions

@@ -0,0 +1,250 @@
import argparse
import os
import os.path as osp
import re
from collections import OrderedDict
from pathlib import Path
import mmcv
import wget
from modelindex.load_model_index import load
from rich.console import Console
from rich.syntax import Syntax
from rich.table import Table
console = Console()
MMRAZOR_ROOT = Path(__file__).absolute().parents[1]
METRIC_MAPPINGS = {
'accuracy/top1': 'Top 1 Accuracy',
'accuracy/top5': 'Top 5 Accuracy'
}
def parse_args():
parser = argparse.ArgumentParser(
description="Test all models' accuracy in model-index.yml")
parser.add_argument(
'partition', type=str, help='Cluster partition to use.')
parser.add_argument('checkpoint_root', help='Checkpoint file root path.')
parser.add_argument(
'--job-name',
type=str,
default='razor-test-benchmark',
help='Slurm job name prefix')
parser.add_argument('--port', type=int, default=29666, help='dist port')
parser.add_argument(
'--models', nargs='+', type=str, help='Specify model names to run.')
parser.add_argument('--gpus', type=int, default=8, help='num gpus')
parser.add_argument(
'--work-dir',
default='work_dirs/benchmark_test',
help='the dir to save metrics')
parser.add_argument(
'--run', action='store_true', help='run script directly')
parser.add_argument(
'--summary', action='store_true', help='collect results')
parser.add_argument(
'--local',
action='store_true',
help='run locally instead of on a cluster.')
parser.add_argument(
'--mail', type=str, help='Mail address to watch test status.')
parser.add_argument(
'--mail-type',
nargs='+',
default=['BEGIN'],
choices=['NONE', 'BEGIN', 'END', 'FAIL', 'REQUEUE', 'ALL'],
help='Mail types to watch test status.')
parser.add_argument(
'--quotatype',
default=None,
choices=['reserved', 'auto', 'spot'],
help='Quota type, only available for phoenix-slurm>=0.2')
args = parser.parse_args()
return args
def create_test_job_batch(commands, model_info, args, port):
fname = model_info.name
config = Path(model_info.config)
# assert config.exists(), f'{fname}: {config} not found.'
http_prefix = 'https://download.openmmlab.com/mmrazor/'
if 's3://' in args.checkpoint_root:
from mmcv.fileio import FileClient
from petrel_client.common.exception import AccessDeniedError
file_client = FileClient.infer_client(uri=args.checkpoint_root)
checkpoint = file_client.join_path(
args.checkpoint_root, model_info.weights[len(http_prefix):])
try:
exists = file_client.exists(checkpoint)
except AccessDeniedError:
exists = False
else:
checkpoint_root = Path(args.checkpoint_root)
checkpoint = checkpoint_root / model_info.weights[len(http_prefix):]
checkpoint.parent.mkdir(parents=True, exist_ok=True)
exists = checkpoint.exists()
if exists:
print(f'{checkpoint} already exists.')
else:
wget.download(model_info.weights, str(checkpoint))
print(f'\nSaved in {checkpoint}.')
job_name = f'{args.job_name}_{fname}'
work_dir = Path(args.work_dir) / fname
work_dir.mkdir(parents=True, exist_ok=True)
if args.quotatype is not None:
quota_cfg = f'#SBATCH --quotatype {args.quotatype}\n'
else:
quota_cfg = ''
launcher = 'none' if args.local else 'slurm'
runner = 'python' if args.local else 'srun python'
master_port = f'MASTER_PORT={port}'
script_name = osp.join('tools', 'test.py')
job_script = (
f'#!/bin/bash\n'
f'#SBATCH --output {work_dir}/job.%j.out\n'
f'#SBATCH --partition={args.partition}\n'
f'#SBATCH --job-name {job_name}\n'
f'#SBATCH --gres=gpu:{args.gpus}\n'
f'{quota_cfg}'
f'#SBATCH --ntasks-per-node={args.gpus}\n'
f'#SBATCH --ntasks={args.gpus}\n'
f'#SBATCH --cpus-per-task=5\n\n'
f'{master_port} {runner} -u {script_name} {config} {checkpoint} '
f'--work-dir {work_dir} '
f'--launcher={launcher}\n')
with open(work_dir / 'job.sh', 'w') as f:
f.write(job_script)
commands.append(f'echo "{config}"')
if args.local:
commands.append(f'bash {work_dir}/job.sh')
else:
commands.append(f'sbatch {work_dir}/job.sh')
return work_dir / 'job.sh'
def summary(args):
# parse model-index.yml
model_index_file = MMRAZOR_ROOT / 'model-index.yml'
model_index = load(str(model_index_file))
model_index.build_models_with_collections()
models = OrderedDict({model.name: model for model in model_index.models})
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
model_results = dict()
for model_info in models.values():
model_name = model_info.name
work_dir = Path(args.work_dir) / model_name
# timestamp-named dirs sort lexicographically, so the last one is the latest
sub_dirs = sorted(p.name for p in work_dir.iterdir() if p.is_dir())
if len(sub_dirs) == 0:
print(f'{model_name} has no results.')
continue
latest_time = sub_dirs[-1]
latest_json = work_dir / latest_time / f'{latest_time}.json'
if not latest_json.exists():
print(f'{model_name} has no results.')
continue
latest_result = mmcv.load(latest_json, 'json')
expect_result = model_info.results[0].metrics
summary_result = {
'expect': expect_result,
'actual':
{METRIC_MAPPINGS[k]: v
for k, v in latest_result.items()}
}
model_results[model_name] = summary_result
mmcv.fileio.dump(model_results,
Path(args.work_dir) / 'summary.yml', 'yaml')
print(f'Summary results saved in {Path(args.work_dir)}/summary.yml')
def test(args):
# parse model-index.yml
model_index_file = MMRAZOR_ROOT / 'model-index.yml'
model_index = load(str(model_index_file))
model_index.build_models_with_collections()
models = OrderedDict({model.name: model for model in model_index.models})
commands = []
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
preview_script = ''
port = args.port
for model_info in models.values():
script_path = create_test_job_batch(commands, model_info, args, port)
preview_script = script_path or preview_script
port += 1
command_str = '\n'.join(commands)
preview = Table()
preview.add_column(str(preview_script))
preview.add_column('Shell command preview')
preview.add_row(
Syntax.from_path(
preview_script,
background_color='default',
line_numbers=True,
word_wrap=True),
Syntax(
command_str,
'bash',
background_color='default',
line_numbers=True,
word_wrap=True))
console.print(preview)
if args.run:
os.system(command_str)
else:
console.print('Please set "--run" to start the job')
def main():
args = parse_args()
if args.summary:
summary(args)
else:
test(args)
if __name__ == '__main__':
main()
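A small sketch of how the `--models` filter above behaves: each pattern is applied with `re.match`, so it is anchored at the start of the model name (the names below are hypothetical stand-ins for entries in model-index.yml).

```python
import re

names = ['spos_subnet_shufflenet_8xb128_in1k', 'cwd_fpn_retina_r101_retina_r50_1x_coco']
patterns = [re.compile(p) for p in ['spos']]

# Same check as in test()/summary(): re.match only matches at the beginning.
matched = [n for n in names if any(re.match(p, n) for p in patterns)]
print(matched)  # ['spos_subnet_shufflenet_8xb128_in1k']
```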

@@ -0,0 +1,224 @@
import argparse
import os
import os.path as osp
import re
from collections import OrderedDict
from pathlib import Path
import mmcv
from modelindex.load_model_index import load
from rich.console import Console
from rich.syntax import Syntax
from rich.table import Table
console = Console()
MMRAZOR_ROOT = Path(__file__).absolute().parents[1]
METRIC_MAPPINGS = {
'accuracy/top1': 'Top 1 Accuracy',
'accuracy/top5': 'Top 5 Accuracy'
}
def parse_args():
parser = argparse.ArgumentParser(
description="Test all models' accuracy in model-index.yml")
parser.add_argument(
'partition', type=str, help='Cluster partition to use.')
parser.add_argument(
'--job-name',
type=str,
default='razor-train-benchmark',
help='Slurm job name prefix')
parser.add_argument('--port', type=int, default=29666, help='dist port')
parser.add_argument(
'--models', nargs='+', type=str, help='Specify model names to run.')
parser.add_argument('--gpus', type=int, default=8, help='num gpus')
parser.add_argument(
'--work-dir',
default='work_dirs/benchmark_test',
help='the dir to save metrics')
parser.add_argument(
'--run', action='store_true', help='run script directly')
parser.add_argument(
'--summary', action='store_true', help='collect results')
parser.add_argument(
'--local',
action='store_true',
help='run locally instead of on a cluster.')
parser.add_argument(
'--mail', type=str, help='Mail address to watch test status.')
parser.add_argument(
'--mail-type',
nargs='+',
default=['BEGIN'],
choices=['NONE', 'BEGIN', 'END', 'FAIL', 'REQUEUE', 'ALL'],
help='Mail types to watch test status.')
parser.add_argument(
'--quotatype',
default=None,
choices=['reserved', 'auto', 'spot'],
help='Quota type, only available for phoenix-slurm>=0.2')
args = parser.parse_args()
return args
def create_train_job_batch(commands, model_info, args, port):
fname = model_info.name
config = Path(model_info.config)
# assert config.exists(), f'{fname}: {config} not found.'
job_name = f'{args.job_name}_{fname}'
work_dir = Path(args.work_dir) / fname
work_dir.mkdir(parents=True, exist_ok=True)
if args.quotatype is not None:
quota_cfg = f'#SBATCH --quotatype {args.quotatype}\n'
else:
quota_cfg = ''
launcher = 'none' if args.local else 'slurm'
runner = 'python' if args.local else 'srun python'
master_port = f'MASTER_PORT={port}'
script_name = osp.join('tools', 'train.py')
job_script = (f'#!/bin/bash\n'
f'#SBATCH --output {work_dir}/job.%j.out\n'
f'#SBATCH --partition={args.partition}\n'
f'#SBATCH --job-name {job_name}\n'
f'#SBATCH --gres=gpu:{args.gpus}\n'
f'{quota_cfg}'
f'#SBATCH --ntasks-per-node={args.gpus}\n'
f'#SBATCH --ntasks={args.gpus}\n'
f'#SBATCH --cpus-per-task=5\n\n'
f'{master_port} {runner} -u {script_name} {config} '
f'--work-dir {work_dir} '
f'--launcher={launcher}\n')
with open(work_dir / 'job.sh', 'w') as f:
f.write(job_script)
commands.append(f'echo "{config}"')
if args.local:
commands.append(f'bash {work_dir}/job.sh')
else:
commands.append(f'sbatch {work_dir}/job.sh')
return work_dir / 'job.sh'
def summary(args):
# parse model-index.yml
model_index_file = MMRAZOR_ROOT / 'model-index.yml'
model_index = load(str(model_index_file))
model_index.build_models_with_collections()
models = OrderedDict({model.name: model for model in model_index.models})
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
model_results = dict()
for model_info in models.values():
model_name = model_info.name
work_dir = Path(args.work_dir) / model_name
# timestamp-named dirs sort lexicographically, so the last one is the latest
sub_dirs = sorted(p.name for p in work_dir.iterdir() if p.is_dir())
if len(sub_dirs) == 0:
print(f'{model_name} has no results.')
continue
latest_time = sub_dirs[-1]
latest_json = work_dir / latest_time / f'{latest_time}.json'
if not latest_json.exists():
print(f'{model_name} has no results.')
continue
latest_result = mmcv.load(latest_json, 'json')
expect_result = model_info.results[0].metrics
summary_result = {
'expect': expect_result,
'actual':
{METRIC_MAPPINGS[k]: v
for k, v in latest_result.items()}
}
model_results[model_name] = summary_result
mmcv.fileio.dump(model_results,
Path(args.work_dir) / 'summary.yml', 'yaml')
print(f'Summary results saved in {Path(args.work_dir)}/summary.yml')
def train(args):
# parse model-index.yml
model_index_file = MMRAZOR_ROOT / 'model-index.yml'
model_index = load(str(model_index_file))
model_index.build_models_with_collections()
models = OrderedDict({model.name: model for model in model_index.models})
commands = []
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
preview_script = ''
port = args.port
for model_info in models.values():
script_path = create_train_job_batch(commands, model_info, args, port)
preview_script = script_path or preview_script
port += 1
command_str = '\n'.join(commands)
preview = Table()
preview.add_column(str(preview_script))
preview.add_column('Shell command preview')
preview.add_row(
Syntax.from_path(
preview_script,
background_color='default',
line_numbers=True,
word_wrap=True),
Syntax(
command_str,
'bash',
background_color='default',
line_numbers=True,
word_wrap=True))
console.print(preview)
if args.run:
os.system(command_str)
else:
console.print('Please set "--run" to start the job')
def main():
args = parse_args()
if args.summary:
summary(args)
else:
train(args)
if __name__ == '__main__':
main()
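As a sketch of what `summary()` writes, each model gets an entry pairing the expected metrics from model-index.yml with the metrics read from the latest JSON log; the structure follows the code above, while the numbers here are purely illustrative.

```python
# Illustrative shape of work_dirs/benchmark_test/summary.yml
# ('actual' keys come from METRIC_MAPPINGS; values are made up).
summary_example = {
    'kd_logits_resnet34_resnet18_8xb32_in1k': {
        'expect': {'Top 1 Accuracy': 71.54},
        'actual': {'Top 1 Accuracy': 71.49, 'Top 5 Accuracy': 90.12},
    },
}
```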

@@ -1,76 +0,0 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 2, 2, _OTHER_STAGE_MUTABLE],
[96, 3, 1, _OTHER_STAGE_MUTABLE],
[192, 2, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -1,76 +0,0 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 2, 2, _OTHER_STAGE_MUTABLE],
[96, 1, 1, _OTHER_STAGE_MUTABLE],
[192, 1, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -1,76 +0,0 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 3, 2, _OTHER_STAGE_MUTABLE],
[96, 4, 1, _OTHER_STAGE_MUTABLE],
[192, 3, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -1,76 +0,0 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 2, 2, _OTHER_STAGE_MUTABLE],
[96, 3, 1, _OTHER_STAGE_MUTABLE],
[192, 2, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -1,76 +0,0 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 4, 2, _OTHER_STAGE_MUTABLE],
[40, 4, 2, _OTHER_STAGE_MUTABLE],
[80, 5, 2, _OTHER_STAGE_MUTABLE],
[96, 4, 1, _OTHER_STAGE_MUTABLE],
[192, 4, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -1,76 +0,0 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 5, 2, _OTHER_STAGE_MUTABLE],
[40, 5, 2, _OTHER_STAGE_MUTABLE],
[80, 5, 2, _OTHER_STAGE_MUTABLE],
[96, 6, 1, _OTHER_STAGE_MUTABLE],
[192, 6, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -0,0 +1,65 @@
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
mb_k3e3=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=3,
act_cfg=dict(type='ReLU6')),
mb_k5e3=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=3,
act_cfg=dict(type='ReLU6')),
mb_k7e3=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=3,
act_cfg=dict(type='ReLU6')),
mb_k3e6=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
act_cfg=dict(type='ReLU6')),
mb_k5e6=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
act_cfg=dict(type='ReLU6')),
mb_k7e6=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
act_cfg=dict(type='ReLU6')),
identity=dict(type='Identity')))
_FIRST_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
mb_k3e1=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=1,
act_cfg=dict(type='ReLU6'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable_cfg.
[24, 1, 1, _FIRST_MUTABLE],
[32, 4, 2, _STAGE_MUTABLE],
[56, 4, 2, _STAGE_MUTABLE],
[112, 4, 2, _STAGE_MUTABLE],
[128, 4, 1, _STAGE_MUTABLE],
[256, 4, 2, _STAGE_MUTABLE],
[432, 1, 1, _STAGE_MUTABLE]
]
nas_backbone = dict(
_scope_='mmrazor',
type='SearchableMobileNet',
first_channels=40,
last_channels=1728,
widen_factor=1.0,
arch_setting=arch_setting)
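A minimal sketch, not MMRazor's actual `SearchableMobileNet` code, of how a backbone typically expands `arch_setting`: within each stage the first block uses the listed stride and the remaining blocks use stride 1, and every block is built from the stage's mutable config.

```python
def expand_arch_setting(arch_setting):
    """Flatten stage rows into per-block (out_channels, stride, cfg) specs."""
    blocks = []
    for out_channels, num_blocks, stride, mutable_cfg in arch_setting:
        for i in range(num_blocks):
            blocks.append((out_channels, stride if i == 0 else 1, mutable_cfg))
    return blocks

# E.g. the stage [32, 4, 2, _STAGE_MUTABLE] above expands to strides 2, 1, 1, 1.
```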

@@ -0,0 +1,23 @@
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(type='ShuffleBlock', kernel_size=3),
shuffle_5x5=dict(type='ShuffleBlock', kernel_size=5),
shuffle_7x7=dict(type='ShuffleBlock', kernel_size=7),
shuffle_xception=dict(type='ShuffleXception')))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE]
]
nas_backbone = dict(
_scope_='mmrazor',
type='SearchableShuffleNetV2',
widen_factor=1.0,
arch_setting=arch_setting)
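A toy illustration, not MMRazor's `OneShotMutableOP`, of the one-shot idea the candidates dict encodes: the supernet keeps every candidate op but executes exactly one, resampled between steps, on each forward pass.

```python
import random

import torch.nn as nn


class ToyOneShotMutable(nn.Module):
    """Toy stand-in: hold all candidate ops, run only the sampled one."""

    def __init__(self, candidates):
        super().__init__()
        self.candidates = nn.ModuleDict(candidates)
        self.current = random.choice(list(self.candidates))

    def sample(self):
        self.current = random.choice(list(self.candidates))

    def forward(self, x):
        return self.candidates[self.current](x)
```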

@@ -0,0 +1,80 @@
# dataset settings
dataset_type = 'mmcls.ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='mmcls.LoadImageFromFile'),
dict(type='mmcls.RandomResizedCrop', scale=224),
dict(
type='mmcls.ColorJitter', brightness=0.4, contrast=0.4,
saturation=0.4),
dict(type='mmcls.RandomFlip', prob=0.5, direction='horizontal'),
dict(type='mmcls.PackClsInputs'),
]
test_pipeline = [
dict(type='mmcls.LoadImageFromFile'),
dict(type='mmcls.ResizeEdge', scale=256, edge='short'),
dict(type='mmcls.CenterCrop', crop_size=224),
dict(type='mmcls.PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='mmcls.DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=128,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='mmcls.DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='mmcls.Accuracy', topk=(1, 5))
# If you want the standard test setting, please configure the test dataset manually
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# optimizer
paramwise_cfg = dict(
bias_decay_mult=0.0, norm_decay_mult=0.0, dwconv_decay_mult=0.0)
optim_wrapper = dict(
optimizer=dict(type='mmcls.SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
paramwise_cfg=paramwise_cfg,
clip_grad=None)
# learning policy
param_scheduler = dict(
type='mmcls.PolyLR',
power=1.0,
eta_min=0.0,
by_epoch=True,
end=300,
convert_to_iter_based=True)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict()
test_cfg = dict()
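For reference, a sketch of the polynomial decay `PolyLR` applies; with `power=1.0` and `eta_min=0.0` as configured above, it is a plain linear ramp from the base LR down to zero over the 300 epochs (stepped per iteration because of `convert_to_iter_based=True`).

```python
def poly_lr(base_lr, progress, power=1.0, eta_min=0.0):
    """progress in [0, 1] is the fraction of the schedule elapsed."""
    return (base_lr - eta_min) * (1 - progress)**power + eta_min

print(poly_lr(0.5, 0.5))  # 0.25: halfway through, LR is half the base 0.5
```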

@@ -0,0 +1,9 @@
_base_ = [
'./imagenet_bs1024_spos.py',
]
train_dataloader = dict(batch_size=256)
val_dataloader = dict(batch_size=256)
test_dataloader = dict(batch_size=256)

@@ -1,144 +0,0 @@
_base_ = [
'../../_base_/datasets/mmdet/coco_detection.py',
'../../_base_/schedules/mmdet/schedule_1x.py',
'../../_base_/mmdet_runtime.py'
]
# model settings
student = dict(
type='mmdet.GFL',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output',
num_outs=5),
bbox_head=dict(
type='GFLHead',
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
ratios=[1.0],
octave_base_scale=8,
scales_per_octave=1,
strides=[8, 16, 32, 64, 128]),
loss_cls=dict(
type='QualityFocalLoss',
use_sigmoid=True,
beta=2.0,
loss_weight=1.0),
loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),
reg_max=16,
loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
# training and testing settings
train_cfg=dict(
assigner=dict(type='ATSSAssigner', topk=9),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100))
checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth' # noqa: E501
teacher = dict(
type='mmdet.GFL',
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=None),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output',
num_outs=5),
bbox_head=dict(
type='GFLHead',
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
ratios=[1.0],
octave_base_scale=8,
scales_per_octave=1,
strides=[8, 16, 32, 64, 128]),
loss_cls=dict(
type='QualityFocalLoss',
use_sigmoid=True,
beta=2.0,
loss_weight=1.0),
loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),
reg_max=16,
loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
# training and testing settings
train_cfg=dict(
assigner=dict(type='ATSSAssigner', topk=9),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100))
# algorithm setting
algorithm = dict(
type='GeneralDistill',
architecture=dict(
type='MMDetArchitecture',
model=student,
),
distiller=dict(
type='SingleTeacherDistiller',
teacher=teacher,
teacher_trainable=False,
components=[
dict(
student_module='bbox_head.gfl_cls',
teacher_module='bbox_head.gfl_cls',
losses=[
dict(
type='ChannelWiseDivergence',
name='loss_cwd_cls_head',
tau=1,
loss_weight=5,
)
])
]),
)
find_unused_parameters = True
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)

@@ -1,110 +0,0 @@
_base_ = [
'../../_base_/datasets/mmseg/cityscapes.py',
'../../_base_/mmseg_runtime.py',
'../../_base_/schedules/mmseg/schedule_80k.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
# pspnet r18
student = dict(
type='mmseg.EncoderDecoder',
backbone=dict(
type='ResNetV1c',
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'),
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='PSPHead',
in_channels=512,
in_index=3,
channels=128,
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=256,
in_index=2,
channels=64,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
train_cfg=dict(),
test_cfg=dict(mode='whole'))
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth' # noqa: E501
# pspnet r101
teacher = dict(
type='mmseg.EncoderDecoder',
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
backbone=dict(
type='ResNetV1c',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='PSPHead',
in_channels=2048,
in_index=3,
channels=512,
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
)
# algorithm setting
algorithm = dict(
type='GeneralDistill',
architecture=dict(
type='MMSegArchitecture',
model=student,
),
distiller=dict(
type='SingleTeacherDistiller',
teacher=teacher,
teacher_trainable=False,
components=[
dict(
student_module='decode_head.conv_seg',
teacher_module='decode_head.conv_seg',
losses=[
dict(
type='ChannelWiseDivergence',
name='loss_cwd_logits',
tau=1,
loss_weight=5,
)
])
]),
)
find_unused_parameters = True

@@ -1,50 +0,0 @@
Collections:
- Name: CWD
Metadata:
Training Data:
- Cityscapes
- COCO
Paper:
URL: https://arxiv.org/abs/2011.13256
Title: Channel-wise Knowledge Distillation for Dense Prediction
README: configs/distill/cwd/README.md
Code:
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/cwd.py#L10
Version: v0.1.0
Converted From:
Code:
- https://github.com/pppppM/mmsegmentation-distiller
- https://github.com/pppppM/mmdetection-distiller
Models:
- Name: cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k
In Collection: CWD
Metadata:
Location: cls head
Student: pspnet-r18-d8
Teacher: pspnet-r101-d8
Teacher Checkpoint: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 75.54
mIoU(S): 74.87
mIoU(T): 79.76
Config: configs/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k.py
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_mIoU-75.54_20211222-3a26ee1c.pth
- Name: cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco
In Collection: CWD
Metadata:
Location: cls head
Student: gfl-r50-fpn
Teacher: gfl-r101-fpn
Teacher Checkpoint: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.9
box AP(S): 40.2
box AP(T): 44.7
Config: configs/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco.py
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211222-655dff39.pth

@@ -0,0 +1,32 @@
# KD
> [Distilling the Knowledge in a Neural Network](https://arxiv.org/abs/1503.02531)
<!-- [ALGORITHM] -->
## Abstract
A very simple way to improve the performance of almost any machine learning algorithm is to train many different models on the same data and then to average their predictions. Unfortunately, making predictions using a whole ensemble of models is cumbersome and may be too computationally expensive to allow deployment to a large number of users, especially if the individual models are large neural nets. Caruana and his collaborators have shown that it is possible to compress the knowledge in an ensemble into a single model which is much easier to deploy and we develop this approach further using a different compression technique. We achieve some surprising results on MNIST and we show that we can significantly improve the acoustic model of a heavily used commercial system by distilling the knowledge in an ensemble of models into a single model. We also introduce a new type of ensemble composed of one or more full models and many specialist models which learn to distinguish fine-grained classes that the full models confuse. Unlike a mixture of experts, these specialist models can be trained rapidly and in parallel.
![pipeline](/docs/en/imgs/model_zoo/kd/pipeline.png)
## Results and models
### Classification
| Location | Dataset | Teacher | Student | Acc | Acc(T) | Acc(S) | Config | Download |
| :------: | :------: | :----------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------: | :---: | :----: | :----: | :-------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| logits | ImageNet | [resnet34](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [resnet18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | 71.54 | 73.62 | 69.90 | [config](./wsld_cls_head_resnet34_resnet18_8xb32_in1k.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \|[model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth?versionId=CAEQHxiBgMC6memK7xciIGMzMDFlYTA4YzhlYTRiMTNiZWU0YTVhY2I5NjVkMjY2) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_20211221_181516.log.json?versionId=CAEQHxiBgIDLmemK7xciIGNkM2FiN2Y4N2E5YjRhNDE4NDVlNmExNDczZDIxN2E5) |
## Citation
```latex
@article{hinton2015distilling,
title={Distilling the knowledge in a neural network},
author={Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff and others},
journal={arXiv preprint arXiv:1503.02531},
volume={2},
number={7},
year={2015}
}
```

@@ -19,20 +19,18 @@ model = dict(
teacher=dict(
cfg_path='mmcls::resnet/resnet34_8xb32_in1k.py', pretrained=True),
teacher_ckpt='resnet34_8xb32_in1k_20210831-f257d4e6.pth',
student_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')),
teacher_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')),
distill_losses=dict(
loss_kl=dict(type='KLDivergence', tau=1, loss_weight=5)),
loss_forward_mappings=dict(
loss_kl=dict(
preds_S=dict(
from_student=True,
recorder='fc',
),
preds_T=dict(
from_student=False,
recorder='fc',
))))
distiller=dict(
type='ConfigurableDistiller',
student_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
teacher_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
distill_losses=dict(
loss_kl=dict(type='KLDivergence', tau=1, loss_weight=5)),
loss_forward_mappings=dict(
loss_kl=dict(
preds_S=dict(from_student=True, recorder='fc'),
preds_T=dict(from_student=False, recorder='fc')))))
find_unused_parameters = True
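A hedged sketch of the softened-logits KL loss that the `loss_kl` entry configures (the standard Hinton formulation, not necessarily MMRazor's exact `KLDivergence` implementation): both logits are softened by `tau`, and the usual `tau**2` factor restores the gradient scale.

```python
import torch.nn.functional as F

def kd_kl_loss(preds_S, preds_T, tau=1.0, loss_weight=5.0):
    p_T = F.softmax(preds_T / tau, dim=1)          # softened teacher probs
    log_p_S = F.log_softmax(preds_S / tau, dim=1)  # softened student log-probs
    kl = F.kl_div(log_p_S, p_T, reduction='batchmean')
    return loss_weight * tau**2 * kl
```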

@@ -0,0 +1,36 @@
Collections:
- Name: KD
Metadata:
Training Data:
- ImageNet-1k
Paper:
URL: https://arxiv.org/abs/1503.02531
Title: Distilling the Knowledge in a Neural Network
README: configs/distill/mmcls/kd/README.md
Code:
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/weighted_soft_label_distillation.py
Version: v0.1.0
Models:
- Name: kd_logits_resnet34_resnet18_8xb32_in1k
In Collection: KD
Metadata:
Location: logits
Student:
Config: mmcls::resnet/resnet18_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
Metrics:
Top 1 Accuracy: 69.90
Top 5 Accuracy: 89.43
Teacher:
Config: mmcls::resnet/resnet34_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
Metrics:
Top 1 Accuracy: 73.62
Top 5 Accuracy: 91.59
Results:
- Task: Image Classification
Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 71.54
Config: configs/distill/mmcls/kd/kd_logits_resnet34_resnet18_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth

@@ -6,7 +6,7 @@ Collections:
Paper:
URL: https://arxiv.org/abs/1904.05068
Title: Relational Knowledge Distillation
README: configs/distill/rkd/README.md
README: configs/distill/mmcls/rkd/README.md
Code:
URL: https://github.com/open-mmlab/mmrazor/blob/v0.3.0/mmrazor/models/losses/relation_kd.py
Version: v0.3.0
@@ -17,15 +17,22 @@ Models:
In Collection: RKD
Metadata:
Location: neck
Student: R-18
Teacher: R-34
Teacher Checkpoint: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
Student:
Config: mmcls::resnet/resnet18_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
Metrics:
Top 1 Accuracy: 69.90
Top 5 Accuracy: 89.43
Teacher:
Config: mmcls::resnet/resnet34_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
Metrics:
Top 1 Accuracy: 73.62
Top 5 Accuracy: 91.59
Results:
- Task: Image Classification
Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 70.23
Top 1 Accuracy (S): 69.90
Top 1 Accuracy (T): 73.62
Config: configs/distill/rkd/rkd_neck_resnet34_resnet18_8xb32_in1k.py
Config: configs/distill/mmcls/rkd/rkd_neck_resnet34_resnet18_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmrazor/v0.3/distill/rkd/rkd_neck_resnet34_resnet18_8xb32_in1k_acc-70.23_20220401-f25700ac.pth

@@ -0,0 +1,43 @@
_base_ = [
'mmcls::_base_/datasets/imagenet_bs32.py',
'mmcls::_base_/schedules/imagenet_bs256.py',
'mmcls::_base_/default_runtime.py'
]
model = dict(
_scope_='mmrazor',
type='SingleTeacherDistill',
data_preprocessor=dict(
type='ImgDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
bgr_to_rgb=True),
architecture=dict(
cfg_path='mmcls::resnet/resnet18_8xb32_in1k.py', pretrained=False),
teacher=dict(
cfg_path='mmcls::resnet/resnet34_8xb32_in1k.py', pretrained=True),
teacher_ckpt='resnet34_8xb32_in1k_20210831-f257d4e6.pth',
distiller=dict(
type='ConfigurableDistiller',
student_recorders=dict(
feat=dict(type='ModuleOutputs', source='neck.gap')),
teacher_recorders=dict(
feat=dict(type='ModuleOutputs', source='neck.gap')),
distill_losses=dict(
loss_dw=dict(
type='DistanceWiseRKD', with_l2_norm=True, loss_weight=25),
loss_aw=dict(
type='AngleWiseRKD', with_l2_norm=True, loss_weight=50)),
loss_forward_mappings=dict(
loss_dw=dict(
preds_S=dict(from_student=True, recorder='feat'),
preds_T=dict(from_student=False, recorder='feat')),
loss_aw=dict(
preds_S=dict(from_student=True, recorder='feat'),
preds_T=dict(from_student=False, recorder='feat')))))
find_unused_parameters = True
val_cfg = dict(_delete_=True, type='mmrazor.SingleTeacherDistillValLoop')
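A rough sketch of the distance-wise term that `loss_dw` configures, following the RKD paper rather than mmrazor's exact `DistanceWiseRKD` code: the recorded `neck.gap` embeddings (optionally L2-normalized, per `with_l2_norm`) are compared through their mean-normalized pairwise-distance matrices with a Huber loss.

```python
import torch
import torch.nn.functional as F

def distance_wise_rkd(feat_S, feat_T, with_l2_norm=True, loss_weight=25.0):
    if with_l2_norm:
        feat_S = F.normalize(feat_S, dim=1)
        feat_T = F.normalize(feat_T, dim=1)

    def rel_dist(e):
        d = torch.cdist(e, e)       # pairwise Euclidean distances
        return d / d[d > 0].mean()  # normalize by the mean non-zero distance

    return loss_weight * F.smooth_l1_loss(rel_dist(feat_S), rel_dist(feat_T))
```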

@@ -27,9 +27,9 @@ effectiveness of our method.
### Classification
| Location | Dataset | Teacher | Student | Acc | Acc(T) | Acc(S) | Config | Download |
| :------: | :------: | :----------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------: | :---: | :----: | :----: | :-------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| cls head | ImageNet | [resnet34](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [resnet18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | 71.54 | 73.62 | 69.90 | [config](./wsld_cls_head_resnet34_resnet18_8xb32_in1k.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \|[model](https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth) \| [log](https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_20211221_181516.log.json) |
| Location | Dataset | Teacher | Student | Acc | Acc(T) | Acc(S) | Config | Download |
| :------: | :------: | :----------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------: | :---: | :----: | :----: | :-------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| cls head | ImageNet | [resnet34](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [resnet18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | 71.54 | 73.62 | 69.90 | [config](./wsld_cls_head_resnet34_resnet18_8xb32_in1k.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \|[model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth?versionId=CAEQHxiBgMC6memK7xciIGMzMDFlYTA4YzhlYTRiMTNiZWU0YTVhY2I5NjVkMjY2) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_20211221_181516.log.json?versionId=CAEQHxiBgIDLmemK7xciIGNkM2FiN2Y4N2E5YjRhNDE4NDVlNmExNDczZDIxN2E5) |
## Citation

@@ -6,26 +6,33 @@ Collections:
Paper:
URL: https://arxiv.org/abs/2102.00650
Title: Rethinking Soft Labels for Knowledge Distillation: A Bias-Variance Tradeoff Perspective
README: configs/distill/wsld/README.md
README: configs/distill/mmcls/wsld/README.md
Code:
URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/weighted_soft_label_distillation.py
Version: v0.1.0
Converted From:
Code: https://github.com/bellymonster/Weighted-Soft-Label-Distillation
Models:
- Name: wsld_cls_head_resnet34_resnet18_8xb32_in1k
- Name: wsld_logits_resnet34_resnet18_8xb32_in1k
In Collection: WSLD
Metadata:
Location: cls head
Student: R-18
Teacher: R-34
Teacher Checkpoint: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth
Location: logits
Student:
Config: mmcls::resnet/resnet18_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
Metrics:
Top 1 Accuracy: 69.90
Top 5 Accuracy: 89.43
Teacher:
Config: mmcls::resnet/resnet34_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth
Metrics:
Top 1 Accuracy: 73.62
Top 5 Accuracy: 91.59
Results:
- Task: Image Classification
Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 71.54
Top 1 Accuracy (S): 69.90
Top 1 Accuracy (T): 73.62
Config: configs/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k.py
Config: configs/distill/mmcls/wsld/wsld_logits_resnet34_resnet18_8xb32_in1k.py
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/wsld/wsld_cls_head_resnet34_resnet18_8xb32_in1k/wsld_cls_head_resnet34_resnet18_8xb32_in1k_acc-71.54_20211222-91f28cf6.pth

@@ -19,17 +19,20 @@ model = dict(
teacher=dict(
cfg_path='mmcls::resnet/resnet34_8xb32_in1k.py', pretrained=True),
teacher_ckpt='resnet34_8xb32_in1k_20210831-f257d4e6.pth',
student_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc'),
data_samples=dict(type='ModuleInputs', source='')),
teacher_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')),
distill_losses=dict(loss_wsld=dict(type='WSLD', tau=2, loss_weight=2.5)),
loss_forward_mappings=dict(
loss_wsld=dict(
student=dict(recorder='fc', from_student=True),
teacher=dict(recorder='fc', from_student=False),
data_samples=dict(
recorder='data_samples', from_student=True, data_idx=1))))
distiller=dict(
student_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc'),
data_samples=dict(type='ModuleInputs', source='')),
teacher_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
distill_losses=dict(
loss_wsld=dict(type='WSLD', tau=2, loss_weight=2.5)),
loss_forward_mappings=dict(
loss_wsld=dict(
student=dict(recorder='fc', from_student=True),
teacher=dict(recorder='fc', from_student=False),
data_samples=dict(
recorder='data_samples', from_student=True, data_idx=1)))))
find_unused_parameters = True

@@ -1,4 +1,4 @@
_base_ = ['./cwd_fpn_gfl_r101_gfl_r50_1x_coco.py']
_base_ = ['./cwd_fpn_retina_r101_retina_r50_1x_coco.py']
model = dict(
architecture=dict(

@@ -16,42 +16,38 @@ model = dict(
cfg_path='mmdet::faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py',
pretrained=False),
teacher_ckpt=teacher_ckpt,
distill_losses=dict(
loss_cwd_fpn0=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn1=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn2=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn3=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn4=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10)),
student_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
teacher_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
loss_forward_mappings=dict(
loss_cwd_fpn0=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=0),
preds_T=dict(from_student=False, recorder='fpn', data_idx=0),
),
loss_cwd_fpn1=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=1),
preds_T=dict(from_student=False, recorder='fpn', data_idx=1),
),
loss_cwd_fpn2=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=2),
preds_T=dict(from_student=False, recorder='fpn', data_idx=2),
),
loss_cwd_fpn3=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=3),
preds_T=dict(from_student=False, recorder='fpn', data_idx=3),
),
loss_cwd_fpn4=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=4),
preds_T=dict(from_student=False, recorder='fpn', data_idx=4),
),
),
)
distiller=dict(
type='ConfigurableDistiller',
student_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
teacher_recorders=dict(fpn=dict(type='ModuleOutputs', source='neck')),
distill_losses=dict(
loss_cwd_fpn0=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn1=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn2=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn3=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10),
loss_cwd_fpn4=dict(
type='ChannelWiseDivergence', tau=1, loss_weight=10)),
loss_forward_mappings=dict(
loss_cwd_fpn0=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=0),
preds_T=dict(from_student=False, recorder='fpn', data_idx=0)),
loss_cwd_fpn1=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=1),
preds_T=dict(from_student=False, recorder='fpn', data_idx=1)),
loss_cwd_fpn2=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=2),
preds_T=dict(from_student=False, recorder='fpn', data_idx=2)),
loss_cwd_fpn3=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=3),
preds_T=dict(from_student=False, recorder='fpn', data_idx=3)),
loss_cwd_fpn4=dict(
preds_S=dict(from_student=True, recorder='fpn', data_idx=4),
preds_T=dict(from_student=False, recorder='fpn',
data_idx=4)))))
find_unused_parameters = True
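A minimal sketch of the wiring the five mappings above express, assuming the recorded `neck` output is a tuple of five FPN levels: `data_idx=k` picks the k-th level for the matching `loss_cwd_fpnk` term, and the per-level losses are simply summed.

```python
def total_cwd_fpn(student_fpn, teacher_fpn, cwd_loss):
    # One ChannelWiseDivergence term per FPN level, then summed.
    return sum(cwd_loss(s, t) for s, t in zip(student_fpn, teacher_fpn))
```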

@@ -7,3 +7,7 @@ model = dict(
teacher=dict(
cfg_path='mmdet::retinanet/retinanet_r101_fpn_2x_coco.py',
pretrained=True))
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))

@@ -0,0 +1,23 @@
Models:
- Name: cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco
In Collection: CWD
Metadata:
Location: cls head
Student:
Metrics:
box AP: 40.2
Config: mmdet::gfl/gfl_r50_fpn_1x_coco.py
Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth
Teacher:
Metrics:
box AP: 44.7
Config: mmdet::gfl/gfl_r101_fpn_mstrain_2x_coco.py
Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.9
Config: configs/distill/mmdet/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco.py
Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211222-655dff39.pth

@@ -0,0 +1,37 @@
# CWD
> [Channel-wise Knowledge Distillation for Dense Prediction](https://arxiv.org/abs/2011.13256)
<!-- [ALGORITHM] -->
## Abstract
Knowledge distillation (KD) has been proven to be a simple and effective tool for training compact models. Almost all KD variants for dense prediction tasks align the student and teacher networks' feature maps in the spatial domain, typically by minimizing point-wise and/or pair-wise discrepancy. Observing that in semantic segmentation, some layers' feature activations of each channel tend to encode saliency of scene categories (analogue to class activation mapping), we propose to align features channel-wise between the student and teacher networks. To this end, we first transform the feature map of each channel into a probability map using softmax normalization, and then minimize the Kullback-Leibler (KL) divergence of the corresponding channels of the two networks. By doing so, our method focuses on mimicking the soft distributions of channels between networks. In particular, the KL divergence enables learning to pay more attention to the most salient regions of the channel-wise maps, presumably corresponding to the most useful signals for semantic segmentation. Experiments demonstrate that our channel-wise distillation outperforms almost all existing spatial distillation methods for semantic segmentation considerably, and requires less computational cost during training. We consistently achieve superior performance on three benchmarks with various network structures.
![pipeline](/docs/en/imgs/model_zoo/cwd/pipeline.png)
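As a concrete reading of the abstract (a sketch, not the exact `mmrazor/models/losses/cwd.py` code): each channel's activation map becomes a spatial probability distribution via a temperature softmax, and the student matches the teacher's per-channel distributions under KL divergence.

```python
import torch.nn.functional as F

def channel_wise_divergence(preds_S, preds_T, tau=1.0, loss_weight=5.0):
    N, C, H, W = preds_S.shape
    # Softmax over spatial positions, independently for each channel.
    p_T = F.softmax(preds_T.reshape(N * C, H * W) / tau, dim=1)
    log_p_S = F.log_softmax(preds_S.reshape(N * C, H * W) / tau, dim=1)
    loss = F.kl_div(log_p_S, p_T, reduction='sum') * tau**2 / (N * C)
    return loss_weight * loss
```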
## Results and models
### Segmentation
| Location | Dataset | Teacher | Student | mIoU | mIoU(T) | mIoU(S) | Config | Download |
| :------: | :--------: | :------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------: | :---: | :-----: | :-----: | :----------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| logits | cityscapes | [pspnet_r101](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py) | [pspnet_r18](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py) | 75.54 | 79.76 | 74.87 | [config](<>) | [teacher](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) \|[model](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_mIoU-75.54_20211222-3a26ee1c.pth) \| [log](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_20211212_205711.log.json?) |
### Detection
| Location | Dataset | Teacher | Student | mAP | mAP(T) | mAP(S) | Config | Download |
| :------: | :-----: | :-----: | :-----: | :-: | :----: | :----: | :----: | :------: |
| cls head | COCO | [gfl_r101_2x](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py) | [gfl_r50_1x](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_1x_coco.py) | 41.9 | 44.7 | 40.2 | [config](<>) | [teacher](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth) \| [model](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211222-655dff39.pth) \| [log](https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco/cwd_cls_head_gfl_r101_fpn_gfl_r50_fpn_1x_coco_20211212_205444.log.json) |
## Citation
```latex
@inproceedings{shu2021channel,
title={Channel-Wise Knowledge Distillation for Dense Prediction},
author={Shu, Changyong and Liu, Yifan and Gao, Jianfei and Yan, Zheng and Shen, Chunhua},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={5311--5320},
year={2021}
}
```

View File

@ -0,0 +1,33 @@
_base_ = [
'mmseg::_base_/datasets/cityscapes.py',
'mmseg::_base_/schedules/schedule_80k.py',
'mmseg::_base_/default_runtime.py'
]
teacher_ckpt = 'https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth' # noqa: E501
model = dict(
_scope_='mmrazor',
type='SingleTeacherDistill',
architecture=dict(
cfg_path='mmseg::pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py',
pretrained=False),
teacher=dict(
cfg_path='mmseg::pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py',
pretrained=False),
teacher_ckpt=teacher_ckpt,
distiller=dict(
type='ConfigurableDistiller',
distill_losses=dict(
loss_cwd=dict(type='ChannelWiseDivergence', tau=1, loss_weight=5)),
student_recorders=dict(
logits=dict(type='ModuleOutputs', source='decode_head.conv_seg')),
teacher_recorders=dict(
logits=dict(type='ModuleOutputs', source='decode_head.conv_seg')),
loss_forward_mappings=dict(
loss_cwd=dict(
preds_S=dict(from_student=True, recorder='logits'),
preds_T=dict(from_student=False, recorder='logits')))))
find_unused_parameters = True
val_cfg = dict(_delete_=True, type='mmrazor.SingleTeacherDistillValLoop')
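Reading the config bottom-up: both recorders capture the raw output of `decode_head.conv_seg`, and `loss_forward_mappings` routes the student recording into `loss_cwd` as `preds_S` and the teacher recording as `preds_T`. As a rough illustration, using the sketch loss from the README section above (the tensor shapes here are assumptions for Cityscapes-style logits, not values taken from the config):
```python
import torch

# Stand-in logit maps (batch 2, 19 classes, a 1/8-resolution feature map);
# in real training these come from the ModuleOutputs recorders above.
preds_S = torch.randn(2, 19, 64, 128)  # student decode_head.conv_seg output
preds_T = torch.randn(2, 19, 64, 128)  # teacher decode_head.conv_seg output

# channel_wise_divergence is the illustrative helper sketched earlier.
loss_cwd = channel_wise_divergence(preds_S, preds_T, tau=1, loss_weight=5)
```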

View File

@ -0,0 +1,41 @@
Collections:
  - Name: CWD
    Metadata:
      Training Data:
        - Cityscapes
        - COCO
    Paper:
      URL: https://arxiv.org/abs/2011.13256
      Title: Channel-wise Knowledge Distillation for Dense Prediction
    README: configs/distill/mmseg/cwd/README.md
    Code:
      URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/losses/cwd.py#L10
      Version: v0.1.0
    Converted From:
      Code:
        - https://github.com/pppppM/mmsegmentation-distiller
        - https://github.com/pppppM/mmdetection-distiller
Models:
  - Name: cwd_logits_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k
    In Collection: CWD
    Metadata:
      Location: logits
      Student:
        Metrics:
          mIoU: 74.87
          mIoU(ms+flip): 76.04
        Config: mmseg::pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py
        Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth
      Teacher:
        Metrics:
          mIoU: 79.76
          mIoU(ms+flip): 81.01
        Config: mmseg::pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py
        Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth
    Results:
      - Task: Semantic Segmentation
        Dataset: Cityscapes
        Metrics:
          mIoU: 75.54
    Config: configs/distill/mmseg/cwd/cwd_logits_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k.py
    Weights: https://download.openmmlab.com/mmrazor/v0.1/distill/cwd/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k/cwd_cls_head_pspnet_r101_d8_pspnet_r18_d8_512x1024_cityscapes_80k_mIoU-75.54_20211222-3a26ee1c.pth

View File

@ -1,79 +0,0 @@
_base_ = [
'../../_base_/datasets/mmcls/imagenet_bs32.py',
'../../_base_/schedules/mmcls/imagenet_bs256.py',
'../../_base_/mmcls_runtime.py'
]
# model settings
student = dict(
type='mmcls.ImageClassifier',
backbone=dict(
type='ResNet',
depth=18,
num_stages=4,
out_indices=(3, ),
style='pytorch'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))
# teacher settings
teacher_ckpt = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth' # noqa: E501
teacher = dict(
type='mmcls.ImageClassifier',
init_cfg=dict(type='Pretrained', checkpoint=teacher_ckpt),
backbone=dict(
type='ResNet',
depth=34,
num_stages=4,
out_indices=(3, ),
style='pytorch'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))
# algorithm setting
algorithm = dict(
type='GeneralDistill',
architecture=dict(
type='MMClsArchitecture',
model=student,
),
with_student_loss=True,
with_teacher_loss=False,
distiller=dict(
type='SingleTeacherDistiller',
teacher=teacher,
teacher_trainable=False,
teacher_norm_eval=True,
components=[
dict(
student_module='neck.gap',
teacher_module='neck.gap',
losses=[
dict(
type='DistanceWiseRKD',
name='distance_wise_loss',
loss_weight=25.0,
with_l2_norm=True),
dict(
type='AngleWiseRKD',
name='angle_wise_loss',
loss_weight=50.0,
with_l2_norm=True),
])
]),
)
find_unused_parameters = True
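The components above configure relational KD on the pooled embeddings (`neck.gap`): rather than matching features directly, RKD matches pairwise structure between samples. Below is a hedged sketch of the distance-wise term, following Park et al.'s RKD paper; `with_l2_norm` mirrors the config, but this is not MMRazor's exact `DistanceWiseRKD` implementation.
```python
import torch
import torch.nn.functional as F


def distance_wise_rkd(feats_S: torch.Tensor,
                      feats_T: torch.Tensor,
                      with_l2_norm: bool = True) -> torch.Tensor:
    """Sketch of distance-wise relational KD on (N, D) embeddings."""
    if with_l2_norm:
        feats_S = F.normalize(feats_S, dim=1)
        feats_T = F.normalize(feats_T, dim=1)

    def relative_pdist(e: torch.Tensor) -> torch.Tensor:
        d = torch.cdist(e, e)       # (N, N) pairwise euclidean distances
        return d / d[d > 0].mean()  # normalize by the mean nonzero distance

    # Huber loss between the two relational (distance) matrices.
    return F.smooth_l1_loss(relative_pdist(feats_S), relative_pdist(feats_T))
```
The angle-wise component plays the same game with triplet angles instead of pairwise distances.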

View File

@ -1,78 +0,0 @@
_base_ = [
'../../_base_/datasets/mmcls/imagenet_bs32.py',
'../../_base_/schedules/mmcls/imagenet_bs256.py',
'../../_base_/mmcls_runtime.py'
]
# model settings
student = dict(
type='mmcls.ImageClassifier',
backbone=dict(
type='ResNet',
depth=18,
num_stages=4,
out_indices=(3, ),
style='pytorch'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth' # noqa: E501
# teacher settings
teacher = dict(
type='mmcls.ImageClassifier',
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
backbone=dict(
type='ResNet',
depth=34,
num_stages=4,
out_indices=(3, ),
style='pytorch'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))
# algorithm setting
algorithm = dict(
type='GeneralDistill',
architecture=dict(
type='MMClsArchitecture',
model=student,
),
with_student_loss=True,
# teacher_trainable and with_teacher_loss have a dependency
# relationship, if teacher_trainable is false, then
# with_teacher_loss must be false.
with_teacher_loss=False,
distiller=dict(
type='SingleTeacherDistiller',
teacher=teacher,
teacher_trainable=False,
teacher_norm_eval=True,
components=[
dict(
student_module='head.fc',
teacher_module='head.fc',
losses=[
dict(
type='WSLD',
name='loss_wsld',
tau=2,
loss_weight=2.5,
num_classes=1000)
])
]),
)
find_unused_parameters = True

View File

@ -1,11 +0,0 @@
modules:
  backbone.layer1.0: depthsepconv
  backbone.layer2.0: mb_k3e4_se
  backbone.layer3.0: mb_k5e6_se
  backbone.layer3.1: mb_k5e6_se
  backbone.layer4.0: mb_k5e6_se
  backbone.layer4.1: mb_k5e6_se
  backbone.layer5.0: mb_k3e6_se
  backbone.layer6.0: mb_k5e6_se
  backbone.layer7.0: convbnact
channels:

View File

@ -1,8 +0,0 @@
_base_ = ['./cream_14_supernet_mobilenet.py']
# FIXME: replace this with a mutable_cfg obtained from your own search
fix_subnet = 'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

View File

@ -1,241 +0,0 @@
# dataset settings
dataset_type = 'ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/imagenet': 's3://openmmlab/datasets/classification/imagenet',
# 'data/imagenet': 's3://openmmlab/datasets/classification/imagenet'
# }))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', scale=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=73,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=64),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# scheduler
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
clip_grad=None)
# learning policy
param_scheduler = [
    # `end` matches train_cfg.max_iters so the polynomial decay spans training
    dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
]
# train, val, test setting
train_cfg = dict(by_epoch=False, max_iters=300000)
val_cfg = dict()
test_cfg = dict()
# runtime
# use the mmcls registries by default
default_scope = 'mmcls'
# configure default hooks
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=100),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
se_cfg = dict(
ratio=4,
divisor=8,
act_cfg=(dict(type='ReLU'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict( # DepthwiseSep
type='OneShotMutableOP',
candidates=dict(
depthsepconv=dict(
type='DepthwiseSeparableConv',
dw_kernel_size=3,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish'))))
_MIDDLE_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _MIDDLE_STAGE_MUTABLE],
[40, 2, 2, _MIDDLE_STAGE_MUTABLE],
[80, 2, 2, _MIDDLE_STAGE_MUTABLE],
[96, 1, 1, _MIDDLE_STAGE_MUTABLE],
[192, 1, 2, _MIDDLE_STAGE_MUTABLE],
]
norm_cfg = dict(type='BN')
supernet = dict(
_scope_='mmcls',
type='ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
_scope_='mmrazor',
type='SearchableMobileNet',
arch_setting=arch_setting,
first_channels=16,
last_channels=320,
widen_factor=1.0,
norm_cfg=norm_cfg,
act_cfg=dict(type='Swish'),
out_indices=(6, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='mmrazor.CreamClsHead',
num_classes=1000,
in_channels=320,
num_features=1280,
act_cfg=dict(type='Swish'),
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True
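Each `OneShotMutableOP` above wraps several candidate blocks behind one module; during supernet training a single candidate is active per forward pass, and the searched architecture is later frozen via `fix_subnet` in the subnet configs. A toy stand-in to convey the mechanism (a conceptual sketch only, not MMRazor's actual `OneShotMutableOP` API):
```python
import random

import torch.nn as nn


class ToyOneShotMutable(nn.Module):
    """Holds several candidate ops; forwards through the sampled one."""

    def __init__(self, candidates: dict):
        super().__init__()
        self.candidates = nn.ModuleDict(candidates)
        self._current = next(iter(self.candidates))  # default choice

    def sample(self) -> None:
        # Uniform sampling over candidates, as in one-shot NAS such as SPOS.
        self._current = random.choice(list(self.candidates))

    def forward(self, x):
        return self.candidates[self._current](x)
```
A mutator plays the role of calling `sample()` on every such module before each training step, so all candidate subnets share the supernet's weights.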

View File

@ -1,22 +0,0 @@
modules:
  backbone.layers.0.0: shuffle_5x5
  backbone.layers.0.1: shuffle_3x3
  backbone.layers.0.2: shuffle_3x3
  backbone.layers.0.3: shuffle_3x3
  backbone.layers.1.0: shuffle_xception
  backbone.layers.1.1: shuffle_3x3
  backbone.layers.1.2: shuffle_xception
  backbone.layers.1.3: shuffle_7x7
  backbone.layers.2.0: shuffle_7x7
  backbone.layers.2.1: shuffle_7x7
  backbone.layers.2.2: shuffle_xception
  backbone.layers.2.3: shuffle_xception
  backbone.layers.2.4: shuffle_3x3
  backbone.layers.2.5: shuffle_7x7
  backbone.layers.2.6: shuffle_5x5
  backbone.layers.2.7: shuffle_xception
  backbone.layers.3.0: shuffle_7x7
  backbone.layers.3.1: shuffle_7x7
  backbone.layers.3.2: shuffle_7x7
  backbone.layers.3.3: shuffle_5x5
channels:

View File

@ -1,22 +0,0 @@
modules:
  backbone.layers.0.0: shuffle_5x5
  backbone.layers.0.1: shuffle_3x3
  backbone.layers.0.2: shuffle_3x3
  backbone.layers.0.3: shuffle_3x3
  backbone.layers.1.0: shuffle_xception
  backbone.layers.1.1: shuffle_3x3
  backbone.layers.1.2: shuffle_xception
  backbone.layers.1.3: shuffle_7x7
  backbone.layers.2.0: shuffle_7x7
  backbone.layers.2.1: shuffle_7x7
  backbone.layers.2.2: shuffle_xception
  backbone.layers.2.3: shuffle_xception
  backbone.layers.2.4: shuffle_3x3
  backbone.layers.2.5: shuffle_7x7
  backbone.layers.2.6: shuffle_5x5
  backbone.layers.2.7: shuffle_xception
  backbone.layers.3.0: shuffle_7x7
  backbone.layers.3.1: shuffle_7x7
  backbone.layers.3.2: shuffle_7x7
  backbone.layers.3.3: shuffle_5x5
channels:

View File

@ -1,8 +0,0 @@
_base_ = ['./detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py']
# FIXME: replace this with a mutable_cfg obtained from your own search
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

View File

@ -1,87 +0,0 @@
_base_ = [
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
'mmdet::_base_/datasets/coco_detection.py',
'mmdet::_base_/schedules/schedule_1x.py',
'mmdet::_base_/default_runtime.py'
]
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
_base_.train_dataloader.dataset.data_root = data_root
visualizer = None
log_level = 'INFO'
load_from = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
resume = False
norm_cfg = dict(type='SyncBN', requires_grad=True)
# model settings
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='mmrazor.OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
shuffle_5x5=dict(
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
shuffle_7x7=dict(
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
shuffle_xception=dict(
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE],
]
supernet = _base_.model
supernet.backbone = dict(
type='mmrazor.SearchableShuffleNetV2',
arch_setting=arch_setting,
norm_cfg=norm_cfg,
out_indices=(0, 1, 2, 3),
widen_factor=1.0,
with_last_layer=False)
supernet.neck = dict(
type='FPN',
norm_cfg=norm_cfg,
in_channels=[64, 160, 320, 640],
out_channels=256,
num_outs=5)
supernet.roi_head.bbox_head = dict(
type='Shared4Conv1FCBBoxHead',
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))
mutator = dict(type='mmrazor.OneShotModuleMutator')
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
model = dict(
_delete_=True,
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
fix_subnet=fix_subnet,
)
find_unused_parameters = True

View File

@ -1,114 +0,0 @@
_base_ = [
'mmdet::faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
'mmdet::datasets/coco_detection.py', 'mmdet::schedules/schedule_1x.py',
'mmdet::default_runtime.py'
]
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
train_dataloader = dict(dataset=dict(data_root=data_root, ))
visualizer = None
# custom_hooks = [dict(type='DetVisualizationHook', interval=10)]
log_level = 'INFO'
load_from = None
resume = False
# TODO: support auto scaling lr
norm_cfg = dict(type='SyncBN', requires_grad=True)
# model settings
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='mmrazor.OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
shuffle_5x5=dict(
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
shuffle_7x7=dict(
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
shuffle_xception=dict(
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE],
]
supernet = dict(
type='RetinaNet',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32),
backbone=dict(
type='mmrazor.SearchableShuffleNetV2',
arch_setting=arch_setting,
norm_cfg=norm_cfg,
out_indices=(0, 1, 2, 3),
widen_factor=1.0,
with_last_layer=False),
neck=dict(
type='FPN',
in_channels=[64, 160, 320, 640],
out_channels=256,
num_outs=5),
bbox_head=dict(
type='RetinaHead',
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100))
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True

View File

@ -6,24 +6,23 @@ Collections:
    Paper:
      URL: https://arxiv.org/abs/1904.00420
      Title: Single Path One-Shot Neural Architecture Search with Uniform Sampling
    README: configs/nas/spos/README.md
    README: configs/nas/mmcls/spos/README.md
    Code:
      URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/algorithms/spos.py
      Version: v0.1.0
    Converted From:
      Code: https://github.com/megvii-model/SinglePathOneShot
Models:
  - Name: spos_subnet_shufflenetv2_8xb128_in1k
  - Name: spos_shufflenet_subnet_8xb128_in1k
    In Collection: SPOS
    Metadata:
      FLOPs: 330 M
      Supernet: ShuffleNetV2
      Mutable: https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml
      Subnet: https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml
    Results:
      - Task: Image Classification
        Dataset: ImageNet-1k
        Metrics:
          Top 1 Accuracy: 73.87
          Top 5 Accuracy: 91.60
    Config: configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py
    Config: configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py
    Weights: https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth

View File

@ -0,0 +1,30 @@
_base_ = [
'mmrazor::_base_/settings/imagenet_bs1024_spos.py',
'mmrazor::_base_/nas_backbones/spos_mobilenet_supernet.py',
'mmcls::_base_/default_runtime.py',
]
# model
supernet = dict(
type='ImageClassifier',
# data_preprocessor=_base_.preprocess_cfg,
backbone=_base_.nas_backbone,
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5)))
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=dict(type='mmrazor.OneShotModuleMutator'))
find_unused_parameters = True

View File

@ -0,0 +1,30 @@
_base_ = [
'mmrazor::_base_/settings/imagenet_bs1024_spos.py',
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py',
'mmcls::_base_/default_runtime.py',
]
# model
supernet = dict(
type='ImageClassifier',
# data_preprocessor=_base_.preprocess_cfg,
backbone=_base_.nas_backbone,
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5)))
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=dict(type='mmrazor.OneShotModuleMutator'))
find_unused_parameters = True

View File

@ -0,0 +1,9 @@
_base_ = ['./spos_supernet_frcnn_shufflenet_coco_1x.py']
# FIXME: replace this with a mutable_cfg obtained from your own search
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

View File

@ -0,0 +1,29 @@
_base_ = [
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
'mmdet::_base_/datasets/coco_detection.py',
'mmdet::_base_/schedules/schedule_1x.py',
'mmdet::_base_/default_runtime.py',
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
supernet = _base_.model
supernet.backbone = _base_.nas_backbone
supernet.backbone.norm_cfg = norm_cfg
supernet.backbone.out_indices = (0, 1, 2, 3)
supernet.backbone.with_last_layer = False
supernet.neck.norm_cfg = norm_cfg
supernet.neck.in_channels = [64, 160, 320, 640]
supernet.roi_head.bbox_head.norm_cfg = norm_cfg
model = dict(
_delete_=True,
type='mmrazor.SPOS',
architecture=supernet,
mutator=dict(type='mmrazor.OneShotModuleMutator'))
find_unused_parameters = True

View File

@ -0,0 +1,27 @@
_base_ = [
'mmdet::_base_/models/retinanet_r50_fpn.py',
'mmdet::_base_/datasets/coco_detection.py',
'mmdet::_base_/schedules/schedule_1x.py',
'mmdet::_base_/default_runtime.py',
'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
supernet = _base_.model
supernet.backbone = _base_.nas_backbone
supernet.backbone.norm_cfg = norm_cfg
supernet.backbone.out_indices = (0, 1, 2, 3)
supernet.backbone.with_last_layer = False
supernet.neck.norm_cfg = norm_cfg
supernet.neck.in_channels = [64, 160, 320, 640]
model = dict(
_delete_=True,
type='mmrazor.SPOS',
architecture=supernet,
mutator=dict(type='mmrazor.OneShotModuleMutator'))
find_unused_parameters = True

View File

@ -0,0 +1,9 @@
_base_ = './detnas_shufflenet_supernet_8xb128_in1k.py'
# FIXME: replace this with a mutable_cfg obtained from your own search
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

View File

@ -0,0 +1 @@
_base_ = 'mmrazor::nas/mmcls/spos/shufflenet/spos_shufflenet_supernet_8xb128_in1k.py' # noqa: E501

View File

@ -7,14 +7,14 @@ Collections:
    Paper:
      URL: https://arxiv.org/abs/1903.10979
      Title: 'DetNAS: Backbone Search for Object Detection'
    README: configs/nas/detnas/README.md
    README: configs/nas/mmdet/detnas/README.md
    Code:
      URL: https://github.com/open-mmlab/mmrazor/blob/v0.1.0/mmrazor/models/algorithms/detnas.py
      Version: v0.1.0
    Converted From:
      Code: https://github.com/megvii-model/DetNAS
Models:
  - Name: detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco
  - Name: detnas_frcnn_shufflenet_subnet_coco_1x
    In Collection: DetNAS
    Metadata:
      FLOPs(Backbone): 340 M
@ -26,5 +26,5 @@ Models:
        Dataset: COCO
        Metrics:
          box AP: 37.5
    Config: configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py
    Config: configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py
    Weights: https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml

View File

@ -1,24 +0,0 @@
modules:
  backbone.layer1.0: mb_k3e1
  backbone.layer2.0: mb_k5e3
  backbone.layer2.1: mb_k5e3
  backbone.layer2.2: identity
  backbone.layer2.3: mb_k3e3
  backbone.layer3.0: mb_k3e3
  backbone.layer3.1: identity
  backbone.layer3.2: identity
  backbone.layer3.3: mb_k3e3
  backbone.layer4.0: mb_k7e6
  backbone.layer4.1: identity
  backbone.layer4.2: mb_k7e3
  backbone.layer4.3: mb_k7e3
  backbone.layer5.0: mb_k3e3
  backbone.layer5.1: mb_k3e3
  backbone.layer5.2: mb_k7e3
  backbone.layer5.3: mb_k5e3
  backbone.layer6.0: mb_k5e6
  backbone.layer6.1: mb_k7e3
  backbone.layer6.2: mb_k7e3
  backbone.layer6.3: mb_k7e3
  backbone.layer7.0: mb_k5e6
channels:

View File

@ -1,22 +0,0 @@
modules:
  backbone.layers.0.0: shuffle_7x7
  backbone.layers.0.1: shuffle_3x3
  backbone.layers.0.2: shuffle_7x7
  backbone.layers.0.3: shuffle_3x3
  backbone.layers.1.0: shuffle_xception
  backbone.layers.1.1: shuffle_5x5
  backbone.layers.1.2: shuffle_5x5
  backbone.layers.1.3: shuffle_3x3
  backbone.layers.2.0: shuffle_3x3
  backbone.layers.2.1: shuffle_5x5
  backbone.layers.2.2: shuffle_3x3
  backbone.layers.2.3: shuffle_5x5
  backbone.layers.2.4: shuffle_3x3
  backbone.layers.2.5: shuffle_xception
  backbone.layers.2.6: shuffle_5x5
  backbone.layers.2.7: shuffle_7x7
  backbone.layers.3.0: shuffle_7x7
  backbone.layers.3.1: shuffle_3x3
  backbone.layers.3.2: shuffle_5x5
  backbone.layers.3.3: shuffle_xception
channels:

View File

@ -1,245 +0,0 @@
# dataset settings
dataset_type = 'ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[0., 0., 0.],
std=[1., 1., 1.],
# convert image from BGR to RGB
to_rgb=False,
)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet',
'data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='RandomResizedCrop', scale=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=8,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=128,
num_workers=8,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# scheduler
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
clip_grad=None)
# learning policy
param_scheduler = [
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
]
# train, val, test setting
train_cfg = dict(by_epoch=False, max_iters=300000)
val_cfg = dict()
test_cfg = dict()
# runtime
# use the mmcls registries by default
default_scope = 'mmcls'
log_processor = dict(
window_size=100,
by_epoch=False,
custom_cfg=[
dict(
data_src='loss',
log_name='loss_large_window',
method_name='mean',
window_size=100)
])
# configure default hooks
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=100),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(
type='CheckpointHook',
by_epoch=False,
interval=10000,
save_last=True,
max_keep_ckpts=3),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
# vis_backends = [dict(type='LocalVisBackend')]
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# model
norm_cfg = dict(type='BN')
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
mb_k3e3=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k5e3=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k7e3=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k3e6=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k5e6=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k7e6=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
identity=dict(type='Identity'),
))
_FIRST_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
mb_k3e1=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=1,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')), ))
arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable cfg.
[24, 1, 1, _FIRST_MUTABLE],
[32, 4, 2, _STAGE_MUTABLE],
[56, 4, 2, _STAGE_MUTABLE],
[112, 4, 2, _STAGE_MUTABLE],
[128, 4, 1, _STAGE_MUTABLE],
[256, 4, 2, _STAGE_MUTABLE],
[432, 1, 1, _STAGE_MUTABLE]
]
norm_cfg = dict(type='BN')
supernet = dict(
type='ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
_scope_='mmrazor',
type='SearchableMobileNet',
first_channels=40,
last_channels=1728,
widen_factor=1.0,
norm_cfg=norm_cfg,
arch_setting=arch_setting),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1728,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True

View File

@ -1,214 +0,0 @@
# dataset settings
dataset_type = 'ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet',
'data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='RandomResizedCrop', scale=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='ResizeEdge', scale=256, edge='short', backend='cv2'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# scheduler
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
clip_grad=None)
# learning policy
param_scheduler = [
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
]
# train, val, test setting
train_cfg = dict(by_epoch=False, max_iters=300000)
val_cfg = dict()
test_cfg = dict()
# runtime
# use the mmcls registries by default
default_scope = 'mmcls'
log_processor = dict(
window_size=100,
by_epoch=False,
custom_cfg=[
dict(
data_src='loss',
log_name='loss_large_window',
method_name='mean',
window_size=100)
])
# configure default hooks
default_hooks = dict(
# record the time of every iteration.
timer=dict(type='IterTimerHook'),
# print log every 100 iterations.
logger=dict(type='LoggerHook', interval=100),
# enable the parameter scheduler.
param_scheduler=dict(type='ParamSchedulerHook'),
    # save a checkpoint every 10000 iterations.
checkpoint=dict(
type='CheckpointHook',
by_epoch=False,
interval=10000,
save_last=True,
max_keep_ckpts=3),
    # set sampler seed in distributed environment.
sampler_seed=dict(type='DistSamplerSeedHook'),
# validation results visualization, set True to enable it.
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
# whether to enable cudnn benchmark
cudnn_benchmark=False,
# set multi process parameters
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
# set distributed parameters
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
# vis_backends = [dict(type='LocalVisBackend')]
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth"
# whether to resume training from the loaded checkpoint
resume = False
# model
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(
type='ShuffleBlock', kernel_size=3, norm_cfg=dict(type='BN')),
shuffle_5x5=dict(
type='ShuffleBlock', kernel_size=5, norm_cfg=dict(type='BN')),
shuffle_7x7=dict(
type='ShuffleBlock', kernel_size=7, norm_cfg=dict(type='BN')),
shuffle_xception=dict(
type='ShuffleXception', norm_cfg=dict(type='BN')),
))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE],
]
norm_cfg = dict(type='BN')
supernet = dict(
type='ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
_scope_='mmrazor',
type='SearchableShuffleNetV2',
widen_factor=1.0,
norm_cfg=norm_cfg,
arch_setting=arch_setting),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
# fix_subnet='configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml'
)
find_unused_parameters = True

View File

@ -1,421 +0,0 @@
backbone.conv1.bn:
out_channels: 8
raw_out_channels: 48
backbone.conv1.conv:
in_channels: 3
out_channels: 8
raw_in_channels: 3
raw_out_channels: 48
backbone.conv2.bn:
out_channels: 1920
raw_out_channels: 1920
backbone.conv2.conv:
in_channels: 280
out_channels: 1920
raw_in_channels: 480
raw_out_channels: 1920
backbone.layer1.0.conv.0.bn:
out_channels: 8
raw_out_channels: 48
backbone.layer1.0.conv.0.conv:
in_channels: 8
out_channels: 8
raw_in_channels: 48
raw_out_channels: 48
backbone.layer1.0.conv.1.bn:
out_channels: 8
raw_out_channels: 24
backbone.layer1.0.conv.1.conv:
in_channels: 8
out_channels: 8
raw_in_channels: 48
raw_out_channels: 24
backbone.layer2.0.conv.0.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer2.0.conv.0.conv:
in_channels: 8
out_channels: 96
raw_in_channels: 24
raw_out_channels: 144
backbone.layer2.0.conv.1.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer2.0.conv.1.conv:
in_channels: 96
out_channels: 96
raw_in_channels: 144
raw_out_channels: 144
backbone.layer2.0.conv.2.bn:
out_channels: 16
raw_out_channels: 40
backbone.layer2.0.conv.2.conv:
in_channels: 96
out_channels: 16
raw_in_channels: 144
raw_out_channels: 40
backbone.layer2.1.conv.0.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer2.1.conv.0.conv:
in_channels: 16
out_channels: 96
raw_in_channels: 40
raw_out_channels: 240
backbone.layer2.1.conv.1.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer2.1.conv.1.conv:
in_channels: 96
out_channels: 96
raw_in_channels: 240
raw_out_channels: 240
backbone.layer2.1.conv.2.bn:
out_channels: 16
raw_out_channels: 40
backbone.layer2.1.conv.2.conv:
in_channels: 96
out_channels: 16
raw_in_channels: 240
raw_out_channels: 40
backbone.layer3.0.conv.0.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer3.0.conv.0.conv:
in_channels: 16
out_channels: 96
raw_in_channels: 40
raw_out_channels: 240
backbone.layer3.0.conv.1.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer3.0.conv.1.conv:
in_channels: 96
out_channels: 96
raw_in_channels: 240
raw_out_channels: 240
backbone.layer3.0.conv.2.bn:
out_channels: 24
raw_out_channels: 48
backbone.layer3.0.conv.2.conv:
in_channels: 96
out_channels: 24
raw_in_channels: 240
raw_out_channels: 48
backbone.layer3.1.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.1.conv.0.conv:
in_channels: 24
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer3.1.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.1.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer3.1.conv.2.bn:
out_channels: 24
raw_out_channels: 48
backbone.layer3.1.conv.2.conv:
in_channels: 144
out_channels: 24
raw_in_channels: 288
raw_out_channels: 48
backbone.layer3.2.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.2.conv.0.conv:
in_channels: 24
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer3.2.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.2.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer3.2.conv.2.bn:
out_channels: 24
raw_out_channels: 48
backbone.layer3.2.conv.2.conv:
in_channels: 144
out_channels: 24
raw_in_channels: 288
raw_out_channels: 48
backbone.layer4.0.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer4.0.conv.0.conv:
in_channels: 24
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer4.0.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer4.0.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer4.0.conv.2.bn:
out_channels: 48
raw_out_channels: 96
backbone.layer4.0.conv.2.conv:
in_channels: 144
out_channels: 48
raw_in_channels: 288
raw_out_channels: 96
backbone.layer4.1.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.1.conv.0.conv:
in_channels: 48
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.1.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.1.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.1.conv.2.bn:
out_channels: 48
raw_out_channels: 96
backbone.layer4.1.conv.2.conv:
in_channels: 288
out_channels: 48
raw_in_channels: 576
raw_out_channels: 96
backbone.layer4.2.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.2.conv.0.conv:
in_channels: 48
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.2.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.2.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.2.conv.2.bn:
out_channels: 48
raw_out_channels: 96
backbone.layer4.2.conv.2.conv:
in_channels: 288
out_channels: 48
raw_in_channels: 576
raw_out_channels: 96
backbone.layer4.3.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.3.conv.0.conv:
in_channels: 48
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.3.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.3.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.3.conv.2.bn:
out_channels: 48
raw_out_channels: 96
backbone.layer4.3.conv.2.conv:
in_channels: 288
out_channels: 48
raw_in_channels: 576
raw_out_channels: 96
backbone.layer5.0.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer5.0.conv.0.conv:
in_channels: 48
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer5.0.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer5.0.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer5.0.conv.2.bn:
out_channels: 64
raw_out_channels: 144
backbone.layer5.0.conv.2.conv:
in_channels: 288
out_channels: 64
raw_in_channels: 576
raw_out_channels: 144
backbone.layer5.1.conv.0.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.1.conv.0.conv:
in_channels: 64
out_channels: 432
raw_in_channels: 144
raw_out_channels: 864
backbone.layer5.1.conv.1.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.1.conv.1.conv:
in_channels: 432
out_channels: 432
raw_in_channels: 864
raw_out_channels: 864
backbone.layer5.1.conv.2.bn:
out_channels: 64
raw_out_channels: 144
backbone.layer5.1.conv.2.conv:
in_channels: 432
out_channels: 64
raw_in_channels: 864
raw_out_channels: 144
backbone.layer5.2.conv.0.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.2.conv.0.conv:
in_channels: 64
out_channels: 432
raw_in_channels: 144
raw_out_channels: 864
backbone.layer5.2.conv.1.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.2.conv.1.conv:
in_channels: 432
out_channels: 432
raw_in_channels: 864
raw_out_channels: 864
backbone.layer5.2.conv.2.bn:
out_channels: 64
raw_out_channels: 144
backbone.layer5.2.conv.2.conv:
in_channels: 432
out_channels: 64
raw_in_channels: 864
raw_out_channels: 144
backbone.layer6.0.conv.0.bn:
out_channels: 648
raw_out_channels: 864
backbone.layer6.0.conv.0.conv:
in_channels: 64
out_channels: 648
raw_in_channels: 144
raw_out_channels: 864
backbone.layer6.0.conv.1.bn:
out_channels: 648
raw_out_channels: 864
backbone.layer6.0.conv.1.conv:
in_channels: 648
out_channels: 648
raw_in_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.2.bn:
out_channels: 176
raw_out_channels: 240
backbone.layer6.0.conv.2.conv:
in_channels: 648
out_channels: 176
raw_in_channels: 864
raw_out_channels: 240
backbone.layer6.1.conv.0.bn:
out_channels: 720
raw_out_channels: 1440
backbone.layer6.1.conv.0.conv:
in_channels: 176
out_channels: 720
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer6.1.conv.1.bn:
out_channels: 720
raw_out_channels: 1440
backbone.layer6.1.conv.1.conv:
in_channels: 720
out_channels: 720
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.2.bn:
out_channels: 176
raw_out_channels: 240
backbone.layer6.1.conv.2.conv:
in_channels: 720
out_channels: 176
raw_in_channels: 1440
raw_out_channels: 240
backbone.layer6.2.conv.0.bn:
out_channels: 720
raw_out_channels: 1440
backbone.layer6.2.conv.0.conv:
in_channels: 176
out_channels: 720
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer6.2.conv.1.bn:
out_channels: 720
raw_out_channels: 1440
backbone.layer6.2.conv.1.conv:
in_channels: 720
out_channels: 720
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer6.2.conv.2.bn:
out_channels: 176
raw_out_channels: 240
backbone.layer6.2.conv.2.conv:
in_channels: 720
out_channels: 176
raw_in_channels: 1440
raw_out_channels: 240
backbone.layer7.0.conv.0.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.0.conv:
in_channels: 176
out_channels: 1440
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer7.0.conv.1.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.1.conv:
in_channels: 1440
out_channels: 1440
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.2.bn:
out_channels: 280
raw_out_channels: 480
backbone.layer7.0.conv.2.conv:
in_channels: 1440
out_channels: 280
raw_in_channels: 1440
raw_out_channels: 480
head.fc:
in_channels: 1920
out_channels: 1000
raw_in_channels: 1920
raw_out_channels: 1000

View File

@ -1,421 +0,0 @@
backbone.conv1.bn:
out_channels: 8
raw_out_channels: 48
backbone.conv1.conv:
in_channels: 3
out_channels: 8
raw_in_channels: 3
raw_out_channels: 48
backbone.conv2.bn:
out_channels: 1920
raw_out_channels: 1920
backbone.conv2.conv:
in_channels: 480
out_channels: 1920
raw_in_channels: 480
raw_out_channels: 1920
backbone.layer1.0.conv.0.bn:
out_channels: 8
raw_out_channels: 48
backbone.layer1.0.conv.0.conv:
in_channels: 8
out_channels: 8
raw_in_channels: 48
raw_out_channels: 48
backbone.layer1.0.conv.1.bn:
out_channels: 8
raw_out_channels: 24
backbone.layer1.0.conv.1.conv:
in_channels: 8
out_channels: 8
raw_in_channels: 48
raw_out_channels: 24
backbone.layer2.0.conv.0.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer2.0.conv.0.conv:
in_channels: 8
out_channels: 96
raw_in_channels: 24
raw_out_channels: 144
backbone.layer2.0.conv.1.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer2.0.conv.1.conv:
in_channels: 96
out_channels: 96
raw_in_channels: 144
raw_out_channels: 144
backbone.layer2.0.conv.2.bn:
out_channels: 16
raw_out_channels: 40
backbone.layer2.0.conv.2.conv:
in_channels: 96
out_channels: 16
raw_in_channels: 144
raw_out_channels: 40
backbone.layer2.1.conv.0.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer2.1.conv.0.conv:
in_channels: 16
out_channels: 96
raw_in_channels: 40
raw_out_channels: 240
backbone.layer2.1.conv.1.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer2.1.conv.1.conv:
in_channels: 96
out_channels: 96
raw_in_channels: 240
raw_out_channels: 240
backbone.layer2.1.conv.2.bn:
out_channels: 16
raw_out_channels: 40
backbone.layer2.1.conv.2.conv:
in_channels: 96
out_channels: 16
raw_in_channels: 240
raw_out_channels: 40
backbone.layer3.0.conv.0.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer3.0.conv.0.conv:
in_channels: 16
out_channels: 96
raw_in_channels: 40
raw_out_channels: 240
backbone.layer3.0.conv.1.bn:
out_channels: 96
raw_out_channels: 240
backbone.layer3.0.conv.1.conv:
in_channels: 96
out_channels: 96
raw_in_channels: 240
raw_out_channels: 240
backbone.layer3.0.conv.2.bn:
out_channels: 24
raw_out_channels: 48
backbone.layer3.0.conv.2.conv:
in_channels: 96
out_channels: 24
raw_in_channels: 240
raw_out_channels: 48
backbone.layer3.1.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.1.conv.0.conv:
in_channels: 24
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer3.1.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.1.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer3.1.conv.2.bn:
out_channels: 24
raw_out_channels: 48
backbone.layer3.1.conv.2.conv:
in_channels: 144
out_channels: 24
raw_in_channels: 288
raw_out_channels: 48
backbone.layer3.2.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.2.conv.0.conv:
in_channels: 24
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer3.2.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.2.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer3.2.conv.2.bn:
out_channels: 24
raw_out_channels: 48
backbone.layer3.2.conv.2.conv:
in_channels: 144
out_channels: 24
raw_in_channels: 288
raw_out_channels: 48
backbone.layer4.0.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer4.0.conv.0.conv:
in_channels: 24
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer4.0.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer4.0.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer4.0.conv.2.bn:
out_channels: 56
raw_out_channels: 96
backbone.layer4.0.conv.2.conv:
in_channels: 144
out_channels: 56
raw_in_channels: 288
raw_out_channels: 96
backbone.layer4.1.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.1.conv.0.conv:
in_channels: 56
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.1.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.1.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.1.conv.2.bn:
out_channels: 56
raw_out_channels: 96
backbone.layer4.1.conv.2.conv:
in_channels: 288
out_channels: 56
raw_in_channels: 576
raw_out_channels: 96
backbone.layer4.2.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.2.conv.0.conv:
in_channels: 56
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.2.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.2.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.2.conv.2.bn:
out_channels: 56
raw_out_channels: 96
backbone.layer4.2.conv.2.conv:
in_channels: 288
out_channels: 56
raw_in_channels: 576
raw_out_channels: 96
backbone.layer4.3.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.3.conv.0.conv:
in_channels: 56
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.3.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.3.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.3.conv.2.bn:
out_channels: 56
raw_out_channels: 96
backbone.layer4.3.conv.2.conv:
in_channels: 288
out_channels: 56
raw_in_channels: 576
raw_out_channels: 96
backbone.layer5.0.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer5.0.conv.0.conv:
in_channels: 56
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer5.0.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer5.0.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer5.0.conv.2.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer5.0.conv.2.conv:
in_channels: 288
out_channels: 96
raw_in_channels: 576
raw_out_channels: 144
backbone.layer5.1.conv.0.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.1.conv.0.conv:
in_channels: 96
out_channels: 432
raw_in_channels: 144
raw_out_channels: 864
backbone.layer5.1.conv.1.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.1.conv.1.conv:
in_channels: 432
out_channels: 432
raw_in_channels: 864
raw_out_channels: 864
backbone.layer5.1.conv.2.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer5.1.conv.2.conv:
in_channels: 432
out_channels: 96
raw_in_channels: 864
raw_out_channels: 144
backbone.layer5.2.conv.0.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.2.conv.0.conv:
in_channels: 96
out_channels: 432
raw_in_channels: 144
raw_out_channels: 864
backbone.layer5.2.conv.1.bn:
out_channels: 432
raw_out_channels: 864
backbone.layer5.2.conv.1.conv:
in_channels: 432
out_channels: 432
raw_in_channels: 864
raw_out_channels: 864
backbone.layer5.2.conv.2.bn:
out_channels: 96
raw_out_channels: 144
backbone.layer5.2.conv.2.conv:
in_channels: 432
out_channels: 96
raw_in_channels: 864
raw_out_channels: 144
backbone.layer6.0.conv.0.bn:
out_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.0.conv:
in_channels: 96
out_channels: 864
raw_in_channels: 144
raw_out_channels: 864
backbone.layer6.0.conv.1.bn:
out_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.1.conv:
in_channels: 864
out_channels: 864
raw_in_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.2.bn:
out_channels: 240
raw_out_channels: 240
backbone.layer6.0.conv.2.conv:
in_channels: 864
out_channels: 240
raw_in_channels: 864
raw_out_channels: 240
backbone.layer6.1.conv.0.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.0.conv:
in_channels: 240
out_channels: 1440
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer6.1.conv.1.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.1.conv:
in_channels: 1440
out_channels: 1440
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.2.bn:
out_channels: 240
raw_out_channels: 240
backbone.layer6.1.conv.2.conv:
in_channels: 1440
out_channels: 240
raw_in_channels: 1440
raw_out_channels: 240
backbone.layer6.2.conv.0.bn:
out_channels: 960
raw_out_channels: 1440
backbone.layer6.2.conv.0.conv:
in_channels: 240
out_channels: 960
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer6.2.conv.1.bn:
out_channels: 960
raw_out_channels: 1440
backbone.layer6.2.conv.1.conv:
in_channels: 960
out_channels: 960
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer6.2.conv.2.bn:
out_channels: 240
raw_out_channels: 240
backbone.layer6.2.conv.2.conv:
in_channels: 960
out_channels: 240
raw_in_channels: 1440
raw_out_channels: 240
backbone.layer7.0.conv.0.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.0.conv:
in_channels: 240
out_channels: 1440
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer7.0.conv.1.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.1.conv:
in_channels: 1440
out_channels: 1440
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.2.bn:
out_channels: 480
raw_out_channels: 480
backbone.layer7.0.conv.2.conv:
in_channels: 1440
out_channels: 480
raw_in_channels: 1440
raw_out_channels: 480
head.fc:
in_channels: 1920
out_channels: 1000
raw_in_channels: 1920
raw_out_channels: 1000
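Each entry above pairs a layer's pruned widths (in_channels / out_channels) with the supernet's original widths (raw_in_channels / raw_out_channels), so the per-layer keep ratio can be read straight off the file. A minimal sketch of inspecting such a channel_cfg, assuming PyYAML and a hypothetical local copy named channel_cfg.yaml:

import yaml

with open('channel_cfg.yaml') as f:
    channel_cfg = yaml.safe_load(f)

for layer, cfg in channel_cfg.items():
    # keep ratio = retained width / original supernet width
    keep_ratio = cfg['out_channels'] / cfg['raw_out_channels']
    print(f"{layer}: {cfg['out_channels']}/{cfg['raw_out_channels']} "
          f"channels kept ({keep_ratio:.2f})")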

View File

@ -1,421 +0,0 @@
backbone.conv1.bn:
out_channels: 32
raw_out_channels: 48
backbone.conv1.conv:
in_channels: 3
out_channels: 32
raw_in_channels: 3
raw_out_channels: 48
backbone.conv2.bn:
out_channels: 1920
raw_out_channels: 1920
backbone.conv2.conv:
in_channels: 480
out_channels: 1920
raw_in_channels: 480
raw_out_channels: 1920
backbone.layer1.0.conv.0.bn:
out_channels: 32
raw_out_channels: 48
backbone.layer1.0.conv.0.conv:
in_channels: 32
out_channels: 32
raw_in_channels: 48
raw_out_channels: 48
backbone.layer1.0.conv.1.bn:
out_channels: 16
raw_out_channels: 24
backbone.layer1.0.conv.1.conv:
in_channels: 32
out_channels: 16
raw_in_channels: 48
raw_out_channels: 24
backbone.layer2.0.conv.0.bn:
out_channels: 144
raw_out_channels: 144
backbone.layer2.0.conv.0.conv:
in_channels: 16
out_channels: 144
raw_in_channels: 24
raw_out_channels: 144
backbone.layer2.0.conv.1.bn:
out_channels: 144
raw_out_channels: 144
backbone.layer2.0.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 144
raw_out_channels: 144
backbone.layer2.0.conv.2.bn:
out_channels: 24
raw_out_channels: 40
backbone.layer2.0.conv.2.conv:
in_channels: 144
out_channels: 24
raw_in_channels: 144
raw_out_channels: 40
backbone.layer2.1.conv.0.bn:
out_channels: 176
raw_out_channels: 240
backbone.layer2.1.conv.0.conv:
in_channels: 24
out_channels: 176
raw_in_channels: 40
raw_out_channels: 240
backbone.layer2.1.conv.1.bn:
out_channels: 176
raw_out_channels: 240
backbone.layer2.1.conv.1.conv:
in_channels: 176
out_channels: 176
raw_in_channels: 240
raw_out_channels: 240
backbone.layer2.1.conv.2.bn:
out_channels: 24
raw_out_channels: 40
backbone.layer2.1.conv.2.conv:
in_channels: 176
out_channels: 24
raw_in_channels: 240
raw_out_channels: 40
backbone.layer3.0.conv.0.bn:
out_channels: 192
raw_out_channels: 240
backbone.layer3.0.conv.0.conv:
in_channels: 24
out_channels: 192
raw_in_channels: 40
raw_out_channels: 240
backbone.layer3.0.conv.1.bn:
out_channels: 192
raw_out_channels: 240
backbone.layer3.0.conv.1.conv:
in_channels: 192
out_channels: 192
raw_in_channels: 240
raw_out_channels: 240
backbone.layer3.0.conv.2.bn:
out_channels: 48
raw_out_channels: 48
backbone.layer3.0.conv.2.conv:
in_channels: 192
out_channels: 48
raw_in_channels: 240
raw_out_channels: 48
backbone.layer3.1.conv.0.bn:
out_channels: 240
raw_out_channels: 288
backbone.layer3.1.conv.0.conv:
in_channels: 48
out_channels: 240
raw_in_channels: 48
raw_out_channels: 288
backbone.layer3.1.conv.1.bn:
out_channels: 240
raw_out_channels: 288
backbone.layer3.1.conv.1.conv:
in_channels: 240
out_channels: 240
raw_in_channels: 288
raw_out_channels: 288
backbone.layer3.1.conv.2.bn:
out_channels: 48
raw_out_channels: 48
backbone.layer3.1.conv.2.conv:
in_channels: 240
out_channels: 48
raw_in_channels: 288
raw_out_channels: 48
backbone.layer3.2.conv.0.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.2.conv.0.conv:
in_channels: 48
out_channels: 144
raw_in_channels: 48
raw_out_channels: 288
backbone.layer3.2.conv.1.bn:
out_channels: 144
raw_out_channels: 288
backbone.layer3.2.conv.1.conv:
in_channels: 144
out_channels: 144
raw_in_channels: 288
raw_out_channels: 288
backbone.layer3.2.conv.2.bn:
out_channels: 48
raw_out_channels: 48
backbone.layer3.2.conv.2.conv:
in_channels: 144
out_channels: 48
raw_in_channels: 288
raw_out_channels: 48
backbone.layer4.0.conv.0.bn:
out_channels: 264
raw_out_channels: 288
backbone.layer4.0.conv.0.conv:
in_channels: 48
out_channels: 264
raw_in_channels: 48
raw_out_channels: 288
backbone.layer4.0.conv.1.bn:
out_channels: 264
raw_out_channels: 288
backbone.layer4.0.conv.1.conv:
in_channels: 264
out_channels: 264
raw_in_channels: 288
raw_out_channels: 288
backbone.layer4.0.conv.2.bn:
out_channels: 88
raw_out_channels: 96
backbone.layer4.0.conv.2.conv:
in_channels: 264
out_channels: 88
raw_in_channels: 288
raw_out_channels: 96
backbone.layer4.1.conv.0.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.1.conv.0.conv:
in_channels: 88
out_channels: 288
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.1.conv.1.bn:
out_channels: 288
raw_out_channels: 576
backbone.layer4.1.conv.1.conv:
in_channels: 288
out_channels: 288
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.1.conv.2.bn:
out_channels: 88
raw_out_channels: 96
backbone.layer4.1.conv.2.conv:
in_channels: 288
out_channels: 88
raw_in_channels: 576
raw_out_channels: 96
backbone.layer4.2.conv.0.bn:
out_channels: 336
raw_out_channels: 576
backbone.layer4.2.conv.0.conv:
in_channels: 88
out_channels: 336
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.2.conv.1.bn:
out_channels: 336
raw_out_channels: 576
backbone.layer4.2.conv.1.conv:
in_channels: 336
out_channels: 336
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.2.conv.2.bn:
out_channels: 88
raw_out_channels: 96
backbone.layer4.2.conv.2.conv:
in_channels: 336
out_channels: 88
raw_in_channels: 576
raw_out_channels: 96
backbone.layer4.3.conv.0.bn:
out_channels: 432
raw_out_channels: 576
backbone.layer4.3.conv.0.conv:
in_channels: 88
out_channels: 432
raw_in_channels: 96
raw_out_channels: 576
backbone.layer4.3.conv.1.bn:
out_channels: 432
raw_out_channels: 576
backbone.layer4.3.conv.1.conv:
in_channels: 432
out_channels: 432
raw_in_channels: 576
raw_out_channels: 576
backbone.layer4.3.conv.2.bn:
out_channels: 88
raw_out_channels: 96
backbone.layer4.3.conv.2.conv:
in_channels: 432
out_channels: 88
raw_in_channels: 576
raw_out_channels: 96
backbone.layer5.0.conv.0.bn:
out_channels: 576
raw_out_channels: 576
backbone.layer5.0.conv.0.conv:
in_channels: 88
out_channels: 576
raw_in_channels: 96
raw_out_channels: 576
backbone.layer5.0.conv.1.bn:
out_channels: 576
raw_out_channels: 576
backbone.layer5.0.conv.1.conv:
in_channels: 576
out_channels: 576
raw_in_channels: 576
raw_out_channels: 576
backbone.layer5.0.conv.2.bn:
out_channels: 144
raw_out_channels: 144
backbone.layer5.0.conv.2.conv:
in_channels: 576
out_channels: 144
raw_in_channels: 576
raw_out_channels: 144
backbone.layer5.1.conv.0.bn:
out_channels: 576
raw_out_channels: 864
backbone.layer5.1.conv.0.conv:
in_channels: 144
out_channels: 576
raw_in_channels: 144
raw_out_channels: 864
backbone.layer5.1.conv.1.bn:
out_channels: 576
raw_out_channels: 864
backbone.layer5.1.conv.1.conv:
in_channels: 576
out_channels: 576
raw_in_channels: 864
raw_out_channels: 864
backbone.layer5.1.conv.2.bn:
out_channels: 144
raw_out_channels: 144
backbone.layer5.1.conv.2.conv:
in_channels: 576
out_channels: 144
raw_in_channels: 864
raw_out_channels: 144
backbone.layer5.2.conv.0.bn:
out_channels: 648
raw_out_channels: 864
backbone.layer5.2.conv.0.conv:
in_channels: 144
out_channels: 648
raw_in_channels: 144
raw_out_channels: 864
backbone.layer5.2.conv.1.bn:
out_channels: 648
raw_out_channels: 864
backbone.layer5.2.conv.1.conv:
in_channels: 648
out_channels: 648
raw_in_channels: 864
raw_out_channels: 864
backbone.layer5.2.conv.2.bn:
out_channels: 144
raw_out_channels: 144
backbone.layer5.2.conv.2.conv:
in_channels: 648
out_channels: 144
raw_in_channels: 864
raw_out_channels: 144
backbone.layer6.0.conv.0.bn:
out_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.0.conv:
in_channels: 144
out_channels: 864
raw_in_channels: 144
raw_out_channels: 864
backbone.layer6.0.conv.1.bn:
out_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.1.conv:
in_channels: 864
out_channels: 864
raw_in_channels: 864
raw_out_channels: 864
backbone.layer6.0.conv.2.bn:
out_channels: 240
raw_out_channels: 240
backbone.layer6.0.conv.2.conv:
in_channels: 864
out_channels: 240
raw_in_channels: 864
raw_out_channels: 240
backbone.layer6.1.conv.0.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.0.conv:
in_channels: 240
out_channels: 1440
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer6.1.conv.1.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.1.conv:
in_channels: 1440
out_channels: 1440
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer6.1.conv.2.bn:
out_channels: 240
raw_out_channels: 240
backbone.layer6.1.conv.2.conv:
in_channels: 1440
out_channels: 240
raw_in_channels: 1440
raw_out_channels: 240
backbone.layer6.2.conv.0.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer6.2.conv.0.conv:
in_channels: 240
out_channels: 1440
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer6.2.conv.1.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer6.2.conv.1.conv:
in_channels: 1440
out_channels: 1440
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer6.2.conv.2.bn:
out_channels: 240
raw_out_channels: 240
backbone.layer6.2.conv.2.conv:
in_channels: 1440
out_channels: 240
raw_in_channels: 1440
raw_out_channels: 240
backbone.layer7.0.conv.0.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.0.conv:
in_channels: 240
out_channels: 1440
raw_in_channels: 240
raw_out_channels: 1440
backbone.layer7.0.conv.1.bn:
out_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.1.conv:
in_channels: 1440
out_channels: 1440
raw_in_channels: 1440
raw_out_channels: 1440
backbone.layer7.0.conv.2.bn:
out_channels: 480
raw_out_channels: 480
backbone.layer7.0.conv.2.conv:
in_channels: 1440
out_channels: 480
raw_in_channels: 1440
raw_out_channels: 480
head.fc:
in_channels: 1920
out_channels: 1000
raw_in_channels: 1920
raw_out_channels: 1000

View File

@ -1,13 +0,0 @@
_base_ = [
'./autoslim_mbv2_supernet_8xb256_in1k.py',
]
algorithm = dict(distiller=None, input_shape=(3, 224, 224))
searcher = dict(
type='GreedySearcher',
target_flops=[500000000, 300000000, 200000000],
max_channel_bins=12,
metrics='accuracy')
data = dict(samples_per_gpu=1024, workers_per_gpu=4)
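Here max_channel_bins=12 discretizes every layer's width into twelfths, and GreedySearcher slims the supernet bin by bin until each target_flops budget is met. A toy sketch of the candidate width ratios this implies (they match the RatioPruner ratios in the supernet config below), not the searcher itself:

max_channel_bins = 12
# narrowest allowed width is 2 bins, widest is all 12 bins
candidate_ratios = [i / max_channel_bins for i in range(2, max_channel_bins + 1)]
print(candidate_ratios)  # [0.1667, 0.25, ..., 1.0]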

View File

@ -1,29 +0,0 @@
_base_ = [
'./autoslim_mbv2_supernet_8xb256_in1k.py',
]
model = dict(
head=dict(
loss=dict(
type='LabelSmoothLoss',
mode='original',
label_smooth_val=0.1,
loss_weight=1.0)))
# FIXME: replace this with the channel_cfg files produced by your own search
channel_cfg = [
'https://download.openmmlab.com/mmrazor/v0.1/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k/autoslim_mbv2_subnet_8xb256_in1k_flops-0.53M_acc-74.23_20211222-e5208bbd_channel_cfg.yaml', # noqa: E501
'https://download.openmmlab.com/mmrazor/v0.1/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k/autoslim_mbv2_subnet_8xb256_in1k_flops-0.32M_acc-72.73_20211222-b5b0b33c_channel_cfg.yaml', # noqa: E501
'https://download.openmmlab.com/mmrazor/v0.1/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k/autoslim_mbv2_subnet_8xb256_in1k_flops-0.22M_acc-71.39_20211222-43117c7b_channel_cfg.yaml' # noqa: E501
]
algorithm = dict(
architecture=dict(type='MMClsArchitecture', model=model),
distiller=None,
retraining=True,
bn_training_mode=False,
channel_cfg=channel_cfg)
runner = dict(type='EpochBasedRunner', max_epochs=300)
find_unused_parameters = True

View File

@ -1,51 +0,0 @@
_base_ = [
'../../_base_/datasets/mmcls/imagenet_bs256_autoslim.py',
'../../_base_/schedules/mmcls/imagenet_bs2048_autoslim.py',
'../../_base_/mmcls_runtime.py'
]
model = dict(
type='mmcls.ImageClassifier',
backbone=dict(type='MobileNetV2', widen_factor=1.5),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1920,
loss=dict(
type='LabelSmoothLoss',
mode='original',
label_smooth_val=0.1,
loss_weight=1.0),
topk=(1, 5),
))
algorithm = dict(
type='AutoSlim',
architecture=dict(type='MMClsArchitecture', model=model),
distiller=dict(
type='SelfDistiller',
components=[
dict(
student_module='head.fc',
teacher_module='head.fc',
losses=[
dict(
type='KLDivergence',
name='loss_kd',
tau=1,
loss_weight=1,
)
]),
]),
pruner=dict(
type='RatioPruner',
ratios=(2 / 12, 3 / 12, 4 / 12, 5 / 12, 6 / 12, 7 / 12, 8 / 12, 9 / 12,
10 / 12, 11 / 12, 1.0)),
retraining=False,
bn_training_mode=True,
input_shape=None)
runner = dict(type='EpochBasedRunner', max_epochs=50)
use_ddp_wrapper = True

View File

@ -1,216 +0,0 @@
# use the registries in mmcls by default
default_scope = 'mmcls'
# !architecture config
# ==========================================================================
architecture = dict(
_scope_='mmcls',
type='ImageClassifier',
backbone=dict(type='MobileNetV2', widen_factor=1.5),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1920,
loss=dict(
type='LabelSmoothLoss',
mode='original',
label_smooth_val=0.1,
loss_weight=1.0),
topk=(1, 5),
))
# ==========================================================================
# !dataset config
# ==========================================================================
# data preprocessor
data_preprocessor = dict(
type='ImgDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
bgr_to_rgb=True,
)
dataset_type = 'ImageNet'
# ceph config
use_ceph = True
ceph_file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet',
'data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet'
}))
disk_file_client_args = dict(backend='disk')
if use_ceph:
file_client_args = ceph_file_client_args
else:
file_client_args = disk_file_client_args
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='RandomResizedCrop',
scale=224,
crop_ratio_range=(0.25, 1.0),
backend='pillow'),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackClsInputs'),
]
_batch_size_per_gpu = 256
train_dataloader = dict(
batch_size=_batch_size_per_gpu,
num_workers=16,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=_batch_size_per_gpu,
num_workers=16,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# For a standard test, configure the test dataset manually
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# !runtime config
# ==========================================================================
# configure log processor
log_processor = dict(window_size=100)
# configure default hooks
default_hooks = dict(
# record the time of every iteration.
timer=dict(type='IterTimerHook'),
# print log every 100 iterations.
logger=dict(type='LoggerHook', interval=100),
# enable the parameter scheduler.
param_scheduler=dict(type='ParamSchedulerHook'),
# save checkpoint per epoch.
checkpoint=dict(
type='CheckpointHook', max_keep_ckpts=50, save_best='auto',
interval=1),
# set sampler seed in distributed environment.
sampler_seed=dict(type='DistSamplerSeedHook'),
# visualization of validation results; set enable=True to turn it on.
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
# whether to enable cudnn benchmark
cudnn_benchmark=False,
# set multi process parameters
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
# set distributed parameters
dist_cfg=dict(backend='nccl'),
)
# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='ClsVisualizer', vis_backends=vis_backends)
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# ==========================================================================
# !autoslim algorithm config
# ==========================================================================
channel_cfg_paths = [
'tests/data/MBV2_220M.yaml', 'tests/data/MBV2_320M.yaml',
'tests/data/MBV2_530M.yaml'
]
model = dict(
_scope_='mmrazor',
type='SlimmableNetwork',
architecture=architecture,
data_preprocessor=data_preprocessor,
channel_cfg_paths=channel_cfg_paths,
mutator=dict(
type='SlimmableChannelMutator',
mutable_cfg=dict(type='SlimmableMutableChannel'),
tracer_cfg=dict(
type='BackwardTracer',
loss_calculator=dict(type='ImageClassifierPseudoLoss'))))
# ==========================================================================
# !model wrapper config
# ==========================================================================
model_wrapper_cfg = dict(
type='mmrazor.SlimmableNetworkDDP',
broadcast_buffers=False,
find_unused_parameters=True)
# ==========================================================================
# !scheduler config
# ==========================================================================
paramwise_cfg = dict(
bias_decay_mult=0.0, norm_decay_mult=0.0, dwconv_decay_mult=0.0)
optimizer = dict(
type='SGD', lr=0.5, momentum=0.9, nesterov=True, weight_decay=0.0001)
optim_wrapper = dict(
optimizer=optimizer,
paramwise_cfg=paramwise_cfg,
accumulative_counts=len(channel_cfg_paths))
# learning policy
max_epochs = 300
param_scheduler = dict(
type='PolyLR',
power=1.0,
eta_min=0.0,
by_epoch=True,
end=max_epochs,
convert_to_iter_based=True)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='mmrazor.SlimmableValLoop')
test_cfg = dict()
# ==========================================================================
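Note that accumulative_counts equals len(channel_cfg_paths): every iteration forwards each slimmable width once, accumulates gradients, and applies a single optimizer step. A self-contained toy of that accumulation pattern (a plain nn.Linear stands in for switching subnet widths, which the mutator would do in the real algorithm):

import torch
import torch.nn as nn

model = nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
criterion = nn.CrossEntropyLoss()
num_subnets = 3  # == len(channel_cfg_paths) == accumulative_counts

x, y = torch.randn(4, 8), torch.randint(0, 2, (4,))
optimizer.zero_grad()
for _ in range(num_subnets):
    # the mutator would switch the active subnet width here
    criterion(model(x), y).backward()  # gradients accumulate across subnets
optimizer.step()  # one update per training iteration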

View File

@ -1,228 +0,0 @@
# use the registries in mmcls by default
default_scope = 'mmcls'
# !architecture config
# ==========================================================================
architecture = dict(
_scope_='mmcls',
type='ImageClassifier',
backbone=dict(type='MobileNetV2', widen_factor=1.5),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1920,
loss=dict(
type='LabelSmoothLoss',
mode='original',
label_smooth_val=0.1,
loss_weight=1.0),
topk=(1, 5),
))
# ==========================================================================
# !dataset config
# ==========================================================================
# data preprocessor
data_preprocessor = dict(
type='ImgDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
bgr_to_rgb=True,
)
dataset_type = 'ImageNet'
# ceph config
use_ceph = True
ceph_file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet',
'data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet'
}))
disk_file_client_args = dict(backend='disk')
if use_ceph:
file_client_args = ceph_file_client_args
else:
file_client_args = disk_file_client_args
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='RandomResizedCrop',
scale=224,
crop_ratio_range=(0.25, 1.0),
backend='pillow'),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackClsInputs'),
]
_batch_size_per_gpu = 256
train_dataloader = dict(
batch_size=_batch_size_per_gpu,
num_workers=16,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=_batch_size_per_gpu,
num_workers=16,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# For a standard test, configure the test dataset manually
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# !runtime config
# ==========================================================================
# configure log processor
log_processor = dict(window_size=100)
# configure default hooks
default_hooks = dict(
# record the time of every iteration.
timer=dict(type='IterTimerHook'),
# print log every 100 iterations.
logger=dict(type='LoggerHook', interval=100),
# enable the parameter scheduler.
param_scheduler=dict(type='ParamSchedulerHook'),
# save checkpoint per epoch.
checkpoint=dict(
type='CheckpointHook', max_keep_ckpts=50, save_best='auto',
interval=1),
# set sampler seed in distributed environment.
sampler_seed=dict(type='DistSamplerSeedHook'),
# visualization of validation results; set enable=True to turn it on.
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
# whether to enable cudnn benchmark
cudnn_benchmark=False,
# set multi process parameters
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
# set distributed parameters
dist_cfg=dict(backend='nccl'),
)
# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='ClsVisualizer', vis_backends=vis_backends)
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# ==========================================================================
# !autoslim algorithm config
# ==========================================================================
num_samples = 2
model = dict(
_scope_='mmrazor',
type='AutoSlim',
num_samples=num_samples,
architecture=architecture,
data_preprocessor=data_preprocessor,
distiller=dict(
type='ConfigurableDistiller',
teacher_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
student_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
distill_losses=dict(
loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)),
loss_forward_mappings=dict(
loss_kl=dict(
preds_S=dict(recorder='fc', from_student=True),
preds_T=dict(recorder='fc', from_student=False)))),
mutator=dict(
type='OneShotChannelMutator',
mutable_cfg=dict(
type='OneShotMutableChannel',
candidate_choices=list(i / 12 for i in range(2, 13)),
candidate_mode='ratio'),
tracer_cfg=dict(
type='BackwardTracer',
loss_calculator=dict(type='ImageClassifierPseudoLoss'))))
# ==========================================================================
# !model wrapper config
# ==========================================================================
model_wrapper_cfg = dict(
type='mmrazor.AutoSlimDDP',
broadcast_buffers=False,
find_unused_parameters=False)
# ==========================================================================
# !scheduler config
# ==========================================================================
paramwise_cfg = dict(
bias_decay_mult=0.0, norm_decay_mult=0.0, dwconv_decay_mult=0.0)
optimizer = dict(
type='SGD', lr=0.5, momentum=0.9, nesterov=True, weight_decay=0.0001)
optim_wrapper = dict(
optimizer=optimizer,
paramwise_cfg=paramwise_cfg,
accumulative_counts=num_samples + 2)
# learning policy
max_epochs = 50
param_scheduler = dict(
type='PolyLR',
power=1.0,
eta_min=0.0,
by_epoch=True,
end=max_epochs,
convert_to_iter_based=True)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='mmrazor.AutoSlimValLoop')
test_cfg = dict()
# ==========================================================================
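Here accumulative_counts = num_samples + 2 reflects AutoSlim's sandwich rule: each iteration trains the widest subnet, the narrowest subnet, and num_samples randomly sampled widths before a single optimizer step. A toy enumeration under that assumption, using the mutator's candidate ratios from the config above:

import random

candidate_choices = [i / 12 for i in range(2, 13)]
num_samples = 2
subnets = ([max(candidate_choices), min(candidate_choices)]
           + random.choices(candidate_choices, k=num_samples))
print(len(subnets))  # 4 == num_samples + 2 == accumulative_counts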

View File

@ -0,0 +1,50 @@
_base_ = [
'mmrazor::_base_/settings/imagenet_bs2048_autoslim.py',
'mmcls::_base_/models/mobilenet_v2_1x.py',
'mmcls::_base_/default_runtime.py',
]
supernet = _base_.model
supernet.backbone.widen_factor = 1.5
supernet.head.in_channels = 1920
# !dataset config
# ==========================================================================
# data preprocessor
data_preprocessor = dict(
type='ImgDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
bgr_to_rgb=True)
# !autoslim algorithm config
# ==========================================================================
channel_cfg_paths = [
'tests/data/MBV2_220M.yaml', 'tests/data/MBV2_320M.yaml',
'tests/data/MBV2_530M.yaml'
]
model = dict(
_delete_=True,
_scope_='mmrazor',
type='SlimmableNetwork',
architecture=supernet,
data_preprocessor=data_preprocessor,
channel_cfg_paths=channel_cfg_paths,
mutator=dict(
type='SlimmableChannelMutator',
mutable_cfg=dict(type='SlimmableMutableChannel'),
tracer_cfg=dict(
type='BackwardTracer',
loss_calculator=dict(type='ImageClassifierPseudoLoss'))))
model_wrapper_cfg = dict(
type='mmrazor.SlimmableNetworkDDP',
broadcast_buffers=False,
find_unused_parameters=True)
optim_wrapper = dict(accumulative_counts=3)
val_cfg = dict(type='mmrazor.SlimmableValLoop')

View File

@ -1,3 +1,3 @@
_base_ = 'autoslim_slimmable.py'
_base_ = 'autoslim_mbv2_1.5x_supernet_8xb256_in1k.py'
model = dict(channel_cfg_paths='tests/data/MBV2_530M.yaml')

View File

@ -0,0 +1,66 @@
_base_ = [
'mmrazor::_base_/settings/imagenet_bs2048_autoslim.py',
'mmcls::_base_/models/mobilenet_v2_1x.py',
'mmcls::_base_/default_runtime.py',
]
supernet = _base_.model
supernet.backbone.widen_factor = 1.5
supernet.head.in_channels = 1920
# !dataset config
# ==========================================================================
# data preprocessor
data_preprocessor = dict(
type='ImgDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
bgr_to_rgb=True,
)
# !autoslim algorithm config
num_samples = 2
model = dict(
_scope_='mmrazor',
type='AutoSlim',
num_samples=num_samples,
architecture=supernet,
data_preprocessor=data_preprocessor,
distiller=dict(
type='ConfigurableDistiller',
teacher_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
student_recorders=dict(
fc=dict(type='ModuleOutputs', source='head.fc')),
distill_losses=dict(
loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)),
loss_forward_mappings=dict(
loss_kl=dict(
preds_S=dict(recorder='fc', from_student=True),
preds_T=dict(recorder='fc', from_student=False)))),
mutator=dict(
type='OneShotChannelMutator',
mutable_cfg=dict(
type='OneShotMutableChannel',
candidate_choices=list(i / 12 for i in range(2, 13)),
candidate_mode='ratio'),
tracer_cfg=dict(
type='BackwardTracer',
loss_calculator=dict(type='ImageClassifierPseudoLoss'))))
model_wrapper_cfg = dict(
type='mmrazor.AutoSlimDDP',
broadcast_buffers=False,
find_unused_parameters=False)
optim_wrapper = dict(accumulative_counts=num_samples + 2)
# learning policy
max_epochs = 50
param_scheduler = dict(end=max_epochs)
# train, val, test setting
train_cfg = dict(max_epochs=max_epochs)
val_cfg = dict(type='mmrazor.AutoSlimValLoop')

View File

@ -1,8 +1,9 @@
Import:
- configs/distill/cwd/metafile.yml
- configs/distill/wsld/metafile.yml
- configs/distill/rkd/metafile.yml
- configs/nas/darts/metafile.yml
- configs/nas/detnas/metafile.yml
- configs/nas/spos/metafile.yml
- configs/pruning/autoslim/metafile.yml
- configs/distill/mmseg/cwd/metafile.yml
- configs/distill/mmdet/cwd/metafile.yml
- configs/distill/mmcls/wsld/metafile.yml
- configs/distill/mmcls/rkd/metafile.yml
# - configs/nas/darts/metafile.yml
- configs/nas/mmdet/detnas/metafile.yml
- configs/nas/mmcls/spos/metafile.yml
# - configs/pruning/autoslim/metafile.yml

View File

@ -0,0 +1,53 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
from pathlib import Path
import requests
import yaml
MMRAZOR_ROOT = Path(__file__).absolute().parents[1]
class TestMetafiles:
def get_metafiles(self, code_path):
"""
Function: get the metafile of all configs from model-index.yml
"""
metafile = os.path.join(code_path, 'model-index.yml')
with open(metafile, 'r') as f:
meta = yaml.safe_load(f)
return meta['Import']
def test_metafiles(self):
metafiles = self.get_metafiles(MMRAZOR_ROOT)
for mf in metafiles:
metafile = os.path.abspath(os.path.join(MMRAZOR_ROOT, mf))
with open(metafile, 'r') as f:
meta = yaml.safe_load(f)
for model in meta['Models']:
# 1. weights url check
r = requests.head(model['Weights'], timeout=4)
assert r.status_code != 404, \
f"can't connect url {model['Weights']} in " \
f'metafile {metafile}'
# 2. config check
dir_path = os.path.abspath(os.path.join(metafile, '../'))
# list all files which are in the same directory of
# current metafile
config_files = os.listdir(dir_path)
if isinstance(model['Config'], list):
# TODO: 3. log error
continue
assert (model['Config'].split('/')[-1] in config_files), \
f"config error in {metafile} model {model['Name']}"
# 4. name check
# erase '.py'
correct_name = model['Config'].split('/')[-1][:-3]
assert model['Name'] == correct_name, \
f'name error in {metafile}, correct name should ' \
f'be {correct_name}'
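To try the checks outside of pytest (a sketch, assuming the repository root as the working directory so model-index.yml resolves):

checker = TestMetafiles()
checker.test_metafiles()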

View File

@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from pathlib import Path
import torch
def parse_args():
parser = argparse.ArgumentParser(
description='Process a checkpoint to be published')
parser.add_argument('checkpoint', help='input checkpoint filename')
parser.add_argument(
'--inplace', action='store_true', help='replace origin ckpt')
args = parser.parse_args()
return args
def main():
args = parse_args()
checkpoint = torch.load(args.checkpoint, map_location='cpu')
new_state_dict = dict()
for key, value in checkpoint['state_dict'].items():
if key.startswith('architecture.model.distiller.teacher'):
new_key = key.replace('architecture.model.distiller.teacher',
'architecture.teacher')
elif key.startswith('architecture.model'):
new_key = key.replace('architecture.model', 'architecture')
else:
new_key = key
new_state_dict[new_key] = value
checkpoint['state_dict'] = new_state_dict
if args.inplace:
torch.save(checkpoint, args.checkpoint)
else:
ckpt_path = Path(args.checkpoint)
ckpt_name = ckpt_path.stem
ckpt_dir = ckpt_path.parent
new_ckpt_path = ckpt_dir / f'{ckpt_name}_latest.pth'
torch.save(checkpoint, new_ckpt_path)
if __name__ == '__main__':
main()
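The effect of the key mapping on a toy state_dict (illustrative keys only, not taken from a real checkpoint):

old_keys = [
    'architecture.model.distiller.teacher.fc.weight',
    'architecture.model.backbone.conv1.weight',
    'head.fc.bias',
]
for key in old_keys:
    if key.startswith('architecture.model.distiller.teacher'):
        key = key.replace('architecture.model.distiller.teacher',
                          'architecture.teacher')
    elif key.startswith('architecture.model'):
        key = key.replace('architecture.model', 'architecture')
    print(key)
# architecture.teacher.fc.weight
# architecture.backbone.conv1.weight
# head.fc.bias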

View File

@ -45,7 +45,8 @@ def main():
cfg.launcher = args.launcher
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None