Align SPOS and DetNAS to MMRazor2.0

pull/198/head
PJDong 2022-07-13 08:35:31 +00:00 committed by pppppM
parent 2d5e8bc675
commit 6c920c88ee
68 changed files with 3108 additions and 863 deletions

bash_anglenas.sh 100644
@@ -0,0 +1,12 @@
#!/usr/bin/env sh
MKL_NUM_THREADS=4
OMP_NUM_THREADS=1
# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test
bash tools/slurm_test.sh mm_model angle_test configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py /mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth

@@ -0,0 +1,56 @@
#!/usr/bin/env sh
MKL_NUM_THREADS=4
OMP_NUM_THREADS=1
# train
# srun --partition=mm_model \
# --job-name=spos_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py
# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py ./work_dir/spos
# SPOS test
# srun --partition=mm_model \
# --job-name=spos_test \
# --gres=gpu:1 \
# --ntasks=1 \
# --ntasks-per-node=1 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth"
# DetNAS train
# srun --partition=mm_model \
# --job-name=detnas_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py
# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py ./work_dir/detnas_pretrain
# DetNAS test
# srun --partition=mm_model \
# --job-name=detnas_test \
# --gres=gpu:1 \
# --ntasks=1 \
# --ntasks-per-node=1 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth"
# CREAM Test
# bash tools/slurm_test.sh mm_model cream_test configs/nas/cream/cream_14_subnet_mobilenet.py '/mnt/lustre/dongpeijie/14_2.0.pth'
# CREAM Train
bash tools/slurm_train.sh mm_model cream_train configs/nas/cream/cream_14_subnet_mobilenet.py

@@ -0,0 +1,7 @@
#!/usr/bin/env sh
MKL_NUM_THREADS=4
OMP_NUM_THREADS=1
bash tools/slurm_test.sh mm_model spos_test configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth'

@@ -0,0 +1,31 @@
#!/usr/bin/env sh
MKL_NUM_THREADS=4
OMP_NUM_THREADS=1
# DetNAS train
# srun --partition=mm_model \
# --job-name=detnas_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py
# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test
# bash tools/slurm_test.sh mm_model detnas_test configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth
# DetNAS test
srun --partition=mm_model \
--job-name=detnas_test \
--gres=gpu:1 \
--ntasks=1 \
--ntasks-per-node=1 \
--cpus-per-task=8 \
--kill-on-bad-exit=1 \
--quotatype=auto \
python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth" --launcher=slurm

bash_spos_train.sh 100644
@@ -0,0 +1,51 @@
#!/usr/bin/env sh
MKL_NUM_THREADS=4
OMP_NUM_THREADS=1
# train
# srun --partition=mm_model \
# --job-name=spos_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py
# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_format_output
# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph
# 55% wrong settings of PolyLR
# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph
# fix setting of PolyLR and rerun with colorjittor
# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_with_colorjittor
# fix setting of PolyLR and rerun w/o colorjittor
# bash tools/slurm_train.sh mm_model spos_retrain_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_wo_colorjittor
# fix setting of optimizer decay[wo cj] (paramwise_cfg)
# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_wo_cj
# fix setting of optimizer decay[with cj] (paramwise_cfg)
# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_w_cj
# SPOS test
# srun --partition=mm_model \
# --job-name=spos_test \
# --gres=gpu:1 \
# --ntasks=1 \
# --ntasks-per-node=1 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth"
bash tools/slurm_test.sh mm_model spos_test configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth'
# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_spos

@@ -0,0 +1,76 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 2, 2, _OTHER_STAGE_MUTABLE],
[96, 3, 1, _OTHER_STAGE_MUTABLE],
[192, 2, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]
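A note on the arch_setting table above: each row is (channel, num_blocks, stride, mutable cfg). Below is a minimal sketch in plain Python — not mmrazor code; the candidate names and the expansion loop are assumptions — of how such rows could expand into per-layer specs and how a one-shot mutable would sample one candidate per layer:

# Hypothetical sketch, not mmrazor code.
import random

sketch_arch_setting = [
    [16, 1, 1, 'first'],
    [24, 1, 2, 'other'],
    [40, 2, 2, 'other'],
]
sketch_candidates = {
    'first': ['mb_k3e4_se'],
    'other': ['mb_k3e4_se', 'mb_k3e6_se', 'mb_k5e4_se',
              'mb_k5e6_se', 'mb_k7e4_se', 'mb_k7e6_se'],
}

layers = []
for channel, num_blocks, stride, mutable in sketch_arch_setting:
    for i in range(num_blocks):
        # Only the first block of a stage applies the stage stride.
        layers.append((channel, stride if i == 0 else 1, mutable))

# One-shot sampling: pick a random candidate op for every layer.
subnet = [random.choice(sketch_candidates[m]) for _, _, m in layers]
print(subnet)  # e.g. ['mb_k3e4_se', 'mb_k5e6_se', 'mb_k3e4_se', 'mb_k7e4_se']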

@@ -0,0 +1,76 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 2, 2, _OTHER_STAGE_MUTABLE],
[96, 1, 1, _OTHER_STAGE_MUTABLE],
[192, 1, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -0,0 +1,76 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 3, 2, _OTHER_STAGE_MUTABLE],
[96, 4, 1, _OTHER_STAGE_MUTABLE],
[192, 3, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -0,0 +1,76 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _OTHER_STAGE_MUTABLE],
[40, 2, 2, _OTHER_STAGE_MUTABLE],
[80, 2, 2, _OTHER_STAGE_MUTABLE],
[96, 3, 1, _OTHER_STAGE_MUTABLE],
[192, 2, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -0,0 +1,76 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 4, 2, _OTHER_STAGE_MUTABLE],
[40, 4, 2, _OTHER_STAGE_MUTABLE],
[80, 5, 2, _OTHER_STAGE_MUTABLE],
[96, 4, 1, _OTHER_STAGE_MUTABLE],
[192, 4, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -0,0 +1,76 @@
se_cfg = dict(
ratio=4,
divisor=1,
act_cfg=(dict(type='HSwish'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
_OTHER_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='HSwish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 5, 2, _OTHER_STAGE_MUTABLE],
[40, 5, 2, _OTHER_STAGE_MUTABLE],
[80, 5, 2, _OTHER_STAGE_MUTABLE],
[96, 6, 1, _OTHER_STAGE_MUTABLE],
[192, 6, 2, _OTHER_STAGE_MUTABLE],
[320, 1, 1, _OTHER_STAGE_MUTABLE]
]

@@ -0,0 +1,11 @@
modules:
backbone.layer1.0: depthsepconv
backbone.layer2.0: mb_k3e4_se
backbone.layer3.0: mb_k5e6_se
backbone.layer3.1: mb_k5e6_se
backbone.layer4.0: mb_k5e6_se
backbone.layer4.1: mb_k5e6_se
backbone.layer5.0: mb_k3e6_se
backbone.layer6.0: mb_k5e6_se
backbone.layer7.0: convbnact
channels:
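The subnet file above maps each mutable module path to its chosen candidate, with an empty channels: section (which parses as null). A minimal sketch of reading such a file, assuming PyYAML and an illustrative filename — not the actual mmrazor loader:

# Hypothetical sketch, not the mmrazor fix_subnet loader.
import yaml  # assumes PyYAML is available

with open('subnet.yaml') as f:  # 'subnet.yaml' stands in for the file above
    cfg = yaml.safe_load(f)

# 'channels:' with no entries parses as None, hence the `or {}`.
for module_path, choice in (cfg.get('modules') or {}).items():
    # A real loader would walk module_path (e.g. backbone.layer1.0)
    # to the mutable op in the model and fix it to `choice`.
    print(module_path, '->', choice)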

@@ -0,0 +1,8 @@
_base_ = ['./cream_14_supernet_mobilenet.py']
# FIXME: you may replace this with the mutable_cfg searched by yourself
fix_subnet = 'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

@@ -0,0 +1,241 @@
# dataset settings
dataset_type = 'ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/imagenet': 's3://openmmlab/datasets/classification/imagenet',
# 'data/imagenet': 's3://openmmlab/datasets/classification/imagenet'
# }))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', scale=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=73,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=64),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
val_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# scheduler
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
clip_grad=None)
# learning policy
param_scheduler = [
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
]
# train, val, test setting
train_cfg = dict(by_epoch=False, max_iters=300000)
val_cfg = dict()
test_cfg = dict()
# runtime
# defaults to use registries in mmcls
default_scope = 'mmcls'
# configure default hooks
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=100),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
se_cfg = dict(
ratio=4,
divisor=8,
act_cfg=(dict(type='ReLU'),
dict(
type='HSigmoid', bias=3, divisor=6, min_value=0,
max_value=1)))
_FIRST_STAGE_MUTABLE = dict( # DepthwiseSep
type='OneShotMutableOP',
candidates=dict(
depthsepconv=dict(
type='DepthwiseSeparableConv',
dw_kernel_size=3,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish'))))
_MIDDLE_STAGE_MUTABLE = dict(
type='OneShotMutableOP',
candidates=dict(
mb_k3e4_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k3e6_se=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k5e4_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k5e6_se=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k7e4_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=4,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish')),
mb_k7e6_se=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
se_cfg=se_cfg,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='Swish'))))
arch_setting = [
# Parameters to build layers. 4 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
[16, 1, 1, _FIRST_STAGE_MUTABLE],
[24, 1, 2, _MIDDLE_STAGE_MUTABLE],
[40, 2, 2, _MIDDLE_STAGE_MUTABLE],
[80, 2, 2, _MIDDLE_STAGE_MUTABLE],
[96, 1, 1, _MIDDLE_STAGE_MUTABLE],
[192, 1, 2, _MIDDLE_STAGE_MUTABLE],
]
norm_cfg = dict(type='BN')
supernet = dict(
_scope_='mmcls',
type='ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
_scope_='mmrazor',
type='SearchableMobileNet',
arch_setting=arch_setting,
first_channels=16,
last_channels=320,
widen_factor=1.0,
norm_cfg=norm_cfg,
act_cfg=dict(type='Swish'),
out_indices=(6, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='mmrazor.CreamClsHead',
num_classes=1000,
in_channels=320,
num_features=1280,
act_cfg=dict(type='Swish'),
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True

@@ -1,116 +0,0 @@
normal_n2:
chosen:
- normal_n2_p1
- normal_n2_p0
normal_n3:
chosen:
- normal_n3_p0
- normal_n3_p1
normal_n4:
chosen:
- normal_n4_p0
- normal_n4_p1
normal_n5:
chosen:
- normal_n5_p2
- normal_n5_p0
reduce_n2:
chosen:
- reduce_n2_p0
- reduce_n2_p1
reduce_n3:
chosen:
- reduce_n3_p1
- reduce_n3_p2
reduce_n4:
chosen:
- reduce_n4_p2
- reduce_n4_p0
reduce_n5:
chosen:
- reduce_n5_p1
- reduce_n5_p2
normal_n2_p0:
chosen:
- sep_conv_3x3
normal_n2_p1:
chosen:
- sep_conv_3x3
normal_n3_p0:
chosen:
- sep_conv_3x3
normal_n3_p1:
chosen:
- sep_conv_3x3
normal_n3_p2:
chosen:
- sep_conv_3x3
normal_n4_p0:
chosen:
- skip_connect
normal_n4_p1:
chosen:
- sep_conv_3x3
normal_n4_p2:
chosen:
- skip_connect
normal_n4_p3:
chosen:
- sep_conv_3x3
normal_n5_p0:
chosen:
- skip_connect
normal_n5_p1:
chosen:
- skip_connect
normal_n5_p2:
chosen:
- dil_conv_3x3
normal_n5_p3:
chosen:
- skip_connect
normal_n5_p4:
chosen:
- skip_connect
reduce_n2_p0:
chosen:
- max_pool_3x3
reduce_n2_p1:
chosen:
- max_pool_3x3
reduce_n3_p0:
chosen:
- max_pool_3x3
reduce_n3_p1:
chosen:
- max_pool_3x3
reduce_n3_p2:
chosen:
- skip_connect
reduce_n4_p0:
chosen:
- max_pool_3x3
reduce_n4_p1:
chosen:
- max_pool_3x3
reduce_n4_p2:
chosen:
- skip_connect
reduce_n4_p3:
chosen:
- skip_connect
reduce_n5_p0:
chosen:
- max_pool_3x3
reduce_n5_p1:
chosen:
- max_pool_3x3
reduce_n5_p2:
chosen:
- skip_connect
reduce_n5_p3:
chosen:
- skip_connect
reduce_n5_p4:
chosen:
- skip_connect

@@ -0,0 +1,58 @@
modules:
normal_n2:
- normal_n2_p0
- normal_n2_p1
normal_n2_p0:
- sep_conv_3x3
normal_n2_p1:
- sep_conv_3x3
normal_n3:
- normal_n3_p0
- normal_n3_p1
normal_n3_p0:
- skip_connect
normal_n3_p1:
- sep_conv_5x5
normal_n4:
- normal_n4_p0
- normal_n4_p1
normal_n4_p0:
- sep_conv_3x3
normal_n4_p1:
- skip_connect
normal_n5:
- normal_n5_p0
- normal_n5_p1
normal_n5_p0:
- skip_connect
normal_n5_p1:
- skip_connect
reduce_n2:
- reduce_n2_p0
- reduce_n2_p1
reduce_n2_p0:
- max_pool_3x3
reduce_n2_p1:
- sep_conv_3x3
reduce_n3:
- reduce_n3_p0
- reduce_n3_p2
reduce_n3_p0:
- max_pool_3x3
reduce_n3_p2:
- dil_conv_5x5
reduce_n4:
- reduce_n4_p0
- reduce_n4_p2
reduce_n4_p0:
- max_pool_3x3
reduce_n4_p2:
- skip_connect
reduce_n5:
- reduce_n5_p0
- reduce_n5_p2
reduce_n5_p0:
- max_pool_3x3
reduce_n5_p2:
- skip_connect
channels:

@@ -0,0 +1,196 @@
# dataset settings
dataset_type = 'CIFAR10'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[125.307, 122.961, 113.8575],
std=[51.5865, 50.847, 51.255],
# loaded images are already RGB format
to_rgb=False)
train_pipeline = [
dict(type='RandomCrop', crop_size=32, padding=4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='Cutout',
magnitude_key='shape',
magnitude_range=(1, 16),
pad_val=0,
prob=0.5),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=96,
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10',
test_mode=False,
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/',
test_mode=True,
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, ))
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4),
mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3),
clip_grad=dict(max_norm=5, norm_type=2))
# learning policy
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=600,
by_epoch=True,
begin=0,
end=600,
)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600)
val_cfg = dict(interval=1) # validate each epoch
test_cfg = dict()
# defaults to use registries in mmcls
default_scope = 'mmcls'
# configure default hooks
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=100),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(
type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# model
norm_cfg = dict(type='BN', affine=True)
mutable_cfg = dict(
_scope_='mmrazor',
type='mmrazor.DiffMutableOP',
candidates=dict(
zero=dict(type='mmrazor.DartsZero'),
skip_connect=dict(
type='mmrazor.DartsSkipConnect',
norm_cfg=norm_cfg,
use_drop_path=True),
max_pool_3x3=dict(
type='mmrazor.DartsPoolBN',
pool_type='max',
norm_cfg=norm_cfg,
use_drop_path=True),
avg_pool_3x3=dict(
type='mmrazor.DartsPoolBN',
pool_type='avg',
norm_cfg=norm_cfg,
use_drop_path=True),
sep_conv_3x3=dict(
type='mmrazor.DartsSepConv',
kernel_size=3,
norm_cfg=norm_cfg,
use_drop_path=True),
sep_conv_5x5=dict(
type='mmrazor.DartsSepConv',
kernel_size=5,
norm_cfg=norm_cfg,
use_drop_path=True),
dil_conv_3x3=dict(
type='mmrazor.DartsDilConv',
kernel_size=3,
norm_cfg=norm_cfg,
use_drop_path=True),
dil_conv_5x5=dict(
type='mmrazor.DartsDilConv',
kernel_size=5,
norm_cfg=norm_cfg,
use_drop_path=True),
))
route_cfg = dict(
type='mmrazor.DiffChoiceRoute',
with_arch_param=True,
)
supernet = dict(
type='mmcls.ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
type='mmrazor.DartsBackbone',
in_channels=3,
base_channels=36,
num_layers=20,
num_nodes=4,
stem_multiplier=3,
auxliary=True,
aux_channels=128,
aux_out_channels=768,
out_indices=(19, ),
mutable_cfg=mutable_cfg,
route_cfg=route_cfg),
neck=dict(type='mmcls.GlobalAveragePooling'),
head=dict(
type='mmrazor.DartsSubnetClsHead',
num_classes=10,
in_channels=576,
aux_in_channels=768,
loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4),
topk=(1, 5),
cal_acc=True),
)
mutator = dict(type='mmrazor.DiffModuleMutator')
fix_subnet = 'configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml'
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
fix_subnet=fix_subnet,
)
find_unused_parameters = False

@@ -0,0 +1,163 @@
# dataset settings
dataset_type = 'CIFAR10'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[125.307, 122.961, 113.8575],
std=[51.5865, 50.847, 51.255],
# loaded images are already RGB format
to_rgb=False)
train_pipeline = [
dict(type='RandomCrop', crop_size=32, padding=4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=16,
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10',
test_mode=False,
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=2,
dataset=dict(
type=dataset_type,
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/',
test_mode=True,
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, ))
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4),
mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3),
clip_grad=None)
# learning policy
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=50,
by_epoch=True,
eta_min=1e-3,
begin=0,
end=50,
)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=50)
val_cfg = dict(interval=1) # validate each epoch
test_cfg = dict()
# defaults to use registries in mmcls
default_scope = 'mmcls'
# configure default hooks
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=100),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(
type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# model
norm_cfg = dict(type='BN', affine=False)
mutable_cfg = dict(
_scope_='mmrazor',
type='mmrazor.DiffMutableOP',
candidates=dict(
zero=dict(type='mmrazor.DartsZero'),
skip_connect=dict(type='mmrazor.DartsSkipConnect', norm_cfg=norm_cfg),
max_pool_3x3=dict(
type='mmrazor.DartsPoolBN', pool_type='max', norm_cfg=norm_cfg),
avg_pool_3x3=dict(
type='mmrazor.DartsPoolBN', pool_type='avg', norm_cfg=norm_cfg),
sep_conv_3x3=dict(
type='mmrazor.DartsSepConv', kernel_size=3, norm_cfg=norm_cfg),
sep_conv_5x5=dict(
type='mmrazor.DartsSepConv', kernel_size=5, norm_cfg=norm_cfg),
dil_conv_3x3=dict(
type='mmrazor.DartsDilConv', kernel_size=3, norm_cfg=norm_cfg),
dil_conv_5x5=dict(
type='mmrazor.DartsDilConv', kernel_size=5, norm_cfg=norm_cfg),
))
route_cfg = dict(
type='mmrazor.DiffChoiceRoute',
with_arch_param=True,
)
supernet = dict(
type='mmcls.ImageClassifier',
backbone=dict(
type='mmrazor.DartsBackbone',
in_channels=3,
base_channels=36,
num_layers=20,
num_nodes=4,
stem_multiplier=3,
auxliary=False,
out_indices=(19, ),
mutable_cfg=mutable_cfg,
route_cfg=route_cfg),
neck=dict(type='mmcls.GlobalAveragePooling'),
head=dict(
type='mmrazor.DartsSubnetClsHead',
num_classes=10,
in_channels=576,
aux_in_channels=768,
loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4),
topk=(1, 5),
cal_acc=True),
)
mutator = dict(type='mmrazor.DiffModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True
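In the search config above, DiffMutableOP plus DiffChoiceRoute with with_arch_param=True point to DARTS-style differentiable selection, and the separate architecture/mutator entries in optim_wrapper suggest that weights and architecture parameters are stepped by different optimizers. A minimal PyTorch sketch of the core idea — candidate outputs mixed by a softmax over learnable architecture parameters; this is an illustration, not mmrazor's DiffMutableOP:

# Hypothetical sketch, not mmrazor's DiffMutableOP.
import torch
import torch.nn as nn
import torch.nn.functional as F

class DiffOp(nn.Module):
    def __init__(self, candidates):
        super().__init__()
        self.candidates = nn.ModuleList(candidates)
        # One learnable architecture parameter per candidate op.
        self.arch_param = nn.Parameter(torch.zeros(len(candidates)))

    def forward(self, x):
        weights = F.softmax(self.arch_param, dim=0)
        # The weighted sum keeps every candidate differentiable, so the
        # arch params can be trained by their own (e.g. Adam) optimizer.
        return sum(w * op(x) for w, op in zip(weights, self.candidates))

op = DiffOp([nn.Conv2d(8, 8, k, padding=k // 2) for k in (1, 3, 5)])
out = op(torch.randn(1, 8, 16, 16))
print(out.shape)  # torch.Size([1, 8, 16, 16])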

@@ -1,60 +0,0 @@
stage_0_block_0:
chosen:
- shuffle_7x7
stage_0_block_1:
chosen:
- shuffle_5x5
stage_0_block_2:
chosen:
- shuffle_7x7
stage_0_block_3:
chosen:
- shuffle_3x3
stage_1_block_0:
chosen:
- shuffle_7x7
stage_1_block_1:
chosen:
- shuffle_5x5
stage_1_block_2:
chosen:
- shuffle_5x5
stage_1_block_3:
chosen:
- shuffle_7x7
stage_2_block_0:
chosen:
- shuffle_xception
stage_2_block_1:
chosen:
- shuffle_xception
stage_2_block_2:
chosen:
- shuffle_5x5
stage_2_block_3:
chosen:
- shuffle_xception
stage_2_block_4:
chosen:
- shuffle_3x3
stage_2_block_5:
chosen:
- shuffle_3x3
stage_2_block_6:
chosen:
- shuffle_xception
stage_2_block_7:
chosen:
- shuffle_5x5
stage_3_block_0:
chosen:
- shuffle_xception
stage_3_block_1:
chosen:
- shuffle_5x5
stage_3_block_2:
chosen:
- shuffle_xception
stage_3_block_3:
chosen:
- shuffle_7x7

@@ -0,0 +1,22 @@
modules:
backbone.layers.0.0: shuffle_5x5
backbone.layers.0.1: shuffle_3x3
backbone.layers.0.2: shuffle_3x3
backbone.layers.0.3: shuffle_3x3
backbone.layers.1.0: shuffle_xception
backbone.layers.1.1: shuffle_3x3
backbone.layers.1.2: shuffle_xception
backbone.layers.1.3: shuffle_7x7
backbone.layers.2.0: shuffle_7x7
backbone.layers.2.1: shuffle_7x7
backbone.layers.2.2: shuffle_xception
backbone.layers.2.3: shuffle_xception
backbone.layers.2.4: shuffle_3x3
backbone.layers.2.5: shuffle_7x7
backbone.layers.2.6: shuffle_5x5
backbone.layers.2.7: shuffle_xception
backbone.layers.3.0: shuffle_7x7
backbone.layers.3.1: shuffle_7x7
backbone.layers.3.2: shuffle_7x7
backbone.layers.3.3: shuffle_5x5
channels:

@@ -0,0 +1,22 @@
modules:
backbone.layers.0.0: shuffle_5x5
backbone.layers.0.1: shuffle_3x3
backbone.layers.0.2: shuffle_3x3
backbone.layers.0.3: shuffle_3x3
backbone.layers.1.0: shuffle_xception
backbone.layers.1.1: shuffle_3x3
backbone.layers.1.2: shuffle_xception
backbone.layers.1.3: shuffle_7x7
backbone.layers.2.0: shuffle_7x7
backbone.layers.2.1: shuffle_7x7
backbone.layers.2.2: shuffle_xception
backbone.layers.2.3: shuffle_xception
backbone.layers.2.4: shuffle_3x3
backbone.layers.2.5: shuffle_7x7
backbone.layers.2.6: shuffle_5x5
backbone.layers.2.7: shuffle_xception
backbone.layers.3.0: shuffle_7x7
backbone.layers.3.1: shuffle_7x7
backbone.layers.3.2: shuffle_7x7
backbone.layers.3.3: shuffle_5x5
channels:

@@ -1,20 +0,0 @@
_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py']
data = dict(
samples_per_gpu=128,
workers_per_gpu=8,
)
algorithm = dict(bn_training_mode=True)
searcher = dict(
type='EvolutionSearcher',
metrics='bbox',
score_key='bbox_mAP',
constraints=dict(flops=300 * 1e6),
candidate_pool_size=50,
candidate_top_k=10,
max_epoch=20,
num_mutation=20,
num_crossover=20,
)
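The searcher fields above (candidate_pool_size, candidate_top_k, max_epoch, num_mutation, num_crossover, plus mutate_prob in the SPOS variants) parameterize an evolutionary loop. A toy sketch of that loop follows — not mmrazor's EvolutionSearcher; the FLOPs constraint and the real supernet evaluation (bbox_mAP here) are stubbed out:

# Hypothetical sketch, not mmrazor's EvolutionSearcher.
import random

CHOICES = ['shuffle_3x3', 'shuffle_5x5', 'shuffle_7x7', 'shuffle_xception']
NUM_LAYERS = 20

def sample():
    return [random.choice(CHOICES) for _ in range(NUM_LAYERS)]

def mutate(subnet, prob=0.1):  # cf. mutate_prob in the SPOS searchers
    return [random.choice(CHOICES) if random.random() < prob else op
            for op in subnet]

def crossover(a, b):
    return [random.choice(pair) for pair in zip(a, b)]

def evaluate(subnet):
    # Toy score standing in for validating the subnet on the supernet.
    return sum(op == 'shuffle_3x3' for op in subnet)

def search(pool_size=50, top_k=10, max_epoch=20,
           num_mutation=20, num_crossover=20):
    pool = [sample() for _ in range(pool_size)]
    for _ in range(max_epoch):
        parents = sorted(pool, key=evaluate, reverse=True)[:top_k]
        mutated = [mutate(random.choice(parents)) for _ in range(num_mutation)]
        crossed = [crossover(*random.sample(parents, 2))
                   for _ in range(num_crossover)]
        pool = parents + mutated + crossed
    return max(pool, key=evaluate)

best = search()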

@@ -1,6 +0,0 @@
_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py']
# FIXME: you may replace this with the mutable_cfg searched by yourself
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml' # noqa: E501
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)

@@ -1,8 +0,0 @@
_base_ = [
'../spos/spos_subnet_shufflenetv2_8xb128_in1k.py',
]
# FIXME: you may replace this with the mutable_cfg searched by yourself
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml' # noqa: E501
algorithm = dict(mutable_cfg=mutable_cfg)

@@ -0,0 +1,8 @@
_base_ = ['./detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py']
# FIXME: you may replace this with the mutable_cfg searched by yourself
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

@@ -1,144 +0,0 @@
_base_ = [
'../../_base_/datasets/mmdet/coco_detection.py',
'../../_base_/schedules/mmdet/schedule_1x.py',
'../../_base_/mmdet_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='mmdet.FasterRCNN',
backbone=dict(
type='mmcls.SearchableShuffleNetV2',
norm_cfg=norm_cfg,
out_indices=(0, 1, 2, 3),
widen_factor=1.0,
with_last_layer=False),
neck=dict(
type='FPN',
norm_cfg=norm_cfg,
in_channels=[64, 160, 320, 640],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared4Conv1FCBBoxHead',
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
),
)
mutator = dict(
type='OneShotModuleMutator',
placeholder_mapping=dict(
all_blocks=dict(
type='OneShotMutableOP',
choices=dict(
shuffle_3x3=dict(
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
shuffle_5x5=dict(
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=5),
shuffle_7x7=dict(
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=7),
shuffle_xception=dict(
type='ShuffleXception',
norm_cfg=norm_cfg,
),
))))
algorithm = dict(
type='DetNAS',
architecture=dict(
type='MMDetArchitecture',
model=model,
),
mutator=mutator,
pruner=None,
distiller=None,
retraining=False,
)
find_unused_parameters = True

@@ -1,5 +0,0 @@
_base_ = [
'../spos/spos_supernet_shufflenetv2_8xb128_in1k.py',
]
runner = dict(max_iters=300000)

@@ -0,0 +1,87 @@
_base_ = [
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
'mmdet::_base_/datasets/coco_detection.py',
'mmdet::_base_/schedules/schedule_1x.py',
'mmdet::_base_/default_runtime.py'
]
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
_base_.train_dataloader.dataset.data_root = data_root
visualizer = None
log_level = 'INFO'
load_from = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
resume = False
norm_cfg = dict(type='SyncBN', requires_grad=True)
# model settings
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='mmrazor.OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
shuffle_5x5=dict(
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
shuffle_7x7=dict(
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
shuffle_xception=dict(
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE],
]
supernet = _base_.model
supernet.backbone = dict(
type='mmrazor.SearchableShuffleNetV2',
arch_setting=arch_setting,
norm_cfg=norm_cfg,
out_indices=(0, 1, 2, 3),
widen_factor=1.0,
with_last_layer=False)
supernet.neck = dict(
type='FPN',
norm_cfg=norm_cfg,
in_channels=[64, 160, 320, 640],
out_channels=256,
num_outs=5)
supernet.roi_head.bbox_head = dict(
type='Shared4Conv1FCBBoxHead',
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))
mutator = dict(type='mmrazor.OneShotModuleMutator')
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
model = dict(
_delete_=True,
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
fix_subnet=fix_subnet,
)
find_unused_parameters = True

@@ -0,0 +1,114 @@
_base_ = [
'mmdet::faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
'mmdet::datasets/coco_detection.py', 'mmdet::schedules/schedule_1x.py',
'mmdet::default_runtime.py'
]
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
train_dataloader = dict(dataset=dict(data_root=data_root, ))
visualizer = None
# custom_hooks = [dict(type='DetVisualizationHook', interval=10)]
log_level = 'INFO'
load_from = None
resume = False
# TODO: support auto scaling lr
norm_cfg = dict(type='SyncBN', requires_grad=True)
# model settings
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='mmrazor.OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
shuffle_5x5=dict(
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
shuffle_7x7=dict(
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
shuffle_xception=dict(
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE],
]
supernet = dict(
type='RetinaNet',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32),
backbone=dict(
type='mmrazor.SearchableShuffleNetV2',
arch_setting=arch_setting,
norm_cfg=norm_cfg,
out_indices=(0, 1, 2, 3),
widen_factor=1.0,
with_last_layer=False),
neck=dict(
type='FPN',
in_channels=[64, 160, 320, 640],
out_channels=256,
num_outs=5),
bbox_head=dict(
type='RetinaHead',
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100))
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True

@@ -0,0 +1,24 @@
modules:
backbone.layer1.0: mb_k3e1
backbone.layer2.0: mb_k5e3
backbone.layer2.1: mb_k5e3
backbone.layer2.2: identity
backbone.layer2.3: mb_k3e3
backbone.layer3.0: mb_k3e3
backbone.layer3.1: identity
backbone.layer3.2: identity
backbone.layer3.3: mb_k3e3
backbone.layer4.0: mb_k7e6
backbone.layer4.1: identity
backbone.layer4.2: mb_k7e3
backbone.layer4.3: mb_k7e3
backbone.layer5.0: mb_k3e3
backbone.layer5.1: mb_k3e3
backbone.layer5.2: mb_k7e3
backbone.layer5.3: mb_k5e3
backbone.layer6.0: mb_k5e6
backbone.layer6.1: mb_k7e3
backbone.layer6.2: mb_k7e3
backbone.layer6.3: mb_k7e3
backbone.layer7.0: mb_k5e6
channels:

@@ -1,66 +0,0 @@
stage_0_block_0:
chosen:
- mb_k3e1
stage_1_block_0:
chosen:
- mb_k5e3
stage_1_block_1:
chosen:
- mb_k5e3
stage_1_block_2:
chosen:
- identity
stage_1_block_3:
chosen:
- mb_k3e3
stage_2_block_0:
chosen:
- mb_k3e3
stage_2_block_1:
chosen:
- identity
stage_2_block_2:
chosen:
- identity
stage_2_block_3:
chosen:
- mb_k3e3
stage_3_block_0:
chosen:
- mb_k7e6
stage_3_block_1:
chosen:
- identity
stage_3_block_2:
chosen:
- mb_k7e3
stage_3_block_3:
chosen:
- mb_k7e3
stage_4_block_0:
chosen:
- mb_k3e3
stage_4_block_1:
chosen:
- mb_k3e3
stage_4_block_2:
chosen:
- mb_k7e3
stage_4_block_3:
chosen:
- mb_k5e3
stage_5_block_0:
chosen:
- mb_k5e6
stage_5_block_1:
chosen:
- mb_k7e3
stage_5_block_2:
chosen:
- mb_k7e3
stage_5_block_3:
chosen:
- mb_k7e3
stage_6_block_0:
chosen:
- mb_k5e6

@@ -1,60 +0,0 @@
stage_0_block_0:
chosen:
- shuffle_7x7
stage_0_block_1:
chosen:
- shuffle_5x5
stage_0_block_2:
chosen:
- shuffle_3x3
stage_0_block_3:
chosen:
- shuffle_5x5
stage_1_block_0:
chosen:
- shuffle_7x7
stage_1_block_1:
chosen:
- shuffle_3x3
stage_1_block_2:
chosen:
- shuffle_7x7
stage_1_block_3:
chosen:
- shuffle_3x3
stage_2_block_0:
chosen:
- shuffle_7x7
stage_2_block_1:
chosen:
- shuffle_3x3
stage_2_block_2:
chosen:
- shuffle_7x7
stage_2_block_3:
chosen:
- shuffle_xception
stage_2_block_4:
chosen:
- shuffle_3x3
stage_2_block_5:
chosen:
- shuffle_3x3
stage_2_block_6:
chosen:
- shuffle_3x3
stage_2_block_7:
chosen:
- shuffle_3x3
stage_3_block_0:
chosen:
- shuffle_xception
stage_3_block_1:
chosen:
- shuffle_7x7
stage_3_block_2:
chosen:
- shuffle_xception
stage_3_block_3:
chosen:
- shuffle_xception

@@ -0,0 +1,22 @@
modules:
backbone.layers.0.0: shuffle_7x7
backbone.layers.0.1: shuffle_3x3
backbone.layers.0.2: shuffle_7x7
backbone.layers.0.3: shuffle_3x3
backbone.layers.1.0: shuffle_xception
backbone.layers.1.1: shuffle_5x5
backbone.layers.1.2: shuffle_5x5
backbone.layers.1.3: shuffle_3x3
backbone.layers.2.0: shuffle_3x3
backbone.layers.2.1: shuffle_5x5
backbone.layers.2.2: shuffle_3x3
backbone.layers.2.3: shuffle_5x5
backbone.layers.2.4: shuffle_3x3
backbone.layers.2.5: shuffle_xception
backbone.layers.2.6: shuffle_5x5
backbone.layers.2.7: shuffle_7x7
backbone.layers.3.0: shuffle_7x7
backbone.layers.3.1: shuffle_3x3
backbone.layers.3.2: shuffle_5x5
backbone.layers.3.3: shuffle_xception
channels:

@@ -1,20 +0,0 @@
_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py']
data = dict(
samples_per_gpu=512,
workers_per_gpu=16,
)
algorithm = dict(bn_training_mode=True)
searcher = dict(
type='EvolutionSearcher',
candidate_pool_size=50,
candidate_top_k=10,
constraints=dict(flops=465 * 1e6),
metrics='accuracy',
score_key='accuracy_top-1',
max_epoch=20,
num_mutation=25,
num_crossover=25,
mutate_prob=0.1)

@@ -1,20 +0,0 @@
_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k.py']
data = dict(
samples_per_gpu=2048,
workers_per_gpu=16,
)
algorithm = dict(bn_training_mode=True)
searcher = dict(
type='EvolutionSearcher',
candidate_pool_size=50,
candidate_top_k=10,
constraints=dict(flops=330 * 1e6),
metrics='accuracy',
score_key='accuracy_top-1',
max_epoch=20,
num_mutation=25,
num_crossover=25,
mutate_prob=0.1)

@@ -1,27 +0,0 @@
_base_ = [
'./spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py',
]
img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', size=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='ToTensor', keys=['gt_label']),
dict(type='Collect', keys=['img', 'gt_label'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', size=(256, -1)),
dict(type='CenterCrop', crop_size=224),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))

@@ -1,13 +0,0 @@
_base_ = [
'./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py',
]
# FIXME: you may replace this with the mutable_cfg searched by yourself
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_mobilenet_subnet/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_mutable_cfg.yaml' # noqa: E501
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
evaluation = dict(interval=10000, metric='accuracy')
checkpoint_config = dict(interval=30000)
runner = dict(max_iters=300000)
find_unused_parameters = False

@@ -0,0 +1,8 @@
_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py']
# FIXME: you may replace this with the mutable_cfg searched by yourself
fix_subnet = 'configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

@@ -1,11 +0,0 @@
_base_ = [
'./spos_supernet_shufflenetv2_8xb128_in1k.py',
]
# FIXME: you may replace this with the mutable_cfg searched by yourself
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml' # noqa: E501
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
runner = dict(max_iters=300000)
find_unused_parameters = False

@@ -0,0 +1,9 @@
_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py']
# FIXME: you may replace this with the mutable_cfg searched by yourself
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
model = dict(fix_subnet=fix_subnet)
find_unused_parameters = False

@@ -1,101 +0,0 @@
_base_ = [
'../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
'../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
'../../_base_/mmcls_runtime.py'
]
norm_cfg = dict(type='BN')
model = dict(
type='mmcls.ImageClassifier',
backbone=dict(
type='SearchableMobileNet',
first_channels=40,
last_channels=1728,
widen_factor=1.0,
norm_cfg=norm_cfg,
arch_setting_type='proxyless_gpu'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1728,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(
type='OneShotModuleMutator',
placeholder_mapping=dict(
searchable_blocks=dict(
type='OneShotMutableOP',
choices=dict(
mb_k3e3=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k5e3=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k7e3=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k3e6=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k5e6=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k7e6=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
identity=dict(type='Identity'))),
first_blocks=dict(
type='OneShotMutableOP',
choices=dict(
mb_k3e1=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=1,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')), ))))
algorithm = dict(
type='SPOS',
architecture=dict(
type='MMClsArchitecture',
model=model,
),
mutator=mutator,
distiller=None,
retraining=False,
)
runner = dict(max_iters=150000)
evaluation = dict(interval=10000, metric='accuracy')
# checkpoint saving
checkpoint_config = dict(interval=30000)
find_unused_parameters = True

@@ -0,0 +1,245 @@
# dataset settings
dataset_type = 'ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[0., 0., 0.],
std=[1., 1., 1.],
# convert image from BGR to RGB
to_rgb=False,
)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet',
'data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='RandomResizedCrop', scale=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=8,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
val_dataloader = dict(
batch_size=128,
num_workers=8,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# scheduler
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
clip_grad=None)
# learning policy
param_scheduler = [
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
]
# train, val, test setting
train_cfg = dict(by_epoch=False, max_iters=300000)
val_cfg = dict()
test_cfg = dict()
# runtime
# defaults to use registries in mmcls
default_scope = 'mmcls'
log_processor = dict(
window_size=100,
by_epoch=False,
custom_cfg=[
dict(
data_src='loss',
log_name='loss_large_window',
method_name='mean',
window_size=100)
])
# configure default hooks
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=100),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(
type='CheckpointHook',
by_epoch=False,
interval=10000,
save_last=True,
max_keep_ckpts=3),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
# vis_backends = [dict(type='LocalVisBackend')]
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# model
norm_cfg = dict(type='BN')
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
mb_k3e3=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k5e3=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k7e3=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=3,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k3e6=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k5e6=dict(
type='MBBlock',
kernel_size=5,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
mb_k7e6=dict(
type='MBBlock',
kernel_size=7,
expand_ratio=6,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')),
identity=dict(type='Identity'),
))
_FIRST_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
mb_k3e1=dict(
type='MBBlock',
kernel_size=3,
expand_ratio=1,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU6')), ))
arch_setting = [
    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: channel, num_blocks, stride, mutable_cfg.
[24, 1, 1, _FIRST_MUTABLE],
[32, 4, 2, _STAGE_MUTABLE],
[56, 4, 2, _STAGE_MUTABLE],
[112, 4, 2, _STAGE_MUTABLE],
[128, 4, 1, _STAGE_MUTABLE],
[256, 4, 2, _STAGE_MUTABLE],
[432, 1, 1, _STAGE_MUTABLE]
]
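# For example, the row [32, 4, 2, _STAGE_MUTABLE] builds a stage of 4
# searchable blocks with 32 output channels, where (by the usual MobileNet
# convention, an assumption here) the stride of 2 applies to the first block
# of the stage and each block is a OneShotMutableOP over the candidates above.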
norm_cfg = dict(type='BN')
supernet = dict(
type='ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
_scope_='mmrazor',
type='SearchableMobileNet',
first_channels=40,
last_channels=1728,
widen_factor=1.0,
norm_cfg=norm_cfg,
arch_setting=arch_setting),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1728,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
)
find_unused_parameters = True
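A minimal sketch of building this supernet config through the registry, reusing the same imports that `convert_keys.py` later in this commit relies on (the config path below is hypothetical):

from mmengine.config import Config
from mmrazor.registry import MODELS
from mmrazor.utils import register_all_modules

register_all_modules(True)  # register mmcls/mmrazor components first
cfg = Config.fromfile('configs/nas/spos/spos_mobilenet_supernet_example.py')  # hypothetical path
spos = MODELS.build(cfg.model)  # SPOS algorithm wrapping the supernet and mutator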

View File

@ -1,59 +0,0 @@
_base_ = [
'../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
'../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
'../../_base_/mmcls_runtime.py'
]
norm_cfg = dict(type='BN')
model = dict(
type='mmcls.ImageClassifier',
backbone=dict(
type='SearchableShuffleNetV2', widen_factor=1.0, norm_cfg=norm_cfg),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(
type='OneShotModuleMutator',
placeholder_mapping=dict(
all_blocks=dict(
type='OneShotMutableOP',
choices=dict(
shuffle_3x3=dict(
type='ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
shuffle_5x5=dict(
type='ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
shuffle_7x7=dict(
type='ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
shuffle_xception=dict(
type='ShuffleXception', norm_cfg=norm_cfg),
))))
algorithm = dict(
type='SPOS',
architecture=dict(
type='MMClsArchitecture',
model=model,
),
mutator=mutator,
distiller=None,
retraining=False,
)
runner = dict(max_iters=150000)
evaluation = dict(interval=1000, metric='accuracy')
# checkpoint saving
checkpoint_config = dict(interval=1000)
find_unused_parameters = True

View File

@ -0,0 +1,214 @@
# dataset settings
dataset_type = 'ImageNet'
preprocess_cfg = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet',
'data/imagenet':
'sproject:s3://openmmlab/datasets/classification/imagenet'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='RandomResizedCrop', scale=224),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='ResizeEdge', scale=256, edge='short', backend='cv2'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=128,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='/mnt/cache/share/images',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want a standard test, please configure the test dataset manually
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# scheduler
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
clip_grad=None)
# learning policy
param_scheduler = [
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
]
# train, val, test setting
train_cfg = dict(by_epoch=False, max_iters=300000)
val_cfg = dict()
test_cfg = dict()
# runtime
# defaults to use registries in mmcls
default_scope = 'mmcls'
log_processor = dict(
window_size=100,
by_epoch=False,
custom_cfg=[
dict(
data_src='loss',
log_name='loss_large_window',
method_name='mean',
window_size=100)
])
# configure default hooks
default_hooks = dict(
# record the time of every iteration.
timer=dict(type='IterTimerHook'),
# print log every 100 iterations.
logger=dict(type='LoggerHook', interval=100),
# enable the parameter scheduler.
param_scheduler=dict(type='ParamSchedulerHook'),
    # save a checkpoint every 10000 iterations.
checkpoint=dict(
type='CheckpointHook',
by_epoch=False,
interval=10000,
save_last=True,
max_keep_ckpts=3),
    # set sampler seed in distributed environment.
sampler_seed=dict(type='DistSamplerSeedHook'),
# validation results visualization, set True to enable it.
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
# whether to enable cudnn benchmark
cudnn_benchmark=False,
# set multi process parameters
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
# set distributed parameters
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
# vis_backends = [dict(type='LocalVisBackend')]
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth"
# whether to resume training from the loaded checkpoint
resume = False
# model
_STAGE_MUTABLE = dict(
_scope_='mmrazor',
type='OneShotMutableOP',
candidates=dict(
shuffle_3x3=dict(
type='ShuffleBlock', kernel_size=3, norm_cfg=dict(type='BN')),
shuffle_5x5=dict(
type='ShuffleBlock', kernel_size=5, norm_cfg=dict(type='BN')),
shuffle_7x7=dict(
type='ShuffleBlock', kernel_size=7, norm_cfg=dict(type='BN')),
shuffle_xception=dict(
type='ShuffleXception', norm_cfg=dict(type='BN')),
))
arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE],
]
norm_cfg = dict(type='BN')
supernet = dict(
type='ImageClassifier',
data_preprocessor=preprocess_cfg,
backbone=dict(
_scope_='mmrazor',
type='SearchableShuffleNetV2',
widen_factor=1.0,
norm_cfg=norm_cfg,
arch_setting=arch_setting),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss',
num_classes=1000,
label_smooth_val=0.1,
mode='original',
loss_weight=1.0),
topk=(1, 5),
),
)
mutator = dict(type='mmrazor.OneShotModuleMutator')
model = dict(
type='mmrazor.SPOS',
architecture=supernet,
mutator=mutator,
# fix_subnet='configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml'
)
find_unused_parameters = True

372
convert_keys.py 100644
View File

@ -0,0 +1,372 @@
from collections import OrderedDict
import torch
from mmengine.config import Config
from mmrazor.core import * # noqa: F401,F403
from mmrazor.models import * # noqa: F401,F403
from mmrazor.registry import MODELS
from mmrazor.utils import register_all_modules
def convert_spos_key(old_path, new_path):
old_dict = torch.load(old_path)
new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
mapping = {
'choices': '_candidates',
'architecture.': '',
'model.': '',
}
for k, v in old_dict['state_dict'].items():
new_key = k
for _from, _to in mapping.items():
new_key = new_key.replace(_from, _to)
new_key = f'architecture.{new_key}'
new_dict['state_dict'][new_key] = v
torch.save(new_dict, new_path)
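    # Example of the rewrite above on one (hypothetical) key:
    #   'architecture.model.backbone.layers.0.choices.shuffle_3x3.conv.weight'
    # becomes, after stripping 'architecture.'/'model.' and re-prefixing:
    #   'architecture.backbone.layers.0._candidates.shuffle_3x3.conv.weight'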
def convert_detnas_key(old_path, new_path):
old_dict = torch.load(old_path)
new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
mapping = {
'choices': '_candidates',
'model.': '',
}
for k, v in old_dict['state_dict'].items():
new_key = k
for _from, _to in mapping.items():
new_key = new_key.replace(_from, _to)
new_dict['state_dict'][new_key] = v
torch.save(new_dict, new_path)
def convert_anglenas_key(old_path, new_path):
old_dict = torch.load(old_path)
new_dict = {'state_dict': {}}
mapping = {
'choices': '_candidates',
'model.': '',
'mbv2': 'mb',
}
for k, v in old_dict.items():
new_key = k
for _from, _to in mapping.items():
new_key = new_key.replace(_from, _to)
new_dict['state_dict'][new_key] = v
torch.save(new_dict, new_path)
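    # Note: unlike convert_darts_key/convert_cream_key below, no strict load
    # check is performed here; a quick (hypothetical) sanity check would be:
    #   model = MODELS.build(Config.fromfile(cfg_path).model)
    #   model.load_state_dict(new_dict['state_dict'], strict=True)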
def convert_darts_key(old_path, new_path):
old_dict = torch.load(old_path)
new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
cfg = Config.fromfile(
'configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py')
# import ipdb; ipdb.set_trace()
model = MODELS.build(cfg.model)
print('============> module name')
for name, module in model.state_dict().items():
print(name)
mapping = {
'choices': '_candidates',
'model.': '',
'edges': 'route',
}
for k, v in old_dict['state_dict'].items():
new_key = k
for _from, _to in mapping.items():
new_key = new_key.replace(_from, _to)
# cells.0.nodes.0.edges.choices.normal_n2_p1.0.choices.sep_conv_3x3.conv1.2.weight
splited_list = new_key.split('.')
if len(splited_list) > 10 and splited_list[-6] == '0':
del splited_list[-6]
new_key = '.'.join(splited_list)
elif len(splited_list) > 10 and splited_list[-5] == '0':
del splited_list[-5]
new_key = '.'.join(splited_list)
new_dict['state_dict'][new_key] = v
print('============> new dict')
for key, v in new_dict['state_dict'].items():
print(key)
model.load_state_dict(new_dict['state_dict'], strict=True)
torch.save(new_dict, new_path)
def convert_cream_key(old_path, new_path):
old_dict = torch.load(old_path, map_location=torch.device('cpu'))
new_dict = {'state_dict': {}}
ordered_old_dict = OrderedDict(old_dict['state_dict'])
cfg = Config.fromfile('configs/nas/cream/cream_14_subnet_mobilenet.py')
model = MODELS.build(cfg.model)
model_name_list = []
model_module_list = []
# TODO show structure of model and checkpoint
print('=' * 30, 'the key of model')
for k, v in model.state_dict().items():
print(k)
print('=' * 30, 'the key of ckpt')
for k, v in ordered_old_dict.items():
print(k)
# final mapping dict
mapping = {}
middle_razor2cream = {
# point-wise expansion
'expand_conv.conv.weight': 'conv_pw.weight',
'expand_conv.bn.weight': 'bn1.weight',
'expand_conv.bn.bias': 'bn1.bias',
'expand_conv.bn.running_mean': 'bn1.running_mean',
'expand_conv.bn.running_var': 'bn1.running_var',
'expand_conv.bn.num_batches_tracked': 'bn1.num_batches_tracked',
# se
'se.conv1.conv.weight': 'se.conv_reduce.weight',
'se.conv1.conv.bias': 'se.conv_reduce.bias',
'se.conv2.conv.weight': 'se.conv_expand.weight',
'se.conv2.conv.bias': 'se.conv_expand.bias',
# depth-wise conv
'depthwise_conv.conv.weight': 'conv_dw.weight',
'depthwise_conv.bn.weight': 'bn2.weight',
'depthwise_conv.bn.bias': 'bn2.bias',
'depthwise_conv.bn.running_mean': 'bn2.running_mean',
'depthwise_conv.bn.running_var': 'bn2.running_var',
'depthwise_conv.bn.num_batches_tracked': 'bn2.num_batches_tracked',
# point-wise linear projection
'linear_conv.conv.weight': 'conv_pwl.weight',
'linear_conv.bn.weight': 'bn3.weight',
'linear_conv.bn.bias': 'bn3.bias',
'linear_conv.bn.running_mean': 'bn3.running_mean',
'linear_conv.bn.running_var': 'bn3.running_var',
'linear_conv.bn.num_batches_tracked': 'bn3.num_batches_tracked',
}
first_razor2cream = {
# for first depthsepconv dw
'conv_dw.conv.weight': 'conv_dw.weight',
'conv_dw.bn.weight': 'bn1.weight',
'conv_dw.bn.bias': 'bn1.bias',
'conv_dw.bn.running_mean': 'bn1.running_mean',
'conv_dw.bn.running_var': 'bn1.running_var',
'conv_dw.bn.num_batches_tracked': 'bn1.num_batches_tracked',
# for first depthsepconv pw
'conv_pw.conv.weight': 'conv_pw.weight',
'conv_pw.bn.weight': 'bn2.weight',
'conv_pw.bn.bias': 'bn2.bias',
'conv_pw.bn.running_mean': 'bn2.running_mean',
'conv_pw.bn.running_var': 'bn2.running_var',
'conv_pw.bn.num_batches_tracked': 'bn2.num_batches_tracked',
# se
'se.conv1.conv.weight': 'se.conv_reduce.weight',
'se.conv1.conv.bias': 'se.conv_reduce.bias',
'se.conv2.conv.weight': 'se.conv_expand.weight',
'se.conv2.conv.bias': 'se.conv_expand.bias',
}
last_razor2cream = {
# for last convbnact
'conv2.conv.weight': 'conv.weight',
'conv2.bn.weight': 'bn1.weight',
'conv2.bn.bias': 'bn1.bias',
'conv2.bn.running_mean': 'bn1.running_mean',
'conv2.bn.running_var': 'bn1.running_var',
'conv2.bn.num_batches_tracked': 'bn1.num_batches_tracked',
}
middle_cream2razor = {v: k for k, v in middle_razor2cream.items()}
first_cream2razor = {v: k for k, v in first_razor2cream.items()}
last_cream2razor = {v: k for k, v in last_razor2cream.items()}
# 1. group the razor's module names
grouped_razor_module_name = {
'middle': {},
'first': [],
'last': [],
}
for name, module in model.state_dict().items():
tmp_name: str = name.split(
'backbone.')[1] if 'backbone' in name else name
model_name_list.append(tmp_name)
model_module_list.append(module)
if 'conv1' in tmp_name and len(tmp_name) <= 35:
# belongs to the stem conv
grouped_razor_module_name['first'].append(name)
elif 'head' in tmp_name:
# belongs to the last linear layer
grouped_razor_module_name['last'].append(name)
else:
# middle
if tmp_name.startswith('layer'):
key_of_middle = tmp_name[5:8]
if key_of_middle not in grouped_razor_module_name['middle']:
grouped_razor_module_name['middle'][key_of_middle] = [name]
else:
grouped_razor_module_name['middle'][key_of_middle].append(
name)
elif tmp_name.startswith('conv2'):
key_of_middle = '7.0'
if key_of_middle not in grouped_razor_module_name['middle']:
grouped_razor_module_name['middle'][key_of_middle] = [name]
else:
grouped_razor_module_name['middle'][key_of_middle].append(
name)
# 2. group the cream's module names
grouped_cream_module_name = {
'middle': {},
'first': [],
'last': [],
}
for k in ordered_old_dict.keys():
if 'classifier' in k or 'conv_head' in k:
# last conv
grouped_cream_module_name['last'].append(k)
elif 'blocks' in k:
# middle blocks
key_of_middle = k[7:10]
if key_of_middle not in grouped_cream_module_name['middle']:
grouped_cream_module_name['middle'][key_of_middle] = [k]
else:
grouped_cream_module_name['middle'][key_of_middle].append(k)
else:
# first blocks
grouped_cream_module_name['first'].append(k)
# 3. process the first modules
for cream_item in grouped_cream_module_name['first']:
if 'conv_stem' in cream_item:
# get corresponding item from razor
for razor_item in grouped_razor_module_name['first']:
if 'conv.weight' in razor_item:
mapping[cream_item] = razor_item
grouped_razor_module_name['first'].remove(razor_item)
break
else:
kws = cream_item.split('.')[-1]
# get corresponding item from razor
for razor_item in grouped_razor_module_name['first']:
if kws in razor_item:
mapping[cream_item] = razor_item
grouped_razor_module_name['first'].remove(razor_item)
# 4. process the last modules
for cream_item in grouped_cream_module_name['last']:
if 'classifier' in cream_item:
kws = cream_item.split('.')[-1]
for razor_item in grouped_razor_module_name['last']:
if 'fc' in razor_item:
if kws in razor_item:
mapping[cream_item] = razor_item
grouped_razor_module_name['last'].remove(razor_item)
break
elif 'conv_head' in cream_item:
kws = cream_item.split('.')[-1]
for razor_item in grouped_razor_module_name['last']:
if 'head.conv2' in razor_item:
if kws in razor_item:
mapping[cream_item] = razor_item
grouped_razor_module_name['last'].remove(razor_item)
# 5. process the middle modules
for cream_group_id, cream_items in grouped_cream_module_name[
'middle'].items():
# get the corresponding group from razor
razor_group_id: str = str(float(cream_group_id) + 1)
razor_items: list = grouped_razor_module_name['middle'][razor_group_id]
if int(razor_group_id[0]) == 1:
key_cream2razor = first_cream2razor
elif int(razor_group_id[0]) == 7:
key_cream2razor = last_cream2razor
else:
key_cream2razor = middle_cream2razor
# matching razor items and cream items
for cream_item in cream_items:
# traverse all of key_cream2razor
for cream_match, razor_match in key_cream2razor.items():
if cream_match in cream_item:
# traverse razor_items to get the corresponding razor name
for razor_item in razor_items:
if razor_match in razor_item:
mapping[cream_item] = razor_item
break
print('=' * 100)
print('length of mapping: ', len(mapping.keys()))
for k, v in mapping.items():
print(k, '\t=>\t', v)
print('#' * 100)
# TODO DELETE this print
print('**' * 20)
for c, cm, r, rm in zip(ordered_old_dict.keys(), ordered_old_dict.values(),
model_name_list, model_module_list):
print(f'{c}: shape {cm.shape} => {r}: shape {rm.shape}')
print('**' * 20)
for k, v in ordered_old_dict.items():
print(f'Mapping from {k} to {mapping[k]}......')
new_dict['state_dict'][mapping[k]] = v
model.load_state_dict(new_dict['state_dict'], strict=True)
torch.save(new_dict, new_path)
if __name__ == '__main__':
register_all_modules(True)
# old_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a.pth' # noqa: E501
# new_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
# convert_spos_key(old_path, new_path)
# old_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f.pth' # noqa: E501
# new_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth' # noqa: E501
# convert_detnas_key(old_path, new_path)
# old_path = './data/14.pth.tar'
# new_path = './data/14_2.0.pth'
# old_path = '/mnt/lustre/dongpeijie/14.pth.tar'
# new_path = '/mnt/lustre/dongpeijie/14_2.0.pth'
# convert_cream_key(old_path, new_path)
# old_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921.pth' # noqa: E501
# new_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth' # noqa: E501
# convert_darts_key(old_path, new_path)
old_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f.pth' # noqa: E501
new_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth' # noqa: E501
convert_anglenas_key(old_path, new_path)

View File

@ -0,0 +1,280 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import Any, Dict, List, Optional, Union
import torch
from mmengine import BaseDataElement
from mmengine.model import BaseModel
from mmengine.optim import OptimWrapper, OptimWrapperDict
from torch import nn
from torch.nn.modules.batchnorm import _BatchNorm
from mmrazor.models.mutators import DiffModuleMutator
from mmrazor.models.subnet import (SINGLE_MUTATOR_RANDOM_SUBNET, FixSubnet,
FixSubnetMixin)
from mmrazor.registry import MODELS
from ..base import BaseAlgorithm, LossResults
VALID_FIX_SUBNET = Union[str, FixSubnet, Dict[str, Dict[str, Any]]]
@MODELS.register_module()
class Darts(BaseAlgorithm, FixSubnetMixin):
"""Implementation of `DARTS <https://arxiv.org/abs/1806.09055>`_
DARTS means Differentiable Architecture Search, a classic NAS algorithm.
    :class:`Darts` implements the APIs required by DARTS, as well as the
    supernet training and subnet retraining logic for each iteration.
Args:
architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel`
or built model. Corresponding to supernet in NAS algorithm.
mutator (dict|:obj:`DiffModuleMutator`): The config of
:class:`DiffModuleMutator` or built mutator.
fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or
loaded dict or built :obj:`FixSubnet`.
norm_training (bool): Whether to set norm layers to training mode,
namely, not freeze running stats (mean and var). Note: Effect on
Batch Norm and its variants only. Defaults to False.
data_preprocessor (dict, optional): The pre-process config of
:class:`BaseDataPreprocessor`. Defaults to None.
init_cfg (dict): Init config for ``BaseModule``.
Note:
        Darts has two training modes: supernet training and subnet
        retraining. If `fix_subnet` is None, it means supernet training;
        if `fix_subnet` is not None, it means subnet retraining.
Note:
        During supernet training, since each op is not fully trained, the
        statistics of :obj:`_BatchNorm` are inaccurate. This problem affects
        the evaluation of each subnet's performance in the search phase.
        There are usually two ways to solve this, both of which require
        setting `norm_training` to True:
        1) Use a large batch size, so BNs use the mean and variance of the
        current batch during forward.
        2) Recalibrate the statistics of BN before searching.
"""
def __init__(self,
architecture: Union[BaseModel, Dict],
mutator: Optional[Union[DiffModuleMutator, Dict]] = None,
fix_subnet: Optional[VALID_FIX_SUBNET] = None,
unroll: bool = False,
norm_training: bool = False,
data_preprocessor: Optional[Union[dict, nn.Module]] = None,
init_cfg: Optional[dict] = None):
super().__init__(architecture, data_preprocessor, init_cfg)
        # Darts has two training modes: supernet training and subnet
        # retraining. If fix_subnet is not None, it means subnet retraining.
if fix_subnet:
# According to fix_subnet, delete the unchosen part of supernet
self.load_fix_subnet(fix_subnet, prefix='architecture.')
self.is_supernet = False
else:
assert mutator is not None, \
'mutator cannot be None when fix_subnet is None.'
if isinstance(mutator, DiffModuleMutator):
self.mutator = mutator
elif isinstance(mutator, dict):
self.mutator = MODELS.build(mutator)
else:
raise TypeError('mutator should be a `dict` or '
f'`DiffModuleMutator` instance, but got '
f'{type(mutator)}')
# Mutator is an essential component of the NAS algorithm. It
# provides some APIs commonly used by NAS.
# Before using it, you must do some preparations according to
# the supernet.
self.mutator.prepare_from_supernet(self.architecture)
self.is_supernet = True
self.norm_training = norm_training
self.unroll = unroll
def sample_subnet(self) -> SINGLE_MUTATOR_RANDOM_SUBNET:
"""Random sample subnet by mutator."""
return self.mutator.sample_choices()
def set_subnet(self, subnet: SINGLE_MUTATOR_RANDOM_SUBNET):
"""Set the subnet sampled by :meth:sample_subnet."""
self.mutator.set_choices(subnet)
def loss(
self,
batch_inputs: torch.Tensor,
data_samples: Optional[List[BaseDataElement]] = None,
) -> LossResults:
"""Calculate losses from a batch of inputs and data samples."""
if self.is_supernet:
random_subnet = self.sample_subnet()
self.set_subnet(random_subnet)
return self.architecture(batch_inputs, data_samples, mode='loss')
else:
return self.architecture(batch_inputs, data_samples, mode='loss')
def train(self, mode=True):
"""Convert the model into eval mode while keep normalization layer
unfreezed."""
super().train(mode)
if self.norm_training and not mode:
for module in self.architecture.modules():
if isinstance(module, _BatchNorm):
module.training = True
def train_step(self, data: List[dict],
optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
"""The iteration step during training.
        This method defines an iteration step during training, except for
        the back propagation and optimizer updating, which are done in an
        optimizer hook. Note that in some complicated cases or models, the
        whole process, including back propagation and optimizer updating,
        is also defined in this method, such as GAN.
Args:
            data (List[dict]): The output of the dataloader(s). During
                supernet training this is a pair of (supernet data,
                architecture data).
            optim_wrapper (:obj:`OptimWrapper` | :obj:`OptimWrapperDict`):
                The optimizer wrapper(s) used to update the parameters.
Returns:
dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
``num_samples``.
``loss`` is a tensor for back propagation, which can be a
weighted sum of multiple losses.
``log_vars`` contains all the variables to be sent to the
logger.
``num_samples`` indicates the batch size (when the model is
DDP, it means the batch size on each GPU), which is used for
averaging the logs.
"""
if isinstance(data, (tuple, list)) and isinstance(
optim_wrapper, OptimWrapperDict):
assert len(data) == len(optim_wrapper), \
f'The length of data {len(data)} should be equal to that of optimizers {len(optim_wrapper)}.' # noqa: E501
# TODO check the order of data
train_supernet_data, train_arch_data = data
# TODO mutator optimizer zero_grad
optim_wrapper.zero_grad()
if self.unroll:
self._unrolled_backward(train_arch_data, train_supernet_data,
optim_wrapper) # TODO optimizer
else:
# TODO process the input
arch_loss = self.loss(train_arch_data) # noqa: F841
# arch_loss.backward()
# TODO mutator optimizer step
optim_wrapper.step()
model_loss = self.loss(train_supernet_data)
# TODO optimizer architecture zero_grad
optim_wrapper.zero_grad()
# model_loss.backward()
nn.utils.clip_grad_norm_(
self.architecture.parameters(), max_norm=5, norm_type=2)
# TODO optimizer architecture step
optim_wrapper.step()
outputs = dict(
loss=model_loss,
num_samples=len(train_supernet_data['img'].data))
else:
outputs = super().train_step(data, optim_wrapper)
return outputs
def _unrolled_backward(self, train_arch_data, train_supernet_data,
optimizer):
"""Compute unrolled loss and backward its gradients."""
backup_params = copy.deepcopy(tuple(self.architecture.parameters()))
# do virtual step on training data
lr = optimizer['architecture'].param_groups[0]['lr']
momentum = optimizer['architecture'].param_groups[0]['momentum']
weight_decay = optimizer['architecture'].param_groups[0][
'weight_decay']
self._compute_virtual_model(train_supernet_data, lr, momentum,
weight_decay, optimizer)
# calculate unrolled loss on validation data
# keep gradients for model here for compute hessian
losses = self(**train_arch_data)
loss, _ = self._parse_losses(losses)
w_model, w_arch = tuple(self.architecture.parameters()), tuple(
self.mutator.parameters())
w_grads = torch.autograd.grad(loss, w_model + w_arch)
d_model, d_arch = w_grads[:len(w_model)], w_grads[len(w_model):]
# compute hessian and final gradients
hessian = self._compute_hessian(backup_params, d_model,
train_supernet_data)
with torch.no_grad():
for param, d, h in zip(w_arch, d_arch, hessian):
# gradient = dalpha - lr * hessian
param.grad = d - lr * h
# restore weights
self._restore_weights(backup_params)
def _compute_virtual_model(self, data, lr, momentum, weight_decay,
optimizer):
"""Compute unrolled weights w`"""
# don't need zero_grad, using autograd to calculate gradients
losses = self(**data)
loss, _ = self._parse_losses(losses)
gradients = torch.autograd.grad(loss, self.architecture.parameters())
with torch.no_grad():
for w, g in zip(self.architecture.parameters(), gradients):
m = optimizer['architecture'].state[w].get(
'momentum_buffer', 0.)
                # in-place update so the parameter tensor itself is modified
                w -= lr * (momentum * m + g + weight_decay * w)
def _restore_weights(self, backup_params):
with torch.no_grad():
for param, backup in zip(self.architecture.parameters(),
backup_params):
param.copy_(backup)
def _compute_hessian(self, backup_params, dw, data):
"""
        dw = dw' { L_val(w', alpha) }
        w+ = w + eps * dw
        w- = w - eps * dw
        hessian = (dalpha { L_trn(w+, alpha) }
                   - dalpha { L_trn(w-, alpha) }) / (2 * eps)
        eps = 0.01 / ||dw||
"""
self._restore_weights(backup_params)
norm = torch.cat([w.view(-1) for w in dw]).norm()
eps = 0.01 / norm
        if norm < 1E-8:
            print('In computing hessian, the norm of dw is smaller than '
                  f'1E-8, which makes eps very large: {eps.item():.6f}.')
dalphas = []
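        # The loop below first shifts the weights to w+ = w + eps * dw and
        # takes dalpha L_trn(w+); the second pass adds -2 * eps * dw, landing
        # on w- = w - eps * dw, where dalpha L_trn(w-) is taken.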
for e in [eps, -2. * eps]:
# w+ = w + eps*dw`, w- = w - eps*dw`
with torch.no_grad():
for p, d in zip(self.architecture.parameters(), dw):
p += e * d
losses = self(**data)
loss, _ = self._parse_losses(losses)
dalphas.append(
torch.autograd.grad(loss, tuple(self.mutator.parameters())))
# dalpha { L_trn(w+) }, # dalpha { L_trn(w-) }
dalpha_pos, dalpha_neg = dalphas
hessian = [(p - n) / (2. * eps)
for p, n in zip(dalpha_pos, dalpha_neg)]
return hessian

View File

@ -1,3 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .backbones import * # noqa: F401,F403
from .components import * # noqa: F401,F403
from .dynamic_op import * # noqa: F401,F403

View File

@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union
import torch
import torch.nn as nn
from mmcls.models.backbones.base_backbone import BaseBackbone
from mmcv.cnn import build_activation_layer, build_norm_layer
from torch import Tensor
@ -126,12 +127,8 @@ class Node(nn.Module):
super().__init__()
edges = nn.ModuleDict()
for i in range(num_prev_nodes):
if i < num_downsample_nodes:
stride = 2
else:
stride = 1
edge_id = '{}_p{}'.format(node_id, i)
stride = 2 if i < num_downsample_nodes else 1
edge_id = f'{node_id}_p{i}'
module_kwargs = dict(
in_channels=channels,
@ -143,13 +140,14 @@ class Node(nn.Module):
mutable_cfg.update(alias=edge_id)
edges.add_module(edge_id, MODELS.build(mutable_cfg))
route_cfg.update(alias=node_id)
route_cfg.update(edges=edges)
self.edges = MODELS.build(route_cfg)
self.route = MODELS.build(route_cfg)
def forward(self, prev_nodes: Union[List[Tensor],
Tuple[Tensor]]) -> Tensor:
"""Forward with the previous nodes list."""
return self.edges(prev_nodes)
return self.route(prev_nodes)
class Cell(nn.Module):
@ -223,8 +221,7 @@ class Cell(nn.Module):
cur_tensor = node(tensors)
tensors.append(cur_tensor)
output = torch.cat(tensors[2:], dim=1)
return output
return torch.cat(tensors[2:], dim=1)
class AuxiliaryModule(nn.Module):
@ -263,7 +260,7 @@ class AuxiliaryModule(nn.Module):
@MODELS.register_module()
class DartsBackbone(nn.Module, FixSubnetMixin):
class DartsBackbone(BaseBackbone, FixSubnetMixin):
"""Backbone of Differentiable Architecture Search (DARTS).
Args:
@ -348,7 +345,7 @@ class DartsBackbone(nn.Module, FixSubnetMixin):
prev_reduction, reduction = reduction, False
# Reduce featuremap size and double channels in 1/3
# and 2/3 layer.
if i == self.num_layers // 3 or i == 2 * self.num_layers // 3:
if i in [self.num_layers // 3, 2 * self.num_layers // 3]:
self.out_channels *= 2
reduction = True

View File

@ -46,7 +46,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
    Examples:
>>> mutable_cfg = dict(
... type='OneShotMutableOP',
... candidate_ops=dict(
... candidates=dict(
... mb_k3e1=dict(
... type='MBBlock',
... kernel_size=3,
@ -87,7 +87,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
]
) -> None:
for index in out_indices:
if index not in range(0, 8):
if index not in range(8):
                raise ValueError('the item in out_indices must be in '
                                 f'range(0, 8). But received {index}')
@ -147,6 +147,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg)
self.add_module('conv2', layer)
self.layers.append('conv2')

View File

@ -48,7 +48,7 @@ class SearchableShuffleNetV2(BaseBackbone, FixSubnetMixin):
    Examples:
>>> mutable_cfg = dict(
... type='OneShotMutableOP',
... candidate_ops=dict(
... candidates=dict(
... shuffle_3x3=dict(
... type='ShuffleBlock',
... kernel_size=3,

View File

@ -0,0 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .heads import CreamClsHead
__all__ = ['CreamClsHead']

View File

@ -0,0 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .cream_head import CreamClsHead
__all__ = ['CreamClsHead']

View File

@ -0,0 +1,72 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional, Tuple
from mmcls.models.heads import LinearClsHead
from mmcv.cnn import ConvModule
from torch import Tensor, nn
from mmrazor.registry import MODELS
@MODELS.register_module()
class CreamClsHead(LinearClsHead):
"""Linear classifier head for cream.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
num_features (int): Number of features in the conv2d.
act_cfg (dict): Config dict for activation layer.
Default: dict(type='ReLU6').
init_cfg (dict, optional): the config to control the initialization.
Defaults to ``dict(type='Normal', layer='Linear', std=0.01)``.
"""
def __init__(self,
num_classes: int,
in_channels: int,
num_features: int = 1280,
act_cfg: Dict = dict(type='ReLU6'),
init_cfg: Optional[dict] = dict(
type='Normal', layer='Linear', std=0.01),
**kwargs):
super().__init__(
num_classes=num_classes,
in_channels=in_channels,
init_cfg=init_cfg,
**kwargs)
layer = ConvModule(
in_channels=self.in_channels,
out_channels=num_features,
kernel_size=1,
stride=1,
padding=0,
conv_cfg=None,
norm_cfg=None,
act_cfg=act_cfg)
self.add_module('conv2', layer)
self.fc = nn.Linear(num_features, self.num_classes)
# def pre_logits(self, feats: Tuple[Tensor]) -> Tensor:
# """The process before the final classification head.
# The input ``feats`` is a tuple of tensor, and each tensor is the
# feature of a backbone stage. In ``LinearClsHead``, we just obtain the
# feature of the last stage.
# """
# # The LinearClsHead doesn't have other module, just return after
# # unpacking.
# return feats[-1]
def forward(self, feats: Tuple[Tensor]) -> Tensor:
"""The forward process."""
logits = self.pre_logits(feats)
logits = logits.unsqueeze(-1).unsqueeze(-1)
logits = self.conv2(logits)
logits = logits.flatten(1)
return self.fc(logits)
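A minimal shape walk-through of the head above (hypothetical sizes; assumes mmcls modules are registered so the inherited `LinearClsHead` can build its default loss):

import torch
head = CreamClsHead(num_classes=1000, in_channels=320, num_features=1280)
feats = (torch.randn(2, 320), )  # pre_logits keeps the last stage feature
out = head(feats)  # (2, 320) -> conv2 -> (2, 1280) -> fc -> (2, 1000)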

View File

@ -99,7 +99,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
DARTS. Search the best module by learnable parameters `arch_param`.
Args:
candidate_ops (dict[str, dict]): the configs for the candidate
candidates (dict[str, dict]): the configs for the candidate
operations.
module_kwargs (dict[str, dict], optional): Module initialization named
arguments. Defaults to None.
@ -110,23 +110,29 @@ class DiffMutableOP(DiffMutableModule[str, str]):
and `Pretrained`.
"""
def __init__(self, candidate_ops: Dict[str, Dict], **kwargs) -> None:
super().__init__(**kwargs)
assert len(candidate_ops) >= 1, \
def __init__(
self,
candidates: Dict[str, Dict],
module_kwargs: Optional[Dict[str, Dict]] = None,
alias: Optional[str] = None,
init_cfg: Optional[Dict] = None,
) -> None:
super().__init__(
module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
assert len(candidates) >= 1, \
            f'Number of candidate ops must be greater than or equal to 1, ' \
f'but got: {len(candidate_ops)}'
f'but got: {len(candidates)}'
self._is_fixed = False
self._candidate_ops = self._build_ops(candidate_ops,
self.module_kwargs)
self._candidates = self._build_ops(candidates, self.module_kwargs)
@staticmethod
def _build_ops(candidate_ops: Dict[str, Dict],
def _build_ops(candidates: Dict[str, Dict],
module_kwargs: Optional[Dict[str, Dict]]) -> nn.ModuleDict:
"""Build candidate operations based on candidate_ops configures.
"""Build candidate operations based on candidates configures.
Args:
candidate_ops (dict[str, dict]): the configs for the candidate
candidates (dict[str, dict]): the configs for the candidate
operations.
module_kwargs (dict[str, dict], optional): Module initialization
named arguments.
@ -137,7 +143,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
is the corresponding candidate operation.
"""
ops = nn.ModuleDict()
for name, op_cfg in candidate_ops.items():
for name, op_cfg in candidates.items():
assert name not in ops
if module_kwargs is not None:
op_cfg.update(module_kwargs)
@ -154,7 +160,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
Returns:
Tensor: the result of forward the fixed operation.
"""
return self._candidate_ops[self._chosen](x)
return sum(self._candidates[choice](x) for choice in self._chosen)
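        # Note: `_chosen` is a list after the updated `fix_chosen` below, so
        # the fixed forward sums the outputs of every retained candidate op.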
def forward_arch_param(self,
x: Any,
@ -180,7 +186,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
# forward based on probs
outputs = list()
for prob, module in zip(probs, self._candidate_ops.values()):
for prob, module in zip(probs, self._candidates.values()):
if prob > 0.:
outputs.append(prob * module(x))
@ -197,11 +203,11 @@ class DiffMutableOP(DiffMutableModule[str, str]):
Tensor: the result of forward all of the ``choice`` operation.
"""
outputs = list()
for op in self._candidate_ops.values():
for op in self._candidates.values():
outputs.append(op(x))
return sum(outputs)
def fix_chosen(self, chosen: str) -> None:
def fix_chosen(self, chosen: Union[str, List[str]]) -> None:
"""Fix mutable with `choice`. This operation would convert `unfixed`
mode to `fixed` mode. The :attr:`is_fixed` will be set to True and only
the selected operations can be retained.
@ -215,9 +221,12 @@ class DiffMutableOP(DiffMutableModule[str, str]):
'The mode of current MUTABLE is `fixed`. '
'Please do not call `fix_chosen` function again.')
if isinstance(chosen, str):
chosen = [chosen]
for c in self.choices:
if c != chosen:
self._candidate_ops.pop(c)
if c not in chosen:
self._candidates.pop(c)
self._chosen = chosen
self.is_fixed = True
@ -225,7 +234,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
@property
def choices(self) -> List[str]:
"""list: all choices. """
return list(self._candidate_ops.keys())
return list(self._candidates.keys())
@MODELS.register_module()
@ -241,6 +250,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
with_arch_param (bool): whether forward with arch_param. When set to
`True`, a differentiable way is adopted. When set to `False`,
a non-differentiable way is adopted.
alias (str, optional): alias of the `DiffChoiceRoute`.
init_cfg (dict, optional): initialization configuration dict for
``BaseModule``. OpenMMLab has implement 6 initializers including
`Constant`, `Xavier`, `Normal`, `Uniform`, `Kaiming`,
@ -274,16 +284,17 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
self,
edges: nn.ModuleDict,
with_arch_param: bool = False,
alias: Optional[str] = None,
init_cfg: Optional[Dict] = None,
) -> None:
super().__init__(init_cfg=init_cfg)
super().__init__(alias=alias, init_cfg=init_cfg)
assert len(edges) >= 1, \
            f'Number of edges must be greater than or equal to 1, ' \
f'but got: {len(edges)}'
self._with_arch_param = with_arch_param
self._is_fixed = False
self._edges: nn.ModuleDict = edges
self._candidates: nn.ModuleDict = edges
def forward_fixed(self, inputs: Union[List, Tuple]) -> Tensor:
"""Forward when the mutable is in `fixed` mode.
@ -302,7 +313,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
outputs = list()
for choice, x in zip(self._unfixed_choices, inputs):
if choice in self._chosen:
outputs.append(self._edges[choice](x))
outputs.append(self._candidates[choice](x))
return sum(outputs)
def forward_arch_param(self,
@ -319,15 +330,16 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
Returns:
Tensor: the result of forward with ``arch_param``.
"""
assert len(x) == len(self._edges), \
f'Length of `edges` {len(self._edges)} should be same as ' \
f'the length of inputs {len(x)}.'
assert len(x) == len(self._candidates), \
f'Length of `edges` {len(self._candidates)} should be ' \
f'same as the length of inputs {len(x)}.'
if self._with_arch_param:
probs = self.compute_arch_probs(arch_param=arch_param)
outputs = list()
for prob, module, input in zip(probs, self._edges.values(), x):
for prob, module, input in zip(probs, self._candidates.values(),
x):
if prob > 0:
# prob may equal to 0 in gumbel softmax.
outputs.append(prob * module(input))
@ -346,12 +358,12 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
Returns:
Tensor: the result of forward all of the ``choice`` operation.
"""
assert len(x) == len(self._edges), \
f'Lenght of edges {len(self._edges)} should be same as ' \
assert len(x) == len(self._candidates), \
            f'Length of edges {len(self._candidates)} should be same as ' \
f'the length of inputs {len(x)}.'
outputs = list()
for op, input in zip(self._edges.values(), x):
for op, input in zip(self._candidates.values(), x):
outputs.append(op(input))
return sum(outputs)
@ -373,7 +385,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
for c in self.choices:
if c not in chosen:
self._edges.pop(c)
self._candidates.pop(c)
self._chosen = chosen
self.is_fixed = True
@ -381,7 +393,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
@property
def choices(self) -> List[CHOSEN_TYPE]:
"""list: all choices. """
return list(self._edges.keys())
return list(self._candidates.keys())
@MODELS.register_module()
@ -413,10 +425,14 @@ class GumbelChoiceRoute(DiffChoiceRoute):
tau: float = 1.0,
hard: bool = True,
with_arch_param: bool = False,
alias: Optional[str] = None,
init_cfg: Optional[Dict] = None,
) -> None:
super().__init__(
edges=edges, with_arch_param=with_arch_param, init_cfg=init_cfg)
edges=edges,
with_arch_param=with_arch_param,
alias=alias,
init_cfg=init_cfg)
self.tau = tau
self.hard = hard

View File

@ -100,7 +100,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
blocks.
Args:
candidate_ops (dict[str, dict]): the configs for the candidate
candidates (dict[str, dict]): the configs for the candidate
operations.
module_kwargs (dict[str, dict], optional): Module initialization named
arguments. Defaults to None.
@ -114,13 +114,13 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
>>> import torch
>>> from mmrazor.models.mutables import OneShotMutableOP
>>> candidate_ops = nn.ModuleDict({
>>> candidates = nn.ModuleDict({
... 'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
... 'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
... 'conv7x7': nn.Conv2d(32, 32, 7, 1, 3)})
>>> input = torch.randn(1, 32, 64, 64)
>>> op = OneShotMutableOP(candidate_ops)
>>> op = OneShotMutableOP(candidates)
>>> op.choices
['conv3x3', 'conv5x5', 'conv7x7']
@ -131,7 +131,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
>>> op.current_choice = 'conv3x3'
>>> unfix_output = op.forward(input)
>>> torch.all(unfixed_output == candidate_ops['conv3x3'](input))
>>> torch.all(unfixed_output == candidates['conv3x3'](input))
True
>>> op.fix_chosen('conv3x3')
@ -147,36 +147,41 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
True
"""
def __init__(self, candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
**kwargs) -> None:
super().__init__(**kwargs)
assert len(candidate_ops) >= 1, \
def __init__(
self,
candidates: Union[Dict[str, Dict], nn.ModuleDict],
module_kwargs: Optional[Dict[str, Dict]] = None,
alias: Optional[str] = None,
init_cfg: Optional[Dict] = None,
) -> None:
super().__init__(
module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
assert len(candidates) >= 1, \
            f'Number of candidate ops must be greater than or equal to 1, ' \
f'but got: {len(candidate_ops)}'
f'but got: {len(candidates)}'
self._chosen: Optional[str] = None
if isinstance(candidate_ops, dict):
self._candidate_ops = self._build_ops(candidate_ops,
self.module_kwargs)
elif isinstance(candidate_ops, nn.ModuleDict):
self._candidate_ops = candidate_ops
if isinstance(candidates, dict):
self._candidates = self._build_ops(candidates, self.module_kwargs)
elif isinstance(candidates, nn.ModuleDict):
self._candidates = candidates
else:
            raise TypeError('candidates should be a `dict` or '
f'`nn.ModuleDict` instance, but got '
f'{type(candidate_ops)}')
f'{type(candidates)}')
assert len(self._candidate_ops) >= 1, \
assert len(self._candidates) >= 1, \
f'Number of candidate op must greater than or equal to 1, ' \
f'but got {len(self._candidate_ops)}'
f'but got {len(self._candidates)}'
@staticmethod
def _build_ops(
candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
candidates: Union[Dict[str, Dict], nn.ModuleDict],
module_kwargs: Optional[Dict[str, Dict]] = None) -> nn.ModuleDict:
"""Build candidate operations based on choice configures.
Args:
candidate_ops (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
candidates (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
for the candidate operations or nn.ModuleDict.
module_kwargs (dict[str, dict], optional): Module initialization
named arguments.
@ -186,11 +191,11 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
the name of each choice in configs and the value of ``ops``
is the corresponding candidate operation.
"""
if isinstance(candidate_ops, nn.ModuleDict):
return candidate_ops
if isinstance(candidates, nn.ModuleDict):
return candidates
ops = nn.ModuleDict()
for name, op_cfg in candidate_ops.items():
for name, op_cfg in candidates.items():
assert name not in ops
if module_kwargs is not None:
op_cfg.update(module_kwargs)
@ -207,7 +212,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
Returns:
Tensor: the result of forward the fixed operation.
"""
return self._candidate_ops[self._chosen](x)
return self._candidates[self._chosen](x)
def forward_choice(self, x: Any, choice: str) -> Tensor:
"""Forward with the `unfixed` mutable and current choice is not None.
@ -221,7 +226,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
Tensor: the result of forward the ``choice`` operation.
"""
assert isinstance(choice, str) and choice in self.choices
return self._candidate_ops[choice](x)
return self._candidates[choice](x)
def forward_all(self, x: Any) -> Tensor:
"""Forward all choices. Used to calculate FLOPs.
@ -233,7 +238,9 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
Returns:
Tensor: the result of forward all of the ``choice`` operation.
"""
outputs = [op(x) for op in self._candidate_ops.values()]
outputs = list()
for op in self._candidates.values():
outputs.append(op(x))
return sum(outputs)
def fix_chosen(self, chosen: str) -> None:
@ -251,7 +258,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
for c in self.choices:
if c != chosen:
self._candidate_ops.pop(c)
self._candidates.pop(c)
self._chosen = chosen
self.is_fixed = True
@ -263,7 +270,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
@property
def choices(self) -> List[str]:
"""list: all choices. """
return list(self._candidate_ops.keys())
return list(self._candidates.keys())
@property
def num_choices(self):
@ -275,7 +282,7 @@ class OneShotProbMutableOP(OneShotMutableOP):
"""Sampling candidate operation according to probability.
Args:
candidate_ops (dict[str, dict]): the configs for the candidate
candidates (dict[str, dict]): the configs for the candidate
operations.
choice_probs (list): the probability of sampling each
candidate operation.
@ -289,13 +296,13 @@ class OneShotProbMutableOP(OneShotMutableOP):
"""
def __init__(self,
candidate_ops: Dict[str, Dict],
candidates: Dict[str, Dict],
choice_probs: list = None,
module_kwargs: Optional[Dict[str, Dict]] = None,
alias: Optional[str] = None,
init_cfg: Optional[Dict] = None) -> None:
super().__init__(
candidate_ops=candidate_ops,
candidates=candidates,
module_kwargs=module_kwargs,
alias=alias,
init_cfg=init_cfg)
@ -306,5 +313,7 @@ class OneShotProbMutableOP(OneShotMutableOP):
def sample_choice(self) -> str:
"""Sampling with probabilities."""
assert len(self.choice_probs) == len(self._candidate_ops.keys())
return random.choices(self.choices, weights=self.choice_probs, k=1)[0]
assert len(self.choice_probs) == len(self._candidates.keys())
choice = random.choices(
self.choices, weights=self.choice_probs, k=1)[0]
return choice

View File

@ -2,10 +2,12 @@
from .common import Identity
from .darts_series import (DartsDilConv, DartsPoolBN, DartsSepConv,
DartsSkipConnect, DartsZero)
from .efficientnet_series import ConvBnAct, DepthwiseSeparableConv
from .mobilenet_series import MBBlock
from .shufflenet_series import ShuffleBlock, ShuffleXception
__all__ = [
'ShuffleBlock', 'ShuffleXception', 'DartsPoolBN', 'DartsDilConv',
'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity'
'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity',
'ConvBnAct', 'DepthwiseSeparableConv'
]

View File

@ -27,10 +27,7 @@ class DartsPoolBN(BaseOP):
self.kernel_size, self.stride, 1, count_include_pad=False)
self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
if use_drop_path:
self.drop_path = DropPath()
else:
self.drop_path = None
self.drop_path = DropPath() if use_drop_path else None
def forward(self, x):
out = self.pool(x)
@ -69,10 +66,7 @@ class DartsDilConv(BaseOP):
self.in_channels, self.out_channels, 1, stride=1, bias=False),
build_norm_layer(self.norm_cfg, self.in_channels)[1])
if use_drop_path:
self.drop_path = DropPath()
else:
self.drop_path = None
self.drop_path = DropPath() if use_drop_path else None
def forward(self, x):
out = self.conv1(x)
@ -122,10 +116,7 @@ class DartsSepConv(BaseOP):
self.out_channels, self.out_channels, 1, stride=1, bias=False),
build_norm_layer(self.norm_cfg, self.out_channels)[1])
if use_drop_path:
self.drop_path = DropPath()
else:
self.drop_path = None
self.drop_path = DropPath() if use_drop_path else None
def forward(self, x):
out = self.conv1(x)
@ -163,10 +154,7 @@ class DartsSkipConnect(BaseOP):
bias=False)
self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
if use_drop_path:
self.drop_path = DropPath()
else:
self.drop_path = None
self.drop_path = DropPath() if use_drop_path else None
def forward(self, x):
if self.stride > 1:

View File

@ -0,0 +1,160 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional
import torch.nn as nn
from mmcls.models.utils import SELayer
from mmcv.cnn import ConvModule
from mmrazor.registry import MODELS
from .base import BaseOP
@MODELS.register_module()
class ConvBnAct(BaseOP):
"""ConvBnAct block from timm.
Args:
in_channels (int): number of in channels.
out_channels (int): number of out channels.
kernel_size (int): kernel size of convolution.
stride (int, optional): stride of convolution. Defaults to 1.
dilation (int, optional): dilation rate of convolution. Defaults to 1.
padding (int, optional): padding size of convolution. Defaults to 0.
        skip (bool, optional): whether to use a skip connection.
            Defaults to False.
conv_cfg (Optional[dict], optional): Config dict for convolution layer.
Default: None, which means using conv2d.
norm_cfg (Dict, optional): Config dict for normalization layer.
Default: dict(type='BN').
        act_cfg (Dict, optional): Config dict for activation layer.
Default: dict(type='ReLU').
"""
def __init__(self,
in_channels: int,
out_channels: int,
kernel_size: int,
stride: int = 1,
dilation: int = 1,
padding: int = 0,
skip: bool = False,
conv_cfg: Optional[dict] = None,
se_cfg: Dict = None,
norm_cfg: Dict = dict(type='BN'),
act_cfg: Dict = dict(type='ReLU')):
super().__init__(
in_channels=in_channels, out_channels=out_channels, stride=stride)
self.has_residual = skip and stride == 1 \
and in_channels == out_channels
self.with_se = se_cfg is not None
if self.with_se:
assert isinstance(se_cfg, dict)
self.se = SELayer(self.out_channels, **se_cfg)
self.convModule = ConvModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
dilation=dilation,
padding=padding,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
    def forward(self, x):
        """Forward function."""
        shortcut = x
        x = self.convModule(x)
        if self.with_se:
            # apply the SE layer built in __init__ (it was constructed but
            # never used in the original forward)
            x = self.se(x)
        if self.has_residual:
            x += shortcut
        return x
@MODELS.register_module()
class DepthwiseSeparableConv(BaseOP):
"""DepthwiseSeparable block Used for DS convs in MobileNet-V1 and in the
place of IR blocks that have no expansion (factor of 1.0). This is an
alternative to having a IR with an optional first pw conv.
Args:
in_channels (int): number of in channels.
out_channels (int): number of out channels.
dw_kernel_size (int, optional): the kernel size of depth-wise
convolution. Defaults to 3.
stride (int, optional): stride of convolution.
Defaults to 1.
dilation (int, optional): dilation rate of convolution.
Defaults to 1.
        noskip (bool, optional): whether to disable the skip connection.
            Defaults to False.
pw_kernel_size (int, optional): kernel size of point wise convolution.
Defaults to 1.
        pw_act (bool, optional): whether to apply an activation after the
            point-wise convolution. Defaults to False.
        se_cfg (Dict, optional): Config dict for the Squeeze-and-Excitation
            layer. Defaults to None.
conv_cfg (Optional[dict], optional): Config dict for convolution layer.
Default: None, which means using conv2d.
norm_cfg (Dict, optional): Config dict for normalization layer.
Default: dict(type='BN').
        act_cfg (Dict, optional): Config dict for activation layer.
Default: dict(type='ReLU').
"""
def __init__(self,
in_channels: int,
out_channels: int,
dw_kernel_size: int = 3,
stride: int = 1,
dilation: int = 1,
noskip: bool = False,
pw_kernel_size: int = 1,
pw_act: bool = False,
conv_cfg: Optional[dict] = None,
se_cfg: Dict = None,
norm_cfg: Dict = dict(type='BN'),
act_cfg: Dict = dict(type='ReLU')):
super().__init__(
in_channels=in_channels, out_channels=out_channels, stride=stride)
self.has_residual = (stride == 1
and in_channels == out_channels) and not noskip
self.has_pw_act = pw_act # activation after point-wise conv
self.se_cfg = se_cfg
self.conv_dw = ConvModule(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=dw_kernel_size,
stride=stride,
dilation=dilation,
padding=dw_kernel_size // 2,
groups=in_channels,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
# Squeeze-and-excitation
        self.se = SELayer(
            out_channels, **se_cfg) if self.se_cfg else nn.Identity()
self.conv_pw = ConvModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=pw_kernel_size,
padding=pw_kernel_size // 2,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg if self.has_pw_act else None,
)
def forward(self, x):
shortcut = x
x = self.conv_dw(x)
x = self.se(x)
x = self.conv_pw(x)
if self.has_residual:
x += shortcut
return x
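A usage sketch for the block above (hypothetical sizes; assumes `BaseOP` accepts the in/out channels and stride passed to `super().__init__`):

import torch
op = DepthwiseSeparableConv(in_channels=32, out_channels=32, dw_kernel_size=3)
x = torch.randn(1, 32, 56, 56)
y = op(x)  # residual path active: stride == 1, channels match, noskip=False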

View File

@ -65,10 +65,10 @@ class FlopsEstimator:
... def __init__(self) -> None:
... super().__init__()
...
... candidate_ops = nn.ModuleDict({
... candidates = nn.ModuleDict({
... 'conv3x3': nn.Conv2d(3, 32, 3),
... 'conv5x5': nn.Conv2d(3, 32, 5)})
... self.op = OneShotMutableOP(candidate_ops)
... self.op = OneShotMutableOP(candidates)
... self.op.current_choice = 'conv3x3'
...
... def forward(self, x: Tensor) -> Tensor:

View File

@ -90,12 +90,19 @@ class FixSubnetMixin:
# In the corresponding mutable, it will check whether the `chosen`
# format is correct.
if isinstance(module, BaseMutable):
mutable_name = name.lstrip(prefix)
assert mutable_name in fix_modules, \
f'{mutable_name} is not in fix_modules {fix_modules}, '\
'please check your `fix_subnet`.'
chosen = fix_modules.get(mutable_name, None)
if getattr(module, 'alias', None):
alias = module.alias
assert alias in fix_modules, \
f'The alias {alias} is not in fix_modules ' \
f'{fix_modules}, please check your `fix_subnet`.'
chosen = fix_modules.get(alias, None)
else:
                # str.lstrip strips a character set, not a prefix
                mutable_name = name[len(prefix):] if name.startswith(
                    prefix) else name
assert mutable_name in fix_modules, \
f'The module name {mutable_name} is not in ' \
f'fix_modules {fix_modules} ' \
'please check your `fix_subnet`.'
chosen = fix_modules.get(mutable_name, None)
module.fix_chosen(chosen)
# TODO support load fix channels after mr #29 merged
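Given the alias branch above, a `fix_subnet` mapping may be keyed either by a mutable's alias or by its module name with the `architecture.` prefix stripped; a sketch with hypothetical names:

fix_subnet = {
    # keyed by alias, as set via mutable_cfg.update(alias=edge_id) in DartsBackbone
    'normal_n2_p0': 'sep_conv_3x3',
    # or keyed by module name (prefix 'architecture.' stripped)
    'backbone.layers.0.0': 'shuffle_3x3',
}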

View File

@ -19,7 +19,7 @@ class TestDartsBackbone(TestCase):
def setUp(self) -> None:
self.mutable_cfg = dict(
type='DiffMutableOP',
candidate_ops=dict(
candidates=dict(
torch_conv2d_3x3=dict(
type='torchConv2d',
kernel_size=3,
@ -96,17 +96,17 @@ class TestDartsBackbone(TestCase):
tmp_dict = dict()
for key, _ in model.named_modules():
node_type = key.split('._candidate_ops')[0].split('.')[-1].split(
node_type = key.split('._candidates')[0].split('.')[-1].split(
'_')[0]
if node_type not in ['normal', 'reduce']:
# not supported type
continue
node_name = key.split('._candidate_ops')[0].split('.')[-1]
node_name = key.split('._candidates')[0].split('.')[-1]
if node_name not in tmp_dict.keys():
tmp_dict[node_name] = [key.split('._candidate_ops')[0]]
tmp_dict[node_name] = [key.split('._candidates')[0]]
else:
current_key = key.split('._candidate_ops')[0]
current_key = key.split('._candidates')[0]
if current_key not in tmp_dict[node_name]:
tmp_dict[node_name].append(current_key)

View File

@@ -18,7 +18,7 @@ class TestDiffOP(TestCase):
    def test_forward_arch_param(self):
        op_cfg = dict(
            type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                torch_conv2d_3x3=dict(
                    type='torchConv2d',
                    kernel_size=3,
@@ -56,7 +56,7 @@ class TestDiffOP(TestCase):
    def test_forward_fixed(self):
        op_cfg = dict(
            type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                torch_conv2d_3x3=dict(
                    type='torchConv2d',
                    kernel_size=3,
@@ -84,7 +84,7 @@ class TestDiffOP(TestCase):
    def test_forward(self):
        op_cfg = dict(
            type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                torch_conv2d_3x3=dict(
                    type='torchConv2d',
                    kernel_size=3,
@@ -119,7 +119,7 @@ class TestDiffOP(TestCase):
    def test_property(self):
        op_cfg = dict(
            type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                torch_conv2d_3x3=dict(
                    type='torchConv2d',
                    kernel_size=3,
@@ -158,7 +158,7 @@ class TestDiffOP(TestCase):
    def test_module_kwargs(self):
        op_cfg = dict(
            type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                torch_conv2d_3x3=dict(
                    type='torchConv2d',
                    kernel_size=3,
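All five hunks are the same mechanical rename, but it is worth recalling what the op they configure does: a DiffMutableOP relaxes the discrete choice into a mixture weighted by learnable architecture parameters. A self-contained sketch of that idea, not MMRazor's actual implementation:

import torch
import torch.nn as nn


class TinyDiffOP(nn.Module):
    """Softmax-weighted mixture over candidate ops, as in DARTS."""

    def __init__(self, candidates: nn.ModuleDict):
        super().__init__()
        self._candidates = candidates
        # one learnable logit per candidate op
        self.arch_param = nn.Parameter(torch.zeros(len(candidates)))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        weights = torch.softmax(self.arch_param, dim=0)
        return sum(
            w * op(x) for w, op in zip(weights, self._candidates.values()))


op = TinyDiffOP(nn.ModuleDict({
    'conv3x3': nn.Conv2d(3, 8, 3, padding=1),
    'conv5x5': nn.Conv2d(3, 8, 5, padding=2),
}))
out = op(torch.randn(2, 3, 16, 16))
assert out.shape == (2, 8, 16, 16)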

View File

@@ -15,7 +15,7 @@ class TestMutables(TestCase):
        norm_cfg = dict(type='BN', requires_grad=True)
        op_cfg = dict(
            type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                shuffle_3x3=dict(
                    type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                shuffle_5x5=dict(
@@ -80,7 +80,7 @@ class TestMutables(TestCase):
        op_cfg = dict(
            type='OneShotProbMutableOP',
            choice_probs=[0.1, 0.2, 0.3, 0.4],
-            candidate_ops=dict(
+            candidates=dict(
                shuffle_3x3=dict(
                    type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                shuffle_5x5=dict(
@@ -142,7 +142,7 @@ class TestMutables(TestCase):
        norm_cfg = dict(type='BN', requires_grad=True)
        op_cfg = dict(
            type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                shuffle_3x3=dict(
                    type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                shuffle_5x5=dict(
@@ -165,7 +165,7 @@ class TestMutables(TestCase):
        norm_cfg = dict(type='BN', requires_grad=True)
        op_cfg = dict(
            type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                shuffle_3x3=dict(
                    type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                shuffle_5x5=dict(
@@ -189,7 +189,7 @@ class TestMutables(TestCase):
        norm_cfg = dict(type='BN', requires_grad=True)
        op_cfg = dict(
            type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                shuffle_3x3=dict(
                    type='ShuffleBlock',
                    norm_cfg=norm_cfg,
@@ -221,9 +221,9 @@ class TestMutables(TestCase):
        output = op.forward_all(input)
        assert output is not None
-    def test_candidate_ops(self):
-        candidate_ops = nn.ModuleDict({
+    def test_candidates(self):
+        candidates = nn.ModuleDict({
            'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
            'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
            'conv7x7': nn.Conv2d(32, 32, 7, 1, 3),
@@ -231,7 +231,7 @@ class TestMutables(TestCase):
            'avgpool3x3': nn.AvgPool2d(3, 1, 1),
        })
-        op_cfg = dict(type='OneShotMutableOP', candidate_ops=candidate_ops)
+        op_cfg = dict(type='OneShotMutableOP', candidates=candidates)
        op = MODELS.build(op_cfg)
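These hunks also exercise the two forward modes of a OneShotMutableOP. A hedged usage sketch mirroring the test; that `forward_all` runs every candidate and aggregates their outputs is an assumption based on how the test uses it:

import torch
import torch.nn as nn

import mmrazor.models  # noqa: F401  (import side effect registers the op)
from mmrazor.registry import MODELS

candidates = nn.ModuleDict({
    'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
    'maxpool3x3': nn.MaxPool2d(3, 1, 1),
})
op = MODELS.build(dict(type='OneShotMutableOP', candidates=candidates))

x = torch.randn(2, 32, 16, 16)
y_all = op.forward_all(x)      # runs every candidate and aggregates
op.current_choice = 'conv3x3'
y_one = op(x)                  # runs only the currently chosen candidate
assert y_all.shape == y_one.shape == x.shape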

View File

@@ -72,12 +72,12 @@ class SearchableModelAlias(nn.Module):
        return self.slayer3(x)
-class TestDiffMutator(TestCase):
+class TestDiffModuleMutator(TestCase):
    def setUp(self):
        self.MUTABLE_CFG = dict(
            type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                torch_conv2d_3x3=dict(
                    type='torchConv2d',
                    kernel_size=3,

View File

@@ -30,7 +30,7 @@ MUTATOR_CFG = dict(type='OneShotModuleMutator')
MUTABLE_CFG = dict(
    type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
        choice1=dict(
            type='MBBlock',
            in_channels=3,
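This file pairs a OneShotModuleMutator with the mutable config above. A hedged sketch of how the two are meant to interact during supernet training; the method names follow MMRazor 2.0's one-shot mutator API and the full MBBlock config from this file is assumed, so treat this as illustrative only:

import torch
import torch.nn as nn

import mmrazor.models  # noqa: F401  (import side effect registers classes)
from mmrazor.registry import MODELS


class TinySupernet(nn.Module):

    def __init__(self):
        super().__init__()
        # MUTABLE_CFG is the OneShotMutableOP config defined above
        self.op = MODELS.build(MUTABLE_CFG)

    def forward(self, x):
        return self.op(x)


mutator = MODELS.build(MUTATOR_CFG)
supernet = TinySupernet()
mutator.prepare_from_supernet(supernet)        # collect and group mutables
mutator.set_choices(mutator.sample_choices())  # activate a random subnet
out = supernet(torch.randn(2, 3, 224, 224))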

View File

@@ -13,7 +13,7 @@ from mmrazor.registry import MODELS
_FIRST_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
        mb_k3e1=dict(
            type='MBBlock',
            kernel_size=3,
@@ -23,7 +23,7 @@ _FIRST_STAGE_MUTABLE = dict(
_OTHER_STAGE_MUTABLE = dict(
    type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
        mb_k3e3=dict(
            type='MBBlock',
            kernel_size=3,

View File

@@ -3,6 +3,10 @@ import argparse
import os
import os.path as osp
+from mmcls.core import *  # noqa: F401,F403
+from mmcls.datasets import *  # noqa: F401,F403
+from mmcls.metrics import *  # noqa: F401,F403
+from mmcls.models import *  # noqa: F401,F403
# TODO import mmcls and mmseg
from mmdet.core import *  # noqa: F401,F403
from mmdet.datasets import *  # noqa: F401,F403
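These wildcard imports exist purely for their side effects: importing each mmcls subpackage runs its `register_module` decorators, so that `type=` strings in configs resolve to the registered classes. A toy illustration of the mechanism; `ToyHead` is made up:

import torch.nn as nn

from mmrazor.registry import MODELS  # re-exported mmengine registry


# importing the module that contains this decorator is all it takes to make
# type='ToyHead' resolvable from a config, which is what the star-imports
# above are for
@MODELS.register_module()
class ToyHead(nn.Module):

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.fc = nn.LazyLinear(num_classes)

    def forward(self, x):
        return self.fc(x)


head = MODELS.build(dict(type='ToyHead', num_classes=5))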

View File

@@ -38,7 +38,6 @@ def parse_args():
def main():
register_all_modules(False)
args = parse_args()
# load config