Align SPOS and DetNAS to MMRazor2.0
parent 2d5e8bc675
commit 6c920c88ee

@ -0,0 +1,12 @@
#!/usr/bin/env sh

MKL_NUM_THREADS=4
OMP_NUM_THREADS=1

# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test

bash tools/slurm_test.sh mm_model angle_test configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py /mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth
@ -0,0 +1,56 @@
#!/usr/bin/env sh

MKL_NUM_THREADS=4
OMP_NUM_THREADS=1

# train
# srun --partition=mm_model \
# --job-name=spos_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py

# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py ./work_dir/spos

# SPOS test
# srun --partition=mm_model \
# --job-name=spos_test \
# --gres=gpu:1 \
# --ntasks=1 \
# --ntasks-per-node=1 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth"

# DetNAS train
# srun --partition=mm_model \
# --job-name=detnas_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py

# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py ./work_dir/detnas_pretrain

# DetNAS test
# srun --partition=mm_model \
# --job-name=detnas_test \
# --gres=gpu:1 \
# --ntasks=1 \
# --ntasks-per-node=1 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth"

# CREAM Test
# bash tools/slurm_test.sh mm_model cream_test configs/nas/cream/cream_14_subnet_mobilenet.py '/mnt/lustre/dongpeijie/14_2.0.pth'

# CREAM Train
bash tools/slurm_train.sh mm_model cream_train configs/nas/cream/cream_14_subnet_mobilenet.py
@ -0,0 +1,7 @@
#!/usr/bin/env sh

MKL_NUM_THREADS=4
OMP_NUM_THREADS=1

bash tools/slurm_test.sh mm_model spos_test configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth'
@ -0,0 +1,31 @@
#!/usr/bin/env sh

MKL_NUM_THREADS=4
OMP_NUM_THREADS=1

# DetNAS train
# srun --partition=mm_model \
# --job-name=detnas_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py

# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test

# bash tools/slurm_test.sh mm_model detnas_test configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth

# DetNAS test
srun --partition=mm_model \
    --job-name=detnas_test \
    --gres=gpu:1 \
    --ntasks=1 \
    --ntasks-per-node=1 \
    --cpus-per-task=8 \
    --kill-on-bad-exit=1 \
    --quotatype=auto \
    python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth" --launcher=slurm
@ -0,0 +1,51 @@
#!/usr/bin/env sh

MKL_NUM_THREADS=4
OMP_NUM_THREADS=1

# train
# srun --partition=mm_model \
# --job-name=spos_train \
# --gres=gpu:8 \
# --ntasks=8 \
# --ntasks-per-node=8 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py

# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_format_output

# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph

# reached only 55% accuracy due to wrong PolyLR settings
# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph

# fix the PolyLR setting and rerun with ColorJitter
# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_with_colorjittor

# fix the PolyLR setting and rerun w/o ColorJitter
# bash tools/slurm_train.sh mm_model spos_retrain_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_wo_colorjittor

# fix the optimizer weight-decay setting (paramwise_cfg), w/o ColorJitter
# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_wo_cj

# fix the optimizer weight-decay setting (paramwise_cfg), with ColorJitter
# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_w_cj

# SPOS test
# srun --partition=mm_model \
# --job-name=spos_test \
# --gres=gpu:1 \
# --ntasks=1 \
# --ntasks-per-node=1 \
# --cpus-per-task=8 \
# --kill-on-bad-exit=1 \
# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth"

bash tools/slurm_test.sh mm_model spos_test configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth'

# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_spos
@ -0,0 +1,76 @@
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=1,
|
||||
act_cfg=(dict(type='HSwish'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
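With bias=3, divisor=6 and the (0, 1) clamp, the SE gate configured above is the standard hard-sigmoid, and HSwish is its multiplicative counterpart. A plain-Python sketch of what these act_cfg entries compute (illustration only, not the mmcv ops):

def hsigmoid(x, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0):
    # clamp((x + bias) / divisor, min_value, max_value)
    return max(min_value, min(max_value, (x + bias) / divisor))

def hswish(x):
    # x * hard-sigmoid(x) with the default (bias=3, divisor=6) parameters
    return x * hsigmoid(x)

assert hsigmoid(-3.0) == 0.0 and hsigmoid(3.0) == 1.0
assert abs(hswish(1.0) - (1.0 * 4.0 / 6.0)) < 1e-9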
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
_OTHER_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[80, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[96, 3, 1, _OTHER_STAGE_MUTABLE],
|
||||
[192, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||
]
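As a rough illustration of how such a backbone might consume arch_setting (an assumption for clarity, not the actual SearchableMobileNet implementation), each row expands into num_blocks choice blocks, with the stride applied only to the first block of the stage:

def expand_arch_setting(arch_setting, widen_factor=1.0):
    # Toy expansion of the rows above into per-block (out_channels, stride, mutable_cfg).
    blocks = []
    for channel, num_blocks, stride, mutable_cfg in arch_setting:
        out_channels = int(channel * widen_factor)
        for i in range(num_blocks):
            blocks.append((out_channels, stride if i == 0 else 1, mutable_cfg))
    return blocks

# e.g. the [40, 2, 2, _OTHER_STAGE_MUTABLE] row would yield two blocks with strides (2, 1)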
@ -0,0 +1,76 @@
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=1,
|
||||
act_cfg=(dict(type='HSwish'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
_OTHER_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[80, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[96, 1, 1, _OTHER_STAGE_MUTABLE],
|
||||
[192, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||
]
@ -0,0 +1,76 @@
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=1,
|
||||
act_cfg=(dict(type='HSwish'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
_OTHER_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[80, 3, 2, _OTHER_STAGE_MUTABLE],
|
||||
[96, 4, 1, _OTHER_STAGE_MUTABLE],
|
||||
[192, 3, 2, _OTHER_STAGE_MUTABLE],
|
||||
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||
]
@ -0,0 +1,76 @@
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=1,
|
||||
act_cfg=(dict(type='HSwish'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
_OTHER_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[80, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[96, 3, 1, _OTHER_STAGE_MUTABLE],
|
||||
[192, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||
]
@ -0,0 +1,76 @@
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=1,
|
||||
act_cfg=(dict(type='HSwish'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
_OTHER_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 4, 2, _OTHER_STAGE_MUTABLE],
|
||||
[40, 4, 2, _OTHER_STAGE_MUTABLE],
|
||||
[80, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||
[96, 4, 1, _OTHER_STAGE_MUTABLE],
|
||||
[192, 4, 2, _OTHER_STAGE_MUTABLE],
|
||||
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||
]
@ -0,0 +1,76 @@
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=1,
|
||||
act_cfg=(dict(type='HSwish'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
_OTHER_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='HSwish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||
[40, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||
[80, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||
[96, 6, 1, _OTHER_STAGE_MUTABLE],
|
||||
[192, 6, 2, _OTHER_STAGE_MUTABLE],
|
||||
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||
]
@ -0,0 +1,11 @@
modules:
  backbone.layer1.0: depthsepconv
  backbone.layer2.0: mb_k3e4_se
  backbone.layer3.0: mb_k5e6_se
  backbone.layer3.1: mb_k5e6_se
  backbone.layer4.0: mb_k5e6_se
  backbone.layer4.1: mb_k5e6_se
  backbone.layer5.0: mb_k3e6_se
  backbone.layer6.0: mb_k5e6_se
  backbone.layer7.0: convbnact
channels:
@ -0,0 +1,8 @@
_base_ = ['./cream_14_supernet_mobilenet.py']

# FIXME: you may replace this with the fix_subnet searched by yourself
fix_subnet = 'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml'  # noqa: E501

model = dict(fix_subnet=fix_subnet)

find_unused_parameters = False
@ -0,0 +1,241 @@
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
# convert image from BGR to RGB
|
||||
to_rgb=True,
|
||||
)
|
||||
|
||||
# file_client_args = dict(
|
||||
# backend='petrel',
|
||||
# path_mapping=dict({
|
||||
# './data/imagenet': 's3://openmmlab/datasets/classification/imagenet',
|
||||
# 'data/imagenet': 's3://openmmlab/datasets/classification/imagenet'
|
||||
# }))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='RandomResizedCrop', scale=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeEdge',
|
||||
scale=73,
|
||||
edge='short',
|
||||
backend='pillow',
|
||||
interpolation='bicubic'),
|
||||
dict(type='CenterCrop', crop_size=64),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
# alternative data_root: /mnt/lustre/share_data/wangjiaqi/data/imagenet
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# scheduler
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False),
|
||||
]
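For reference, PolyLR with power=1.0 and eta_min=0.0 is a linear decay of the learning rate; a minimal sketch of the resulting schedule (assuming it spans the full max_iters from train_cfg below, with the base lr=0.5 from the optimizer above):

def poly_lr(iter_idx, base_lr=0.5, max_iters=300000, power=1.0, eta_min=0.0):
    # (base_lr - eta_min) * (1 - t / T) ** power + eta_min
    return (base_lr - eta_min) * (1 - iter_idx / max_iters) ** power + eta_min

assert poly_lr(0) == 0.5
assert abs(poly_lr(150000) - 0.25) < 1e-9  # halfway through training -> half the base lr when power=1.0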
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||
val_cfg = dict()
|
||||
test_cfg = dict()
|
||||
|
||||
# runtime
|
||||
|
||||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
vis_backends = [dict(type='LocalVisBackend')]
|
||||
visualizer = dict(
|
||||
type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
se_cfg = dict(
|
||||
ratio=4,
|
||||
divisor=8,
|
||||
act_cfg=(dict(type='ReLU'),
|
||||
dict(
|
||||
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||
max_value=1)))
|
||||
|
||||
_FIRST_STAGE_MUTABLE = dict( # DepthwiseSep
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
depthsepconv=dict(
|
||||
type='DepthwiseSeparableConv',
|
||||
dw_kernel_size=3,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish'))))
|
||||
|
||||
_MIDDLE_STAGE_MUTABLE = dict(
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k3e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k5e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k5e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k7e4_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=4,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish')),
|
||||
mb_k7e6_se=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
se_cfg=se_cfg,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='Swish'))))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||
[24, 1, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
[40, 2, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
[80, 2, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
[96, 1, 1, _MIDDLE_STAGE_MUTABLE],
|
||||
[192, 1, 2, _MIDDLE_STAGE_MUTABLE],
|
||||
]
|
||||
|
||||
norm_cfg = dict(type='BN')
|
||||
supernet = dict(
|
||||
_scope_='mmcls',
|
||||
type='ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
_scope_='mmrazor',
|
||||
type='SearchableMobileNet',
|
||||
arch_setting=arch_setting,
|
||||
first_channels=16,
|
||||
last_channels=320,
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='Swish'),
|
||||
out_indices=(6, ),
|
||||
),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='mmrazor.CreamClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=320,
|
||||
num_features=1280,
|
||||
act_cfg=dict(type='Swish'),
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
@ -1,116 +0,0 @@
normal_n2:
|
||||
chosen:
|
||||
- normal_n2_p1
|
||||
- normal_n2_p0
|
||||
normal_n3:
|
||||
chosen:
|
||||
- normal_n3_p0
|
||||
- normal_n3_p1
|
||||
normal_n4:
|
||||
chosen:
|
||||
- normal_n4_p0
|
||||
- normal_n4_p1
|
||||
normal_n5:
|
||||
chosen:
|
||||
- normal_n5_p2
|
||||
- normal_n5_p0
|
||||
reduce_n2:
|
||||
chosen:
|
||||
- reduce_n2_p0
|
||||
- reduce_n2_p1
|
||||
reduce_n3:
|
||||
chosen:
|
||||
- reduce_n3_p1
|
||||
- reduce_n3_p2
|
||||
reduce_n4:
|
||||
chosen:
|
||||
- reduce_n4_p2
|
||||
- reduce_n4_p0
|
||||
reduce_n5:
|
||||
chosen:
|
||||
- reduce_n5_p1
|
||||
- reduce_n5_p2
|
||||
normal_n2_p0:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n2_p1:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n3_p0:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n3_p1:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n3_p2:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n4_p0:
|
||||
chosen:
|
||||
- skip_connect
|
||||
normal_n4_p1:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n4_p2:
|
||||
chosen:
|
||||
- skip_connect
|
||||
normal_n4_p3:
|
||||
chosen:
|
||||
- sep_conv_3x3
|
||||
normal_n5_p0:
|
||||
chosen:
|
||||
- skip_connect
|
||||
normal_n5_p1:
|
||||
chosen:
|
||||
- skip_connect
|
||||
normal_n5_p2:
|
||||
chosen:
|
||||
- dil_conv_3x3
|
||||
normal_n5_p3:
|
||||
chosen:
|
||||
- skip_connect
|
||||
normal_n5_p4:
|
||||
chosen:
|
||||
- skip_connect
|
||||
reduce_n2_p0:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n2_p1:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n3_p0:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n3_p1:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n3_p2:
|
||||
chosen:
|
||||
- skip_connect
|
||||
reduce_n4_p0:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n4_p1:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n4_p2:
|
||||
chosen:
|
||||
- skip_connect
|
||||
reduce_n4_p3:
|
||||
chosen:
|
||||
- skip_connect
|
||||
reduce_n5_p0:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n5_p1:
|
||||
chosen:
|
||||
- max_pool_3x3
|
||||
reduce_n5_p2:
|
||||
chosen:
|
||||
- skip_connect
|
||||
reduce_n5_p3:
|
||||
chosen:
|
||||
- skip_connect
|
||||
reduce_n5_p4:
|
||||
chosen:
|
||||
- skip_connect
@ -0,0 +1,58 @@
modules:
|
||||
normal_n2:
|
||||
- normal_n2_p0
|
||||
- normal_n2_p1
|
||||
normal_n2_p0:
|
||||
- sep_conv_3x3
|
||||
normal_n2_p1:
|
||||
- sep_conv_3x3
|
||||
normal_n3:
|
||||
- normal_n3_p0
|
||||
- normal_n3_p1
|
||||
normal_n3_p0:
|
||||
- skip_connect
|
||||
normal_n3_p1:
|
||||
- sep_conv_5x5
|
||||
normal_n4:
|
||||
- normal_n4_p0
|
||||
- normal_n4_p1
|
||||
normal_n4_p0:
|
||||
- sep_conv_3x3
|
||||
normal_n4_p1:
|
||||
- skip_connect
|
||||
normal_n5:
|
||||
- normal_n5_p0
|
||||
- normal_n5_p1
|
||||
normal_n5_p0:
|
||||
- skip_connect
|
||||
normal_n5_p1:
|
||||
- skip_connect
|
||||
reduce_n2:
|
||||
- reduce_n2_p0
|
||||
- reduce_n2_p1
|
||||
reduce_n2_p0:
|
||||
- max_pool_3x3
|
||||
reduce_n2_p1:
|
||||
- sep_conv_3x3
|
||||
reduce_n3:
|
||||
- reduce_n3_p0
|
||||
- reduce_n3_p2
|
||||
reduce_n3_p0:
|
||||
- max_pool_3x3
|
||||
reduce_n3_p2:
|
||||
- dil_conv_5x5
|
||||
reduce_n4:
|
||||
- reduce_n4_p0
|
||||
- reduce_n4_p2
|
||||
reduce_n4_p0:
|
||||
- max_pool_3x3
|
||||
reduce_n4_p2:
|
||||
- skip_connect
|
||||
reduce_n5:
|
||||
- reduce_n5_p0
|
||||
- reduce_n5_p2
|
||||
reduce_n5_p0:
|
||||
- max_pool_3x3
|
||||
reduce_n5_p2:
|
||||
- skip_connect
|
||||
channels:
@ -0,0 +1,196 @@
# dataset settings
|
||||
dataset_type = 'CIFAR10'
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[125.307, 122.961, 113.8575],
|
||||
std=[51.5865, 50.847, 51.255],
|
||||
# loaded images are already RGB format
|
||||
to_rgb=False)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='RandomCrop', crop_size=32, padding=4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
dict(
|
||||
type='Cutout',
|
||||
magnitude_key='shape',
|
||||
magnitude_range=(1, 16),
|
||||
pad_val=0,
|
||||
prob=0.5),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=96,
|
||||
num_workers=2,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10',
|
||||
test_mode=False,
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=2,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/',
|
||||
test_mode=True,
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, ))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4),
|
||||
mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3),
|
||||
clip_grad=dict(max_norm=5, norm_type=2))
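The separate architecture/mutator optimizers above reflect DARTS-style alternating optimization; a schematic sketch of what one training iteration could look like (compute_loss and the helper names are hypothetical, not the mmrazor training loop):

# Hypothetical per-iteration update with the two optimizers (schematic only).
def darts_step(model, arch_optimizer, weight_optimizer, train_batch, val_batch):
    # 1) update architecture parameters (mutator) on validation data
    arch_optimizer.zero_grad()
    model.compute_loss(val_batch).backward()
    arch_optimizer.step()

    # 2) update supernet weights on training data
    weight_optimizer.zero_grad()
    model.compute_loss(train_batch).backward()
    weight_optimizer.step()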
|
||||
|
||||
# leanring policy
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='CosineAnnealingLR',
|
||||
T_max=600,
|
||||
by_epoch=True,
|
||||
begin=0,
|
||||
end=600,
|
||||
)
|
||||
]
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=True, max_epochs=600)
|
||||
val_cfg = dict(interval=1) # validate each epoch
|
||||
test_cfg = dict()
|
||||
|
||||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
visualizer = None
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
# model
|
||||
norm_cfg = dict(type='BN', affine=True)
|
||||
mutable_cfg = dict(
|
||||
_scope_='mmrazor',
|
||||
type='mmrazor.DiffMutableOP',
|
||||
candidates=dict(
|
||||
zero=dict(type='mmrazor.DartsZero'),
|
||||
skip_connect=dict(
|
||||
type='mmrazor.DartsSkipConnect',
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
max_pool_3x3=dict(
|
||||
type='mmrazor.DartsPoolBN',
|
||||
pool_type='max',
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
avg_pool_3x3=dict(
|
||||
type='mmrazor.DartsPoolBN',
|
||||
pool_type='avg',
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
sep_conv_3x3=dict(
|
||||
type='mmrazor.DartsSepConv',
|
||||
kernel_size=3,
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
sep_conv_5x5=dict(
|
||||
type='mmrazor.DartsSepConv',
|
||||
kernel_size=5,
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
dil_conv_3x3=dict(
|
||||
type='mmrazor.DartsDilConv',
|
||||
kernel_size=3,
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
dil_conv_5x5=dict(
|
||||
type='mmrazor.DartsDilConv',
|
||||
kernel_size=5,
|
||||
norm_cfg=norm_cfg,
|
||||
use_drop_path=True),
|
||||
))
|
||||
|
||||
route_cfg = dict(
|
||||
type='mmrazor.DiffChoiceRoute',
|
||||
with_arch_param=True,
|
||||
)
|
||||
|
||||
supernet = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
type='mmrazor.DartsBackbone',
|
||||
in_channels=3,
|
||||
base_channels=36,
|
||||
num_layers=20,
|
||||
num_nodes=4,
|
||||
stem_multiplier=3,
|
||||
auxliary=True,
|
||||
aux_channels=128,
|
||||
aux_out_channels=768,
|
||||
out_indices=(19, ),
|
||||
mutable_cfg=mutable_cfg,
|
||||
route_cfg=route_cfg),
|
||||
neck=dict(type='mmcls.GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='mmrazor.DartsSubnetClsHead',
|
||||
num_classes=10,
|
||||
in_channels=576,
|
||||
aux_in_channels=768,
|
||||
loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
|
||||
aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4),
|
||||
topk=(1, 5),
|
||||
cal_acc=True),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.DiffModuleMutator')
|
||||
|
||||
fix_subnet = 'configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml'
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
fix_subnet=fix_subnet,
|
||||
)
|
||||
|
||||
find_unused_parameters = False
@ -0,0 +1,163 @@
# dataset settings
|
||||
dataset_type = 'CIFAR10'
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[125.307, 122.961, 113.8575],
|
||||
std=[51.5865, 50.847, 51.255],
|
||||
# loaded images are already RGB format
|
||||
to_rgb=False)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='RandomCrop', crop_size=32, padding=4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=2,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10',
|
||||
test_mode=False,
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=2,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/',
|
||||
test_mode=True,
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, ))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4),
|
||||
mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='CosineAnnealingLR',
|
||||
T_max=50,
|
||||
by_epoch=True,
|
||||
eta_min=1e-3,
|
||||
begin=0,
|
||||
end=50,
|
||||
)
|
||||
]
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=True, max_epochs=50)
|
||||
val_cfg = dict(interval=1) # validate each epoch
|
||||
test_cfg = dict()
|
||||
|
||||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
visualizer = None
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
# model
|
||||
norm_cfg = dict(type='BN', affine=False)
|
||||
mutable_cfg = dict(
|
||||
_scope_='mmrazor',
|
||||
type='mmrazor.DiffMutableOP',
|
||||
candidates=dict(
|
||||
zero=dict(type='mmrazor.DartsZero'),
|
||||
skip_connect=dict(type='mmrazor.DartsSkipConnect', norm_cfg=norm_cfg),
|
||||
max_pool_3x3=dict(
|
||||
type='mmrazor.DartsPoolBN', pool_type='max', norm_cfg=norm_cfg),
|
||||
avg_pool_3x3=dict(
|
||||
type='mmrazor.DartsPoolBN', pool_type='avg', norm_cfg=norm_cfg),
|
||||
sep_conv_3x3=dict(
|
||||
type='mmrazor.DartsSepConv', kernel_size=3, norm_cfg=norm_cfg),
|
||||
sep_conv_5x5=dict(
|
||||
type='mmrazor.DartsSepConv', kernel_size=5, norm_cfg=norm_cfg),
|
||||
dil_conv_3x3=dict(
|
||||
type='mmrazor.DartsDilConv', kernel_size=3, norm_cfg=norm_cfg),
|
||||
dil_conv_5x5=dict(
|
||||
type='mmrazor.DartsDilConv', kernel_size=5, norm_cfg=norm_cfg),
|
||||
))
|
||||
|
||||
route_cfg = dict(
|
||||
type='mmrazor.DiffChoiceRoute',
|
||||
with_arch_param=True,
|
||||
)
|
||||
|
||||
supernet = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
backbone=dict(
|
||||
type='mmrazor.DartsBackbone',
|
||||
in_channels=3,
|
||||
base_channels=36,
|
||||
num_layers=20,
|
||||
num_nodes=4,
|
||||
stem_multiplier=3,
|
||||
auxliary=False,
|
||||
out_indices=(19, ),
|
||||
mutable_cfg=mutable_cfg,
|
||||
route_cfg=route_cfg),
|
||||
neck=dict(type='mmcls.GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='mmrazor.DartsSubnetClsHead',
|
||||
num_classes=10,
|
||||
in_channels=576,
|
||||
aux_in_channels=768,
|
||||
loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
|
||||
aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4),
|
||||
topk=(1, 5),
|
||||
cal_acc=True),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.DiffModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
@ -1,60 +0,0 @@
stage_0_block_0:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_0_block_1:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_0_block_2:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_0_block_3:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_1_block_0:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_1_block_1:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_1_block_2:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_1_block_3:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_2_block_0:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_2_block_1:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_2_block_2:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_2_block_3:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_2_block_4:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_5:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_6:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_2_block_7:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_3_block_0:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_3_block_1:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_3_block_2:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_3_block_3:
|
||||
chosen:
|
||||
- shuffle_7x7
|
@ -0,0 +1,22 @@
|
||||
backbone.layers.0.0: shuffle_5x5
|
||||
backbone.layers.0.1: shuffle_3x3
|
||||
backbone.layers.0.2: shuffle_3x3
|
||||
backbone.layers.0.3: shuffle_3x3
|
||||
backbone.layers.1.0: shuffle_xception
|
||||
backbone.layers.1.1: shuffle_3x3
|
||||
backbone.layers.1.2: shuffle_xception
|
||||
backbone.layers.1.3: shuffle_7x7
|
||||
backbone.layers.2.0: shuffle_7x7
|
||||
backbone.layers.2.1: shuffle_7x7
|
||||
backbone.layers.2.2: shuffle_xception
|
||||
backbone.layers.2.3: shuffle_xception
|
||||
backbone.layers.2.4: shuffle_3x3
|
||||
backbone.layers.2.5: shuffle_7x7
|
||||
backbone.layers.2.6: shuffle_5x5
|
||||
backbone.layers.2.7: shuffle_xception
|
||||
backbone.layers.3.0: shuffle_7x7
|
||||
backbone.layers.3.1: shuffle_7x7
|
||||
backbone.layers.3.2: shuffle_7x7
|
||||
backbone.layers.3.3: shuffle_5x5
|
||||
channels:
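Files like this are what the configs below point at via fix_subnet; a minimal sketch of reading the module-choice mapping back (plain PyYAML, not the mmrazor loader):

import yaml

def load_fixed_choices(path):
    # returns e.g. {'backbone.layers.0.0': 'shuffle_5x5', ...}
    with open(path) as f:
        subnet = yaml.safe_load(f)
    return subnet.get('modules', {})

# choices = load_fixed_choices('configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml')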
@ -0,0 +1,22 @@
modules:
|
||||
backbone.layers.0.0: shuffle_5x5
|
||||
backbone.layers.0.1: shuffle_3x3
|
||||
backbone.layers.0.2: shuffle_3x3
|
||||
backbone.layers.0.3: shuffle_3x3
|
||||
backbone.layers.1.0: shuffle_xception
|
||||
backbone.layers.1.1: shuffle_3x3
|
||||
backbone.layers.1.2: shuffle_xception
|
||||
backbone.layers.1.3: shuffle_7x7
|
||||
backbone.layers.2.0: shuffle_7x7
|
||||
backbone.layers.2.1: shuffle_7x7
|
||||
backbone.layers.2.2: shuffle_xception
|
||||
backbone.layers.2.3: shuffle_xception
|
||||
backbone.layers.2.4: shuffle_3x3
|
||||
backbone.layers.2.5: shuffle_7x7
|
||||
backbone.layers.2.6: shuffle_5x5
|
||||
backbone.layers.2.7: shuffle_xception
|
||||
backbone.layers.3.0: shuffle_7x7
|
||||
backbone.layers.3.1: shuffle_7x7
|
||||
backbone.layers.3.2: shuffle_7x7
|
||||
backbone.layers.3.3: shuffle_5x5
|
||||
channels:
@ -1,20 +0,0 @@
_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py']
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=128,
|
||||
workers_per_gpu=8,
|
||||
)
|
||||
|
||||
algorithm = dict(bn_training_mode=True)
|
||||
|
||||
searcher = dict(
|
||||
type='EvolutionSearcher',
|
||||
metrics='bbox',
|
||||
score_key='bbox_mAP',
|
||||
constraints=dict(flops=300 * 1e6),
|
||||
candidate_pool_size=50,
|
||||
candidate_top_k=10,
|
||||
max_epoch=20,
|
||||
num_mutation=20,
|
||||
num_crossover=20,
|
||||
)
@ -1,6 +0,0 @@
_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py']

# FIXME: you may replace this with the mutable_cfg searched by yourself
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml'  # noqa: E501

algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
@ -1,8 +0,0 @@
_base_ = [
    '../spos/spos_subnet_shufflenetv2_8xb128_in1k.py',
]

# FIXME: you may replace this with the mutable_cfg searched by yourself
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml'  # noqa: E501

algorithm = dict(mutable_cfg=mutable_cfg)
@ -0,0 +1,8 @@
_base_ = ['./detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py']

# FIXME: you may replace this with the fix_subnet searched by yourself
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml'  # noqa: E501

model = dict(fix_subnet=fix_subnet)

find_unused_parameters = False
@ -1,144 +0,0 @@
_base_ = [
|
||||
'../../_base_/datasets/mmdet/coco_detection.py',
|
||||
'../../_base_/schedules/mmdet/schedule_1x.py',
|
||||
'../../_base_/mmdet_runtime.py'
|
||||
]
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
model = dict(
|
||||
type='mmdet.FasterRCNN',
|
||||
backbone=dict(
|
||||
type='mmcls.SearchableShuffleNetV2',
|
||||
norm_cfg=norm_cfg,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
widen_factor=1.0,
|
||||
with_last_layer=False),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=[64, 160, 320, 640],
|
||||
out_channels=256,
|
||||
num_outs=5),
|
||||
rpn_head=dict(
|
||||
type='RPNHead',
|
||||
in_channels=256,
|
||||
feat_channels=256,
|
||||
anchor_generator=dict(
|
||||
type='AnchorGenerator',
|
||||
scales=[8],
|
||||
ratios=[0.5, 1.0, 2.0],
|
||||
strides=[4, 8, 16, 32, 64]),
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[.0, .0, .0, .0],
|
||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
roi_head=dict(
|
||||
type='StandardRoIHead',
|
||||
bbox_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
bbox_head=dict(
|
||||
type='Shared4Conv1FCBBoxHead',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=256,
|
||||
fc_out_channels=1024,
|
||||
roi_feat_size=7,
|
||||
num_classes=80,
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[0., 0., 0., 0.],
|
||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||
reg_class_agnostic=False,
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
|
||||
train_cfg=dict(
|
||||
rpn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.7,
|
||||
neg_iou_thr=0.3,
|
||||
min_pos_iou=0.3,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1),
|
||||
sampler=dict(
|
||||
type='RandomSampler',
|
||||
num=256,
|
||||
pos_fraction=0.5,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=False),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
rpn_proposal=dict(
|
||||
nms_pre=2000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.5,
|
||||
neg_iou_thr=0.5,
|
||||
min_pos_iou=0.5,
|
||||
match_low_quality=False,
|
||||
ignore_iof_thr=-1),
|
||||
sampler=dict(
|
||||
type='RandomSampler',
|
||||
num=512,
|
||||
pos_fraction=0.25,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=True),
|
||||
pos_weight=-1,
|
||||
debug=False)),
|
||||
test_cfg=dict(
|
||||
rpn=dict(
|
||||
nms_pre=1000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100)
|
||||
# soft-nms is also supported for rcnn testing
|
||||
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(
|
||||
type='OneShotModuleMutator',
|
||||
placeholder_mapping=dict(
|
||||
all_blocks=dict(
|
||||
type='OneShotMutableOP',
|
||||
choices=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
||||
shuffle_5x5=dict(
|
||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=5),
|
||||
shuffle_7x7=dict(
|
||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=7),
|
||||
shuffle_xception=dict(
|
||||
type='ShuffleXception',
|
||||
norm_cfg=norm_cfg,
|
||||
),
|
||||
))))
|
||||
|
||||
algorithm = dict(
|
||||
type='DetNAS',
|
||||
architecture=dict(
|
||||
type='MMDetArchitecture',
|
||||
model=model,
|
||||
),
|
||||
mutator=mutator,
|
||||
pruner=None,
|
||||
distiller=None,
|
||||
retraining=False,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
@ -1,5 +0,0 @@
_base_ = [
    '../spos/spos_supernet_shufflenetv2_8xb128_in1k.py',
]

runner = dict(max_iters=300000)
@ -0,0 +1,87 @@
_base_ = [
|
||||
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
|
||||
'mmdet::_base_/datasets/coco_detection.py',
|
||||
'mmdet::_base_/schedules/schedule_1x.py',
|
||||
'mmdet::_base_/default_runtime.py'
|
||||
]
|
||||
|
||||
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
|
||||
|
||||
_base_.train_dataloader.dataset.data_root = data_root
|
||||
|
||||
visualizer = None
|
||||
|
||||
log_level = 'INFO'
|
||||
load_from = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
|
||||
resume = False
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
# model settings
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='mmrazor.OneShotMutableOP',
|
||||
candidates=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||
shuffle_5x5=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||
shuffle_7x7=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||
shuffle_xception=dict(
|
||||
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
|
||||
))
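Conceptually, a OneShotMutableOP wraps all candidate blocks of a stage and forwards only the currently sampled one; a toy stand-in (not the mmrazor class) of that behaviour:

import random
import torch.nn as nn

class ToyOneShotMutableOP(nn.Module):
    # Keeps every candidate module, but runs only the sampled choice in forward().

    def __init__(self, candidates):
        super().__init__()
        self.candidates = nn.ModuleDict(candidates)
        self.current_choice = next(iter(candidates))

    def sample_choice(self):
        # uniform sampling, as in single-path one-shot supernet training
        self.current_choice = random.choice(list(self.candidates))

    def forward(self, x):
        return self.candidates[self.current_choice](x)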
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 3 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||
[64, 4, _STAGE_MUTABLE],
|
||||
[160, 4, _STAGE_MUTABLE],
|
||||
[320, 8, _STAGE_MUTABLE],
|
||||
[640, 4, _STAGE_MUTABLE],
|
||||
]
|
||||
|
||||
supernet = _base_.model
|
||||
|
||||
supernet.backbone = dict(
|
||||
type='mmrazor.SearchableShuffleNetV2',
|
||||
arch_setting=arch_setting,
|
||||
norm_cfg=norm_cfg,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
widen_factor=1.0,
|
||||
with_last_layer=False)
|
||||
|
||||
supernet.neck = dict(
|
||||
type='FPN',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=[64, 160, 320, 640],
|
||||
out_channels=256,
|
||||
num_outs=5)
|
||||
|
||||
supernet.roi_head.bbox_head = dict(
|
||||
type='Shared4Conv1FCBBoxHead',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=256,
|
||||
fc_out_channels=1024,
|
||||
roi_feat_size=7,
|
||||
num_classes=80,
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[0., 0., 0., 0.],
|
||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||
reg_class_agnostic=False,
|
||||
loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0))
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(
|
||||
_delete_=True,
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
fix_subnet=fix_subnet,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
@ -0,0 +1,114 @@
_base_ = [
|
||||
'mmdet::faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
|
||||
'mmdet::datasets/coco_detection.py', 'mmdet::schedules/schedule_1x.py',
|
||||
'mmdet::default_runtime.py'
|
||||
]
|
||||
|
||||
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
|
||||
|
||||
train_dataloader = dict(dataset=dict(data_root=data_root, ))
|
||||
|
||||
visualizer = None
|
||||
# custom_hooks = [dict(type='DetVisualizationHook', interval=10)]
|
||||
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume = False
|
||||
|
||||
# TODO: support auto scaling lr
|
||||
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
# model settings
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='mmrazor.OneShotMutableOP',
|
||||
candidates=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||
shuffle_5x5=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||
shuffle_7x7=dict(
|
||||
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||
shuffle_xception=dict(
|
||||
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
|
||||
))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 3 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||
[64, 4, _STAGE_MUTABLE],
|
||||
[160, 4, _STAGE_MUTABLE],
|
||||
[320, 8, _STAGE_MUTABLE],
|
||||
[640, 4, _STAGE_MUTABLE],
|
||||
]
|
||||
|
||||
supernet = dict(
|
||||
type='RetinaNet',
|
||||
data_preprocessor=dict(
|
||||
type='DetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32),
|
||||
backbone=dict(
|
||||
type='mmrazor.SearchableShuffleNetV2',
|
||||
arch_setting=arch_setting,
|
||||
norm_cfg=norm_cfg,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
widen_factor=1.0,
|
||||
with_last_layer=False),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[64, 160, 320, 640],
|
||||
out_channels=256,
|
||||
num_outs=5),
|
||||
bbox_head=dict(
|
||||
type='RetinaHead',
|
||||
num_classes=80,
|
||||
in_channels=256,
|
||||
stacked_convs=4,
|
||||
feat_channels=256,
|
||||
anchor_generator=dict(
|
||||
type='AnchorGenerator',
|
||||
octave_base_scale=4,
|
||||
scales_per_octave=3,
|
||||
ratios=[0.5, 1.0, 2.0],
|
||||
strides=[8, 16, 32, 64, 128]),
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[.0, .0, .0, .0],
|
||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||
loss_cls=dict(
|
||||
type='FocalLoss',
|
||||
use_sigmoid=True,
|
||||
gamma=2.0,
|
||||
alpha=0.25,
|
||||
loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.5,
|
||||
neg_iou_thr=0.4,
|
||||
min_pos_iou=0,
|
||||
ignore_iof_thr=-1),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
test_cfg=dict(
|
||||
nms_pre=1000,
|
||||
min_bbox_size=0,
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100))
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@@ -0,0 +1,24 @@
|
|||
modules:
|
||||
backbone.layer1.0: mb_k3e1
|
||||
backbone.layer2.0: mb_k5e3
|
||||
backbone.layer2.1: mb_k5e3
|
||||
backbone.layer2.2: identity
|
||||
backbone.layer2.3: mb_k3e3
|
||||
backbone.layer3.0: mb_k3e3
|
||||
backbone.layer3.1: identity
|
||||
backbone.layer3.2: identity
|
||||
backbone.layer3.3: mb_k3e3
|
||||
backbone.layer4.0: mb_k7e6
|
||||
backbone.layer4.1: identity
|
||||
backbone.layer4.2: mb_k7e3
|
||||
backbone.layer4.3: mb_k7e3
|
||||
backbone.layer5.0: mb_k3e3
|
||||
backbone.layer5.1: mb_k3e3
|
||||
backbone.layer5.2: mb_k7e3
|
||||
backbone.layer5.3: mb_k5e3
|
||||
backbone.layer6.0: mb_k5e6
|
||||
backbone.layer6.1: mb_k7e3
|
||||
backbone.layer6.2: mb_k7e3
|
||||
backbone.layer6.3: mb_k7e3
|
||||
backbone.layer7.0: mb_k5e6
|
||||
channels:
|
|
@@ -1,66 +0,0 @@
|
|||
stage_0_block_0:
|
||||
chosen:
|
||||
- mb_k3e1
|
||||
stage_1_block_0:
|
||||
chosen:
|
||||
- mb_k5e3
|
||||
stage_1_block_1:
|
||||
chosen:
|
||||
- mb_k5e3
|
||||
stage_1_block_2:
|
||||
chosen:
|
||||
- identity
|
||||
stage_1_block_3:
|
||||
chosen:
|
||||
- mb_k3e3
|
||||
stage_2_block_0:
|
||||
chosen:
|
||||
- mb_k3e3
|
||||
stage_2_block_1:
|
||||
chosen:
|
||||
- identity
|
||||
stage_2_block_2:
|
||||
chosen:
|
||||
- identity
|
||||
stage_2_block_3:
|
||||
chosen:
|
||||
- mb_k3e3
|
||||
stage_3_block_0:
|
||||
chosen:
|
||||
- mb_k7e6
|
||||
stage_3_block_1:
|
||||
chosen:
|
||||
- identity
|
||||
stage_3_block_2:
|
||||
chosen:
|
||||
- mb_k7e3
|
||||
stage_3_block_3:
|
||||
chosen:
|
||||
- mb_k7e3
|
||||
stage_4_block_0:
|
||||
chosen:
|
||||
- mb_k3e3
|
||||
stage_4_block_1:
|
||||
chosen:
|
||||
- mb_k3e3
|
||||
stage_4_block_2:
|
||||
chosen:
|
||||
- mb_k7e3
|
||||
stage_4_block_3:
|
||||
chosen:
|
||||
- mb_k5e3
|
||||
stage_5_block_0:
|
||||
chosen:
|
||||
- mb_k5e6
|
||||
stage_5_block_1:
|
||||
chosen:
|
||||
- mb_k7e3
|
||||
stage_5_block_2:
|
||||
chosen:
|
||||
- mb_k7e3
|
||||
stage_5_block_3:
|
||||
chosen:
|
||||
- mb_k7e3
|
||||
stage_6_block_0:
|
||||
chosen:
|
||||
- mb_k5e6
|
|
@@ -1,60 +0,0 @@
|
|||
stage_0_block_0:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_0_block_1:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_0_block_2:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_0_block_3:
|
||||
chosen:
|
||||
- shuffle_5x5
|
||||
stage_1_block_0:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_1_block_1:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_1_block_2:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_1_block_3:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_0:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_2_block_1:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_2:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_2_block_3:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_2_block_4:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_5:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_6:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_2_block_7:
|
||||
chosen:
|
||||
- shuffle_3x3
|
||||
stage_3_block_0:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_3_block_1:
|
||||
chosen:
|
||||
- shuffle_7x7
|
||||
stage_3_block_2:
|
||||
chosen:
|
||||
- shuffle_xception
|
||||
stage_3_block_3:
|
||||
chosen:
|
||||
- shuffle_xception
|
|
@@ -0,0 +1,22 @@
|
|||
modules:
|
||||
backbone.layers.0.0: shuffle_7x7
|
||||
backbone.layers.0.1: shuffle_3x3
|
||||
backbone.layers.0.2: shuffle_7x7
|
||||
backbone.layers.0.3: shuffle_3x3
|
||||
backbone.layers.1.0: shuffle_xception
|
||||
backbone.layers.1.1: shuffle_5x5
|
||||
backbone.layers.1.2: shuffle_5x5
|
||||
backbone.layers.1.3: shuffle_3x3
|
||||
backbone.layers.2.0: shuffle_3x3
|
||||
backbone.layers.2.1: shuffle_5x5
|
||||
backbone.layers.2.2: shuffle_3x3
|
||||
backbone.layers.2.3: shuffle_5x5
|
||||
backbone.layers.2.4: shuffle_3x3
|
||||
backbone.layers.2.5: shuffle_xception
|
||||
backbone.layers.2.6: shuffle_5x5
|
||||
backbone.layers.2.7: shuffle_7x7
|
||||
backbone.layers.3.0: shuffle_7x7
|
||||
backbone.layers.3.1: shuffle_3x3
|
||||
backbone.layers.3.2: shuffle_5x5
|
||||
backbone.layers.3.3: shuffle_xception
|
||||
channels:
|
|
@@ -1,20 +0,0 @@
|
|||
_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py']
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=512,
|
||||
workers_per_gpu=16,
|
||||
)
|
||||
|
||||
algorithm = dict(bn_training_mode=True)
|
||||
|
||||
searcher = dict(
|
||||
type='EvolutionSearcher',
|
||||
candidate_pool_size=50,
|
||||
candidate_top_k=10,
|
||||
constraints=dict(flops=465 * 1e6),
|
||||
metrics='accuracy',
|
||||
score_key='accuracy_top-1',
|
||||
max_epoch=20,
|
||||
num_mutation=25,
|
||||
num_crossover=25,
|
||||
mutate_prob=0.1)
|
|
@@ -1,20 +0,0 @@
|
|||
_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k.py']
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=2048,
|
||||
workers_per_gpu=16,
|
||||
)
|
||||
|
||||
algorithm = dict(bn_training_mode=True)
|
||||
|
||||
searcher = dict(
|
||||
type='EvolutionSearcher',
|
||||
candidate_pool_size=50,
|
||||
candidate_top_k=10,
|
||||
constraints=dict(flops=330 * 1e6),
|
||||
metrics='accuracy',
|
||||
score_key='accuracy_top-1',
|
||||
max_epoch=20,
|
||||
num_mutation=25,
|
||||
num_crossover=25,
|
||||
mutate_prob=0.1)
|
|
@@ -1,27 +0,0 @@
|
|||
_base_ = [
|
||||
'./spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py',
|
||||
]
|
||||
|
||||
img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.], to_rgb=False)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='RandomResizedCrop', size=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='ToTensor', keys=['gt_label']),
|
||||
dict(type='Collect', keys=['img', 'gt_label'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', size=(256, -1)),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img'])
|
||||
]
|
||||
data = dict(
|
||||
train=dict(pipeline=train_pipeline),
|
||||
val=dict(pipeline=test_pipeline),
|
||||
test=dict(pipeline=test_pipeline))
|
|
@@ -1,13 +0,0 @@
|
|||
_base_ = [
|
||||
'./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py',
|
||||
]
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_mobilenet_subnet/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_mutable_cfg.yaml' # noqa: E501
|
||||
|
||||
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
|
||||
evaluation = dict(interval=10000, metric='accuracy')
|
||||
checkpoint_config = dict(interval=30000)
|
||||
|
||||
runner = dict(max_iters=300000)
|
||||
find_unused_parameters = False
|
|
@@ -0,0 +1,8 @@
|
|||
_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py']
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
fix_subnet = 'configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml' # noqa: E501
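# The referenced YAML lists the chosen operation per searchable module in the
# new 2.0 format, e.g. (an illustrative excerpt of that format):
#   modules:
#     backbone.layer1.0: mb_k3e1
#     backbone.layer2.0: mb_k5e3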
|
||||
|
||||
model = dict(fix_subnet=fix_subnet)
|
||||
|
||||
find_unused_parameters = False
|
|
@@ -1,11 +0,0 @@
|
|||
_base_ = [
|
||||
'./spos_supernet_shufflenetv2_8xb128_in1k.py',
|
||||
]
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml' # noqa: E501
|
||||
|
||||
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
|
||||
|
||||
runner = dict(max_iters=300000)
|
||||
find_unused_parameters = False
|
|
@@ -0,0 +1,9 @@
|
|||
_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py']
|
||||
|
||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||
|
||||
model = dict(fix_subnet=fix_subnet)
|
||||
|
||||
find_unused_parameters = False
|
|
@@ -1,101 +0,0 @@
|
|||
_base_ = [
|
||||
'../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
|
||||
'../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
|
||||
'../../_base_/mmcls_runtime.py'
|
||||
]
|
||||
norm_cfg = dict(type='BN')
|
||||
model = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
backbone=dict(
|
||||
type='SearchableMobileNet',
|
||||
first_channels=40,
|
||||
last_channels=1728,
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
arch_setting_type='proxyless_gpu'),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1728,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(
|
||||
type='OneShotModuleMutator',
|
||||
placeholder_mapping=dict(
|
||||
searchable_blocks=dict(
|
||||
type='OneShotMutableOP',
|
||||
choices=dict(
|
||||
mb_k3e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k5e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k7e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k3e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k5e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k7e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
identity=dict(type='Identity'))),
|
||||
first_blocks=dict(
|
||||
type='OneShotMutableOP',
|
||||
choices=dict(
|
||||
mb_k3e1=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=1,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')), ))))
|
||||
|
||||
algorithm = dict(
|
||||
type='SPOS',
|
||||
architecture=dict(
|
||||
type='MMClsArchitecture',
|
||||
model=model,
|
||||
),
|
||||
mutator=mutator,
|
||||
distiller=None,
|
||||
retraining=False,
|
||||
)
|
||||
|
||||
runner = dict(max_iters=150000)
|
||||
evaluation = dict(interval=10000, metric='accuracy')
|
||||
|
||||
# checkpoint saving
|
||||
checkpoint_config = dict(interval=30000)
|
||||
|
||||
find_unused_parameters = True
|
|
@@ -0,0 +1,245 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[0., 0., 0.],
|
||||
std=[1., 1., 1.],
|
||||
# convert image from BGR to RGB
|
||||
to_rgb=False,
|
||||
)
|
||||
|
||||
file_client_args = dict(
|
||||
backend='petrel',
|
||||
path_mapping=dict({
|
||||
'./data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||
'data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||
}))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='RandomResizedCrop', scale=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(
|
||||
type='ResizeEdge',
|
||||
scale=256,
|
||||
edge='short',
|
||||
backend='pillow',
|
||||
interpolation='bicubic'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=8,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
# /mnt/lustre/share_data/wangjiaqi/data/imagenet
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=8,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# scheduler
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
|
||||
]
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||
val_cfg = dict()
|
||||
test_cfg = dict()
|
||||
|
||||
# runtime
|
||||
|
||||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
log_processor = dict(
|
||||
window_size=100,
|
||||
by_epoch=False,
|
||||
custom_cfg=[
|
||||
dict(
|
||||
data_src='loss',
|
||||
log_name='loss_large_window',
|
||||
method_name='mean',
|
||||
window_size=100)
|
||||
])
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
by_epoch=False,
|
||||
interval=10000,
|
||||
save_last=True,
|
||||
max_keep_ckpts=3),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
visualizer = None
|
||||
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
# vis_backends = [dict(type='LocalVisBackend')]
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
# model
|
||||
norm_cfg = dict(type='BN')
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k5e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k7e3=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=3,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k3e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k5e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=5,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
mb_k7e6=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=7,
|
||||
expand_ratio=6,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')),
|
||||
identity=dict(type='Identity'),
|
||||
))
|
||||
|
||||
_FIRST_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
mb_k3e1=dict(
|
||||
type='MBBlock',
|
||||
kernel_size=3,
|
||||
expand_ratio=1,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU6')), ))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 4 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, stride, mutable_cfg.
|
||||
[24, 1, 1, _FIRST_MUTABLE],
|
||||
[32, 4, 2, _STAGE_MUTABLE],
|
||||
[56, 4, 2, _STAGE_MUTABLE],
|
||||
[112, 4, 2, _STAGE_MUTABLE],
|
||||
[128, 4, 1, _STAGE_MUTABLE],
|
||||
[256, 4, 2, _STAGE_MUTABLE],
|
||||
[432, 1, 1, _STAGE_MUTABLE]
|
||||
]
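# An illustrative reading of one entry above (assuming the
# [channel, num_blocks, stride, mutable_cfg] convention): [32, 4, 2,
# _STAGE_MUTABLE] builds a stage of four searchable blocks with 32 output
# channels, where only the first block downsamples with stride 2.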
|
||||
|
||||
norm_cfg = dict(type='BN')
|
||||
supernet = dict(
|
||||
type='ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
_scope_='mmrazor',
|
||||
type='SearchableMobileNet',
|
||||
first_channels=40,
|
||||
last_channels=1728,
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
arch_setting=arch_setting),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1728,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@@ -1,59 +0,0 @@
|
|||
_base_ = [
|
||||
'../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
|
||||
'../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
|
||||
'../../_base_/mmcls_runtime.py'
|
||||
]
|
||||
norm_cfg = dict(type='BN')
|
||||
model = dict(
|
||||
type='mmcls.ImageClassifier',
|
||||
backbone=dict(
|
||||
type='SearchableShuffleNetV2', widen_factor=1.0, norm_cfg=norm_cfg),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(
|
||||
type='OneShotModuleMutator',
|
||||
placeholder_mapping=dict(
|
||||
all_blocks=dict(
|
||||
type='OneShotMutableOP',
|
||||
choices=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||
shuffle_5x5=dict(
|
||||
type='ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||
shuffle_7x7=dict(
|
||||
type='ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||
shuffle_xception=dict(
|
||||
type='ShuffleXception', norm_cfg=norm_cfg),
|
||||
))))
|
||||
|
||||
algorithm = dict(
|
||||
type='SPOS',
|
||||
architecture=dict(
|
||||
type='MMClsArchitecture',
|
||||
model=model,
|
||||
),
|
||||
mutator=mutator,
|
||||
distiller=None,
|
||||
retraining=False,
|
||||
)
|
||||
|
||||
runner = dict(max_iters=150000)
|
||||
evaluation = dict(interval=1000, metric='accuracy')
|
||||
|
||||
# checkpoint saving
|
||||
checkpoint_config = dict(interval=1000)
|
||||
|
||||
find_unused_parameters = True
|
|
@@ -0,0 +1,214 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
preprocess_cfg = dict(
|
||||
# RGB format normalization parameters
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
# convert image from BGR to RGB
|
||||
to_rgb=True,
|
||||
)
|
||||
|
||||
file_client_args = dict(
|
||||
backend='petrel',
|
||||
path_mapping=dict({
|
||||
'./data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||
'data/imagenet':
|
||||
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||
}))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='RandomResizedCrop', scale=224),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='ResizeEdge', scale=256, edge='short', backend='cv2'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='PackClsInputs'),
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/train.txt',
|
||||
data_prefix='train',
|
||||
pipeline=train_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
persistent_workers=True,
|
||||
)
|
||||
|
||||
# /mnt/lustre/share_data/wangjiaqi/data/imagenet
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
num_workers=5,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root='/mnt/cache/share/images',
|
||||
ann_file='meta/val.txt',
|
||||
data_prefix='val',
|
||||
pipeline=test_pipeline),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
persistent_workers=True,
|
||||
)
|
||||
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||
|
||||
# If you want standard test, please manually configure the test dataset
|
||||
test_dataloader = val_dataloader
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
# scheduler
|
||||
|
||||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||
clip_grad=None)
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
|
||||
]
|
||||
|
||||
# train, val, test setting
|
||||
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||
val_cfg = dict()
|
||||
test_cfg = dict()
|
||||
|
||||
# runtime
|
||||
|
||||
# defaults to use registries in mmcls
|
||||
default_scope = 'mmcls'
|
||||
|
||||
log_processor = dict(
|
||||
window_size=100,
|
||||
by_epoch=False,
|
||||
custom_cfg=[
|
||||
dict(
|
||||
data_src='loss',
|
||||
log_name='loss_large_window',
|
||||
method_name='mean',
|
||||
window_size=100)
|
||||
])
|
||||
|
||||
# configure default hooks
|
||||
default_hooks = dict(
|
||||
# record the time of every iteration.
|
||||
timer=dict(type='IterTimerHook'),
|
||||
|
||||
# print log every 100 iterations.
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
|
||||
# enable the parameter scheduler.
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
|
||||
# save checkpoint every 10000 iterations.
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
by_epoch=False,
|
||||
interval=10000,
|
||||
save_last=True,
|
||||
max_keep_ckpts=3),
|
||||
|
||||
# set sampler seed in distributed environment.
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
|
||||
# validation results visualization, set True to enable it.
|
||||
visualization=dict(type='VisualizationHook', enable=False),
|
||||
)
|
||||
|
||||
# configure environment
|
||||
env_cfg = dict(
|
||||
# whether to enable cudnn benchmark
|
||||
cudnn_benchmark=False,
|
||||
|
||||
# set multi process parameters
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
|
||||
# set distributed parameters
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
# set visualizer
|
||||
visualizer = None
|
||||
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
# vis_backends = [dict(type='LocalVisBackend')]
|
||||
|
||||
# set log level
|
||||
log_level = 'INFO'
|
||||
|
||||
# load from which checkpoint
|
||||
load_from = None
|
||||
|
||||
# "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth"
|
||||
|
||||
# whether to resume training from the loaded checkpoint
|
||||
resume = False
|
||||
|
||||
# model
|
||||
|
||||
_STAGE_MUTABLE = dict(
|
||||
_scope_='mmrazor',
|
||||
type='OneShotMutableOP',
|
||||
candidates=dict(
|
||||
shuffle_3x3=dict(
|
||||
type='ShuffleBlock', kernel_size=3, norm_cfg=dict(type='BN')),
|
||||
shuffle_5x5=dict(
|
||||
type='ShuffleBlock', kernel_size=5, norm_cfg=dict(type='BN')),
|
||||
shuffle_7x7=dict(
|
||||
type='ShuffleBlock', kernel_size=7, norm_cfg=dict(type='BN')),
|
||||
shuffle_xception=dict(
|
||||
type='ShuffleXception', norm_cfg=dict(type='BN')),
|
||||
))
|
||||
|
||||
arch_setting = [
|
||||
# Parameters to build layers. 3 parameters are needed to construct a
|
||||
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||
[64, 4, _STAGE_MUTABLE],
|
||||
[160, 4, _STAGE_MUTABLE],
|
||||
[320, 8, _STAGE_MUTABLE],
|
||||
[640, 4, _STAGE_MUTABLE],
|
||||
]
|
||||
|
||||
norm_cfg = dict(type='BN')
|
||||
supernet = dict(
|
||||
type='ImageClassifier',
|
||||
data_preprocessor=preprocess_cfg,
|
||||
backbone=dict(
|
||||
_scope_='mmrazor',
|
||||
type='SearchableShuffleNetV2',
|
||||
widen_factor=1.0,
|
||||
norm_cfg=norm_cfg,
|
||||
arch_setting=arch_setting),
|
||||
neck=dict(type='GlobalAveragePooling'),
|
||||
head=dict(
|
||||
type='LinearClsHead',
|
||||
num_classes=1000,
|
||||
in_channels=1024,
|
||||
loss=dict(
|
||||
type='LabelSmoothLoss',
|
||||
num_classes=1000,
|
||||
label_smooth_val=0.1,
|
||||
mode='original',
|
||||
loss_weight=1.0),
|
||||
topk=(1, 5),
|
||||
),
|
||||
)
|
||||
|
||||
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||
|
||||
model = dict(
|
||||
type='mmrazor.SPOS',
|
||||
architecture=supernet,
|
||||
mutator=mutator,
|
||||
# fix_subnet='configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml'
|
||||
)
|
||||
|
||||
find_unused_parameters = True
|
|
@@ -0,0 +1,372 @@
|
|||
from collections import OrderedDict
|
||||
|
||||
import torch
|
||||
from mmengine.config import Config
|
||||
|
||||
from mmrazor.core import * # noqa: F401,F403
|
||||
from mmrazor.models import * # noqa: F401,F403
|
||||
from mmrazor.registry import MODELS
|
||||
from mmrazor.utils import register_all_modules
|
||||
|
||||
|
||||
def convert_spos_key(old_path, new_path):
|
||||
old_dict = torch.load(old_path)
|
||||
new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
|
||||
|
||||
mapping = {
|
||||
'choices': '_candidates',
|
||||
'architecture.': '',
|
||||
'model.': '',
|
||||
}
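# Illustrative effect of the mapping on one assumed 1.x key:
#   'architecture.model.backbone.layers.0.0.choices.shuffle_3x3.conv.weight'
# becomes, after the replacements and the 'architecture.' re-prefixing below,
#   'architecture.backbone.layers.0.0._candidates.shuffle_3x3.conv.weight'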
|
||||
|
||||
for k, v in old_dict['state_dict'].items():
|
||||
new_key = k
|
||||
for _from, _to in mapping.items():
|
||||
new_key = new_key.replace(_from, _to)
|
||||
|
||||
new_key = f'architecture.{new_key}'
|
||||
|
||||
new_dict['state_dict'][new_key] = v
|
||||
|
||||
torch.save(new_dict, new_path)
|
||||
|
||||
|
||||
def convert_detnas_key(old_path, new_path):
|
||||
old_dict = torch.load(old_path)
|
||||
new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
|
||||
|
||||
mapping = {
|
||||
'choices': '_candidates',
|
||||
'model.': '',
|
||||
}
|
||||
|
||||
for k, v in old_dict['state_dict'].items():
|
||||
new_key = k
|
||||
for _from, _to in mapping.items():
|
||||
new_key = new_key.replace(_from, _to)
|
||||
|
||||
new_dict['state_dict'][new_key] = v
|
||||
torch.save(new_dict, new_path)
|
||||
|
||||
|
||||
def convert_anglenas_key(old_path, new_path):
|
||||
old_dict = torch.load(old_path)
|
||||
new_dict = {'state_dict': {}}
|
||||
|
||||
mapping = {
|
||||
'choices': '_candidates',
|
||||
'model.': '',
|
||||
'mbv2': 'mb',
|
||||
}
|
||||
|
||||
for k, v in old_dict.items():
|
||||
new_key = k
|
||||
for _from, _to in mapping.items():
|
||||
new_key = new_key.replace(_from, _to)
|
||||
|
||||
new_dict['state_dict'][new_key] = v
|
||||
torch.save(new_dict, new_path)
|
||||
|
||||
|
||||
def convert_darts_key(old_path, new_path):
|
||||
old_dict = torch.load(old_path)
|
||||
new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
|
||||
cfg = Config.fromfile(
|
||||
'configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py')
|
||||
# import ipdb; ipdb.set_trace()
|
||||
model = MODELS.build(cfg.model)
|
||||
|
||||
print('============> module name')
|
||||
for name, module in model.state_dict().items():
|
||||
print(name)
|
||||
|
||||
mapping = {
|
||||
'choices': '_candidates',
|
||||
'model.': '',
|
||||
'edges': 'route',
|
||||
}
|
||||
|
||||
for k, v in old_dict['state_dict'].items():
|
||||
new_key = k
|
||||
for _from, _to in mapping.items():
|
||||
new_key = new_key.replace(_from, _to)
|
||||
# cells.0.nodes.0.edges.choices.normal_n2_p1.0.choices.sep_conv_3x3.conv1.2.weight
|
||||
splited_list = new_key.split('.')
|
||||
if len(splited_list) > 10 and splited_list[-6] == '0':
|
||||
del splited_list[-6]
|
||||
new_key = '.'.join(splited_list)
|
||||
elif len(splited_list) > 10 and splited_list[-5] == '0':
|
||||
del splited_list[-5]
|
||||
new_key = '.'.join(splited_list)
|
||||
|
||||
new_dict['state_dict'][new_key] = v
|
||||
|
||||
print('============> new dict')
|
||||
for key, v in new_dict['state_dict'].items():
|
||||
print(key)
|
||||
|
||||
model.load_state_dict(new_dict['state_dict'], strict=True)
|
||||
|
||||
torch.save(new_dict, new_path)
|
||||
|
||||
|
||||
def convert_cream_key(old_path, new_path):
|
||||
|
||||
old_dict = torch.load(old_path, map_location=torch.device('cpu'))
|
||||
new_dict = {'state_dict': {}} # noqa: F841
|
||||
|
||||
ordered_old_dict = OrderedDict(old_dict['state_dict'])
|
||||
|
||||
cfg = Config.fromfile('configs/nas/cream/cream_14_subnet_mobilenet.py')
|
||||
model = MODELS.build(cfg.model)
|
||||
|
||||
model_name_list = []
|
||||
model_module_list = []
|
||||
|
||||
# TODO show structure of model and checkpoint
|
||||
print('=' * 30, 'the key of model')
|
||||
for k, v in model.state_dict().items():
|
||||
print(k)
|
||||
|
||||
print('=' * 30, 'the key of ckpt')
|
||||
for k, v in ordered_old_dict.items():
|
||||
print(k)
|
||||
|
||||
# final mapping dict
|
||||
mapping = {}
|
||||
|
||||
middle_razor2cream = { # noqa: F841
|
||||
# point-wise expansion
|
||||
'expand_conv.conv.weight': 'conv_pw.weight',
|
||||
'expand_conv.bn.weight': 'bn1.weight',
|
||||
'expand_conv.bn.bias': 'bn1.bias',
|
||||
'expand_conv.bn.running_mean': 'bn1.running_mean',
|
||||
'expand_conv.bn.running_var': 'bn1.running_var',
|
||||
'expand_conv.bn.num_batches_tracked': 'bn1.num_batches_tracked',
|
||||
|
||||
# se
|
||||
'se.conv1.conv.weight': 'se.conv_reduce.weight',
|
||||
'se.conv1.conv.bias': 'se.conv_reduce.bias',
|
||||
'se.conv2.conv.weight': 'se.conv_expand.weight',
|
||||
'se.conv2.conv.bias': 'se.conv_expand.bias',
|
||||
|
||||
# depth-wise conv
|
||||
'depthwise_conv.conv.weight': 'conv_dw.weight',
|
||||
'depthwise_conv.bn.weight': 'bn2.weight',
|
||||
'depthwise_conv.bn.bias': 'bn2.bias',
|
||||
'depthwise_conv.bn.running_mean': 'bn2.running_mean',
|
||||
'depthwise_conv.bn.running_var': 'bn2.running_var',
|
||||
'depthwise_conv.bn.num_batches_tracked': 'bn2.num_batches_tracked',
|
||||
|
||||
# point-wise linear projection
|
||||
'linear_conv.conv.weight': 'conv_pwl.weight',
|
||||
'linear_conv.bn.weight': 'bn3.weight',
|
||||
'linear_conv.bn.bias': 'bn3.bias',
|
||||
'linear_conv.bn.running_mean': 'bn3.running_mean',
|
||||
'linear_conv.bn.running_var': 'bn3.running_var',
|
||||
'linear_conv.bn.num_batches_tracked': 'bn3.num_batches_tracked',
|
||||
|
||||
}
|
||||
|
||||
first_razor2cream = {
|
||||
# for first depthsepconv dw
|
||||
'conv_dw.conv.weight': 'conv_dw.weight',
|
||||
'conv_dw.bn.weight': 'bn1.weight',
|
||||
'conv_dw.bn.bias': 'bn1.bias',
|
||||
'conv_dw.bn.running_mean': 'bn1.running_mean',
|
||||
'conv_dw.bn.running_var': 'bn1.running_var',
|
||||
'conv_dw.bn.num_batches_tracked': 'bn1.num_batches_tracked',
|
||||
|
||||
# for first depthsepconv pw
|
||||
'conv_pw.conv.weight': 'conv_pw.weight',
|
||||
'conv_pw.bn.weight': 'bn2.weight',
|
||||
'conv_pw.bn.bias': 'bn2.bias',
|
||||
'conv_pw.bn.running_mean': 'bn2.running_mean',
|
||||
'conv_pw.bn.running_var': 'bn2.running_var',
|
||||
'conv_pw.bn.num_batches_tracked': 'bn2.num_batches_tracked',
|
||||
|
||||
# se
|
||||
'se.conv1.conv.weight': 'se.conv_reduce.weight',
|
||||
'se.conv1.conv.bias': 'se.conv_reduce.bias',
|
||||
'se.conv2.conv.weight': 'se.conv_expand.weight',
|
||||
'se.conv2.conv.bias': 'se.conv_expand.bias',
|
||||
}
|
||||
|
||||
last_razor2cream = {
|
||||
# for last convbnact
|
||||
'conv2.conv.weight': 'conv.weight',
|
||||
'conv2.bn.weight': 'bn1.weight',
|
||||
'conv2.bn.bias': 'bn1.bias',
|
||||
'conv2.bn.running_mean': 'bn1.running_mean',
|
||||
'conv2.bn.running_var': 'bn1.running_var',
|
||||
'conv2.bn.num_batches_tracked': 'bn1.num_batches_tracked',
|
||||
}
|
||||
|
||||
middle_cream2razor = {v: k for k, v in middle_razor2cream.items()}
|
||||
first_cream2razor = {v: k for k, v in first_razor2cream.items()}
|
||||
last_cream2razor = {v: k for k, v in last_razor2cream.items()}
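# Illustrative end-to-end mapping implied by the tables above (the exact key
# names are assumptions): a cream key 'blocks.2.1.conv_pw.weight' falls in
# group '2.1', which corresponds to razor group '3.1', so it is mapped to the
# razor parameter ending in 'layer3.1.expand_conv.conv.weight' via
# middle_cream2razor.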
|
||||
|
||||
# 1. group the razor's module names
|
||||
grouped_razor_module_name = {
|
||||
'middle': {},
|
||||
'first': [],
|
||||
'last': [],
|
||||
}
|
||||
|
||||
for name, module in model.state_dict().items():
|
||||
tmp_name: str = name.split(
|
||||
'backbone.')[1] if 'backbone' in name else name
|
||||
model_name_list.append(tmp_name)
|
||||
model_module_list.append(module)
|
||||
|
||||
if 'conv1' in tmp_name and len(tmp_name) <= 35:
|
||||
# belong to stem conv
|
||||
grouped_razor_module_name['first'].append(name)
|
||||
elif 'head' in tmp_name:
|
||||
# belong to last linear
|
||||
grouped_razor_module_name['last'].append(name)
|
||||
else:
|
||||
# middle
|
||||
if tmp_name.startswith('layer'):
|
||||
key_of_middle = tmp_name[5:8]
|
||||
if key_of_middle not in grouped_razor_module_name['middle']:
|
||||
grouped_razor_module_name['middle'][key_of_middle] = [name]
|
||||
else:
|
||||
grouped_razor_module_name['middle'][key_of_middle].append(
|
||||
name)
|
||||
elif tmp_name.startswith('conv2'):
|
||||
key_of_middle = '7.0'
|
||||
if key_of_middle not in grouped_razor_module_name['middle']:
|
||||
grouped_razor_module_name['middle'][key_of_middle] = [name]
|
||||
else:
|
||||
grouped_razor_module_name['middle'][key_of_middle].append(
|
||||
name)
|
||||
|
||||
# 2. group the cream's module names
|
||||
grouped_cream_module_name = {
|
||||
'middle': {},
|
||||
'first': [],
|
||||
'last': [],
|
||||
}
|
||||
|
||||
for k in ordered_old_dict.keys():
|
||||
if 'classifier' in k or 'conv_head' in k:
|
||||
# last conv
|
||||
grouped_cream_module_name['last'].append(k)
|
||||
elif 'blocks' in k:
|
||||
# middle blocks
|
||||
key_of_middle = k[7:10]
|
||||
if key_of_middle not in grouped_cream_module_name['middle']:
|
||||
grouped_cream_module_name['middle'][key_of_middle] = [k]
|
||||
else:
|
||||
grouped_cream_module_name['middle'][key_of_middle].append(k)
|
||||
else:
|
||||
# first blocks
|
||||
grouped_cream_module_name['first'].append(k)
|
||||
|
||||
# 4. process the first modules
|
||||
for cream_item in grouped_cream_module_name['first']:
|
||||
if 'conv_stem' in cream_item:
|
||||
# get corresponding item from razor
|
||||
for razor_item in grouped_razor_module_name['first']:
|
||||
if 'conv.weight' in razor_item:
|
||||
mapping[cream_item] = razor_item
|
||||
grouped_razor_module_name['first'].remove(razor_item)
|
||||
break
|
||||
else:
|
||||
kws = cream_item.split('.')[-1]
|
||||
# get corresponding item from razor
|
||||
for razor_item in grouped_razor_module_name['first']:
|
||||
if kws in razor_item:
|
||||
mapping[cream_item] = razor_item
|
||||
grouped_razor_module_name['first'].remove(razor_item)
|
||||
|
||||
# 5. process the last modules
|
||||
for cream_item in grouped_cream_module_name['last']:
|
||||
if 'classifier' in cream_item:
|
||||
kws = cream_item.split('.')[-1]
|
||||
for razor_item in grouped_razor_module_name['last']:
|
||||
if 'fc' in razor_item:
|
||||
if kws in razor_item:
|
||||
mapping[cream_item] = razor_item
|
||||
grouped_razor_module_name['last'].remove(razor_item)
|
||||
break
|
||||
|
||||
elif 'conv_head' in cream_item:
|
||||
kws = cream_item.split('.')[-1]
|
||||
for razor_item in grouped_razor_module_name['last']:
|
||||
if 'head.conv2' in razor_item:
|
||||
if kws in razor_item:
|
||||
mapping[cream_item] = razor_item
|
||||
grouped_razor_module_name['last'].remove(razor_item)
|
||||
|
||||
# 6. process the middle modules
|
||||
for cream_group_id, cream_items in grouped_cream_module_name[
|
||||
'middle'].items():
|
||||
# get the corresponding group from razor
|
||||
razor_group_id: str = str(float(cream_group_id) + 1)
|
||||
razor_items: list = grouped_razor_module_name['middle'][razor_group_id]
|
||||
|
||||
if int(razor_group_id[0]) == 1:
|
||||
key_cream2razor = first_cream2razor
|
||||
elif int(razor_group_id[0]) == 7:
|
||||
key_cream2razor = last_cream2razor
|
||||
else:
|
||||
key_cream2razor = middle_cream2razor
|
||||
|
||||
# matching razor items and cream items
|
||||
for cream_item in cream_items:
|
||||
# traverse all of key_cream2razor
|
||||
for cream_match, razor_match in key_cream2razor.items():
|
||||
if cream_match in cream_item:
|
||||
# traverse razor_items to get the corresponding razor name
|
||||
for razor_item in razor_items:
|
||||
if razor_match in razor_item:
|
||||
mapping[cream_item] = razor_item
|
||||
break
|
||||
|
||||
print('=' * 100)
|
||||
print('length of mapping: ', len(mapping.keys()))
|
||||
for k, v in mapping.items():
|
||||
print(k, '\t=>\t', v)
|
||||
print('#' * 100)
|
||||
|
||||
# TODO DELETE this print
|
||||
print('**' * 20)
|
||||
for c, cm, r, rm in zip(ordered_old_dict.keys(), ordered_old_dict.values(),
|
||||
model_name_list, model_module_list):
|
||||
print(f'{c}: shape {cm.shape} => {r}: shape {rm.shape}')
|
||||
print('**' * 20)
|
||||
|
||||
for k, v in ordered_old_dict.items():
|
||||
print(f'Mapping from {k} to {mapping[k]}......')
|
||||
new_dict['state_dict'][mapping[k]] = v
|
||||
|
||||
model.load_state_dict(new_dict['state_dict'], strict=True)
|
||||
|
||||
torch.save(new_dict, new_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
register_all_modules(True)
|
||||
# old_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a.pth' # noqa: E501
|
||||
# new_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
|
||||
# convert_spos_key(old_path, new_path)
|
||||
|
||||
# old_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f.pth' # noqa: E501
|
||||
# new_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth' # noqa: E501
|
||||
# convert_detnas_key(old_path, new_path)
|
||||
|
||||
# old_path = './data/14.pth.tar'
|
||||
# new_path = './data/14_2.0.pth'
|
||||
# old_path = '/mnt/lustre/dongpeijie/14.pth.tar'
|
||||
# new_path = '/mnt/lustre/dongpeijie/14_2.0.pth'
|
||||
# convert_cream_key(old_path, new_path)
|
||||
|
||||
# old_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921.pth' # noqa: E501
|
||||
# new_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth' # noqa: E501
|
||||
# convert_darts_key(old_path, new_path)
|
||||
|
||||
old_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f.pth' # noqa: E501
|
||||
new_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth' # noqa: E501
|
||||
convert_anglenas_key(old_path, new_path)
|
|
@@ -0,0 +1,280 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import torch
|
||||
from mmengine import BaseDataElement
|
||||
from mmengine.model import BaseModel
|
||||
from mmengine.optim import OptimWrapper, OptimWrapperDict
|
||||
from torch import nn
|
||||
from torch.nn.modules.batchnorm import _BatchNorm
|
||||
|
||||
from mmrazor.models.mutators import DiffModuleMutator
|
||||
from mmrazor.models.subnet import (SINGLE_MUTATOR_RANDOM_SUBNET, FixSubnet,
|
||||
FixSubnetMixin)
|
||||
from mmrazor.registry import MODELS
|
||||
from ..base import BaseAlgorithm, LossResults
|
||||
|
||||
VALID_FIX_SUBNET = Union[str, FixSubnet, Dict[str, Dict[str, Any]]]
|
||||
|
||||
|
||||
@MODELS.register_module()
|
||||
class Darts(BaseAlgorithm, FixSubnetMixin):
|
||||
"""Implementation of `DARTS <https://arxiv.org/abs/1806.09055>`_
|
||||
|
||||
DARTS means Differentiable Architecture Search, a classic NAS algorithm.
|
||||
:class:`Darts` implements the APIs required by DARTS, as well as the
|
||||
supernet training and subnet retraining logic for each iteration.
|
||||
|
||||
Args:
|
||||
architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel`
|
||||
or built model. Corresponding to supernet in NAS algorithm.
|
||||
mutator (dict|:obj:`DiffModuleMutator`): The config of
|
||||
:class:`DiffModuleMutator` or built mutator.
|
||||
fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or
|
||||
loaded dict or built :obj:`FixSubnet`.
|
||||
norm_training (bool): Whether to set norm layers to training mode,
|
||||
namely, not freeze running stats (mean and var). Note: Effect on
|
||||
Batch Norm and its variants only. Defaults to False.
|
||||
data_preprocessor (dict, optional): The pre-process config of
|
||||
:class:`BaseDataPreprocessor`. Defaults to None.
|
||||
init_cfg (dict): Init config for ``BaseModule``.
|
||||
|
||||
Note:
|
||||
Darts has two training modes: supernet training and subnet retraining.
|
||||
If `fix_subnet` is None, it means supernet training.
|
||||
If `fix_subnet` is not None, it means subnet retraining.
|
||||
|
||||
Note:
|
||||
During supernet training, since each op is not fully trained, the
|
||||
statistics of :obj:`_BatchNorm` are inaccurate. This problem affects the
|
||||
evaluation of the performance of each subnet in the search phase. There
|
||||
are usually two ways to solve this problem, both of which require setting
|
||||
`norm_training` to True:
|
||||
|
||||
1) Using a large batch size, BNs use the mean and variance of the
|
||||
current batch during forward.
|
||||
2) Recalibrate the statistics of BN before searching.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
architecture: Union[BaseModel, Dict],
|
||||
mutator: Optional[Union[DiffModuleMutator, Dict]] = None,
|
||||
fix_subnet: Optional[VALID_FIX_SUBNET] = None,
|
||||
unroll: bool = False,
|
||||
norm_training: bool = False,
|
||||
data_preprocessor: Optional[Union[dict, nn.Module]] = None,
|
||||
init_cfg: Optional[dict] = None):
|
||||
super().__init__(architecture, data_preprocessor, init_cfg)
|
||||
|
||||
# Darts has two training modes: supernet training and subnet retraining.
|
||||
# If fix_subnet is not None, it means subnet retraining.
|
||||
if fix_subnet:
|
||||
# According to fix_subnet, delete the unchosen part of supernet
|
||||
self.load_fix_subnet(fix_subnet, prefix='architecture.')
|
||||
self.is_supernet = False
|
||||
else:
|
||||
assert mutator is not None, \
|
||||
'mutator cannot be None when fix_subnet is None.'
|
||||
if isinstance(mutator, DiffModuleMutator):
|
||||
self.mutator = mutator
|
||||
elif isinstance(mutator, dict):
|
||||
self.mutator = MODELS.build(mutator)
|
||||
else:
|
||||
raise TypeError('mutator should be a `dict` or '
|
||||
f'`DiffModuleMutator` instance, but got '
|
||||
f'{type(mutator)}')
|
||||
|
||||
# Mutator is an essential component of the NAS algorithm. It
|
||||
# provides some APIs commonly used by NAS.
|
||||
# Before using it, you must do some preparations according to
|
||||
# the supernet.
|
||||
self.mutator.prepare_from_supernet(self.architecture)
|
||||
self.is_supernet = True
|
||||
|
||||
self.norm_training = norm_training
|
||||
self.unroll = unroll
|
||||
|
||||
def sample_subnet(self) -> SINGLE_MUTATOR_RANDOM_SUBNET:
|
||||
"""Random sample subnet by mutator."""
|
||||
return self.mutator.sample_choices()
|
||||
|
||||
def set_subnet(self, subnet: SINGLE_MUTATOR_RANDOM_SUBNET):
|
||||
"""Set the subnet sampled by :meth:sample_subnet."""
|
||||
self.mutator.set_choices(subnet)
|
||||
|
||||
def loss(
|
||||
self,
|
||||
batch_inputs: torch.Tensor,
|
||||
data_samples: Optional[List[BaseDataElement]] = None,
|
||||
) -> LossResults:
|
||||
"""Calculate losses from a batch of inputs and data samples."""
|
||||
if self.is_supernet:
|
||||
random_subnet = self.sample_subnet()
|
||||
self.set_subnet(random_subnet)
|
||||
return self.architecture(batch_inputs, data_samples, mode='loss')
|
||||
else:
|
||||
return self.architecture(batch_inputs, data_samples, mode='loss')
|
||||
|
||||
def train(self, mode=True):
|
||||
"""Convert the model into eval mode while keep normalization layer
|
||||
unfreezed."""
|
||||
|
||||
super().train(mode)
|
||||
if self.norm_training and not mode:
|
||||
for module in self.architecture.modules():
|
||||
if isinstance(module, _BatchNorm):
|
||||
module.training = True
|
||||
|
||||
def train_step(self, data: List[dict],
|
||||
optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
|
||||
"""The iteration step during training.
|
||||
|
||||
This method defines an iteration step during training, except for the
|
||||
back propagation and optimizer updating, which are done in an optimizer
|
||||
hook. Note that in some complicated cases or models, the whole process
|
||||
including back propagation and optimizer updating is also defined in
|
||||
this method, such as GAN.
|
||||
Args:
|
||||
data (dict): The output of dataloader.
|
||||
optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
|
||||
runner is passed to ``train_step()``. This argument is unused
|
||||
and reserved.
|
||||
Returns:
|
||||
dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
|
||||
``num_samples``.
|
||||
``loss`` is a tensor for back propagation, which can be a
|
||||
weighted sum of multiple losses.
|
||||
``log_vars`` contains all the variables to be sent to the
|
||||
logger.
|
||||
``num_samples`` indicates the batch size (when the model is
|
||||
DDP, it means the batch size on each GPU), which is used for
|
||||
averaging the logs.
|
||||
"""
|
||||
if isinstance(data, (tuple, list)) and isinstance(
|
||||
optim_wrapper, OptimWrapperDict):
|
||||
assert len(data) == len(optim_wrapper), \
|
||||
f'The length of data {len(data)} should be equal to that of optimizers {len(optim_wrapper)}.' # noqa: E501
|
||||
|
||||
# TODO check the order of data
|
||||
train_supernet_data, train_arch_data = data
|
||||
|
||||
# TODO mutator optimizer zero_grad
|
||||
optim_wrapper.zero_grad()
|
||||
|
||||
if self.unroll:
|
||||
self._unrolled_backward(train_arch_data, train_supernet_data,
|
||||
optim_wrapper) # TODO optimizer
|
||||
else:
|
||||
# TODO process the input
|
||||
arch_loss = self.loss(train_arch_data) # noqa: F841
|
||||
# arch_loss.backward()
|
||||
|
||||
# TODO mutator optimizer step
|
||||
optim_wrapper.step()
|
||||
|
||||
model_loss = self.loss(train_supernet_data)
|
||||
|
||||
# TODO optimizer architecture zero_grad
|
||||
optim_wrapper.zero_grad()
|
||||
# model_loss.backward()
|
||||
|
||||
nn.utils.clip_grad_norm_(
|
||||
self.architecture.parameters(), max_norm=5, norm_type=2)
|
||||
|
||||
# TODO optimizer architecture step
|
||||
optim_wrapper.step()
|
||||
|
||||
outputs = dict(
|
||||
loss=model_loss,
|
||||
num_samples=len(train_supernet_data['img'].data))
|
||||
else:
|
||||
outputs = super().train_step(data, optim_wrapper)
|
||||
|
||||
return outputs
|
||||
|
||||
def _unrolled_backward(self, train_arch_data, train_supernet_data,
|
||||
optimizer):
|
||||
"""Compute unrolled loss and backward its gradients."""
|
||||
backup_params = copy.deepcopy(tuple(self.architecture.parameters()))
|
||||
|
||||
# do virtual step on training data
|
||||
lr = optimizer['architecture'].param_groups[0]['lr']
|
||||
momentum = optimizer['architecture'].param_groups[0]['momentum']
|
||||
weight_decay = optimizer['architecture'].param_groups[0][
|
||||
'weight_decay']
|
||||
self._compute_virtual_model(train_supernet_data, lr, momentum,
|
||||
weight_decay, optimizer)
|
||||
|
||||
# calculate unrolled loss on validation data
|
||||
# keep gradients for model here for compute hessian
|
||||
losses = self(**train_arch_data)
|
||||
loss, _ = self._parse_losses(losses)
|
||||
w_model, w_arch = tuple(self.architecture.parameters()), tuple(
|
||||
self.mutator.parameters())
|
||||
w_grads = torch.autograd.grad(loss, w_model + w_arch)
|
||||
d_model, d_arch = w_grads[:len(w_model)], w_grads[len(w_model):]
|
||||
|
||||
# compute hessian and final gradients
|
||||
hessian = self._compute_hessian(backup_params, d_model,
|
||||
train_supernet_data)
|
||||
with torch.no_grad():
|
||||
for param, d, h in zip(w_arch, d_arch, hessian):
|
||||
# gradient = dalpha - lr * hessian
|
||||
param.grad = d - lr * h
|
||||
|
||||
# restore weights
|
||||
self._restore_weights(backup_params)
|
||||
|
||||
def _compute_virtual_model(self, data, lr, momentum, weight_decay,
|
||||
optimizer):
|
||||
"""Compute unrolled weights w`"""
|
||||
# don't need zero_grad, using autograd to calculate gradients
|
||||
losses = self(**data)
|
||||
loss, _ = self._parse_losses(losses)
|
||||
gradients = torch.autograd.grad(loss, self.architecture.parameters())
|
||||
with torch.no_grad():
|
||||
for w, g in zip(self.architecture.parameters(), gradients):
|
||||
m = optimizer['architecture'].state[w].get(
|
||||
'momentum_buffer', 0.)
|
||||
w -= lr * (momentum * m + g + weight_decay * w)
|
||||
|
||||
def _restore_weights(self, backup_params):
|
||||
with torch.no_grad():
|
||||
for param, backup in zip(self.architecture.parameters(),
|
||||
backup_params):
|
||||
param.copy_(backup)
|
||||
|
||||
def _compute_hessian(self, backup_params, dw, data):
|
||||
"""
|
||||
dw = dw` { L_val(w`, alpha) }
|
||||
w+ = w + eps * dw
|
||||
w- = w - eps * dw
|
||||
hessian = (dalpha { L_trn(w+, alpha) } \
|
||||
- dalpha { L_trn(w-, alpha) }) / (2*eps)
|
||||
eps = 0.01 / ||dw||
|
||||
"""
|
||||
self._restore_weights(backup_params)
|
||||
norm = torch.cat([w.view(-1) for w in dw]).norm()
|
||||
eps = 0.01 / norm
|
||||
if norm < 1E-8:
|
||||
print(
|
||||
'In computing hessian, norm is smaller than 1E-8,',
|
||||
f'causing eps to be inaccurate: norm={norm.item():.6f}.')
|
||||
|
||||
dalphas = []
|
||||
for e in [eps, -2. * eps]:
|
||||
# w+ = w + eps*dw`, w- = w - eps*dw`
|
||||
with torch.no_grad():
|
||||
for p, d in zip(self.architecture.parameters(), dw):
|
||||
p += e * d
|
||||
|
||||
losses = self(**data)
|
||||
loss, _ = self._parse_losses(losses)
|
||||
dalphas.append(
|
||||
torch.autograd.grad(loss, tuple(self.mutator.parameters())))
|
||||
# dalpha { L_trn(w+) }, # dalpha { L_trn(w-) }
|
||||
dalpha_pos, dalpha_neg = dalphas
|
||||
hessian = [(p - n) / (2. * eps)
|
||||
for p, n in zip(dalpha_pos, dalpha_neg)]
|
||||
return hessian
|
|
@@ -1,3 +1,4 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .backbones import * # noqa: F401,F403
|
||||
from .components import * # noqa: F401,F403
|
||||
from .dynamic_op import * # noqa: F401,F403
|
||||
|
|
|
@@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union
|
|||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from mmcls.models.backbones.base_backbone import BaseBackbone
|
||||
from mmcv.cnn import build_activation_layer, build_norm_layer
|
||||
from torch import Tensor
|
||||
|
||||
|
@@ -126,12 +127,8 @@ class Node(nn.Module):
|
|||
super().__init__()
|
||||
edges = nn.ModuleDict()
|
||||
for i in range(num_prev_nodes):
|
||||
if i < num_downsample_nodes:
|
||||
stride = 2
|
||||
else:
|
||||
stride = 1
|
||||
|
||||
edge_id = '{}_p{}'.format(node_id, i)
|
||||
stride = 2 if i < num_downsample_nodes else 1
|
||||
edge_id = f'{node_id}_p{i}'
|
||||
|
||||
module_kwargs = dict(
|
||||
in_channels=channels,
|
||||
|
@@ -143,13 +140,14 @@ class Node(nn.Module):
|
|||
mutable_cfg.update(alias=edge_id)
|
||||
edges.add_module(edge_id, MODELS.build(mutable_cfg))
|
||||
|
||||
route_cfg.update(alias=node_id)
|
||||
route_cfg.update(edges=edges)
|
||||
self.edges = MODELS.build(route_cfg)
|
||||
self.route = MODELS.build(route_cfg)
|
||||
|
||||
def forward(self, prev_nodes: Union[List[Tensor],
|
||||
Tuple[Tensor]]) -> Tensor:
|
||||
"""Forward with the previous nodes list."""
|
||||
return self.edges(prev_nodes)
|
||||
return self.route(prev_nodes)
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
@@ -223,8 +221,7 @@ class Cell(nn.Module):
|
|||
cur_tensor = node(tensors)
|
||||
tensors.append(cur_tensor)
|
||||
|
||||
output = torch.cat(tensors[2:], dim=1)
|
||||
return output
|
||||
return torch.cat(tensors[2:], dim=1)
|
||||
|
||||
|
||||
class AuxiliaryModule(nn.Module):
|
||||
|
@@ -263,7 +260,7 @@ class AuxiliaryModule(nn.Module):
|
|||
|
||||
|
||||
@MODELS.register_module()
|
||||
class DartsBackbone(nn.Module, FixSubnetMixin):
|
||||
class DartsBackbone(BaseBackbone, FixSubnetMixin):
|
||||
"""Backbone of Differentiable Architecture Search (DARTS).
|
||||
|
||||
Args:
|
||||
|
@@ -348,7 +345,7 @@ class DartsBackbone(nn.Module, FixSubnetMixin):
|
|||
prev_reduction, reduction = reduction, False
|
||||
# Reduce feature map size and double channels at the 1/3
|
||||
# and 2/3 layers.
|
||||
if i == self.num_layers // 3 or i == 2 * self.num_layers // 3:
|
||||
if i in [self.num_layers // 3, 2 * self.num_layers // 3]:
|
||||
self.out_channels *= 2
|
||||
reduction = True
|
||||
|
||||
|
|
|
@@ -46,7 +46,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
|
|||
Examples:
|
||||
>>> mutable_cfg = dict(
|
||||
... type='OneShotMutableOP',
|
||||
... candidate_ops=dict(
|
||||
... candidates=dict(
|
||||
... mb_k3e1=dict(
|
||||
... type='MBBlock',
|
||||
... kernel_size=3,
|
||||
|
@@ -87,7 +87,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
|
|||
]
|
||||
) -> None:
|
||||
for index in out_indices:
|
||||
if index not in range(0, 8):
|
||||
if index not in range(8):
|
||||
raise ValueError('the item in out_indices must be in '
|
||||
f'range(0, 8). But received {index}')
|
||||
|
||||
|
@@ -147,6 +147,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
|
|||
conv_cfg=self.conv_cfg,
|
||||
norm_cfg=self.norm_cfg,
|
||||
act_cfg=self.act_cfg)
|
||||
|
||||
self.add_module('conv2', layer)
|
||||
self.layers.append('conv2')
|
||||
|
||||
|
|
|
@@ -48,7 +48,7 @@ class SearchableShuffleNetV2(BaseBackbone, FixSubnetMixin):
|
|||
Examples:
|
||||
>>> mutable_cfg = dict(
|
||||
... type='OneShotMutableOP',
|
||||
... candidate_ops=dict(
|
||||
... candidates=dict(
|
||||
... shuffle_3x3=dict(
|
||||
... type='ShuffleBlock',
|
||||
... kernel_size=3,
|
||||
|
|
|
@@ -0,0 +1,4 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .heads import CreamClsHead
|
||||
|
||||
__all__ = ['CreamClsHead']
|
|
@@ -0,0 +1,4 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .cream_head import CreamClsHead
|
||||
|
||||
__all__ = ['CreamClsHead']
|
|
@@ -0,0 +1,72 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from typing import Dict, Optional, Tuple
+
+from mmcls.models.heads import LinearClsHead
+from mmcv.cnn import ConvModule
+from torch import Tensor, nn
+
+from mmrazor.registry import MODELS
+
+
+@MODELS.register_module()
+class CreamClsHead(LinearClsHead):
+    """Linear classifier head for cream.
+
+    Args:
+        num_classes (int): Number of categories excluding the background
+            category.
+        in_channels (int): Number of channels in the input feature map.
+        num_features (int): Number of features in the conv2d.
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU6').
+        init_cfg (dict, optional): the config to control the initialization.
+            Defaults to ``dict(type='Normal', layer='Linear', std=0.01)``.
+    """
+
+    def __init__(self,
+                 num_classes: int,
+                 in_channels: int,
+                 num_features: int = 1280,
+                 act_cfg: Dict = dict(type='ReLU6'),
+                 init_cfg: Optional[dict] = dict(
+                     type='Normal', layer='Linear', std=0.01),
+                 **kwargs):
+        super().__init__(
+            num_classes=num_classes,
+            in_channels=in_channels,
+            init_cfg=init_cfg,
+            **kwargs)
+
+        layer = ConvModule(
+            in_channels=self.in_channels,
+            out_channels=num_features,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            conv_cfg=None,
+            norm_cfg=None,
+            act_cfg=act_cfg)
+
+        self.add_module('conv2', layer)
+
+        self.fc = nn.Linear(num_features, self.num_classes)
+
+    # def pre_logits(self, feats: Tuple[Tensor]) -> Tensor:
+    #     """The process before the final classification head.
+
+    #     The input ``feats`` is a tuple of tensor, and each tensor is the
+    #     feature of a backbone stage. In ``LinearClsHead``, we just obtain the
+    #     feature of the last stage.
+    #     """
+    #     # The LinearClsHead doesn't have other module, just return after
+    #     # unpacking.
+    #     return feats[-1]
+
+    def forward(self, feats: Tuple[Tensor]) -> Tensor:
+        """The forward process."""
+        logits = self.pre_logits(feats)
+        logits = logits.unsqueeze(-1).unsqueeze(-1)
+        logits = self.conv2(logits)
+        logits = logits.flatten(1)
+        return self.fc(logits)

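A hedged usage sketch of the new head (the import path and shapes are assumptions, not stated in the patch): the pooled backbone feature is expanded to 4-D, projected by the 1x1 ConvModule registered as `conv2`, flattened, and classified by the linear layer.

import torch
from mmrazor.models.architectures.heads import CreamClsHead  # assumed import path

head = CreamClsHead(num_classes=1000, in_channels=320)
feats = (torch.randn(2, 320),)   # pooled feature of the last backbone stage
assert head(feats).shape == (2, 1000)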
@@ -99,7 +99,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
     DARTS. Search the best module by learnable parameters `arch_param`.

     Args:
-        candidate_ops (dict[str, dict]): the configs for the candidate
+        candidates (dict[str, dict]): the configs for the candidate
             operations.
         module_kwargs (dict[str, dict], optional): Module initialization named
             arguments. Defaults to None.

@@ -110,23 +110,29 @@ class DiffMutableOP(DiffMutableModule[str, str]):
         and `Pretrained`.
     """

-    def __init__(self, candidate_ops: Dict[str, Dict], **kwargs) -> None:
-        super().__init__(**kwargs)
-        assert len(candidate_ops) >= 1, \
+    def __init__(
+        self,
+        candidates: Dict[str, Dict],
+        module_kwargs: Optional[Dict[str, Dict]] = None,
+        alias: Optional[str] = None,
+        init_cfg: Optional[Dict] = None,
+    ) -> None:
+        super().__init__(
+            module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
+        assert len(candidates) >= 1, \
             f'Number of candidate op must greater than or equal to 1, ' \
-            f'but got: {len(candidate_ops)}'
+            f'but got: {len(candidates)}'

         self._is_fixed = False
-        self._candidate_ops = self._build_ops(candidate_ops,
-                                              self.module_kwargs)
+        self._candidates = self._build_ops(candidates, self.module_kwargs)

     @staticmethod
-    def _build_ops(candidate_ops: Dict[str, Dict],
+    def _build_ops(candidates: Dict[str, Dict],
                    module_kwargs: Optional[Dict[str, Dict]]) -> nn.ModuleDict:
-        """Build candidate operations based on candidate_ops configures.
+        """Build candidate operations based on candidates configures.

         Args:
-            candidate_ops (dict[str, dict]): the configs for the candidate
+            candidates (dict[str, dict]): the configs for the candidate
                 operations.
             module_kwargs (dict[str, dict], optional): Module initialization
                 named arguments.

@@ -137,7 +143,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
             is the corresponding candidate operation.
         """
         ops = nn.ModuleDict()
-        for name, op_cfg in candidate_ops.items():
+        for name, op_cfg in candidates.items():
             assert name not in ops
             if module_kwargs is not None:
                 op_cfg.update(module_kwargs)

@@ -154,7 +160,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
         Returns:
             Tensor: the result of forward the fixed operation.
         """
-        return self._candidate_ops[self._chosen](x)
+        return sum(self._candidates[choice](x) for choice in self._chosen)

     def forward_arch_param(self,
                            x: Any,

@@ -180,7 +186,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):

         # forward based on probs
         outputs = list()
-        for prob, module in zip(probs, self._candidate_ops.values()):
+        for prob, module in zip(probs, self._candidates.values()):
             if prob > 0.:
                 outputs.append(prob * module(x))

@@ -197,11 +203,11 @@ class DiffMutableOP(DiffMutableModule[str, str]):
             Tensor: the result of forward all of the ``choice`` operation.
         """
         outputs = list()
-        for op in self._candidate_ops.values():
+        for op in self._candidates.values():
             outputs.append(op(x))
         return sum(outputs)

-    def fix_chosen(self, chosen: str) -> None:
+    def fix_chosen(self, chosen: Union[str, List[str]]) -> None:
         """Fix mutable with `choice`. This operation would convert `unfixed`
         mode to `fixed` mode. The :attr:`is_fixed` will be set to True and only
         the selected operations can be retained.

@@ -215,9 +221,12 @@ class DiffMutableOP(DiffMutableModule[str, str]):
                 'The mode of current MUTABLE is `fixed`. '
                 'Please do not call `fix_chosen` function again.')

+        if isinstance(chosen, str):
+            chosen = [chosen]
+
         for c in self.choices:
-            if c != chosen:
-                self._candidate_ops.pop(c)
+            if c not in chosen:
+                self._candidates.pop(c)

         self._chosen = chosen
         self.is_fixed = True

@@ -225,7 +234,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
     @property
     def choices(self) -> List[str]:
        """list: all choices. """
-        return list(self._candidate_ops.keys())
+        return list(self._candidates.keys())


 @MODELS.register_module()
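A hedged sketch of the updated DiffMutableOP interface shown above: `candidates` replaces `candidate_ops`, `module_kwargs`/`alias`/`init_cfg` are now explicit, and `fix_chosen` accepts a list as well as a single string. The candidate configs and module_kwargs below are illustrative.

from mmrazor.registry import MODELS

op_cfg = dict(
    type='DiffMutableOP',
    candidates=dict(
        torch_conv2d_3x3=dict(type='torchConv2d', kernel_size=3),
        torch_conv2d_5x5=dict(type='torchConv2d', kernel_size=5)),
    module_kwargs=dict(in_channels=32, out_channels=32, stride=1))
mutable = MODELS.build(op_cfg)
mutable.fix_chosen(['torch_conv2d_3x3'])   # a plain string is also accepted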
@@ -241,6 +250,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         with_arch_param (bool): whether forward with arch_param. When set to
             `True`, a differentiable way is adopted. When set to `False`,
             a non-differentiable way is adopted.
+        alias (str, optional): alias of the `DiffChoiceRoute`.
         init_cfg (dict, optional): initialization configuration dict for
             ``BaseModule``. OpenMMLab has implement 6 initializers including
             `Constant`, `Xavier`, `Normal`, `Uniform`, `Kaiming`,

@@ -274,16 +284,17 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         self,
         edges: nn.ModuleDict,
         with_arch_param: bool = False,
+        alias: Optional[str] = None,
         init_cfg: Optional[Dict] = None,
     ) -> None:
-        super().__init__(init_cfg=init_cfg)
+        super().__init__(alias=alias, init_cfg=init_cfg)
         assert len(edges) >= 1, \
             f'Number of edges must greater than or equal to 1, ' \
             f'but got: {len(edges)}'

         self._with_arch_param = with_arch_param
         self._is_fixed = False
-        self._edges: nn.ModuleDict = edges
+        self._candidates: nn.ModuleDict = edges

     def forward_fixed(self, inputs: Union[List, Tuple]) -> Tensor:
         """Forward when the mutable is in `fixed` mode.

@@ -302,7 +313,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         outputs = list()
         for choice, x in zip(self._unfixed_choices, inputs):
             if choice in self._chosen:
-                outputs.append(self._edges[choice](x))
+                outputs.append(self._candidates[choice](x))
         return sum(outputs)

     def forward_arch_param(self,

@@ -319,15 +330,16 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         Returns:
             Tensor: the result of forward with ``arch_param``.
         """
-        assert len(x) == len(self._edges), \
-            f'Length of `edges` {len(self._edges)} should be same as ' \
-            f'the length of inputs {len(x)}.'
+        assert len(x) == len(self._candidates), \
+            f'Length of `edges` {len(self._candidates)} should be ' \
+            f'same as the length of inputs {len(x)}.'

         if self._with_arch_param:
             probs = self.compute_arch_probs(arch_param=arch_param)

             outputs = list()
-            for prob, module, input in zip(probs, self._edges.values(), x):
+            for prob, module, input in zip(probs, self._candidates.values(),
+                                           x):
                 if prob > 0:
                     # prob may equal to 0 in gumbel softmax.
                     outputs.append(prob * module(input))

@@ -346,12 +358,12 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         Returns:
             Tensor: the result of forward all of the ``choice`` operation.
         """
-        assert len(x) == len(self._edges), \
-            f'Lenght of edges {len(self._edges)} should be same as ' \
+        assert len(x) == len(self._candidates), \
+            f'Lenght of edges {len(self._candidates)} should be same as ' \
             f'the length of inputs {len(x)}.'

         outputs = list()
-        for op, input in zip(self._edges.values(), x):
+        for op, input in zip(self._candidates.values(), x):
             outputs.append(op(input))

         return sum(outputs)

@@ -373,7 +385,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):

         for c in self.choices:
             if c not in chosen:
-                self._edges.pop(c)
+                self._candidates.pop(c)

         self._chosen = chosen
         self.is_fixed = True

@@ -381,7 +393,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
     @property
     def choices(self) -> List[CHOSEN_TYPE]:
         """list: all choices. """
-        return list(self._edges.keys())
+        return list(self._candidates.keys())


 @MODELS.register_module()

@@ -413,10 +425,14 @@ class GumbelChoiceRoute(DiffChoiceRoute):
         tau: float = 1.0,
         hard: bool = True,
         with_arch_param: bool = False,
+        alias: Optional[str] = None,
         init_cfg: Optional[Dict] = None,
     ) -> None:
         super().__init__(
-            edges=edges, with_arch_param=with_arch_param, init_cfg=init_cfg)
+            edges=edges,
+            with_arch_param=with_arch_param,
+            alias=alias,
+            init_cfg=init_cfg)
         self.tau = tau
         self.hard = hard

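A hedged construction sketch for the route mutables after this change: `alias` is now accepted and forwarded to the base class. The import path and edge modules below are assumptions.

import torch.nn as nn
from mmrazor.models.mutables import GumbelChoiceRoute  # assumed import path

edges = nn.ModuleDict({
    'normal_n2_p0': nn.Conv2d(16, 16, 3, padding=1),
    'normal_n2_p1': nn.Conv2d(16, 16, 3, padding=1)})
route = GumbelChoiceRoute(
    edges=edges, tau=1.0, hard=True, with_arch_param=True, alias='normal_n2')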
@@ -100,7 +100,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
     blocks.

     Args:
-        candidate_ops (dict[str, dict]): the configs for the candidate
+        candidates (dict[str, dict]): the configs for the candidate
             operations.
         module_kwargs (dict[str, dict], optional): Module initialization named
             arguments. Defaults to None.

@@ -114,13 +114,13 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         >>> import torch
         >>> from mmrazor.models.mutables import OneShotMutableOP

-        >>> candidate_ops = nn.ModuleDict({
+        >>> candidates = nn.ModuleDict({
         ...     'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
         ...     'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
         ...     'conv7x7': nn.Conv2d(32, 32, 7, 1, 3)})

         >>> input = torch.randn(1, 32, 64, 64)
-        >>> op = OneShotMutableOP(candidate_ops)
+        >>> op = OneShotMutableOP(candidates)

         >>> op.choices
         ['conv3x3', 'conv5x5', 'conv7x7']

@@ -131,7 +131,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):

         >>> op.current_choice = 'conv3x3'
         >>> unfix_output = op.forward(input)
-        >>> torch.all(unfixed_output == candidate_ops['conv3x3'](input))
+        >>> torch.all(unfixed_output == candidates['conv3x3'](input))
         True

         >>> op.fix_chosen('conv3x3')

@@ -147,36 +147,41 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         True
     """

-    def __init__(self, candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
-                 **kwargs) -> None:
-        super().__init__(**kwargs)
-        assert len(candidate_ops) >= 1, \
+    def __init__(
+        self,
+        candidates: Union[Dict[str, Dict], nn.ModuleDict],
+        module_kwargs: Optional[Dict[str, Dict]] = None,
+        alias: Optional[str] = None,
+        init_cfg: Optional[Dict] = None,
+    ) -> None:
+        super().__init__(
+            module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
+        assert len(candidates) >= 1, \
             f'Number of candidate op must greater than 1, ' \
-            f'but got: {len(candidate_ops)}'
+            f'but got: {len(candidates)}'

         self._chosen: Optional[str] = None
-        if isinstance(candidate_ops, dict):
-            self._candidate_ops = self._build_ops(candidate_ops,
-                                                  self.module_kwargs)
-        elif isinstance(candidate_ops, nn.ModuleDict):
-            self._candidate_ops = candidate_ops
+        if isinstance(candidates, dict):
+            self._candidates = self._build_ops(candidates, self.module_kwargs)
+        elif isinstance(candidates, nn.ModuleDict):
+            self._candidates = candidates
         else:
             raise TypeError('candidata_ops should be a `dict` or '
                             f'`nn.ModuleDict` instance, but got '
-                            f'{type(candidate_ops)}')
+                            f'{type(candidates)}')

-        assert len(self._candidate_ops) >= 1, \
+        assert len(self._candidates) >= 1, \
             f'Number of candidate op must greater than or equal to 1, ' \
-            f'but got {len(self._candidate_ops)}'
+            f'but got {len(self._candidates)}'

     @staticmethod
     def _build_ops(
-            candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
+            candidates: Union[Dict[str, Dict], nn.ModuleDict],
             module_kwargs: Optional[Dict[str, Dict]] = None) -> nn.ModuleDict:
         """Build candidate operations based on choice configures.

         Args:
-            candidate_ops (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
+            candidates (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
                 for the candidate operations or nn.ModuleDict.
             module_kwargs (dict[str, dict], optional): Module initialization
                 named arguments.

@@ -186,11 +191,11 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
             the name of each choice in configs and the value of ``ops``
             is the corresponding candidate operation.
         """
-        if isinstance(candidate_ops, nn.ModuleDict):
-            return candidate_ops
+        if isinstance(candidates, nn.ModuleDict):
+            return candidates

         ops = nn.ModuleDict()
-        for name, op_cfg in candidate_ops.items():
+        for name, op_cfg in candidates.items():
             assert name not in ops
             if module_kwargs is not None:
                 op_cfg.update(module_kwargs)

@@ -207,7 +212,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         Returns:
             Tensor: the result of forward the fixed operation.
         """
-        return self._candidate_ops[self._chosen](x)
+        return self._candidates[self._chosen](x)

     def forward_choice(self, x: Any, choice: str) -> Tensor:
         """Forward with the `unfixed` mutable and current choice is not None.

@@ -221,7 +226,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
             Tensor: the result of forward the ``choice`` operation.
         """
         assert isinstance(choice, str) and choice in self.choices
-        return self._candidate_ops[choice](x)
+        return self._candidates[choice](x)

     def forward_all(self, x: Any) -> Tensor:
         """Forward all choices. Used to calculate FLOPs.

@@ -233,7 +238,9 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         Returns:
             Tensor: the result of forward all of the ``choice`` operation.
         """
-        outputs = [op(x) for op in self._candidate_ops.values()]
+        outputs = list()
+        for op in self._candidates.values():
+            outputs.append(op(x))
         return sum(outputs)

     def fix_chosen(self, chosen: str) -> None:

@@ -251,7 +258,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):

         for c in self.choices:
             if c != chosen:
-                self._candidate_ops.pop(c)
+                self._candidates.pop(c)

         self._chosen = chosen
         self.is_fixed = True

@@ -263,7 +270,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
     @property
     def choices(self) -> List[str]:
         """list: all choices. """
-        return list(self._candidate_ops.keys())
+        return list(self._candidates.keys())

     @property
     def num_choices(self):

@@ -275,7 +282,7 @@ class OneShotProbMutableOP(OneShotMutableOP):
     """Sampling candidate operation according to probability.

     Args:
-        candidate_ops (dict[str, dict]): the configs for the candidate
+        candidates (dict[str, dict]): the configs for the candidate
             operations.
         choice_probs (list): the probability of sampling each
             candidate operation.

@@ -289,13 +296,13 @@ class OneShotProbMutableOP(OneShotMutableOP):
     """

     def __init__(self,
-                 candidate_ops: Dict[str, Dict],
+                 candidates: Dict[str, Dict],
                 choice_probs: list = None,
                 module_kwargs: Optional[Dict[str, Dict]] = None,
                 alias: Optional[str] = None,
                 init_cfg: Optional[Dict] = None) -> None:
        super().__init__(
-            candidate_ops=candidate_ops,
+            candidates=candidates,
            module_kwargs=module_kwargs,
            alias=alias,
            init_cfg=init_cfg)

@@ -306,5 +313,7 @@ class OneShotProbMutableOP(OneShotMutableOP):

     def sample_choice(self) -> str:
         """Sampling with probabilities."""
-        assert len(self.choice_probs) == len(self._candidate_ops.keys())
-        return random.choices(self.choices, weights=self.choice_probs, k=1)[0]
+        assert len(self.choice_probs) == len(self._candidates.keys())
+        choice = random.choices(
+            self.choices, weights=self.choice_probs, k=1)[0]
+        return choice

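A hedged sketch of probability-weighted sampling with the renamed argument; the ShuffleBlock configs and module_kwargs below are illustrative, not copied from the repository.

from mmrazor.registry import MODELS

op_cfg = dict(
    type='OneShotProbMutableOP',
    choice_probs=[0.25, 0.75],
    candidates=dict(
        shuffle_3x3=dict(type='ShuffleBlock', kernel_size=3),
        shuffle_5x5=dict(type='ShuffleBlock', kernel_size=5)),
    module_kwargs=dict(in_channels=32, out_channels=32, stride=1))
op = MODELS.build(op_cfg)
print(op.sample_choice())   # e.g. 'shuffle_5x5', drawn with the given weights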
@@ -2,10 +2,12 @@
 from .common import Identity
 from .darts_series import (DartsDilConv, DartsPoolBN, DartsSepConv,
                            DartsSkipConnect, DartsZero)
+from .efficientnet_series import ConvBnAct, DepthwiseSeparableConv
 from .mobilenet_series import MBBlock
 from .shufflenet_series import ShuffleBlock, ShuffleXception

 __all__ = [
     'ShuffleBlock', 'ShuffleXception', 'DartsPoolBN', 'DartsDilConv',
-    'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity'
+    'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity',
+    'ConvBnAct', 'DepthwiseSeparableConv'
 ]

@@ -27,10 +27,7 @@ class DartsPoolBN(BaseOP):
             self.kernel_size, self.stride, 1, count_include_pad=False)
         self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]

-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None

     def forward(self, x):
         out = self.pool(x)

@@ -69,10 +66,7 @@ class DartsDilConv(BaseOP):
                 self.in_channels, self.out_channels, 1, stride=1, bias=False),
             build_norm_layer(self.norm_cfg, self.in_channels)[1])

-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None

     def forward(self, x):
         out = self.conv1(x)

@@ -122,10 +116,7 @@ class DartsSepConv(BaseOP):
                 self.out_channels, self.out_channels, 1, stride=1, bias=False),
             build_norm_layer(self.norm_cfg, self.out_channels)[1])

-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None

     def forward(self, x):
         out = self.conv1(x)

@@ -163,10 +154,7 @@ class DartsSkipConnect(BaseOP):
             bias=False)
         self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]

-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None

     def forward(self, x):
         if self.stride > 1:

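A hedged, self-contained illustration of the simplified pattern used in the DARTS ops above: the optional drop-path member is created with a conditional expression and applied only when present (nn.Dropout stands in for DropPath here to keep the sketch dependency-free).

import torch
import torch.nn as nn

class TinyOp(nn.Module):
    def __init__(self, use_drop_path: bool = True):
        super().__init__()
        self.conv = nn.Conv2d(8, 8, 3, padding=1)
        # Stand-in for DropPath; created with the same conditional expression.
        self.drop_path = nn.Dropout(p=0.1) if use_drop_path else None

    def forward(self, x):
        out = self.conv(x)
        if self.drop_path is not None:
            out = self.drop_path(out)
        return out

print(TinyOp()(torch.randn(1, 8, 16, 16)).shape)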
@@ -0,0 +1,160 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, Optional
+
+import torch.nn as nn
+from mmcls.models.utils import SELayer
+from mmcv.cnn import ConvModule
+
+from mmrazor.registry import MODELS
+from .base import BaseOP
+
+
+@MODELS.register_module()
+class ConvBnAct(BaseOP):
+    """ConvBnAct block from timm.
+
+    Args:
+        in_channels (int): number of in channels.
+        out_channels (int): number of out channels.
+        kernel_size (int): kernel size of convolution.
+        stride (int, optional): stride of convolution. Defaults to 1.
+        dilation (int, optional): dilation rate of convolution. Defaults to 1.
+        padding (int, optional): padding size of convolution. Defaults to 0.
+        skip (bool, optional): whether using skip connect. Defaults to False.
+        conv_cfg (Optional[dict], optional): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        norm_cfg (Dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (Dict, optional):Config dict for activation layer.
+            Default: dict(type='ReLU').
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: int,
+                 stride: int = 1,
+                 dilation: int = 1,
+                 padding: int = 0,
+                 skip: bool = False,
+                 conv_cfg: Optional[dict] = None,
+                 se_cfg: Dict = None,
+                 norm_cfg: Dict = dict(type='BN'),
+                 act_cfg: Dict = dict(type='ReLU')):
+        super().__init__(
+            in_channels=in_channels, out_channels=out_channels, stride=stride)
+        self.has_residual = skip and stride == 1 \
+            and in_channels == out_channels
+        self.with_se = se_cfg is not None
+
+        if self.with_se:
+            assert isinstance(se_cfg, dict)
+            self.se = SELayer(self.out_channels, **se_cfg)
+
+        self.convModule = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            dilation=dilation,
+            padding=padding,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+    def forward(self, x):
+        """Forward function."""
+        shortcut = x
+        x = self.convModule(x)
+        if self.has_residual:
+            x += shortcut
+        return x
+
+
+@MODELS.register_module()
+class DepthwiseSeparableConv(BaseOP):
+    """DepthwiseSeparable block Used for DS convs in MobileNet-V1 and in the
+    place of IR blocks that have no expansion (factor of 1.0). This is an
+    alternative to having a IR with an optional first pw conv.
+
+    Args:
+        in_channels (int): number of in channels.
+        out_channels (int): number of out channels.
+        dw_kernel_size (int, optional): the kernel size of depth-wise
+            convolution. Defaults to 3.
+        stride (int, optional): stride of convolution.
+            Defaults to 1.
+        dilation (int, optional): dilation rate of convolution.
+            Defaults to 1.
+        noskip (bool, optional): whether use skip connection.
+            Defaults to False.
+        pw_kernel_size (int, optional): kernel size of point wise convolution.
+            Defaults to 1.
+        pw_act (bool, optional): whether using activation in point-wise
+            convolution. Defaults to False.
+        se_cfg (Dict, optional): _description_. Defaults to None.
+        conv_cfg (Optional[dict], optional): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        norm_cfg (Dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (Dict, optional):Config dict for activation layer.
+            Default: dict(type='ReLU').
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 dw_kernel_size: int = 3,
+                 stride: int = 1,
+                 dilation: int = 1,
+                 noskip: bool = False,
+                 pw_kernel_size: int = 1,
+                 pw_act: bool = False,
+                 conv_cfg: Optional[dict] = None,
+                 se_cfg: Dict = None,
+                 norm_cfg: Dict = dict(type='BN'),
+                 act_cfg: Dict = dict(type='ReLU')):
+
+        super().__init__(
+            in_channels=in_channels, out_channels=out_channels, stride=stride)
+        self.has_residual = (stride == 1
+                             and in_channels == out_channels) and not noskip
+        self.has_pw_act = pw_act  # activation after point-wise conv
+
+        self.se_cfg = se_cfg
+
+        self.conv_dw = ConvModule(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=dw_kernel_size,
+            stride=stride,
+            dilation=dilation,
+            padding=dw_kernel_size // 2,
+            groups=in_channels,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+        )
+
+        # Squeeze-and-excitation
+        self.se = SELayer(out_channels, **
+                          se_cfg) if self.se_cfg else nn.Identity()
+
+        self.conv_pw = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=pw_kernel_size,
+            padding=pw_kernel_size // 2,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg if self.has_pw_act else None,
+        )
+
+    def forward(self, x):
+        shortcut = x
+        x = self.conv_dw(x)
+        x = self.se(x)
+        x = self.conv_pw(x)
+        if self.has_residual:
+            x += shortcut
+        return x

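A hedged usage sketch of the two new ops (import path and channel sizes are assumptions): both keep the spatial size with the defaults shown, and the residual is only added when input and output channels match.

import torch
from mmrazor.models.ops import ConvBnAct, DepthwiseSeparableConv  # assumed import path

x = torch.randn(2, 16, 32, 32)
conv_bn_act = ConvBnAct(
    in_channels=16, out_channels=16, kernel_size=3, padding=1, skip=True)
ds_conv = DepthwiseSeparableConv(in_channels=16, out_channels=32)
assert conv_bn_act(x).shape == (2, 16, 32, 32)   # residual branch is active
assert ds_conv(x).shape == (2, 32, 32, 32)       # no residual: channels differ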
@@ -65,10 +65,10 @@ class FlopsEstimator:
         ...     def __init__(self) -> None:
         ...         super().__init__()
         ...
-        ...         candidate_ops = nn.ModuleDict({
+        ...         candidates = nn.ModuleDict({
         ...             'conv3x3': nn.Conv2d(3, 32, 3),
         ...             'conv5x5': nn.Conv2d(3, 32, 5)})
-        ...         self.op = OneShotMutableOP(candidate_ops)
+        ...         self.op = OneShotMutableOP(candidates)
         ...         self.op.current_choice = 'conv3x3'
         ...
         ...     def forward(self, x: Tensor) -> Tensor:

@@ -90,12 +90,19 @@ class FixSubnetMixin:
             # In the corresponding mutable, it will check whether the `chosen`
             # format is correct.
             if isinstance(module, BaseMutable):
-                mutable_name = name.lstrip(prefix)
-                assert mutable_name in fix_modules, \
-                    f'{mutable_name} is not in fix_modules {fix_modules}, '\
-                    'please check your `fix_subnet`.'
-
-                chosen = fix_modules.get(mutable_name, None)
+                if getattr(module, 'alias', None):
+                    alias = module.alias
+                    assert alias in fix_modules, \
+                        f'The alias {alias} is not in fix_modules ' \
+                        f'{fix_modules}, please check your `fix_subnet`.'
+                    chosen = fix_modules.get(alias, None)
+                else:
+                    mutable_name = name.lstrip(prefix)
+                    assert mutable_name in fix_modules, \
+                        f'The module name {mutable_name} is not in ' \
+                        f'fix_modules {fix_modules} ' \
+                        'please check your `fix_subnet`.'
+                    chosen = fix_modules.get(mutable_name, None)
                 module.fix_chosen(chosen)

         # TODO support load fix channels after mr #29 merged

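A hedged illustration of the lookup order implemented above: a mutable that defines an `alias` is matched by that alias, otherwise by its prefix-stripped module name. Keys and values below are illustrative only.

fix_modules = {
    'backbone.layers.0.0': 'shuffle_3x3',   # matched by module name
    'normal_n2_p0': 'torch_conv2d_3x3',     # matched by alias
}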
@@ -19,7 +19,7 @@ class TestDartsBackbone(TestCase):
     def setUp(self) -> None:
         self.mutable_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,

@@ -96,17 +96,17 @@ class TestDartsBackbone(TestCase):
         tmp_dict = dict()

         for key, _ in model.named_modules():
-            node_type = key.split('._candidate_ops')[0].split('.')[-1].split(
+            node_type = key.split('._candidates')[0].split('.')[-1].split(
                 '_')[0]
             if node_type not in ['normal', 'reduce']:
                 # not supported type
                 continue

-            node_name = key.split('._candidate_ops')[0].split('.')[-1]
+            node_name = key.split('._candidates')[0].split('.')[-1]
             if node_name not in tmp_dict.keys():
-                tmp_dict[node_name] = [key.split('._candidate_ops')[0]]
+                tmp_dict[node_name] = [key.split('._candidates')[0]]
             else:
-                current_key = key.split('._candidate_ops')[0]
+                current_key = key.split('._candidates')[0]
                 if current_key not in tmp_dict[node_name]:
                     tmp_dict[node_name].append(current_key)

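A hedged illustration of why the test now splits keys on '._candidates': submodule names of a mutable contain the renamed attribute. The tiny module below is illustrative, not part of the test suite.

import torch.nn as nn

class TinyMutable(nn.Module):
    def __init__(self):
        super().__init__()
        self._candidates = nn.ModuleDict({'conv3x3': nn.Conv2d(8, 8, 3)})

model = nn.Sequential(TinyMutable())
print([name for name, _ in model.named_modules()])
# ['', '0', '0._candidates', '0._candidates.conv3x3']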
|
@ -18,7 +18,7 @@ class TestDiffOP(TestCase):
|
|||
def test_forward_arch_param(self):
|
||||
op_cfg = dict(
|
||||
type='DiffMutableOP',
|
||||
candidate_ops=dict(
|
||||
candidates=dict(
|
||||
torch_conv2d_3x3=dict(
|
||||
type='torchConv2d',
|
||||
kernel_size=3,
|
||||
|
@ -56,7 +56,7 @@ class TestDiffOP(TestCase):
|
|||
def test_forward_fixed(self):
|
||||
op_cfg = dict(
|
||||
type='DiffMutableOP',
|
||||
candidate_ops=dict(
|
||||
candidates=dict(
|
||||
torch_conv2d_3x3=dict(
|
||||
type='torchConv2d',
|
||||
kernel_size=3,
|
||||
|
@ -84,7 +84,7 @@ class TestDiffOP(TestCase):
|
|||
def test_forward(self):
|
||||
op_cfg = dict(
|
||||
type='DiffMutableOP',
|
||||
candidate_ops=dict(
|
||||
candidates=dict(
|
||||
torch_conv2d_3x3=dict(
|
||||
type='torchConv2d',
|
||||
kernel_size=3,
|
||||
|
@ -119,7 +119,7 @@ class TestDiffOP(TestCase):
|
|||
def test_property(self):
|
||||
op_cfg = dict(
|
||||
type='DiffMutableOP',
|
||||
candidate_ops=dict(
|
||||
candidates=dict(
|
||||
torch_conv2d_3x3=dict(
|
||||
type='torchConv2d',
|
||||
kernel_size=3,
|
||||
|
@ -158,7 +158,7 @@ class TestDiffOP(TestCase):
|
|||
def test_module_kwargs(self):
|
||||
op_cfg = dict(
|
||||
type='DiffMutableOP',
|
||||
candidate_ops=dict(
|
||||
candidates=dict(
|
||||
torch_conv2d_3x3=dict(
|
||||
type='torchConv2d',
|
||||
kernel_size=3,
|
||||
|
|
|
@@ -15,7 +15,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(

@@ -80,7 +80,7 @@ class TestMutables(TestCase):
         op_cfg = dict(
             type='OneShotProbMutableOP',
             choice_probs=[0.1, 0.2, 0.3, 0.4],
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(

@@ -142,7 +142,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(

@@ -165,7 +165,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(

@@ -189,7 +189,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock',
                     norm_cfg=norm_cfg,

@@ -221,9 +221,9 @@ class TestMutables(TestCase):
         output = op.forward_all(input)
         assert output is not None

-    def test_candidate_ops(self):
+    def test_candidates(self):

-        candidate_ops = nn.ModuleDict({
+        candidates = nn.ModuleDict({
             'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
             'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
             'conv7x7': nn.Conv2d(32, 32, 7, 1, 3),

@@ -231,7 +231,7 @@ class TestMutables(TestCase):
             'avgpool3x3': nn.AvgPool2d(3, 1, 1),
         })

-        op_cfg = dict(type='OneShotMutableOP', candidate_ops=candidate_ops)
+        op_cfg = dict(type='OneShotMutableOP', candidates=candidates)

         op = MODELS.build(op_cfg)

@@ -72,12 +72,12 @@ class SearchableModelAlias(nn.Module):
         return self.slayer3(x)


-class TestDiffMutator(TestCase):
+class TestDiffModuleMutator(TestCase):

     def setUp(self):
         self.MUTABLE_CFG = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,

@@ -30,7 +30,7 @@ MUTATOR_CFG = dict(type='OneShotModuleMutator')

 MUTABLE_CFG = dict(
     type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
         choice1=dict(
             type='MBBlock',
             in_channels=3,

@@ -13,7 +13,7 @@ from mmrazor.registry import MODELS

 _FIRST_STAGE_MUTABLE = dict(
     type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
         mb_k3e1=dict(
             type='MBBlock',
             kernel_size=3,

@@ -23,7 +23,7 @@ _FIRST_STAGE_MUTABLE = dict(

 _OTHER_STAGE_MUTABLE = dict(
     type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
         mb_k3e3=dict(
             type='MBBlock',
             kernel_size=3,

@@ -3,6 +3,10 @@ import argparse
 import os
 import os.path as osp

+from mmcls.core import *  # noqa: F401,F403
+from mmcls.datasets import *  # noqa: F401,F403
+from mmcls.metrics import *  # noqa: F401,F403
+from mmcls.models import *  # noqa: F401,F403
 # TODO import mmcls and mmseg
 from mmdet.core import *  # noqa: F401,F403
 from mmdet.datasets import *  # noqa: F401,F403

@@ -38,7 +38,6 @@ def parse_args():

 def main():
     register_all_modules(False)

     args = parse_args()

     # load config

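A hedged note on the added wildcard imports in the test entry point: they exist only for their side effect of registering downstream mmcls modules so that `type` strings from those packages resolve when the config is built. The registry lookup below is illustrative and the mmcls registry location is an assumption.

from mmcls.models import *  # noqa: F401,F403
from mmcls.registry import MODELS as MMCLS_MODELS  # assumed registry location

assert MMCLS_MODELS.get('ImageClassifier') is not None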