Align SPOS and DetNAS to MMRazor2.0
parent
2d5e8bc675
commit
6c920c88ee
|
@ -0,0 +1,12 @@
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
|
||||||
|
|
||||||
|
MKL_NUM_THREADS=4
|
||||||
|
OMP_NUM_THREADS=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test
|
||||||
|
|
||||||
|
|
||||||
|
bash tools/slurm_test.sh mm_model angle_test configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py /mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth
|
|
@ -0,0 +1,56 @@
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
|
||||||
|
|
||||||
|
MKL_NUM_THREADS=4
|
||||||
|
OMP_NUM_THREADS=1
|
||||||
|
|
||||||
|
# train
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=spos_train \
|
||||||
|
# --gres=gpu:8 \
|
||||||
|
# --ntasks=8 \
|
||||||
|
# --ntasks-per-node=8 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py ./work_dir/spos
|
||||||
|
|
||||||
|
# SPOS test
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=spos_test \
|
||||||
|
# --gres=gpu:1 \
|
||||||
|
# --ntasks=1 \
|
||||||
|
# --ntasks-per-node=1 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth"
|
||||||
|
|
||||||
|
# DetNAS train
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=detnas_train \
|
||||||
|
# --gres=gpu:8 \
|
||||||
|
# --ntasks=8 \
|
||||||
|
# --ntasks-per-node=8 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py ./work_dir/detnas_pretrain
|
||||||
|
|
||||||
|
# DetNAS test
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=detnas_test \
|
||||||
|
# --gres=gpu:1 \
|
||||||
|
# --ntasks=1 \
|
||||||
|
# --ntasks-per-node=1 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth"
|
||||||
|
|
||||||
|
|
||||||
|
# CREAM Test
|
||||||
|
# bash tools/slurm_test.sh mm_model cream_test configs/nas/cream/cream_14_subnet_mobilenet.py '/mnt/lustre/dongpeijie/14_2.0.pth'
|
||||||
|
|
||||||
|
# CREAM Train
|
||||||
|
bash tools/slurm_train.sh mm_model cream_train configs/nas/cream/cream_14_subnet_mobilenet.py
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
|
||||||
|
|
||||||
|
MKL_NUM_THREADS=4
|
||||||
|
OMP_NUM_THREADS=1
|
||||||
|
|
||||||
|
bash tools/slurm_test.sh mm_model spos_test configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth'
|
|
@ -0,0 +1,31 @@
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
|
||||||
|
|
||||||
|
MKL_NUM_THREADS=4
|
||||||
|
OMP_NUM_THREADS=1
|
||||||
|
|
||||||
|
# DetNAS train
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=detnas_train \
|
||||||
|
# --gres=gpu:8 \
|
||||||
|
# --ntasks=8 \
|
||||||
|
# --ntasks-per-node=8 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test
|
||||||
|
|
||||||
|
|
||||||
|
# bash tools/slurm_test.sh mm_model detnas_test configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth
|
||||||
|
|
||||||
|
# DetNAS test
|
||||||
|
srun --partition=mm_model \
|
||||||
|
--job-name=detnas_test \
|
||||||
|
--gres=gpu:1 \
|
||||||
|
--ntasks=1 \
|
||||||
|
--ntasks-per-node=1 \
|
||||||
|
--cpus-per-task=8 \
|
||||||
|
--kill-on-bad-exit=1 \
|
||||||
|
--quotatype=auto \
|
||||||
|
python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth" --launcher=slurm
|
|
@ -0,0 +1,51 @@
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
|
||||||
|
|
||||||
|
MKL_NUM_THREADS=4
|
||||||
|
OMP_NUM_THREADS=1
|
||||||
|
|
||||||
|
# train
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=spos_train \
|
||||||
|
# --gres=gpu:8 \
|
||||||
|
# --ntasks=8 \
|
||||||
|
# --ntasks-per-node=8 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_format_output
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph
|
||||||
|
|
||||||
|
# 55% wrong settings of PolyLR
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph
|
||||||
|
|
||||||
|
# fix setting of PolyLR and rerun with colorjittor
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_with_colorjittor
|
||||||
|
|
||||||
|
# fix setting of PolyLR and rerun w/o colorjittor
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_wo_colorjittor
|
||||||
|
|
||||||
|
# fix setting of optimizer decay[wo cj] (paramwise_cfg)
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_wo_cj
|
||||||
|
|
||||||
|
# fix setting of optimizer decay[with cj] (paramwise_cfg)
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_w_cj
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# SPOS test
|
||||||
|
# srun --partition=mm_model \
|
||||||
|
# --job-name=spos_test \
|
||||||
|
# --gres=gpu:1 \
|
||||||
|
# --ntasks=1 \
|
||||||
|
# --ntasks-per-node=1 \
|
||||||
|
# --cpus-per-task=8 \
|
||||||
|
# --kill-on-bad-exit=1 \
|
||||||
|
# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth"
|
||||||
|
|
||||||
|
|
||||||
|
bash tools/slurm_test.sh mm_model spos_test configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth'
|
||||||
|
|
||||||
|
# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_spos
|
|
@ -0,0 +1,76 @@
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=1,
|
||||||
|
act_cfg=(dict(type='HSwish'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[80, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[96, 3, 1, _OTHER_STAGE_MUTABLE],
|
||||||
|
[192, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||||
|
]
|
|
@ -0,0 +1,76 @@
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=1,
|
||||||
|
act_cfg=(dict(type='HSwish'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[80, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[96, 1, 1, _OTHER_STAGE_MUTABLE],
|
||||||
|
[192, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||||
|
]
|
|
@ -0,0 +1,76 @@
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=1,
|
||||||
|
act_cfg=(dict(type='HSwish'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[80, 3, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[96, 4, 1, _OTHER_STAGE_MUTABLE],
|
||||||
|
[192, 3, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||||
|
]
|
|
@ -0,0 +1,76 @@
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=1,
|
||||||
|
act_cfg=(dict(type='HSwish'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 1, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[40, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[80, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[96, 3, 1, _OTHER_STAGE_MUTABLE],
|
||||||
|
[192, 2, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||||
|
]
|
|
@ -0,0 +1,76 @@
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=1,
|
||||||
|
act_cfg=(dict(type='HSwish'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 4, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[40, 4, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[80, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[96, 4, 1, _OTHER_STAGE_MUTABLE],
|
||||||
|
[192, 4, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||||
|
]
|
|
@ -0,0 +1,76 @@
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=1,
|
||||||
|
act_cfg=(dict(type='HSwish'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='HSwish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[40, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[80, 5, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[96, 6, 1, _OTHER_STAGE_MUTABLE],
|
||||||
|
[192, 6, 2, _OTHER_STAGE_MUTABLE],
|
||||||
|
[320, 1, 1, _OTHER_STAGE_MUTABLE]
|
||||||
|
]
|
|
@ -0,0 +1,11 @@
|
||||||
|
modules:
|
||||||
|
backbone.layer1.0: depthsepconv
|
||||||
|
backbone.layer2.0: mb_k3e4_se
|
||||||
|
backbone.layer3.0: mb_k5e6_se
|
||||||
|
backbone.layer3.1: mb_k5e6_se
|
||||||
|
backbone.layer4.0: mb_k5e6_se
|
||||||
|
backbone.layer4.1: mb_k5e6_se
|
||||||
|
backbone.layer5.0: mb_k3e6_se
|
||||||
|
backbone.layer6.0: mb_k5e6_se
|
||||||
|
backbone.layer7.0: convbnact
|
||||||
|
channels:
|
|
@ -0,0 +1,8 @@
|
||||||
|
_base_ = ['./cream_14_supernet_mobilenet.py']
|
||||||
|
|
||||||
|
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||||
|
fix_subnet = 'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml' # noqa: E501
|
||||||
|
|
||||||
|
model = dict(fix_subnet=fix_subnet)
|
||||||
|
|
||||||
|
find_unused_parameters = False
|
|
@ -0,0 +1,241 @@
|
||||||
|
# dataset settings
|
||||||
|
dataset_type = 'ImageNet'
|
||||||
|
|
||||||
|
preprocess_cfg = dict(
|
||||||
|
# RGB format normalization parameters
|
||||||
|
mean=[123.675, 116.28, 103.53],
|
||||||
|
std=[58.395, 57.12, 57.375],
|
||||||
|
# convert image from BGR to RGB
|
||||||
|
to_rgb=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# file_client_args = dict(
|
||||||
|
# backend='petrel',
|
||||||
|
# path_mapping=dict({
|
||||||
|
# './data/imagenet': 's3://openmmlab/datasets/classification/imagenet',
|
||||||
|
# 'data/imagenet': 's3://openmmlab/datasets/classification/imagenet'
|
||||||
|
# }))
|
||||||
|
|
||||||
|
train_pipeline = [
|
||||||
|
dict(type='LoadImageFromFile'),
|
||||||
|
dict(type='RandomResizedCrop', scale=224),
|
||||||
|
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||||
|
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
test_pipeline = [
|
||||||
|
dict(type='LoadImageFromFile'),
|
||||||
|
dict(
|
||||||
|
type='ResizeEdge',
|
||||||
|
scale=73,
|
||||||
|
edge='short',
|
||||||
|
backend='pillow',
|
||||||
|
interpolation='bicubic'),
|
||||||
|
dict(type='CenterCrop', crop_size=64),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
train_dataloader = dict(
|
||||||
|
batch_size=128,
|
||||||
|
num_workers=5,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_root='/mnt/cache/share/images',
|
||||||
|
ann_file='meta/train.txt',
|
||||||
|
data_prefix='train',
|
||||||
|
pipeline=train_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
|
||||||
|
|
||||||
|
val_dataloader = dict(
|
||||||
|
batch_size=128,
|
||||||
|
num_workers=5,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_root='/mnt/cache/share/images',
|
||||||
|
ann_file='meta/val.txt',
|
||||||
|
data_prefix='val',
|
||||||
|
pipeline=test_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||||
|
|
||||||
|
# If you want standard test, please manually configure the test dataset
|
||||||
|
test_dataloader = val_dataloader
|
||||||
|
test_evaluator = val_evaluator
|
||||||
|
|
||||||
|
# scheduler
|
||||||
|
|
||||||
|
# optimizer
|
||||||
|
optim_wrapper = dict(
|
||||||
|
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||||
|
clip_grad=None)
|
||||||
|
|
||||||
|
# leanring policy
|
||||||
|
param_scheduler = [
|
||||||
|
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False),
|
||||||
|
]
|
||||||
|
|
||||||
|
# train, val, test setting
|
||||||
|
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||||
|
val_cfg = dict()
|
||||||
|
test_cfg = dict()
|
||||||
|
|
||||||
|
# runtime
|
||||||
|
|
||||||
|
# defaults to use registries in mmrazor
|
||||||
|
default_scope = 'mmcls'
|
||||||
|
|
||||||
|
# configure default hooks
|
||||||
|
default_hooks = dict(
|
||||||
|
timer=dict(type='IterTimerHook'),
|
||||||
|
logger=dict(type='LoggerHook', interval=100),
|
||||||
|
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||||
|
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
|
||||||
|
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||||
|
visualization=dict(type='VisualizationHook', enable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# configure environment
|
||||||
|
env_cfg = dict(
|
||||||
|
cudnn_benchmark=False,
|
||||||
|
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||||
|
dist_cfg=dict(backend='nccl'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# set visualizer
|
||||||
|
vis_backends = [dict(type='LocalVisBackend')]
|
||||||
|
visualizer = dict(
|
||||||
|
type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||||
|
|
||||||
|
# set log level
|
||||||
|
log_level = 'INFO'
|
||||||
|
|
||||||
|
# load from which checkpoint
|
||||||
|
load_from = None
|
||||||
|
|
||||||
|
# whether to resume training from the loaded checkpoint
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
se_cfg = dict(
|
||||||
|
ratio=4,
|
||||||
|
divisor=8,
|
||||||
|
act_cfg=(dict(type='ReLU'),
|
||||||
|
dict(
|
||||||
|
type='HSigmoid', bias=3, divisor=6, min_value=0,
|
||||||
|
max_value=1)))
|
||||||
|
|
||||||
|
_FIRST_STAGE_MUTABLE = dict( # DepthwiseSep
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
depthsepconv=dict(
|
||||||
|
type='DepthwiseSeparableConv',
|
||||||
|
dw_kernel_size=3,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish'))))
|
||||||
|
|
||||||
|
_MIDDLE_STAGE_MUTABLE = dict(
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish')),
|
||||||
|
mb_k3e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish')),
|
||||||
|
mb_k5e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish')),
|
||||||
|
mb_k5e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish')),
|
||||||
|
mb_k7e4_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=4,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish')),
|
||||||
|
mb_k7e6_se=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
se_cfg=se_cfg,
|
||||||
|
norm_cfg=dict(type='BN'),
|
||||||
|
act_cfg=dict(type='Swish'))))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 4 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, stride, mutable cfg.
|
||||||
|
[16, 1, 1, _FIRST_STAGE_MUTABLE],
|
||||||
|
[24, 1, 2, _MIDDLE_STAGE_MUTABLE],
|
||||||
|
[40, 2, 2, _MIDDLE_STAGE_MUTABLE],
|
||||||
|
[80, 2, 2, _MIDDLE_STAGE_MUTABLE],
|
||||||
|
[96, 1, 1, _MIDDLE_STAGE_MUTABLE],
|
||||||
|
[192, 1, 2, _MIDDLE_STAGE_MUTABLE],
|
||||||
|
]
|
||||||
|
|
||||||
|
norm_cfg = dict(type='BN')
|
||||||
|
supernet = dict(
|
||||||
|
_scope_='mmcls',
|
||||||
|
type='ImageClassifier',
|
||||||
|
data_preprocessor=preprocess_cfg,
|
||||||
|
backbone=dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='SearchableMobileNet',
|
||||||
|
arch_setting=arch_setting,
|
||||||
|
first_channels=16,
|
||||||
|
last_channels=320,
|
||||||
|
widen_factor=1.0,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='Swish'),
|
||||||
|
out_indices=(6, ),
|
||||||
|
),
|
||||||
|
neck=dict(type='GlobalAveragePooling'),
|
||||||
|
head=dict(
|
||||||
|
type='mmrazor.CreamClsHead',
|
||||||
|
num_classes=1000,
|
||||||
|
in_channels=320,
|
||||||
|
num_features=1280,
|
||||||
|
act_cfg=dict(type='Swish'),
|
||||||
|
loss=dict(
|
||||||
|
type='LabelSmoothLoss',
|
||||||
|
num_classes=1000,
|
||||||
|
label_smooth_val=0.1,
|
||||||
|
mode='original',
|
||||||
|
loss_weight=1.0),
|
||||||
|
topk=(1, 5),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameters = True
|
|
@ -1,116 +0,0 @@
|
||||||
normal_n2:
|
|
||||||
chosen:
|
|
||||||
- normal_n2_p1
|
|
||||||
- normal_n2_p0
|
|
||||||
normal_n3:
|
|
||||||
chosen:
|
|
||||||
- normal_n3_p0
|
|
||||||
- normal_n3_p1
|
|
||||||
normal_n4:
|
|
||||||
chosen:
|
|
||||||
- normal_n4_p0
|
|
||||||
- normal_n4_p1
|
|
||||||
normal_n5:
|
|
||||||
chosen:
|
|
||||||
- normal_n5_p2
|
|
||||||
- normal_n5_p0
|
|
||||||
reduce_n2:
|
|
||||||
chosen:
|
|
||||||
- reduce_n2_p0
|
|
||||||
- reduce_n2_p1
|
|
||||||
reduce_n3:
|
|
||||||
chosen:
|
|
||||||
- reduce_n3_p1
|
|
||||||
- reduce_n3_p2
|
|
||||||
reduce_n4:
|
|
||||||
chosen:
|
|
||||||
- reduce_n4_p2
|
|
||||||
- reduce_n4_p0
|
|
||||||
reduce_n5:
|
|
||||||
chosen:
|
|
||||||
- reduce_n5_p1
|
|
||||||
- reduce_n5_p2
|
|
||||||
normal_n2_p0:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n2_p1:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n3_p0:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n3_p1:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n3_p2:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n4_p0:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
normal_n4_p1:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n4_p2:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
normal_n4_p3:
|
|
||||||
chosen:
|
|
||||||
- sep_conv_3x3
|
|
||||||
normal_n5_p0:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
normal_n5_p1:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
normal_n5_p2:
|
|
||||||
chosen:
|
|
||||||
- dil_conv_3x3
|
|
||||||
normal_n5_p3:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
normal_n5_p4:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
reduce_n2_p0:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n2_p1:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n3_p0:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n3_p1:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n3_p2:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
reduce_n4_p0:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n4_p1:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n4_p2:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
reduce_n4_p3:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
reduce_n5_p0:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n5_p1:
|
|
||||||
chosen:
|
|
||||||
- max_pool_3x3
|
|
||||||
reduce_n5_p2:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
reduce_n5_p3:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
||||||
reduce_n5_p4:
|
|
||||||
chosen:
|
|
||||||
- skip_connect
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
modules:
|
||||||
|
normal_n2:
|
||||||
|
- normal_n2_p0
|
||||||
|
- normal_n2_p1
|
||||||
|
normal_n2_p0:
|
||||||
|
- sep_conv_3x3
|
||||||
|
normal_n2_p1:
|
||||||
|
- sep_conv_3x3
|
||||||
|
normal_n3:
|
||||||
|
- normal_n3_p0
|
||||||
|
- normal_n3_p1
|
||||||
|
normal_n3_p0:
|
||||||
|
- skip_connect
|
||||||
|
normal_n3_p1:
|
||||||
|
- sep_conv_5x5
|
||||||
|
normal_n4:
|
||||||
|
- normal_n4_p0
|
||||||
|
- normal_n4_p1
|
||||||
|
normal_n4_p0:
|
||||||
|
- sep_conv_3x3
|
||||||
|
normal_n4_p1:
|
||||||
|
- skip_connect
|
||||||
|
normal_n5:
|
||||||
|
- normal_n5_p0
|
||||||
|
- normal_n5_p1
|
||||||
|
normal_n5_p0:
|
||||||
|
- skip_connect
|
||||||
|
normal_n5_p1:
|
||||||
|
- skip_connect
|
||||||
|
reduce_n2:
|
||||||
|
- reduce_n2_p0
|
||||||
|
- reduce_n2_p1
|
||||||
|
reduce_n2_p0:
|
||||||
|
- max_pool_3x3
|
||||||
|
reduce_n2_p1:
|
||||||
|
- sep_conv_3x3
|
||||||
|
reduce_n3:
|
||||||
|
- reduce_n3_p0
|
||||||
|
- reduce_n3_p2
|
||||||
|
reduce_n3_p0:
|
||||||
|
- max_pool_3x3
|
||||||
|
reduce_n3_p2:
|
||||||
|
- dil_conv_5x5
|
||||||
|
reduce_n4:
|
||||||
|
- reduce_n4_p0
|
||||||
|
- reduce_n4_p2
|
||||||
|
reduce_n4_p0:
|
||||||
|
- max_pool_3x3
|
||||||
|
reduce_n4_p2:
|
||||||
|
- skip_connect
|
||||||
|
reduce_n5:
|
||||||
|
- reduce_n5_p0
|
||||||
|
- reduce_n5_p2
|
||||||
|
reduce_n5_p0:
|
||||||
|
- max_pool_3x3
|
||||||
|
reduce_n5_p2:
|
||||||
|
- skip_connect
|
||||||
|
channels:
|
|
@ -0,0 +1,196 @@
|
||||||
|
# dataset settings
|
||||||
|
dataset_type = 'CIFAR10'
|
||||||
|
preprocess_cfg = dict(
|
||||||
|
# RGB format normalization parameters
|
||||||
|
mean=[125.307, 122.961, 113.8575],
|
||||||
|
std=[51.5865, 50.847, 51.255],
|
||||||
|
# loaded images are already RGB format
|
||||||
|
to_rgb=False)
|
||||||
|
|
||||||
|
train_pipeline = [
|
||||||
|
dict(type='RandomCrop', crop_size=32, padding=4),
|
||||||
|
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
dict(
|
||||||
|
type='Cutout',
|
||||||
|
magnitude_key='shape',
|
||||||
|
magnitude_range=(1, 16),
|
||||||
|
pad_val=0,
|
||||||
|
prob=0.5),
|
||||||
|
]
|
||||||
|
|
||||||
|
test_pipeline = [
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
train_dataloader = dict(
|
||||||
|
batch_size=96,
|
||||||
|
num_workers=2,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10',
|
||||||
|
test_mode=False,
|
||||||
|
pipeline=train_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
val_dataloader = dict(
|
||||||
|
batch_size=16,
|
||||||
|
num_workers=2,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/',
|
||||||
|
test_mode=True,
|
||||||
|
pipeline=test_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
val_evaluator = dict(type='Accuracy', topk=(1, ))
|
||||||
|
|
||||||
|
test_dataloader = val_dataloader
|
||||||
|
test_evaluator = val_evaluator
|
||||||
|
|
||||||
|
# optimizer
|
||||||
|
optim_wrapper = dict(
|
||||||
|
architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4),
|
||||||
|
mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3),
|
||||||
|
clip_grad=dict(max_norm=5, norm_type=2))
|
||||||
|
|
||||||
|
# leanring policy
|
||||||
|
param_scheduler = [
|
||||||
|
dict(
|
||||||
|
type='CosineAnnealingLR',
|
||||||
|
T_max=600,
|
||||||
|
by_epoch=True,
|
||||||
|
begin=0,
|
||||||
|
end=600,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
# train, val, test setting
|
||||||
|
train_cfg = dict(by_epoch=True, max_epochs=600)
|
||||||
|
val_cfg = dict(interval=1) # validate each epoch
|
||||||
|
test_cfg = dict()
|
||||||
|
|
||||||
|
# defaults to use registries in mmcls
|
||||||
|
default_scope = 'mmcls'
|
||||||
|
|
||||||
|
# configure default hooks
|
||||||
|
default_hooks = dict(
|
||||||
|
timer=dict(type='IterTimerHook'),
|
||||||
|
logger=dict(type='LoggerHook', interval=100),
|
||||||
|
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||||
|
checkpoint=dict(
|
||||||
|
type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3),
|
||||||
|
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||||
|
visualization=dict(type='VisualizationHook', enable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# configure environment
|
||||||
|
env_cfg = dict(
|
||||||
|
cudnn_benchmark=False,
|
||||||
|
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||||
|
dist_cfg=dict(backend='nccl'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# set visualizer
|
||||||
|
visualizer = None
|
||||||
|
|
||||||
|
# set log level
|
||||||
|
log_level = 'INFO'
|
||||||
|
|
||||||
|
# load from which checkpoint
|
||||||
|
load_from = None
|
||||||
|
|
||||||
|
# whether to resume training from the loaded checkpoint
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
# model
|
||||||
|
norm_cfg = dict(type='BN', affine=True)
|
||||||
|
mutable_cfg = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='mmrazor.DiffMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
zero=dict(type='mmrazor.DartsZero'),
|
||||||
|
skip_connect=dict(
|
||||||
|
type='mmrazor.DartsSkipConnect',
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
max_pool_3x3=dict(
|
||||||
|
type='mmrazor.DartsPoolBN',
|
||||||
|
pool_type='max',
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
avg_pool_3x3=dict(
|
||||||
|
type='mmrazor.DartsPoolBN',
|
||||||
|
pool_type='avg',
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
sep_conv_3x3=dict(
|
||||||
|
type='mmrazor.DartsSepConv',
|
||||||
|
kernel_size=3,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
sep_conv_5x5=dict(
|
||||||
|
type='mmrazor.DartsSepConv',
|
||||||
|
kernel_size=5,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
dil_conv_3x3=dict(
|
||||||
|
type='mmrazor.DartsDilConv',
|
||||||
|
kernel_size=3,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
dil_conv_5x5=dict(
|
||||||
|
type='mmrazor.DartsDilConv',
|
||||||
|
kernel_size=5,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
use_drop_path=True),
|
||||||
|
))
|
||||||
|
|
||||||
|
route_cfg = dict(
|
||||||
|
type='mmrazor.DiffChoiceRoute',
|
||||||
|
with_arch_param=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
supernet = dict(
|
||||||
|
type='mmcls.ImageClassifier',
|
||||||
|
data_preprocessor=preprocess_cfg,
|
||||||
|
backbone=dict(
|
||||||
|
type='mmrazor.DartsBackbone',
|
||||||
|
in_channels=3,
|
||||||
|
base_channels=36,
|
||||||
|
num_layers=20,
|
||||||
|
num_nodes=4,
|
||||||
|
stem_multiplier=3,
|
||||||
|
auxliary=True,
|
||||||
|
aux_channels=128,
|
||||||
|
aux_out_channels=768,
|
||||||
|
out_indices=(19, ),
|
||||||
|
mutable_cfg=mutable_cfg,
|
||||||
|
route_cfg=route_cfg),
|
||||||
|
neck=dict(type='mmcls.GlobalAveragePooling'),
|
||||||
|
head=dict(
|
||||||
|
type='mmrazor.DartsSubnetClsHead',
|
||||||
|
num_classes=10,
|
||||||
|
in_channels=576,
|
||||||
|
aux_in_channels=768,
|
||||||
|
loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
|
||||||
|
aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4),
|
||||||
|
topk=(1, 5),
|
||||||
|
cal_acc=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.DiffModuleMutator')
|
||||||
|
|
||||||
|
fix_subnet = 'configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml'
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
fix_subnet=fix_subnet,
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameter = False
|
|
@ -0,0 +1,163 @@
|
||||||
|
# dataset settings
|
||||||
|
dataset_type = 'CIFAR10'
|
||||||
|
preprocess_cfg = dict(
|
||||||
|
# RGB format normalization parameters
|
||||||
|
mean=[125.307, 122.961, 113.8575],
|
||||||
|
std=[51.5865, 50.847, 51.255],
|
||||||
|
# loaded images are already RGB format
|
||||||
|
to_rgb=False)
|
||||||
|
|
||||||
|
train_pipeline = [
|
||||||
|
dict(type='RandomCrop', crop_size=32, padding=4),
|
||||||
|
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
test_pipeline = [
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
train_dataloader = dict(
|
||||||
|
batch_size=16,
|
||||||
|
num_workers=2,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10',
|
||||||
|
test_mode=False,
|
||||||
|
pipeline=train_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
val_dataloader = dict(
|
||||||
|
batch_size=16,
|
||||||
|
num_workers=2,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/',
|
||||||
|
test_mode=True,
|
||||||
|
pipeline=test_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
val_evaluator = dict(type='Accuracy', topk=(1, ))
|
||||||
|
|
||||||
|
test_dataloader = val_dataloader
|
||||||
|
test_evaluator = val_evaluator
|
||||||
|
|
||||||
|
# optimizer
|
||||||
|
optim_wrapper = dict(
|
||||||
|
architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4),
|
||||||
|
mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3),
|
||||||
|
clip_grad=None)
|
||||||
|
|
||||||
|
# leanring policy
|
||||||
|
param_scheduler = [
|
||||||
|
dict(
|
||||||
|
type='CosineAnnealingLR',
|
||||||
|
T_max=50,
|
||||||
|
by_epoch=True,
|
||||||
|
min_lr=1e-3,
|
||||||
|
begin=0,
|
||||||
|
end=50,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
# train, val, test setting
|
||||||
|
train_cfg = dict(by_epoch=True, max_epochs=50)
|
||||||
|
val_cfg = dict(interval=1) # validate each epoch
|
||||||
|
test_cfg = dict()
|
||||||
|
|
||||||
|
# defaults to use registries in mmcls
|
||||||
|
default_scope = 'mmcls'
|
||||||
|
|
||||||
|
# configure default hooks
|
||||||
|
default_hooks = dict(
|
||||||
|
timer=dict(type='IterTimerHook'),
|
||||||
|
logger=dict(type='LoggerHook', interval=100),
|
||||||
|
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||||
|
checkpoint=dict(
|
||||||
|
type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3),
|
||||||
|
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||||
|
visualization=dict(type='VisualizationHook', enable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# configure environment
|
||||||
|
env_cfg = dict(
|
||||||
|
cudnn_benchmark=False,
|
||||||
|
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||||
|
dist_cfg=dict(backend='nccl'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# set visualizer
|
||||||
|
visualizer = None
|
||||||
|
|
||||||
|
# set log level
|
||||||
|
log_level = 'INFO'
|
||||||
|
|
||||||
|
# load from which checkpoint
|
||||||
|
load_from = None
|
||||||
|
|
||||||
|
# whether to resume training from the loaded checkpoint
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
# model
|
||||||
|
norm_cfg = dict(type='BN', affine=False)
|
||||||
|
mutable_cfg = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='mmrazor.DiffMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
zero=dict(type='mmrazor.DartsZero'),
|
||||||
|
skip_connect=dict(type='mmrazor.DartsSkipConnect', norm_cfg=norm_cfg),
|
||||||
|
max_pool_3x3=dict(
|
||||||
|
type='mmrazor.DartsPoolBN', pool_type='max', norm_cfg=norm_cfg),
|
||||||
|
avg_pool_3x3=dict(
|
||||||
|
type='mmrazor.DartsPoolBN', pool_type='avg', norm_cfg=norm_cfg),
|
||||||
|
sep_conv_3x3=dict(
|
||||||
|
type='mmrazor.DartsSepConv', kernel_size=3, norm_cfg=norm_cfg),
|
||||||
|
sep_conv_5x5=dict(
|
||||||
|
type='mmrazor.DartsSepConv', kernel_size=5, norm_cfg=norm_cfg),
|
||||||
|
dil_conv_3x3=dict(
|
||||||
|
type='mmrazor.DartsDilConv', kernel_size=3, norm_cfg=norm_cfg),
|
||||||
|
dil_conv_5x5=dict(
|
||||||
|
type='mmrazor.DartsDilConv', kernel_size=5, norm_cfg=norm_cfg),
|
||||||
|
))
|
||||||
|
|
||||||
|
route_cfg = dict(
|
||||||
|
type='mmrazor.DiffChoiceRoute',
|
||||||
|
with_arch_param=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
supernet = dict(
|
||||||
|
type='mmcls.ImageClassifier',
|
||||||
|
backbone=dict(
|
||||||
|
type='mmrazor.DartsBackbone',
|
||||||
|
in_channels=3,
|
||||||
|
base_channels=36,
|
||||||
|
num_layers=20,
|
||||||
|
num_nodes=4,
|
||||||
|
stem_multiplier=3,
|
||||||
|
auxliary=False,
|
||||||
|
out_indices=(19, ),
|
||||||
|
mutable_cfg=mutable_cfg,
|
||||||
|
route_cfg=route_cfg),
|
||||||
|
neck=dict(type='mmcls.GlobalAveragePooling'),
|
||||||
|
head=dict(
|
||||||
|
type='mmrazor.DartsSubnetClsHead',
|
||||||
|
num_classes=10,
|
||||||
|
in_channels=576,
|
||||||
|
aux_in_channels=768,
|
||||||
|
loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
|
||||||
|
aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4),
|
||||||
|
topk=(1, 5),
|
||||||
|
cal_acc=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.DiffModuleMutator')
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameter = True
|
|
@ -1,60 +0,0 @@
|
||||||
stage_0_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_0_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_0_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_0_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_1_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_1_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_1_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_1_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_2_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_2_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_2_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_2_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_2_block_4:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_5:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_6:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_2_block_7:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_3_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_3_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_3_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_3_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
modules:
|
||||||
|
backbone.layers.0.0: shuffle_5x5
|
||||||
|
backbone.layers.0.1: shuffle_3x3
|
||||||
|
backbone.layers.0.2: shuffle_3x3
|
||||||
|
backbone.layers.0.3: shuffle_3x3
|
||||||
|
backbone.layers.1.0: shuffle_xception
|
||||||
|
backbone.layers.1.1: shuffle_3x3
|
||||||
|
backbone.layers.1.2: shuffle_xception
|
||||||
|
backbone.layers.1.3: shuffle_7x7
|
||||||
|
backbone.layers.2.0: shuffle_7x7
|
||||||
|
backbone.layers.2.1: shuffle_7x7
|
||||||
|
backbone.layers.2.2: shuffle_xception
|
||||||
|
backbone.layers.2.3: shuffle_xception
|
||||||
|
backbone.layers.2.4: shuffle_3x3
|
||||||
|
backbone.layers.2.5: shuffle_7x7
|
||||||
|
backbone.layers.2.6: shuffle_5x5
|
||||||
|
backbone.layers.2.7: shuffle_xception
|
||||||
|
backbone.layers.3.0: shuffle_7x7
|
||||||
|
backbone.layers.3.1: shuffle_7x7
|
||||||
|
backbone.layers.3.2: shuffle_7x7
|
||||||
|
backbone.layers.3.3: shuffle_5x5
|
||||||
|
channels:
|
|
@ -0,0 +1,22 @@
|
||||||
|
modules:
|
||||||
|
backbone.layers.0.0: shuffle_5x5
|
||||||
|
backbone.layers.0.1: shuffle_3x3
|
||||||
|
backbone.layers.0.2: shuffle_3x3
|
||||||
|
backbone.layers.0.3: shuffle_3x3
|
||||||
|
backbone.layers.1.0: shuffle_xception
|
||||||
|
backbone.layers.1.1: shuffle_3x3
|
||||||
|
backbone.layers.1.2: shuffle_xception
|
||||||
|
backbone.layers.1.3: shuffle_7x7
|
||||||
|
backbone.layers.2.0: shuffle_7x7
|
||||||
|
backbone.layers.2.1: shuffle_7x7
|
||||||
|
backbone.layers.2.2: shuffle_xception
|
||||||
|
backbone.layers.2.3: shuffle_xception
|
||||||
|
backbone.layers.2.4: shuffle_3x3
|
||||||
|
backbone.layers.2.5: shuffle_7x7
|
||||||
|
backbone.layers.2.6: shuffle_5x5
|
||||||
|
backbone.layers.2.7: shuffle_xception
|
||||||
|
backbone.layers.3.0: shuffle_7x7
|
||||||
|
backbone.layers.3.1: shuffle_7x7
|
||||||
|
backbone.layers.3.2: shuffle_7x7
|
||||||
|
backbone.layers.3.3: shuffle_5x5
|
||||||
|
channels:
|
|
@ -1,20 +0,0 @@
|
||||||
_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py']
|
|
||||||
|
|
||||||
data = dict(
|
|
||||||
samples_per_gpu=128,
|
|
||||||
workers_per_gpu=8,
|
|
||||||
)
|
|
||||||
|
|
||||||
algorithm = dict(bn_training_mode=True)
|
|
||||||
|
|
||||||
searcher = dict(
|
|
||||||
type='EvolutionSearcher',
|
|
||||||
metrics='bbox',
|
|
||||||
score_key='bbox_mAP',
|
|
||||||
constraints=dict(flops=300 * 1e6),
|
|
||||||
candidate_pool_size=50,
|
|
||||||
candidate_top_k=10,
|
|
||||||
max_epoch=20,
|
|
||||||
num_mutation=20,
|
|
||||||
num_crossover=20,
|
|
||||||
)
|
|
|
@ -1,6 +0,0 @@
|
||||||
_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py']
|
|
||||||
|
|
||||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
|
||||||
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml' # noqa: E501
|
|
||||||
|
|
||||||
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
|
|
|
@ -1,8 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'../spos/spos_subnet_shufflenetv2_8xb128_in1k.py',
|
|
||||||
]
|
|
||||||
|
|
||||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
|
||||||
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml' # noqa: E501
|
|
||||||
|
|
||||||
algorithm = dict(mutable_cfg=mutable_cfg)
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
_base_ = ['./detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py']
|
||||||
|
|
||||||
|
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||||
|
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
|
||||||
|
|
||||||
|
model = dict(fix_subnet=fix_subnet)
|
||||||
|
|
||||||
|
find_unused_parameters = False
|
|
@ -1,144 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'../../_base_/datasets/mmdet/coco_detection.py',
|
|
||||||
'../../_base_/schedules/mmdet/schedule_1x.py',
|
|
||||||
'../../_base_/mmdet_runtime.py'
|
|
||||||
]
|
|
||||||
|
|
||||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
|
||||||
model = dict(
|
|
||||||
type='mmdet.FasterRCNN',
|
|
||||||
backbone=dict(
|
|
||||||
type='mmcls.SearchableShuffleNetV2',
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
out_indices=(0, 1, 2, 3),
|
|
||||||
widen_factor=1.0,
|
|
||||||
with_last_layer=False),
|
|
||||||
neck=dict(
|
|
||||||
type='FPN',
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
in_channels=[64, 160, 320, 640],
|
|
||||||
out_channels=256,
|
|
||||||
num_outs=5),
|
|
||||||
rpn_head=dict(
|
|
||||||
type='RPNHead',
|
|
||||||
in_channels=256,
|
|
||||||
feat_channels=256,
|
|
||||||
anchor_generator=dict(
|
|
||||||
type='AnchorGenerator',
|
|
||||||
scales=[8],
|
|
||||||
ratios=[0.5, 1.0, 2.0],
|
|
||||||
strides=[4, 8, 16, 32, 64]),
|
|
||||||
bbox_coder=dict(
|
|
||||||
type='DeltaXYWHBBoxCoder',
|
|
||||||
target_means=[.0, .0, .0, .0],
|
|
||||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
|
||||||
loss_cls=dict(
|
|
||||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
|
||||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
|
||||||
roi_head=dict(
|
|
||||||
type='StandardRoIHead',
|
|
||||||
bbox_roi_extractor=dict(
|
|
||||||
type='SingleRoIExtractor',
|
|
||||||
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
|
|
||||||
out_channels=256,
|
|
||||||
featmap_strides=[4, 8, 16, 32]),
|
|
||||||
bbox_head=dict(
|
|
||||||
type='Shared4Conv1FCBBoxHead',
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
in_channels=256,
|
|
||||||
fc_out_channels=1024,
|
|
||||||
roi_feat_size=7,
|
|
||||||
num_classes=80,
|
|
||||||
bbox_coder=dict(
|
|
||||||
type='DeltaXYWHBBoxCoder',
|
|
||||||
target_means=[0., 0., 0., 0.],
|
|
||||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
|
||||||
reg_class_agnostic=False,
|
|
||||||
loss_cls=dict(
|
|
||||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
|
||||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
|
|
||||||
train_cfg=dict(
|
|
||||||
rpn=dict(
|
|
||||||
assigner=dict(
|
|
||||||
type='MaxIoUAssigner',
|
|
||||||
pos_iou_thr=0.7,
|
|
||||||
neg_iou_thr=0.3,
|
|
||||||
min_pos_iou=0.3,
|
|
||||||
match_low_quality=True,
|
|
||||||
ignore_iof_thr=-1),
|
|
||||||
sampler=dict(
|
|
||||||
type='RandomSampler',
|
|
||||||
num=256,
|
|
||||||
pos_fraction=0.5,
|
|
||||||
neg_pos_ub=-1,
|
|
||||||
add_gt_as_proposals=False),
|
|
||||||
allowed_border=-1,
|
|
||||||
pos_weight=-1,
|
|
||||||
debug=False),
|
|
||||||
rpn_proposal=dict(
|
|
||||||
nms_pre=2000,
|
|
||||||
max_per_img=1000,
|
|
||||||
nms=dict(type='nms', iou_threshold=0.7),
|
|
||||||
min_bbox_size=0),
|
|
||||||
rcnn=dict(
|
|
||||||
assigner=dict(
|
|
||||||
type='MaxIoUAssigner',
|
|
||||||
pos_iou_thr=0.5,
|
|
||||||
neg_iou_thr=0.5,
|
|
||||||
min_pos_iou=0.5,
|
|
||||||
match_low_quality=False,
|
|
||||||
ignore_iof_thr=-1),
|
|
||||||
sampler=dict(
|
|
||||||
type='RandomSampler',
|
|
||||||
num=512,
|
|
||||||
pos_fraction=0.25,
|
|
||||||
neg_pos_ub=-1,
|
|
||||||
add_gt_as_proposals=True),
|
|
||||||
pos_weight=-1,
|
|
||||||
debug=False)),
|
|
||||||
test_cfg=dict(
|
|
||||||
rpn=dict(
|
|
||||||
nms_pre=1000,
|
|
||||||
max_per_img=1000,
|
|
||||||
nms=dict(type='nms', iou_threshold=0.7),
|
|
||||||
min_bbox_size=0),
|
|
||||||
rcnn=dict(
|
|
||||||
score_thr=0.05,
|
|
||||||
nms=dict(type='nms', iou_threshold=0.5),
|
|
||||||
max_per_img=100)
|
|
||||||
# soft-nms is also supported for rcnn testing
|
|
||||||
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
mutator = dict(
|
|
||||||
type='OneShotModuleMutator',
|
|
||||||
placeholder_mapping=dict(
|
|
||||||
all_blocks=dict(
|
|
||||||
type='OneShotMutableOP',
|
|
||||||
choices=dict(
|
|
||||||
shuffle_3x3=dict(
|
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
|
||||||
shuffle_5x5=dict(
|
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=5),
|
|
||||||
shuffle_7x7=dict(
|
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=7),
|
|
||||||
shuffle_xception=dict(
|
|
||||||
type='ShuffleXception',
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
),
|
|
||||||
))))
|
|
||||||
|
|
||||||
algorithm = dict(
|
|
||||||
type='DetNAS',
|
|
||||||
architecture=dict(
|
|
||||||
type='MMDetArchitecture',
|
|
||||||
model=model,
|
|
||||||
),
|
|
||||||
mutator=mutator,
|
|
||||||
pruner=None,
|
|
||||||
distiller=None,
|
|
||||||
retraining=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
find_unused_parameters = True
|
|
|
@ -1,5 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'../spos/spos_supernet_shufflenetv2_8xb128_in1k.py',
|
|
||||||
]
|
|
||||||
|
|
||||||
runner = dict(max_iters=300000)
|
|
|
@ -0,0 +1,87 @@
|
||||||
|
_base_ = [
|
||||||
|
'mmdet::_base_/models/faster_rcnn_r50_fpn.py',
|
||||||
|
'mmdet::_base_/datasets/coco_detection.py',
|
||||||
|
'mmdet::_base_/schedules/schedule_1x.py',
|
||||||
|
'mmdet::_base_/default_runtime.py'
|
||||||
|
]
|
||||||
|
|
||||||
|
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
|
||||||
|
|
||||||
|
_base_.train_dataloader.dataset.data_root = data_root
|
||||||
|
|
||||||
|
visualizer = None
|
||||||
|
|
||||||
|
log_level = 'INFO'
|
||||||
|
load_from = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||||
|
# model settings
|
||||||
|
_STAGE_MUTABLE = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='mmrazor.OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
shuffle_3x3=dict(
|
||||||
|
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||||
|
shuffle_5x5=dict(
|
||||||
|
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||||
|
shuffle_7x7=dict(
|
||||||
|
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||||
|
shuffle_xception=dict(
|
||||||
|
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
|
||||||
|
))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 3 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||||
|
[64, 4, _STAGE_MUTABLE],
|
||||||
|
[160, 4, _STAGE_MUTABLE],
|
||||||
|
[320, 8, _STAGE_MUTABLE],
|
||||||
|
[640, 4, _STAGE_MUTABLE],
|
||||||
|
]
|
||||||
|
|
||||||
|
supernet = _base_.model
|
||||||
|
|
||||||
|
supernet.backbone = dict(
|
||||||
|
type='mmrazor.SearchableShuffleNetV2',
|
||||||
|
arch_setting=arch_setting,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
out_indices=(0, 1, 2, 3),
|
||||||
|
widen_factor=1.0,
|
||||||
|
with_last_layer=False)
|
||||||
|
|
||||||
|
supernet.neck = dict(
|
||||||
|
type='FPN',
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
in_channels=[64, 160, 320, 640],
|
||||||
|
out_channels=256,
|
||||||
|
num_outs=5)
|
||||||
|
|
||||||
|
supernet.roi_head.bbox_head = dict(
|
||||||
|
type='Shared4Conv1FCBBoxHead',
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
in_channels=256,
|
||||||
|
fc_out_channels=1024,
|
||||||
|
roi_feat_size=7,
|
||||||
|
num_classes=80,
|
||||||
|
bbox_coder=dict(
|
||||||
|
type='DeltaXYWHBBoxCoder',
|
||||||
|
target_means=[0., 0., 0., 0.],
|
||||||
|
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||||
|
reg_class_agnostic=False,
|
||||||
|
loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||||
|
loss_bbox=dict(type='L1Loss', loss_weight=1.0))
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||||
|
|
||||||
|
fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
_delete_=True,
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
fix_subnet=fix_subnet,
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameters = True
|
|
@ -0,0 +1,114 @@
|
||||||
|
_base_ = [
|
||||||
|
'mmdet::faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
|
||||||
|
'mmdet::datasets/coco_detection.py', 'mmdet::schedules/schedule_1x.py',
|
||||||
|
'mmdet::default_runtime.py'
|
||||||
|
]
|
||||||
|
|
||||||
|
data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/'
|
||||||
|
|
||||||
|
train_dataloader = dict(dataset=dict(data_root=data_root, ))
|
||||||
|
|
||||||
|
visualizer = None
|
||||||
|
# custom_hooks = [dict(type='DetVisualizationHook', interval=10)]
|
||||||
|
|
||||||
|
log_level = 'INFO'
|
||||||
|
load_from = None
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
# TODO: support auto scaling lr
|
||||||
|
|
||||||
|
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||||
|
# model settings
|
||||||
|
_STAGE_MUTABLE = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='mmrazor.OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
shuffle_3x3=dict(
|
||||||
|
type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
||||||
|
shuffle_5x5=dict(
|
||||||
|
type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
||||||
|
shuffle_7x7=dict(
|
||||||
|
type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
||||||
|
shuffle_xception=dict(
|
||||||
|
type='mmrazor.ShuffleXception', norm_cfg=norm_cfg),
|
||||||
|
))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 3 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||||
|
[64, 4, _STAGE_MUTABLE],
|
||||||
|
[160, 4, _STAGE_MUTABLE],
|
||||||
|
[320, 8, _STAGE_MUTABLE],
|
||||||
|
[640, 4, _STAGE_MUTABLE],
|
||||||
|
]
|
||||||
|
|
||||||
|
supernet = dict(
|
||||||
|
type='RetinaNet',
|
||||||
|
data_preprocessor=dict(
|
||||||
|
type='DetDataPreprocessor',
|
||||||
|
mean=[123.675, 116.28, 103.53],
|
||||||
|
std=[58.395, 57.12, 57.375],
|
||||||
|
bgr_to_rgb=True,
|
||||||
|
pad_size_divisor=32),
|
||||||
|
backbone=dict(
|
||||||
|
type='mmrazor.SearchableShuffleNetV2',
|
||||||
|
arch_setting=arch_setting,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
out_indices=(0, 1, 2, 3),
|
||||||
|
widen_factor=1.0,
|
||||||
|
with_last_layer=False),
|
||||||
|
neck=dict(
|
||||||
|
type='FPN',
|
||||||
|
in_channels=[64, 160, 320, 640],
|
||||||
|
out_channels=256,
|
||||||
|
num_outs=5),
|
||||||
|
bbox_head=dict(
|
||||||
|
type='RetinaHead',
|
||||||
|
num_classes=80,
|
||||||
|
in_channels=256,
|
||||||
|
stacked_convs=4,
|
||||||
|
feat_channels=256,
|
||||||
|
anchor_generator=dict(
|
||||||
|
type='AnchorGenerator',
|
||||||
|
octave_base_scale=4,
|
||||||
|
scales_per_octave=3,
|
||||||
|
ratios=[0.5, 1.0, 2.0],
|
||||||
|
strides=[8, 16, 32, 64, 128]),
|
||||||
|
bbox_coder=dict(
|
||||||
|
type='DeltaXYWHBBoxCoder',
|
||||||
|
target_means=[.0, .0, .0, .0],
|
||||||
|
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||||
|
loss_cls=dict(
|
||||||
|
type='FocalLoss',
|
||||||
|
use_sigmoid=True,
|
||||||
|
gamma=2.0,
|
||||||
|
alpha=0.25,
|
||||||
|
loss_weight=1.0),
|
||||||
|
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||||
|
# model training and testing settings
|
||||||
|
train_cfg=dict(
|
||||||
|
assigner=dict(
|
||||||
|
type='MaxIoUAssigner',
|
||||||
|
pos_iou_thr=0.5,
|
||||||
|
neg_iou_thr=0.4,
|
||||||
|
min_pos_iou=0,
|
||||||
|
ignore_iof_thr=-1),
|
||||||
|
allowed_border=-1,
|
||||||
|
pos_weight=-1,
|
||||||
|
debug=False),
|
||||||
|
test_cfg=dict(
|
||||||
|
nms_pre=1000,
|
||||||
|
min_bbox_size=0,
|
||||||
|
score_thr=0.05,
|
||||||
|
nms=dict(type='nms', iou_threshold=0.5),
|
||||||
|
max_per_img=100))
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameters = True
|
|
@ -0,0 +1,24 @@
|
||||||
|
modules:
|
||||||
|
backbone.layer1.0: mb_k3e1
|
||||||
|
backbone.layer2.0: mb_k5e3
|
||||||
|
backbone.layer2.1: mb_k5e3
|
||||||
|
backbone.layer2.2: identity
|
||||||
|
backbone.layer2.3: mb_k3e3
|
||||||
|
backbone.layer3.0: mb_k3e3
|
||||||
|
backbone.layer3.1: identity
|
||||||
|
backbone.layer3.2: identity
|
||||||
|
backbone.layer3.3: mb_k3e3
|
||||||
|
backbone.layer4.0: mb_k7e6
|
||||||
|
backbone.layer4.1: identity
|
||||||
|
backbone.layer4.2: mb_k7e3
|
||||||
|
backbone.layer4.3: mb_k7e3
|
||||||
|
backbone.layer5.0: mb_k3e3
|
||||||
|
backbone.layer5.1: mb_k3e3
|
||||||
|
backbone.layer5.2: mb_k7e3
|
||||||
|
backbone.layer5.3: mb_k5e3
|
||||||
|
backbone.layer6.0: mb_k5e6
|
||||||
|
backbone.layer6.1: mb_k7e3
|
||||||
|
backbone.layer6.2: mb_k7e3
|
||||||
|
backbone.layer6.3: mb_k7e3
|
||||||
|
backbone.layer7.0: mb_k5e6
|
||||||
|
channels:
|
|
@ -1,66 +0,0 @@
|
||||||
stage_0_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k3e1
|
|
||||||
stage_1_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k5e3
|
|
||||||
stage_1_block_1:
|
|
||||||
chosen:
|
|
||||||
- mb_k5e3
|
|
||||||
stage_1_block_2:
|
|
||||||
chosen:
|
|
||||||
- identity
|
|
||||||
stage_1_block_3:
|
|
||||||
chosen:
|
|
||||||
- mb_k3e3
|
|
||||||
stage_2_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k3e3
|
|
||||||
stage_2_block_1:
|
|
||||||
chosen:
|
|
||||||
- identity
|
|
||||||
stage_2_block_2:
|
|
||||||
chosen:
|
|
||||||
- identity
|
|
||||||
stage_2_block_3:
|
|
||||||
chosen:
|
|
||||||
- mb_k3e3
|
|
||||||
stage_3_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e6
|
|
||||||
stage_3_block_1:
|
|
||||||
chosen:
|
|
||||||
- identity
|
|
||||||
stage_3_block_2:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e3
|
|
||||||
stage_3_block_3:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e3
|
|
||||||
stage_4_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k3e3
|
|
||||||
stage_4_block_1:
|
|
||||||
chosen:
|
|
||||||
- mb_k3e3
|
|
||||||
stage_4_block_2:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e3
|
|
||||||
stage_4_block_3:
|
|
||||||
chosen:
|
|
||||||
- mb_k5e3
|
|
||||||
stage_5_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k5e6
|
|
||||||
stage_5_block_1:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e3
|
|
||||||
stage_5_block_2:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e3
|
|
||||||
stage_5_block_3:
|
|
||||||
chosen:
|
|
||||||
- mb_k7e3
|
|
||||||
stage_6_block_0:
|
|
||||||
chosen:
|
|
||||||
- mb_k5e6
|
|
|
@ -1,60 +0,0 @@
|
||||||
stage_0_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_0_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_0_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_0_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_5x5
|
|
||||||
stage_1_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_1_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_1_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_1_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_2_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_2_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_2_block_4:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_5:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_6:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_2_block_7:
|
|
||||||
chosen:
|
|
||||||
- shuffle_3x3
|
|
||||||
stage_3_block_0:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_3_block_1:
|
|
||||||
chosen:
|
|
||||||
- shuffle_7x7
|
|
||||||
stage_3_block_2:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
||||||
stage_3_block_3:
|
|
||||||
chosen:
|
|
||||||
- shuffle_xception
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
modules:
|
||||||
|
backbone.layers.0.0: shuffle_7x7
|
||||||
|
backbone.layers.0.1: shuffle_3x3
|
||||||
|
backbone.layers.0.2: shuffle_7x7
|
||||||
|
backbone.layers.0.3: shuffle_3x3
|
||||||
|
backbone.layers.1.0: shuffle_xception
|
||||||
|
backbone.layers.1.1: shuffle_5x5
|
||||||
|
backbone.layers.1.2: shuffle_5x5
|
||||||
|
backbone.layers.1.3: shuffle_3x3
|
||||||
|
backbone.layers.2.0: shuffle_3x3
|
||||||
|
backbone.layers.2.1: shuffle_5x5
|
||||||
|
backbone.layers.2.2: shuffle_3x3
|
||||||
|
backbone.layers.2.3: shuffle_5x5
|
||||||
|
backbone.layers.2.4: shuffle_3x3
|
||||||
|
backbone.layers.2.5: shuffle_xception
|
||||||
|
backbone.layers.2.6: shuffle_5x5
|
||||||
|
backbone.layers.2.7: shuffle_7x7
|
||||||
|
backbone.layers.3.0: shuffle_7x7
|
||||||
|
backbone.layers.3.1: shuffle_3x3
|
||||||
|
backbone.layers.3.2: shuffle_5x5
|
||||||
|
backbone.layers.3.3: shuffle_xception
|
||||||
|
channels:
|
|
@ -1,20 +0,0 @@
|
||||||
_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py']
|
|
||||||
|
|
||||||
data = dict(
|
|
||||||
samples_per_gpu=512,
|
|
||||||
workers_per_gpu=16,
|
|
||||||
)
|
|
||||||
|
|
||||||
algorithm = dict(bn_training_mode=True)
|
|
||||||
|
|
||||||
searcher = dict(
|
|
||||||
type='EvolutionSearcher',
|
|
||||||
candidate_pool_size=50,
|
|
||||||
candidate_top_k=10,
|
|
||||||
constraints=dict(flops=465 * 1e6),
|
|
||||||
metrics='accuracy',
|
|
||||||
score_key='accuracy_top-1',
|
|
||||||
max_epoch=20,
|
|
||||||
num_mutation=25,
|
|
||||||
num_crossover=25,
|
|
||||||
mutate_prob=0.1)
|
|
|
@ -1,20 +0,0 @@
|
||||||
_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k.py']
|
|
||||||
|
|
||||||
data = dict(
|
|
||||||
samples_per_gpu=2048,
|
|
||||||
workers_per_gpu=16,
|
|
||||||
)
|
|
||||||
|
|
||||||
algorithm = dict(bn_training_mode=True)
|
|
||||||
|
|
||||||
searcher = dict(
|
|
||||||
type='EvolutionSearcher',
|
|
||||||
candidate_pool_size=50,
|
|
||||||
candidate_top_k=10,
|
|
||||||
constraints=dict(flops=330 * 1e6),
|
|
||||||
metrics='accuracy',
|
|
||||||
score_key='accuracy_top-1',
|
|
||||||
max_epoch=20,
|
|
||||||
num_mutation=25,
|
|
||||||
num_crossover=25,
|
|
||||||
mutate_prob=0.1)
|
|
|
@ -1,27 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'./spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py',
|
|
||||||
]
|
|
||||||
|
|
||||||
img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.], to_rgb=False)
|
|
||||||
train_pipeline = [
|
|
||||||
dict(type='LoadImageFromFile'),
|
|
||||||
dict(type='RandomResizedCrop', size=224),
|
|
||||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
|
||||||
dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
|
|
||||||
dict(type='Normalize', **img_norm_cfg),
|
|
||||||
dict(type='ImageToTensor', keys=['img']),
|
|
||||||
dict(type='ToTensor', keys=['gt_label']),
|
|
||||||
dict(type='Collect', keys=['img', 'gt_label'])
|
|
||||||
]
|
|
||||||
test_pipeline = [
|
|
||||||
dict(type='LoadImageFromFile'),
|
|
||||||
dict(type='Resize', size=(256, -1)),
|
|
||||||
dict(type='CenterCrop', crop_size=224),
|
|
||||||
dict(type='Normalize', **img_norm_cfg),
|
|
||||||
dict(type='ImageToTensor', keys=['img']),
|
|
||||||
dict(type='Collect', keys=['img'])
|
|
||||||
]
|
|
||||||
data = dict(
|
|
||||||
train=dict(pipeline=train_pipeline),
|
|
||||||
val=dict(pipeline=test_pipeline),
|
|
||||||
test=dict(pipeline=test_pipeline))
|
|
|
@ -1,13 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py',
|
|
||||||
]
|
|
||||||
|
|
||||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
|
||||||
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_mobilenet_subnet/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_mutable_cfg.yaml' # noqa: E501
|
|
||||||
|
|
||||||
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
|
|
||||||
evaluation = dict(interval=10000, metric='accuracy')
|
|
||||||
checkpoint_config = dict(interval=30000)
|
|
||||||
|
|
||||||
runner = dict(max_iters=300000)
|
|
||||||
find_unused_parameters = False
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py']
|
||||||
|
|
||||||
|
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||||
|
fix_subnet = 'configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml' # noqa: E501
|
||||||
|
|
||||||
|
model = dict(fix_subnet=fix_subnet)
|
||||||
|
|
||||||
|
find_unused_parameters = False
|
|
@ -1,11 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'./spos_supernet_shufflenetv2_8xb128_in1k.py',
|
|
||||||
]
|
|
||||||
|
|
||||||
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
|
||||||
mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml' # noqa: E501
|
|
||||||
|
|
||||||
algorithm = dict(retraining=True, mutable_cfg=mutable_cfg)
|
|
||||||
|
|
||||||
runner = dict(max_iters=300000)
|
|
||||||
find_unused_parameters = False
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py']
|
||||||
|
|
||||||
|
# FIXME: you may replace this with the mutable_cfg searched by yourself
|
||||||
|
# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||||
|
fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501
|
||||||
|
|
||||||
|
model = dict(fix_subnet=fix_subnet)
|
||||||
|
|
||||||
|
find_unused_parameters = False
|
|
@ -1,101 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
|
|
||||||
'../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
|
|
||||||
'../../_base_/mmcls_runtime.py'
|
|
||||||
]
|
|
||||||
norm_cfg = dict(type='BN')
|
|
||||||
model = dict(
|
|
||||||
type='mmcls.ImageClassifier',
|
|
||||||
backbone=dict(
|
|
||||||
type='SearchableMobileNet',
|
|
||||||
first_channels=40,
|
|
||||||
last_channels=1728,
|
|
||||||
widen_factor=1.0,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
arch_setting_type='proxyless_gpu'),
|
|
||||||
neck=dict(type='GlobalAveragePooling'),
|
|
||||||
head=dict(
|
|
||||||
type='LinearClsHead',
|
|
||||||
num_classes=1000,
|
|
||||||
in_channels=1728,
|
|
||||||
loss=dict(
|
|
||||||
type='LabelSmoothLoss',
|
|
||||||
num_classes=1000,
|
|
||||||
label_smooth_val=0.1,
|
|
||||||
mode='original',
|
|
||||||
loss_weight=1.0),
|
|
||||||
topk=(1, 5),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
mutator = dict(
|
|
||||||
type='OneShotModuleMutator',
|
|
||||||
placeholder_mapping=dict(
|
|
||||||
searchable_blocks=dict(
|
|
||||||
type='OneShotMutableOP',
|
|
||||||
choices=dict(
|
|
||||||
mb_k3e3=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=3,
|
|
||||||
expand_ratio=3,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')),
|
|
||||||
mb_k5e3=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=5,
|
|
||||||
expand_ratio=3,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')),
|
|
||||||
mb_k7e3=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=7,
|
|
||||||
expand_ratio=3,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')),
|
|
||||||
mb_k3e6=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=3,
|
|
||||||
expand_ratio=6,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')),
|
|
||||||
mb_k5e6=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=5,
|
|
||||||
expand_ratio=6,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')),
|
|
||||||
mb_k7e6=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=7,
|
|
||||||
expand_ratio=6,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')),
|
|
||||||
identity=dict(type='Identity'))),
|
|
||||||
first_blocks=dict(
|
|
||||||
type='OneShotMutableOP',
|
|
||||||
choices=dict(
|
|
||||||
mb_k3e1=dict(
|
|
||||||
type='MBBlock',
|
|
||||||
kernel_size=3,
|
|
||||||
expand_ratio=1,
|
|
||||||
norm_cfg=norm_cfg,
|
|
||||||
act_cfg=dict(type='ReLU6')), ))))
|
|
||||||
|
|
||||||
algorithm = dict(
|
|
||||||
type='SPOS',
|
|
||||||
architecture=dict(
|
|
||||||
type='MMClsArchitecture',
|
|
||||||
model=model,
|
|
||||||
),
|
|
||||||
mutator=mutator,
|
|
||||||
distiller=None,
|
|
||||||
retraining=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
runner = dict(max_iters=150000)
|
|
||||||
evaluation = dict(interval=10000, metric='accuracy')
|
|
||||||
|
|
||||||
# checkpoint saving
|
|
||||||
checkpoint_config = dict(interval=30000)
|
|
||||||
|
|
||||||
find_unused_parameters = True
|
|
|
@ -0,0 +1,245 @@
|
||||||
|
# dataset settings
|
||||||
|
dataset_type = 'ImageNet'
|
||||||
|
preprocess_cfg = dict(
|
||||||
|
# RGB format normalization parameters
|
||||||
|
mean=[0., 0., 0.],
|
||||||
|
std=[1., 1., 1.],
|
||||||
|
# convert image from BGR to RGB
|
||||||
|
to_rgb=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
file_client_args = dict(
|
||||||
|
backend='petrel',
|
||||||
|
path_mapping=dict({
|
||||||
|
'./data/imagenet':
|
||||||
|
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||||
|
'data/imagenet':
|
||||||
|
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||||
|
}))
|
||||||
|
|
||||||
|
train_pipeline = [
|
||||||
|
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||||
|
dict(type='RandomResizedCrop', scale=224),
|
||||||
|
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||||
|
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
test_pipeline = [
|
||||||
|
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||||
|
dict(
|
||||||
|
type='ResizeEdge',
|
||||||
|
scale=256,
|
||||||
|
edge='short',
|
||||||
|
backend='pillow',
|
||||||
|
interpolation='bicubic'),
|
||||||
|
dict(type='CenterCrop', crop_size=224),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
train_dataloader = dict(
|
||||||
|
batch_size=128,
|
||||||
|
num_workers=8,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_root='/mnt/cache/share/images',
|
||||||
|
ann_file='meta/train.txt',
|
||||||
|
data_prefix='train',
|
||||||
|
pipeline=train_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
|
||||||
|
|
||||||
|
val_dataloader = dict(
|
||||||
|
batch_size=128,
|
||||||
|
num_workers=8,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_root='/mnt/cache/share/images',
|
||||||
|
ann_file='meta/val.txt',
|
||||||
|
data_prefix='val',
|
||||||
|
pipeline=test_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||||
|
|
||||||
|
# If you want standard test, please manually configure the test dataset
|
||||||
|
test_dataloader = val_dataloader
|
||||||
|
test_evaluator = val_evaluator
|
||||||
|
|
||||||
|
# scheduler
|
||||||
|
|
||||||
|
# optimizer
|
||||||
|
optim_wrapper = dict(
|
||||||
|
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||||
|
clip_grad=None)
|
||||||
|
|
||||||
|
# leanring policy
|
||||||
|
param_scheduler = [
|
||||||
|
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
|
||||||
|
]
|
||||||
|
|
||||||
|
# train, val, test setting
|
||||||
|
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||||
|
val_cfg = dict()
|
||||||
|
test_cfg = dict()
|
||||||
|
|
||||||
|
# runtime
|
||||||
|
|
||||||
|
# defaults to use registries in mmrazor
|
||||||
|
default_scope = 'mmcls'
|
||||||
|
|
||||||
|
log_processor = dict(
|
||||||
|
window_size=100,
|
||||||
|
by_epoch=False,
|
||||||
|
custom_cfg=[
|
||||||
|
dict(
|
||||||
|
data_src='loss',
|
||||||
|
log_name='loss_large_window',
|
||||||
|
method_name='mean',
|
||||||
|
window_size=100)
|
||||||
|
])
|
||||||
|
|
||||||
|
# configure default hooks
|
||||||
|
default_hooks = dict(
|
||||||
|
timer=dict(type='IterTimerHook'),
|
||||||
|
logger=dict(type='LoggerHook', interval=100),
|
||||||
|
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||||
|
checkpoint=dict(
|
||||||
|
type='CheckpointHook',
|
||||||
|
by_epoch=False,
|
||||||
|
interval=10000,
|
||||||
|
save_last=True,
|
||||||
|
max_keep_ckpts=3),
|
||||||
|
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||||
|
visualization=dict(type='VisualizationHook', enable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# configure environment
|
||||||
|
env_cfg = dict(
|
||||||
|
cudnn_benchmark=False,
|
||||||
|
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||||
|
dist_cfg=dict(backend='nccl'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# set visualizer
|
||||||
|
visualizer = None
|
||||||
|
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||||
|
# vis_backends = [dict(type='LocalVisBackend')]
|
||||||
|
|
||||||
|
# set log level
|
||||||
|
log_level = 'INFO'
|
||||||
|
|
||||||
|
# load from which checkpoint
|
||||||
|
load_from = None
|
||||||
|
|
||||||
|
# whether to resume training from the loaded checkpoint
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
# model
|
||||||
|
norm_cfg = dict(type='BN')
|
||||||
|
_STAGE_MUTABLE = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e3=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=3,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')),
|
||||||
|
mb_k5e3=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=3,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')),
|
||||||
|
mb_k7e3=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=3,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')),
|
||||||
|
mb_k3e6=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=6,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')),
|
||||||
|
mb_k5e6=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=5,
|
||||||
|
expand_ratio=6,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')),
|
||||||
|
mb_k7e6=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=7,
|
||||||
|
expand_ratio=6,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')),
|
||||||
|
identity=dict(type='Identity'),
|
||||||
|
))
|
||||||
|
|
||||||
|
_FIRST_MUTABLE = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
mb_k3e1=dict(
|
||||||
|
type='MBBlock',
|
||||||
|
kernel_size=3,
|
||||||
|
expand_ratio=1,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=dict(type='ReLU6')), ))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 3 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||||
|
[24, 1, 1, _FIRST_MUTABLE],
|
||||||
|
[32, 4, 2, _STAGE_MUTABLE],
|
||||||
|
[56, 4, 2, _STAGE_MUTABLE],
|
||||||
|
[112, 4, 2, _STAGE_MUTABLE],
|
||||||
|
[128, 4, 1, _STAGE_MUTABLE],
|
||||||
|
[256, 4, 2, _STAGE_MUTABLE],
|
||||||
|
[432, 1, 1, _STAGE_MUTABLE]
|
||||||
|
]
|
||||||
|
|
||||||
|
norm_cfg = dict(type='BN')
|
||||||
|
supernet = dict(
|
||||||
|
type='ImageClassifier',
|
||||||
|
data_preprocessor=preprocess_cfg,
|
||||||
|
backbone=dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='SearchableMobileNet',
|
||||||
|
first_channels=40,
|
||||||
|
last_channels=1728,
|
||||||
|
widen_factor=1.0,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
arch_setting=arch_setting),
|
||||||
|
neck=dict(type='GlobalAveragePooling'),
|
||||||
|
head=dict(
|
||||||
|
type='LinearClsHead',
|
||||||
|
num_classes=1000,
|
||||||
|
in_channels=1728,
|
||||||
|
loss=dict(
|
||||||
|
type='LabelSmoothLoss',
|
||||||
|
num_classes=1000,
|
||||||
|
label_smooth_val=0.1,
|
||||||
|
mode='original',
|
||||||
|
loss_weight=1.0),
|
||||||
|
topk=(1, 5),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameters = True
|
|
@ -1,59 +0,0 @@
|
||||||
_base_ = [
|
|
||||||
'../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
|
|
||||||
'../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
|
|
||||||
'../../_base_/mmcls_runtime.py'
|
|
||||||
]
|
|
||||||
norm_cfg = dict(type='BN')
|
|
||||||
model = dict(
|
|
||||||
type='mmcls.ImageClassifier',
|
|
||||||
backbone=dict(
|
|
||||||
type='SearchableShuffleNetV2', widen_factor=1.0, norm_cfg=norm_cfg),
|
|
||||||
neck=dict(type='GlobalAveragePooling'),
|
|
||||||
head=dict(
|
|
||||||
type='LinearClsHead',
|
|
||||||
num_classes=1000,
|
|
||||||
in_channels=1024,
|
|
||||||
loss=dict(
|
|
||||||
type='LabelSmoothLoss',
|
|
||||||
num_classes=1000,
|
|
||||||
label_smooth_val=0.1,
|
|
||||||
mode='original',
|
|
||||||
loss_weight=1.0),
|
|
||||||
topk=(1, 5),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
mutator = dict(
|
|
||||||
type='OneShotModuleMutator',
|
|
||||||
placeholder_mapping=dict(
|
|
||||||
all_blocks=dict(
|
|
||||||
type='OneShotMutableOP',
|
|
||||||
choices=dict(
|
|
||||||
shuffle_3x3=dict(
|
|
||||||
type='ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
|
|
||||||
shuffle_5x5=dict(
|
|
||||||
type='ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
|
|
||||||
shuffle_7x7=dict(
|
|
||||||
type='ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
|
|
||||||
shuffle_xception=dict(
|
|
||||||
type='ShuffleXception', norm_cfg=norm_cfg),
|
|
||||||
))))
|
|
||||||
|
|
||||||
algorithm = dict(
|
|
||||||
type='SPOS',
|
|
||||||
architecture=dict(
|
|
||||||
type='MMClsArchitecture',
|
|
||||||
model=model,
|
|
||||||
),
|
|
||||||
mutator=mutator,
|
|
||||||
distiller=None,
|
|
||||||
retraining=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
runner = dict(max_iters=150000)
|
|
||||||
evaluation = dict(interval=1000, metric='accuracy')
|
|
||||||
|
|
||||||
# checkpoint saving
|
|
||||||
checkpoint_config = dict(interval=1000)
|
|
||||||
|
|
||||||
find_unused_parameters = True
|
|
|
@ -0,0 +1,214 @@
|
||||||
|
# dataset settings
|
||||||
|
dataset_type = 'ImageNet'
|
||||||
|
preprocess_cfg = dict(
|
||||||
|
# RGB format normalization parameters
|
||||||
|
mean=[123.675, 116.28, 103.53],
|
||||||
|
std=[58.395, 57.12, 57.375],
|
||||||
|
# convert image from BGR to RGB
|
||||||
|
to_rgb=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
file_client_args = dict(
|
||||||
|
backend='petrel',
|
||||||
|
path_mapping=dict({
|
||||||
|
'./data/imagenet':
|
||||||
|
'sproject:s3://openmmlab/datasets/classification/imagenet',
|
||||||
|
'data/imagenet':
|
||||||
|
'sproject:s3://openmmlab/datasets/classification/imagenet'
|
||||||
|
}))
|
||||||
|
|
||||||
|
train_pipeline = [
|
||||||
|
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||||
|
dict(type='RandomResizedCrop', scale=224),
|
||||||
|
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||||
|
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
test_pipeline = [
|
||||||
|
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||||
|
dict(type='ResizeEdge', scale=256, edge='short', backend='cv2'),
|
||||||
|
dict(type='CenterCrop', crop_size=224),
|
||||||
|
dict(type='PackClsInputs'),
|
||||||
|
]
|
||||||
|
|
||||||
|
train_dataloader = dict(
|
||||||
|
batch_size=128,
|
||||||
|
num_workers=5,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_root='/mnt/cache/share/images',
|
||||||
|
ann_file='meta/train.txt',
|
||||||
|
data_prefix='train',
|
||||||
|
pipeline=train_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
|
||||||
|
|
||||||
|
val_dataloader = dict(
|
||||||
|
batch_size=128,
|
||||||
|
num_workers=5,
|
||||||
|
dataset=dict(
|
||||||
|
type=dataset_type,
|
||||||
|
data_root='/mnt/cache/share/images',
|
||||||
|
ann_file='meta/val.txt',
|
||||||
|
data_prefix='val',
|
||||||
|
pipeline=test_pipeline),
|
||||||
|
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||||
|
persistent_workers=True,
|
||||||
|
)
|
||||||
|
val_evaluator = dict(type='Accuracy', topk=(1, 5))
|
||||||
|
|
||||||
|
# If you want standard test, please manually configure the test dataset
|
||||||
|
test_dataloader = val_dataloader
|
||||||
|
test_evaluator = val_evaluator
|
||||||
|
|
||||||
|
# scheduler
|
||||||
|
|
||||||
|
# optimizer
|
||||||
|
optim_wrapper = dict(
|
||||||
|
optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
|
||||||
|
clip_grad=None)
|
||||||
|
|
||||||
|
# leanring policy
|
||||||
|
param_scheduler = [
|
||||||
|
dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
|
||||||
|
]
|
||||||
|
|
||||||
|
# train, val, test setting
|
||||||
|
train_cfg = dict(by_epoch=False, max_iters=300000)
|
||||||
|
val_cfg = dict()
|
||||||
|
test_cfg = dict()
|
||||||
|
|
||||||
|
# runtime
|
||||||
|
|
||||||
|
# defaults to use registries in mmrazor
|
||||||
|
default_scope = 'mmcls'
|
||||||
|
|
||||||
|
log_processor = dict(
|
||||||
|
window_size=100,
|
||||||
|
by_epoch=False,
|
||||||
|
custom_cfg=[
|
||||||
|
dict(
|
||||||
|
data_src='loss',
|
||||||
|
log_name='loss_large_window',
|
||||||
|
method_name='mean',
|
||||||
|
window_size=100)
|
||||||
|
])
|
||||||
|
|
||||||
|
# configure default hooks
|
||||||
|
default_hooks = dict(
|
||||||
|
# record the time of every iteration.
|
||||||
|
timer=dict(type='IterTimerHook'),
|
||||||
|
|
||||||
|
# print log every 100 iterations.
|
||||||
|
logger=dict(type='LoggerHook', interval=100),
|
||||||
|
|
||||||
|
# enable the parameter scheduler.
|
||||||
|
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||||
|
|
||||||
|
# save checkpoint per epoch.
|
||||||
|
checkpoint=dict(
|
||||||
|
type='CheckpointHook',
|
||||||
|
by_epoch=False,
|
||||||
|
interval=10000,
|
||||||
|
save_last=True,
|
||||||
|
max_keep_ckpts=3),
|
||||||
|
|
||||||
|
# set sampler seed in distributed evrionment.
|
||||||
|
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||||
|
|
||||||
|
# validation results visualization, set True to enable it.
|
||||||
|
visualization=dict(type='VisualizationHook', enable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# configure environment
|
||||||
|
env_cfg = dict(
|
||||||
|
# whether to enable cudnn benchmark
|
||||||
|
cudnn_benchmark=False,
|
||||||
|
|
||||||
|
# set multi process parameters
|
||||||
|
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||||
|
|
||||||
|
# set distributed parameters
|
||||||
|
dist_cfg=dict(backend='nccl'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# set visualizer
|
||||||
|
visualizer = None
|
||||||
|
# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||||
|
# vis_backends = [dict(type='LocalVisBackend')]
|
||||||
|
|
||||||
|
# set log level
|
||||||
|
log_level = 'INFO'
|
||||||
|
|
||||||
|
# load from which checkpoint
|
||||||
|
load_from = None
|
||||||
|
|
||||||
|
# "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth"
|
||||||
|
|
||||||
|
# whether to resume training from the loaded checkpoint
|
||||||
|
resume = False
|
||||||
|
|
||||||
|
# model
|
||||||
|
|
||||||
|
_STAGE_MUTABLE = dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='OneShotMutableOP',
|
||||||
|
candidates=dict(
|
||||||
|
shuffle_3x3=dict(
|
||||||
|
type='ShuffleBlock', kernel_size=3, norm_cfg=dict(type='BN')),
|
||||||
|
shuffle_5x5=dict(
|
||||||
|
type='ShuffleBlock', kernel_size=5, norm_cfg=dict(type='BN')),
|
||||||
|
shuffle_7x7=dict(
|
||||||
|
type='ShuffleBlock', kernel_size=7, norm_cfg=dict(type='BN')),
|
||||||
|
shuffle_xception=dict(
|
||||||
|
type='ShuffleXception', norm_cfg=dict(type='BN')),
|
||||||
|
))
|
||||||
|
|
||||||
|
arch_setting = [
|
||||||
|
# Parameters to build layers. 3 parameters are needed to construct a
|
||||||
|
# layer, from left to right: channel, num_blocks, mutable_cfg.
|
||||||
|
[64, 4, _STAGE_MUTABLE],
|
||||||
|
[160, 4, _STAGE_MUTABLE],
|
||||||
|
[320, 8, _STAGE_MUTABLE],
|
||||||
|
[640, 4, _STAGE_MUTABLE],
|
||||||
|
]
|
||||||
|
|
||||||
|
norm_cfg = dict(type='BN')
|
||||||
|
supernet = dict(
|
||||||
|
type='ImageClassifier',
|
||||||
|
data_preprocessor=preprocess_cfg,
|
||||||
|
backbone=dict(
|
||||||
|
_scope_='mmrazor',
|
||||||
|
type='SearchableShuffleNetV2',
|
||||||
|
widen_factor=1.0,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
arch_setting=arch_setting),
|
||||||
|
neck=dict(type='GlobalAveragePooling'),
|
||||||
|
head=dict(
|
||||||
|
type='LinearClsHead',
|
||||||
|
num_classes=1000,
|
||||||
|
in_channels=1024,
|
||||||
|
loss=dict(
|
||||||
|
type='LabelSmoothLoss',
|
||||||
|
num_classes=1000,
|
||||||
|
label_smooth_val=0.1,
|
||||||
|
mode='original',
|
||||||
|
loss_weight=1.0),
|
||||||
|
topk=(1, 5),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
mutator = dict(type='mmrazor.OneShotModuleMutator')
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='mmrazor.SPOS',
|
||||||
|
architecture=supernet,
|
||||||
|
mutator=mutator,
|
||||||
|
# fix_subnet='configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml'
|
||||||
|
)
|
||||||
|
|
||||||
|
find_unused_parameters = True
|
|
@ -0,0 +1,372 @@
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from mmengine.config import Config
|
||||||
|
|
||||||
|
from mmrazor.core import * # noqa: F401,F403
|
||||||
|
from mmrazor.models import * # noqa: F401,F403
|
||||||
|
from mmrazor.registry import MODELS
|
||||||
|
from mmrazor.utils import register_all_modules
|
||||||
|
|
||||||
|
|
||||||
|
def convert_spos_key(old_path, new_path):
    """Convert an MMRazor 1.x SPOS checkpoint to the 2.0 key layout.

    Renames ``choices`` -> ``_candidates``, strips legacy ``architecture.`` /
    ``model.`` prefixes, then re-prefixes every key with ``architecture.``
    (in 2.0 all supernet weights live under the algorithm's ``architecture``).

    Args:
        old_path (str): Path of the 1.x checkpoint to read.
        new_path (str): Path to write the converted checkpoint to.
    """
    # map_location='cpu' so GPU-saved checkpoints load on CPU-only hosts
    # (consistent with convert_cream_key below).
    old_dict = torch.load(old_path, map_location='cpu')
    new_dict = {'meta': old_dict['meta'], 'state_dict': {}}

    # Old substring -> new substring; applied in order to every key.
    mapping = {
        'choices': '_candidates',
        'architecture.': '',
        'model.': '',
    }

    for k, v in old_dict['state_dict'].items():
        new_key = k
        for _from, _to in mapping.items():
            new_key = new_key.replace(_from, _to)

        # Every weight is re-rooted under `architecture.` in 2.0.
        new_key = f'architecture.{new_key}'

        new_dict['state_dict'][new_key] = v

    torch.save(new_dict, new_path)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_detnas_key(old_path, new_path):
    """Convert an MMRazor 1.x DetNAS checkpoint to the 2.0 key layout.

    Renames ``choices`` -> ``_candidates`` and strips the legacy ``model.``
    prefix. Unlike the SPOS converter, no ``architecture.`` prefix is added.

    Args:
        old_path (str): Path of the 1.x checkpoint to read.
        new_path (str): Path to write the converted checkpoint to.
    """
    # map_location='cpu' so GPU-saved checkpoints load on CPU-only hosts
    # (consistent with convert_cream_key below).
    old_dict = torch.load(old_path, map_location='cpu')
    new_dict = {'meta': old_dict['meta'], 'state_dict': {}}

    # Old substring -> new substring; applied in order to every key.
    mapping = {
        'choices': '_candidates',
        'model.': '',
    }

    for k, v in old_dict['state_dict'].items():
        new_key = k
        for _from, _to in mapping.items():
            new_key = new_key.replace(_from, _to)

        new_dict['state_dict'][new_key] = v
    torch.save(new_dict, new_path)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_anglenas_key(old_path, new_path):
    """Convert an AngleNAS checkpoint to the MMRazor 2.0 key layout.

    The old file stores a bare state dict (no ``meta``/``state_dict``
    wrapper); keys are renamed (``choices`` -> ``_candidates``, ``model.``
    prefix stripped, ``mbv2`` -> ``mb``) and wrapped under ``state_dict``.

    Args:
        old_path (str): Path of the AngleNAS checkpoint to read.
        new_path (str): Path to write the converted checkpoint to.
    """
    # map_location='cpu' so GPU-saved checkpoints load on CPU-only hosts
    # (consistent with convert_cream_key below).
    old_dict = torch.load(old_path, map_location='cpu')
    new_dict = {'state_dict': {}}

    # Old substring -> new substring; applied in order to every key.
    mapping = {
        'choices': '_candidates',
        'model.': '',
        'mbv2': 'mb',
    }

    for k, v in old_dict.items():
        new_key = k
        for _from, _to in mapping.items():
            new_key = new_key.replace(_from, _to)

        new_dict['state_dict'][new_key] = v
    torch.save(new_dict, new_path)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_darts_key(old_path, new_path):
    """Convert an MMRazor 1.x DARTS checkpoint to the 2.0 key layout.

    Builds the 2.0 subnet model from its config, renames the old keys,
    then validates the result with a strict ``load_state_dict`` before
    saving. Prints both the model's and the converted dict's key lists
    for manual inspection.
    """
    old_dict = torch.load(old_path)
    new_dict = {'meta': old_dict['meta'], 'state_dict': {}}
    # Reference model used only to verify the converted keys load strictly.
    cfg = Config.fromfile(
        'configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py')
    # import ipdb; ipdb.set_trace()
    model = MODELS.build(cfg.model)

    print('============> module name')
    for name, module in model.state_dict().items():
        print(name)

    # Old substring -> new substring; applied in order to every key.
    mapping = {
        'choices': '_candidates',
        'model.': '',
        'edges': 'route',
    }

    for k, v in old_dict['state_dict'].items():
        new_key = k
        for _from, _to in mapping.items():
            new_key = new_key.replace(_from, _to)
        # Old keys contain a spurious '.0.' sequential index, e.g.:
        # cells.0.nodes.0.edges.choices.normal_n2_p1.0.choices.sep_conv_3x3.conv1.2.weight
        # Drop it from whichever of the two possible positions it occupies.
        splited_list = new_key.split('.')
        if len(splited_list) > 10 and splited_list[-6] == '0':
            del splited_list[-6]
            new_key = '.'.join(splited_list)
        elif len(splited_list) > 10 and splited_list[-5] == '0':
            del splited_list[-5]
            new_key = '.'.join(splited_list)

        new_dict['state_dict'][new_key] = v

    print('============> new dict')
    for key, v in new_dict['state_dict'].items():
        print(key)

    # Strict load: raises if any converted key does not match the 2.0 model.
    model.load_state_dict(new_dict['state_dict'], strict=True)

    torch.save(new_dict, new_path)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_cream_key(old_path, new_path):
    """Convert a Cream checkpoint to the MMRazor 2.0 key layout.

    Unlike the simple substring converters above, Cream and razor use
    structurally different module names, so keys are matched positionally:
    both key sets are grouped into 'first' (stem), 'middle' (blocks, keyed
    by stage index) and 'last' (head), then matched group-by-group via the
    per-group name-fragment tables below. The result is validated with a
    strict ``load_state_dict`` before saving.
    """
    # Always load to CPU so GPU-saved checkpoints convert on any host.
    old_dict = torch.load(old_path, map_location=torch.device('cpu'))
    new_dict = {'state_dict': {}}  # noqa: F841

    ordered_old_dict = OrderedDict(old_dict['state_dict'])

    # Reference 2.0 model whose key names are the conversion target.
    cfg = Config.fromfile('configs/nas/cream/cream_14_subnet_mobilenet.py')
    model = MODELS.build(cfg.model)

    model_name_list = []
    model_module_list = []

    # TODO show structure of model and checkpoint
    print('=' * 30, 'the key of model')
    for k, v in model.state_dict().items():
        print(k)

    print('=' * 30, 'the key of ckpt')
    for k, v in ordered_old_dict.items():
        print(k)

    # final mapping dict: cream key -> razor key
    mapping = {}

    # Name-fragment correspondence for middle (inverted-residual) blocks:
    # razor fragment -> cream fragment.
    middle_razor2cream = {  # noqa: F841
        # point-wise expansion
        'expand_conv.conv.weight': 'conv_pw.weight',
        'expand_conv.bn.weight': 'bn1.weight',
        'expand_conv.bn.bias': 'bn1.bias',
        'expand_conv.bn.running_mean': 'bn1.running_mean',
        'expand_conv.bn.running_var': 'bn1.running_var',
        'expand_conv.bn.num_batches_tracked': 'bn1.num_batches_tracked',

        # se
        'se.conv1.conv.weight': 'se.conv_reduce.weight',
        'se.conv1.conv.bias': 'se.conv_reduce.bias',
        'se.conv2.conv.weight': 'se.conv_expand.weight',
        'se.conv2.conv.bias': 'se.conv_expand.bias',

        # depth-wise conv
        'depthwise_conv.conv.weight': 'conv_dw.weight',
        'depthwise_conv.bn.weight': 'bn2.weight',
        'depthwise_conv.bn.bias': 'bn2.bias',
        'depthwise_conv.bn.running_mean': 'bn2.running_mean',
        'depthwise_conv.bn.running_var': 'bn2.running_var',
        'depthwise_conv.bn.num_batches_tracked': 'bn2.num_batches_tracked',

        # point-wise linear projection
        'linear_conv.conv.weight': 'conv_pwl.weight',
        'linear_conv.bn.weight': 'bn3.weight',
        'linear_conv.bn.bias': 'bn3.bias',
        'linear_conv.bn.running_mean': 'bn3.running_mean',
        'linear_conv.bn.running_var': 'bn3.running_var',
        'linear_conv.bn.num_batches_tracked': 'bn3.num_batches_tracked',

    }

    # Correspondence for the first (DepthSepConv) stage.
    first_razor2cream = {
        # for first depthsepconv dw
        'conv_dw.conv.weight': 'conv_dw.weight',
        'conv_dw.bn.weight': 'bn1.weight',
        'conv_dw.bn.bias': 'bn1.bias',
        'conv_dw.bn.running_mean': 'bn1.running_mean',
        'conv_dw.bn.running_var': 'bn1.running_var',
        'conv_dw.bn.num_batches_tracked': 'bn1.num_batches_tracked',

        # for first depthsepconv pw
        'conv_pw.conv.weight': 'conv_pw.weight',
        'conv_pw.bn.weight': 'bn2.weight',
        'conv_pw.bn.bias': 'bn2.bias',
        'conv_pw.bn.running_mean': 'bn2.running_mean',
        'conv_pw.bn.running_var': 'bn2.running_var',
        'conv_pw.bn.num_batches_tracked': 'bn2.num_batches_tracked',

        # se
        'se.conv1.conv.weight': 'se.conv_reduce.weight',
        'se.conv1.conv.bias': 'se.conv_reduce.bias',
        'se.conv2.conv.weight': 'se.conv_expand.weight',
        'se.conv2.conv.bias': 'se.conv_expand.bias',
    }

    # Correspondence for the last ConvBnAct stage.
    last_razor2cream = {
        # for last convbnact
        'conv2.conv.weight': 'conv.weight',
        'conv2.bn.weight': 'bn1.weight',
        'conv2.bn.bias': 'bn1.bias',
        'conv2.bn.running_mean': 'bn1.running_mean',
        'conv2.bn.running_var': 'bn1.running_var',
        'conv2.bn.num_batches_tracked': 'bn1.num_batches_tracked',
    }

    # Invert the tables: lookups below go cream fragment -> razor fragment.
    middle_cream2razor = {v: k for k, v in middle_razor2cream.items()}
    first_cream2razor = {v: k for k, v in first_razor2cream.items()}
    last_cream2razor = {v: k for k, v in last_razor2cream.items()}

    # 1. group the razor's module names
    grouped_razor_module_name = {
        'middle': {},
        'first': [],
        'last': [],
    }

    for name, module in model.state_dict().items():
        # Strip the 'backbone.' prefix for classification purposes only.
        tmp_name: str = name.split(
            'backbone.')[1] if 'backbone' in name else name
        model_name_list.append(tmp_name)
        model_module_list.append(module)

        # NOTE(review): the `len(tmp_name) <= 35` cutoff distinguishes the
        # stem conv from longer block-internal names containing 'conv1' —
        # brittle; verify against the actual model key list printed above.
        if 'conv1' in tmp_name and len(tmp_name) <= 35:
            # belong to stem conv
            grouped_razor_module_name['first'].append(name)
        elif 'head' in tmp_name:
            # belong to last linear
            grouped_razor_module_name['last'].append(name)
        else:
            # middle
            if tmp_name.startswith('layer'):
                # e.g. 'layer2.0....' -> stage key '2.0'
                key_of_middle = tmp_name[5:8]
                if key_of_middle not in grouped_razor_module_name['middle']:
                    grouped_razor_module_name['middle'][key_of_middle] = [name]
                else:
                    grouped_razor_module_name['middle'][key_of_middle].append(
                        name)
            elif tmp_name.startswith('conv2'):
                # Final ConvBnAct is treated as pseudo-stage '7.0'.
                key_of_middle = '7.0'
                if key_of_middle not in grouped_razor_module_name['middle']:
                    grouped_razor_module_name['middle'][key_of_middle] = [name]
                else:
                    grouped_razor_module_name['middle'][key_of_middle].append(
                        name)

    # 2. group the cream's module names
    grouped_cream_module_name = {
        'middle': {},
        'first': [],
        'last': [],
    }

    for k in ordered_old_dict.keys():
        if 'classifier' in k or 'conv_head' in k:
            # last conv
            grouped_cream_module_name['last'].append(k)
        elif 'blocks' in k:
            # middle blocks: 'blocks.X.Y....' -> stage key 'X.Y'
            key_of_middle = k[7:10]
            if key_of_middle not in grouped_cream_module_name['middle']:
                grouped_cream_module_name['middle'][key_of_middle] = [k]
            else:
                grouped_cream_module_name['middle'][key_of_middle].append(k)
        else:
            # first blocks
            grouped_cream_module_name['first'].append(k)

    # 4. process the first modules
    for cream_item in grouped_cream_module_name['first']:
        if 'conv_stem' in cream_item:
            # get corresponding item from razor
            for razor_item in grouped_razor_module_name['first']:
                if 'conv.weight' in razor_item:
                    mapping[cream_item] = razor_item
                    # Remove so the same razor key is never matched twice.
                    grouped_razor_module_name['first'].remove(razor_item)
                    break
        else:
            # Match by the trailing fragment (e.g. 'weight', 'running_mean').
            kws = cream_item.split('.')[-1]
            # get corresponding item from razor
            for razor_item in grouped_razor_module_name['first']:
                if kws in razor_item:
                    mapping[cream_item] = razor_item
                    grouped_razor_module_name['first'].remove(razor_item)

    # 5. process the last modules
    for cream_item in grouped_cream_module_name['last']:
        if 'classifier' in cream_item:
            kws = cream_item.split('.')[-1]
            for razor_item in grouped_razor_module_name['last']:
                if 'fc' in razor_item:
                    if kws in razor_item:
                        mapping[cream_item] = razor_item
                        grouped_razor_module_name['last'].remove(razor_item)
                        break

        elif 'conv_head' in cream_item:
            kws = cream_item.split('.')[-1]
            for razor_item in grouped_razor_module_name['last']:
                if 'head.conv2' in razor_item:
                    if kws in razor_item:
                        mapping[cream_item] = razor_item
                        grouped_razor_module_name['last'].remove(razor_item)

    # 6. process the middle modules
    for cream_group_id, cream_items in grouped_cream_module_name[
            'middle'].items():
        # get the corresponding group from razor: razor stages are offset
        # by one relative to cream blocks (e.g. cream '0.0' -> razor '1.0').
        razor_group_id: str = str(float(cream_group_id) + 1)
        razor_items: list = grouped_razor_module_name['middle'][razor_group_id]

        # Stage 1 is a DepthSepConv, stage 7 the final ConvBnAct; everything
        # in between is a standard inverted-residual block.
        if int(razor_group_id[0]) == 1:
            key_cream2razor = first_cream2razor
        elif int(razor_group_id[0]) == 7:
            key_cream2razor = last_cream2razor
        else:
            key_cream2razor = middle_cream2razor

        # matching razor items and cream items
        for cream_item in cream_items:
            # traverse all of key_cream2razor
            for cream_match, razor_match in key_cream2razor.items():
                if cream_match in cream_item:
                    # traverse razor_items to get the corresponding razor name
                    for razor_item in razor_items:
                        if razor_match in razor_item:
                            mapping[cream_item] = razor_item
                            break

    print('=' * 100)
    print('length of mapping: ', len(mapping.keys()))
    for k, v in mapping.items():
        print(k, '\t=>\t', v)
    print('#' * 100)

    # TODO DELETE this print
    print('**' * 20)
    for c, cm, r, rm in zip(ordered_old_dict.keys(), ordered_old_dict.values(),
                            model_name_list, model_module_list):
        print(f'{c}: shape {cm.shape} => {r}: shape {rm.shape}')
    print('**' * 20)

    # Rewrite every checkpoint entry under its matched razor name.
    # KeyError here means a cream key was never matched above.
    for k, v in ordered_old_dict.items():
        print(f'Mapping from {k} to {mapping[k]}......')
        new_dict['state_dict'][mapping[k]] = v

    # Strict load validates the full conversion before saving.
    model.load_state_dict(new_dict['state_dict'], strict=True)

    torch.save(new_dict, new_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Register all mmrazor modules so MODELS.build works inside the
    # converters that instantiate a reference model (darts / cream).
    register_all_modules(True)
    # old_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a.pth'  # noqa: E501
    # new_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth'  # noqa: E501
    # convert_spos_key(old_path, new_path)

    # old_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f.pth'  # noqa: E501
    # new_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth'  # noqa: E501
    # convert_detnas_key(old_path, new_path)

    # old_path = './data/14.pth.tar'
    # new_path = './data/14_2.0.pth'
    # old_path = '/mnt/lustre/dongpeijie/14.pth.tar'
    # new_path = '/mnt/lustre/dongpeijie/14_2.0.pth'
    # convert_cream_key(old_path, new_path)

    # old_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921.pth'  # noqa: E501
    # new_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth'  # noqa: E501
    # convert_darts_key(old_path, new_path)

    # Currently active conversion; swap the commented pairs above to run a
    # different converter. Paths are cluster-specific.
    old_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f.pth'  # noqa: E501
    new_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth'  # noqa: E501
    convert_anglenas_key(old_path, new_path)
|
|
@ -0,0 +1,280 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
import copy
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from mmengine import BaseDataElement
|
||||||
|
from mmengine.model import BaseModel
|
||||||
|
from mmengine.optim import OptimWrapper, OptimWrapperDict
|
||||||
|
from torch import nn
|
||||||
|
from torch.nn.modules.batchnorm import _BatchNorm
|
||||||
|
|
||||||
|
from mmrazor.models.mutators import DiffModuleMutator
|
||||||
|
from mmrazor.models.subnet import (SINGLE_MUTATOR_RANDOM_SUBNET, FixSubnet,
|
||||||
|
FixSubnetMixin)
|
||||||
|
from mmrazor.registry import MODELS
|
||||||
|
from ..base import BaseAlgorithm, LossResults
|
||||||
|
|
||||||
|
VALID_FIX_SUBNET = Union[str, FixSubnet, Dict[str, Dict[str, Any]]]
|
||||||
|
|
||||||
|
|
||||||
|
@MODELS.register_module()
class Darts(BaseAlgorithm, FixSubnetMixin):
    """Implementation of `DARTS <https://arxiv.org/abs/1806.09055>`_

    DARTS means Differentiable Architecture Search, a classic NAS algorithm.
    :class:`Darts` implements the APIs required by the DARTS, as well as the
    supernet training and subnet retraining logic for each iter.

    Args:
        architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel`
            or built model. Corresponding to supernet in NAS algorithm.
        mutator (dict|:obj:`DiffModuleMutator`): The config of
            :class:`DiffModuleMutator` or built mutator.
        fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or
            loaded dict or built :obj:`FixSubnet`.
        unroll (bool): Whether to use the second-order (unrolled) gradient
            approximation when updating architecture parameters.
            Defaults to False.
        norm_training (bool): Whether to set norm layers to training mode,
            namely, not freeze running stats (mean and var). Note: Effect on
            Batch Norm and its variants only. Defaults to False.
        data_preprocessor (dict, optional): The pre-process config of
            :class:`BaseDataPreprocessor`. Defaults to None.
        init_cfg (dict): Init config for ``BaseModule``.

    Note:
        Darts has two training mode: supernet training and subnet retraining.
        If `fix_subnet` is None, it means supernet training.
        If `fix_subnet` is not None, it means subnet training.

    Note:
        During supernet training, since each op is not fully trained, the
        statistics of :obj:_BatchNorm are inaccurate. This problem affects the
        evaluation of the performance of each subnet in the search phase. There
        are usually two ways to solve this problem, both need to set
        `norm_training` to True:

        1) Using a large batch size, BNs use the mean and variance of the
           current batch during forward.
        2) Recalibrate the statistics of BN before searching.
    """

    def __init__(self,
                 architecture: Union[BaseModel, Dict],
                 mutator: Optional[Union[DiffModuleMutator, Dict]] = None,
                 fix_subnet: Optional[VALID_FIX_SUBNET] = None,
                 unroll: bool = False,
                 norm_training: bool = False,
                 data_preprocessor: Optional[Union[dict, nn.Module]] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(architecture, data_preprocessor, init_cfg)

        # Darts has two training mode: supernet training and subnet retraining.
        # fix_subnet is not None, means subnet retraining.
        if fix_subnet:
            # According to fix_subnet, delete the unchosen part of supernet
            self.load_fix_subnet(fix_subnet, prefix='architecture.')
            self.is_supernet = False
        else:
            assert mutator is not None, \
                'mutator cannot be None when fix_subnet is None.'
            if isinstance(mutator, DiffModuleMutator):
                self.mutator = mutator
            elif isinstance(mutator, dict):
                self.mutator = MODELS.build(mutator)
            else:
                raise TypeError('mutator should be a `dict` or '
                                f'`DiffModuleMutator` instance, but got '
                                f'{type(mutator)}')

            # Mutator is an essential component of the NAS algorithm. It
            # provides some APIs commonly used by NAS.
            # Before using it, you must do some preparations according to
            # the supernet.
            self.mutator.prepare_from_supernet(self.architecture)
            self.is_supernet = True

        self.norm_training = norm_training
        self.unroll = unroll

    def sample_subnet(self) -> SINGLE_MUTATOR_RANDOM_SUBNET:
        """Random sample subnet by mutator."""
        return self.mutator.sample_choices()

    def set_subnet(self, subnet: SINGLE_MUTATOR_RANDOM_SUBNET):
        """Set the subnet sampled by :meth:sample_subnet."""
        self.mutator.set_choices(subnet)

    def loss(
        self,
        batch_inputs: torch.Tensor,
        data_samples: Optional[List[BaseDataElement]] = None,
    ) -> LossResults:
        """Calculate losses from a batch of inputs and data samples."""
        if self.is_supernet:
            # Supernet mode: resample and activate a random subnet before
            # every forward; the forward itself is identical to subnet mode.
            random_subnet = self.sample_subnet()
            self.set_subnet(random_subnet)
            return self.architecture(batch_inputs, data_samples, mode='loss')
        else:
            return self.architecture(batch_inputs, data_samples, mode='loss')

    def train(self, mode=True):
        """Convert the model into eval mode while keep normalization layer
        unfreezed."""

        super().train(mode)
        # In eval mode, optionally keep BN layers collecting batch statistics
        # (see the class-level Note about inaccurate supernet BN stats).
        if self.norm_training and not mode:
            for module in self.architecture.modules():
                if isinstance(module, _BatchNorm):
                    module.training = True

    def train_step(self, data: List[dict],
                   optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
        """The iteration step during training.

        This method defines an iteration step during training, except for the
        back propagation and optimizer updating, which are done in an optimizer
        hook. Note that in some complicated cases or models, the whole process
        including back propagation and optimizer updating are also defined in
        this method, such as GAN.
        Args:
            data (dict): The output of dataloader.
            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
                runner is passed to ``train_step()``. This argument is unused
                and reserved.
        Returns:
            dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
                ``num_samples``.
                ``loss`` is a tensor for back propagation, which can be a
                weighted sum of multiple losses.
                ``log_vars`` contains all the variables to be sent to the
                logger.
                ``num_samples`` indicates the batch size (when the model is
                DDP, it means the batch size on each GPU), which is used for
                averaging the logs.
        """
        # Bi-level optimization path: one batch for the architecture
        # parameters, one for the supernet weights, each with its own
        # optimizer inside the OptimWrapperDict.
        # NOTE(review): this branch is still work-in-progress — the
        # `.backward()` calls are commented out and `self.loss` is invoked
        # with a raw data dict rather than (batch_inputs, data_samples);
        # confirm before relying on unroll/bi-level training.
        if isinstance(data, (tuple, list)) and isinstance(
                optim_wrapper, OptimWrapperDict):
            assert len(data) == len(optim_wrapper), \
                f'The length of data {len(data)} should be equal to that of optimizers {len(optim_wrapper)}.'  # noqa: E501

            # TODO check the order of data
            train_supernet_data, train_arch_data = data

            # TODO mutator optimizer zero_grad
            optim_wrapper.zero_grad()

            if self.unroll:
                # Second-order approximation: virtual step + hessian term.
                self._unrolled_backward(train_arch_data, train_supernet_data,
                                        optim_wrapper)  # TODO optimizer
            else:
                # TODO process the input
                arch_loss = self.loss(train_arch_data)  # noqa: F841
                # arch_loss.backward()

            # TODO mutator optimizer step
            optim_wrapper.step()

            model_loss = self.loss(train_supernet_data)

            # TODO optimizer architecture zero_grad
            optim_wrapper.zero_grad()
            # model_loss.backward()

            # Gradient clipping on supernet weights, as in the DARTS paper.
            nn.utils.clip_grad_norm_(
                self.architecture.parameters(), max_norm=5, norm_type=2)

            # TODO optimizer architecture step
            optim_wrapper.step()

            outputs = dict(
                loss=model_loss,
                num_samples=len(train_supernet_data['img'].data))
        else:
            # Single-optimizer path (e.g. subnet retraining): defer to the
            # default BaseModel.train_step.
            outputs = super().train_step(data, optim_wrapper)

        return outputs

    def _unrolled_backward(self, train_arch_data, train_supernet_data,
                           optimizer):
        """Compute unrolled loss and backward its gradients."""
        backup_params = copy.deepcopy(tuple(self.architecture.parameters()))

        # do virtual step on training data
        lr = optimizer['architecture'].param_groups[0]['lr']
        momentum = optimizer['architecture'].param_groups[0]['momentum']
        weight_decay = optimizer['architecture'].param_groups[0][
            'weight_decay']
        self._compute_virtual_model(train_supernet_data, lr, momentum,
                                    weight_decay, optimizer)

        # calculate unrolled loss on validation data
        # keep gradients for model here for compute hessian
        losses = self(**train_arch_data)
        loss, _ = self._parse_losses(losses)
        w_model, w_arch = tuple(self.architecture.parameters()), tuple(
            self.mutator.parameters())
        w_grads = torch.autograd.grad(loss, w_model + w_arch)
        d_model, d_arch = w_grads[:len(w_model)], w_grads[len(w_model):]

        # compute hessian and final gradients
        hessian = self._compute_hessian(backup_params, d_model,
                                        train_supernet_data)
        with torch.no_grad():
            for param, d, h in zip(w_arch, d_arch, hessian):
                # gradient = dalpha - lr * hessian
                param.grad = d - lr * h

        # restore weights
        self._restore_weights(backup_params)

    def _compute_virtual_model(self, data, lr, momentum, weight_decay,
                               optimizer):
        """Compute unrolled weights w`"""
        # don't need zero_grad, using autograd to calculate gradients
        losses = self(**data)
        loss, _ = self._parse_losses(losses)
        gradients = torch.autograd.grad(loss, self.architecture.parameters())
        with torch.no_grad():
            for w, g in zip(self.architecture.parameters(), gradients):
                m = optimizer['architecture'].state[w].get(
                    'momentum_buffer', 0.)
                # NOTE(review): `w = w - ...` rebinds the local name only and
                # does NOT modify the parameter in place, so the "virtual
                # step" has no effect on the model; an in-place update
                # (e.g. `w.sub_(...)`) may be intended — confirm.
                w = w - lr * (momentum * m + g + weight_decay * w)

    def _restore_weights(self, backup_params):
        # Copy the backed-up parameter values back into the live model.
        with torch.no_grad():
            for param, backup in zip(self.architecture.parameters(),
                                     backup_params):
                param.copy_(backup)

    def _compute_hessian(self, backup_params, dw, data):
        """
        dw = dw` { L_val(w`, alpha) }
        w+ = w + eps * dw
        w- = w - eps * dw
        hessian = (dalpha { L_trn(w+, alpha) } \
            - dalpha { L_trn(w-, alpha) }) / (2*eps)
        eps = 0.01 / ||dw||
        """
        self._restore_weights(backup_params)
        norm = torch.cat([w.view(-1) for w in dw]).norm()
        eps = 0.01 / norm
        if norm < 1E-8:
            print(
                'In computing hessian, norm is smaller than 1E-8, \
                cause eps to be %.6f.', norm.item())

        dalphas = []
        # Perturb weights to w+ first, then (via -2*eps) to w-.
        for e in [eps, -2. * eps]:
            # w+ = w + eps*dw`, w- = w - eps*dw`
            with torch.no_grad():
                for p, d in zip(self.architecture.parameters(), dw):
                    p += e * d

            losses = self(**data)
            loss, _ = self._parse_losses(losses)
            dalphas.append(
                torch.autograd.grad(loss, tuple(self.mutator.parameters())))
        # dalpha { L_trn(w+) }, # dalpha { L_trn(w-) }
        dalpha_pos, dalpha_neg = dalphas
        # Central finite difference approximation of the hessian-vector term.
        hessian = [(p - n) / (2. * eps)
                   for p, n in zip(dalpha_pos, dalpha_neg)]
        return hessian
|
|
@ -1,3 +1,4 @@
|
||||||
# Copyright (c) OpenMMLab. All rights reserved.
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
from .backbones import * # noqa: F401,F403
|
from .backbones import * # noqa: F401,F403
|
||||||
|
from .components import * # noqa: F401,F403
|
||||||
from .dynamic_op import * # noqa: F401,F403
|
from .dynamic_op import * # noqa: F401,F403
|
||||||
|
|
|
@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
from mmcls.models.backbones.base_backbone import BaseBackbone
|
||||||
from mmcv.cnn import build_activation_layer, build_norm_layer
|
from mmcv.cnn import build_activation_layer, build_norm_layer
|
||||||
from torch import Tensor
|
from torch import Tensor
|
||||||
|
|
||||||
|
@ -126,12 +127,8 @@ class Node(nn.Module):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
edges = nn.ModuleDict()
|
edges = nn.ModuleDict()
|
||||||
for i in range(num_prev_nodes):
|
for i in range(num_prev_nodes):
|
||||||
if i < num_downsample_nodes:
|
stride = 2 if i < num_downsample_nodes else 1
|
||||||
stride = 2
|
edge_id = f'{node_id}_p{i}'
|
||||||
else:
|
|
||||||
stride = 1
|
|
||||||
|
|
||||||
edge_id = '{}_p{}'.format(node_id, i)
|
|
||||||
|
|
||||||
module_kwargs = dict(
|
module_kwargs = dict(
|
||||||
in_channels=channels,
|
in_channels=channels,
|
||||||
|
@ -143,13 +140,14 @@ class Node(nn.Module):
|
||||||
mutable_cfg.update(alias=edge_id)
|
mutable_cfg.update(alias=edge_id)
|
||||||
edges.add_module(edge_id, MODELS.build(mutable_cfg))
|
edges.add_module(edge_id, MODELS.build(mutable_cfg))
|
||||||
|
|
||||||
|
route_cfg.update(alias=node_id)
|
||||||
route_cfg.update(edges=edges)
|
route_cfg.update(edges=edges)
|
||||||
self.edges = MODELS.build(route_cfg)
|
self.route = MODELS.build(route_cfg)
|
||||||
|
|
||||||
def forward(self, prev_nodes: Union[List[Tensor],
|
def forward(self, prev_nodes: Union[List[Tensor],
|
||||||
Tuple[Tensor]]) -> Tensor:
|
Tuple[Tensor]]) -> Tensor:
|
||||||
"""Forward with the previous nodes list."""
|
"""Forward with the previous nodes list."""
|
||||||
return self.edges(prev_nodes)
|
return self.route(prev_nodes)
|
||||||
|
|
||||||
|
|
||||||
class Cell(nn.Module):
|
class Cell(nn.Module):
|
||||||
|
@ -223,8 +221,7 @@ class Cell(nn.Module):
|
||||||
cur_tensor = node(tensors)
|
cur_tensor = node(tensors)
|
||||||
tensors.append(cur_tensor)
|
tensors.append(cur_tensor)
|
||||||
|
|
||||||
output = torch.cat(tensors[2:], dim=1)
|
return torch.cat(tensors[2:], dim=1)
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
class AuxiliaryModule(nn.Module):
|
class AuxiliaryModule(nn.Module):
|
||||||
|
@ -263,7 +260,7 @@ class AuxiliaryModule(nn.Module):
|
||||||
|
|
||||||
|
|
||||||
@MODELS.register_module()
|
@MODELS.register_module()
|
||||||
class DartsBackbone(nn.Module, FixSubnetMixin):
|
class DartsBackbone(BaseBackbone, FixSubnetMixin):
|
||||||
"""Backbone of Differentiable Architecture Search (DARTS).
|
"""Backbone of Differentiable Architecture Search (DARTS).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -348,7 +345,7 @@ class DartsBackbone(nn.Module, FixSubnetMixin):
|
||||||
prev_reduction, reduction = reduction, False
|
prev_reduction, reduction = reduction, False
|
||||||
# Reduce featuremap size and double channels in 1/3
|
# Reduce featuremap size and double channels in 1/3
|
||||||
# and 2/3 layer.
|
# and 2/3 layer.
|
||||||
if i == self.num_layers // 3 or i == 2 * self.num_layers // 3:
|
if i in [self.num_layers // 3, 2 * self.num_layers // 3]:
|
||||||
self.out_channels *= 2
|
self.out_channels *= 2
|
||||||
reduction = True
|
reduction = True
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
|
||||||
Excamples:
|
Excamples:
|
||||||
>>> mutable_cfg = dict(
|
>>> mutable_cfg = dict(
|
||||||
... type='OneShotMutableOP',
|
... type='OneShotMutableOP',
|
||||||
... candidate_ops=dict(
|
... candidates=dict(
|
||||||
... mb_k3e1=dict(
|
... mb_k3e1=dict(
|
||||||
... type='MBBlock',
|
... type='MBBlock',
|
||||||
... kernel_size=3,
|
... kernel_size=3,
|
||||||
|
@ -87,7 +87,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
|
||||||
]
|
]
|
||||||
) -> None:
|
) -> None:
|
||||||
for index in out_indices:
|
for index in out_indices:
|
||||||
if index not in range(0, 8):
|
if index not in range(8):
|
||||||
raise ValueError('the item in out_indices must in '
|
raise ValueError('the item in out_indices must in '
|
||||||
f'range(0, 8). But received {index}')
|
f'range(0, 8). But received {index}')
|
||||||
|
|
||||||
|
@ -147,6 +147,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin):
|
||||||
conv_cfg=self.conv_cfg,
|
conv_cfg=self.conv_cfg,
|
||||||
norm_cfg=self.norm_cfg,
|
norm_cfg=self.norm_cfg,
|
||||||
act_cfg=self.act_cfg)
|
act_cfg=self.act_cfg)
|
||||||
|
|
||||||
self.add_module('conv2', layer)
|
self.add_module('conv2', layer)
|
||||||
self.layers.append('conv2')
|
self.layers.append('conv2')
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ class SearchableShuffleNetV2(BaseBackbone, FixSubnetMixin):
|
||||||
Excamples:
|
Excamples:
|
||||||
>>> mutable_cfg = dict(
|
>>> mutable_cfg = dict(
|
||||||
... type='OneShotMutableOP',
|
... type='OneShotMutableOP',
|
||||||
... candidate_ops=dict(
|
... candidates=dict(
|
||||||
... shuffle_3x3=dict(
|
... shuffle_3x3=dict(
|
||||||
... type='ShuffleBlock',
|
... type='ShuffleBlock',
|
||||||
... kernel_size=3,
|
... kernel_size=3,
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
from .heads import CreamClsHead
|
||||||
|
|
||||||
|
__all__ = ['CreamClsHead']
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
from .cream_head import CreamClsHead
|
||||||
|
|
||||||
|
__all__ = ['CreamClsHead']
|
|
@ -0,0 +1,72 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
|
||||||
|
from typing import Dict, Optional, Tuple
|
||||||
|
|
||||||
|
from mmcls.models.heads import LinearClsHead
|
||||||
|
from mmcv.cnn import ConvModule
|
||||||
|
from torch import Tensor, nn
|
||||||
|
|
||||||
|
from mmrazor.registry import MODELS
|
||||||
|
|
||||||
|
|
||||||
|
@MODELS.register_module()
|
||||||
|
class CreamClsHead(LinearClsHead):
|
||||||
|
"""Linear classifier head for cream.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
num_classes (int): Number of categories excluding the background
|
||||||
|
category.
|
||||||
|
in_channels (int): Number of channels in the input feature map.
|
||||||
|
num_features (int): Number of features in the conv2d.
|
||||||
|
act_cfg (dict): Config dict for activation layer.
|
||||||
|
Default: dict(type='ReLU6').
|
||||||
|
init_cfg (dict, optional): the config to control the initialization.
|
||||||
|
Defaults to ``dict(type='Normal', layer='Linear', std=0.01)``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
num_classes: int,
|
||||||
|
in_channels: int,
|
||||||
|
num_features: int = 1280,
|
||||||
|
act_cfg: Dict = dict(type='ReLU6'),
|
||||||
|
init_cfg: Optional[dict] = dict(
|
||||||
|
type='Normal', layer='Linear', std=0.01),
|
||||||
|
**kwargs):
|
||||||
|
super().__init__(
|
||||||
|
num_classes=num_classes,
|
||||||
|
in_channels=in_channels,
|
||||||
|
init_cfg=init_cfg,
|
||||||
|
**kwargs)
|
||||||
|
|
||||||
|
layer = ConvModule(
|
||||||
|
in_channels=self.in_channels,
|
||||||
|
out_channels=num_features,
|
||||||
|
kernel_size=1,
|
||||||
|
stride=1,
|
||||||
|
padding=0,
|
||||||
|
conv_cfg=None,
|
||||||
|
norm_cfg=None,
|
||||||
|
act_cfg=act_cfg)
|
||||||
|
|
||||||
|
self.add_module('conv2', layer)
|
||||||
|
|
||||||
|
self.fc = nn.Linear(num_features, self.num_classes)
|
||||||
|
|
||||||
|
# def pre_logits(self, feats: Tuple[Tensor]) -> Tensor:
|
||||||
|
# """The process before the final classification head.
|
||||||
|
|
||||||
|
# The input ``feats`` is a tuple of tensor, and each tensor is the
|
||||||
|
# feature of a backbone stage. In ``LinearClsHead``, we just obtain the
|
||||||
|
# feature of the last stage.
|
||||||
|
# """
|
||||||
|
# # The LinearClsHead doesn't have other module, just return after
|
||||||
|
# # unpacking.
|
||||||
|
# return feats[-1]
|
||||||
|
|
||||||
|
def forward(self, feats: Tuple[Tensor]) -> Tensor:
|
||||||
|
"""The forward process."""
|
||||||
|
logits = self.pre_logits(feats)
|
||||||
|
logits = logits.unsqueeze(-1).unsqueeze(-1)
|
||||||
|
logits = self.conv2(logits)
|
||||||
|
logits = logits.flatten(1)
|
||||||
|
return self.fc(logits)
|
|
@ -99,7 +99,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
DARTS. Search the best module by learnable parameters `arch_param`.
|
DARTS. Search the best module by learnable parameters `arch_param`.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
candidate_ops (dict[str, dict]): the configs for the candidate
|
candidates (dict[str, dict]): the configs for the candidate
|
||||||
operations.
|
operations.
|
||||||
module_kwargs (dict[str, dict], optional): Module initialization named
|
module_kwargs (dict[str, dict], optional): Module initialization named
|
||||||
arguments. Defaults to None.
|
arguments. Defaults to None.
|
||||||
|
@ -110,23 +110,29 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
and `Pretrained`.
|
and `Pretrained`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, candidate_ops: Dict[str, Dict], **kwargs) -> None:
|
def __init__(
|
||||||
super().__init__(**kwargs)
|
self,
|
||||||
assert len(candidate_ops) >= 1, \
|
candidates: Dict[str, Dict],
|
||||||
|
module_kwargs: Optional[Dict[str, Dict]] = None,
|
||||||
|
alias: Optional[str] = None,
|
||||||
|
init_cfg: Optional[Dict] = None,
|
||||||
|
) -> None:
|
||||||
|
super().__init__(
|
||||||
|
module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
|
||||||
|
assert len(candidates) >= 1, \
|
||||||
f'Number of candidate op must greater than or equal to 1, ' \
|
f'Number of candidate op must greater than or equal to 1, ' \
|
||||||
f'but got: {len(candidate_ops)}'
|
f'but got: {len(candidates)}'
|
||||||
|
|
||||||
self._is_fixed = False
|
self._is_fixed = False
|
||||||
self._candidate_ops = self._build_ops(candidate_ops,
|
self._candidates = self._build_ops(candidates, self.module_kwargs)
|
||||||
self.module_kwargs)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_ops(candidate_ops: Dict[str, Dict],
|
def _build_ops(candidates: Dict[str, Dict],
|
||||||
module_kwargs: Optional[Dict[str, Dict]]) -> nn.ModuleDict:
|
module_kwargs: Optional[Dict[str, Dict]]) -> nn.ModuleDict:
|
||||||
"""Build candidate operations based on candidate_ops configures.
|
"""Build candidate operations based on candidates configures.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
candidate_ops (dict[str, dict]): the configs for the candidate
|
candidates (dict[str, dict]): the configs for the candidate
|
||||||
operations.
|
operations.
|
||||||
module_kwargs (dict[str, dict], optional): Module initialization
|
module_kwargs (dict[str, dict], optional): Module initialization
|
||||||
named arguments.
|
named arguments.
|
||||||
|
@ -137,7 +143,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
is the corresponding candidate operation.
|
is the corresponding candidate operation.
|
||||||
"""
|
"""
|
||||||
ops = nn.ModuleDict()
|
ops = nn.ModuleDict()
|
||||||
for name, op_cfg in candidate_ops.items():
|
for name, op_cfg in candidates.items():
|
||||||
assert name not in ops
|
assert name not in ops
|
||||||
if module_kwargs is not None:
|
if module_kwargs is not None:
|
||||||
op_cfg.update(module_kwargs)
|
op_cfg.update(module_kwargs)
|
||||||
|
@ -154,7 +160,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
Returns:
|
Returns:
|
||||||
Tensor: the result of forward the fixed operation.
|
Tensor: the result of forward the fixed operation.
|
||||||
"""
|
"""
|
||||||
return self._candidate_ops[self._chosen](x)
|
return sum(self._candidates[choice](x) for choice in self._chosen)
|
||||||
|
|
||||||
def forward_arch_param(self,
|
def forward_arch_param(self,
|
||||||
x: Any,
|
x: Any,
|
||||||
|
@ -180,7 +186,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
|
|
||||||
# forward based on probs
|
# forward based on probs
|
||||||
outputs = list()
|
outputs = list()
|
||||||
for prob, module in zip(probs, self._candidate_ops.values()):
|
for prob, module in zip(probs, self._candidates.values()):
|
||||||
if prob > 0.:
|
if prob > 0.:
|
||||||
outputs.append(prob * module(x))
|
outputs.append(prob * module(x))
|
||||||
|
|
||||||
|
@ -197,11 +203,11 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
Tensor: the result of forward all of the ``choice`` operation.
|
Tensor: the result of forward all of the ``choice`` operation.
|
||||||
"""
|
"""
|
||||||
outputs = list()
|
outputs = list()
|
||||||
for op in self._candidate_ops.values():
|
for op in self._candidates.values():
|
||||||
outputs.append(op(x))
|
outputs.append(op(x))
|
||||||
return sum(outputs)
|
return sum(outputs)
|
||||||
|
|
||||||
def fix_chosen(self, chosen: str) -> None:
|
def fix_chosen(self, chosen: Union[str, List[str]]) -> None:
|
||||||
"""Fix mutable with `choice`. This operation would convert `unfixed`
|
"""Fix mutable with `choice`. This operation would convert `unfixed`
|
||||||
mode to `fixed` mode. The :attr:`is_fixed` will be set to True and only
|
mode to `fixed` mode. The :attr:`is_fixed` will be set to True and only
|
||||||
the selected operations can be retained.
|
the selected operations can be retained.
|
||||||
|
@ -215,9 +221,12 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
'The mode of current MUTABLE is `fixed`. '
|
'The mode of current MUTABLE is `fixed`. '
|
||||||
'Please do not call `fix_chosen` function again.')
|
'Please do not call `fix_chosen` function again.')
|
||||||
|
|
||||||
|
if isinstance(chosen, str):
|
||||||
|
chosen = [chosen]
|
||||||
|
|
||||||
for c in self.choices:
|
for c in self.choices:
|
||||||
if c != chosen:
|
if c not in chosen:
|
||||||
self._candidate_ops.pop(c)
|
self._candidates.pop(c)
|
||||||
|
|
||||||
self._chosen = chosen
|
self._chosen = chosen
|
||||||
self.is_fixed = True
|
self.is_fixed = True
|
||||||
|
@ -225,7 +234,7 @@ class DiffMutableOP(DiffMutableModule[str, str]):
|
||||||
@property
|
@property
|
||||||
def choices(self) -> List[str]:
|
def choices(self) -> List[str]:
|
||||||
"""list: all choices. """
|
"""list: all choices. """
|
||||||
return list(self._candidate_ops.keys())
|
return list(self._candidates.keys())
|
||||||
|
|
||||||
|
|
||||||
@MODELS.register_module()
|
@MODELS.register_module()
|
||||||
|
@ -241,6 +250,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
with_arch_param (bool): whether forward with arch_param. When set to
|
with_arch_param (bool): whether forward with arch_param. When set to
|
||||||
`True`, a differentiable way is adopted. When set to `False`,
|
`True`, a differentiable way is adopted. When set to `False`,
|
||||||
a non-differentiable way is adopted.
|
a non-differentiable way is adopted.
|
||||||
|
alias (str, optional): alias of the `DiffChoiceRoute`.
|
||||||
init_cfg (dict, optional): initialization configuration dict for
|
init_cfg (dict, optional): initialization configuration dict for
|
||||||
``BaseModule``. OpenMMLab has implement 6 initializers including
|
``BaseModule``. OpenMMLab has implement 6 initializers including
|
||||||
`Constant`, `Xavier`, `Normal`, `Uniform`, `Kaiming`,
|
`Constant`, `Xavier`, `Normal`, `Uniform`, `Kaiming`,
|
||||||
|
@ -274,16 +284,17 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
self,
|
self,
|
||||||
edges: nn.ModuleDict,
|
edges: nn.ModuleDict,
|
||||||
with_arch_param: bool = False,
|
with_arch_param: bool = False,
|
||||||
|
alias: Optional[str] = None,
|
||||||
init_cfg: Optional[Dict] = None,
|
init_cfg: Optional[Dict] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(init_cfg=init_cfg)
|
super().__init__(alias=alias, init_cfg=init_cfg)
|
||||||
assert len(edges) >= 1, \
|
assert len(edges) >= 1, \
|
||||||
f'Number of edges must greater than or equal to 1, ' \
|
f'Number of edges must greater than or equal to 1, ' \
|
||||||
f'but got: {len(edges)}'
|
f'but got: {len(edges)}'
|
||||||
|
|
||||||
self._with_arch_param = with_arch_param
|
self._with_arch_param = with_arch_param
|
||||||
self._is_fixed = False
|
self._is_fixed = False
|
||||||
self._edges: nn.ModuleDict = edges
|
self._candidates: nn.ModuleDict = edges
|
||||||
|
|
||||||
def forward_fixed(self, inputs: Union[List, Tuple]) -> Tensor:
|
def forward_fixed(self, inputs: Union[List, Tuple]) -> Tensor:
|
||||||
"""Forward when the mutable is in `fixed` mode.
|
"""Forward when the mutable is in `fixed` mode.
|
||||||
|
@ -302,7 +313,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
outputs = list()
|
outputs = list()
|
||||||
for choice, x in zip(self._unfixed_choices, inputs):
|
for choice, x in zip(self._unfixed_choices, inputs):
|
||||||
if choice in self._chosen:
|
if choice in self._chosen:
|
||||||
outputs.append(self._edges[choice](x))
|
outputs.append(self._candidates[choice](x))
|
||||||
return sum(outputs)
|
return sum(outputs)
|
||||||
|
|
||||||
def forward_arch_param(self,
|
def forward_arch_param(self,
|
||||||
|
@ -319,15 +330,16 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
Returns:
|
Returns:
|
||||||
Tensor: the result of forward with ``arch_param``.
|
Tensor: the result of forward with ``arch_param``.
|
||||||
"""
|
"""
|
||||||
assert len(x) == len(self._edges), \
|
assert len(x) == len(self._candidates), \
|
||||||
f'Length of `edges` {len(self._edges)} should be same as ' \
|
f'Length of `edges` {len(self._candidates)} should be ' \
|
||||||
f'the length of inputs {len(x)}.'
|
f'same as the length of inputs {len(x)}.'
|
||||||
|
|
||||||
if self._with_arch_param:
|
if self._with_arch_param:
|
||||||
probs = self.compute_arch_probs(arch_param=arch_param)
|
probs = self.compute_arch_probs(arch_param=arch_param)
|
||||||
|
|
||||||
outputs = list()
|
outputs = list()
|
||||||
for prob, module, input in zip(probs, self._edges.values(), x):
|
for prob, module, input in zip(probs, self._candidates.values(),
|
||||||
|
x):
|
||||||
if prob > 0:
|
if prob > 0:
|
||||||
# prob may equal to 0 in gumbel softmax.
|
# prob may equal to 0 in gumbel softmax.
|
||||||
outputs.append(prob * module(input))
|
outputs.append(prob * module(input))
|
||||||
|
@ -346,12 +358,12 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
Returns:
|
Returns:
|
||||||
Tensor: the result of forward all of the ``choice`` operation.
|
Tensor: the result of forward all of the ``choice`` operation.
|
||||||
"""
|
"""
|
||||||
assert len(x) == len(self._edges), \
|
assert len(x) == len(self._candidates), \
|
||||||
f'Lenght of edges {len(self._edges)} should be same as ' \
|
f'Lenght of edges {len(self._candidates)} should be same as ' \
|
||||||
f'the length of inputs {len(x)}.'
|
f'the length of inputs {len(x)}.'
|
||||||
|
|
||||||
outputs = list()
|
outputs = list()
|
||||||
for op, input in zip(self._edges.values(), x):
|
for op, input in zip(self._candidates.values(), x):
|
||||||
outputs.append(op(input))
|
outputs.append(op(input))
|
||||||
|
|
||||||
return sum(outputs)
|
return sum(outputs)
|
||||||
|
@ -373,7 +385,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
|
|
||||||
for c in self.choices:
|
for c in self.choices:
|
||||||
if c not in chosen:
|
if c not in chosen:
|
||||||
self._edges.pop(c)
|
self._candidates.pop(c)
|
||||||
|
|
||||||
self._chosen = chosen
|
self._chosen = chosen
|
||||||
self.is_fixed = True
|
self.is_fixed = True
|
||||||
|
@ -381,7 +393,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
|
||||||
@property
|
@property
|
||||||
def choices(self) -> List[CHOSEN_TYPE]:
|
def choices(self) -> List[CHOSEN_TYPE]:
|
||||||
"""list: all choices. """
|
"""list: all choices. """
|
||||||
return list(self._edges.keys())
|
return list(self._candidates.keys())
|
||||||
|
|
||||||
|
|
||||||
@MODELS.register_module()
|
@MODELS.register_module()
|
||||||
|
@ -413,10 +425,14 @@ class GumbelChoiceRoute(DiffChoiceRoute):
|
||||||
tau: float = 1.0,
|
tau: float = 1.0,
|
||||||
hard: bool = True,
|
hard: bool = True,
|
||||||
with_arch_param: bool = False,
|
with_arch_param: bool = False,
|
||||||
|
alias: Optional[str] = None,
|
||||||
init_cfg: Optional[Dict] = None,
|
init_cfg: Optional[Dict] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
edges=edges, with_arch_param=with_arch_param, init_cfg=init_cfg)
|
edges=edges,
|
||||||
|
with_arch_param=with_arch_param,
|
||||||
|
alias=alias,
|
||||||
|
init_cfg=init_cfg)
|
||||||
self.tau = tau
|
self.tau = tau
|
||||||
self.hard = hard
|
self.hard = hard
|
||||||
|
|
||||||
|
|
|
@ -100,7 +100,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
blocks.
|
blocks.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
candidate_ops (dict[str, dict]): the configs for the candidate
|
candidates (dict[str, dict]): the configs for the candidate
|
||||||
operations.
|
operations.
|
||||||
module_kwargs (dict[str, dict], optional): Module initialization named
|
module_kwargs (dict[str, dict], optional): Module initialization named
|
||||||
arguments. Defaults to None.
|
arguments. Defaults to None.
|
||||||
|
@ -114,13 +114,13 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
>>> import torch
|
>>> import torch
|
||||||
>>> from mmrazor.models.mutables import OneShotMutableOP
|
>>> from mmrazor.models.mutables import OneShotMutableOP
|
||||||
|
|
||||||
>>> candidate_ops = nn.ModuleDict({
|
>>> candidates = nn.ModuleDict({
|
||||||
... 'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
|
... 'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
|
||||||
... 'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
|
... 'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
|
||||||
... 'conv7x7': nn.Conv2d(32, 32, 7, 1, 3)})
|
... 'conv7x7': nn.Conv2d(32, 32, 7, 1, 3)})
|
||||||
|
|
||||||
>>> input = torch.randn(1, 32, 64, 64)
|
>>> input = torch.randn(1, 32, 64, 64)
|
||||||
>>> op = OneShotMutableOP(candidate_ops)
|
>>> op = OneShotMutableOP(candidates)
|
||||||
|
|
||||||
>>> op.choices
|
>>> op.choices
|
||||||
['conv3x3', 'conv5x5', 'conv7x7']
|
['conv3x3', 'conv5x5', 'conv7x7']
|
||||||
|
@ -131,7 +131,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
|
|
||||||
>>> op.current_choice = 'conv3x3'
|
>>> op.current_choice = 'conv3x3'
|
||||||
>>> unfix_output = op.forward(input)
|
>>> unfix_output = op.forward(input)
|
||||||
>>> torch.all(unfixed_output == candidate_ops['conv3x3'](input))
|
>>> torch.all(unfixed_output == candidates['conv3x3'](input))
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> op.fix_chosen('conv3x3')
|
>>> op.fix_chosen('conv3x3')
|
||||||
|
@ -147,36 +147,41 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
True
|
True
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
|
def __init__(
|
||||||
**kwargs) -> None:
|
self,
|
||||||
super().__init__(**kwargs)
|
candidates: Union[Dict[str, Dict], nn.ModuleDict],
|
||||||
assert len(candidate_ops) >= 1, \
|
module_kwargs: Optional[Dict[str, Dict]] = None,
|
||||||
|
alias: Optional[str] = None,
|
||||||
|
init_cfg: Optional[Dict] = None,
|
||||||
|
) -> None:
|
||||||
|
super().__init__(
|
||||||
|
module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
|
||||||
|
assert len(candidates) >= 1, \
|
||||||
f'Number of candidate op must greater than 1, ' \
|
f'Number of candidate op must greater than 1, ' \
|
||||||
f'but got: {len(candidate_ops)}'
|
f'but got: {len(candidates)}'
|
||||||
|
|
||||||
self._chosen: Optional[str] = None
|
self._chosen: Optional[str] = None
|
||||||
if isinstance(candidate_ops, dict):
|
if isinstance(candidates, dict):
|
||||||
self._candidate_ops = self._build_ops(candidate_ops,
|
self._candidates = self._build_ops(candidates, self.module_kwargs)
|
||||||
self.module_kwargs)
|
elif isinstance(candidates, nn.ModuleDict):
|
||||||
elif isinstance(candidate_ops, nn.ModuleDict):
|
self._candidates = candidates
|
||||||
self._candidate_ops = candidate_ops
|
|
||||||
else:
|
else:
|
||||||
raise TypeError('candidata_ops should be a `dict` or '
|
raise TypeError('candidata_ops should be a `dict` or '
|
||||||
f'`nn.ModuleDict` instance, but got '
|
f'`nn.ModuleDict` instance, but got '
|
||||||
f'{type(candidate_ops)}')
|
f'{type(candidates)}')
|
||||||
|
|
||||||
assert len(self._candidate_ops) >= 1, \
|
assert len(self._candidates) >= 1, \
|
||||||
f'Number of candidate op must greater than or equal to 1, ' \
|
f'Number of candidate op must greater than or equal to 1, ' \
|
||||||
f'but got {len(self._candidate_ops)}'
|
f'but got {len(self._candidates)}'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_ops(
|
def _build_ops(
|
||||||
candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
|
candidates: Union[Dict[str, Dict], nn.ModuleDict],
|
||||||
module_kwargs: Optional[Dict[str, Dict]] = None) -> nn.ModuleDict:
|
module_kwargs: Optional[Dict[str, Dict]] = None) -> nn.ModuleDict:
|
||||||
"""Build candidate operations based on choice configures.
|
"""Build candidate operations based on choice configures.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
candidate_ops (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
|
candidates (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
|
||||||
for the candidate operations or nn.ModuleDict.
|
for the candidate operations or nn.ModuleDict.
|
||||||
module_kwargs (dict[str, dict], optional): Module initialization
|
module_kwargs (dict[str, dict], optional): Module initialization
|
||||||
named arguments.
|
named arguments.
|
||||||
|
@ -186,11 +191,11 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
the name of each choice in configs and the value of ``ops``
|
the name of each choice in configs and the value of ``ops``
|
||||||
is the corresponding candidate operation.
|
is the corresponding candidate operation.
|
||||||
"""
|
"""
|
||||||
if isinstance(candidate_ops, nn.ModuleDict):
|
if isinstance(candidates, nn.ModuleDict):
|
||||||
return candidate_ops
|
return candidates
|
||||||
|
|
||||||
ops = nn.ModuleDict()
|
ops = nn.ModuleDict()
|
||||||
for name, op_cfg in candidate_ops.items():
|
for name, op_cfg in candidates.items():
|
||||||
assert name not in ops
|
assert name not in ops
|
||||||
if module_kwargs is not None:
|
if module_kwargs is not None:
|
||||||
op_cfg.update(module_kwargs)
|
op_cfg.update(module_kwargs)
|
||||||
|
@ -207,7 +212,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
Returns:
|
Returns:
|
||||||
Tensor: the result of forward the fixed operation.
|
Tensor: the result of forward the fixed operation.
|
||||||
"""
|
"""
|
||||||
return self._candidate_ops[self._chosen](x)
|
return self._candidates[self._chosen](x)
|
||||||
|
|
||||||
def forward_choice(self, x: Any, choice: str) -> Tensor:
|
def forward_choice(self, x: Any, choice: str) -> Tensor:
|
||||||
"""Forward with the `unfixed` mutable and current choice is not None.
|
"""Forward with the `unfixed` mutable and current choice is not None.
|
||||||
|
@ -221,7 +226,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
Tensor: the result of forward the ``choice`` operation.
|
Tensor: the result of forward the ``choice`` operation.
|
||||||
"""
|
"""
|
||||||
assert isinstance(choice, str) and choice in self.choices
|
assert isinstance(choice, str) and choice in self.choices
|
||||||
return self._candidate_ops[choice](x)
|
return self._candidates[choice](x)
|
||||||
|
|
||||||
def forward_all(self, x: Any) -> Tensor:
|
def forward_all(self, x: Any) -> Tensor:
|
||||||
"""Forward all choices. Used to calculate FLOPs.
|
"""Forward all choices. Used to calculate FLOPs.
|
||||||
|
@ -233,7 +238,9 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
Returns:
|
Returns:
|
||||||
Tensor: the result of forward all of the ``choice`` operation.
|
Tensor: the result of forward all of the ``choice`` operation.
|
||||||
"""
|
"""
|
||||||
outputs = [op(x) for op in self._candidate_ops.values()]
|
outputs = list()
|
||||||
|
for op in self._candidates.values():
|
||||||
|
outputs.append(op(x))
|
||||||
return sum(outputs)
|
return sum(outputs)
|
||||||
|
|
||||||
def fix_chosen(self, chosen: str) -> None:
|
def fix_chosen(self, chosen: str) -> None:
|
||||||
|
@ -251,7 +258,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
|
|
||||||
for c in self.choices:
|
for c in self.choices:
|
||||||
if c != chosen:
|
if c != chosen:
|
||||||
self._candidate_ops.pop(c)
|
self._candidates.pop(c)
|
||||||
|
|
||||||
self._chosen = chosen
|
self._chosen = chosen
|
||||||
self.is_fixed = True
|
self.is_fixed = True
|
||||||
|
@ -263,7 +270,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
|
||||||
@property
|
@property
|
||||||
def choices(self) -> List[str]:
|
def choices(self) -> List[str]:
|
||||||
"""list: all choices. """
|
"""list: all choices. """
|
||||||
return list(self._candidate_ops.keys())
|
return list(self._candidates.keys())
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def num_choices(self):
|
def num_choices(self):
|
||||||
|
@ -275,7 +282,7 @@ class OneShotProbMutableOP(OneShotMutableOP):
|
||||||
"""Sampling candidate operation according to probability.
|
"""Sampling candidate operation according to probability.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
candidate_ops (dict[str, dict]): the configs for the candidate
|
candidates (dict[str, dict]): the configs for the candidate
|
||||||
operations.
|
operations.
|
||||||
choice_probs (list): the probability of sampling each
|
choice_probs (list): the probability of sampling each
|
||||||
candidate operation.
|
candidate operation.
|
||||||
|
@ -289,13 +296,13 @@ class OneShotProbMutableOP(OneShotMutableOP):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
candidate_ops: Dict[str, Dict],
|
candidates: Dict[str, Dict],
|
||||||
choice_probs: list = None,
|
choice_probs: list = None,
|
||||||
module_kwargs: Optional[Dict[str, Dict]] = None,
|
module_kwargs: Optional[Dict[str, Dict]] = None,
|
||||||
alias: Optional[str] = None,
|
alias: Optional[str] = None,
|
||||||
init_cfg: Optional[Dict] = None) -> None:
|
init_cfg: Optional[Dict] = None) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
candidate_ops=candidate_ops,
|
candidates=candidates,
|
||||||
module_kwargs=module_kwargs,
|
module_kwargs=module_kwargs,
|
||||||
alias=alias,
|
alias=alias,
|
||||||
init_cfg=init_cfg)
|
init_cfg=init_cfg)
|
||||||
|
@ -306,5 +313,7 @@ class OneShotProbMutableOP(OneShotMutableOP):
|
||||||
|
|
||||||
def sample_choice(self) -> str:
|
def sample_choice(self) -> str:
|
||||||
"""Sampling with probabilities."""
|
"""Sampling with probabilities."""
|
||||||
assert len(self.choice_probs) == len(self._candidate_ops.keys())
|
assert len(self.choice_probs) == len(self._candidates.keys())
|
||||||
return random.choices(self.choices, weights=self.choice_probs, k=1)[0]
|
choice = random.choices(
|
||||||
|
self.choices, weights=self.choice_probs, k=1)[0]
|
||||||
|
return choice
|
||||||
|
|
|
@ -2,10 +2,12 @@
|
||||||
from .common import Identity
|
from .common import Identity
|
||||||
from .darts_series import (DartsDilConv, DartsPoolBN, DartsSepConv,
|
from .darts_series import (DartsDilConv, DartsPoolBN, DartsSepConv,
|
||||||
DartsSkipConnect, DartsZero)
|
DartsSkipConnect, DartsZero)
|
||||||
|
from .efficientnet_series import ConvBnAct, DepthwiseSeparableConv
|
||||||
from .mobilenet_series import MBBlock
|
from .mobilenet_series import MBBlock
|
||||||
from .shufflenet_series import ShuffleBlock, ShuffleXception
|
from .shufflenet_series import ShuffleBlock, ShuffleXception
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'ShuffleBlock', 'ShuffleXception', 'DartsPoolBN', 'DartsDilConv',
|
'ShuffleBlock', 'ShuffleXception', 'DartsPoolBN', 'DartsDilConv',
|
||||||
'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity'
|
'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity',
|
||||||
|
'ConvBnAct', 'DepthwiseSeparableConv'
|
||||||
]
|
]
|
||||||
|
|
|
@ -27,10 +27,7 @@ class DartsPoolBN(BaseOP):
|
||||||
self.kernel_size, self.stride, 1, count_include_pad=False)
|
self.kernel_size, self.stride, 1, count_include_pad=False)
|
||||||
self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
|
self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
|
||||||
|
|
||||||
if use_drop_path:
|
self.drop_path = DropPath() if use_drop_path else None
|
||||||
self.drop_path = DropPath()
|
|
||||||
else:
|
|
||||||
self.drop_path = None
|
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
out = self.pool(x)
|
out = self.pool(x)
|
||||||
|
@ -69,10 +66,7 @@ class DartsDilConv(BaseOP):
|
||||||
self.in_channels, self.out_channels, 1, stride=1, bias=False),
|
self.in_channels, self.out_channels, 1, stride=1, bias=False),
|
||||||
build_norm_layer(self.norm_cfg, self.in_channels)[1])
|
build_norm_layer(self.norm_cfg, self.in_channels)[1])
|
||||||
|
|
||||||
if use_drop_path:
|
self.drop_path = DropPath() if use_drop_path else None
|
||||||
self.drop_path = DropPath()
|
|
||||||
else:
|
|
||||||
self.drop_path = None
|
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
out = self.conv1(x)
|
out = self.conv1(x)
|
||||||
|
@ -122,10 +116,7 @@ class DartsSepConv(BaseOP):
|
||||||
self.out_channels, self.out_channels, 1, stride=1, bias=False),
|
self.out_channels, self.out_channels, 1, stride=1, bias=False),
|
||||||
build_norm_layer(self.norm_cfg, self.out_channels)[1])
|
build_norm_layer(self.norm_cfg, self.out_channels)[1])
|
||||||
|
|
||||||
if use_drop_path:
|
self.drop_path = DropPath() if use_drop_path else None
|
||||||
self.drop_path = DropPath()
|
|
||||||
else:
|
|
||||||
self.drop_path = None
|
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
out = self.conv1(x)
|
out = self.conv1(x)
|
||||||
|
@ -163,10 +154,7 @@ class DartsSkipConnect(BaseOP):
|
||||||
bias=False)
|
bias=False)
|
||||||
self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
|
self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
|
||||||
|
|
||||||
if use_drop_path:
|
self.drop_path = DropPath() if use_drop_path else None
|
||||||
self.drop_path = DropPath()
|
|
||||||
else:
|
|
||||||
self.drop_path = None
|
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
if self.stride > 1:
|
if self.stride > 1:
|
||||||
|
|
|
@ -0,0 +1,160 @@
|
||||||
|
# Copyright (c) OpenMMLab. All rights reserved.
|
||||||
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
import torch.nn as nn
|
||||||
|
from mmcls.models.utils import SELayer
|
||||||
|
from mmcv.cnn import ConvModule
|
||||||
|
|
||||||
|
from mmrazor.registry import MODELS
|
||||||
|
from .base import BaseOP
|
||||||
|
|
||||||
|
|
||||||
|
@MODELS.register_module()
|
||||||
|
class ConvBnAct(BaseOP):
|
||||||
|
"""ConvBnAct block from timm.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
in_channels (int): number of in channels.
|
||||||
|
out_channels (int): number of out channels.
|
||||||
|
kernel_size (int): kernel size of convolution.
|
||||||
|
stride (int, optional): stride of convolution. Defaults to 1.
|
||||||
|
dilation (int, optional): dilation rate of convolution. Defaults to 1.
|
||||||
|
padding (int, optional): padding size of convolution. Defaults to 0.
|
||||||
|
skip (bool, optional): whether using skip connect. Defaults to False.
|
||||||
|
conv_cfg (Optional[dict], optional): Config dict for convolution layer.
|
||||||
|
Default: None, which means using conv2d.
|
||||||
|
norm_cfg (Dict, optional): Config dict for normalization layer.
|
||||||
|
Default: dict(type='BN').
|
||||||
|
act_cfg (Dict, optional):Config dict for activation layer.
|
||||||
|
Default: dict(type='ReLU').
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
in_channels: int,
|
||||||
|
out_channels: int,
|
||||||
|
kernel_size: int,
|
||||||
|
stride: int = 1,
|
||||||
|
dilation: int = 1,
|
||||||
|
padding: int = 0,
|
||||||
|
skip: bool = False,
|
||||||
|
conv_cfg: Optional[dict] = None,
|
||||||
|
se_cfg: Dict = None,
|
||||||
|
norm_cfg: Dict = dict(type='BN'),
|
||||||
|
act_cfg: Dict = dict(type='ReLU')):
|
||||||
|
super().__init__(
|
||||||
|
in_channels=in_channels, out_channels=out_channels, stride=stride)
|
||||||
|
self.has_residual = skip and stride == 1 \
|
||||||
|
and in_channels == out_channels
|
||||||
|
self.with_se = se_cfg is not None
|
||||||
|
|
||||||
|
if self.with_se:
|
||||||
|
assert isinstance(se_cfg, dict)
|
||||||
|
self.se = SELayer(self.out_channels, **se_cfg)
|
||||||
|
|
||||||
|
self.convModule = ConvModule(
|
||||||
|
in_channels=in_channels,
|
||||||
|
out_channels=out_channels,
|
||||||
|
kernel_size=kernel_size,
|
||||||
|
stride=stride,
|
||||||
|
dilation=dilation,
|
||||||
|
padding=padding,
|
||||||
|
conv_cfg=conv_cfg,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=act_cfg)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
"""Forward function."""
|
||||||
|
shortcut = x
|
||||||
|
x = self.convModule(x)
|
||||||
|
if self.has_residual:
|
||||||
|
x += shortcut
|
||||||
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
@MODELS.register_module()
|
||||||
|
class DepthwiseSeparableConv(BaseOP):
|
||||||
|
"""DepthwiseSeparable block Used for DS convs in MobileNet-V1 and in the
|
||||||
|
place of IR blocks that have no expansion (factor of 1.0). This is an
|
||||||
|
alternative to having a IR with an optional first pw conv.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
in_channels (int): number of in channels.
|
||||||
|
out_channels (int): number of out channels.
|
||||||
|
dw_kernel_size (int, optional): the kernel size of depth-wise
|
||||||
|
convolution. Defaults to 3.
|
||||||
|
stride (int, optional): stride of convolution.
|
||||||
|
Defaults to 1.
|
||||||
|
dilation (int, optional): dilation rate of convolution.
|
||||||
|
Defaults to 1.
|
||||||
|
noskip (bool, optional): whether use skip connection.
|
||||||
|
Defaults to False.
|
||||||
|
pw_kernel_size (int, optional): kernel size of point wise convolution.
|
||||||
|
Defaults to 1.
|
||||||
|
pw_act (bool, optional): whether using activation in point-wise
|
||||||
|
convolution. Defaults to False.
|
||||||
|
se_cfg (Dict, optional): _description_. Defaults to None.
|
||||||
|
conv_cfg (Optional[dict], optional): Config dict for convolution layer.
|
||||||
|
Default: None, which means using conv2d.
|
||||||
|
norm_cfg (Dict, optional): Config dict for normalization layer.
|
||||||
|
Default: dict(type='BN').
|
||||||
|
act_cfg (Dict, optional):Config dict for activation layer.
|
||||||
|
Default: dict(type='ReLU').
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
in_channels: int,
|
||||||
|
out_channels: int,
|
||||||
|
dw_kernel_size: int = 3,
|
||||||
|
stride: int = 1,
|
||||||
|
dilation: int = 1,
|
||||||
|
noskip: bool = False,
|
||||||
|
pw_kernel_size: int = 1,
|
||||||
|
pw_act: bool = False,
|
||||||
|
conv_cfg: Optional[dict] = None,
|
||||||
|
se_cfg: Dict = None,
|
||||||
|
norm_cfg: Dict = dict(type='BN'),
|
||||||
|
act_cfg: Dict = dict(type='ReLU')):
|
||||||
|
|
||||||
|
super().__init__(
|
||||||
|
in_channels=in_channels, out_channels=out_channels, stride=stride)
|
||||||
|
self.has_residual = (stride == 1
|
||||||
|
and in_channels == out_channels) and not noskip
|
||||||
|
self.has_pw_act = pw_act # activation after point-wise conv
|
||||||
|
|
||||||
|
self.se_cfg = se_cfg
|
||||||
|
|
||||||
|
self.conv_dw = ConvModule(
|
||||||
|
in_channels=in_channels,
|
||||||
|
out_channels=in_channels,
|
||||||
|
kernel_size=dw_kernel_size,
|
||||||
|
stride=stride,
|
||||||
|
dilation=dilation,
|
||||||
|
padding=dw_kernel_size // 2,
|
||||||
|
groups=in_channels,
|
||||||
|
conv_cfg=conv_cfg,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=act_cfg,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Squeeze-and-excitation
|
||||||
|
self.se = SELayer(out_channels, **
|
||||||
|
se_cfg) if self.se_cfg else nn.Identity()
|
||||||
|
|
||||||
|
self.conv_pw = ConvModule(
|
||||||
|
in_channels=in_channels,
|
||||||
|
out_channels=out_channels,
|
||||||
|
kernel_size=pw_kernel_size,
|
||||||
|
padding=pw_kernel_size // 2,
|
||||||
|
conv_cfg=conv_cfg,
|
||||||
|
norm_cfg=norm_cfg,
|
||||||
|
act_cfg=act_cfg if self.has_pw_act else None,
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
shortcut = x
|
||||||
|
x = self.conv_dw(x)
|
||||||
|
x = self.se(x)
|
||||||
|
x = self.conv_pw(x)
|
||||||
|
if self.has_residual:
|
||||||
|
x += shortcut
|
||||||
|
return x
|
|
@ -65,10 +65,10 @@ class FlopsEstimator:
|
||||||
... def __init__(self) -> None:
|
... def __init__(self) -> None:
|
||||||
... super().__init__()
|
... super().__init__()
|
||||||
...
|
...
|
||||||
... candidate_ops = nn.ModuleDict({
|
... candidates = nn.ModuleDict({
|
||||||
... 'conv3x3': nn.Conv2d(3, 32, 3),
|
... 'conv3x3': nn.Conv2d(3, 32, 3),
|
||||||
... 'conv5x5': nn.Conv2d(3, 32, 5)})
|
... 'conv5x5': nn.Conv2d(3, 32, 5)})
|
||||||
... self.op = OneShotMutableOP(candidate_ops)
|
... self.op = OneShotMutableOP(candidates)
|
||||||
... self.op.current_choice = 'conv3x3'
|
... self.op.current_choice = 'conv3x3'
|
||||||
...
|
...
|
||||||
... def forward(self, x: Tensor) -> Tensor:
|
... def forward(self, x: Tensor) -> Tensor:
|
||||||
|
|
|
@ -90,12 +90,19 @@ class FixSubnetMixin:
|
||||||
# In the corresponding mutable, it will check whether the `chosen`
|
# In the corresponding mutable, it will check whether the `chosen`
|
||||||
# format is correct.
|
# format is correct.
|
||||||
if isinstance(module, BaseMutable):
|
if isinstance(module, BaseMutable):
|
||||||
mutable_name = name.lstrip(prefix)
|
if getattr(module, 'alias', None):
|
||||||
assert mutable_name in fix_modules, \
|
alias = module.alias
|
||||||
f'{mutable_name} is not in fix_modules {fix_modules}, '\
|
assert alias in fix_modules, \
|
||||||
'please check your `fix_subnet`.'
|
f'The alias {alias} is not in fix_modules ' \
|
||||||
|
f'{fix_modules}, please check your `fix_subnet`.'
|
||||||
chosen = fix_modules.get(mutable_name, None)
|
chosen = fix_modules.get(alias, None)
|
||||||
|
else:
|
||||||
|
mutable_name = name.lstrip(prefix)
|
||||||
|
assert mutable_name in fix_modules, \
|
||||||
|
f'The module name {mutable_name} is not in ' \
|
||||||
|
f'fix_modules {fix_modules} ' \
|
||||||
|
'please check your `fix_subnet`.'
|
||||||
|
chosen = fix_modules.get(mutable_name, None)
|
||||||
module.fix_chosen(chosen)
|
module.fix_chosen(chosen)
|
||||||
|
|
||||||
# TODO support load fix channels after mr #29 merged
|
# TODO support load fix channels after mr #29 merged
|
||||||
|
|
|
@ -19,7 +19,7 @@ class TestDartsBackbone(TestCase):
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
self.mutable_cfg = dict(
|
self.mutable_cfg = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
@ -96,17 +96,17 @@ class TestDartsBackbone(TestCase):
|
||||||
tmp_dict = dict()
|
tmp_dict = dict()
|
||||||
|
|
||||||
for key, _ in model.named_modules():
|
for key, _ in model.named_modules():
|
||||||
node_type = key.split('._candidate_ops')[0].split('.')[-1].split(
|
node_type = key.split('._candidates')[0].split('.')[-1].split(
|
||||||
'_')[0]
|
'_')[0]
|
||||||
if node_type not in ['normal', 'reduce']:
|
if node_type not in ['normal', 'reduce']:
|
||||||
# not supported type
|
# not supported type
|
||||||
continue
|
continue
|
||||||
|
|
||||||
node_name = key.split('._candidate_ops')[0].split('.')[-1]
|
node_name = key.split('._candidates')[0].split('.')[-1]
|
||||||
if node_name not in tmp_dict.keys():
|
if node_name not in tmp_dict.keys():
|
||||||
tmp_dict[node_name] = [key.split('._candidate_ops')[0]]
|
tmp_dict[node_name] = [key.split('._candidates')[0]]
|
||||||
else:
|
else:
|
||||||
current_key = key.split('._candidate_ops')[0]
|
current_key = key.split('._candidates')[0]
|
||||||
if current_key not in tmp_dict[node_name]:
|
if current_key not in tmp_dict[node_name]:
|
||||||
tmp_dict[node_name].append(current_key)
|
tmp_dict[node_name].append(current_key)
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ class TestDiffOP(TestCase):
|
||||||
def test_forward_arch_param(self):
|
def test_forward_arch_param(self):
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
@ -56,7 +56,7 @@ class TestDiffOP(TestCase):
|
||||||
def test_forward_fixed(self):
|
def test_forward_fixed(self):
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
@ -84,7 +84,7 @@ class TestDiffOP(TestCase):
|
||||||
def test_forward(self):
|
def test_forward(self):
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
@ -119,7 +119,7 @@ class TestDiffOP(TestCase):
|
||||||
def test_property(self):
|
def test_property(self):
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
@ -158,7 +158,7 @@ class TestDiffOP(TestCase):
|
||||||
def test_module_kwargs(self):
|
def test_module_kwargs(self):
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
|
|
@ -15,7 +15,7 @@ class TestMutables(TestCase):
|
||||||
norm_cfg = dict(type='BN', requires_grad=True)
|
norm_cfg = dict(type='BN', requires_grad=True)
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
shuffle_3x3=dict(
|
shuffle_3x3=dict(
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
||||||
shuffle_5x5=dict(
|
shuffle_5x5=dict(
|
||||||
|
@ -80,7 +80,7 @@ class TestMutables(TestCase):
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='OneShotProbMutableOP',
|
type='OneShotProbMutableOP',
|
||||||
choice_probs=[0.1, 0.2, 0.3, 0.4],
|
choice_probs=[0.1, 0.2, 0.3, 0.4],
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
shuffle_3x3=dict(
|
shuffle_3x3=dict(
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
||||||
shuffle_5x5=dict(
|
shuffle_5x5=dict(
|
||||||
|
@ -142,7 +142,7 @@ class TestMutables(TestCase):
|
||||||
norm_cfg = dict(type='BN', requires_grad=True)
|
norm_cfg = dict(type='BN', requires_grad=True)
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
shuffle_3x3=dict(
|
shuffle_3x3=dict(
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
||||||
shuffle_5x5=dict(
|
shuffle_5x5=dict(
|
||||||
|
@ -165,7 +165,7 @@ class TestMutables(TestCase):
|
||||||
norm_cfg = dict(type='BN', requires_grad=True)
|
norm_cfg = dict(type='BN', requires_grad=True)
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
shuffle_3x3=dict(
|
shuffle_3x3=dict(
|
||||||
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
|
||||||
shuffle_5x5=dict(
|
shuffle_5x5=dict(
|
||||||
|
@ -189,7 +189,7 @@ class TestMutables(TestCase):
|
||||||
norm_cfg = dict(type='BN', requires_grad=True)
|
norm_cfg = dict(type='BN', requires_grad=True)
|
||||||
op_cfg = dict(
|
op_cfg = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
shuffle_3x3=dict(
|
shuffle_3x3=dict(
|
||||||
type='ShuffleBlock',
|
type='ShuffleBlock',
|
||||||
norm_cfg=norm_cfg,
|
norm_cfg=norm_cfg,
|
||||||
|
@ -221,9 +221,9 @@ class TestMutables(TestCase):
|
||||||
output = op.forward_all(input)
|
output = op.forward_all(input)
|
||||||
assert output is not None
|
assert output is not None
|
||||||
|
|
||||||
def test_candidate_ops(self):
|
def test_candidates(self):
|
||||||
|
|
||||||
candidate_ops = nn.ModuleDict({
|
candidates = nn.ModuleDict({
|
||||||
'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
|
'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
|
||||||
'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
|
'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
|
||||||
'conv7x7': nn.Conv2d(32, 32, 7, 1, 3),
|
'conv7x7': nn.Conv2d(32, 32, 7, 1, 3),
|
||||||
|
@ -231,7 +231,7 @@ class TestMutables(TestCase):
|
||||||
'avgpool3x3': nn.AvgPool2d(3, 1, 1),
|
'avgpool3x3': nn.AvgPool2d(3, 1, 1),
|
||||||
})
|
})
|
||||||
|
|
||||||
op_cfg = dict(type='OneShotMutableOP', candidate_ops=candidate_ops)
|
op_cfg = dict(type='OneShotMutableOP', candidates=candidates)
|
||||||
|
|
||||||
op = MODELS.build(op_cfg)
|
op = MODELS.build(op_cfg)
|
||||||
|
|
||||||
|
|
|
@ -72,12 +72,12 @@ class SearchableModelAlias(nn.Module):
|
||||||
return self.slayer3(x)
|
return self.slayer3(x)
|
||||||
|
|
||||||
|
|
||||||
class TestDiffMutator(TestCase):
|
class TestDiffModuleMutator(TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.MUTABLE_CFG = dict(
|
self.MUTABLE_CFG = dict(
|
||||||
type='DiffMutableOP',
|
type='DiffMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
torch_conv2d_3x3=dict(
|
torch_conv2d_3x3=dict(
|
||||||
type='torchConv2d',
|
type='torchConv2d',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
|
|
@ -30,7 +30,7 @@ MUTATOR_CFG = dict(type='OneShotModuleMutator')
|
||||||
|
|
||||||
MUTABLE_CFG = dict(
|
MUTABLE_CFG = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
choice1=dict(
|
choice1=dict(
|
||||||
type='MBBlock',
|
type='MBBlock',
|
||||||
in_channels=3,
|
in_channels=3,
|
||||||
|
|
|
@ -13,7 +13,7 @@ from mmrazor.registry import MODELS
|
||||||
|
|
||||||
_FIRST_STAGE_MUTABLE = dict(
|
_FIRST_STAGE_MUTABLE = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
mb_k3e1=dict(
|
mb_k3e1=dict(
|
||||||
type='MBBlock',
|
type='MBBlock',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
@ -23,7 +23,7 @@ _FIRST_STAGE_MUTABLE = dict(
|
||||||
|
|
||||||
_OTHER_STAGE_MUTABLE = dict(
|
_OTHER_STAGE_MUTABLE = dict(
|
||||||
type='OneShotMutableOP',
|
type='OneShotMutableOP',
|
||||||
candidate_ops=dict(
|
candidates=dict(
|
||||||
mb_k3e3=dict(
|
mb_k3e3=dict(
|
||||||
type='MBBlock',
|
type='MBBlock',
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
|
|
|
@ -3,6 +3,10 @@ import argparse
|
||||||
import os
|
import os
|
||||||
import os.path as osp
|
import os.path as osp
|
||||||
|
|
||||||
|
from mmcls.core import * # noqa: F401,F403
|
||||||
|
from mmcls.datasets import * # noqa: F401,F403
|
||||||
|
from mmcls.metrics import * # noqa: F401,F403
|
||||||
|
from mmcls.models import * # noqa: F401,F403
|
||||||
# TODO import mmcls and mmseg
|
# TODO import mmcls and mmseg
|
||||||
from mmdet.core import * # noqa: F401,F403
|
from mmdet.core import * # noqa: F401,F403
|
||||||
from mmdet.datasets import * # noqa: F401,F403
|
from mmdet.datasets import * # noqa: F401,F403
|
||||||
|
|
|
@ -38,7 +38,6 @@ def parse_args():
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
register_all_modules(False)
|
register_all_modules(False)
|
||||||
|
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
# load config
|
# load config
|
||||||
|
|
Loading…
Reference in New Issue