diff --git a/bash_anglenas.sh b/bash_anglenas.sh new file mode 100644 index 00000000..f34ade8d --- /dev/null +++ b/bash_anglenas.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env sh + + +MKL_NUM_THREADS=4 +OMP_NUM_THREADS=1 + + + +# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test + + +bash tools/slurm_test.sh mm_model angle_test configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py /mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth diff --git a/bash_cream_train.sh b/bash_cream_train.sh new file mode 100644 index 00000000..c9ba275b --- /dev/null +++ b/bash_cream_train.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env sh + + +MKL_NUM_THREADS=4 +OMP_NUM_THREADS=1 + +# train +# srun --partition=mm_model \ +# --job-name=spos_train \ +# --gres=gpu:8 \ +# --ntasks=8 \ +# --ntasks-per-node=8 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py + +# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py ./work_dir/spos + +# SPOS test +# srun --partition=mm_model \ +# --job-name=spos_test \ +# --gres=gpu:1 \ +# --ntasks=1 \ +# --ntasks-per-node=1 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth" + +# DetNAS train +# srun --partition=mm_model \ +# --job-name=detnas_train \ +# --gres=gpu:8 \ +# --ntasks=8 \ +# --ntasks-per-node=8 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py + +# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py ./work_dir/detnas_pretrain + +# DetNAS test +# srun --partition=mm_model \ +# --job-name=detnas_test \ +# --gres=gpu:1 \ +# --ntasks=1 \ +# --ntasks-per-node=1 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth" + + +# CREAM Test +# bash tools/slurm_test.sh mm_model cream_test configs/nas/cream/cream_14_subnet_mobilenet.py '/mnt/lustre/dongpeijie/14_2.0.pth' + +# CREAM Train +bash tools/slurm_train.sh mm_model cream_train configs/nas/cream/cream_14_subnet_mobilenet.py diff --git a/bash_darts_test.sh b/bash_darts_test.sh new file mode 100644 index 00000000..f92b27f1 --- /dev/null +++ b/bash_darts_test.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh + + +MKL_NUM_THREADS=4 +OMP_NUM_THREADS=1 + +bash tools/slurm_test.sh mm_model spos_test configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth' diff --git a/bash_detnas_train.sh b/bash_detnas_train.sh new file mode 100644 index 00000000..bff5a5aa --- /dev/null +++ b/bash_detnas_train.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env sh + + +MKL_NUM_THREADS=4 +OMP_NUM_THREADS=1 + +# DetNAS train +# srun --partition=mm_model \ +# --job-name=detnas_train \ +# --gres=gpu:8 \ +# --ntasks=8 \ +# --ntasks-per-node=8 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python 
tools/train.py configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py + +# bash tools/slurm_train.sh mm_model detnas_train configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/checkpoints/tests/detnas_pretrain_test + + +# bash tools/slurm_test.sh mm_model detnas_test configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py /mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth + +# DetNAS test +srun --partition=mm_model \ + --job-name=detnas_test \ + --gres=gpu:1 \ + --ntasks=1 \ + --ntasks-per-node=1 \ + --cpus-per-task=8 \ + --kill-on-bad-exit=1 \ + --quotatype=auto \ + python tools/test.py configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py "/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth" --launcher=slurm diff --git a/bash_spos_train.sh b/bash_spos_train.sh new file mode 100644 index 00000000..094d6015 --- /dev/null +++ b/bash_spos_train.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env sh + + +MKL_NUM_THREADS=4 +OMP_NUM_THREADS=1 + +# train +# srun --partition=mm_model \ +# --job-name=spos_train \ +# --gres=gpu:8 \ +# --ntasks=8 \ +# --ntasks-per-node=8 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python tools/train.py configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py + +# bash tools/slurm_train.sh mm_model spos_train configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_format_output + +# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph + +# 55% wrong settings of PolyLR +# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_with_ceph + +# fix setting of PolyLR and rerun with colorjittor +# bash tools/slurm_train.sh mm_model spos_retrain_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_with_colorjittor + +# fix setting of PolyLR and rerun w/o colorjittor +# bash tools/slurm_train.sh mm_model spos_retrain_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_wo_colorjittor + +# fix setting of optimizer decay[wo cj] (paramwise_cfg) +# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_wo_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example_wo_colorjittor.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_wo_cj + +# fix setting of optimizer decay[with cj] (paramwise_cfg) +# bash tools/slurm_train.sh mm_model spos_retrain_fix_decay_w_cj configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/retrain_detnas_spos_retrain_fix_decay_w_cj + + + +# SPOS test +# srun --partition=mm_model \ +# --job-name=spos_test \ +# --gres=gpu:1 \ +# --ntasks=1 \ +# --ntasks-per-node=1 \ +# --cpus-per-task=8 \ +# --kill-on-bad-exit=1 \ +# python tools/test.py configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py 
"/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_2.0.pth" + + +bash tools/slurm_test.sh mm_model spos_test configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' + +# bash tools/slurm_train.sh mm_model spos_retrain configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py /mnt/lustre/dongpeijie/checkpoints/work_dirs/spos_retrain_detnas_spos diff --git a/configs/_base_/models/arch_settings/mobilenet/cream_114.py b/configs/_base_/models/arch_settings/mobilenet/cream_114.py new file mode 100644 index 00000000..ea5c3e7c --- /dev/null +++ b/configs/_base_/models/arch_settings/mobilenet/cream_114.py @@ -0,0 +1,76 @@ +se_cfg = dict( + ratio=4, + divisor=1, + act_cfg=(dict(type='HSwish'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +_OTHER_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 1, 2, _OTHER_STAGE_MUTABLE], + [40, 2, 2, _OTHER_STAGE_MUTABLE], + [80, 2, 2, _OTHER_STAGE_MUTABLE], + [96, 3, 1, _OTHER_STAGE_MUTABLE], + [192, 2, 2, _OTHER_STAGE_MUTABLE], + [320, 1, 1, _OTHER_STAGE_MUTABLE] +] diff --git a/configs/_base_/models/arch_settings/mobilenet/cream_14.py b/configs/_base_/models/arch_settings/mobilenet/cream_14.py new file mode 100644 index 00000000..0f7c409c --- /dev/null +++ b/configs/_base_/models/arch_settings/mobilenet/cream_14.py @@ -0,0 +1,76 @@ +se_cfg = dict( + ratio=4, + divisor=1, + act_cfg=(dict(type='HSwish'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +_OTHER_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 1, 2, _OTHER_STAGE_MUTABLE], + [40, 2, 2, _OTHER_STAGE_MUTABLE], + [80, 2, 2, _OTHER_STAGE_MUTABLE], + [96, 1, 1, _OTHER_STAGE_MUTABLE], + [192, 1, 2, _OTHER_STAGE_MUTABLE], + [320, 1, 1, _OTHER_STAGE_MUTABLE] +] diff --git a/configs/_base_/models/arch_settings/mobilenet/cream_287.py b/configs/_base_/models/arch_settings/mobilenet/cream_287.py new file mode 100644 index 00000000..5b55af57 --- /dev/null +++ b/configs/_base_/models/arch_settings/mobilenet/cream_287.py @@ -0,0 +1,76 @@ +se_cfg = dict( + ratio=4, + divisor=1, + act_cfg=(dict(type='HSwish'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +_OTHER_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 1, 2, _OTHER_STAGE_MUTABLE], + [40, 2, 2, _OTHER_STAGE_MUTABLE], + [80, 3, 2, _OTHER_STAGE_MUTABLE], + [96, 4, 1, _OTHER_STAGE_MUTABLE], + [192, 3, 2, _OTHER_STAGE_MUTABLE], + [320, 1, 1, _OTHER_STAGE_MUTABLE] +] diff --git a/configs/_base_/models/arch_settings/mobilenet/cream_43.py b/configs/_base_/models/arch_settings/mobilenet/cream_43.py new file mode 100644 index 00000000..ea5c3e7c --- /dev/null +++ b/configs/_base_/models/arch_settings/mobilenet/cream_43.py @@ -0,0 +1,76 @@ +se_cfg = dict( + ratio=4, + divisor=1, + act_cfg=(dict(type='HSwish'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +_OTHER_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 1, 2, _OTHER_STAGE_MUTABLE], + [40, 2, 2, _OTHER_STAGE_MUTABLE], + [80, 2, 2, _OTHER_STAGE_MUTABLE], + [96, 3, 1, _OTHER_STAGE_MUTABLE], + [192, 2, 2, _OTHER_STAGE_MUTABLE], + [320, 1, 1, _OTHER_STAGE_MUTABLE] +] diff --git a/configs/_base_/models/arch_settings/mobilenet/cream_481.py b/configs/_base_/models/arch_settings/mobilenet/cream_481.py new file mode 100644 index 00000000..f3e3c07d --- /dev/null +++ b/configs/_base_/models/arch_settings/mobilenet/cream_481.py @@ -0,0 +1,76 @@ +se_cfg = dict( + ratio=4, + divisor=1, + act_cfg=(dict(type='HSwish'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +_OTHER_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 4, 2, _OTHER_STAGE_MUTABLE], + [40, 4, 2, _OTHER_STAGE_MUTABLE], + [80, 5, 2, _OTHER_STAGE_MUTABLE], + [96, 4, 1, _OTHER_STAGE_MUTABLE], + [192, 4, 2, _OTHER_STAGE_MUTABLE], + [320, 1, 1, _OTHER_STAGE_MUTABLE] +] diff --git a/configs/_base_/models/arch_settings/mobilenet/cream_604.py b/configs/_base_/models/arch_settings/mobilenet/cream_604.py new file mode 100644 index 00000000..a727a02d --- /dev/null +++ b/configs/_base_/models/arch_settings/mobilenet/cream_604.py @@ -0,0 +1,76 @@ +se_cfg = dict( + ratio=4, + divisor=1, + act_cfg=(dict(type='HSwish'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +_OTHER_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='HSwish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 5, 2, _OTHER_STAGE_MUTABLE], + [40, 5, 2, _OTHER_STAGE_MUTABLE], + [80, 5, 2, _OTHER_STAGE_MUTABLE], + [96, 6, 1, _OTHER_STAGE_MUTABLE], + [192, 6, 2, _OTHER_STAGE_MUTABLE], + [320, 1, 1, _OTHER_STAGE_MUTABLE] +] diff --git a/configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml b/configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml new file mode 100644 index 00000000..cfa69fcb --- /dev/null +++ b/configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml @@ -0,0 +1,11 @@ +modules: + backbone.layer1.0: depthsepconv + backbone.layer2.0: mb_k3e4_se + backbone.layer3.0: mb_k5e6_se + backbone.layer3.1: mb_k5e6_se + backbone.layer4.0: mb_k5e6_se + backbone.layer4.1: mb_k5e6_se + backbone.layer5.0: mb_k3e6_se + backbone.layer6.0: mb_k5e6_se + backbone.layer7.0: convbnact +channels: diff --git a/configs/nas/cream/cream_14_subnet_mobilenet.py b/configs/nas/cream/cream_14_subnet_mobilenet.py new file mode 100644 index 00000000..bf308ff0 --- /dev/null +++ b/configs/nas/cream/cream_14_subnet_mobilenet.py @@ -0,0 +1,8 @@ +_base_ = ['./cream_14_supernet_mobilenet.py'] + +# FIXME: you may replace this with the mutable_cfg searched by yourself +fix_subnet = 'configs/nas/cream/CREAM_14_MOBILENET_IN1k_2.0.yaml' # noqa: E501 + +model = dict(fix_subnet=fix_subnet) + +find_unused_parameters = False diff --git a/configs/nas/cream/cream_14_supernet_mobilenet.py b/configs/nas/cream/cream_14_supernet_mobilenet.py new file mode 100644 index 00000000..cc072219 --- /dev/null +++ b/configs/nas/cream/cream_14_supernet_mobilenet.py @@ -0,0 +1,241 @@ +# dataset settings +dataset_type = 'ImageNet' + +preprocess_cfg = dict( + # RGB format normalization parameters + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + # convert image from BGR to RGB + to_rgb=True, +) + +# file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/imagenet': 's3://openmmlab/datasets/classification/imagenet', +# 'data/imagenet': 's3://openmmlab/datasets/classification/imagenet' +# })) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='RandomResizedCrop', scale=224), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict(type='RandomFlip', prob=0.5, direction='horizontal'), + dict(type='PackClsInputs'), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeEdge', + scale=73, + edge='short', + backend='pillow', + interpolation='bicubic'), + dict(type='CenterCrop', crop_size=64), + dict(type='PackClsInputs'), +] + +train_dataloader = dict( + batch_size=128, + num_workers=5, + dataset=dict( + type=dataset_type, + data_root='/mnt/cache/share/images', + ann_file='meta/train.txt', + data_prefix='train', + pipeline=train_pipeline), + sampler=dict(type='DefaultSampler', shuffle=True), + persistent_workers=True, +) + +# /mnt/lustre/share_data/wangjiaqi/data/imagenet', + +val_dataloader = dict( + batch_size=128, + num_workers=5, + dataset=dict( + type=dataset_type, + data_root='/mnt/cache/share/images', + ann_file='meta/val.txt', + data_prefix='val', + pipeline=test_pipeline), + sampler=dict(type='DefaultSampler', shuffle=False), + persistent_workers=True, +) +val_evaluator = dict(type='Accuracy', topk=(1, 5)) + +# If you want standard test, please manually configure the test dataset +test_dataloader = val_dataloader +test_evaluator = val_evaluator + +# scheduler + +# optimizer +optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5), + clip_grad=None) + +# learning policy
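+# Note: PolyLR with power=1.0 is a linear decay; by_epoch=False steps it every
+# iteration, so the rate ramps from lr=0.5 down to eta_min=0.0 across the
+# 300k iterations set in train_cfg below.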
+param_scheduler = [ + dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False), +] + +# train, val, test setting +train_cfg = dict(by_epoch=False, max_iters=300000) +val_cfg = dict() +test_cfg = dict() + +# runtime + +# defaults to use registries in mmrazor +default_scope = 'mmcls' + +# configure default hooks +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=100), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='VisualizationHook', enable=False), +) + +# configure environment +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='ClsVisualizer', vis_backends=vis_backends, name='visualizer') + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +se_cfg = dict( + ratio=4, + divisor=8, + act_cfg=(dict(type='ReLU'), + dict( + type='HSigmoid', bias=3, divisor=6, min_value=0, + max_value=1))) + +_FIRST_STAGE_MUTABLE = dict( # DepthwiseSep + type='OneShotMutableOP', + candidates=dict( + depthsepconv=dict( + type='DepthwiseSeparableConv', + dw_kernel_size=3, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')))) + +_MIDDLE_STAGE_MUTABLE = dict( + type='OneShotMutableOP', + candidates=dict( + mb_k3e4_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')), + mb_k3e6_se=dict( + type='MBBlock', + kernel_size=3, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')), + mb_k5e4_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')), + mb_k5e6_se=dict( + type='MBBlock', + kernel_size=5, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')), + mb_k7e4_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=4, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')), + mb_k7e6_se=dict( + type='MBBlock', + kernel_size=7, + expand_ratio=6, + se_cfg=se_cfg, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='Swish')))) + +arch_setting = [ + # Parameters to build layers. 4 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, stride, mutable cfg. 
+ [16, 1, 1, _FIRST_STAGE_MUTABLE], + [24, 1, 2, _MIDDLE_STAGE_MUTABLE], + [40, 2, 2, _MIDDLE_STAGE_MUTABLE], + [80, 2, 2, _MIDDLE_STAGE_MUTABLE], + [96, 1, 1, _MIDDLE_STAGE_MUTABLE], + [192, 1, 2, _MIDDLE_STAGE_MUTABLE], +] + +norm_cfg = dict(type='BN') +supernet = dict( + _scope_='mmcls', + type='ImageClassifier', + data_preprocessor=preprocess_cfg, + backbone=dict( + _scope_='mmrazor', + type='SearchableMobileNet', + arch_setting=arch_setting, + first_channels=16, + last_channels=320, + widen_factor=1.0, + norm_cfg=norm_cfg, + act_cfg=dict(type='Swish'), + out_indices=(6, ), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='mmrazor.CreamClsHead', + num_classes=1000, + in_channels=320, + num_features=1280, + act_cfg=dict(type='Swish'), + loss=dict( + type='LabelSmoothLoss', + num_classes=1000, + label_smooth_val=0.1, + mode='original', + loss_weight=1.0), + topk=(1, 5), + ), +) + +mutator = dict(type='mmrazor.OneShotModuleMutator') + +model = dict( + type='mmrazor.SPOS', + architecture=supernet, + mutator=mutator, +) + +find_unused_parameters = True diff --git a/configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER.yaml b/configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER.yaml deleted file mode 100644 index 6cfba708..00000000 --- a/configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER.yaml +++ /dev/null @@ -1,116 +0,0 @@ -normal_n2: - chosen: - - normal_n2_p1 - - normal_n2_p0 -normal_n3: - chosen: - - normal_n3_p0 - - normal_n3_p1 -normal_n4: - chosen: - - normal_n4_p0 - - normal_n4_p1 -normal_n5: - chosen: - - normal_n5_p2 - - normal_n5_p0 -reduce_n2: - chosen: - - reduce_n2_p0 - - reduce_n2_p1 -reduce_n3: - chosen: - - reduce_n3_p1 - - reduce_n3_p2 -reduce_n4: - chosen: - - reduce_n4_p2 - - reduce_n4_p0 -reduce_n5: - chosen: - - reduce_n5_p1 - - reduce_n5_p2 -normal_n2_p0: - chosen: - - sep_conv_3x3 -normal_n2_p1: - chosen: - - sep_conv_3x3 -normal_n3_p0: - chosen: - - sep_conv_3x3 -normal_n3_p1: - chosen: - - sep_conv_3x3 -normal_n3_p2: - chosen: - - sep_conv_3x3 -normal_n4_p0: - chosen: - - skip_connect -normal_n4_p1: - chosen: - - sep_conv_3x3 -normal_n4_p2: - chosen: - - skip_connect -normal_n4_p3: - chosen: - - sep_conv_3x3 -normal_n5_p0: - chosen: - - skip_connect -normal_n5_p1: - chosen: - - skip_connect -normal_n5_p2: - chosen: - - dil_conv_3x3 -normal_n5_p3: - chosen: - - skip_connect -normal_n5_p4: - chosen: - - skip_connect -reduce_n2_p0: - chosen: - - max_pool_3x3 -reduce_n2_p1: - chosen: - - max_pool_3x3 -reduce_n3_p0: - chosen: - - max_pool_3x3 -reduce_n3_p1: - chosen: - - max_pool_3x3 -reduce_n3_p2: - chosen: - - skip_connect -reduce_n4_p0: - chosen: - - max_pool_3x3 -reduce_n4_p1: - chosen: - - max_pool_3x3 -reduce_n4_p2: - chosen: - - skip_connect -reduce_n4_p3: - chosen: - - skip_connect -reduce_n5_p0: - chosen: - - max_pool_3x3 -reduce_n5_p1: - chosen: - - max_pool_3x3 -reduce_n5_p2: - chosen: - - skip_connect -reduce_n5_p3: - chosen: - - skip_connect -reduce_n5_p4: - chosen: - - skip_connect diff --git a/configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml b/configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml new file mode 100644 index 00000000..a3ceee1a --- /dev/null +++ b/configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml @@ -0,0 +1,58 @@ +modules: + normal_n2: + - normal_n2_p0 + - normal_n2_p1 + normal_n2_p0: + - sep_conv_3x3 + normal_n2_p1: + - sep_conv_3x3 + normal_n3: + - normal_n3_p0 + - normal_n3_p1 + normal_n3_p0: + - skip_connect + normal_n3_p1: + - sep_conv_5x5 + normal_n4: + - normal_n4_p0 + - normal_n4_p1 + normal_n4_p0: + - sep_conv_3x3 + normal_n4_p1: + - 
skip_connect + normal_n5: + - normal_n5_p0 + - normal_n5_p1 + normal_n5_p0: + - skip_connect + normal_n5_p1: + - skip_connect + reduce_n2: + - reduce_n2_p0 + - reduce_n2_p1 + reduce_n2_p0: + - max_pool_3x3 + reduce_n2_p1: + - sep_conv_3x3 + reduce_n3: + - reduce_n3_p0 + - reduce_n3_p2 + reduce_n3_p0: + - max_pool_3x3 + reduce_n3_p2: + - dil_conv_5x5 + reduce_n4: + - reduce_n4_p0 + - reduce_n4_p2 + reduce_n4_p0: + - max_pool_3x3 + reduce_n4_p2: + - skip_connect + reduce_n5: + - reduce_n5_p0 + - reduce_n5_p2 + reduce_n5_p0: + - max_pool_3x3 + reduce_n5_p2: + - skip_connect +channels: diff --git a/configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py b/configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py new file mode 100644 index 00000000..cdda0d8b --- /dev/null +++ b/configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py @@ -0,0 +1,196 @@ +# dataset settings +dataset_type = 'CIFAR10' +preprocess_cfg = dict( + # RGB format normalization parameters + mean=[125.307, 122.961, 113.8575], + std=[51.5865, 50.847, 51.255], + # loaded images are already RGB format + to_rgb=False) + +train_pipeline = [ + dict(type='RandomCrop', crop_size=32, padding=4), + dict(type='RandomFlip', prob=0.5, direction='horizontal'), + dict( + type='Cutout', + magnitude_key='shape', + magnitude_range=(1, 16), + pad_val=0, + prob=0.5), + dict(type='PackClsInputs'), +] + +test_pipeline = [ + dict(type='PackClsInputs'), +] + +train_dataloader = dict( + batch_size=96, + num_workers=2, + dataset=dict( + type=dataset_type, + data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10', + test_mode=False, + pipeline=train_pipeline), + sampler=dict(type='DefaultSampler', shuffle=True), + persistent_workers=True, +) + +val_dataloader = dict( + batch_size=16, + num_workers=2, + dataset=dict( + type=dataset_type, + data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/', + test_mode=True, + pipeline=test_pipeline), + sampler=dict(type='DefaultSampler', shuffle=False), + persistent_workers=True, +) +val_evaluator = dict(type='Accuracy', topk=(1, )) + +test_dataloader = val_dataloader +test_evaluator = val_evaluator + +# optimizer +optim_wrapper = dict( + architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), + mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3), + clip_grad=dict(max_norm=5, norm_type=2)) + +# learning policy +param_scheduler = [ + dict( + type='CosineAnnealingLR', + T_max=600, + by_epoch=True, + begin=0, + end=600, + ) +] + +# train, val, test setting +train_cfg = dict(by_epoch=True, max_epochs=600) +val_cfg = dict(interval=1) # validate each epoch +test_cfg = dict() + +# defaults to use registries in mmcls +default_scope = 'mmcls' + +# configure default hooks +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=100), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='VisualizationHook', enable=False), +) + +# configure environment +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# model +norm_cfg = dict(type='BN', affine=True) +mutable_cfg = dict( + _scope_='mmrazor', +
type='mmrazor.DiffMutableOP', + candidates=dict( + zero=dict(type='mmrazor.DartsZero'), + skip_connect=dict( + type='mmrazor.DartsSkipConnect', + norm_cfg=norm_cfg, + use_drop_path=True), + max_pool_3x3=dict( + type='mmrazor.DartsPoolBN', + pool_type='max', + norm_cfg=norm_cfg, + use_drop_path=True), + avg_pool_3x3=dict( + type='mmrazor.DartsPoolBN', + pool_type='avg', + norm_cfg=norm_cfg, + use_drop_path=True), + sep_conv_3x3=dict( + type='mmrazor.DartsSepConv', + kernel_size=3, + norm_cfg=norm_cfg, + use_drop_path=True), + sep_conv_5x5=dict( + type='mmrazor.DartsSepConv', + kernel_size=5, + norm_cfg=norm_cfg, + use_drop_path=True), + dil_conv_3x3=dict( + type='mmrazor.DartsDilConv', + kernel_size=3, + norm_cfg=norm_cfg, + use_drop_path=True), + dil_conv_5x5=dict( + type='mmrazor.DartsDilConv', + kernel_size=5, + norm_cfg=norm_cfg, + use_drop_path=True), + )) + +route_cfg = dict( + type='mmrazor.DiffChoiceRoute', + with_arch_param=True, +) + +supernet = dict( + type='mmcls.ImageClassifier', + data_preprocessor=preprocess_cfg, + backbone=dict( + type='mmrazor.DartsBackbone', + in_channels=3, + base_channels=36, + num_layers=20, + num_nodes=4, + stem_multiplier=3, + auxliary=True, + aux_channels=128, + aux_out_channels=768, + out_indices=(19, ), + mutable_cfg=mutable_cfg, + route_cfg=route_cfg), + neck=dict(type='mmcls.GlobalAveragePooling'), + head=dict( + type='mmrazor.DartsSubnetClsHead', + num_classes=10, + in_channels=576, + aux_in_channels=768, + loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0), + aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4), + topk=(1, 5), + cal_acc=True), +) + +mutator = dict(type='mmrazor.DiffModuleMutator') + +fix_subnet = 'configs/nas/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml' + +model = dict( + type='mmrazor.SPOS', + architecture=supernet, + mutator=mutator, + fix_subnet=fix_subnet, +) + +find_unused_parameters = False diff --git a/configs/nas/darts/darts_supernet_unroll_1xb64_cifar10_2.0.py b/configs/nas/darts/darts_supernet_unroll_1xb64_cifar10_2.0.py new file mode 100644 index 00000000..074f989e --- /dev/null +++ b/configs/nas/darts/darts_supernet_unroll_1xb64_cifar10_2.0.py @@ -0,0 +1,163 @@ +# dataset settings +dataset_type = 'CIFAR10' +preprocess_cfg = dict( + # RGB format normalization parameters + mean=[125.307, 122.961, 113.8575], + std=[51.5865, 50.847, 51.255], + # loaded images are already RGB format + to_rgb=False) + +train_pipeline = [ + dict(type='RandomCrop', crop_size=32, padding=4), + dict(type='RandomFlip', prob=0.5, direction='horizontal'), + dict(type='PackClsInputs'), +] + +test_pipeline = [ + dict(type='PackClsInputs'), +] + +train_dataloader = dict( + batch_size=16, + num_workers=2, + dataset=dict( + type=dataset_type, + data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10', + test_mode=False, + pipeline=train_pipeline), + sampler=dict(type='DefaultSampler', shuffle=True), + persistent_workers=True, +) + +val_dataloader = dict( + batch_size=16, + num_workers=2, + dataset=dict( + type=dataset_type, + data_prefix='/mnt/cache/share_data/dongpeijie/data/cifar10/', + test_mode=True, + pipeline=test_pipeline), + sampler=dict(type='DefaultSampler', shuffle=False), + persistent_workers=True, +) +val_evaluator = dict(type='Accuracy', topk=(1, )) + +test_dataloader = val_dataloader +test_evaluator = val_evaluator + +# optimizer +optim_wrapper = dict( + architecture=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), + mutator=dict(type='Adam', lr=3e-4, weight_decay=1e-3), + clip_grad=None) + +# learning policy
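+# Note: cosine annealing spans the whole 50-epoch supernet search (T_max and
+# end match max_epochs in train_cfg), flooring the rate at eta_min=1e-3.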
+param_scheduler = [ + dict( + type='CosineAnnealingLR', + T_max=50, + by_epoch=True, + eta_min=1e-3, + begin=0, + end=50, + ) +] +# train, val, test setting +train_cfg = dict(by_epoch=True, max_epochs=50) +val_cfg = dict(interval=1) # validate each epoch +test_cfg = dict() + +# defaults to use registries in mmcls +default_scope = 'mmcls' + +# configure default hooks +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=100), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', interval=1, save_last=True, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='VisualizationHook', enable=False), +) + +# configure environment +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# model +norm_cfg = dict(type='BN', affine=False) +mutable_cfg = dict( + _scope_='mmrazor', + type='mmrazor.DiffMutableOP', + candidates=dict( + zero=dict(type='mmrazor.DartsZero'), + skip_connect=dict(type='mmrazor.DartsSkipConnect', norm_cfg=norm_cfg), + max_pool_3x3=dict( + type='mmrazor.DartsPoolBN', pool_type='max', norm_cfg=norm_cfg), + avg_pool_3x3=dict( + type='mmrazor.DartsPoolBN', pool_type='avg', norm_cfg=norm_cfg), + sep_conv_3x3=dict( + type='mmrazor.DartsSepConv', kernel_size=3, norm_cfg=norm_cfg), + sep_conv_5x5=dict( + type='mmrazor.DartsSepConv', kernel_size=5, norm_cfg=norm_cfg), + dil_conv_3x3=dict( + type='mmrazor.DartsDilConv', kernel_size=3, norm_cfg=norm_cfg), + dil_conv_5x5=dict( + type='mmrazor.DartsDilConv', kernel_size=5, norm_cfg=norm_cfg), + )) + +route_cfg = dict( + type='mmrazor.DiffChoiceRoute', + with_arch_param=True, +) + +supernet = dict( + type='mmcls.ImageClassifier', + backbone=dict( + type='mmrazor.DartsBackbone', + in_channels=3, + base_channels=36, + num_layers=20, + num_nodes=4, + stem_multiplier=3, + auxliary=False, + out_indices=(19, ), + mutable_cfg=mutable_cfg, + route_cfg=route_cfg), + neck=dict(type='mmcls.GlobalAveragePooling'), + head=dict( + type='mmrazor.DartsSubnetClsHead', + num_classes=10, + in_channels=576, + aux_in_channels=768, + loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0), + aux_loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=0.4), + topk=(1, 5), + cal_acc=True), +) + +mutator = dict(type='mmrazor.DiffModuleMutator') + +model = dict( + type='mmrazor.SPOS', + architecture=supernet, + mutator=mutator, +) + +find_unused_parameters = True diff --git a/configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR.yaml b/configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR.yaml deleted file mode 100644 index 5321759f..00000000 --- a/configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR.yaml +++ /dev/null @@ -1,60 +0,0 @@ -stage_0_block_0: - chosen: - - shuffle_7x7 -stage_0_block_1: - chosen: - - shuffle_5x5 -stage_0_block_2: - chosen: - - shuffle_7x7 -stage_0_block_3: - chosen: - - shuffle_3x3 -stage_1_block_0: - chosen: - - shuffle_7x7 -stage_1_block_1: - chosen: - - shuffle_5x5 -stage_1_block_2: - chosen: - - shuffle_5x5 -stage_1_block_3: - chosen: - - shuffle_7x7 -stage_2_block_0: - chosen: - - shuffle_xception -stage_2_block_1: - chosen: - - shuffle_xception -stage_2_block_2: - chosen: - - shuffle_5x5
-stage_2_block_3: - chosen: - - shuffle_xception -stage_2_block_4: - chosen: - - shuffle_3x3 -stage_2_block_5: - chosen: - - shuffle_3x3 -stage_2_block_6: - chosen: - - shuffle_xception -stage_2_block_7: - chosen: - - shuffle_5x5 -stage_3_block_0: - chosen: - - shuffle_xception -stage_3_block_1: - chosen: - - shuffle_5x5 -stage_3_block_2: - chosen: - - shuffle_xception -stage_3_block_3: - chosen: - - shuffle_7x7 diff --git a/configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml b/configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml new file mode 100644 index 00000000..ed3bc069 --- /dev/null +++ b/configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml @@ -0,0 +1,22 @@ +modules: + backbone.layers.0.0: shuffle_5x5 + backbone.layers.0.1: shuffle_3x3 + backbone.layers.0.2: shuffle_3x3 + backbone.layers.0.3: shuffle_3x3 + backbone.layers.1.0: shuffle_xception + backbone.layers.1.1: shuffle_3x3 + backbone.layers.1.2: shuffle_xception + backbone.layers.1.3: shuffle_7x7 + backbone.layers.2.0: shuffle_7x7 + backbone.layers.2.1: shuffle_7x7 + backbone.layers.2.2: shuffle_xception + backbone.layers.2.3: shuffle_xception + backbone.layers.2.4: shuffle_3x3 + backbone.layers.2.5: shuffle_7x7 + backbone.layers.2.6: shuffle_5x5 + backbone.layers.2.7: shuffle_xception + backbone.layers.3.0: shuffle_7x7 + backbone.layers.3.1: shuffle_7x7 + backbone.layers.3.2: shuffle_7x7 + backbone.layers.3.3: shuffle_5x5 +channels: diff --git a/configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml b/configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml new file mode 100644 index 00000000..ed3bc069 --- /dev/null +++ b/configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml @@ -0,0 +1,22 @@ +modules: + backbone.layers.0.0: shuffle_5x5 + backbone.layers.0.1: shuffle_3x3 + backbone.layers.0.2: shuffle_3x3 + backbone.layers.0.3: shuffle_3x3 + backbone.layers.1.0: shuffle_xception + backbone.layers.1.1: shuffle_3x3 + backbone.layers.1.2: shuffle_xception + backbone.layers.1.3: shuffle_7x7 + backbone.layers.2.0: shuffle_7x7 + backbone.layers.2.1: shuffle_7x7 + backbone.layers.2.2: shuffle_xception + backbone.layers.2.3: shuffle_xception + backbone.layers.2.4: shuffle_3x3 + backbone.layers.2.5: shuffle_7x7 + backbone.layers.2.6: shuffle_5x5 + backbone.layers.2.7: shuffle_xception + backbone.layers.3.0: shuffle_7x7 + backbone.layers.3.1: shuffle_7x7 + backbone.layers.3.2: shuffle_7x7 + backbone.layers.3.3: shuffle_5x5 +channels: diff --git a/configs/nas/detnas/detnas_evolution_search_frcnn_shufflenetv2_fpn_coco.py b/configs/nas/detnas/detnas_evolution_search_frcnn_shufflenetv2_fpn_coco.py deleted file mode 100644 index 894fad85..00000000 --- a/configs/nas/detnas/detnas_evolution_search_frcnn_shufflenetv2_fpn_coco.py +++ /dev/null @@ -1,20 +0,0 @@ -_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py'] - -data = dict( - samples_per_gpu=128, - workers_per_gpu=8, -) - -algorithm = dict(bn_training_mode=True) - -searcher = dict( - type='EvolutionSearcher', - metrics='bbox', - score_key='bbox_mAP', - constraints=dict(flops=300 * 1e6), - candidate_pool_size=50, - candidate_top_k=10, - max_epoch=20, - num_mutation=20, - num_crossover=20, -) diff --git a/configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py b/configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py deleted file mode 100644 index dc929cc8..00000000 --- a/configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py +++ /dev/null @@ -1,6 +0,0 
@@ -_base_ = ['./detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py'] - -# FIXME: you may replace this with the mutable_cfg searched by yourself -mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml' # noqa: E501 - -algorithm = dict(retraining=True, mutable_cfg=mutable_cfg) diff --git a/configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k.py b/configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k.py deleted file mode 100644 index 9486cba6..00000000 --- a/configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k.py +++ /dev/null @@ -1,8 +0,0 @@ -_base_ = [ - '../spos/spos_subnet_shufflenetv2_8xb128_in1k.py', -] - -# FIXME: you may replace this with the mutable_cfg searched by yourself -mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_mutable_cfg.yaml' # noqa: E501 - -algorithm = dict(mutable_cfg=mutable_cfg) diff --git a/configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py b/configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py new file mode 100644 index 00000000..c22c0500 --- /dev/null +++ b/configs/nas/detnas/detnas_subnet_shufflenetv2_8xb128_in1k_2.0_frcnn.py @@ -0,0 +1,8 @@ +_base_ = ['./detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py'] + +# FIXME: you may replace this with the mutable_cfg searched by yourself +fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501 + +model = dict(fix_subnet=fix_subnet) + +find_unused_parameters = False diff --git a/configs/nas/detnas/detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py b/configs/nas/detnas/detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py deleted file mode 100644 index faa58f78..00000000 --- a/configs/nas/detnas/detnas_supernet_frcnn_shufflenetv2_fpn_1x_coco.py +++ /dev/null @@ -1,144 +0,0 @@ -_base_ = [ - '../../_base_/datasets/mmdet/coco_detection.py', - '../../_base_/schedules/mmdet/schedule_1x.py', - '../../_base_/mmdet_runtime.py' -] - -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='mmdet.FasterRCNN', - backbone=dict( - type='mmcls.SearchableShuffleNetV2', - norm_cfg=norm_cfg, - out_indices=(0, 1, 2, 3), - widen_factor=1.0, - with_last_layer=False), - neck=dict( - type='FPN', - norm_cfg=norm_cfg, - in_channels=[64, 160, 320, 640], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_generator=dict( - type='AnchorGenerator', - scales=[8], - ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64]), - bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='L1Loss', loss_weight=1.0)), - roi_head=dict( - type='StandardRoIHead', - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='Shared4Conv1FCBBoxHead', - norm_cfg=norm_cfg, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2]), 
- reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='L1Loss', loss_weight=1.0))), - train_cfg=dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_pre=2000, - max_per_img=1000, - nms=dict(type='nms', iou_threshold=0.7), - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=False, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)), - test_cfg=dict( - rpn=dict( - nms_pre=1000, - max_per_img=1000, - nms=dict(type='nms', iou_threshold=0.7), - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.5), - max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) - ), -) - -mutator = dict( - type='OneShotModuleMutator', - placeholder_mapping=dict( - all_blocks=dict( - type='OneShotMutableOP', - choices=dict( - shuffle_3x3=dict( - type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3), - shuffle_5x5=dict( - type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=5), - shuffle_7x7=dict( - type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=7), - shuffle_xception=dict( - type='ShuffleXception', - norm_cfg=norm_cfg, - ), - )))) - -algorithm = dict( - type='DetNAS', - architecture=dict( - type='MMDetArchitecture', - model=model, - ), - mutator=mutator, - pruner=None, - distiller=None, - retraining=False, -) - -find_unused_parameters = True diff --git a/configs/nas/detnas/detnas_supernet_shufflenetv2_8xb128_in1k.py b/configs/nas/detnas/detnas_supernet_shufflenetv2_8xb128_in1k.py deleted file mode 100644 index b2049f5b..00000000 --- a/configs/nas/detnas/detnas_supernet_shufflenetv2_8xb128_in1k.py +++ /dev/null @@ -1,5 +0,0 @@ -_base_ = [ - '../spos/spos_supernet_shufflenetv2_8xb128_in1k.py', -] - -runner = dict(max_iters=300000) diff --git a/configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py b/configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py new file mode 100644 index 00000000..34f997ff --- /dev/null +++ b/configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_frcnn.py @@ -0,0 +1,87 @@ +_base_ = [ + 'mmdet::_base_/models/faster_rcnn_r50_fpn.py', + 'mmdet::_base_/datasets/coco_detection.py', + 'mmdet::_base_/schedules/schedule_1x.py', + 'mmdet::_base_/default_runtime.py' +] + +data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/' + +_base_.train_dataloader.dataset.data_root = data_root + +visualizer = None + +log_level = 'INFO' +load_from = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501 +resume = False + +norm_cfg = dict(type='SyncBN', requires_grad=True) +# model settings +_STAGE_MUTABLE = dict( + _scope_='mmrazor', + type='mmrazor.OneShotMutableOP', + candidates=dict( + shuffle_3x3=dict( + type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg), + shuffle_5x5=dict( + type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg), + shuffle_7x7=dict( + type='mmrazor.ShuffleBlock', 
kernel_size=7, norm_cfg=norm_cfg), + shuffle_xception=dict( + type='mmrazor.ShuffleXception', norm_cfg=norm_cfg), + )) + +arch_setting = [ + # Parameters to build layers. 3 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, mutable_cfg. + [64, 4, _STAGE_MUTABLE], + [160, 4, _STAGE_MUTABLE], + [320, 8, _STAGE_MUTABLE], + [640, 4, _STAGE_MUTABLE], +] + +supernet = _base_.model + +supernet.backbone = dict( + type='mmrazor.SearchableShuffleNetV2', + arch_setting=arch_setting, + norm_cfg=norm_cfg, + out_indices=(0, 1, 2, 3), + widen_factor=1.0, + with_last_layer=False) + +supernet.neck = dict( + type='FPN', + norm_cfg=norm_cfg, + in_channels=[64, 160, 320, 640], + out_channels=256, + num_outs=5) + +supernet.roi_head.bbox_head = dict( + type='Shared4Conv1FCBBoxHead', + norm_cfg=norm_cfg, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)) + +mutator = dict(type='mmrazor.OneShotModuleMutator') + +fix_subnet = 'configs/nas/detnas/DETNAS_FRCNN_SHUFFLENETV2_340M_COCO_MMRAZOR_2.0.yaml' # noqa: E501 + +model = dict( + _delete_=True, + type='mmrazor.SPOS', + architecture=supernet, + mutator=mutator, + fix_subnet=fix_subnet, +) + +find_unused_parameters = True diff --git a/configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_retinanet.py b/configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_retinanet.py new file mode 100644 index 00000000..f8c55687 --- /dev/null +++ b/configs/nas/detnas/detnas_supernet_shufflenetv2_coco_1x_2.0_retinanet.py @@ -0,0 +1,114 @@ +_base_ = [ + 'mmdet::faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py', + 'mmdet::datasets/coco_detection.py', 'mmdet::schedules/schedule_1x.py', + 'mmdet::default_runtime.py' +] + +data_root = '/mnt/lustre/share_data/zhangwenwei/data/coco/' + +train_dataloader = dict(dataset=dict(data_root=data_root, )) + +visualizer = None +# custom_hooks = [dict(type='DetVisualizationHook', interval=10)] + +log_level = 'INFO' +load_from = None +resume = False + +# TODO: support auto scaling lr + +norm_cfg = dict(type='SyncBN', requires_grad=True) +# model settings +_STAGE_MUTABLE = dict( + _scope_='mmrazor', + type='mmrazor.OneShotMutableOP', + candidates=dict( + shuffle_3x3=dict( + type='mmrazor.ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg), + shuffle_5x5=dict( + type='mmrazor.ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg), + shuffle_7x7=dict( + type='mmrazor.ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg), + shuffle_xception=dict( + type='mmrazor.ShuffleXception', norm_cfg=norm_cfg), + )) + +arch_setting = [ + # Parameters to build layers. 3 parameters are needed to construct a + # layer, from left to right: channel, num_blocks, mutable_cfg. 
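+    # For example, [320, 8, _STAGE_MUTABLE] below builds a stage of eight
+    # searchable blocks with 320 output channels, each block picking one of the
+    # four shuffle candidates defined above.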
+ [64, 4, _STAGE_MUTABLE], + [160, 4, _STAGE_MUTABLE], + [320, 8, _STAGE_MUTABLE], + [640, 4, _STAGE_MUTABLE], +] + +supernet = dict( + type='RetinaNet', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='mmrazor.SearchableShuffleNetV2', + arch_setting=arch_setting, + norm_cfg=norm_cfg, + out_indices=(0, 1, 2, 3), + widen_factor=1.0, + with_last_layer=False), + neck=dict( + type='FPN', + in_channels=[64, 160, 320, 640], + out_channels=256, + num_outs=5), + bbox_head=dict( + type='RetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) + +mutator = dict(type='mmrazor.OneShotModuleMutator') + +model = dict( + type='mmrazor.SPOS', + architecture=supernet, + mutator=mutator, +) + +find_unused_parameters = True diff --git a/configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml b/configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml new file mode 100644 index 00000000..bc8f82ed --- /dev/null +++ b/configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml @@ -0,0 +1,24 @@ +modules: + backbone.layer1.0: mb_k3e1 + backbone.layer2.0: mb_k5e3 + backbone.layer2.1: mb_k5e3 + backbone.layer2.2: identity + backbone.layer2.3: mb_k3e3 + backbone.layer3.0: mb_k3e3 + backbone.layer3.1: identity + backbone.layer3.2: identity + backbone.layer3.3: mb_k3e3 + backbone.layer4.0: mb_k7e6 + backbone.layer4.1: identity + backbone.layer4.2: mb_k7e3 + backbone.layer4.3: mb_k7e3 + backbone.layer5.0: mb_k3e3 + backbone.layer5.1: mb_k3e3 + backbone.layer5.2: mb_k7e3 + backbone.layer5.3: mb_k5e3 + backbone.layer6.0: mb_k5e6 + backbone.layer6.1: mb_k7e3 + backbone.layer6.2: mb_k7e3 + backbone.layer6.3: mb_k7e3 + backbone.layer7.0: mb_k5e6 +channels: diff --git a/configs/nas/spos/SPOS_MOBILENET_490M_FROM_ANGELNAS.yaml b/configs/nas/spos/SPOS_MOBILENET_490M_FROM_ANGELNAS.yaml deleted file mode 100644 index 154cc670..00000000 --- a/configs/nas/spos/SPOS_MOBILENET_490M_FROM_ANGELNAS.yaml +++ /dev/null @@ -1,66 +0,0 @@ -stage_0_block_0: - chosen: - - mb_k3e1 -stage_1_block_0: - chosen: - - mb_k5e3 -stage_1_block_1: - chosen: - - mb_k5e3 -stage_1_block_2: - chosen: - - identity -stage_1_block_3: - chosen: - - mb_k3e3 -stage_2_block_0: - chosen: - - mb_k3e3 -stage_2_block_1: - chosen: - - identity -stage_2_block_2: - chosen: - - identity -stage_2_block_3: - chosen: - - mb_k3e3 -stage_3_block_0: - chosen: - - mb_k7e6 -stage_3_block_1: - chosen: - - identity -stage_3_block_2: - chosen: - - mb_k7e3 -stage_3_block_3: - chosen: - - mb_k7e3 -stage_4_block_0: - chosen: - - mb_k3e3 -stage_4_block_1: - chosen: - - mb_k3e3 -stage_4_block_2: - chosen: - - mb_k7e3 -stage_4_block_3: - chosen: - - mb_k5e3 
-stage_5_block_0: - chosen: - - mb_k5e6 -stage_5_block_1: - chosen: - - mb_k7e3 -stage_5_block_2: - chosen: - - mb_k7e3 -stage_5_block_3: - chosen: - - mb_k7e3 -stage_6_block_0: - chosen: - - mb_k5e6 diff --git a/configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER.yaml b/configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER.yaml deleted file mode 100644 index 024347f1..00000000 --- a/configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER.yaml +++ /dev/null @@ -1,60 +0,0 @@ -stage_0_block_0: - chosen: - - shuffle_7x7 -stage_0_block_1: - chosen: - - shuffle_5x5 -stage_0_block_2: - chosen: - - shuffle_3x3 -stage_0_block_3: - chosen: - - shuffle_5x5 -stage_1_block_0: - chosen: - - shuffle_7x7 -stage_1_block_1: - chosen: - - shuffle_3x3 -stage_1_block_2: - chosen: - - shuffle_7x7 -stage_1_block_3: - chosen: - - shuffle_3x3 -stage_2_block_0: - chosen: - - shuffle_7x7 -stage_2_block_1: - chosen: - - shuffle_3x3 -stage_2_block_2: - chosen: - - shuffle_7x7 -stage_2_block_3: - chosen: - - shuffle_xception -stage_2_block_4: - chosen: - - shuffle_3x3 -stage_2_block_5: - chosen: - - shuffle_3x3 -stage_2_block_6: - chosen: - - shuffle_3x3 -stage_2_block_7: - chosen: - - shuffle_3x3 -stage_3_block_0: - chosen: - - shuffle_xception -stage_3_block_1: - chosen: - - shuffle_7x7 -stage_3_block_2: - chosen: - - shuffle_xception -stage_3_block_3: - chosen: - - shuffle_xception diff --git a/configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml b/configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml new file mode 100644 index 00000000..e5c11d46 --- /dev/null +++ b/configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml @@ -0,0 +1,22 @@ +modules: + backbone.layers.0.0: shuffle_7x7 + backbone.layers.0.1: shuffle_3x3 + backbone.layers.0.2: shuffle_7x7 + backbone.layers.0.3: shuffle_3x3 + backbone.layers.1.0: shuffle_xception + backbone.layers.1.1: shuffle_5x5 + backbone.layers.1.2: shuffle_5x5 + backbone.layers.1.3: shuffle_3x3 + backbone.layers.2.0: shuffle_3x3 + backbone.layers.2.1: shuffle_5x5 + backbone.layers.2.2: shuffle_3x3 + backbone.layers.2.3: shuffle_5x5 + backbone.layers.2.4: shuffle_3x3 + backbone.layers.2.5: shuffle_xception + backbone.layers.2.6: shuffle_5x5 + backbone.layers.2.7: shuffle_7x7 + backbone.layers.3.0: shuffle_7x7 + backbone.layers.3.1: shuffle_3x3 + backbone.layers.3.2: shuffle_5x5 + backbone.layers.3.3: shuffle_xception +channels: diff --git a/configs/nas/spos/spos_evolution_search_mobilenet_proxyless_gpu_flops465_8xb512_in1k.py b/configs/nas/spos/spos_evolution_search_mobilenet_proxyless_gpu_flops465_8xb512_in1k.py deleted file mode 100644 index a37fafc0..00000000 --- a/configs/nas/spos/spos_evolution_search_mobilenet_proxyless_gpu_flops465_8xb512_in1k.py +++ /dev/null @@ -1,20 +0,0 @@ -_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py'] - -data = dict( - samples_per_gpu=512, - workers_per_gpu=16, -) - -algorithm = dict(bn_training_mode=True) - -searcher = dict( - type='EvolutionSearcher', - candidate_pool_size=50, - candidate_top_k=10, - constraints=dict(flops=465 * 1e6), - metrics='accuracy', - score_key='accuracy_top-1', - max_epoch=20, - num_mutation=25, - num_crossover=25, - mutate_prob=0.1) diff --git a/configs/nas/spos/spos_evolution_search_shufflenetv2_8xb2048_in1k.py b/configs/nas/spos/spos_evolution_search_shufflenetv2_8xb2048_in1k.py deleted file mode 100644 index 249f10f4..00000000 --- a/configs/nas/spos/spos_evolution_search_shufflenetv2_8xb2048_in1k.py +++ /dev/null @@ -1,20 +0,0 @@ -_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k.py'] - -data 
= dict( - samples_per_gpu=2048, - workers_per_gpu=16, -) - -algorithm = dict(bn_training_mode=True) - -searcher = dict( - type='EvolutionSearcher', - candidate_pool_size=50, - candidate_top_k=10, - constraints=dict(flops=330 * 1e6), - metrics='accuracy', - score_key='accuracy_top-1', - max_epoch=20, - num_mutation=25, - num_crossover=25, - mutate_prob=0.1) diff --git a/configs/nas/spos/spos_mobilenet_for_check_ckpt_from_anglenas.py b/configs/nas/spos/spos_mobilenet_for_check_ckpt_from_anglenas.py deleted file mode 100644 index 3423abd4..00000000 --- a/configs/nas/spos/spos_mobilenet_for_check_ckpt_from_anglenas.py +++ /dev/null @@ -1,27 +0,0 @@ -_base_ = [ - './spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py', -] - -img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='RandomResizedCrop', size=224), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['gt_label']), - dict(type='Collect', keys=['img', 'gt_label']) -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', size=(256, -1)), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py b/configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py deleted file mode 100644 index 54d5ff5f..00000000 --- a/configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k.py +++ /dev/null @@ -1,13 +0,0 @@ -_base_ = [ - './spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py', -] - -# FIXME: you may replace this with the mutable_cfg searched by yourself -mutable_cfg = 'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_mobilenet_subnet/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_mutable_cfg.yaml' # noqa: E501 - -algorithm = dict(retraining=True, mutable_cfg=mutable_cfg) -evaluation = dict(interval=10000, metric='accuracy') -checkpoint_config = dict(interval=30000) - -runner = dict(max_iters=300000) -find_unused_parameters = False diff --git a/configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py b/configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py new file mode 100644 index 00000000..cc1519a8 --- /dev/null +++ b/configs/nas/spos/spos_subnet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py @@ -0,0 +1,8 @@ +_base_ = ['./spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py'] + +# FIXME: you may replace this with the mutable_cfg searched by yourself +fix_subnet = 'configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml' # noqa: E501 + +model = dict(fix_subnet=fix_subnet) + +find_unused_parameters = False diff --git a/configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py b/configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py deleted file mode 100644 index 110ee047..00000000 --- a/configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py +++ /dev/null @@ -1,11 +0,0 @@ -_base_ = [ - './spos_supernet_shufflenetv2_8xb128_in1k.py', -] - -# FIXME: you may replace this with the mutable_cfg searched by yourself -mutable_cfg = 
'https://download.openmmlab.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml' # noqa: E501 - -algorithm = dict(retraining=True, mutable_cfg=mutable_cfg) - -runner = dict(max_iters=300000) -find_unused_parameters = False diff --git a/configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py b/configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py new file mode 100644 index 00000000..785adfe5 --- /dev/null +++ b/configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k_2.0_example.py @@ -0,0 +1,9 @@ +_base_ = ['./spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py'] + +# FIXME: you may replace this with the mutable_cfg searched by yourself +# fix_subnet = 'configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501 +fix_subnet = 'configs/nas/detnas/DetNAS_SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' # noqa: E501 + +model = dict(fix_subnet=fix_subnet) + +find_unused_parameters = False diff --git a/configs/nas/spos/spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py b/configs/nas/spos/spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py deleted file mode 100644 index 58c7e7d4..00000000 --- a/configs/nas/spos/spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k.py +++ /dev/null @@ -1,101 +0,0 @@ -_base_ = [ - '../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py', - '../../_base_/schedules/mmcls/imagenet_bs1024_spos.py', - '../../_base_/mmcls_runtime.py' -] -norm_cfg = dict(type='BN') -model = dict( - type='mmcls.ImageClassifier', - backbone=dict( - type='SearchableMobileNet', - first_channels=40, - last_channels=1728, - widen_factor=1.0, - norm_cfg=norm_cfg, - arch_setting_type='proxyless_gpu'), - neck=dict(type='GlobalAveragePooling'), - head=dict( - type='LinearClsHead', - num_classes=1000, - in_channels=1728, - loss=dict( - type='LabelSmoothLoss', - num_classes=1000, - label_smooth_val=0.1, - mode='original', - loss_weight=1.0), - topk=(1, 5), - ), -) - -mutator = dict( - type='OneShotModuleMutator', - placeholder_mapping=dict( - searchable_blocks=dict( - type='OneShotMutableOP', - choices=dict( - mb_k3e3=dict( - type='MBBlock', - kernel_size=3, - expand_ratio=3, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), - mb_k5e3=dict( - type='MBBlock', - kernel_size=5, - expand_ratio=3, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), - mb_k7e3=dict( - type='MBBlock', - kernel_size=7, - expand_ratio=3, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), - mb_k3e6=dict( - type='MBBlock', - kernel_size=3, - expand_ratio=6, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), - mb_k5e6=dict( - type='MBBlock', - kernel_size=5, - expand_ratio=6, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), - mb_k7e6=dict( - type='MBBlock', - kernel_size=7, - expand_ratio=6, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), - identity=dict(type='Identity'))), - first_blocks=dict( - type='OneShotMutableOP', - choices=dict( - mb_k3e1=dict( - type='MBBlock', - kernel_size=3, - expand_ratio=1, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU6')), )))) - -algorithm = dict( - type='SPOS', - architecture=dict( - type='MMClsArchitecture', - model=model, - ), - mutator=mutator, - distiller=None, - retraining=False, -) - -runner = dict(max_iters=150000) -evaluation = dict(interval=10000, metric='accuracy') - -# checkpoint saving -checkpoint_config = dict(interval=30000) - -find_unused_parameters = True diff --git 
a/configs/nas/spos/spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py b/configs/nas/spos/spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py
new file mode 100644
index 00000000..8e274a93
--- /dev/null
+++ b/configs/nas/spos/spos_supernet_mobilenet_proxyless_gpu_8xb128_in1k_2.0.py
@@ -0,0 +1,245 @@
+# dataset settings
+dataset_type = 'ImageNet'
+preprocess_cfg = dict(
+    # RGB format normalization parameters
+    mean=[0., 0., 0.],
+    std=[1., 1., 1.],
+    # convert image from BGR to RGB
+    to_rgb=False,
+)
+
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/imagenet':
+        'sproject:s3://openmmlab/datasets/classification/imagenet',
+        'data/imagenet':
+        'sproject:s3://openmmlab/datasets/classification/imagenet'
+    }))
+
+train_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='RandomResizedCrop', scale=224),
+    dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
+    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
+    dict(type='PackClsInputs'),
+]
+
+test_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(
+        type='ResizeEdge',
+        scale=256,
+        edge='short',
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='PackClsInputs'),
+]
+
+train_dataloader = dict(
+    batch_size=128,
+    num_workers=8,
+    dataset=dict(
+        type=dataset_type,
+        data_root='/mnt/cache/share/images',
+        ann_file='meta/train.txt',
+        data_prefix='train',
+        pipeline=train_pipeline),
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    persistent_workers=True,
+)
+
+# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
+
+val_dataloader = dict(
+    batch_size=128,
+    num_workers=8,
+    dataset=dict(
+        type=dataset_type,
+        data_root='/mnt/cache/share/images',
+        ann_file='meta/val.txt',
+        data_prefix='val',
+        pipeline=test_pipeline),
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    persistent_workers=True,
+)
+val_evaluator = dict(type='Accuracy', topk=(1, 5))
+
+# If you want standard test, please manually configure the test dataset
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
+
+# scheduler
+
+# optimizer
+optim_wrapper = dict(
+    optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
+    clip_grad=None)
+
+# learning policy
+param_scheduler = [
+    dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
+]
+
+# train, val, test setting
+train_cfg = dict(by_epoch=False, max_iters=300000)
+val_cfg = dict()
+test_cfg = dict()
+
+# runtime
+
+# defaults to use registries in mmcls
+default_scope = 'mmcls'
+
+log_processor = dict(
+    window_size=100,
+    by_epoch=False,
+    custom_cfg=[
+        dict(
+            data_src='loss',
+            log_name='loss_large_window',
+            method_name='mean',
+            window_size=100)
+    ])
+
+# configure default hooks
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=100),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=10000,
+        save_last=True,
+        max_keep_ckpts=3),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='VisualizationHook', enable=False),
+)
+
+# configure environment
+env_cfg = dict(
+    cudnn_benchmark=False,
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
+# vis_backends = [dict(type='LocalVisBackend')]
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# model
+norm_cfg = dict(type='BN')
+_STAGE_MUTABLE = dict(
+    _scope_='mmrazor',
+    type='OneShotMutableOP',
+    candidates=dict(
+        mb_k3e3=dict(
+            type='MBBlock',
+            kernel_size=3,
+            expand_ratio=3,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')),
+        mb_k5e3=dict(
+            type='MBBlock',
+            kernel_size=5,
+            expand_ratio=3,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')),
+        mb_k7e3=dict(
+            type='MBBlock',
+            kernel_size=7,
+            expand_ratio=3,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')),
+        mb_k3e6=dict(
+            type='MBBlock',
+            kernel_size=3,
+            expand_ratio=6,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')),
+        mb_k5e6=dict(
+            type='MBBlock',
+            kernel_size=5,
+            expand_ratio=6,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')),
+        mb_k7e6=dict(
+            type='MBBlock',
+            kernel_size=7,
+            expand_ratio=6,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')),
+        identity=dict(type='Identity'),
+    ))
+
+_FIRST_MUTABLE = dict(
+    _scope_='mmrazor',
+    type='OneShotMutableOP',
+    candidates=dict(
+        mb_k3e1=dict(
+            type='MBBlock',
+            kernel_size=3,
+            expand_ratio=1,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type='ReLU6')), ))
+
+arch_setting = [
+    # Parameters to build layers. 4 parameters are needed to construct a
+    # layer, from left to right: channel, num_blocks, stride, mutable_cfg.
+    [24, 1, 1, _FIRST_MUTABLE],
+    [32, 4, 2, _STAGE_MUTABLE],
+    [56, 4, 2, _STAGE_MUTABLE],
+    [112, 4, 2, _STAGE_MUTABLE],
+    [128, 4, 1, _STAGE_MUTABLE],
+    [256, 4, 2, _STAGE_MUTABLE],
+    [432, 1, 1, _STAGE_MUTABLE]
+]
+
+norm_cfg = dict(type='BN')
+supernet = dict(
+    type='ImageClassifier',
+    data_preprocessor=preprocess_cfg,
+    backbone=dict(
+        _scope_='mmrazor',
+        type='SearchableMobileNet',
+        first_channels=40,
+        last_channels=1728,
+        widen_factor=1.0,
+        norm_cfg=norm_cfg,
+        arch_setting=arch_setting),
+    neck=dict(type='GlobalAveragePooling'),
+    head=dict(
+        type='LinearClsHead',
+        num_classes=1000,
+        in_channels=1728,
+        loss=dict(
+            type='LabelSmoothLoss',
+            num_classes=1000,
+            label_smooth_val=0.1,
+            mode='original',
+            loss_weight=1.0),
+        topk=(1, 5),
+    ),
+)
+
+mutator = dict(type='mmrazor.OneShotModuleMutator')
+
+model = dict(
+    type='mmrazor.SPOS',
+    architecture=supernet,
+    mutator=mutator,
+)
+
+find_unused_parameters = True
diff --git a/configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k.py b/configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k.py
deleted file mode 100644
index 82bb5326..00000000
--- a/configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k.py
+++ /dev/null
@@ -1,59 +0,0 @@
-_base_ = [
-    '../../_base_/datasets/mmcls/imagenet_bs128_colorjittor.py',
-    '../../_base_/schedules/mmcls/imagenet_bs1024_spos.py',
-    '../../_base_/mmcls_runtime.py'
-]
-norm_cfg = dict(type='BN')
-model = dict(
-    type='mmcls.ImageClassifier',
-    backbone=dict(
-        type='SearchableShuffleNetV2', widen_factor=1.0, norm_cfg=norm_cfg),
-    neck=dict(type='GlobalAveragePooling'),
-    head=dict(
-        type='LinearClsHead',
-        num_classes=1000,
-        in_channels=1024,
-        loss=dict(
-            type='LabelSmoothLoss',
-            num_classes=1000,
-            label_smooth_val=0.1,
-            mode='original',
-            loss_weight=1.0),
-        topk=(1, 5),
-    ),
-)
-
-mutator = dict(
-    type='OneShotModuleMutator',
-    placeholder_mapping=dict(
-        all_blocks=dict(
-            type='OneShotMutableOP',
-            choices=dict(
-                shuffle_3x3=dict(
-                    type='ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
-                shuffle_5x5=dict(
-                    type='ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
-                shuffle_7x7=dict(
-                    type='ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
-                shuffle_xception=dict(
-                    type='ShuffleXception', norm_cfg=norm_cfg),
-            ))))
-
-algorithm = dict(
-    type='SPOS',
-    architecture=dict(
-        type='MMClsArchitecture',
-        model=model,
-    ),
-    mutator=mutator,
-    distiller=None,
-    retraining=False,
-)
-
-runner = dict(max_iters=150000)
-evaluation = dict(interval=1000, metric='accuracy')
-
-# checkpoint saving
-checkpoint_config = dict(interval=1000)
-
-find_unused_parameters = True
diff --git a/configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py b/configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py
new file mode 100644
index 00000000..6a0f7c59
--- /dev/null
+++ b/configs/nas/spos/spos_supernet_shufflenetv2_8xb128_in1k_2.0_example.py
@@ -0,0 +1,214 @@
+# dataset settings
+dataset_type = 'ImageNet'
+preprocess_cfg = dict(
+    # RGB format normalization parameters
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    # convert image from BGR to RGB
+    to_rgb=True,
+)
+
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/imagenet':
+        'sproject:s3://openmmlab/datasets/classification/imagenet',
+        'data/imagenet':
+        'sproject:s3://openmmlab/datasets/classification/imagenet'
+    }))
+
+train_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='RandomResizedCrop', scale=224),
+    dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
+    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
+    dict(type='PackClsInputs'),
+]
+
+test_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='ResizeEdge', scale=256, edge='short', backend='cv2'),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='PackClsInputs'),
+]
+
+train_dataloader = dict(
+    batch_size=128,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='/mnt/cache/share/images',
+        ann_file='meta/train.txt',
+        data_prefix='train',
+        pipeline=train_pipeline),
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    persistent_workers=True,
+)
+
+# /mnt/lustre/share_data/wangjiaqi/data/imagenet',
+
+val_dataloader = dict(
+    batch_size=128,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='/mnt/cache/share/images',
+        ann_file='meta/val.txt',
+        data_prefix='val',
+        pipeline=test_pipeline),
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    persistent_workers=True,
+)
+val_evaluator = dict(type='Accuracy', topk=(1, 5))
+
+# If you want standard test, please manually configure the test dataset
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
+
+# scheduler
+
+# optimizer
+optim_wrapper = dict(
+    optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
+    clip_grad=None)
+
+# learning policy
+param_scheduler = [
+    dict(type='PolyLR', power=1.0, eta_min=0.0, by_epoch=False, end=300000),
+]
+
+# train, val, test setting
+train_cfg = dict(by_epoch=False, max_iters=300000)
+val_cfg = dict()
+test_cfg = dict()
+
+# runtime
+
+# defaults to use registries in mmcls
+default_scope = 'mmcls'
+
+log_processor = dict(
+    window_size=100,
+    by_epoch=False,
+    custom_cfg=[
+        dict(
+            data_src='loss',
+            log_name='loss_large_window',
+            method_name='mean',
+            window_size=100)
+    ])
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type='IterTimerHook'),
+
+    # print log every 100 iterations.
+    logger=dict(type='LoggerHook', interval=100),
+
+    # enable the parameter scheduler.
+    param_scheduler=dict(type='ParamSchedulerHook'),
+
+    # save checkpoint every 10000 iterations.
+    checkpoint=dict(
+        type='CheckpointHook',
+        by_epoch=False,
+        interval=10000,
+        save_last=True,
+        max_keep_ckpts=3),
+
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+
+    # validation results visualization, set True to enable it.
+    visualization=dict(type='VisualizationHook', enable=False),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+# dict(type='ClsVisualizer', vis_backends=vis_backends, name='visualizer')
+# vis_backends = [dict(type='LocalVisBackend')]
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# "/mnt/lustre/dongpeijie/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d.pth"
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# model
+
+_STAGE_MUTABLE = dict(
+    _scope_='mmrazor',
+    type='OneShotMutableOP',
+    candidates=dict(
+        shuffle_3x3=dict(
+            type='ShuffleBlock', kernel_size=3, norm_cfg=dict(type='BN')),
+        shuffle_5x5=dict(
+            type='ShuffleBlock', kernel_size=5, norm_cfg=dict(type='BN')),
+        shuffle_7x7=dict(
+            type='ShuffleBlock', kernel_size=7, norm_cfg=dict(type='BN')),
+        shuffle_xception=dict(
+            type='ShuffleXception', norm_cfg=dict(type='BN')),
+    ))
+
+arch_setting = [
+    # Parameters to build layers. 3 parameters are needed to construct a
+    # layer, from left to right: channel, num_blocks, mutable_cfg.
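+    # For example, the first entry below, [64, 4, _STAGE_MUTABLE], builds a
+    # stage of 4 searchable blocks with 64 output channels, where each block
+    # chooses one of the candidate ops defined in _STAGE_MUTABLE above.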
+ [64, 4, _STAGE_MUTABLE], + [160, 4, _STAGE_MUTABLE], + [320, 8, _STAGE_MUTABLE], + [640, 4, _STAGE_MUTABLE], +] + +norm_cfg = dict(type='BN') +supernet = dict( + type='ImageClassifier', + data_preprocessor=preprocess_cfg, + backbone=dict( + _scope_='mmrazor', + type='SearchableShuffleNetV2', + widen_factor=1.0, + norm_cfg=norm_cfg, + arch_setting=arch_setting), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=1024, + loss=dict( + type='LabelSmoothLoss', + num_classes=1000, + label_smooth_val=0.1, + mode='original', + loss_weight=1.0), + topk=(1, 5), + ), +) + +mutator = dict(type='mmrazor.OneShotModuleMutator') + +model = dict( + type='mmrazor.SPOS', + architecture=supernet, + mutator=mutator, + # fix_subnet='configs/nas/spos/SPOS_SHUFFLENETV2_330M_IN1k_PAPER_2.0.yaml' +) + +find_unused_parameters = True diff --git a/convert_keys.py b/convert_keys.py new file mode 100644 index 00000000..ba4d4738 --- /dev/null +++ b/convert_keys.py @@ -0,0 +1,372 @@ +from collections import OrderedDict + +import torch +from mmengine.config import Config + +from mmrazor.core import * # noqa: F401,F403 +from mmrazor.models import * # noqa: F401,F403 +from mmrazor.registry import MODELS +from mmrazor.utils import register_all_modules + + +def convert_spos_key(old_path, new_path): + old_dict = torch.load(old_path) + new_dict = {'meta': old_dict['meta'], 'state_dict': {}} + + mapping = { + 'choices': '_candidates', + 'architecture.': '', + 'model.': '', + } + + for k, v in old_dict['state_dict'].items(): + new_key = k + for _from, _to in mapping.items(): + new_key = new_key.replace(_from, _to) + + new_key = f'architecture.{new_key}' + + new_dict['state_dict'][new_key] = v + + torch.save(new_dict, new_path) + + +def convert_detnas_key(old_path, new_path): + old_dict = torch.load(old_path) + new_dict = {'meta': old_dict['meta'], 'state_dict': {}} + + mapping = { + 'choices': '_candidates', + 'model.': '', + } + + for k, v in old_dict['state_dict'].items(): + new_key = k + for _from, _to in mapping.items(): + new_key = new_key.replace(_from, _to) + + new_dict['state_dict'][new_key] = v + torch.save(new_dict, new_path) + + +def convert_anglenas_key(old_path, new_path): + old_dict = torch.load(old_path) + new_dict = {'state_dict': {}} + + mapping = { + 'choices': '_candidates', + 'model.': '', + 'mbv2': 'mb', + } + + for k, v in old_dict.items(): + new_key = k + for _from, _to in mapping.items(): + new_key = new_key.replace(_from, _to) + + new_dict['state_dict'][new_key] = v + torch.save(new_dict, new_path) + + +def convert_darts_key(old_path, new_path): + old_dict = torch.load(old_path) + new_dict = {'meta': old_dict['meta'], 'state_dict': {}} + cfg = Config.fromfile( + 'configs/nas/darts/darts_subnet_1xb96_cifar10_2.0.py') + # import ipdb; ipdb.set_trace() + model = MODELS.build(cfg.model) + + print('============> module name') + for name, module in model.state_dict().items(): + print(name) + + mapping = { + 'choices': '_candidates', + 'model.': '', + 'edges': 'route', + } + + for k, v in old_dict['state_dict'].items(): + new_key = k + for _from, _to in mapping.items(): + new_key = new_key.replace(_from, _to) + # cells.0.nodes.0.edges.choices.normal_n2_p1.0.choices.sep_conv_3x3.conv1.2.weight + splited_list = new_key.split('.') + if len(splited_list) > 10 and splited_list[-6] == '0': + del splited_list[-6] + new_key = '.'.join(splited_list) + elif len(splited_list) > 10 and splited_list[-5] == '0': + del splited_list[-5] + new_key = 
'.'.join(splited_list) + + new_dict['state_dict'][new_key] = v + + print('============> new dict') + for key, v in new_dict['state_dict'].items(): + print(key) + + model.load_state_dict(new_dict['state_dict'], strict=True) + + torch.save(new_dict, new_path) + + +def convert_cream_key(old_path, new_path): + + old_dict = torch.load(old_path, map_location=torch.device('cpu')) + new_dict = {'state_dict': {}} # noqa: F841 + + ordered_old_dict = OrderedDict(old_dict['state_dict']) + + cfg = Config.fromfile('configs/nas/cream/cream_14_subnet_mobilenet.py') + model = MODELS.build(cfg.model) + + model_name_list = [] + model_module_list = [] + + # TODO show structure of model and checkpoint + print('=' * 30, 'the key of model') + for k, v in model.state_dict().items(): + print(k) + + print('=' * 30, 'the key of ckpt') + for k, v in ordered_old_dict.items(): + print(k) + + # final mapping dict + mapping = {} + + middle_razor2cream = { # noqa: F841 + # point-wise expansion + 'expand_conv.conv.weight': 'conv_pw.weight', + 'expand_conv.bn.weight': 'bn1.weight', + 'expand_conv.bn.bias': 'bn1.bias', + 'expand_conv.bn.running_mean': 'bn1.running_mean', + 'expand_conv.bn.running_var': 'bn1.running_var', + 'expand_conv.bn.num_batches_tracked': 'bn1.num_batches_tracked', + + # se + 'se.conv1.conv.weight': 'se.conv_reduce.weight', + 'se.conv1.conv.bias': 'se.conv_reduce.bias', + 'se.conv2.conv.weight': 'se.conv_expand.weight', + 'se.conv2.conv.bias': 'se.conv_expand.bias', + + # depth-wise conv + 'depthwise_conv.conv.weight': 'conv_dw.weight', + 'depthwise_conv.bn.weight': 'bn2.weight', + 'depthwise_conv.bn.bias': 'bn2.bias', + 'depthwise_conv.bn.running_mean': 'bn2.running_mean', + 'depthwise_conv.bn.running_var': 'bn2.running_var', + 'depthwise_conv.bn.num_batches_tracked': 'bn2.num_batches_tracked', + + # point-wise linear projection + 'linear_conv.conv.weight': 'conv_pwl.weight', + 'linear_conv.bn.weight': 'bn3.weight', + 'linear_conv.bn.bias': 'bn3.bias', + 'linear_conv.bn.running_mean': 'bn3.running_mean', + 'linear_conv.bn.running_var': 'bn3.running_var', + 'linear_conv.bn.num_batches_tracked': 'bn3.num_batches_tracked', + + } + + first_razor2cream = { + # for first depthsepconv dw + 'conv_dw.conv.weight': 'conv_dw.weight', + 'conv_dw.bn.weight': 'bn1.weight', + 'conv_dw.bn.bias': 'bn1.bias', + 'conv_dw.bn.running_mean': 'bn1.running_mean', + 'conv_dw.bn.running_var': 'bn1.running_var', + 'conv_dw.bn.num_batches_tracked': 'bn1.num_batches_tracked', + + # for first depthsepconv pw + 'conv_pw.conv.weight': 'conv_pw.weight', + 'conv_pw.bn.weight': 'bn2.weight', + 'conv_pw.bn.bias': 'bn2.bias', + 'conv_pw.bn.running_mean': 'bn2.running_mean', + 'conv_pw.bn.running_var': 'bn2.running_var', + 'conv_pw.bn.num_batches_tracked': 'bn2.num_batches_tracked', + + # se + 'se.conv1.conv.weight': 'se.conv_reduce.weight', + 'se.conv1.conv.bias': 'se.conv_reduce.bias', + 'se.conv2.conv.weight': 'se.conv_expand.weight', + 'se.conv2.conv.bias': 'se.conv_expand.bias', + } + + last_razor2cream = { + # for last convbnact + 'conv2.conv.weight': 'conv.weight', + 'conv2.bn.weight': 'bn1.weight', + 'conv2.bn.bias': 'bn1.bias', + 'conv2.bn.running_mean': 'bn1.running_mean', + 'conv2.bn.running_var': 'bn1.running_var', + 'conv2.bn.num_batches_tracked': 'bn1.num_batches_tracked', + } + + middle_cream2razor = {v: k for k, v in middle_razor2cream.items()} + first_cream2razor = {v: k for k, v in first_razor2cream.items()} + last_cream2razor = {v: k for k, v in last_razor2cream.items()} + + # 1. 
group the razor's module names + grouped_razor_module_name = { + 'middle': {}, + 'first': [], + 'last': [], + } + + for name, module in model.state_dict().items(): + tmp_name: str = name.split( + 'backbone.')[1] if 'backbone' in name else name + model_name_list.append(tmp_name) + model_module_list.append(module) + + if 'conv1' in tmp_name and len(tmp_name) <= 35: + # belong to stem conv + grouped_razor_module_name['first'].append(name) + elif 'head' in tmp_name: + # belong to last linear + grouped_razor_module_name['last'].append(name) + else: + # middle + if tmp_name.startswith('layer'): + key_of_middle = tmp_name[5:8] + if key_of_middle not in grouped_razor_module_name['middle']: + grouped_razor_module_name['middle'][key_of_middle] = [name] + else: + grouped_razor_module_name['middle'][key_of_middle].append( + name) + elif tmp_name.startswith('conv2'): + key_of_middle = '7.0' + if key_of_middle not in grouped_razor_module_name['middle']: + grouped_razor_module_name['middle'][key_of_middle] = [name] + else: + grouped_razor_module_name['middle'][key_of_middle].append( + name) + + # 2. group the cream's module names + grouped_cream_module_name = { + 'middle': {}, + 'first': [], + 'last': [], + } + + for k in ordered_old_dict.keys(): + if 'classifier' in k or 'conv_head' in k: + # last conv + grouped_cream_module_name['last'].append(k) + elif 'blocks' in k: + # middle blocks + key_of_middle = k[7:10] + if key_of_middle not in grouped_cream_module_name['middle']: + grouped_cream_module_name['middle'][key_of_middle] = [k] + else: + grouped_cream_module_name['middle'][key_of_middle].append(k) + else: + # first blocks + grouped_cream_module_name['first'].append(k) + + # 4. process the first modules + for cream_item in grouped_cream_module_name['first']: + if 'conv_stem' in cream_item: + # get corresponding item from razor + for razor_item in grouped_razor_module_name['first']: + if 'conv.weight' in razor_item: + mapping[cream_item] = razor_item + grouped_razor_module_name['first'].remove(razor_item) + break + else: + kws = cream_item.split('.')[-1] + # get corresponding item from razor + for razor_item in grouped_razor_module_name['first']: + if kws in razor_item: + mapping[cream_item] = razor_item + grouped_razor_module_name['first'].remove(razor_item) + + # 5. process the last modules + for cream_item in grouped_cream_module_name['last']: + if 'classifier' in cream_item: + kws = cream_item.split('.')[-1] + for razor_item in grouped_razor_module_name['last']: + if 'fc' in razor_item: + if kws in razor_item: + mapping[cream_item] = razor_item + grouped_razor_module_name['last'].remove(razor_item) + break + + elif 'conv_head' in cream_item: + kws = cream_item.split('.')[-1] + for razor_item in grouped_razor_module_name['last']: + if 'head.conv2' in razor_item: + if kws in razor_item: + mapping[cream_item] = razor_item + grouped_razor_module_name['last'].remove(razor_item) + + # 6. 
process the middle modules + for cream_group_id, cream_items in grouped_cream_module_name[ + 'middle'].items(): + # get the corresponding group from razor + razor_group_id: str = str(float(cream_group_id) + 1) + razor_items: list = grouped_razor_module_name['middle'][razor_group_id] + + if int(razor_group_id[0]) == 1: + key_cream2razor = first_cream2razor + elif int(razor_group_id[0]) == 7: + key_cream2razor = last_cream2razor + else: + key_cream2razor = middle_cream2razor + + # matching razor items and cream items + for cream_item in cream_items: + # traverse all of key_cream2razor + for cream_match, razor_match in key_cream2razor.items(): + if cream_match in cream_item: + # traverse razor_items to get the corresponding razor name + for razor_item in razor_items: + if razor_match in razor_item: + mapping[cream_item] = razor_item + break + + print('=' * 100) + print('length of mapping: ', len(mapping.keys())) + for k, v in mapping.items(): + print(k, '\t=>\t', v) + print('#' * 100) + + # TODO DELETE this print + print('**' * 20) + for c, cm, r, rm in zip(ordered_old_dict.keys(), ordered_old_dict.values(), + model_name_list, model_module_list): + print(f'{c}: shape {cm.shape} => {r}: shape {rm.shape}') + print('**' * 20) + + for k, v in ordered_old_dict.items(): + print(f'Mapping from {k} to {mapping[k]}......') + new_dict['state_dict'][mapping[k]] = v + + model.load_state_dict(new_dict['state_dict'], strict=True) + + torch.save(new_dict, new_path) + + +if __name__ == '__main__': + register_all_modules(True) + # old_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a.pth' # noqa: E501 + # new_path = '/mnt/lustre/dongpeijie/detnas_subnet_shufflenetv2_8xb128_in1k_acc-74.08_20211223-92e9b66a_2.0.pth' # noqa: E501 + # convert_spos_key(old_path, new_path) + + # old_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f.pth' # noqa: E501 + # new_path = '/mnt/lustre/dongpeijie/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20211222-67fea61f_2.0.pth' # noqa: E501 + # convert_detnas_key(old_path, new_path) + + # old_path = './data/14.pth.tar' + # new_path = './data/14_2.0.pth' + # old_path = '/mnt/lustre/dongpeijie/14.pth.tar' + # new_path = '/mnt/lustre/dongpeijie/14_2.0.pth' + # convert_cream_key(old_path, new_path) + + # old_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921.pth' # noqa: E501 + # new_path = '/mnt/lustre/dongpeijie/darts_subnetnet_1xb96_cifar10_acc-97.32_20211222-e5727921_2.0.pth' # noqa: E501 + # convert_darts_key(old_path, new_path) + + old_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f.pth' # noqa: E501 + new_path = '/mnt/lustre/dongpeijie/spos_angelnas_flops_0.49G_acc_75.98_20220307-54f4698f_2.0.pth' # noqa: E501 + convert_anglenas_key(old_path, new_path) diff --git a/mmrazor/models/algorithms/nas/darts.py b/mmrazor/models/algorithms/nas/darts.py new file mode 100644 index 00000000..2871da15 --- /dev/null +++ b/mmrazor/models/algorithms/nas/darts.py @@ -0,0 +1,280 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
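+# A rough sketch of the bi-level optimization implemented below (names are
+# illustrative; the concrete logic lives in `Darts.train_step`): each
+# supernet-training iteration consumes two batches, one for the architecture
+# parameters (alpha, held by the mutator) and one for the supernet weights w:
+#
+#   supernet_batch, arch_batch = data  # order is still marked TODO below
+#   arch_loss = model.loss(arch_batch)          # drives the alpha update
+#   supernet_loss = model.loss(supernet_batch)  # drives the w update
+#
+# With `unroll=True`, alpha is instead updated with the second-order
+# approximation from the DARTS paper (see `_unrolled_backward`).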
+import copy
+from typing import Any, Dict, List, Optional, Union
+
+import torch
+from mmengine import BaseDataElement
+from mmengine.model import BaseModel
+from mmengine.optim import OptimWrapper, OptimWrapperDict
+from torch import nn
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmrazor.models.mutators import DiffModuleMutator
+from mmrazor.models.subnet import (SINGLE_MUTATOR_RANDOM_SUBNET, FixSubnet,
+                                   FixSubnetMixin)
+from mmrazor.registry import MODELS
+from ..base import BaseAlgorithm, LossResults
+
+VALID_FIX_SUBNET = Union[str, FixSubnet, Dict[str, Dict[str, Any]]]
+
+
+@MODELS.register_module()
+class Darts(BaseAlgorithm, FixSubnetMixin):
+    """Implementation of `DARTS <https://arxiv.org/abs/1806.09055>`_
+
+    DARTS means Differentiable Architecture Search, a classic NAS algorithm.
+    :class:`Darts` implements the APIs required by DARTS, as well as the
+    supernet training and subnet retraining logic for each iter.
+
+    Args:
+        architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel`
+            or built model. Corresponding to supernet in NAS algorithm.
+        mutator (dict|:obj:`DiffModuleMutator`): The config of
+            :class:`DiffModuleMutator` or built mutator.
+        fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or
+            loaded dict or built :obj:`FixSubnet`.
+        unroll (bool): Whether to update the architecture parameters with the
+            second-order (unrolled) approximation from the DARTS paper
+            instead of the first-order one. Defaults to False.
+        norm_training (bool): Whether to set norm layers to training mode,
+            namely, not freezing running stats (mean and var). Note: Effect on
+            Batch Norm and its variants only. Defaults to False.
+        data_preprocessor (dict, optional): The pre-process config of
+            :class:`BaseDataPreprocessor`. Defaults to None.
+        init_cfg (dict): Init config for ``BaseModule``.
+
+    Note:
+        Darts has two training modes: supernet training and subnet retraining.
+        If `fix_subnet` is None, it means supernet training.
+        If `fix_subnet` is not None, it means subnet retraining.
+
+    Note:
+        During supernet training, since each op is not fully trained, the
+        statistics of :obj:`_BatchNorm` are inaccurate. This problem affects
+        the evaluation of the performance of each subnet in the search phase.
+        There are usually two ways to solve this problem, both of which need
+        `norm_training` to be set to True:
+
+        1) Using a large batch size, BNs use the mean and variance of the
+           current batch during forward.
+        2) Recalibrate the statistics of BN before searching.
+    """
+
+    def __init__(self,
+                 architecture: Union[BaseModel, Dict],
+                 mutator: Optional[Union[DiffModuleMutator, Dict]] = None,
+                 fix_subnet: Optional[VALID_FIX_SUBNET] = None,
+                 unroll: bool = False,
+                 norm_training: bool = False,
+                 data_preprocessor: Optional[Union[dict, nn.Module]] = None,
+                 init_cfg: Optional[dict] = None):
+        super().__init__(architecture, data_preprocessor, init_cfg)
+
+        # Darts has two training modes: supernet training and subnet
+        # retraining. If fix_subnet is not None, it means subnet retraining.
+        if fix_subnet:
+            # According to fix_subnet, delete the unchosen part of supernet
+            self.load_fix_subnet(fix_subnet, prefix='architecture.')
+            self.is_supernet = False
+        else:
+            assert mutator is not None, \
+                'mutator cannot be None when fix_subnet is None.'
+            if isinstance(mutator, DiffModuleMutator):
+                self.mutator = mutator
+            elif isinstance(mutator, dict):
+                self.mutator = MODELS.build(mutator)
+            else:
+                raise TypeError('mutator should be a `dict` or '
                                f'`DiffModuleMutator` instance, but got '
                                f'{type(mutator)}')
+
+            # Mutator is an essential component of the NAS algorithm. It
+            # provides some APIs commonly used by NAS.
+            # Before using it, you must do some preparations according to
+            # the supernet.
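+            # Roughly, `prepare_from_supernet` walks the supernet, collects
+            # the mutable modules (and their aliases) into search groups and
+            # registers the architecture parameters that `sample_choices` /
+            # `set_choices` operate on; see `DiffModuleMutator` for the
+            # authoritative behaviour.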
+            self.mutator.prepare_from_supernet(self.architecture)
+            self.is_supernet = True
+
+        self.norm_training = norm_training
+        self.unroll = unroll
+
+    def sample_subnet(self) -> SINGLE_MUTATOR_RANDOM_SUBNET:
+        """Randomly sample a subnet with the mutator."""
+        return self.mutator.sample_choices()
+
+    def set_subnet(self, subnet: SINGLE_MUTATOR_RANDOM_SUBNET):
+        """Set the subnet sampled by :meth:`sample_subnet`."""
+        self.mutator.set_choices(subnet)
+
+    def loss(
+        self,
+        batch_inputs: torch.Tensor,
+        data_samples: Optional[List[BaseDataElement]] = None,
+    ) -> LossResults:
+        """Calculate losses from a batch of inputs and data samples."""
+        if self.is_supernet:
+            random_subnet = self.sample_subnet()
+            self.set_subnet(random_subnet)
+            return self.architecture(batch_inputs, data_samples, mode='loss')
+        else:
+            return self.architecture(batch_inputs, data_samples, mode='loss')
+
+    def train(self, mode=True):
+        """Switch the model between train and eval mode; when
+        `norm_training` is True, the normalization layers are kept in
+        training mode even during eval."""
+
+        super().train(mode)
+        if self.norm_training and not mode:
+            for module in self.architecture.modules():
+                if isinstance(module, _BatchNorm):
+                    module.training = True
+
+    def train_step(self, data: List[dict],
+                   optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
+        """The iteration step during training.
+
+        For DARTS supernet training, one iteration consumes two batches: one
+        updates the architecture parameters held by the mutator and the other
+        updates the supernet weights. During subnet retraining, the default
+        single-batch ``train_step`` of the base class is used instead.
+        Args:
+            data (list[dict] | dict): The output of the dataloader(s). A pair
+                of (supernet data, arch data) during supernet training.
+            optim_wrapper (:obj:`OptimWrapper` | :obj:`OptimWrapperDict`):
+                The optimizer wrapper(s) used to update the parameters.
+        Returns:
+            dict: It should contain at least the key ``loss``, a tensor for
+                back propagation which can be a weighted sum of multiple
+                losses; ``num_samples`` indicates the batch size (when the
+                model is DDP, it means the batch size on each GPU), which is
+                used for averaging the logs.
+        """
+        if isinstance(data, (tuple, list)) and isinstance(
+                optim_wrapper, OptimWrapperDict):
+            assert len(data) == len(optim_wrapper), \
+                f'The length of data {len(data)} should be equal to that of optimizers {len(optim_wrapper)}.'  # noqa: E501
+
+            # TODO check the order of data
+            train_supernet_data, train_arch_data = data
+
+            # TODO mutator optimizer zero_grad
+            optim_wrapper.zero_grad()
+
+            if self.unroll:
+                self._unrolled_backward(train_arch_data, train_supernet_data,
+                                        optim_wrapper)  # TODO optimizer
+            else:
+                # TODO process the input
+                arch_loss = self.loss(train_arch_data)  # noqa: F841
+                # arch_loss.backward()
+
+            # TODO mutator optimizer step
+            optim_wrapper.step()
+
+            model_loss = self.loss(train_supernet_data)
+
+            # TODO optimizer architecture zero_grad
+            optim_wrapper.zero_grad()
+            # model_loss.backward()
+
+            nn.utils.clip_grad_norm_(
+                self.architecture.parameters(), max_norm=5, norm_type=2)
+
+            # TODO optimizer architecture step
+            optim_wrapper.step()
+
+            outputs = dict(
+                loss=model_loss,
+                num_samples=len(train_supernet_data['img'].data))
+        else:
+            outputs = super().train_step(data, optim_wrapper)
+
+        return outputs
+
+    def _unrolled_backward(self, train_arch_data, train_supernet_data,
+                           optimizer):
+        """Compute the unrolled (second-order) loss and backpropagate its
+        gradients to the architecture parameters."""
+        backup_params = copy.deepcopy(tuple(self.architecture.parameters()))
+
+        # do virtual step on training data
+        lr = optimizer['architecture'].param_groups[0]['lr']
+        momentum = optimizer['architecture'].param_groups[0]['momentum']
+        weight_decay = optimizer['architecture'].param_groups[0][
+            'weight_decay']
+        self._compute_virtual_model(train_supernet_data, lr, momentum,
+                                    weight_decay, optimizer)
+
+        # calculate unrolled loss on validation data
+        # keep gradients for the model here to compute the hessian
+        losses = self(**train_arch_data)
+        loss, _ = self._parse_losses(losses)
+        w_model, w_arch = tuple(self.architecture.parameters()), tuple(
+            self.mutator.parameters())
+        w_grads = torch.autograd.grad(loss, w_model + w_arch)
+        d_model, d_arch = w_grads[:len(w_model)], w_grads[len(w_model):]
+
+        # compute hessian and final gradients
+        hessian = self._compute_hessian(backup_params, d_model,
+                                        train_supernet_data)
+        with torch.no_grad():
+            for param, d, h in zip(w_arch, d_arch, hessian):
+                # gradient = dalpha - lr * hessian
+                param.grad = d - lr * h
+
+        # restore weights
+        self._restore_weights(backup_params)
+
+    def _compute_virtual_model(self, data, lr, momentum, weight_decay,
+                               optimizer):
+        """Compute the one-step unrolled weights w'."""
+        # don't need zero_grad, using autograd to calculate gradients
+        losses = self(**data)
+        loss, _ = self._parse_losses(losses)
+        gradients = torch.autograd.grad(loss, self.architecture.parameters())
+        with torch.no_grad():
+            for w, g in zip(self.architecture.parameters(), gradients):
+                m = optimizer['architecture'].state[w].get(
+                    'momentum_buffer', 0.)
+                # update in place; rebinding `w` would leave the parameters
+                # untouched
+                w.sub_(lr * (momentum * m + g + weight_decay * w))
+
+    def _restore_weights(self, backup_params):
+        with torch.no_grad():
+            for param, backup in zip(self.architecture.parameters(),
+                                     backup_params):
+                param.copy_(backup)
+
+    def _compute_hessian(self, backup_params, dw, data):
+        """Approximate the Hessian-vector product with finite differences.
+
+            dw = dw' { L_val(w', alpha) }, i.e. the gradient of the
+                validation loss w.r.t. the unrolled weights w'
+            w+ = w + eps * dw
+            w- = w - eps * dw
+            hessian = (dalpha { L_trn(w+, alpha) }
+                       - dalpha { L_trn(w-, alpha) }) / (2 * eps)
+            eps = 0.01 / ||dw||
+        """
+        self._restore_weights(backup_params)
+        norm = torch.cat([w.view(-1) for w in dw]).norm()
+        eps = 0.01 / norm
+        if norm < 1E-8:
+            print('In computing hessian, norm is smaller than 1E-8, '
+                  'causing eps to be %.6f.' % norm.item())
+
+        dalphas = []
+        for e in [eps, -2.
* eps]: + # w+ = w + eps*dw`, w- = w - eps*dw` + with torch.no_grad(): + for p, d in zip(self.architecture.parameters(), dw): + p += e * d + + losses = self(**data) + loss, _ = self._parse_losses(losses) + dalphas.append( + torch.autograd.grad(loss, tuple(self.mutator.parameters()))) + # dalpha { L_trn(w+) }, # dalpha { L_trn(w-) } + dalpha_pos, dalpha_neg = dalphas + hessian = [(p - n) / (2. * eps) + for p, n in zip(dalpha_pos, dalpha_neg)] + return hessian diff --git a/mmrazor/models/architectures/__init__.py b/mmrazor/models/architectures/__init__.py index 28f44622..fd1c9486 100644 --- a/mmrazor/models/architectures/__init__.py +++ b/mmrazor/models/architectures/__init__.py @@ -1,3 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .backbones import * # noqa: F401,F403 +from .components import * # noqa: F401,F403 from .dynamic_op import * # noqa: F401,F403 diff --git a/mmrazor/models/architectures/backbones/darts_backbone.py b/mmrazor/models/architectures/backbones/darts_backbone.py index 5b30d272..a911508b 100644 --- a/mmrazor/models/architectures/backbones/darts_backbone.py +++ b/mmrazor/models/architectures/backbones/darts_backbone.py @@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union import torch import torch.nn as nn +from mmcls.models.backbones.base_backbone import BaseBackbone from mmcv.cnn import build_activation_layer, build_norm_layer from torch import Tensor @@ -126,12 +127,8 @@ class Node(nn.Module): super().__init__() edges = nn.ModuleDict() for i in range(num_prev_nodes): - if i < num_downsample_nodes: - stride = 2 - else: - stride = 1 - - edge_id = '{}_p{}'.format(node_id, i) + stride = 2 if i < num_downsample_nodes else 1 + edge_id = f'{node_id}_p{i}' module_kwargs = dict( in_channels=channels, @@ -143,13 +140,14 @@ class Node(nn.Module): mutable_cfg.update(alias=edge_id) edges.add_module(edge_id, MODELS.build(mutable_cfg)) + route_cfg.update(alias=node_id) route_cfg.update(edges=edges) - self.edges = MODELS.build(route_cfg) + self.route = MODELS.build(route_cfg) def forward(self, prev_nodes: Union[List[Tensor], Tuple[Tensor]]) -> Tensor: """Forward with the previous nodes list.""" - return self.edges(prev_nodes) + return self.route(prev_nodes) class Cell(nn.Module): @@ -223,8 +221,7 @@ class Cell(nn.Module): cur_tensor = node(tensors) tensors.append(cur_tensor) - output = torch.cat(tensors[2:], dim=1) - return output + return torch.cat(tensors[2:], dim=1) class AuxiliaryModule(nn.Module): @@ -263,7 +260,7 @@ class AuxiliaryModule(nn.Module): @MODELS.register_module() -class DartsBackbone(nn.Module, FixSubnetMixin): +class DartsBackbone(BaseBackbone, FixSubnetMixin): """Backbone of Differentiable Architecture Search (DARTS). Args: @@ -348,7 +345,7 @@ class DartsBackbone(nn.Module, FixSubnetMixin): prev_reduction, reduction = reduction, False # Reduce featuremap size and double channels in 1/3 # and 2/3 layer. - if i == self.num_layers // 3 or i == 2 * self.num_layers // 3: + if i in [self.num_layers // 3, 2 * self.num_layers // 3]: self.out_channels *= 2 reduction = True diff --git a/mmrazor/models/architectures/backbones/searchable_mobilenet.py b/mmrazor/models/architectures/backbones/searchable_mobilenet.py index a899bba7..28552182 100644 --- a/mmrazor/models/architectures/backbones/searchable_mobilenet.py +++ b/mmrazor/models/architectures/backbones/searchable_mobilenet.py @@ -46,7 +46,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin): Excamples: >>> mutable_cfg = dict( ... type='OneShotMutableOP', - ... 
candidate_ops=dict( + ... candidates=dict( ... mb_k3e1=dict( ... type='MBBlock', ... kernel_size=3, @@ -87,7 +87,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin): ] ) -> None: for index in out_indices: - if index not in range(0, 8): + if index not in range(8): raise ValueError('the item in out_indices must in ' f'range(0, 8). But received {index}') @@ -147,6 +147,7 @@ class SearchableMobileNet(BaseBackbone, FixSubnetMixin): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) + self.add_module('conv2', layer) self.layers.append('conv2') diff --git a/mmrazor/models/architectures/backbones/searchable_shufflenet_v2.py b/mmrazor/models/architectures/backbones/searchable_shufflenet_v2.py index ff2f3a48..bb429495 100644 --- a/mmrazor/models/architectures/backbones/searchable_shufflenet_v2.py +++ b/mmrazor/models/architectures/backbones/searchable_shufflenet_v2.py @@ -48,7 +48,7 @@ class SearchableShuffleNetV2(BaseBackbone, FixSubnetMixin): Excamples: >>> mutable_cfg = dict( ... type='OneShotMutableOP', - ... candidate_ops=dict( + ... candidates=dict( ... shuffle_3x3=dict( ... type='ShuffleBlock', ... kernel_size=3, diff --git a/mmrazor/models/architectures/components/__init__.py b/mmrazor/models/architectures/components/__init__.py new file mode 100644 index 00000000..a1c30822 --- /dev/null +++ b/mmrazor/models/architectures/components/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .heads import CreamClsHead + +__all__ = ['CreamClsHead'] diff --git a/mmrazor/models/architectures/components/heads/__init__.py b/mmrazor/models/architectures/components/heads/__init__.py new file mode 100644 index 00000000..f118daee --- /dev/null +++ b/mmrazor/models/architectures/components/heads/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .cream_head import CreamClsHead + +__all__ = ['CreamClsHead'] diff --git a/mmrazor/models/architectures/components/heads/cream_head.py b/mmrazor/models/architectures/components/heads/cream_head.py new file mode 100644 index 00000000..9990b3e3 --- /dev/null +++ b/mmrazor/models/architectures/components/heads/cream_head.py @@ -0,0 +1,72 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from typing import Dict, Optional, Tuple + +from mmcls.models.heads import LinearClsHead +from mmcv.cnn import ConvModule +from torch import Tensor, nn + +from mmrazor.registry import MODELS + + +@MODELS.register_module() +class CreamClsHead(LinearClsHead): + """Linear classifier head for cream. + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + num_features (int): Number of features in the conv2d. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + init_cfg (dict, optional): the config to control the initialization. + Defaults to ``dict(type='Normal', layer='Linear', std=0.01)``. 
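+
+    Example (an illustrative sketch; it assumes the default ``loss`` and
+    ``topk`` settings inherited from ``LinearClsHead`` and a registered
+    mmcls environment):
+        >>> import torch
+        >>> head = CreamClsHead(num_classes=1000, in_channels=320)
+        >>> feats = (torch.rand(2, 320), )
+        >>> head(feats).shape
+        torch.Size([2, 1000])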
+ """ + + def __init__(self, + num_classes: int, + in_channels: int, + num_features: int = 1280, + act_cfg: Dict = dict(type='ReLU6'), + init_cfg: Optional[dict] = dict( + type='Normal', layer='Linear', std=0.01), + **kwargs): + super().__init__( + num_classes=num_classes, + in_channels=in_channels, + init_cfg=init_cfg, + **kwargs) + + layer = ConvModule( + in_channels=self.in_channels, + out_channels=num_features, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=None, + norm_cfg=None, + act_cfg=act_cfg) + + self.add_module('conv2', layer) + + self.fc = nn.Linear(num_features, self.num_classes) + + # def pre_logits(self, feats: Tuple[Tensor]) -> Tensor: + # """The process before the final classification head. + + # The input ``feats`` is a tuple of tensor, and each tensor is the + # feature of a backbone stage. In ``LinearClsHead``, we just obtain the + # feature of the last stage. + # """ + # # The LinearClsHead doesn't have other module, just return after + # # unpacking. + # return feats[-1] + + def forward(self, feats: Tuple[Tensor]) -> Tensor: + """The forward process.""" + logits = self.pre_logits(feats) + logits = logits.unsqueeze(-1).unsqueeze(-1) + logits = self.conv2(logits) + logits = logits.flatten(1) + return self.fc(logits) diff --git a/mmrazor/models/mutables/mutable_module/diff_mutable_module.py b/mmrazor/models/mutables/mutable_module/diff_mutable_module.py index d5112e04..4cb3a803 100644 --- a/mmrazor/models/mutables/mutable_module/diff_mutable_module.py +++ b/mmrazor/models/mutables/mutable_module/diff_mutable_module.py @@ -99,7 +99,7 @@ class DiffMutableOP(DiffMutableModule[str, str]): DARTS. Search the best module by learnable parameters `arch_param`. Args: - candidate_ops (dict[str, dict]): the configs for the candidate + candidates (dict[str, dict]): the configs for the candidate operations. module_kwargs (dict[str, dict], optional): Module initialization named arguments. Defaults to None. @@ -110,23 +110,29 @@ class DiffMutableOP(DiffMutableModule[str, str]): and `Pretrained`. """ - def __init__(self, candidate_ops: Dict[str, Dict], **kwargs) -> None: - super().__init__(**kwargs) - assert len(candidate_ops) >= 1, \ + def __init__( + self, + candidates: Dict[str, Dict], + module_kwargs: Optional[Dict[str, Dict]] = None, + alias: Optional[str] = None, + init_cfg: Optional[Dict] = None, + ) -> None: + super().__init__( + module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg) + assert len(candidates) >= 1, \ f'Number of candidate op must greater than or equal to 1, ' \ - f'but got: {len(candidate_ops)}' + f'but got: {len(candidates)}' self._is_fixed = False - self._candidate_ops = self._build_ops(candidate_ops, - self.module_kwargs) + self._candidates = self._build_ops(candidates, self.module_kwargs) @staticmethod - def _build_ops(candidate_ops: Dict[str, Dict], + def _build_ops(candidates: Dict[str, Dict], module_kwargs: Optional[Dict[str, Dict]]) -> nn.ModuleDict: - """Build candidate operations based on candidate_ops configures. + """Build candidate operations based on candidates configures. Args: - candidate_ops (dict[str, dict]): the configs for the candidate + candidates (dict[str, dict]): the configs for the candidate operations. module_kwargs (dict[str, dict], optional): Module initialization named arguments. @@ -137,7 +143,7 @@ class DiffMutableOP(DiffMutableModule[str, str]): is the corresponding candidate operation. 
""" ops = nn.ModuleDict() - for name, op_cfg in candidate_ops.items(): + for name, op_cfg in candidates.items(): assert name not in ops if module_kwargs is not None: op_cfg.update(module_kwargs) @@ -154,7 +160,7 @@ class DiffMutableOP(DiffMutableModule[str, str]): Returns: Tensor: the result of forward the fixed operation. """ - return self._candidate_ops[self._chosen](x) + return sum(self._candidates[choice](x) for choice in self._chosen) def forward_arch_param(self, x: Any, @@ -180,7 +186,7 @@ class DiffMutableOP(DiffMutableModule[str, str]): # forward based on probs outputs = list() - for prob, module in zip(probs, self._candidate_ops.values()): + for prob, module in zip(probs, self._candidates.values()): if prob > 0.: outputs.append(prob * module(x)) @@ -197,11 +203,11 @@ class DiffMutableOP(DiffMutableModule[str, str]): Tensor: the result of forward all of the ``choice`` operation. """ outputs = list() - for op in self._candidate_ops.values(): + for op in self._candidates.values(): outputs.append(op(x)) return sum(outputs) - def fix_chosen(self, chosen: str) -> None: + def fix_chosen(self, chosen: Union[str, List[str]]) -> None: """Fix mutable with `choice`. This operation would convert `unfixed` mode to `fixed` mode. The :attr:`is_fixed` will be set to True and only the selected operations can be retained. @@ -215,9 +221,12 @@ class DiffMutableOP(DiffMutableModule[str, str]): 'The mode of current MUTABLE is `fixed`. ' 'Please do not call `fix_chosen` function again.') + if isinstance(chosen, str): + chosen = [chosen] + for c in self.choices: - if c != chosen: - self._candidate_ops.pop(c) + if c not in chosen: + self._candidates.pop(c) self._chosen = chosen self.is_fixed = True @@ -225,7 +234,7 @@ class DiffMutableOP(DiffMutableModule[str, str]): @property def choices(self) -> List[str]: """list: all choices. """ - return list(self._candidate_ops.keys()) + return list(self._candidates.keys()) @MODELS.register_module() @@ -241,6 +250,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]): with_arch_param (bool): whether forward with arch_param. When set to `True`, a differentiable way is adopted. When set to `False`, a non-differentiable way is adopted. + alias (str, optional): alias of the `DiffChoiceRoute`. init_cfg (dict, optional): initialization configuration dict for ``BaseModule``. OpenMMLab has implement 6 initializers including `Constant`, `Xavier`, `Normal`, `Uniform`, `Kaiming`, @@ -274,16 +284,17 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]): self, edges: nn.ModuleDict, with_arch_param: bool = False, + alias: Optional[str] = None, init_cfg: Optional[Dict] = None, ) -> None: - super().__init__(init_cfg=init_cfg) + super().__init__(alias=alias, init_cfg=init_cfg) assert len(edges) >= 1, \ f'Number of edges must greater than or equal to 1, ' \ f'but got: {len(edges)}' self._with_arch_param = with_arch_param self._is_fixed = False - self._edges: nn.ModuleDict = edges + self._candidates: nn.ModuleDict = edges def forward_fixed(self, inputs: Union[List, Tuple]) -> Tensor: """Forward when the mutable is in `fixed` mode. 
@@ -302,7 +313,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         outputs = list()
         for choice, x in zip(self._unfixed_choices, inputs):
             if choice in self._chosen:
-                outputs.append(self._edges[choice](x))
+                outputs.append(self._candidates[choice](x))
 
         return sum(outputs)
 
     def forward_arch_param(self,
@@ -319,15 +330,16 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         Returns:
             Tensor: the result of forward with ``arch_param``.
         """
-        assert len(x) == len(self._edges), \
-            f'Length of `edges` {len(self._edges)} should be same as ' \
-            f'the length of inputs {len(x)}.'
+        assert len(x) == len(self._candidates), \
+            f'Length of `edges` {len(self._candidates)} should be ' \
+            f'the same as the length of inputs {len(x)}.'
 
         if self._with_arch_param:
             probs = self.compute_arch_probs(arch_param=arch_param)
 
             outputs = list()
-            for prob, module, input in zip(probs, self._edges.values(), x):
+            for prob, module, input in zip(probs, self._candidates.values(),
+                                           x):
                 if prob > 0:  # prob may equal to 0 in gumbel softmax.
                     outputs.append(prob * module(input))
@@ -346,12 +358,12 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
         Returns:
             Tensor: the result of forward all of the ``choice`` operation.
         """
-        assert len(x) == len(self._edges), \
-            f'Lenght of edges {len(self._edges)} should be same as ' \
+        assert len(x) == len(self._candidates), \
+            f'Length of edges {len(self._candidates)} should be the same as ' \
             f'the length of inputs {len(x)}.'
 
         outputs = list()
-        for op, input in zip(self._edges.values(), x):
+        for op, input in zip(self._candidates.values(), x):
             outputs.append(op(input))
 
         return sum(outputs)
@@ -373,7 +385,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
 
         for c in self.choices:
             if c not in chosen:
-                self._edges.pop(c)
+                self._candidates.pop(c)
 
         self._chosen = chosen
         self.is_fixed = True
@@ -381,7 +393,7 @@ class DiffChoiceRoute(DiffMutableModule[str, List[str]]):
     @property
     def choices(self) -> List[CHOSEN_TYPE]:
         """list: all choices. """
-        return list(self._edges.keys())
+        return list(self._candidates.keys())
 
 
 @MODELS.register_module()
@@ -413,10 +425,14 @@ class GumbelChoiceRoute(DiffChoiceRoute):
         tau: float = 1.0,
         hard: bool = True,
         with_arch_param: bool = False,
+        alias: Optional[str] = None,
         init_cfg: Optional[Dict] = None,
     ) -> None:
         super().__init__(
-            edges=edges, with_arch_param=with_arch_param, init_cfg=init_cfg)
+            edges=edges,
+            with_arch_param=with_arch_param,
+            alias=alias,
+            init_cfg=init_cfg)
         self.tau = tau
         self.hard = hard
diff --git a/mmrazor/models/mutables/mutable_module/one_shot_mutable_module.py b/mmrazor/models/mutables/mutable_module/one_shot_mutable_module.py
index 4c5e981b..3ea44871 100644
--- a/mmrazor/models/mutables/mutable_module/one_shot_mutable_module.py
+++ b/mmrazor/models/mutables/mutable_module/one_shot_mutable_module.py
@@ -100,7 +100,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
     blocks.
 
     Args:
-        candidate_ops (dict[str, dict]): the configs for the candidate
+        candidates (dict[str, dict]): the configs for the candidate
             operations.
         module_kwargs (dict[str, dict], optional): Module initialization named
             arguments. Defaults to None.
@@ -114,13 +114,13 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
 
         >>> import torch
         >>> from mmrazor.models.mutables import OneShotMutableOP
 
-        >>> candidate_ops = nn.ModuleDict({
+        >>> candidates = nn.ModuleDict({
         ...     'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
         ...     'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
        ...     'conv7x7': nn.Conv2d(32, 32, 7, 1, 3)})
 
         >>> input = torch.randn(1, 32, 64, 64)
-        >>> op = OneShotMutableOP(candidate_ops)
+        >>> op = OneShotMutableOP(candidates)
 
         >>> op.choices
         ['conv3x3', 'conv5x5', 'conv7x7']
@@ -131,7 +131,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         >>> op.current_choice = 'conv3x3'
         >>> unfix_output = op.forward(input)
-        >>> torch.all(unfixed_output == candidate_ops['conv3x3'](input))
+        >>> torch.all(unfix_output == candidates['conv3x3'](input))
         True
 
         >>> op.fix_chosen('conv3x3')
@@ -147,36 +147,41 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         True
     """
 
-    def __init__(self, candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
-                 **kwargs) -> None:
-        super().__init__(**kwargs)
-        assert len(candidate_ops) >= 1, \
+    def __init__(
+        self,
+        candidates: Union[Dict[str, Dict], nn.ModuleDict],
+        module_kwargs: Optional[Dict[str, Dict]] = None,
+        alias: Optional[str] = None,
+        init_cfg: Optional[Dict] = None,
+    ) -> None:
+        super().__init__(
+            module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg)
+        assert len(candidates) >= 1, \
             f'Number of candidate op must greater than 1, ' \
-            f'but got: {len(candidate_ops)}'
+            f'but got: {len(candidates)}'
 
         self._chosen: Optional[str] = None
-        if isinstance(candidate_ops, dict):
-            self._candidate_ops = self._build_ops(candidate_ops,
-                                                  self.module_kwargs)
-        elif isinstance(candidate_ops, nn.ModuleDict):
-            self._candidate_ops = candidate_ops
+        if isinstance(candidates, dict):
+            self._candidates = self._build_ops(candidates, self.module_kwargs)
+        elif isinstance(candidates, nn.ModuleDict):
+            self._candidates = candidates
         else:
             raise TypeError('candidata_ops should be a `dict` or '
                             f'`nn.ModuleDict` instance, but got '
-                            f'{type(candidate_ops)}')
+                            f'{type(candidates)}')
 
-        assert len(self._candidate_ops) >= 1, \
+        assert len(self._candidates) >= 1, \
             f'Number of candidate op must greater than or equal to 1, ' \
-            f'but got {len(self._candidate_ops)}'
+            f'but got {len(self._candidates)}'
 
     @staticmethod
     def _build_ops(
-        candidate_ops: Union[Dict[str, Dict], nn.ModuleDict],
+        candidates: Union[Dict[str, Dict], nn.ModuleDict],
        module_kwargs: Optional[Dict[str, Dict]] = None) -> nn.ModuleDict:
         """Build candidate operations based on choice configures.
 
         Args:
-            candidate_ops (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
+            candidates (dict[str, dict] | :obj:`nn.ModuleDict`): the configs
                 for the candidate operations or nn.ModuleDict.
             module_kwargs (dict[str, dict], optional): Module initialization
                 named arguments.
@@ -186,11 +191,11 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
             the name of each choice in configs and the value of ``ops``
             is the corresponding candidate operation.
         """
-        if isinstance(candidate_ops, nn.ModuleDict):
-            return candidate_ops
+        if isinstance(candidates, nn.ModuleDict):
+            return candidates
 
         ops = nn.ModuleDict()
-        for name, op_cfg in candidate_ops.items():
+        for name, op_cfg in candidates.items():
             assert name not in ops
             if module_kwargs is not None:
                 op_cfg.update(module_kwargs)
@@ -207,7 +212,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
         Returns:
             Tensor: the result of forward the fixed operation.
         """
-        return self._candidate_ops[self._chosen](x)
+        return self._candidates[self._chosen](x)
 
     def forward_choice(self, x: Any, choice: str) -> Tensor:
         """Forward with the `unfixed` mutable and current choice is not None.
@@ -221,7 +226,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]):
             Tensor: the result of forward the ``choice`` operation.
         """
         assert isinstance(choice, str) and choice in self.choices
-        return self._candidate_ops[choice](x)
+        return self._candidates[choice](x)
 
     def forward_all(self, x: Any) -> Tensor:
         """Forward all choices. Used to calculate FLOPs.
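
A short sketch of the ``nn.ModuleDict`` construction path accepted above, following the class docstring; ``forward_choice`` runs a single named branch without fixing the mutable:

    import torch
    import torch.nn as nn
    from mmrazor.models.mutables import OneShotMutableOP

    candidates = nn.ModuleDict({
        'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
        'conv5x5': nn.Conv2d(32, 32, 5, 1, 2)})
    op = OneShotMutableOP(candidates)

    x = torch.randn(1, 32, 64, 64)
    out = op.forward_choice(x, 'conv5x5')  # same shape as x
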
""" assert isinstance(choice, str) and choice in self.choices - return self._candidate_ops[choice](x) + return self._candidates[choice](x) def forward_all(self, x: Any) -> Tensor: """Forward all choices. Used to calculate FLOPs. @@ -233,7 +238,9 @@ class OneShotMutableOP(OneShotMutableModule[str, str]): Returns: Tensor: the result of forward all of the ``choice`` operation. """ - outputs = [op(x) for op in self._candidate_ops.values()] + outputs = list() + for op in self._candidates.values(): + outputs.append(op(x)) return sum(outputs) def fix_chosen(self, chosen: str) -> None: @@ -251,7 +258,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]): for c in self.choices: if c != chosen: - self._candidate_ops.pop(c) + self._candidates.pop(c) self._chosen = chosen self.is_fixed = True @@ -263,7 +270,7 @@ class OneShotMutableOP(OneShotMutableModule[str, str]): @property def choices(self) -> List[str]: """list: all choices. """ - return list(self._candidate_ops.keys()) + return list(self._candidates.keys()) @property def num_choices(self): @@ -275,7 +282,7 @@ class OneShotProbMutableOP(OneShotMutableOP): """Sampling candidate operation according to probability. Args: - candidate_ops (dict[str, dict]): the configs for the candidate + candidates (dict[str, dict]): the configs for the candidate operations. choice_probs (list): the probability of sampling each candidate operation. @@ -289,13 +296,13 @@ class OneShotProbMutableOP(OneShotMutableOP): """ def __init__(self, - candidate_ops: Dict[str, Dict], + candidates: Dict[str, Dict], choice_probs: list = None, module_kwargs: Optional[Dict[str, Dict]] = None, alias: Optional[str] = None, init_cfg: Optional[Dict] = None) -> None: super().__init__( - candidate_ops=candidate_ops, + candidates=candidates, module_kwargs=module_kwargs, alias=alias, init_cfg=init_cfg) @@ -306,5 +313,7 @@ class OneShotProbMutableOP(OneShotMutableOP): def sample_choice(self) -> str: """Sampling with probabilities.""" - assert len(self.choice_probs) == len(self._candidate_ops.keys()) - return random.choices(self.choices, weights=self.choice_probs, k=1)[0] + assert len(self.choice_probs) == len(self._candidates.keys()) + choice = random.choices( + self.choices, weights=self.choice_probs, k=1)[0] + return choice diff --git a/mmrazor/models/ops/__init__.py b/mmrazor/models/ops/__init__.py index a3b8c1e0..a2fd7d81 100644 --- a/mmrazor/models/ops/__init__.py +++ b/mmrazor/models/ops/__init__.py @@ -2,10 +2,12 @@ from .common import Identity from .darts_series import (DartsDilConv, DartsPoolBN, DartsSepConv, DartsSkipConnect, DartsZero) +from .efficientnet_series import ConvBnAct, DepthwiseSeparableConv from .mobilenet_series import MBBlock from .shufflenet_series import ShuffleBlock, ShuffleXception __all__ = [ 'ShuffleBlock', 'ShuffleXception', 'DartsPoolBN', 'DartsDilConv', - 'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity' + 'DartsSepConv', 'DartsSkipConnect', 'DartsZero', 'MBBlock', 'Identity', + 'ConvBnAct', 'DepthwiseSeparableConv' ] diff --git a/mmrazor/models/ops/darts_series.py b/mmrazor/models/ops/darts_series.py index 2b19307a..71368f51 100644 --- a/mmrazor/models/ops/darts_series.py +++ b/mmrazor/models/ops/darts_series.py @@ -27,10 +27,7 @@ class DartsPoolBN(BaseOP): self.kernel_size, self.stride, 1, count_include_pad=False) self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1] - if use_drop_path: - self.drop_path = DropPath() - else: - self.drop_path = None + self.drop_path = DropPath() if use_drop_path else None def forward(self, 
         out = self.pool(x)
@@ -69,10 +66,7 @@ class DartsDilConv(BaseOP):
                 self.in_channels, self.out_channels, 1, stride=1, bias=False),
             build_norm_layer(self.norm_cfg, self.in_channels)[1])
-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None
 
     def forward(self, x):
         out = self.conv1(x)
@@ -122,10 +116,7 @@ class DartsSepConv(BaseOP):
                 self.out_channels, self.out_channels, 1, stride=1, bias=False),
             build_norm_layer(self.norm_cfg, self.out_channels)[1])
-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None
 
     def forward(self, x):
         out = self.conv1(x)
@@ -163,10 +154,7 @@ class DartsSkipConnect(BaseOP):
                 bias=False)
             self.bn = build_norm_layer(self.norm_cfg, self.out_channels)[1]
-        if use_drop_path:
-            self.drop_path = DropPath()
-        else:
-            self.drop_path = None
+        self.drop_path = DropPath() if use_drop_path else None
 
     def forward(self, x):
         if self.stride > 1:
diff --git a/mmrazor/models/ops/efficientnet_series.py b/mmrazor/models/ops/efficientnet_series.py
new file mode 100644
index 00000000..de83b9d6
--- /dev/null
+++ b/mmrazor/models/ops/efficientnet_series.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, Optional
+
+import torch.nn as nn
+from mmcls.models.utils import SELayer
+from mmcv.cnn import ConvModule
+
+from mmrazor.registry import MODELS
+from .base import BaseOP
+
+
+@MODELS.register_module()
+class ConvBnAct(BaseOP):
+    """ConvBnAct block from timm.
+
+    Args:
+        in_channels (int): number of in channels.
+        out_channels (int): number of out channels.
+        kernel_size (int): kernel size of convolution.
+        stride (int, optional): stride of convolution. Defaults to 1.
+        dilation (int, optional): dilation rate of convolution. Defaults to 1.
+        padding (int, optional): padding size of convolution. Defaults to 0.
+        skip (bool, optional): whether to use a skip connection.
+            Defaults to False.
+        conv_cfg (Optional[dict], optional): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        norm_cfg (Dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (Dict, optional): Config dict for activation layer.
+            Default: dict(type='ReLU').
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: int,
+                 stride: int = 1,
+                 dilation: int = 1,
+                 padding: int = 0,
+                 skip: bool = False,
+                 conv_cfg: Optional[dict] = None,
+                 se_cfg: Dict = None,
+                 norm_cfg: Dict = dict(type='BN'),
+                 act_cfg: Dict = dict(type='ReLU')):
+        super().__init__(
+            in_channels=in_channels, out_channels=out_channels, stride=stride)
+        self.has_residual = skip and stride == 1 \
+            and in_channels == out_channels
+        self.with_se = se_cfg is not None
+
+        if self.with_se:
+            assert isinstance(se_cfg, dict)
+            self.se = SELayer(self.out_channels, **se_cfg)
+
+        self.convModule = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            dilation=dilation,
+            padding=padding,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+    def forward(self, x):
+        """Forward function."""
+        shortcut = x
+        x = self.convModule(x)
+        if self.with_se:
+            x = self.se(x)
+        if self.has_residual:
+            x += shortcut
+        return x
+
+
+@MODELS.register_module()
+class DepthwiseSeparableConv(BaseOP):
+    """DepthwiseSeparable block used for DS convs in MobileNet-V1 and in the
+    place of IR blocks that have no expansion (factor of 1.0). This is an
+    alternative to having an IR with an optional first pw conv.
+
+    Args:
+        in_channels (int): number of in channels.
+        out_channels (int): number of out channels.
+        dw_kernel_size (int, optional): the kernel size of depth-wise
+            convolution. Defaults to 3.
+        stride (int, optional): stride of convolution.
+            Defaults to 1.
+        dilation (int, optional): dilation rate of convolution.
+            Defaults to 1.
+        noskip (bool, optional): whether to disable the skip connection.
+            Defaults to False.
+        pw_kernel_size (int, optional): kernel size of point wise convolution.
+            Defaults to 1.
+        pw_act (bool, optional): whether to use activation in point-wise
+            convolution. Defaults to False.
+        se_cfg (Dict, optional): Config dict for SELayer. Defaults to None.
+        conv_cfg (Optional[dict], optional): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        norm_cfg (Dict, optional): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (Dict, optional): Config dict for activation layer.
+            Default: dict(type='ReLU').
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 dw_kernel_size: int = 3,
+                 stride: int = 1,
+                 dilation: int = 1,
+                 noskip: bool = False,
+                 pw_kernel_size: int = 1,
+                 pw_act: bool = False,
+                 conv_cfg: Optional[dict] = None,
+                 se_cfg: Dict = None,
+                 norm_cfg: Dict = dict(type='BN'),
+                 act_cfg: Dict = dict(type='ReLU')):
+
+        super().__init__(
+            in_channels=in_channels, out_channels=out_channels, stride=stride)
+        self.has_residual = (stride == 1
+                             and in_channels == out_channels) and not noskip
+        self.has_pw_act = pw_act  # activation after point-wise conv
+
+        self.se_cfg = se_cfg
+
+        self.conv_dw = ConvModule(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=dw_kernel_size,
+            stride=stride,
+            dilation=dilation,
+            padding=dw_kernel_size // 2,
+            groups=in_channels,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+        )
+
+        # Squeeze-and-excitation; channels must match the depth-wise output.
+        self.se = SELayer(in_channels,
+                          **se_cfg) if self.se_cfg else nn.Identity()
+
+        self.conv_pw = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=pw_kernel_size,
+            padding=pw_kernel_size // 2,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg if self.has_pw_act else None,
+        )
+
+    def forward(self, x):
+        shortcut = x
+        x = self.conv_dw(x)
+        x = self.se(x)
+        x = self.conv_pw(x)
+        if self.has_residual:
+            x += shortcut
+        return x
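
To illustrate the two new ops, a minimal sketch under the constructor signatures defined above; the shape comments follow standard convolution arithmetic:

    import torch
    from mmrazor.models.ops import ConvBnAct, DepthwiseSeparableConv

    stem = ConvBnAct(
        in_channels=3, out_channels=32, kernel_size=3, stride=2, padding=1)
    block = DepthwiseSeparableConv(
        in_channels=32, out_channels=64, dw_kernel_size=3, stride=2)

    x = torch.randn(1, 3, 224, 224)
    out = block(stem(x))  # (1, 3, 224, 224) -> (1, 32, 112, 112) -> (1, 64, 56, 56)
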
diff --git a/mmrazor/models/subnet/estimators/flops.py b/mmrazor/models/subnet/estimators/flops.py
index a88bcaed..2241d724 100644
--- a/mmrazor/models/subnet/estimators/flops.py
+++ b/mmrazor/models/subnet/estimators/flops.py
@@ -65,10 +65,10 @@ class FlopsEstimator:
     ...     def __init__(self) -> None:
     ...         super().__init__()
     ...
-    ...         candidate_ops = nn.ModuleDict({
+    ...         candidates = nn.ModuleDict({
     ...             'conv3x3': nn.Conv2d(3, 32, 3),
     ...             'conv5x5': nn.Conv2d(3, 32, 5)})
-    ...         self.op = OneShotMutableOP(candidate_ops)
+    ...         self.op = OneShotMutableOP(candidates)
     ...         self.op.current_choice = 'conv3x3'
     ...
     ...     def forward(self, x: Tensor) -> Tensor:
diff --git a/mmrazor/models/subnet/fix_subnet.py b/mmrazor/models/subnet/fix_subnet.py
index 7d25192c..c72cc08f 100644
--- a/mmrazor/models/subnet/fix_subnet.py
+++ b/mmrazor/models/subnet/fix_subnet.py
@@ -90,12 +90,19 @@ class FixSubnetMixin:
             # In the corresponding mutable, it will check whether the `chosen`
             # format is correct.
             if isinstance(module, BaseMutable):
-                mutable_name = name.lstrip(prefix)
-                assert mutable_name in fix_modules, \
-                    f'{mutable_name} is not in fix_modules {fix_modules}, '\
-                    'please check your `fix_subnet`.'
-
-                chosen = fix_modules.get(mutable_name, None)
+                if getattr(module, 'alias', None):
+                    alias = module.alias
+                    assert alias in fix_modules, \
+                        f'The alias {alias} is not in fix_modules ' \
+                        f'{fix_modules}, please check your `fix_subnet`.'
+                    chosen = fix_modules.get(alias, None)
+                else:
+                    mutable_name = name[len(prefix):]
+                    assert mutable_name in fix_modules, \
+                        f'The module name {mutable_name} is not in ' \
+                        f'fix_modules {fix_modules}, ' \
+                        'please check your `fix_subnet`.'
+                    chosen = fix_modules.get(mutable_name, None)
                 module.fix_chosen(chosen)
 
         # TODO support load fix channels after mr #29 merged
diff --git a/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py b/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py
index d939feb4..7c201673 100644
--- a/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py
+++ b/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py
@@ -19,7 +19,7 @@ class TestDartsBackbone(TestCase):
     def setUp(self) -> None:
         self.mutable_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
@@ -96,17 +96,17 @@ class TestDartsBackbone(TestCase):
         tmp_dict = dict()
 
         for key, _ in model.named_modules():
-            node_type = key.split('._candidate_ops')[0].split('.')[-1].split(
+            node_type = key.split('._candidates')[0].split('.')[-1].split(
                 '_')[0]
             if node_type not in ['normal', 'reduce']:
                 # not supported type
                 continue
 
-            node_name = key.split('._candidate_ops')[0].split('.')[-1]
+            node_name = key.split('._candidates')[0].split('.')[-1]
             if node_name not in tmp_dict.keys():
-                tmp_dict[node_name] = [key.split('._candidate_ops')[0]]
+                tmp_dict[node_name] = [key.split('._candidates')[0]]
             else:
-                current_key = key.split('._candidate_ops')[0]
+                current_key = key.split('._candidates')[0]
                 if current_key not in tmp_dict[node_name]:
                     tmp_dict[node_name].append(current_key)
diff --git a/tests/test_models/test_mutables/test_diffop.py b/tests/test_models/test_mutables/test_diffop.py
index 71ffd40e..e32e2a19 100644
--- a/tests/test_models/test_mutables/test_diffop.py
+++ b/tests/test_models/test_mutables/test_diffop.py
@@ -18,7 +18,7 @@ class TestDiffOP(TestCase):
     def test_forward_arch_param(self):
         op_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
@@ -56,7 +56,7 @@ class TestDiffOP(TestCase):
     def test_forward_fixed(self):
         op_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
@@ -84,7 +84,7 @@ class TestDiffOP(TestCase):
     def test_forward(self):
         op_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
@@ -119,7 +119,7 @@ class TestDiffOP(TestCase):
     def test_property(self):
         op_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
@@ -158,7 +158,7 @@ class TestDiffOP(TestCase):
     def test_module_kwargs(self):
         op_cfg = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
diff --git a/tests/test_models/test_mutables/test_oneshotop.py b/tests/test_models/test_mutables/test_oneshotop.py
index c669e906..3704e67a 100644
--- a/tests/test_models/test_mutables/test_oneshotop.py
+++ b/tests/test_models/test_mutables/test_oneshotop.py
@@ -15,7 +15,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(
@@ -80,7 +80,7 @@ class TestMutables(TestCase):
         op_cfg = dict(
             type='OneShotProbMutableOP',
             choice_probs=[0.1, 0.2, 0.3, 0.4],
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(
@@ -142,7 +142,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(
@@ -165,7 +165,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock', norm_cfg=norm_cfg, kernel_size=3),
                 shuffle_5x5=dict(
@@ -189,7 +189,7 @@ class TestMutables(TestCase):
         norm_cfg = dict(type='BN', requires_grad=True)
         op_cfg = dict(
             type='OneShotMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 shuffle_3x3=dict(
                     type='ShuffleBlock',
                     norm_cfg=norm_cfg,
@@ -221,9 +221,9 @@ class TestMutables(TestCase):
             output = op.forward_all(input)
             assert output is not None
 
-    def test_candidate_ops(self):
+    def test_candidates(self):
 
-        candidate_ops = nn.ModuleDict({
+        candidates = nn.ModuleDict({
             'conv3x3': nn.Conv2d(32, 32, 3, 1, 1),
             'conv5x5': nn.Conv2d(32, 32, 5, 1, 2),
             'conv7x7': nn.Conv2d(32, 32, 7, 1, 3),
@@ -231,7 +231,7 @@ class TestMutables(TestCase):
             'avgpool3x3': nn.AvgPool2d(3, 1, 1),
         })
 
-        op_cfg = dict(type='OneShotMutableOP', candidate_ops=candidate_ops)
+        op_cfg = dict(type='OneShotMutableOP', candidates=candidates)
 
         op = MODELS.build(op_cfg)
diff --git a/tests/test_models/test_mutators/test_diff_mutator.py b/tests/test_models/test_mutators/test_diff_mutator.py
index 31a15728..0d9ada6c 100644
--- a/tests/test_models/test_mutators/test_diff_mutator.py
+++ b/tests/test_models/test_mutators/test_diff_mutator.py
@@ -72,12 +72,12 @@ class SearchableModelAlias(nn.Module):
         return self.slayer3(x)
 
 
-class TestDiffMutator(TestCase):
+class TestDiffModuleMutator(TestCase):
 
     def setUp(self):
         self.MUTABLE_CFG = dict(
             type='DiffMutableOP',
-            candidate_ops=dict(
+            candidates=dict(
                 torch_conv2d_3x3=dict(
                     type='torchConv2d',
                     kernel_size=3,
diff --git a/tests/test_models/test_mutators/test_one_shot_mutator.py b/tests/test_models/test_mutators/test_one_shot_mutator.py
index 776f8899..41921a2b 100644
--- a/tests/test_models/test_mutators/test_one_shot_mutator.py
+++ b/tests/test_models/test_mutators/test_one_shot_mutator.py
@@ -30,7 +30,7 @@ MUTATOR_CFG = dict(type='OneShotModuleMutator')
 
 MUTABLE_CFG = dict(
     type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
         choice1=dict(
             type='MBBlock',
             in_channels=3,
diff --git a/tests/test_models/test_subnet/test_estimators/test_flops.py b/tests/test_models/test_subnet/test_estimators/test_flops.py
index 416561f3..b7a9e9ca 100644
--- a/tests/test_models/test_subnet/test_estimators/test_flops.py
+++ b/tests/test_models/test_subnet/test_estimators/test_flops.py
@@ -13,7 +13,7 @@ from mmrazor.registry import MODELS
 
 _FIRST_STAGE_MUTABLE = dict(
     type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
         mb_k3e1=dict(
             type='MBBlock',
             kernel_size=3,
@@ -23,7 +23,7 @@ _FIRST_STAGE_MUTABLE = dict(
 
 _OTHER_STAGE_MUTABLE = dict(
     type='OneShotMutableOP',
-    candidate_ops=dict(
+    candidates=dict(
         mb_k3e3=dict(
             type='MBBlock',
             kernel_size=3,
diff --git a/tools/test.py b/tools/test.py
index 026f5834..70477abb 100644
--- a/tools/test.py
+++ b/tools/test.py
@@ -3,6 +3,10 @@ import argparse
 import os
 import os.path as osp
 
+from mmcls.core import *  # noqa: F401,F403
+from mmcls.datasets import *  # noqa: F401,F403
+from mmcls.metrics import *  # noqa: F401,F403
+from mmcls.models import *  # noqa: F401,F403
 # TODO import mmcls and mmseg
 from mmdet.core import *  # noqa: F401,F403
 from mmdet.datasets import *  # noqa: F401,F403
diff --git a/tools/train.py b/tools/train.py
index 594f624b..2ccff305 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -38,7 +38,6 @@ def parse_args():
 
 def main():
     register_all_modules(False)
-    args = parse_args()
 
     # load config
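
Finally, a self-contained sketch of the alias-first lookup order that FixSubnetMixin adopts in fix_subnet.py above; the module name, prefix, and dict keys below are illustrative only:

    def resolve_chosen(name: str, module, fix_modules: dict, prefix: str = ''):
        """Mimic the new lookup order: alias first, then module name."""
        alias = getattr(module, 'alias', None)
        key = alias if alias else name[len(prefix):]
        assert key in fix_modules, f'{key} is not in fix_modules.'
        return fix_modules[key]

    class FakeMutable:
        alias = 'stage1_block'

    print(resolve_chosen('architecture.backbone.layers.0', FakeMutable(),
                         {'stage1_block': 'conv3x3'}))  # -> conv3x3
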