Merge pull request #1743 from RainFrost1/benchmark_bug

Benchmark bug
2022-03-09 11:47:19 +08:00 · 2022-03-09 11:47:19 +08:00 · 9e5c244b01
parent abbe1b85c1 7bb9f8c891
commit 9e5c244b01
12 changed files with 83 additions and 11 deletions
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -69,8 +69,8 @@ PARAMS=$3
 IFS=$'\n'
 # parser params from train_benchmark.txt
 sed -i 's/ -o DataLoader.Train.sampler.shuffle=False//g' $FILENAME
-sed -i 's/ -o DataLoader.Train.loader.num_workers=0//g' $FILENAME
-sed -i 's/-o DataLoader.Train.loader.use_shared_memory=False/-o Global.eval_during_train=False/g' $FILENAME
+sed -i 's/ -o DataLoader.Train.loader.num_workers=0/ -o Global.print_batch_step=1/g' $FILENAME
+sed -i 's/-o DataLoader.Train.loader.use_shared_memory=False/ -o Global.eval_during_train=False/g' $FILENAME
 dataline=`cat $FILENAME`
 # parser params
 IFS=$'\n'
@@ -117,10 +117,14 @@ line_profile=13
 line_eval_py=24
 line_export_py=30
 line_norm_train=16
+line_pact_train=17
+line_fgpm_train=18

 func_sed_params "$FILENAME" "${line_eval_py}" "null"
 func_sed_params "$FILENAME" "${line_export_py}" "null"
 func_sed_params "$FILENAME" "${line_python}"  "$python"
+func_sed_params "$FILENAME" "${line_pact_train}" "null"
+func_sed_params "$FILENAME" "${line_fgpm_train}" "null"

 # if params
 if  [ ! -n "$PARAMS" ] ;then
--- a/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
+++ b/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
@@ -53,7 +53,7 @@ null:null
 ===========================train_benchmark_params==========================
 batch_size:256|1536
 fp_items:fp32
-epoch:1
+epoch:2
 --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
 ===========================infer_benchmark_params==========================
--- a/test_tipc/config/Twins/alt_gvt_base_train_infer_python.txt
+++ b/test_tipc/config/Twins/alt_gvt_base_train_infer_python.txt
@@ -51,7 +51,7 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
 null:null
 null:null
 ===========================train_benchmark_params==========================
-batch_size:64|176
+batch_size:64|144
 fp_items:fp32
 epoch:1
 --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
--- a/test_tipc/static/ResNet50/N1C1/ResNet50_bs256_fp16_SingleP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C1/ResNet50_bs256_fp16_SingleP_DP.sh
@@ -0,0 +1,13 @@
+model_item=ResNet50
+bs_item=256
+fp_item=fp16
+run_process_type=SingleP
+run_mode=DP
+device_num=N1C1
+max_epochs=1
+num_workers=8
+
+# get data
+bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
+# run
+bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
--- a/test_tipc/static/ResNet50/N1C1/ResNet50_bs256_fp32_SingleP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C1/ResNet50_bs256_fp32_SingleP_DP.sh
@@ -0,0 +1,17 @@
+model_item=ResNet50
+bs_item=256
+fp_item=fp32
+run_process_type=SingleP
+run_mode=DP
+device_num=N1C1
+max_epochs=1
+num_workers=8
+
+# get data
+bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
+# run
+bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
+# run profiling
+sleep 10;
+export PROFILING=true
+bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
--- a/test_tipc/static/ResNet50/N1C1/ResNet50_bs64_fp16_SingleP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C1/ResNet50_bs64_fp16_SingleP_DP.sh
@@ -5,7 +5,7 @@ run_process_type=SingleP
 run_mode=DP
 device_num=N1C1
 max_epochs=1
-num_workers=4
+num_workers=8

 # get data
 bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
--- a/test_tipc/static/ResNet50/N1C1/ResNet50_bs64_fp32_SingleP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C1/ResNet50_bs64_fp32_SingleP_DP.sh
@@ -5,7 +5,7 @@ run_process_type=SingleP
 run_mode=DP
 device_num=N1C1
 max_epochs=1
-num_workers=4
+num_workers=8

 # get data
 bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
--- a/test_tipc/static/ResNet50/N1C8/ResNet50_bs256_fp16_MultiP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C8/ResNet50_bs256_fp16_MultiP_DP.sh
@@ -0,0 +1,13 @@
+model_item=ResNet50
+bs_item=256
+fp_item=fp16
+run_process_type=MultiP
+run_mode=DP
+device_num=N1C8
+max_epochs=1
+num_workers=8
+
+# get data
+bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
+# run
+bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
--- a/test_tipc/static/ResNet50/N1C8/ResNet50_bs256_fp32_MultiP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C8/ResNet50_bs256_fp32_MultiP_DP.sh
@@ -0,0 +1,13 @@
+model_item=ResNet50
+bs_item=256
+fp_item=fp32
+run_process_type=MultiP
+run_mode=DP
+device_num=N1C8
+max_epochs=1
+num_workers=8
+
+# get data
+bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
+# run
+bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
--- a/test_tipc/static/ResNet50/N1C8/ResNet50_bs64_fp16_MultiP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C8/ResNet50_bs64_fp16_MultiP_DP.sh
@@ -5,7 +5,7 @@ run_process_type=MultiP
 run_mode=DP
 device_num=N1C8
 max_epochs=1
-num_workers=4
+num_workers=8

 # get data
 bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
--- a/test_tipc/static/ResNet50/N1C8/ResNet50_bs64_fp32_MultiP_DP.sh
+++ b/test_tipc/static/ResNet50/N1C8/ResNet50_bs64_fp32_MultiP_DP.sh
@@ -5,7 +5,7 @@ run_process_type=MultiP
 run_mode=DP
 device_num=N1C8
 max_epochs=1
-num_workers=4
+num_workers=8

 # get data
 bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
--- a/test_tipc/static/ResNet50/benchmark_common/run_benchmark.sh
+++ b/test_tipc/static/ResNet50/benchmark_common/run_benchmark.sh
@@ -46,7 +46,7 @@ function _train(){
 	log_file=${profiling_log_file}
    fi

-    train_cmd="${config_file} -o DataLoader.Train.sampler.batch_size=${base_batch_size} -o Global.epochs=${max_epochs} -o DataLoader.Train.loader.num_workers=${num_workers} ${profiling_config}"
+    train_cmd="${config_file} -o DataLoader.Train.sampler.batch_size=${base_batch_size} -o Global.epochs=${max_epochs} -o DataLoader.Train.loader.num_workers=${num_workers} ${profiling_config} -o Global.eval_during_train=False"
 #   以下为通用执行命令，无特殊可不用修改
    case ${run_process_type} in
    SingleP) 
@@ -69,7 +69,19 @@ function _train(){
    fi
    cd ../
 }
+
+function _set_env(){  # export the FLAGS_* environment settings used for the benchmark run
+    # Enable GC
+    export FLAGS_eager_delete_tensor_gb=0.0
+    export FLAGS_fraction_of_gpu_memory_to_use=0.98
+    # cuDNN exhaustive search and convolution workspace size limit
+    export FLAGS_cudnn_exhaustive_search=1
+    export FLAGS_conv_workspace_size_limit=4000 #MB
+}
+
+
 source ${BENCHMARK_ROOT}/scripts/run_model.sh   # 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;如果不联调只想要产出训练log可以注掉本行,提交时需打开
 _set_params $@
 # _train       # 如果只产出训练log,不解析,可取消注释
+_set_env
 _run     # 该函数在run_model.sh中,执行时会调用_train; 如果不联调只产出训练log可以注掉本行,提交时需打开