commit
9e5c244b01
|
@ -69,8 +69,8 @@ PARAMS=$3
|
|||
IFS=$'\n'
|
||||
# parse params from train_benchmark.txt
|
||||
sed -i 's/ -o DataLoader.Train.sampler.shuffle=False//g' $FILENAME
|
||||
sed -i 's/ -o DataLoader.Train.loader.num_workers=0//g' $FILENAME
|
||||
sed -i 's/-o DataLoader.Train.loader.use_shared_memory=False/-o Global.eval_during_train=False/g' $FILENAME
|
||||
sed -i 's/ -o DataLoader.Train.loader.num_workers=0/ -o Global.print_batch_step=1/g' $FILENAME
|
||||
sed -i 's/-o DataLoader.Train.loader.use_shared_memory=False/ -o Global.eval_during_train=False/g' $FILENAME
|
||||
dataline=`cat $FILENAME`
|
||||
# parse params
|
||||
IFS=$'\n'
|
||||
|
@ -117,10 +117,14 @@ line_profile=13
|
|||
line_eval_py=24
|
||||
line_export_py=30
|
||||
line_norm_train=16
|
||||
line_pact_train=17
|
||||
line_fgpm_train=18
|
||||
|
||||
func_sed_params "$FILENAME" "${line_eval_py}" "null"
|
||||
func_sed_params "$FILENAME" "${line_export_py}" "null"
|
||||
func_sed_params "$FILENAME" "${line_python}" "$python"
|
||||
func_sed_params "$FILENAME" "${line_pact_train}" "null"
|
||||
func_sed_params "$FILENAME" "${line_fgpm_train}" "null"
|
||||
|
||||
# if params
|
||||
if [ ! -n "$PARAMS" ] ;then
|
||||
|
|
|
@ -53,7 +53,7 @@ null:null
|
|||
===========================train_benchmark_params==========================
|
||||
batch_size:256|1536
|
||||
fp_items:fp32
|
||||
epoch:1
|
||||
epoch:2
|
||||
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
|
||||
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
|
||||
===========================infer_benchmark_params==========================
|
||||
|
|
|
@ -51,7 +51,7 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
|
|||
null:null
|
||||
null:null
|
||||
===========================train_benchmark_params==========================
|
||||
batch_size:64|176
|
||||
batch_size:64|144
|
||||
fp_items:fp32
|
||||
epoch:1
|
||||
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
# Settings for one benchmark case. Each variable below is forwarded, in this
# same order, as a positional argument to run_benchmark.sh at the bottom.
model_item=ResNet50
|
||||
bs_item=256
|
||||
fp_item=fp16
|
||||
run_process_type=SingleP
|
||||
run_mode=DP
|
||||
device_num=N1C1
|
||||
max_epochs=1
|
||||
num_workers=8
|
||||
|
||||
# get data: run this model's prepare.sh before launching the benchmark
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
# run: launch the benchmark with the settings above; stderr is merged into stdout
|
||||
bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
|
|
@ -0,0 +1,17 @@
|
|||
# Settings for one benchmark case. Each variable below is forwarded, in this
# same order, as a positional argument to run_benchmark.sh (called twice below:
# once normally, once with PROFILING=true exported).
model_item=ResNet50
|
||||
bs_item=256
|
||||
fp_item=fp32
|
||||
run_process_type=SingleP
|
||||
run_mode=DP
|
||||
device_num=N1C1
|
||||
max_epochs=1
|
||||
num_workers=8
|
||||
|
||||
# get data: run this model's prepare.sh before launching the benchmark
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
# run: launch the benchmark with the settings above; stderr is merged into stdout
|
||||
bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
|
||||
# run profiling: wait 10 seconds, export PROFILING=true, then repeat the same
# run_benchmark.sh invocation.
# NOTE(review): run_benchmark.sh presumably reads PROFILING to switch to its
# profiling config/log — confirm in that script.
|
||||
sleep 10;
|
||||
export PROFILING=true
|
||||
bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
|
|
@ -5,7 +5,7 @@ run_process_type=SingleP
|
|||
run_mode=DP
|
||||
device_num=N1C1
|
||||
max_epochs=1
|
||||
num_workers=4
|
||||
num_workers=8
|
||||
|
||||
# get data
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
|
|
|
@ -5,7 +5,7 @@ run_process_type=SingleP
|
|||
run_mode=DP
|
||||
device_num=N1C1
|
||||
max_epochs=1
|
||||
num_workers=4
|
||||
num_workers=8
|
||||
|
||||
# get data
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
# Settings for one benchmark case. Each variable below is forwarded, in this
# same order, as a positional argument to run_benchmark.sh at the bottom.
model_item=ResNet50
|
||||
bs_item=256
|
||||
fp_item=fp16
|
||||
run_process_type=MultiP
|
||||
run_mode=DP
|
||||
device_num=N1C8
|
||||
max_epochs=1
|
||||
num_workers=8
|
||||
|
||||
# get data: run this model's prepare.sh before launching the benchmark
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
# run: launch the benchmark with the settings above; stderr is merged into stdout
|
||||
bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
|
|
@ -0,0 +1,13 @@
|
|||
# Settings for one benchmark case. Each variable below is forwarded, in this
# same order, as a positional argument to run_benchmark.sh at the bottom.
model_item=ResNet50
|
||||
bs_item=256
|
||||
fp_item=fp32
|
||||
run_process_type=MultiP
|
||||
run_mode=DP
|
||||
device_num=N1C8
|
||||
max_epochs=1
|
||||
num_workers=8
|
||||
|
||||
# get data: run this model's prepare.sh before launching the benchmark
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
# run: launch the benchmark with the settings above; stderr is merged into stdout
|
||||
bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_process_type} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
|
|
@ -5,7 +5,7 @@ run_process_type=MultiP
|
|||
run_mode=DP
|
||||
device_num=N1C8
|
||||
max_epochs=1
|
||||
num_workers=4
|
||||
num_workers=8
|
||||
|
||||
# get data
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
|
|
|
@ -5,7 +5,7 @@ run_process_type=MultiP
|
|||
run_mode=DP
|
||||
device_num=N1C8
|
||||
max_epochs=1
|
||||
num_workers=4
|
||||
num_workers=8
|
||||
|
||||
# get data
|
||||
bash test_tipc/static/${model_item}/benchmark_common/prepare.sh
|
||||
|
|
|
@ -46,7 +46,7 @@ function _train(){
|
|||
log_file=${profiling_log_file}
|
||||
fi
|
||||
|
||||
train_cmd="${config_file} -o DataLoader.Train.sampler.batch_size=${base_batch_size} -o Global.epochs=${max_epochs} -o DataLoader.Train.loader.num_workers=${num_workers} ${profiling_config}"
|
||||
train_cmd="${config_file} -o DataLoader.Train.sampler.batch_size=${base_batch_size} -o Global.epochs=${max_epochs} -o DataLoader.Train.loader.num_workers=${num_workers} ${profiling_config} -o Global.eval_during_train=False"
|
||||
# The following are the generic execution commands; no changes are needed unless there is a special requirement
|
||||
case ${run_process_type} in
|
||||
SingleP)
|
||||
|
@ -69,7 +69,19 @@ function _train(){
|
|||
fi
|
||||
cd ../
|
||||
}
|
||||
|
||||
# Export the FLAGS_* environment variables for the benchmark run.
# Takes no arguments and produces no output; its only effect is the four
# exports below, which are inherited by processes started afterwards.
function _set_env(){
|
||||
# enable gc
|
||||
export FLAGS_eager_delete_tensor_gb=0.0
|
||||
export FLAGS_fraction_of_gpu_memory_to_use=0.98
|
||||
####
|
||||
export FLAGS_cudnn_exhaustive_search=1
|
||||
export FLAGS_conv_workspace_size_limit=4000 #MB
|
||||
}
|
||||
|
||||
|
||||
# Script driver: load the shared run_model.sh helpers, parse this script's
# arguments, export the env flags, then start the run.
# NOTE(review): $@ below is unquoted, so arguments containing whitespace would be re-split.
source ${BENCHMARK_ROOT}/scripts/run_model.sh # this script parses performance data from benchmark-compliant logs using analysis.py; if you are not doing joint debugging and only want the training log, you can comment out this line, but it must be enabled when submitting
|
||||
_set_params $@
|
||||
# _train # uncomment to only produce the training log without parsing it
|
||||
_set_env
|
||||
_run # this function is defined in run_model.sh and calls _train when executed; if not doing joint debugging and only producing the training log, this line can be commented out, but it must be enabled when submitting
|
||||
|
|
Loading…
Reference in New Issue