Support specifying model_type in config

The model_type value can be, for example, norm_train or to_static_train.
pull/2648/head
gaotingquan 2023-01-18 09:09:30 +00:00 committed by Tingquan Gao
parent 811b483e30
commit 2d66aeeb77
44 changed files with 237 additions and 168 deletions

View File

@ -90,6 +90,8 @@ line_num=`expr $line_num + 1`
fp_items=$(func_parser_value "${lines[line_num]}") fp_items=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1` line_num=`expr $line_num + 1`
epoch=$(func_parser_value "${lines[line_num]}") epoch=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
model_type=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1` line_num=`expr $line_num + 1`
profile_option_key=$(func_parser_key "${lines[line_num]}") profile_option_key=$(func_parser_key "${lines[line_num]}")
@ -118,6 +120,7 @@ line_gpuid=4
line_precision=6 line_precision=6
line_epoch=7 line_epoch=7
line_batchsize=9 line_batchsize=9
line_model_type=15
line_profile=13 line_profile=13
line_eval_py=24 line_eval_py=24
line_export_py=30 line_export_py=30
@ -138,6 +141,7 @@ if [[ ! -n "$PARAMS" ]];then
batch_size_list=(${batch_size}) batch_size_list=(${batch_size})
fp_items_list=(${fp_items}) fp_items_list=(${fp_items})
device_num_list=(N1C4) device_num_list=(N1C4)
model_type_list=(${model_type})
run_mode="DP" run_mode="DP"
elif [[ ${PARAMS} = "dynamicTostatic" ]];then elif [[ ${PARAMS} = "dynamicTostatic" ]];then
IFS="|" IFS="|"
@ -165,14 +169,13 @@ else
fp_items_list=($precision) fp_items_list=($precision)
batch_size_list=($batch_size) batch_size_list=($batch_size)
device_num_list=($device_num) device_num_list=($device_num)
fi
# for log name # parse "to_static" options and modify trainer into "to_static_trainer"
to_static="" if [[ ${model_type} = "dynamicTostatic" ]];then
# parse "to_static" options and modify trainer into "to_static_trainer" model_type_list="to_static_train"
if [[ ${model_type} = "dynamicTostatic" ]];then else
to_static="d2sT_" model_type_list="norm_train"
sed -i 's/trainer:norm_train/trainer:to_static_train/g' $FILENAME fi
fi fi
@ -180,131 +183,142 @@ IFS="|"
for batch_size in ${batch_size_list[*]}; do for batch_size in ${batch_size_list[*]}; do
for precision in ${fp_items_list[*]}; do for precision in ${fp_items_list[*]}; do
for device_num in ${device_num_list[*]}; do for device_num in ${device_num_list[*]}; do
# sed batchsize and precision for model_type in ${model_type_list[*]}; do
func_sed_params "$FILENAME" "${line_precision}" "$precision" # sed batchsize and precision
func_sed_params "$FILENAME" "${line_batchsize}" "$batch_size" func_sed_params "$FILENAME" "${line_precision}" "$precision"
func_sed_params "$FILENAME" "${line_epoch}" "$epoch" func_sed_params "$FILENAME" "${line_batchsize}" "$batch_size"
gpu_id=$(set_gpu_id $device_num) func_sed_params "$FILENAME" "${line_epoch}" "$epoch"
func_sed_params "$FILENAME" "${line_model_type}" "$model_type"
# It is needed that using dali, NHWC and 4 channels when training ResNet50 with AMPO2 # for log name
if [[ $model_name == "ResNet50" && $precision == "fp16" ]]; then if [[ ${model_type} = "to_static_train" ]];then
sed -i "s/ResNet50.yaml/ResNet50_amp_O2_ultra.yaml/g" $FILENAME to_static="d2sT_"
fi
# if bs is big, then copy train_list.txt to generate more train log
# At least 25 log number would be good to calculate ips for benchmark system.
# So the copy number for train_list is as follows:
total_batch_size=`echo $[$batch_size*${device_num:1:1}*${device_num:3:3}]`
if [[ $model_name == *GeneralRecognition* ]]; then
cd dataset/
train_list_length=`cat train_reg_all_data.txt | wc -l`
copy_num=`echo $[25*10*$total_batch_size/$train_list_length]`
if [[ $copy_num -gt 1 ]];then
rm -rf train_reg_all_data.txt
for ((i=1; i <=$copy_num; i++));do
cat tipc_shitu_demo_data/demo_train.txt >> train_reg_all_data.txt
done
fi
cd ..
else
cd dataset/ILSVRC2012
val_list_length=`cat val_list.txt | wc -l`
copy_num=`echo $[25*10*$total_batch_size/$val_list_length]`
rm -rf train_list.txt
if [[ $copy_num -gt 1 ]];then
for ((i=1; i <=$copy_num; i++));do
cat val_list.txt >> train_list.txt
done
else else
ln -s val_list.txt train_list.txt to_static=""
fi fi
cd ../../
fi
if [[ ${#gpu_id} -le 1 ]];then gpu_id=$(set_gpu_id $device_num)
log_path="$SAVE_LOG/profiling_log"
mkdir -p $log_path
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
# set profile_option params
tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
# run test_train_inference_python.sh # It is needed that using dali, NHWC and 4 channels when training ResNet50 with AMPO2
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " if [[ $model_name == "ResNet50" && $precision == "fp16" ]]; then
echo $cmd sed -i "s/ResNet50.yaml/ResNet50_amp_O2_ultra.yaml/g" $FILENAME
eval $cmd fi
eval "cat ${log_path}/${log_name}"
# without profile # if bs is big, then copy train_list.txt to generate more train log
log_path="$SAVE_LOG/train_log" # At least 25 log number would be good to calculate ips for benchmark system.
speed_log_path="$SAVE_LOG/index" # So the copy number for train_list is as follows:
mkdir -p $log_path total_batch_size=`echo $[$batch_size*${device_num:1:1}*${device_num:3:3}]`
mkdir -p $speed_log_path if [[ $model_name == *GeneralRecognition* ]]; then
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log" cd dataset/
speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed" train_list_length=`cat train_reg_all_data.txt | wc -l`
func_sed_params "$FILENAME" "${line_profile}" "null" # sed profile_id as null copy_num=`echo $[25*10*$total_batch_size/$train_list_length]`
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " if [[ $copy_num -gt 1 ]];then
echo $cmd rm -rf train_reg_all_data.txt
job_bt=`date '+%Y%m%d%H%M%S'` for ((i=1; i <=$copy_num; i++));do
eval $cmd cat tipc_shitu_demo_data/demo_train.txt >> train_reg_all_data.txt
job_et=`date '+%Y%m%d%H%M%S'` done
export model_run_time=$((${job_et}-${job_bt})) fi
eval "cat ${log_path}/${log_name}" cd ..
else
cd dataset/ILSVRC2012
val_list_length=`cat val_list.txt | wc -l`
copy_num=`echo $[25*10*$total_batch_size/$val_list_length]`
rm -rf train_list.txt
if [[ $copy_num -gt 1 ]];then
for ((i=1; i <=$copy_num; i++));do
cat val_list.txt >> train_list.txt
done
else
ln -s val_list.txt train_list.txt
fi
cd ../../
fi
# parser log if [[ ${#gpu_id} -le 1 ]];then
_model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}" log_path="$SAVE_LOG/profiling_log"
cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \ mkdir -p $log_path
--speed_log_file '${speed_log_path}/${speed_log_name}' \ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
--model_name ${_model_name} \ func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
--base_batch_size ${batch_size} \ # set profile_option params
--run_mode ${run_mode} \ tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
--fp_item ${precision} \
--keyword ips: \
--skip_steps 100 \
--device_num ${device_num} \
--speed_unit samples/s \
--convergence_key loss: "
echo $cmd
eval $cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${cmd}" "${status_log}" "${model_name}"
else
IFS=";"
unset_env=`unset CUDA_VISIBLE_DEVICES`
log_path="$SAVE_LOG/train_log"
speed_log_path="$SAVE_LOG/index"
mkdir -p $log_path
mkdir -p $speed_log_path
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id" # sed used gpu_id
func_sed_params "$FILENAME" "${line_profile}" "null" # sed --profile_option as null
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
echo $cmd
job_bt=`date '+%Y%m%d%H%M%S'`
eval $cmd
job_et=`date '+%Y%m%d%H%M%S'`
export model_run_time=$((${job_et}-${job_bt}))
eval "cat ${log_path}/${log_name}"
# parser log
_model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \ # run test_train_inference_python.sh
--speed_log_file '${speed_log_path}/${speed_log_name}' \ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
--model_name ${_model_name} \ echo $cmd
--base_batch_size ${batch_size} \ eval $cmd
--run_mode ${run_mode} \ eval "cat ${log_path}/${log_name}"
--fp_item ${precision} \
--keyword ips: \ # without profile
--skip_steps 100 \ log_path="$SAVE_LOG/train_log"
--device_num ${device_num} \ speed_log_path="$SAVE_LOG/index"
--speed_unit images/s \ mkdir -p $log_path
--convergence_key loss: " mkdir -p $speed_log_path
echo $cmd log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
eval $cmd speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
last_status=${PIPESTATUS[0]} func_sed_params "$FILENAME" "${line_profile}" "null" # sed profile_id as null
status_check $last_status "${cmd}" "${status_log}" "${model_name}" cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
fi echo $cmd
job_bt=`date '+%Y%m%d%H%M%S'`
eval $cmd
job_et=`date '+%Y%m%d%H%M%S'`
export model_run_time=$((${job_et}-${job_bt}))
eval "cat ${log_path}/${log_name}"
# parser log
_model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
--speed_log_file '${speed_log_path}/${speed_log_name}' \
--model_name ${_model_name} \
--base_batch_size ${batch_size} \
--run_mode ${run_mode} \
--fp_item ${precision} \
--keyword ips: \
--skip_steps 100 \
--device_num ${device_num} \
--speed_unit samples/s \
--convergence_key loss: "
echo $cmd
eval $cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${cmd}" "${status_log}" "${model_name}"
else
IFS=";"
unset_env=`unset CUDA_VISIBLE_DEVICES`
log_path="$SAVE_LOG/train_log"
speed_log_path="$SAVE_LOG/index"
mkdir -p $log_path
mkdir -p $speed_log_path
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id" # sed used gpu_id
func_sed_params "$FILENAME" "${line_profile}" "null" # sed --profile_option as null
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
echo $cmd
job_bt=`date '+%Y%m%d%H%M%S'`
eval $cmd
job_et=`date '+%Y%m%d%H%M%S'`
export model_run_time=$((${job_et}-${job_bt}))
eval "cat ${log_path}/${log_name}"
# parser log
_model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
--speed_log_file '${speed_log_path}/${speed_log_name}' \
--model_name ${_model_name} \
--base_batch_size ${batch_size} \
--run_mode ${run_mode} \
--fp_item ${precision} \
--keyword ips: \
--skip_steps 100 \
--device_num ${device_num} \
--speed_unit images/s \
--convergence_key loss: "
echo $cmd
eval $cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${cmd}" "${status_log}" "${model_name}"
fi
done
done done
done done
done done

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -50,5 +50,12 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:False -o Global.benchmark:False
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64
fp_items:fp32|fp16
epoch:1
model_type:norm_train|to_static_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,224,224]}] random_infer_input:[{float32,[3,224,224]}]

View File

@ -54,6 +54,7 @@ null:null
batch_size:256 batch_size:256
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64 batch_size:64
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|128 batch_size:64|128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|128 batch_size:64|128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|128 batch_size:64|128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:256|640 batch_size:256|640
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train|to_static_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:256|640 batch_size:256|640
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:256|640 batch_size:256|640
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:256|640 batch_size:256|640
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:512 batch_size:512
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:256 batch_size:256
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -53,6 +53,7 @@ null:null
batch_size:500 batch_size:500
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -50,5 +50,12 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:False -o Global.benchmark:False
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64
fp_items:fp32|fp16
epoch:1
model_type:norm_train|to_static_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,224,224]}] random_infer_input:[{float32,[3,224,224]}]

View File

@ -54,6 +54,7 @@ null:null
batch_size:32|64 batch_size:32|64
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,3 +54,4 @@ null:null
batch_size:128|256 batch_size:128|256
fp_items:ampfp16 fp_items:ampfp16
epoch:1 epoch:1
model_type:norm_train

View File

@ -54,6 +54,7 @@ null:null
batch_size:128|64 batch_size:128|64
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train|to_static_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,3 +54,4 @@ null:null
batch_size:128|256 batch_size:128|256
fp_items:purefp16 fp_items:purefp16
epoch:1 epoch:1
model_type:norm_train

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64 batch_size:64
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:256|1536 batch_size:256|1536
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:2 epoch:2
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:104|128 batch_size:104|128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train|to_static_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|104 batch_size:64|104
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|104 batch_size:64|104
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|104 batch_size:64|104
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:64|144 batch_size:64|144
fp_items:fp32 fp_items:fp32
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================

View File

@ -54,6 +54,7 @@ null:null
batch_size:128 batch_size:128
fp_items:fp32|fp16 fp_items:fp32|fp16
epoch:1 epoch:1
model_type:norm_train
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================infer_benchmark_params========================== ===========================infer_benchmark_params==========================