From 90f2cc00c3481422ccae5770431daa36577fb696 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Tue, 6 Jul 2021 12:26:40 +0800 Subject: [PATCH 01/28] add google link --- doc/doc_ch/recognition.md | 4 +++- doc/doc_en/recognition_en.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 0f860065be..2efd80e6e1 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -375,7 +375,9 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) -多语言模型训练方式与中文模型一致,训练数据集均为100w的合成数据,少量的字体可以在 [百度网盘](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA) 上下载,提取码:frgi。 +多语言模型训练方式与中文模型一致,训练数据集均为100w的合成数据,少量的字体可以通过下面两种方式下载。 +* [百度网盘](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA)。提取码:frgi。 +* [google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view) 如您希望在现有模型效果的基础上调优,请参考下列说明修改配置文件: diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index e23166e0ca..556b75a515 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -375,7 +375,9 @@ Currently, the multi-language algorithms supported by PaddleOCR are: For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations) -The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded on [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA),Extraction code:frgi. +The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded using the following two methods. 
+* [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA),Extraction code:frgi. +* [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view) If you want to finetune on the basis of the existing model effect, please refer to the following instructions to modify the configuration file: From 51193f88206a035dec17d9503686495ab86ef789 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Tue, 6 Jul 2021 14:02:25 +0800 Subject: [PATCH 02/28] fix bug of inference --- deploy/hubserving/readme.md | 2 +- deploy/hubserving/readme_en.md | 2 +- tools/infer/predict_det.py | 8 ++++---- tools/infer/predict_system.py | 2 -- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md index a39ac5a42b..9351fa8d4f 100755 --- a/deploy/hubserving/readme.md +++ b/deploy/hubserving/readme.md @@ -29,7 +29,7 @@ deploy/hubserving/ocr_system/ ### 1. 准备环境 ```shell # 安装paddlehub -pip3 install paddlehub==1.8.3 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple +pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple ``` ### 2. 下载推理模型 diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md index 7d9a8629ef..98ffcad63c 100755 --- a/deploy/hubserving/readme_en.md +++ b/deploy/hubserving/readme_en.md @@ -30,7 +30,7 @@ The following steps take the 2-stage series service as an example. If only the d ### 1. Prepare the environment ```shell # Install paddlehub -pip3 install paddlehub==1.8.3 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple +pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple ``` ### 2. 
Download inference model diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py index bbf3659cbc..6a45f81e48 100755 --- a/tools/infer/predict_det.py +++ b/tools/infer/predict_det.py @@ -175,7 +175,7 @@ class TextDetector(object): st = time.time() - if args.benchmark: + if self.args.benchmark: self.autolog.times.start() data = transform(data, self.preprocess_op) @@ -186,7 +186,7 @@ class TextDetector(object): shape_list = np.expand_dims(shape_list, axis=0) img = img.copy() - if args.benchmark: + if self.args.benchmark: self.autolog.times.stamp() self.input_tensor.copy_from_cpu(img) @@ -195,7 +195,7 @@ class TextDetector(object): for output_tensor in self.output_tensors: output = output_tensor.copy_to_cpu() outputs.append(output) - if args.benchmark: + if self.args.benchmark: self.autolog.times.stamp() preds = {} @@ -220,7 +220,7 @@ class TextDetector(object): else: dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) - if args.benchmark: + if self.args.benchmark: self.autolog.times.end(stamp=True) et = time.time() return dt_boxes, et - st diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 715bd3fa9d..eae0e27cd2 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -174,8 +174,6 @@ def main(args): logger.info("The predict total time is {}".format(time.time() - _st)) logger.info("\nThe predict total time is {}".format(total_time)) - img_num = text_sys.text_detector.det_times.img_num - if __name__ == "__main__": args = utility.parse_args() From b10f12a292f312d0d5199ecd61d3ba783aaadb2e Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 14:46:38 +0800 Subject: [PATCH 03/28] fix export --- deploy/slim/quantization/export_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py index e9c1a8d311..87d2f333c1 100755 --- a/deploy/slim/quantization/export_model.py +++ 
b/deploy/slim/quantization/export_model.py @@ -113,7 +113,7 @@ def main(): use_srn = config['Architecture']['algorithm'] == "SRN" model_type = config['Architecture']['model_type'] # start eval - metirc = program.eval(model, valid_dataloader, post_process_class, + metric = program.eval(model, valid_dataloader, post_process_class, eval_class, model_type, use_srn) logger.info('metric eval ***************') From e4a939cbbc42a349aa1ba8e9ae27d537bfec1c5b Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 14:46:58 +0800 Subject: [PATCH 04/28] update test_ci to v6 --- test/ocr_det_params.txt | 16 ++--- test/prepare.sh | 14 ++-- test/test.sh | 138 ++++++++++++++++++++++------------------ 3 files changed, 92 insertions(+), 76 deletions(-) diff --git a/test/ocr_det_params.txt b/test/ocr_det_params.txt index 01ac82d3d7..da7e034bdd 100644 --- a/test/ocr_det_params.txt +++ b/test/ocr_det_params.txt @@ -1,13 +1,12 @@ model_name:ocr_det python:python3.7 gpu_list:0|0,1 -Global.auto_cast:False +Global.auto_cast:null Global.epoch_num:10 Global.save_model_dir:./output/ -Global.save_inference_dir:./output/ Train.loader.batch_size_per_card: -Global.use_gpu -Global.pretrained_model +Global.use_gpu: +Global.pretrained_model:null trainer:norm|pact norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained @@ -17,6 +16,8 @@ distill_train:null eval:tools/eval.py -c configs/det/det_mv3_db.yml -o +Global.save_inference_dir:./output/ +Global.checkpoints: norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o fpgm_export:deploy/slim/prune/export_prune_model.py @@ -29,7 +30,6 @@ inference:tools/infer/predict_det.py --rec_batch_num:1 --use_tensorrt:True|False --precision:fp32|fp16|int8 ---det_model_dir ---image_dir ---save_log_path - +--det_model_dir:./inference/ch_ppocr_mobile_v2.0_det_infer/ 
+--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:./test/output/ diff --git a/test/prepare.sh b/test/prepare.sh index 1506824696..14b62383c2 100644 --- a/test/prepare.sh +++ b/test/prepare.sh @@ -26,8 +26,10 @@ IFS=$'\n' # The training params model_name=$(func_parser_value "${lines[0]}") train_model_list=$(func_parser_value "${lines[0]}") + trainer_list=$(func_parser_value "${lines[10]}") + # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer'] MODE=$2 # prepare pretrained weights and dataset @@ -62,8 +64,8 @@ else rm -rf ./train_data/icdar2015 wget -nc -P ./train_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar if [ ${model_name} = "ocr_det" ]; then - eval_model_name="ch_ppocr_mobile_v2.0_det_train" - wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar + eval_model_name="ch_ppocr_mobile_v2.0_det_infer" + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar cd ./inference && tar xf ${eval_model_name}.tar && cd ../ else eval_model_name="ch_ppocr_mobile_v2.0_rec_train" @@ -94,7 +96,7 @@ for train_model in ${train_model_list[*]}; do # eval for slim_trainer in ${trainer_list[*]}; do if [ ${slim_trainer} = "norm" ]; then - if [ ${model_name} = "ocr_det" ]; then + if [ ${model_name} = "det" ]; then eval_model_name="ch_ppocr_mobile_v2.0_det_train" wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar cd ./inference && tar xf ${eval_model_name}.tar && cd ../ @@ -104,7 +106,7 @@ for train_model in ${train_model_list[*]}; do cd ./inference && tar xf ${eval_model_name}.tar && cd ../ fi elif [ ${slim_trainer} = "pact" ]; then - if [ ${model_name} = "ocr_det" ]; then + if [ ${model_name} = "det" ]; then eval_model_name="ch_ppocr_mobile_v2.0_det_quant_train" wget -nc -P ./inference 
https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_quant_train.tar cd ./inference && tar xf ${eval_model_name}.tar && cd ../ @@ -114,7 +116,7 @@ for train_model in ${train_model_list[*]}; do cd ./inference && tar xf ${eval_model_name}.tar && cd ../ fi elif [ ${slim_trainer} = "distill" ]; then - if [ ${model_name} = "ocr_det" ]; then + if [ ${model_name} = "det" ]; then eval_model_name="ch_ppocr_mobile_v2.0_det_distill_train" wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_distill_train.tar cd ./inference && tar xf ${eval_model_name}.tar && cd ../ @@ -124,7 +126,7 @@ for train_model in ${train_model_list[*]}; do cd ./inference && tar xf ${eval_model_name}.tar && cd ../ fi elif [ ${slim_trainer} = "fpgm" ]; then - if [ ${model_name} = "ocr_det" ]; then + if [ ${model_name} = "det" ]; then eval_model_name="ch_ppocr_mobile_v2.0_det_prune_train" wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_train.tar cd ./inference && tar xf ${eval_model_name}.tar && cd ../ diff --git a/test/test.sh b/test/test.sh index 2a27563ffa..a75aed4269 100644 --- a/test/test.sh +++ b/test/test.sh @@ -41,59 +41,51 @@ gpu_list=$(func_parser_value "${lines[2]}") autocast_list=$(func_parser_value "${lines[3]}") autocast_key=$(func_parser_key "${lines[3]}") epoch_key=$(func_parser_key "${lines[4]}") +epoch_num=$(func_parser_value "${lines[4]}") save_model_key=$(func_parser_key "${lines[5]}") -save_infer_key=$(func_parser_key "${lines[6]}") -train_batch_key=$(func_parser_key "${lines[7]}") -train_use_gpu_key=$(func_parser_key "${lines[8]}") -pretrain_model_key=$(func_parser_key "${lines[9]}") +train_batch_key=$(func_parser_key "${lines[6]}") +train_use_gpu_key=$(func_parser_key "${lines[7]}") +pretrain_model_key=$(func_parser_key "${lines[8]}") +pretrain_model_value=$(func_parser_value "${lines[8]}") -trainer_list=$(func_parser_value "${lines[10]}") 
-norm_trainer=$(func_parser_value "${lines[11]}") -pact_trainer=$(func_parser_value "${lines[12]}") -fpgm_trainer=$(func_parser_value "${lines[13]}") -distill_trainer=$(func_parser_value "${lines[14]}") +trainer_list=$(func_parser_value "${lines[9]}") +norm_trainer=$(func_parser_value "${lines[10]}") +pact_trainer=$(func_parser_value "${lines[11]}") +fpgm_trainer=$(func_parser_value "${lines[12]}") +distill_trainer=$(func_parser_value "${lines[13]}") -eval_py=$(func_parser_value "${lines[15]}") -norm_export=$(func_parser_value "${lines[16]}") -pact_export=$(func_parser_value "${lines[17]}") -fpgm_export=$(func_parser_value "${lines[18]}") -distill_export=$(func_parser_value "${lines[19]}") +eval_py=$(func_parser_value "${lines[14]}") -inference_py=$(func_parser_value "${lines[20]}") -use_gpu_key=$(func_parser_key "${lines[21]}") -use_gpu_list=$(func_parser_value "${lines[21]}") -use_mkldnn_key=$(func_parser_key "${lines[22]}") -use_mkldnn_list=$(func_parser_value "${lines[22]}") -cpu_threads_key=$(func_parser_key "${lines[23]}") -cpu_threads_list=$(func_parser_value "${lines[23]}") -batch_size_key=$(func_parser_key "${lines[24]}") -batch_size_list=$(func_parser_value "${lines[24]}") -use_trt_key=$(func_parser_key "${lines[25]}") -use_trt_list=$(func_parser_value "${lines[25]}") -precision_key=$(func_parser_key "${lines[26]}") -precision_list=$(func_parser_value "${lines[26]}") -model_dir_key=$(func_parser_key "${lines[27]}") -image_dir_key=$(func_parser_key "${lines[28]}") -save_log_key=$(func_parser_key "${lines[29]}") +save_infer_key=$(func_parser_key "${lines[15]}") +export_weight=$(func_parser_key "${lines[16]}") +norm_export=$(func_parser_value "${lines[17]}") +pact_export=$(func_parser_value "${lines[18]}") +fpgm_export=$(func_parser_value "${lines[19]}") +distill_export=$(func_parser_value "${lines[20]}") + +inference_py=$(func_parser_value "${lines[21]}") +use_gpu_key=$(func_parser_key "${lines[22]}") +use_gpu_list=$(func_parser_value "${lines[22]}") 
+use_mkldnn_key=$(func_parser_key "${lines[23]}") +use_mkldnn_list=$(func_parser_value "${lines[23]}") +cpu_threads_key=$(func_parser_key "${lines[24]}") +cpu_threads_list=$(func_parser_value "${lines[24]}") +batch_size_key=$(func_parser_key "${lines[25]}") +batch_size_list=$(func_parser_value "${lines[25]}") +use_trt_key=$(func_parser_key "${lines[26]}") +use_trt_list=$(func_parser_value "${lines[26]}") +precision_key=$(func_parser_key "${lines[27]}") +precision_list=$(func_parser_value "${lines[27]}") +infer_model_key=$(func_parser_key "${lines[28]}") +infer_model=$(func_parser_value "${lines[28]}") +image_dir_key=$(func_parser_key "${lines[29]}") +infer_img_dir=$(func_parser_value "${lines[29]}") +save_log_key=$(func_parser_key "${lines[30]}") LOG_PATH="./test/output" mkdir -p ${LOG_PATH} status_log="${LOG_PATH}/results.log" -if [ ${MODE} = "lite_train_infer" ]; then - export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/" - export epoch_num=10 -elif [ ${MODE} = "whole_infer" ]; then - export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/" - export epoch_num=10 -elif [ ${MODE} = "whole_train_infer" ]; then - export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/" - export epoch_num=300 -else - export infer_img_dir="./inference/ch_det_data_50/all-sum-510" - export infer_model_dir="./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy" -fi - function func_inference(){ IFS='|' @@ -110,7 +102,7 @@ function func_inference(){ for threads in ${cpu_threads_list[*]}; do for batch_size in ${batch_size_list[*]}; do _save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}" - command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${model_dir_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True" + 
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True" eval $command status_check $? "${command}" "${status_log}" done @@ -124,7 +116,7 @@ function func_inference(){ fi for batch_size in ${batch_size_list[*]}; do _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}" - command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${model_dir_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True" + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True" eval $command status_check $? 
"${command}" "${status_log}" done @@ -138,9 +130,9 @@ if [ ${MODE} != "infer" ]; then IFS="|" for gpu in ${gpu_list[*]}; do - train_use_gpu=True + use_gpu=True if [ ${gpu} = "-1" ];then - train_use_gpu=False + use_gpu=False env="" elif [ ${#gpu} -le 1 ];then env="export CUDA_VISIBLE_DEVICES=${gpu}" @@ -155,6 +147,7 @@ for gpu in ${gpu_list[*]}; do ips=${array[0]} gpu=${array[1]} IFS="|" + env=" " fi for autocast in ${autocast_list[*]}; do for trainer in ${trainer_list[*]}; do @@ -179,13 +172,32 @@ for gpu in ${gpu_list[*]}; do continue fi - save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}" - if [ ${#gpu} -le 2 ];then # epoch_num #TODO - cmd="${python} ${run_train} ${train_use_gpu_key}=${train_use_gpu} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log} " - elif [ ${#gpu} -le 15 ];then - cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log}" + # not set autocast when autocast is null + if [ ${autocast} = "null" ]; then + set_autocast=" " else - cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log}" + set_autocast="${autocast_key}=${autocast}" + fi + # not set epoch when whole_train_infer + if [ ${MODE} != "whole_train_infer" ]; then + set_epoch="${epoch_key}=${epoch_num}" + else + set_epoch=" " + fi + # set pretrain + if [ ${pretrain_model_value} != "null" ]; then + set_pretrain="${pretrain_model_key}=${pretrain_model_value}" + else + set_pretrain=" " + fi + + save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}" + if [ ${#gpu} -le 2 ];then # train with cpu or single gpu + cmd="${python} ${run_train} ${train_use_gpu_key}=${use_gpu} ${save_model_key}=${save_log} ${set_epoch} ${set_pretrain} ${set_autocast}" + elif [ ${#gpu} -le 15 ];then # train with multi-gpu + cmd="${python} -m 
paddle.distributed.launch --gpus=${gpu} ${run_train} ${save_model_key}=${save_log} ${set_epoch} ${set_pretrain} ${set_autocast}" + else # train with multi-machine + cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${save_model_key}=${save_log} ${set_pretrain} ${set_epoch} ${set_autocast}" fi # run train eval $cmd @@ -198,11 +210,12 @@ for gpu in ${gpu_list[*]}; do # run export model save_infer_path="${save_log}" - export_cmd="${python} ${run_export} ${save_model_key}=${save_log} ${pretrain_model_key}=${save_log}/latest ${save_infer_key}=${save_infer_path}" + export_cmd="${python} ${run_export} ${save_model_key}=${save_log} ${export_weight}=${save_log}/latest ${save_infer_key}=${save_infer_path}" eval $export_cmd status_check $? "${export_cmd}" "${status_log}" #run inference + echo $env save_infer_path="${save_log}" func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}" done @@ -210,12 +223,13 @@ for gpu in ${gpu_list[*]}; do done else - save_infer_path="${LOG_PATH}/${MODE}" - run_export=${norm_export} - export_cmd="${python} ${run_export} ${save_model_key}=${save_infer_path} ${pretrain_model_key}=${infer_model_dir} ${save_infer_key}=${save_infer_path}" - eval $export_cmd - status_check $? 
"${export_cmd}" "${status_log}" - + GPUID=$3 + if [ ${#GPUID} -le 0 ];then + env=" " + else + env="export CUDA_VISIBLE_DEVICES=${GPUID}" + fi + echo $env #run inference - func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}" + func_inference "${python}" "${inference_py}" "${infer_model}" "${LOG_PATH}" "${infer_img_dir}" fi From 10e3729fd51c98b7c0ccf3e8ce5e9383430b0ff2 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 14:49:38 +0800 Subject: [PATCH 05/28] minsubgraph_size 3 to 10 --- tools/infer/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/infer/utility.py b/tools/infer/utility.py index cf14e4abd7..e464722f09 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -37,7 +37,7 @@ def init_args(): parser.add_argument("--use_gpu", type=str2bool, default=True) parser.add_argument("--ir_optim", type=str2bool, default=True) parser.add_argument("--use_tensorrt", type=str2bool, default=False) - parser.add_argument("--min_subgraph_size", type=int, default=3) + parser.add_argument("--min_subgraph_size", type=int, default=10) parser.add_argument("--precision", type=str, default="fp32") parser.add_argument("--gpu_mem", type=int, default=500) From 917118ae8ee8e88a0fbca2cca32700c986704311 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 15:12:15 +0800 Subject: [PATCH 06/28] set env --- test/test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index a75aed4269..601708cc98 100644 --- a/test/test.sh +++ b/test/test.sh @@ -136,6 +136,7 @@ for gpu in ${gpu_list[*]}; do env="" elif [ ${#gpu} -le 1 ];then env="export CUDA_VISIBLE_DEVICES=${gpu}" + eval ${env} elif [ ${#gpu} -le 15 ];then IFS="," array=(${gpu}) @@ -215,7 +216,7 @@ for gpu in ${gpu_list[*]}; do status_check $? 
"${export_cmd}" "${status_log}" #run inference - echo $env + eval $env save_infer_path="${save_log}" func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}" done From 202a0b5b859ebb72eea950b1ba0093de1ec7a2f2 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 07:54:39 +0000 Subject: [PATCH 07/28] add det distill combined --- ppocr/losses/combined_loss.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ppocr/losses/combined_loss.py b/ppocr/losses/combined_loss.py index 54da70174c..639265ede9 100644 --- a/ppocr/losses/combined_loss.py +++ b/ppocr/losses/combined_loss.py @@ -44,15 +44,17 @@ class CombinedLoss(nn.Layer): def forward(self, input, batch, **kargs): loss_dict = {} + loss_all = 0. for idx, loss_func in enumerate(self.loss_func): loss = loss_func(input, batch, **kargs) if isinstance(loss, paddle.Tensor): loss = {"loss_{}_{}".format(str(loss), idx): loss} weight = self.loss_weight[idx] - loss = { - "{}_{}".format(key, idx): loss[key] * weight - for key in loss - } + for key in loss: + if key == "loss": + loss_all += loss[key] * weight + # else: + # loss[f"{key}_{idx}"] = loss[key] loss_dict.update(loss) - loss_dict["loss"] = paddle.add_n(list(loss_dict.values())) + loss_dict["loss"] = loss_all return loss_dict From 80561b154e1c32dc8d80dd3f28be5dcd6b462639 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 07:55:05 +0000 Subject: [PATCH 08/28] x as dict --- ppocr/modeling/architectures/base_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ppocr/modeling/architectures/base_model.py b/ppocr/modeling/architectures/base_model.py index 03fbcee846..ff3da01a95 100644 --- a/ppocr/modeling/architectures/base_model.py +++ b/ppocr/modeling/architectures/base_model.py @@ -79,7 +79,10 @@ class BaseModel(nn.Layer): x = self.neck(x) y["neck_out"] = x x = self.head(x, targets=data) - y["head_out"] = x + if type(x) is dict: + y.update(x) + else: + 
y["head_out"] = x if self.return_all_feats: return y else: From a411c825e2bfab4dea986effb57e2f7833334f43 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 07:56:58 +0000 Subject: [PATCH 09/28] add sum loss --- ppocr/losses/distillation_loss.py | 161 ++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 8 deletions(-) diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index 1e8aa0d860..1c5d8a2b5a 100644 --- a/ppocr/losses/distillation_loss.py +++ b/ppocr/losses/distillation_loss.py @@ -18,19 +18,60 @@ import paddle.nn as nn from .rec_ctc_loss import CTCLoss from .basic_loss import DMLLoss from .basic_loss import DistanceLoss +from .det_db_loss import DBLoss +from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss -class DistillationDMLLoss(DMLLoss): +def _sum_loss(loss_dict): + if "loss" in loss_dict.keys(): + return loss_dict + else: + loss_dict["loss"] = 0. + for k, value in loss_dict.items(): + if k == "loss": + continue + else: + loss_dict["loss"] += value + return loss_dict + +# class DistillationDMLLoss(DMLLoss): """ """ - def __init__(self, model_name_pairs=[], act=None, key=None, + def __init__(self, + model_name_pairs=[], + act=None, + key=None, + maps_name=None, name="loss_dml"): super().__init__(act=act) assert isinstance(model_name_pairs, list) self.key = key self.model_name_pairs = model_name_pairs self.name = name + self.maps_name = self.maps_name + + def _check_maps_name(self, maps_name): + if maps_name is None: + return None + elif type(maps_name) == str: + return [maps_name] + elif type(maps_name) == list: + return [maps_name] + else: + return None + + def _slice_out(self, outs): + new_outs = {} + for k in self.maps_name: + if k == "thrink_maps": + new_outs[k] = paddle.slice(outs, axes=1, starts=0, ends=1) + elif k == "threshold_maps": + new_outs[k] = paddle.slice(outs, axes=1, starts=1, ends=2) + elif k == "binary_maps": + new_outs[k] = paddle.slice(outs, axes=1, starts=2, ends=3) + 
else: + continue def forward(self, predicts, batch): loss_dict = dict() @@ -40,13 +81,30 @@ class DistillationDMLLoss(DMLLoss): if self.key is not None: out1 = out1[self.key] out2 = out2[self.key] - loss = super().forward(out1, out2) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], - idx)] = loss[key] + + if self.maps_name is None: + loss = super().forward(out1, out2) + if isinstance(loss, dict): + for key in loss: + loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], + idx)] = loss[key] + else: + loss_dict["{}_{}".format(self.name, idx)] = loss else: - loss_dict["{}_{}".format(self.name, idx)] = loss + outs1 = self._slice_out(out1) + outs2 = self._slice_out(out2) + for k in outs1.keys(): + loss = super().forward(outs1[k], outs2[k]) + if isinstance(loss, dict): + for key in loss: + loss_dict["{}_{}_{}_{}_{}".format(key, pair[ + 0], pair[1], map_name, idx)] = loss[key] + else: + loss_dict["{}_{}_{}".format(self.name, map_name, + idx)] = loss + + loss_dict = _sum_loss(loss_dict) + return loss_dict @@ -73,6 +131,93 @@ class DistillationCTCLoss(CTCLoss): return loss_dict +""" +class DistillationDBLoss(DBLoss): + def __init__(self, + model_name_list=[], + balance_loss=True, + main_loss_type='DiceLoss', + alpha=5, + beta=10, + ohem_ratio=3, + eps=1e-6, + name="db_loss", + **kwargs): + super().__init__() + self.model_name_list = model_name_list + self.name = name + + def forward(self, predicts, batch): + loss_dict = dict() + for idx, model_name in enumerate(self.model_name_list): + out = predicts[model_name] + if self.key is not None: + out = out[self.key] + + loss = super().forward(out, batch) + + if isinstance(loss, dict): + for key in loss.keys(): + if key == "loss": + continue + loss_dict[f"{self.name}_{model_name}_{key}"] = loss[key] + else: + loss_dict[f"{self.name}_{model_name}"] = loss + + loss_dict = _sum_loss(loss_dict) + return loss_dict + + +class DistillationDilaDBLoss(DBLoss): + def __init__(self, 
model_name_pairs=[], + balance_loss=True, + main_loss_type='DiceLoss', + alpha=5, + beta=10, + ohem_ratio=3, + eps=1e-6, + name="dila_dbloss"): + super().__init__() + self.model_name_pairs = model_name_pairs + self.name = name + + def forward(self, predicts, batch): + loss_dict = dict() + for idx, pair in enumerate(self.model_name_pairs): + stu_outs = predicts[pair[0]] + tch_outs = predicts[pair[1]] + if self.key is not None: + stu_preds = stu_outs[self.key] + tch_preds = tch_outs[self.key] + + stu_shrink_maps = stu_preds[:, 0, :, :] + stu_binary_maps = stu_preds[:, 2, :, :] + + # dilation to teacher prediction + dilation_w = np.array([[1,1], [1,1]]) + th_shrink_maps = tch_preds[:, 0, :, :] + th_shrink_maps = th_shrink_maps.numpy() > 0.3 # thresh = 0.3 + dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32) + for i in range(th_shrink_maps.shape[0]): + dilate_maps[i] = cv2.dilate(th_shrink_maps[i, :, :].astype(np.uint8), dilation_w) + th_shrink_maps = paddle.to_tensor(dilate_maps) + + label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[1:] + + # calculate the shrink map loss + bce_loss = self.alpha * self.bce_loss(stu_shrink_maps, th_shrink_maps, + label_shrink_mask) + loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps, + label_shrink_mask) + + k = f"{self.name}_{pair[0]}_{pair[1]}" + loss_dict[k] = bce_loss + loss_binary_maps + + loss_dict = _sum_loss(loss_dict) + return loss +""" + + class DistillationDistanceLoss(DistanceLoss): """ """ From 40bf3b10537148452a6223310a9ab55be47084c4 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 08:02:01 +0000 Subject: [PATCH 10/28] add DistillationDilaDBLoss loss --- ppocr/losses/distillation_loss.py | 90 +------------------------------ 1 file changed, 2 insertions(+), 88 deletions(-) diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index 1c5d8a2b5a..b19f3f892d 100644 --- a/ppocr/losses/distillation_loss.py +++ 
b/ppocr/losses/distillation_loss.py @@ -34,7 +34,8 @@ def _sum_loss(loss_dict): loss_dict["loss"] += value return loss_dict -# class DistillationDMLLoss(DMLLoss): + +class DistillationDMLLoss(DMLLoss): """ """ @@ -131,93 +132,6 @@ class DistillationCTCLoss(CTCLoss): return loss_dict -""" -class DistillationDBLoss(DBLoss): - def __init__(self, - model_name_list=[], - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - name="db_loss", - **kwargs): - super().__init__() - self.model_name_list = model_name_list - self.name = name - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - - loss = super().forward(out, batch) - - if isinstance(loss, dict): - for key in loss.keys(): - if key == "loss": - continue - loss_dict[f"{self.name}_{model_name}_{key}"] = loss[key] - else: - loss_dict[f"{self.name}_{model_name}"] = loss - - loss_dict = _sum_loss(loss_dict) - return loss_dict - - -class DistillationDilaDBLoss(DBLoss): - def __init__(self, model_name_pairs=[], - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - name="dila_dbloss"): - super().__init__() - self.model_name_pairs = model_name_pairs - self.name = name - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, pair in enumerate(self.model_name_pairs): - stu_outs = predicts[pair[0]] - tch_outs = predicts[pair[1]] - if self.key is not None: - stu_preds = stu_outs[self.key] - tch_preds = tch_outs[self.key] - - stu_shrink_maps = stu_preds[:, 0, :, :] - stu_binary_maps = stu_preds[:, 2, :, :] - - # dilation to teacher prediction - dilation_w = np.array([[1,1], [1,1]]) - th_shrink_maps = tch_preds[:, 0, :, :] - th_shrink_maps = th_shrink_maps.numpy() > 0.3 # thresh = 0.3 - dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32) - for i in 
range(th_shrink_maps.shape[0]): - dilate_maps[i] = cv2.dilate(th_shrink_maps[i, :, :].astype(np.uint8), dilation_w) - th_shrink_maps = paddle.to_tensor(dilate_maps) - - label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[1:] - - # calculate the shrink map loss - bce_loss = self.alpha * self.bce_loss(stu_shrink_maps, th_shrink_maps, - label_shrink_mask) - loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps, - label_shrink_mask) - - k = f"{self.name}_{pair[0]}_{pair[1]}" - loss_dict[k] = bce_loss + loss_binary_maps - - loss_dict = _sum_loss(loss_dict) - return loss -""" - - class DistillationDistanceLoss(DistanceLoss): """ """ From 48898ac357f4b242b98276c5bf48984eb60b5833 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 08:13:13 +0000 Subject: [PATCH 11/28] add config --- .../ch_det_lite_train_distill_v2.1.yml | 194 ++++++++++++++++++ ppocr/losses/distillation_loss.py | 90 ++++++++ ppocr/modeling/architectures/base_model.py | 2 +- 3 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml new file mode 100644 index 0000000000..54ef12a1d7 --- /dev/null +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml @@ -0,0 +1,194 @@ +Global: + use_gpu: true + epoch_num: 1200 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/ch_db_mv3/ + save_epoch_step: 1200 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [3000, 2000] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_db/predicts_db.txt + +Architecture: + name: DistillationModel + algorithm: Distillation + 
Models: + Student: + pretrained: + freeze_params: false + return_all_feats: false + model_type: det + algorithm: DB + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: DBFPN + out_channels: 96 + Head: + name: DBHead + k: 50 + Student2: + pretrained: + freeze_params: false + return_all_feats: false + model_type: det + algorithm: DB + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: DBFPN + out_channels: 96 + Head: + name: DBHead + k: 50 + Teacher: + model_type: det + algorithm: DB + Transform: + Backbone: + name: ResNet + layers: 18 + Neck: + name: DBFPN + out_channels: 256 + Head: + name: DBHead + k: 50 + +Loss: + name: CombinedLoss + loss_config_list: + - DistillationDilaDBLoss: + weight: 1.0 + model_name_list: ["Student", "Student2", "Teacher"] + key: maps + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + - DistillationDMLLoss: + maps_name: ["thrink_maps"] + weight: 1.0 + act: "softmax" + model_name_pairs: ["Student", "Student2"] + key: maps + - DistillationDBLoss: + model_name_list: ["Student", "Teacher"] + key: maps + name: DBLoss + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name: DistillationCTDBPostProcessCLabelDecode + model_name: ["Student", "Student2"] + key: head_out + thresh: 0.3 + box_thresh: 0.6 + max_candidates: 1000 + unclip_ratio: 1.5 + +Metric: + name: DistillationMetric + base_metric_name: DetMetric + main_indicator: hmean + key: "Student" + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + 
img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - IaaAugment: + augmenter_args: + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [-10, 10] } } + - { 'type': Resize, 'args': { 'size': [0.5, 3] } } + - EastRandomCropData: + size: [960, 960] + max_tries: 50 + keep_ratio: true + - MakeBorderMap: + shrink_ratio: 0.4 + thresh_min: 0.3 + thresh_max: 0.7 + - MakeShrinkMap: + shrink_ratio: 0.4 + min_text_size: 8 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: +# image_shape: [736, 1280] + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index b19f3f892d..421bbaba61 100644 --- a/ppocr/losses/distillation_loss.py +++ b/ppocr/losses/distillation_loss.py @@ -132,6 +132,96 @@ class DistillationCTCLoss(CTCLoss): return loss_dict +class DistillationDBLoss(DBLoss): + def __init__(self, + model_name_list=[], + balance_loss=True, + main_loss_type='DiceLoss', + alpha=5, + beta=10, + ohem_ratio=3, + eps=1e-6, + name="db_loss", + **kwargs): + super().__init__() + self.model_name_list = model_name_list + self.name = name + self.key = None + + def forward(self, preicts, batch): + loss_dict = {} + for idx, model_name in enumerate(self.model_name_list): + out = predicts[model_name] + if self.key is not None: + out = out[self.key] + loss = super().forward(out, batch) + + if isinstance(loss, dict): + for key in loss.keys(): + if key == "loss": + continue + name = "{}_{}_{}".format(self.name, model_name, key) + loss_dict[name] = loss[key] + else: + loss_dict["{}_{}".format(self.name, model_name)] = loss + + loss_dict = _sum_loss(loss_dict) + return loss_dict + + +class DistillationDilaDBLoss(DBLoss): + def __init__(self, + model_name_pairs=[], + balance_loss=True, + main_loss_type='DiceLoss', + alpha=5, + beta=10, + ohem_ratio=3, + eps=1e-6, + name="dila_dbloss"): + super().__init__() + self.model_name_pairs = model_name_pairs + self.name = name + + def forward(self, predicts, batch): + loss_dict = dict() + for idx, pair in enumerate(self.model_name_pairs): + stu_outs = predicts[pair[0]] + tch_outs = predicts[pair[1]] + if self.key is not None: + stu_preds = stu_outs[self.key] + tch_preds = tch_outs[self.key] + + stu_shrink_maps = stu_preds[:, 0, :, :] + stu_binary_maps = 
stu_preds[:, 2, :, :] + + # dilation to teacher prediction + dilation_w = np.array([[1, 1], [1, 1]]) + th_shrink_maps = tch_preds[:, 0, :, :] + th_shrink_maps = th_shrink_maps.numpy() > 0.3 # thresh = 0.3 + dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32) + for i in range(th_shrink_maps.shape[0]): + dilate_maps[i] = cv2.dilate( + th_shrink_maps[i, :, :].astype(np.uint8), dilation_w) + th_shrink_maps = paddle.to_tensor(dilate_maps) + + label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[ + 1:] + + # calculate the shrink map loss + bce_loss = self.alpha * self.bce_loss( + stu_shrink_maps, th_shrink_maps, label_shrink_mask) + loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps, + label_shrink_mask) + + # k = f"{self.name}_{pair[0]}_{pair[1]}" + k = "{}_{}_{}".format(self.name, pair[0], pair[1]) + loss_dict[k] = bce_loss + loss_binary_maps + + loss_dict = _sum_loss(loss_dict) + return loss + + class DistillationDistanceLoss(DistanceLoss): """ """ diff --git a/ppocr/modeling/architectures/base_model.py b/ppocr/modeling/architectures/base_model.py index ff3da01a95..dbd18070b3 100644 --- a/ppocr/modeling/architectures/base_model.py +++ b/ppocr/modeling/architectures/base_model.py @@ -79,7 +79,7 @@ class BaseModel(nn.Layer): x = self.neck(x) y["neck_out"] = x x = self.head(x, targets=data) - if type(x) is dict: + if isinstance(x, dict): y.update(x) else: y["head_out"] = x From 9811a09e939d64ea76d05aba169d61937bfe0c32 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 08:16:01 +0000 Subject: [PATCH 12/28] fix combined loss --- ppocr/losses/combined_loss.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ppocr/losses/combined_loss.py b/ppocr/losses/combined_loss.py index 639265ede9..eb1ad9bf92 100644 --- a/ppocr/losses/combined_loss.py +++ b/ppocr/losses/combined_loss.py @@ -53,8 +53,9 @@ class CombinedLoss(nn.Layer): for key in loss: if key == "loss": loss_all += loss[key] * 
weight - # else: - # loss[f"{key}_{idx}"] = loss[key] + else: + loss["{}_{}".format(key, idx)] = loss[key] + # loss[f"{key}_{idx}"] = loss[key] loss_dict.update(loss) loss_dict["loss"] = loss_all return loss_dict From c93aba2d344fc6f061d57f95788ce220d0e3715e Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 16:33:33 +0800 Subject: [PATCH 13/28] fix slim export model error --- deploy/slim/quantization/export_model.py | 2 +- test/ocr_det_params.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py index 87d2f333c1..d94e53034a 100755 --- a/deploy/slim/quantization/export_model.py +++ b/deploy/slim/quantization/export_model.py @@ -101,7 +101,7 @@ def main(): quanter = QAT(config=quant_config) quanter.quantize(model) - init_model(config, model, logger) + init_model(config, model) model.eval() # build metric diff --git a/test/ocr_det_params.txt b/test/ocr_det_params.txt index da7e034bdd..bdfd4d4f47 100644 --- a/test/ocr_det_params.txt +++ b/test/ocr_det_params.txt @@ -17,7 +17,7 @@ distill_train:null eval:tools/eval.py -c configs/det/det_mv3_db.yml -o Global.save_inference_dir:./output/ -Global.checkpoints: +Global.pretrained_model: norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o fpgm_export:deploy/slim/prune/export_prune_model.py From bf4e0f7a28c111ec069bc307822f7198d3c2566e Mon Sep 17 00:00:00 2001 From: Double_V Date: Tue, 6 Jul 2021 17:15:47 +0800 Subject: [PATCH 14/28] add .log --- test/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test.sh b/test/test.sh index 601708cc98..1cbb36012b 100644 --- a/test/test.sh +++ b/test/test.sh @@ -101,7 +101,7 @@ function func_inference(){ for use_mkldnn in ${use_mkldnn_list[*]}; do for threads in ${cpu_threads_list[*]}; do for batch_size in ${batch_size_list[*]}; do - 
_save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}" + _save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log" command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True" eval $command status_check $? "${command}" "${status_log}" @@ -115,7 +115,7 @@ function func_inference(){ continue fi for batch_size in ${batch_size_list[*]}; do - _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}" + _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log" command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True" eval $command status_check $? 
"${command}" "${status_log}" From a91bbd743270394b31a612f4ff2982fbb8e92a70 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 6 Jul 2021 11:50:56 +0000 Subject: [PATCH 15/28] add import to combined_loss --- ppocr/losses/combined_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppocr/losses/combined_loss.py b/ppocr/losses/combined_loss.py index eb1ad9bf92..f37c4db128 100644 --- a/ppocr/losses/combined_loss.py +++ b/ppocr/losses/combined_loss.py @@ -17,7 +17,7 @@ import paddle.nn as nn from .distillation_loss import DistillationCTCLoss from .distillation_loss import DistillationDMLLoss -from .distillation_loss import DistillationDistanceLoss +from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss class CombinedLoss(nn.Layer): From 77b554fa2548b79d7c1a1cad2c9c9ab4dff6d709 Mon Sep 17 00:00:00 2001 From: MissPenguin Date: Tue, 6 Jul 2021 11:53:49 +0000 Subject: [PATCH 16/28] add doc for table training --- test1/table/README_ch.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/test1/table/README_ch.md b/test1/table/README_ch.md index 03f002f98b..5c3c9a285f 100644 --- a/test1/table/README_ch.md +++ b/test1/table/README_ch.md @@ -19,7 +19,29 @@ ### 2.1 训练 -TBD +#### 数据准备 +训练数据使用公开数据集[PubTabNet](https://arxiv.org/abs/1911.10683),可以从[官网](https://github.com/ibm-aur-nlp/PubTabNet)下载。PubTabNet数据集包含约50万张表格数据的图像,以及图像对应的html格式的注释。 + +#### 启动训练 +*如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false* +```shell +# 单机单卡训练 +python3 tools/train.py -c configs/table/table_mv3.yml +# 单机多卡训练,通过 --gpus 参数设置使用的GPU ID +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml +``` + +上述指令中,通过-c 选择训练使用configs/table/table_mv3.yml配置文件。有关配置文件的详细解释,请参考[链接](./config.md)。 + +#### 断点训练 + +如果训练程序中断,如果希望加载训练中断的模型从而恢复训练,可以通过指定Global.checkpoints指定要加载的模型路径: +```shell +python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model 
+``` + +**注意**:`Global.checkpoints`的优先级高于`Global.pretrained_model`的优先级,即同时指定两个参数时,优先加载`Global.checkpoints`指定的模型,如果`Global.checkpoints`指定的模型路径有误,会加载`Global.pretrained_model`指定的模型。 + ### 2.2 评估 先cd到PaddleOCR/ppstructure目录下 From 185d1e1f929642e4bae576c3888b9500771a84fe Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Wed, 7 Jul 2021 01:54:03 +0000 Subject: [PATCH 17/28] fix bug --- .../ch_det_lite_train_distill_v2.1.yml | 18 +++++--- ppocr/losses/distillation_loss.py | 9 +++- .../architectures/distillation_model.py | 4 +- ppocr/postprocess/__init__.py | 4 +- ppocr/postprocess/db_postprocess.py | 41 +++++++++++++++++++ ppocr/utils/save_load.py | 20 +++++++++ tools/program.py | 5 ++- 7 files changed, 89 insertions(+), 12 deletions(-) diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml index 54ef12a1d7..b27eb2f9c9 100644 --- a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml @@ -20,7 +20,7 @@ Architecture: algorithm: Distillation Models: Student: - pretrained: + pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained freeze_params: false return_all_feats: false model_type: det @@ -37,7 +37,7 @@ Architecture: name: DBHead k: 50 Student2: - pretrained: + pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained freeze_params: false return_all_feats: false model_type: det @@ -55,6 +55,9 @@ Architecture: name: DBHead k: 50 Teacher: + pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy + freeze_params: true + return_all_feats: false model_type: det algorithm: DB Transform: @@ -73,7 +76,9 @@ Loss: loss_config_list: - DistillationDilaDBLoss: weight: 1.0 - model_name_list: ["Student", "Student2", "Teacher"] + model_name_pairs: + - ["Student", "Teacher"] + - ["Student2", "Teacher"] key: maps balance_loss: true main_loss_type: DiceLoss @@ -81,13 +86,16 @@ Loss: beta: 10 ohem_ratio: 3 -
DistillationDMLLoss: + model_name_pairs: + - ["Student", "Student2"] maps_name: ["thrink_maps"] weight: 1.0 act: "softmax" model_name_pairs: ["Student", "Student2"] key: maps - DistillationDBLoss: - model_name_list: ["Student", "Teacher"] + weight: 1.0 + model_name_list: ["Student", "Student2"] key: maps name: DBLoss balance_loss: true @@ -110,7 +118,7 @@ Optimizer: factor: 0 PostProcess: - name: DistillationCTDBPostProcessCLabelDecode + name: DistillationDBPostProcess model_name: ["Student", "Student2"] key: head_out thresh: 0.3 diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index 421bbaba61..d4e4a8a2a4 100644 --- a/ppocr/losses/distillation_loss.py +++ b/ppocr/losses/distillation_loss.py @@ -14,6 +14,8 @@ import paddle import paddle.nn as nn +import numpy as np +import cv2 from .rec_ctc_loss import CTCLoss from .basic_loss import DMLLoss @@ -22,6 +24,7 @@ from .det_db_loss import DBLoss from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss + def _sum_loss(loss_dict): if "loss" in loss_dict.keys(): return loss_dict @@ -50,7 +53,7 @@ class DistillationDMLLoss(DMLLoss): self.key = key self.model_name_pairs = model_name_pairs self.name = name - self.maps_name = self.maps_name + self.maps_name = maps_name def _check_maps_name(self, maps_name): if maps_name is None: @@ -172,6 +175,7 @@ class DistillationDBLoss(DBLoss): class DistillationDilaDBLoss(DBLoss): def __init__(self, model_name_pairs=[], + key=None, balance_loss=True, main_loss_type='DiceLoss', alpha=5, @@ -182,6 +186,7 @@ class DistillationDilaDBLoss(DBLoss): super().__init__() self.model_name_pairs = model_name_pairs self.name = name + self.key = key def forward(self, predicts, batch): loss_dict = dict() @@ -219,7 +224,7 @@ class DistillationDilaDBLoss(DBLoss): loss_dict[k] = bce_loss + loss_binary_maps loss_dict = _sum_loss(loss_dict) - return loss + return loss_dict class DistillationDistanceLoss(DistanceLoss): diff --git 
a/ppocr/modeling/architectures/distillation_model.py b/ppocr/modeling/architectures/distillation_model.py index 2e512331af..1e95fe5744 100644 --- a/ppocr/modeling/architectures/distillation_model.py +++ b/ppocr/modeling/architectures/distillation_model.py @@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone from ppocr.modeling.necks import build_neck from ppocr.modeling.heads import build_head from .base_model import BaseModel -from ppocr.utils.save_load import init_model +from ppocr.utils.save_load import init_model, load_pretrained_params __all__ = ['DistillationModel'] @@ -46,7 +46,7 @@ class DistillationModel(nn.Layer): pretrained = model_config.pop("pretrained") model = BaseModel(model_config) if pretrained is not None: - init_model(model, path=pretrained) + load_pretrained_params(model, pretrained) if freeze_params: for param in model.parameters(): param.trainable = False diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index 2f5bdc3b13..f2ac65c4f9 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -21,7 +21,7 @@ import copy __all__ = ['build_post_process'] -from .db_postprocess import DBPostProcess +from .db_postprocess import DBPostProcess, DistillationDBPostProcess from .east_postprocess import EASTPostProcess from .sast_postprocess import SASTPostProcess from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \ @@ -34,7 +34,7 @@ def build_post_process(config, global_config=None): support_dict = [ 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess', - 'DistillationCTCLabelDecode', 'TableLabelDecode' + 'DistillationCTCLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess' ] config = copy.deepcopy(config) diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py index 769ddbe232..4561b4642a 100755 --- 
a/ppocr/postprocess/db_postprocess.py +++ b/ppocr/postprocess/db_postprocess.py @@ -187,3 +187,44 @@ class DBPostProcess(object): boxes_batch.append({'points': boxes}) return boxes_batch + + +class DistillationDBPostProcess(DBPostProcess): + def __init__(self, + model_name=["student"], + key=None, + thresh=0.3, + box_thresh=0.7, + max_candidates=1000, + unclip_ratio=2.0, + use_dilation=False, + score_mode="fast", + **kwargs): + super(DistillationDBPostProcess, self).__init__(thresh, + box_thresh, + max_candidates, + unclip_ratio, + use_dilation, + score_mode) + if not isinstance(model_name, list): + model_name = [model_name] + self.model_name = model_name + + self.key = key + + def forward(self, predicts, shape_list): + results = {} + for name in self.model_name: + pred = predicts[name] + if self.key is not None: + pred = pred[self.key] + results[name] = super().__call__(pred, shape_list=label) + + return results + + + + + + + diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 76420abb5a..732f9e2048 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -116,6 +116,26 @@ def load_dygraph_params(config, model, logger, optimizer): logger.info(f"loaded pretrained_model successful from {pm}") return {} +def load_pretrained_params(model, path): + if path is None: + return False + if not os.path.exists(path) and not os.path.exists(path + ".pdparams"): + print(f"The pretrained_model {path} does not exists!") + return False + + path = path if path.endswith('.pdparams') else path + '.pdparams' + params = paddle.load(path) + state_dict = model.state_dict() + new_state_dict = {} + for k1, k2 in zip(state_dict.keys(), params.keys()): + if list(state_dict[k1].shape) == list(params[k2].shape): + new_state_dict[k1] = params[k2] + else: + print( + f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !" 
+ ) + model.set_state_dict(new_state_dict) + return True def save_model(model, optimizer, diff --git a/tools/program.py b/tools/program.py index 2d99f2968a..595fe4cb96 100755 --- a/tools/program.py +++ b/tools/program.py @@ -186,7 +186,10 @@ def train(config, model.train() use_srn = config['Architecture']['algorithm'] == "SRN" - model_type = config['Architecture']['model_type'] + try: + model_type = config['Architecture']['model_type'] + except: + model_type = None if 'start_epoch' in best_model_dict: start_epoch = best_model_dict['start_epoch'] From 6ce44198196a8e967e637dfad785734fd231f3e2 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Wed, 7 Jul 2021 02:45:32 +0000 Subject: [PATCH 18/28] fix bug --- .../ch_det_lite_train_distill_v2.1.yml | 4 ++-- ppocr/losses/combined_loss.py | 4 ++-- ppocr/losses/distillation_loss.py | 22 +++++++++++++------ ppocr/postprocess/__init__.py | 3 ++- ppocr/postprocess/db_postprocess.py | 16 +++----------- 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml index b27eb2f9c9..5f0846fa1a 100644 --- a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml @@ -88,7 +88,7 @@ Loss: - DistillationDMLLoss: model_name_pairs: - ["Student", "Student2"] - maps_name: ["thrink_maps"] + maps_name: "thrink_maps" weight: 1.0 act: "softmax" model_name_pairs: ["Student", "Student2"] @@ -96,7 +96,7 @@ Loss: - DistillationDBLoss: weight: 1.0 model_name_list: ["Student", "Student2"] - key: maps + # key: maps name: DBLoss balance_loss: true main_loss_type: DiceLoss diff --git a/ppocr/losses/combined_loss.py b/ppocr/losses/combined_loss.py index f37c4db128..f10efa31e2 100644 --- a/ppocr/losses/combined_loss.py +++ b/ppocr/losses/combined_loss.py @@ -50,11 +50,11 @@ class CombinedLoss(nn.Layer): if isinstance(loss, paddle.Tensor): loss = 
{"loss_{}_{}".format(str(loss), idx): loss} weight = self.loss_weight[idx] - for key in loss: + for key in loss.keys(): if key == "loss": loss_all += loss[key] * weight else: - loss["{}_{}".format(key, idx)] = loss[key] + loss_dict["{}_{}".format(key, idx)] = loss[key] # loss[f"{key}_{idx}"] = loss[key] loss_dict.update(loss) loss_dict["loss"] = loss_all diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index d4e4a8a2a4..43356c6f6e 100644 --- a/ppocr/losses/distillation_loss.py +++ b/ppocr/losses/distillation_loss.py @@ -24,7 +24,6 @@ from .det_db_loss import DBLoss from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss - def _sum_loss(loss_dict): if "loss" in loss_dict.keys(): return loss_dict @@ -51,9 +50,17 @@ class DistillationDMLLoss(DMLLoss): super().__init__(act=act) assert isinstance(model_name_pairs, list) self.key = key - self.model_name_pairs = model_name_pairs + self.model_name_pairs = self._check_model_name_pairs(model_name_pairs) self.name = name self.maps_name = maps_name + + def _check_model_name_pairs(self, model_name_pairs): + if not isinstance(model_name_pairs, list): + return [] + elif isinstance(model_name_pairs[0], list) and isinstance(model_name_pairs[0][0], str): + return model_name_pairs + else: + return [model_name_pairs] def _check_maps_name(self, maps_name): if maps_name is None: @@ -69,13 +76,14 @@ class DistillationDMLLoss(DMLLoss): new_outs = {} for k in self.maps_name: if k == "thrink_maps": - new_outs[k] = paddle.slice(outs, axes=1, starts=0, ends=1) + new_outs[k] = paddle.slice(outs, axes=[1], starts=[0], ends=[1]) elif k == "threshold_maps": - new_outs[k] = paddle.slice(outs, axes=1, starts=1, ends=2) + new_outs[k] = paddle.slice(outs, axes=[1], starts=[1], ends=[2]) elif k == "binary_maps": - new_outs[k] = paddle.slice(outs, axes=1, starts=2, ends=3) + new_outs[k] = paddle.slice(outs, axes=[1], starts=[2], ends=[3]) else: continue + return new_outs def forward(self, predicts, batch): 
loss_dict = dict() @@ -104,7 +112,7 @@ class DistillationDMLLoss(DMLLoss): loss_dict["{}_{}_{}_{}_{}".format(key, pair[ 0], pair[1], map_name, idx)] = loss[key] else: - loss_dict["{}_{}_{}".format(self.name, map_name, + loss_dict["{}_{}_{}".format(self.name, self.maps_name, idx)] = loss loss_dict = _sum_loss(loss_dict) @@ -151,7 +159,7 @@ class DistillationDBLoss(DBLoss): self.name = name self.key = None - def forward(self, preicts, batch): + def forward(self, predicts, batch): loss_dict = {} for idx, model_name in enumerate(self.model_name_list): out = predicts[model_name] diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index f2ac65c4f9..654ddf39d2 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -34,7 +34,8 @@ def build_post_process(config, global_config=None): support_dict = [ 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess', - 'DistillationCTCLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess' + 'DistillationCTCLabelDecode', 'TableLabelDecode', + 'DistillationDBPostProcess' ] config = copy.deepcopy(config) diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py index 4561b4642a..f2b2fc69ef 100755 --- a/ppocr/postprocess/db_postprocess.py +++ b/ppocr/postprocess/db_postprocess.py @@ -200,12 +200,9 @@ class DistillationDBPostProcess(DBPostProcess): use_dilation=False, score_mode="fast", **kwargs): - super(DistillationDBPostProcess, self).__init__(thresh, - box_thresh, - max_candidates, - unclip_ratio, - use_dilation, - score_mode) + super(DistillationDBPostProcess, self).__init__( + thresh, box_thresh, max_candidates, unclip_ratio, use_dilation, + score_mode) if not isinstance(model_name, list): model_name = [model_name] self.model_name = model_name @@ -221,10 +218,3 @@ class DistillationDBPostProcess(DBPostProcess): results[name] = super().__call__(pred, 
shape_list=label) return results - - - - - - - From a7b32ca82b9b10ee960601ac695b8f5d1f252bc5 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Wed, 7 Jul 2021 02:48:55 +0000 Subject: [PATCH 19/28] add config --- .../ch_det_lite_train_cml_v2.1.yml | 202 ++++++++++++++++++ .../ch_det_lite_train_distill_v2.1.yml | 30 +-- .../ch_det_lite_train_dml_v2.1.yml | 176 +++++++++++++++ 3 files changed, 379 insertions(+), 29 deletions(-) create mode 100644 configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml create mode 100644 configs/det/ch_ppocr_v2.1/ch_det_lite_train_dml_v2.1.yml diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml new file mode 100644 index 0000000000..5f0846fa1a --- /dev/null +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml @@ -0,0 +1,202 @@ +Global: + use_gpu: true + epoch_num: 1200 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/ch_db_mv3/ + save_epoch_step: 1200 + # evaluation is run every 2000 iterations after the 3000th iteration + eval_batch_step: [3000, 2000] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_db/predicts_db.txt + +Architecture: + name: DistillationModel + algorithm: Distillation + Models: + Student: + pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + freeze_params: false + return_all_feats: false + model_type: det + algorithm: DB + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: DBFPN + out_channels: 96 + Head: + name: DBHead + k: 50 + Student2: + pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + freeze_params: false + return_all_feats: false + model_type: det + algorithm: DB + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large +
disable_se: True + Neck: + name: DBFPN + out_channels: 96 + Head: + name: DBHead + k: 50 + Teacher: + pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy + freeze_params: true + return_all_feats: false + model_type: det + algorithm: DB + Transform: + Backbone: + name: ResNet + layers: 18 + Neck: + name: DBFPN + out_channels: 256 + Head: + name: DBHead + k: 50 + +Loss: + name: CombinedLoss + loss_config_list: + - DistillationDilaDBLoss: + weight: 1.0 + model_name_pairs: + - ["Student", "Teacher"] + - ["Student2", "Teacher"] + key: maps + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + - DistillationDMLLoss: + model_name_pairs: + - ["Student", "Student2"] + maps_name: "thrink_maps" + weight: 1.0 + act: "softmax" + model_name_pairs: ["Student", "Student2"] + key: maps + - DistillationDBLoss: + weight: 1.0 + model_name_list: ["Student", "Student2"] + # key: maps + name: DBLoss + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name: DistillationDBPostProcess + model_name: ["Student", "Student2"] + key: head_out + thresh: 0.3 + box_thresh: 0.6 + max_candidates: 1000 + unclip_ratio: 1.5 + +Metric: + name: DistillationMetric + base_metric_name: DetMetric + main_indicator: hmean + key: "Student" + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - IaaAugment: + augmenter_args: + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [-10, 10] } } + - { 'type': Resize, 'args': { 'size': [0.5, 3] } 
} + - EastRandomCropData: + size: [960, 960] + max_tries: 50 + keep_ratio: true + - MakeBorderMap: + shrink_ratio: 0.4 + thresh_min: 0.3 + thresh_max: 0.7 + - MakeShrinkMap: + shrink_ratio: 0.4 + min_text_size: 8 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: +# image_shape: [736, 1280] + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml index 5f0846fa1a..1159d71bf9 100644 --- a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_distill_v2.1.yml @@ -36,24 +36,6 @@ Architecture: Head: name: DBHead k: 50 - Student2: - pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained - freeze_params: false - return_all_feats: false - model_type: det - algorithm: DB - Transform: - Backbone: - name: MobileNetV3 - scale: 0.5 - model_name: large - disable_se: True - Neck: - name: DBFPN - out_channels: 96 - Head: - name: DBHead - k: 50 Teacher: pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy 
freeze_params: true @@ -78,24 +60,15 @@ Loss: weight: 1.0 model_name_pairs: - ["Student", "Teacher"] - - ["Student2", "Teacher"] key: maps balance_loss: true main_loss_type: DiceLoss alpha: 5 beta: 10 ohem_ratio: 3 - - DistillationDMLLoss: - model_name_pairs: - - ["Student", "Student2"] - maps_name: "thrink_maps" - weight: 1.0 - act: "softmax" - model_name_pairs: ["Student", "Student2"] - key: maps - DistillationDBLoss: weight: 1.0 - model_name_list: ["Student", "Student2"] + model_name_list: ["Student", "Teacher"] # key: maps name: DBLoss balance_loss: true @@ -104,7 +77,6 @@ Loss: beta: 10 ohem_ratio: 3 - Optimizer: name: Adam beta1: 0.9 diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_dml_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_dml_v2.1.yml new file mode 100644 index 0000000000..7fe2d2e1a0 --- /dev/null +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_dml_v2.1.yml @@ -0,0 +1,176 @@ +Global: + use_gpu: true + epoch_num: 1200 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/ch_db_mv3/ + save_epoch_step: 1200 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [3000, 2000] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_db/predicts_db.txt + +Architecture: + name: DistillationModel + algorithm: Distillation + Models: + Student: + pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + freeze_params: false + return_all_feats: false + model_type: det + algorithm: DB + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: DBFPN + out_channels: 96 + Head: + name: DBHead + k: 50 + Student2: + pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + freeze_params: false + return_all_feats: false + model_type: det + algorithm: DB + Transform: + 
Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: DBFPN + out_channels: 96 + Head: + name: DBHead + k: 50 + + +Loss: + name: CombinedLoss + loss_config_list: + - DistillationDMLLoss: + model_name_pairs: + - ["Student", "Student2"] + maps_name: "thrink_maps" + weight: 1.0 + act: "softmax" + model_name_pairs: ["Student", "Student2"] + key: maps + - DistillationDBLoss: + weight: 1.0 + model_name_list: ["Student", "Student2"] + # key: maps + name: DBLoss + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name: DistillationDBPostProcess + model_name: ["Student", "Student2"] + key: head_out + thresh: 0.3 + box_thresh: 0.6 + max_candidates: 1000 + unclip_ratio: 1.5 + +Metric: + name: DistillationMetric + base_metric_name: DetMetric + main_indicator: hmean + key: "Student" + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - IaaAugment: + augmenter_args: + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [-10, 10] } } + - { 'type': Resize, 'args': { 'size': [0.5, 3] } } + - EastRandomCropData: + size: [960, 960] + max_tries: 50 + keep_ratio: true + - MakeBorderMap: + shrink_ratio: 0.4 + thresh_min: 0.3 + thresh_max: 0.7 + - MakeShrinkMap: + shrink_ratio: 0.4 + min_text_size: 8 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: +# image_shape: [736, 1280] + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 From 75e42644a31248492b9e342d288945a9dce2f564 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Wed, 7 Jul 2021 12:09:37 +0800 Subject: [PATCH 20/28] unset env --- test/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test.sh b/test/test.sh index 1cbb36012b..f2ac3f8b29 100644 --- a/test/test.sh +++ b/test/test.sh @@ -219,6 +219,7 @@ for gpu in ${gpu_list[*]}; do eval $env save_infer_path="${save_log}" func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}" + eval "unset CUDA_VISIBLE_DEVICES" done done done From 0742f5c5213e44a88c8286026586bb9475376542 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Wed, 7 Jul 2021 07:54:02 +0000 Subject: [PATCH 21/28] fix metric etc.al --- .../ch_det_lite_train_cml_v2.1.yml | 8 ++-- ppocr/losses/basic_loss.py | 37 ++++++++++++++++--- ppocr/losses/combined_loss.py | 2 - ppocr/losses/distillation_loss.py | 18 ++++----- ppocr/metrics/det_metric.py | 4 ++ ppocr/postprocess/db_postprocess.py | 9 ++--- ppocr/utils/save_load.py | 9 
+++-- tools/eval.py | 6 ++- 8 files changed, 60 insertions(+), 33 deletions(-) diff --git a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml index 5f0846fa1a..dcf0e1f25f 100644 --- a/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml +++ b/configs/det/ch_ppocr_v2.1/ch_det_lite_train_cml_v2.1.yml @@ -90,14 +90,14 @@ Loss: - ["Student", "Student2"] maps_name: "thrink_maps" weight: 1.0 - act: "softmax" + # act: None model_name_pairs: ["Student", "Student2"] key: maps - DistillationDBLoss: weight: 1.0 model_name_list: ["Student", "Student2"] # key: maps - name: DBLoss + # name: DBLoss balance_loss: true main_loss_type: DiceLoss alpha: 5 @@ -119,8 +119,8 @@ Optimizer: PostProcess: name: DistillationDBPostProcess - model_name: ["Student", "Student2"] - key: head_out + model_name: ["Student", "Student2", "Teacher"] + # key: maps thresh: 0.3 box_thresh: 0.6 max_candidates: 1000 diff --git a/ppocr/losses/basic_loss.py b/ppocr/losses/basic_loss.py index fa3ceda1b7..8306523ac1 100644 --- a/ppocr/losses/basic_loss.py +++ b/ppocr/losses/basic_loss.py @@ -54,6 +54,27 @@ class CELoss(nn.Layer): return loss +class KLJSLoss(object): + def __init__(self, mode='kl'): + assert mode in ['kl', 'js', 'KL', 'JS'], "mode can only be one of ['kl', 'js', 'KL', 'JS']" + self.mode = mode + + def __call__(self, p1, p2, reduction="mean"): + + loss = paddle.multiply(p2, paddle.log( (p2+1e-5)/(p1+1e-5) + 1e-5)) + + if self.mode.lower() == "js": + loss += paddle.multiply(p1, paddle.log((p1+1e-5)/(p2+1e-5) + 1e-5)) + loss *= 0.5 + if reduction == "mean": + loss = paddle.mean(loss, axis=[1,2]) + elif reduction=="none" or reduction is None: + return loss + else: + loss = paddle.sum(loss, axis=[1,2]) + + return loss + class DMLLoss(nn.Layer): """ DMLLoss @@ -69,17 +90,21 @@ class DMLLoss(nn.Layer): self.act = nn.Sigmoid() else: self.act = None + + self.jskl_loss = KLJSLoss(mode="js") def forward(self, out1, out2): if self.act is 
not None: out1 = self.act(out1) out2 = self.act(out2) - - log_out1 = paddle.log(out1) - log_out2 = paddle.log(out2) - loss = (F.kl_div( - log_out1, out2, reduction='batchmean') + F.kl_div( - log_out2, out1, reduction='batchmean')) / 2.0 + if len(out1.shape) < 2: + log_out1 = paddle.log(out1) + log_out2 = paddle.log(out2) + loss = (F.kl_div( + log_out1, out2, reduction='batchmean') + F.kl_div( + log_out2, out1, reduction='batchmean')) / 2.0 + else: + loss = self.jskl_loss(out1, out2) return loss diff --git a/ppocr/losses/combined_loss.py b/ppocr/losses/combined_loss.py index f10efa31e2..0d6fe968d0 100644 --- a/ppocr/losses/combined_loss.py +++ b/ppocr/losses/combined_loss.py @@ -55,7 +55,5 @@ class CombinedLoss(nn.Layer): loss_all += loss[key] * weight else: loss_dict["{}_{}".format(key, idx)] = loss[key] - # loss[f"{key}_{idx}"] = loss[key] - loss_dict.update(loss) loss_dict["loss"] = loss_all return loss_dict diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index 43356c6f6e..75f0a77315 100644 --- a/ppocr/losses/distillation_loss.py +++ b/ppocr/losses/distillation_loss.py @@ -46,13 +46,13 @@ class DistillationDMLLoss(DMLLoss): act=None, key=None, maps_name=None, - name="loss_dml"): + name="dml"): super().__init__(act=act) assert isinstance(model_name_pairs, list) self.key = key self.model_name_pairs = self._check_model_name_pairs(model_name_pairs) self.name = name - self.maps_name = maps_name + self.maps_name = self._check_maps_name(maps_name) def _check_model_name_pairs(self, model_name_pairs): if not isinstance(model_name_pairs, list): @@ -76,11 +76,11 @@ class DistillationDMLLoss(DMLLoss): new_outs = {} for k in self.maps_name: if k == "thrink_maps": - new_outs[k] = paddle.slice(outs, axes=[1], starts=[0], ends=[1]) + new_outs[k] = outs[:, 0, :, :] elif k == "threshold_maps": - new_outs[k] = paddle.slice(outs, axes=[1], starts=[1], ends=[2]) + new_outs[k] = outs[:, 1, :, :] elif k == "binary_maps": - new_outs[k] = 
paddle.slice(outs, axes=[1], starts=[2], ends=[3]) + new_outs[k] = outs[:, 2, :, :] else: continue return new_outs @@ -105,16 +105,16 @@ class DistillationDMLLoss(DMLLoss): else: outs1 = self._slice_out(out1) outs2 = self._slice_out(out2) - for k in outs1.keys(): + for _c, k in enumerate(outs1.keys()): loss = super().forward(outs1[k], outs2[k]) if isinstance(loss, dict): for key in loss: loss_dict["{}_{}_{}_{}_{}".format(key, pair[ 0], pair[1], map_name, idx)] = loss[key] else: - loss_dict["{}_{}_{}".format(self.name, self.maps_name, + loss_dict["{}_{}_{}".format(self.name, self.maps_name[_c], idx)] = loss - + loss_dict = _sum_loss(loss_dict) return loss_dict @@ -152,7 +152,7 @@ class DistillationDBLoss(DBLoss): beta=10, ohem_ratio=3, eps=1e-6, - name="db_loss", + name="db", **kwargs): super().__init__() self.model_name_list = model_name_list diff --git a/ppocr/metrics/det_metric.py b/ppocr/metrics/det_metric.py index 0f9e94df42..811ee2fad5 100644 --- a/ppocr/metrics/det_metric.py +++ b/ppocr/metrics/det_metric.py @@ -55,6 +55,10 @@ class DetMetric(object): result = self.evaluator.evaluate_image(gt_info_list, det_info_list) self.results.append(result) + metircs = self.evaluator.combine_results(self.results) + self.reset() + return metircs + def get_metric(self): """ return metrics { diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py index f2b2fc69ef..e318c5254b 100755 --- a/ppocr/postprocess/db_postprocess.py +++ b/ppocr/postprocess/db_postprocess.py @@ -200,21 +200,18 @@ class DistillationDBPostProcess(DBPostProcess): use_dilation=False, score_mode="fast", **kwargs): - super(DistillationDBPostProcess, self).__init__( - thresh, box_thresh, max_candidates, unclip_ratio, use_dilation, - score_mode) + super().__init__() if not isinstance(model_name, list): model_name = [model_name] self.model_name = model_name - self.key = key - def forward(self, predicts, shape_list): + def __call__(self, predicts, shape_list): results = {} for 
name in self.model_name: pred = predicts[name] if self.key is not None: pred = pred[self.key] - results[name] = super().__call__(pred, shape_list=label) + results[name] = super().__call__(pred, shape_list=shape_list) return results diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 732f9e2048..4ee4b29f43 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -130,11 +130,12 @@ def load_pretrained_params(model, path): for k1, k2 in zip(state_dict.keys(), params.keys()): if list(state_dict[k1].shape) == list(params[k2].shape): new_state_dict[k1] = params[k2] - else: - print( - f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !" - ) + else: + print( + f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !" + ) model.set_state_dict(new_state_dict) + print(f"load pretrain successful from {path}") return True def save_model(model, diff --git a/tools/eval.py b/tools/eval.py index c1315805b5..022498bbef 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -55,8 +55,10 @@ def main(): model = build_model(config['Architecture']) use_srn = config['Architecture']['algorithm'] == "SRN" - model_type = config['Architecture']['model_type'] - + if "model_type" in config['Architecture'].keys(): + model_type = config['Architecture']['model_type'] + else: + model_type = None best_model_dict = init_model(config, model) if len(best_model_dict): logger.info('metric in ckpt ***************') From 4066478ea4a297312570c078de41191b5982ca6b Mon Sep 17 00:00:00 2001 From: Double_V Date: Thu, 8 Jul 2021 11:56:52 +0800 Subject: [PATCH 22/28] fix precision --- tools/infer/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/infer/utility.py b/tools/infer/utility.py index e464722f09..bf05a0dbaf 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -164,7 +164,7 @@ def create_predictor(args, mode, 
logger): config.enable_use_gpu(args.gpu_mem, 0) if args.use_tensorrt: config.enable_tensorrt_engine( - precision_mode=inference.PrecisionType.Float32, + precision_mode=precision, max_batch_size=args.max_batch_size, min_subgraph_size=args.min_subgraph_size) # skip the minmum trt subgraph From b8a65d4333cead891b6320b6f627f7aeb4fb155c Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Thu, 8 Jul 2021 14:32:44 +0000 Subject: [PATCH 23/28] fix eval bug --- ppocr/metrics/det_metric.py | 6 ++--- ppocr/metrics/distillation_metric.py | 11 +++----- .../architectures/distillation_model.py | 2 +- ppocr/postprocess/db_postprocess.py | 26 +++++++++---------- ppocr/utils/save_load.py | 2 +- tools/eval.py | 7 ++--- tools/program.py | 1 + tools/train.py | 4 +-- 8 files changed, 28 insertions(+), 31 deletions(-) diff --git a/ppocr/metrics/det_metric.py b/ppocr/metrics/det_metric.py index 811ee2fad5..e68cb3905b 100644 --- a/ppocr/metrics/det_metric.py +++ b/ppocr/metrics/det_metric.py @@ -55,9 +55,9 @@ class DetMetric(object): result = self.evaluator.evaluate_image(gt_info_list, det_info_list) self.results.append(result) - metircs = self.evaluator.combine_results(self.results) - self.reset() - return metircs + # metircs = self.evaluator.combine_results(self.results) + # self.reset() + # return metircs def get_metric(self): """ diff --git a/ppocr/metrics/distillation_metric.py b/ppocr/metrics/distillation_metric.py index a7d3d095a7..c440cebdd0 100644 --- a/ppocr/metrics/distillation_metric.py +++ b/ppocr/metrics/distillation_metric.py @@ -24,8 +24,8 @@ from .cls_metric import ClsMetric class DistillationMetric(object): def __init__(self, key=None, - base_metric_name="RecMetric", - main_indicator='acc', + base_metric_name=None, + main_indicator=None, **kwargs): self.main_indicator = main_indicator self.key = key @@ -42,16 +42,13 @@ class DistillationMetric(object): main_indicator=self.main_indicator, **self.kwargs) self.metrics[key].reset() - def __call__(self, preds, *args, **kwargs): + 
def __call__(self, preds, batch, **kwargs): assert isinstance(preds, dict) if self.metrics is None: self._init_metrcis(preds) output = dict() for key in preds: - metric = self.metrics[key].__call__(preds[key], *args, **kwargs) - for sub_key in metric: - output["{}_{}".format(key, sub_key)] = metric[sub_key] - return output + self.metrics[key].__call__(preds[key], batch, **kwargs) def get_metric(self): """ diff --git a/ppocr/modeling/architectures/distillation_model.py b/ppocr/modeling/architectures/distillation_model.py index 1e95fe5744..2b1d3aae3b 100644 --- a/ppocr/modeling/architectures/distillation_model.py +++ b/ppocr/modeling/architectures/distillation_model.py @@ -46,7 +46,7 @@ class DistillationModel(nn.Layer): pretrained = model_config.pop("pretrained") model = BaseModel(model_config) if pretrained is not None: - load_pretrained_params(model, pretrained) + model = load_pretrained_params(model, pretrained) if freeze_params: for param in model.parameters(): param.trainable = False diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py index e318c5254b..d9c9869dfc 100755 --- a/ppocr/postprocess/db_postprocess.py +++ b/ppocr/postprocess/db_postprocess.py @@ -189,29 +189,27 @@ class DBPostProcess(object): return boxes_batch -class DistillationDBPostProcess(DBPostProcess): - def __init__(self, - model_name=["student"], +class DistillationDBPostProcess(object): + def __init__(self, model_name=["student"], key=None, thresh=0.3, - box_thresh=0.7, + box_thresh=0.6, max_candidates=1000, - unclip_ratio=2.0, + unclip_ratio=1.5, use_dilation=False, score_mode="fast", **kwargs): - super().__init__() - if not isinstance(model_name, list): - model_name = [model_name] self.model_name = model_name self.key = key + self.post_process = DBPostProcess(thresh=thresh, + box_thresh=box_thresh, + max_candidates=max_candidates, + unclip_ratio=unclip_ratio, + use_dilation=use_dilation, + score_mode=score_mode) def __call__(self, predicts, shape_list): 
results = {} - for name in self.model_name: - pred = predicts[name] - if self.key is not None: - pred = pred[self.key] - results[name] = super().__call__(pred, shape_list=shape_list) - + for k in self.model_name: + results[k] = self.post_process(predicts[k], shape_list=shape_list) return results diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 4ee4b29f43..b3724c2ddd 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -136,7 +136,7 @@ def load_pretrained_params(model, path): ) model.set_state_dict(new_state_dict) print(f"load pretrain successful from {path}") - return True + return model def save_model(model, optimizer, diff --git a/tools/eval.py b/tools/eval.py index 022498bbef..c99c7d474d 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -27,7 +27,7 @@ from ppocr.data import build_dataloader from ppocr.modeling.architectures import build_model from ppocr.postprocess import build_post_process from ppocr.metrics import build_metric -from ppocr.utils.save_load import init_model +from ppocr.utils.save_load import init_model, load_pretrained_params from ppocr.utils.utility import print_dict import tools.program as program @@ -59,7 +59,8 @@ def main(): model_type = config['Architecture']['model_type'] else: model_type = None - best_model_dict = init_model(config, model) + + best_model_dict = init_model(config, model, model_type) if len(best_model_dict): logger.info('metric in ckpt ***************') for k, v in best_model_dict.items(): @@ -70,7 +71,7 @@ def main(): # start eval metric = program.eval(model, valid_dataloader, post_process_class, - eval_class, model_type, use_srn) + eval_class, model_type, use_srn) logger.info('metric eval ***************') for k, v in metric.items(): logger.info('{}:{}'.format(k, v)) diff --git a/tools/program.py b/tools/program.py index 595fe4cb96..4c12bc09cc 100755 --- a/tools/program.py +++ b/tools/program.py @@ -374,6 +374,7 @@ def eval(model, eval_class(preds, batch) else: post_result = 
post_process_class(preds, batch[1]) + # post_result = post_result_["Student"] eval_class(post_result, batch) pbar.update(1) total_frame += len(images) diff --git a/tools/train.py b/tools/train.py index 20f5a670d5..2091ff48b4 100755 --- a/tools/train.py +++ b/tools/train.py @@ -97,8 +97,8 @@ def main(config, device, logger, vdl_writer): # build metric eval_class = build_metric(config['Metric']) # load pretrain model - pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer) - + #pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer) + pre_best_model_dict = {} logger.info('train dataloader has {} iters'.format(len(train_dataloader))) if valid_dataloader is not None: logger.info('valid dataloader has {} iters'.format( From 9de346b2347fcc67c3219119be5980824878835b Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 9 Jul 2021 09:27:07 +0800 Subject: [PATCH 24/28] Update train.py --- tools/train.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/train.py b/tools/train.py index 2091ff48b4..05d295aa99 100755 --- a/tools/train.py +++ b/tools/train.py @@ -97,8 +97,7 @@ def main(config, device, logger, vdl_writer): # build metric eval_class = build_metric(config['Metric']) # load pretrain model - #pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer) - pre_best_model_dict = {} + pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer) logger.info('train dataloader has {} iters'.format(len(train_dataloader))) if valid_dataloader is not None: logger.info('valid dataloader has {} iters'.format( From 11151e51d20b351bbaf35c4137c46bda229e88fa Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 9 Jul 2021 09:27:35 +0800 Subject: [PATCH 25/28] Update program.py --- tools/program.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/program.py b/tools/program.py index 4c12bc09cc..595fe4cb96 100755 --- a/tools/program.py +++ b/tools/program.py @@ -374,7 +374,6 @@ def 
eval(model, eval_class(preds, batch) else: post_result = post_process_class(preds, batch[1]) - # post_result = post_result_["Student"] eval_class(post_result, batch) pbar.update(1) total_frame += len(images) From d2fce0a87e881af810ceb9fd8c8008c2aabda6b0 Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 9 Jul 2021 09:28:15 +0800 Subject: [PATCH 26/28] Update det_metric.py --- ppocr/metrics/det_metric.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ppocr/metrics/det_metric.py b/ppocr/metrics/det_metric.py index e68cb3905b..d3d3530425 100644 --- a/ppocr/metrics/det_metric.py +++ b/ppocr/metrics/det_metric.py @@ -55,9 +55,6 @@ class DetMetric(object): result = self.evaluator.evaluate_image(gt_info_list, det_info_list) self.results.append(result) - # metircs = self.evaluator.combine_results(self.results) - # self.reset() - # return metircs def get_metric(self): """ From 54ed8d062633c355dd212e9e2b8833a8c2c9bf00 Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 9 Jul 2021 11:38:01 +0800 Subject: [PATCH 27/28] fix ci --- tools/eval.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/eval.py b/tools/eval.py index c99c7d474d..8ae270b355 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -55,12 +55,8 @@ def main(): model = build_model(config['Architecture']) use_srn = config['Architecture']['algorithm'] == "SRN" - if "model_type" in config['Architecture'].keys(): - model_type = config['Architecture']['model_type'] - else: - model_type = None - best_model_dict = init_model(config, model, model_type) + best_model_dict = init_model(config, model) if len(best_model_dict): logger.info('metric in ckpt ***************') for k, v in best_model_dict.items(): From 76bb40fc3f4f585131e72b5eb598e8b84749dac8 Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 9 Jul 2021 14:29:39 +0800 Subject: [PATCH 28/28] fix ci --- tools/eval.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/eval.py b/tools/eval.py index 8ae270b355..0120baab0f 100755 --- 
a/tools/eval.py +++ b/tools/eval.py @@ -55,6 +55,10 @@ def main(): model = build_model(config['Architecture']) use_srn = config['Architecture']['algorithm'] == "SRN" + if "model_type" in config['Architecture'].keys(): + model_type = config['Architecture']['model_type'] + else: + model_type = None best_model_dict = init_model(config, model) if len(best_model_dict):