Merge remote-tracking branch 'origin/dygraph' into dygraph
@@ -1,5 +1,5 @@

-# PaddleOCR DB/EAST training benchmark test
+# PaddleOCR DB/EAST/PSE training benchmark test

The files under PaddleOCR/benchmark are used to collect and analyze training logs.
Training uses the icdar2015 dataset, which contains 1000 training images and 500 test images. The model config uses resnet18_vd as the backbone, and both batch_size=8 and batch_size=16 are trained.

@@ -18,7 +18,7 @@ run_det.sh is executed as follows:

```
# cd PaddleOCR/
-bash benchmark/run_det.sh
+bash benchmark/run_det.sh
```

Taking DB as an example, four log files are generated, as follows:

@@ -28,7 +28,3 @@ det_res18_db_v2.0_sp_bs8_fp32_1
det_res18_db_v2.0_mp_bs16_fp32_1
det_res18_db_v2.0_mp_bs8_fp32_1
```
-
-
-
-
@@ -6,7 +6,7 @@ function _set_params(){
    run_mode=${1:-"sp"}            # single-GPU sp | multi-GPU mp
    batch_size=${2:-"64"}
    fp_item=${3:-"fp32"}           # fp32|fp16
-   max_iter=${4:-"500"}           # optional; modify the code if an early stop is needed
+   max_iter=${4:-"10"}            # optional; modify the code if an early stop is needed
    model_name=${5:-"model_name"}
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR will be set by QA later

@@ -20,7 +20,7 @@ function _train(){
    echo "Train on ${num_gpu_devices} GPUs"
    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

-   train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} "
+   train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
    case ${run_mode} in
    sp)
        train_cmd="python3.7 tools/train.py "${train_cmd}""

@@ -39,18 +39,24 @@ function _train(){
        echo -e "${model_name}, SUCCESS"
        export job_fail_flag=0
    fi
-   kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`

    if [ $run_mode = "mp" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.0 ${log_file}
    fi
}

# run log analysis
-analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_szie} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec"
+function _analysis_log(){
+   analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${run_mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_size} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec"
+   eval $analysis_cmd
+}
+
+function _kill_process(){
+   kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`
+}

_set_params $@
_train
+_analysis_log
+_kill_process
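For reference, the `--keyword 'ips:'` and `--skip_steps` flags suggest the analysis step scans the training log for throughput entries and averages them after dropping warmup steps. A minimal Python sketch of that idea, assuming a log line format like `... ips: 123.4 samples/sec` (the real benchmark/analysis.py may differ):

```python
# Hedged sketch of the ips-averaging that "--keyword 'ips:' --skip_steps 1"
# implies; the log format and analysis.py internals are assumptions here.
import re

def average_ips(log_path, keyword="ips:", skip_steps=1):
    """Average the throughput values printed after `keyword` in a train log."""
    pattern = re.compile(re.escape(keyword) + r"\s*([0-9.]+)")
    values = []
    with open(log_path) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                values.append(float(m.group(1)))
    values = values[skip_steps:]  # drop warmup steps, as --skip_steps does
    if not values:
        raise ValueError("not enough '%s' entries in %s" % (keyword, log_path))
    return sum(values) / len(values)

if __name__ == "__main__":
    print(average_ips("det_res18_db_v2.0_sp_bs8_fp32_1"), "samples/sec")
```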
@@ -3,11 +3,11 @@
# 1. Install the dependencies this model needs (note here if optimization strategies are enabled)
python3.7 -m pip install -r requirements.txt
# 2. Copy the data and pretrained models this model needs
-wget -c -p ./tain_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
-wget -c -p ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
+wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
+wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
# 3. Run in batch (if batch runs are inconvenient, steps 1 and 2 must go into each individual model)

-model_mode_list=(det_res18_db_v2.0 det_r50_vd_east)
+model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse)
fp_item_list=(fp32)
bs_list=(8 16)
for model_mode in ${model_mode_list[@]}; do

@@ -15,11 +15,11 @@ for model_mode in ${model_mode_list[@]}; do
    for bs_item in ${bs_list[@]}; do
        echo "index is speed, 1gpus, begin, ${model_name}"
        run_mode=sp
-       CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode} # (5min)
+       CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} # (5min)
        sleep 60
        echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
        run_mode=mp
-       CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode}
+       CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode}
        sleep 60
    done
done
@@ -17,7 +17,7 @@ Global:
  character_dict_path: ppocr/utils/EN_symbol_dict.txt
  max_text_length: 25
  infer_mode: False
-  use_space_char: True
+  use_space_char: False
  save_res_path: ./output/rec/predicts_nrtr.txt

Optimizer:
@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

-#include "paddle_api.h" // NOLINT
+#include <chrono>
+#include "paddle_api.h" // NOLINT
#include "paddle_place.h"

#include "cls_process.h"
#include "crnn_process.h"
#include "db_post_process.h"
+#include "AutoLog/auto_log/lite_autolog.h"

using namespace paddle::lite_api; // NOLINT
using namespace std;
@@ -27,7 +29,7 @@ void NeonMeanScale(const float *din, float *dout, int size,
                   const std::vector<float> mean,
                   const std::vector<float> scale) {
  if (mean.size() != 3 || scale.size() != 3) {
-    std::cerr << "[ERROR] mean or scale size must equal to 3\n";
+    std::cerr << "[ERROR] mean or scale size must equal to 3" << std::endl;
    exit(1);
  }
  float32x4_t vmean0 = vdupq_n_f32(mean[0]);
@@ -159,7 +161,8 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
                 std::vector<float> &rec_text_score,
                 std::vector<std::string> charactor_dict,
                 std::shared_ptr<PaddlePredictor> predictor_cls,
-                 int use_direction_classify) {
+                 int use_direction_classify,
+                 std::vector<double> *times) {
  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
@@ -226,14 +229,15 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,

std::vector<std::vector<std::vector<int>>>
RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
-            std::map<std::string, double> Config) {
+            std::map<std::string, double> Config, std::vector<double> *times) {
  // Read img
  int max_side_len = int(Config["max_side_len"]);
  int det_db_use_dilate = int(Config["det_db_use_dilate"]);

  cv::Mat srcimg;
  img.copyTo(srcimg);

+  auto preprocess_start = std::chrono::steady_clock::now();
  std::vector<float> ratio_hw;
  img = DetResizeImg(img, max_side_len, ratio_hw);
  cv::Mat img_fp;
@@ -248,8 +252,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
  std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
  const float *dimg = reinterpret_cast<const float *>(img_fp.data);
  NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);
+  auto preprocess_end = std::chrono::steady_clock::now();

  // Run predictor
+  auto inference_start = std::chrono::steady_clock::now();
  predictor->Run();

  // Get output and post process
@@ -257,8 +263,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
      std::move(predictor->GetOutput(0)));
  auto *outptr = output_tensor->data<float>();
  auto shape_out = output_tensor->shape();
+  auto inference_end = std::chrono::steady_clock::now();

  // Save output
+  auto postprocess_start = std::chrono::steady_clock::now();
  float pred[shape_out[2] * shape_out[3]];
  unsigned char cbuf[shape_out[2] * shape_out[3]];
@@ -287,14 +295,35 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,

  std::vector<std::vector<std::vector<int>>> filter_boxes =
      FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
+  auto postprocess_end = std::chrono::steady_clock::now();

+  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
+  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> inference_diff = inference_end - inference_start;
+  times->push_back(double(inference_diff.count() * 1000));
+  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));

  return filter_boxes;
}

-std::shared_ptr<PaddlePredictor> loadModel(std::string model_file) {
+std::shared_ptr<PaddlePredictor> loadModel(std::string model_file, std::string power_mode, int num_threads) {
  MobileConfig config;
  config.set_model_from_file(model_file);

+  if (power_mode == "LITE_POWER_HIGH"){
+    config.set_power_mode(LITE_POWER_HIGH);
+  } else {
+    if (power_mode == "LITE_POWER_LOW") {
+      config.set_power_mode(LITE_POWER_LOW);
+    } else {
+      std::cerr << "Only support LITE_POWER_HIGH or LITE_POWER_LOW." << std::endl;
+      exit(1);
+    }
+  }
+
+  config.set_threads(num_threads);

  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);
  return predictor;
@@ -354,60 +383,255 @@ std::map<std::string, double> LoadConfigTxt(std::string config_path) {
  return dict;
}

-int main(int argc, char **argv) {
-  if (argc < 5) {
-    std::cerr << "[ERROR] usage: " << argv[0]
-              << " det_model_file cls_model_file rec_model_file image_path "
-                 "charactor_dict\n";
+void check_params(int argc, char **argv) {
+  if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) {
+    std::cerr << "Please choose one mode of [det, rec, system] !" << std::endl;
    exit(1);
  }
-  std::string det_model_file = argv[1];
-  std::string rec_model_file = argv[2];
-  std::string cls_model_file = argv[3];
-  std::string img_path = argv[4];
-  std::string dict_path = argv[5];
+  if (strcmp(argv[1], "det") == 0) {
+    if (argc < 9){
+      std::cerr << "[ERROR] usage:" << argv[0]
+                << " det det_model num_threads batchsize power_mode img_dir det_config lite_benchmark_value" << std::endl;
+      exit(1);
+    }
+  }
+
+  if (strcmp(argv[1], "rec") == 0) {
+    if (argc < 9){
+      std::cerr << "[ERROR] usage:" << argv[0]
+                << " rec rec_model num_threads batchsize power_mode img_dir key_txt lite_benchmark_value" << std::endl;
+      exit(1);
+    }
+  }
+
+  if (strcmp(argv[1], "system") == 0) {
+    if (argc < 12){
+      std::cerr << "[ERROR] usage:" << argv[0]
+                << " system det_model rec_model clas_model num_threads batchsize power_mode img_dir det_config key_txt lite_benchmark_value" << std::endl;
+      exit(1);
+    }
+  }
+}
+
+void system(char **argv){
+  std::string det_model_file = argv[2];
+  std::string rec_model_file = argv[3];
+  std::string cls_model_file = argv[4];
+  std::string precision = argv[5];
+  std::string num_threads = argv[6];
+  std::string batchsize = argv[7];
+  std::string power_mode = argv[8];
+  std::string img_dir = argv[9];
+  std::string det_config_path = argv[10];
+  std::string dict_path = argv[11];
+
+  if (strcmp(argv[5], "FP32") != 0 && strcmp(argv[5], "INT8") != 0) {
+    std::cerr << "Only support FP32 or INT8." << std::endl;
+    exit(1);
+  }
+
+  std::vector<cv::String> cv_all_img_names;
+  cv::glob(img_dir, cv_all_img_names);

  //// load config from txt file
-  auto Config = LoadConfigTxt("./config.txt");
+  auto Config = LoadConfigTxt(det_config_path);
  int use_direction_classify = int(Config["use_direction_classify"]);

-  auto start = std::chrono::system_clock::now();
-
-  auto det_predictor = loadModel(det_model_file);
-  auto rec_predictor = loadModel(rec_model_file);
-  auto cls_predictor = loadModel(cls_model_file);
+  auto det_predictor = loadModel(det_model_file, power_mode, std::stoi(num_threads));
+  auto rec_predictor = loadModel(rec_model_file, power_mode, std::stoi(num_threads));
+  auto cls_predictor = loadModel(cls_model_file, power_mode, std::stoi(num_threads));

  auto charactor_dict = ReadDict(dict_path);
  charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
  charactor_dict.push_back(" ");

-  cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
-  auto boxes = RunDetModel(det_predictor, srcimg, Config);
-
-  std::vector<std::string> rec_text;
-  std::vector<float> rec_text_score;
-
-  RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
-              charactor_dict, cls_predictor, use_direction_classify);
-
-  auto end = std::chrono::system_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+  for (int i = 0; i < cv_all_img_names.size(); ++i) {
+    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
+      exit(1);
+    }
+
+    std::vector<double> det_times;
+    auto boxes = RunDetModel(det_predictor, srcimg, Config, &det_times);
+
+    std::vector<std::string> rec_text;
+    std::vector<float> rec_text_score;
+
+    std::vector<double> rec_times;
+    RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
+                charactor_dict, cls_predictor, use_direction_classify, &rec_times);

-  //// visualization
-  auto img_vis = Visualization(srcimg, boxes);
+    //// visualization
+    auto img_vis = Visualization(srcimg, boxes);

-  //// print recognized text
-  for (int i = 0; i < rec_text.size(); i++) {
-    std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
-              << std::endl;
+    //// print recognized text
+    for (int i = 0; i < rec_text.size(); i++) {
+      std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
+                << std::endl;
+    }
  }
+}

-  std::cout << "花费了"
-            << double(duration.count()) *
-                   std::chrono::microseconds::period::num /
-                   std::chrono::microseconds::period::den
-            << "秒" << std::endl;
+void det(int argc, char **argv) {
+  std::string det_model_file = argv[2];
+  std::string precision = argv[3];
+  std::string num_threads = argv[4];
+  std::string batchsize = argv[5];
+  std::string power_mode = argv[6];
+  std::string img_dir = argv[7];
+  std::string det_config_path = argv[8];
+
+  if (strcmp(argv[3], "FP32") != 0 && strcmp(argv[3], "INT8") != 0) {
+    std::cerr << "Only support FP32 or INT8." << std::endl;
+    exit(1);
+  }
+
+  std::vector<cv::String> cv_all_img_names;
+  cv::glob(img_dir, cv_all_img_names);
+
+  //// load config from txt file
+  auto Config = LoadConfigTxt(det_config_path);
+
+  auto det_predictor = loadModel(det_model_file, power_mode, std::stoi(num_threads));
+
+  std::vector<double> time_info = {0, 0, 0};
+  for (int i = 0; i < cv_all_img_names.size(); ++i) {
+    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
+      exit(1);
+    }
+
+    std::vector<double> times;
+    auto boxes = RunDetModel(det_predictor, srcimg, Config, &times);
+
+    //// visualization
+    auto img_vis = Visualization(srcimg, boxes);
+    std::cout << boxes.size() << " bboxes have detected:" << std::endl;
+
+    // for (int i=0; i<boxes.size(); i++){
+    //   std::cout << "The " << i << " box:" << std::endl;
+    //   for (int j=0; j<4; j++){
+    //     for (int k=0; k<2; k++){
+    //       std::cout << boxes[i][j][k] << "\t";
+    //     }
+    //   }
+    //   std::cout << std::endl;
+    // }
+    time_info[0] += times[0];
+    time_info[1] += times[1];
+    time_info[2] += times[2];
+  }
+
+  if (strcmp(argv[9], "True") == 0) {
+    AutoLogger autolog(det_model_file,
+                       0,
+                       0,
+                       0,
+                       std::stoi(num_threads),
+                       std::stoi(batchsize),
+                       "dynamic",
+                       precision,
+                       power_mode,
+                       time_info,
+                       cv_all_img_names.size());
+    autolog.report();
+  }
+}
+
+void rec(int argc, char **argv) {
+  std::string rec_model_file = argv[2];
+  std::string precision = argv[3];
+  std::string num_threads = argv[4];
+  std::string batchsize = argv[5];
+  std::string power_mode = argv[6];
+  std::string img_dir = argv[7];
+  std::string dict_path = argv[8];
+
+  if (strcmp(argv[3], "FP32") != 0 && strcmp(argv[3], "INT8") != 0) {
+    std::cerr << "Only support FP32 or INT8." << std::endl;
+    exit(1);
+  }
+
+  std::vector<cv::String> cv_all_img_names;
+  cv::glob(img_dir, cv_all_img_names);
+
+  auto charactor_dict = ReadDict(dict_path);
+  charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
+  charactor_dict.push_back(" ");
+
+  auto rec_predictor = loadModel(rec_model_file, power_mode, std::stoi(num_threads));
+
+  std::shared_ptr<PaddlePredictor> cls_predictor;
+
+  std::vector<double> time_info = {0, 0, 0};
+  for (int i = 0; i < cv_all_img_names.size(); ++i) {
+    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
+      exit(1);
+    }
+
+    int width = srcimg.cols;
+    int height = srcimg.rows;
+    std::vector<int> upper_left = {0, 0};
+    std::vector<int> upper_right = {width, 0};
+    std::vector<int> lower_right = {width, height};
+    std::vector<int> lower_left = {0, height};
+    std::vector<std::vector<int>> box = {upper_left, upper_right, lower_right, lower_left};
+    std::vector<std::vector<std::vector<int>>> boxes = {box};
+
+    std::vector<std::string> rec_text;
+    std::vector<float> rec_text_score;
+    std::vector<double> times;
+    RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
+                charactor_dict, cls_predictor, 0, &times);
+
+    //// print recognized text
+    for (int i = 0; i < rec_text.size(); i++) {
+      std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
+                << std::endl;
+    }
+  }
+  // TODO: support autolog
+  if (strcmp(argv[9], "True") == 0) {
+    AutoLogger autolog(rec_model_file,
+                       0,
+                       0,
+                       0,
+                       std::stoi(num_threads),
+                       std::stoi(batchsize),
+                       "dynamic",
+                       precision,
+                       power_mode,
+                       time_info,
+                       cv_all_img_names.size());
+    autolog.report();
+  }
+}
+
+int main(int argc, char **argv) {
+  check_params(argc, argv);
+  std::cout << "mode: " << argv[1] << endl;
+
+  if (strcmp(argv[1], "system") == 0) {
+    system(argv);
+  }
+
+  if (strcmp(argv[1], "det") == 0) {
+    det(argc, argv);
+  }
+
+  if (strcmp(argv[1], "rec") == 0) {
+    rec(argc, argv);
+  }

  return 0;
}
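The change above threads a `times` vector through RunDetModel/RunRecModel so each stage reports preprocess/inference/postprocess latency in milliseconds, which `det()` then sums into `time_info` for the AutoLogger report. A minimal Python sketch of the same accumulation pattern, with the stage bodies stubbed out (illustrative only, not PaddleOCR API):

```python
# Hedged sketch of the per-stage timing pattern the C++ change introduces.
import time

def timed_stages(times):
    """Append per-stage elapsed milliseconds, like the C++ `times` vector."""
    t0 = time.perf_counter()
    # ... preprocess would run here ...
    times.append((time.perf_counter() - t0) * 1000)
    t0 = time.perf_counter()
    # ... predictor.Run() would run here ...
    times.append((time.perf_counter() - t0) * 1000)
    t0 = time.perf_counter()
    # ... postprocess would run here ...
    times.append((time.perf_counter() - t0) * 1000)

time_info = [0.0, 0.0, 0.0]  # accumulated preprocess/inference/postprocess ms
images = ["img_233.jpg", "img_001.jpg"]
for _ in images:
    times = []
    timed_stages(times)
    time_info = [acc + t for acc, t in zip(time_info, times)]

print("avg ms per stage:", [round(t / len(images), 3) for t in time_info])
```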
@@ -64,7 +64,7 @@ C-CTC Loss is short for CTC Loss + Center Loss, where Center Loss comes from the paper <

Taking the config file `configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml` as an example, the center-extraction command is as follows:
```
-python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model: "./output/rec_mobile_pp-OCRv2/best_accuracy"
+python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model="./output/rec_mobile_pp-OCRv2/best_accuracy"
```
After it finishes, `train_center.pkl` is generated under the PaddleOCR root directory.
@@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer):
            log_prb = F.log_softmax(pred, axis=1)
            non_pad_mask = paddle.not_equal(
                tgt, paddle.zeros(
-                    tgt.shape, dtype='int64'))
+                    tgt.shape, dtype=tgt.dtype))
            loss = -(one_hot * log_prb).sum(axis=1)
            loss = loss.masked_select(non_pad_mask).mean()
        else:
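The fix makes the zeros tensor inherit `tgt.dtype` instead of hardcoding `'int64'`, since elementwise `paddle.not_equal` expects both operands to share a dtype. A minimal sketch of the masking this enables, with illustrative shapes and values:

```python
# Hedged sketch: the padding mask only works when the comparison tensor
# matches the label tensor's dtype, which dtype=tgt.dtype guarantees.
import paddle

tgt = paddle.to_tensor([3, 5, 0, 0], dtype='int32')   # 0 = padding id
pad = paddle.zeros(tgt.shape, dtype=tgt.dtype)        # int32 here, matching tgt
non_pad_mask = paddle.not_equal(tgt, pad)             # [True, True, False, False]

loss = paddle.to_tensor([0.7, 0.2, 0.9, 0.4])
print(loss.masked_select(non_pad_mask).mean())        # mean over non-pad positions
```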
@@ -29,10 +29,7 @@ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, Di
    TableLabelDecode, NRTRLabelDecode, SARLabelDecode , SEEDLabelDecode
from .cls_postprocess import ClsPostProcess
from .pg_postprocess import PGPostProcess

-if platform.system() != "Windows":
-    # pse is not support in Windows
-    from .pse_postprocess import PSEPostProcess
+from .pse_postprocess import PSEPostProcess


def build_post_process(config, global_config=None):
@@ -17,7 +17,12 @@ import subprocess

python_path = sys.executable

-if subprocess.call('cd ppocr/postprocess/pse_postprocess/pse;{} setup.py build_ext --inplace;cd -'.format(python_path), shell=True) != 0:
-    raise RuntimeError('Cannot compile pse: {}'.format(os.path.dirname(os.path.realpath(__file__))))
+ori_path = os.getcwd()
+os.chdir('ppocr/postprocess/pse_postprocess/pse')
+if subprocess.call(
+        '{} setup.py build_ext --inplace'.format(python_path), shell=True) != 0:
+    raise RuntimeError('Cannot compile pse: {}'.format(
+        os.path.dirname(os.path.realpath(__file__))))
+os.chdir(ori_path)

from .pse import pse
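The new code saves the working directory, builds the extension inside the package directory, and then restores the old directory. A hedged generalization of that pattern (not code from this PR) wraps the save/restore in a context manager so the cwd is restored even when the build fails:

```python
# Sketch: the chdir-build-restore pattern as a context manager, so the
# working directory is restored even if the build raises. This generalizes
# the PR's version, which restores the cwd only on the success path.
import os
import subprocess
import sys
from contextlib import contextmanager

@contextmanager
def pushd(path):
    """Temporarily chdir into `path`, always restoring the previous cwd."""
    ori_path = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(ori_path)

def build_ext_inplace(pkg_dir):
    with pushd(pkg_dir):
        cmd = '{} setup.py build_ext --inplace'.format(sys.executable)
        if subprocess.call(cmd, shell=True) != 0:
            raise RuntimeError('Cannot compile extension in {}'.format(pkg_dir))
```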
@@ -32,6 +32,7 @@ def run_shell_command(cmd):
    else:
        return None

+
def parser_results_from_log_by_name(log_path, names_list):
    if not os.path.exists(log_path):
        raise ValueError("The log file {} does not exists!".format(log_path))

@@ -52,6 +53,7 @@ def parser_results_from_log_by_name(log_path, names_list):
        parser_results[name] = result
    return parser_results

+
def load_gt_from_file(gt_file):
    if not os.path.exists(gt_file):
        raise ValueError("The log file {} does not exists!".format(gt_file))
@@ -1,21 +1,21 @@
===========================train_params===========================
model_name:ocr_det
python:python3.7
-gpu_list:0|0,1
-Global.use_gpu:True|True
-Global.auto_cast:null
-Global.epoch_num:lite_train_infer=1|whole_train_infer=300
+gpu_list:0|0,1|10.21.226.181,10.21.226.133;0,1
+Global.use_gpu:True|True|True
+Global.auto_cast:fp32|amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
Global.save_model_dir:./output/
-Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train|fpgm_train
-norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
-pact_train:deploy/slim/quantization/quant.py -c tests/configs/det_mv3_db.yml -o
-fpgm_train:deploy/slim/prune/sensitivity_anal.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
distill_train:null
null:null
null:null

@@ -27,13 +27,13 @@ null:null
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
-norm_export:tools/export_model.py -c tests/configs/det_mv3_db.yml -o
-quant_export:deploy/slim/quantization/export_model.py -c tests/configs/det_mv3_db.yml -o
-fpgm_export:deploy/slim/prune/export_prune_model.py -c tests/configs/det_mv3_db.yml -o
+norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o
+fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o
distill_export:null
export1:null
export2:null
##
inference_dir:null
train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy
infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
infer_quant:False

@@ -98,3 +98,13 @@ null:null
--benchmark:True
null:null
null:null
+===========================lite_params===========================
+inference:./ocr_db_crnn det
+infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb
+--cpu_threads:1|4
+--batch_size:1
+--power_mode:LITE_POWER_HIGH|LITE_POWER_LOW
+--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg
+--config_dir:./config.txt
+--rec_dict_dir:./ppocr_keys_v1.txt
+--benchmark:True
@@ -0,0 +1,51 @@
+===========================train_params===========================
+model_name:PPOCRv2_ocr_det
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_infer=1|whole_train_infer=500
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train|pact_train
+norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
+pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
+quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
@@ -0,0 +1,121 @@
+## 1. Environment preparation
+
+This tutorial covers setting up the runtime environment for the basic function tests under the PTDN directory.
+
+Recommended environment:
+- CUDA 10.1/10.2
+- CUDNN 7.6/CUDNN 8.1
+- TensorRT 6.1.0.5 / 7.1 / 7.2
+
+The environment can be set up either from a docker image or in a local Python environment. The docker image is recommended, as it avoids unnecessary configuration work.
+
+## 2. Docker image installation
+
+Installing from the docker image is recommended. Create the container with the following command; the current directory is mapped to `/paddle` inside the image.
+```
+nvidia-docker run --name paddle -it -v $PWD:/paddle paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
+cd /paddle
+
+# Install paddle with TRT support
+pip3.7 install https://paddle-wheel.bj.bcebos.com/with-trt/2.1.3/linux-gpu-cuda10.1-cudnn7-mkl-gcc8.2-trt6-avx/paddlepaddle_gpu-2.1.3.post101-cp37-cp37m-linux_x86_64.whl
+```
+
+## 3. Python environment setup
+
+Outside docker, the configuration is more flexible. Recommended combinations:
+- CUDA10.1 + CUDNN7.6 + TensorRT 6
+- CUDA10.2 + CUDNN8.1 + TensorRT 7
+- CUDA11.1 + CUDNN8.1 + TensorRT 7
+
+The following walks through the CUDA10.2 + CUDNN8.1 + TensorRT 7 combination.
+
+### 3.1 Install CUDNN
+
+Skip this step if the current environment already satisfies the CUDNN version requirement.
+
+Taking CUDNN 8.1 as an example: first download CUDNN 8.1 from the [Nvidia website](https://developer.nvidia.com/rdp/cudnn-archive), choosing the three deb files matching your system version:
+- cuDNN Runtime Library, e.g. libcudnn8_8.1.0.77-1+cuda10.2_amd64.deb
+- cuDNN Developer Library, e.g. libcudnn8-dev_8.1.0.77-1+cuda10.2_amd64.deb
+- cuDNN Code Samples, e.g. libcudnn8-samples_8.1.0.77-1+cuda10.2_amd64.deb
+
+The deb installation follows the [official documentation](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#installlinux-deb):
+```
+# x.x.x is the downloaded version number
+# $HOME is the working directory
+sudo dpkg -i libcudnn8_x.x.x-1+cudax.x_arm64.deb
+sudo dpkg -i libcudnn8-dev_8.x.x.x-1+cudax.x_arm64.deb
+sudo dpkg -i libcudnn8-samples_8.x.x.x-1+cudax.x_arm64.deb
+
+# Verify the installation
+cp -r /usr/src/cudnn_samples_v8/ $HOME
+cd $HOME/cudnn_samples_v8/mnistCUDNN
+
+# Build
+make clean && make
+./mnistCUDNN
+```
+If mnistCUDNN reports success, the installation is correct. If it fails with a freeimage-related error, install the freeimage library as prompted:
+```
+sudo apt-get install libfreeimage-dev
+sudo apt-get install libfreeimage
+```
+
+### 3.2 Install TensorRT
+
+First download TensorRT from the [Nvidia TensorRT page](https://developer.nvidia.com/tensorrt-getting-started); version 7.1.3.4 is used here. Pick the TensorRT matching your system and CUDA version, and prefer the TAR package.
+
+Taking Ubuntu16.04 + CUDA10.2 as an example, download, unpack, and follow the installation steps in the [official documentation](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-713/install-guide/index.html#installing-tar):
+```
+# In the commands below, '${version}' is the downloaded TensorRT version, e.g. 7.1.3.4
+# Set the environment variable; <TensorRT-${version}/lib> is the unpacked TensorRT lib directory
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<TensorRT-${version}/lib>
+
+# Install TensorRT
+cd TensorRT-${version}/python
+pip3.7 install tensorrt-*-cp3x-none-linux_x86_64.whl
+
+# Install graphsurgeon
+cd TensorRT-${version}/graphsurgeon
+```
+
+
+### 3.3 Install PaddlePaddle
+
+Download a Paddle package built with TensorRT support; its TensorRT version must match the locally installed TensorRT. Download [link](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#python).
+Choose the linux-cuda10.2-trt7-gcc8.2 Python3.7 Paddle build:
+```
+# As the download link shows, this is the paddle2.1.1-cuda10.2-cudnn8.1 build
+wget https://paddle-wheel.bj.bcebos.com/with-trt/2.1.1-gpu-cuda10.2-cudnn8.1-mkl-gcc8.2/paddlepaddle_gpu-2.1.1-cp37-cp37m-linux_x86_64.whl
+pip3.7 install -U paddlepaddle_gpu-2.1.1-cp37-cp37m-linux_x86_64.whl
+```
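After installing the wheel, a quick sanity check can confirm the GPU build is usable. This snippet is a suggestion, not part of this diff; `paddle.utils.run_check()` is Paddle's built-in installation check:

```python
# Post-install sanity check for the GPU wheel (suggested, not part of this PR).
import paddle

paddle.utils.run_check()      # runs a small program on the visible GPUs
print(paddle.version.cuda())  # CUDA version the wheel was built against
```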
+## 4. Install PaddleOCR dependencies
+```
+# Install AutoLog
+git clone https://github.com/LDOUBLEV/AutoLog
+cd AutoLog
+pip3.7 install -r requirements.txt
+python3.7 setup.py bdist_wheel
+pip3.7 install ./dist/auto_log-1.0.0-py3-none-any.whl
+
+# Download the OCR code
+cd ../
+git clone https://github.com/PaddlePaddle/PaddleOCR
+
+```
+
+Install the PaddleOCR dependencies:
+```
+cd PaddleOCR
+pip3.7 install -r requirements.txt
+```
+
+## FAQ:
+Q. You are using Paddle compiled with TensorRT, but TensorRT dynamic library is not found. Ignore this if TensorRT is not needed.
+
+A. This usually means the installed paddle build has TRT enabled but the TensorRT libraries cannot be found locally. Download the TensorRT libraries, unpack them, and set the LD_LIBRARY_PATH environment variable,
+e.g.:
+```
+export LD_LIBRARY_PATH=/usr/local/python3.7.0/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/paddle/package/TensorRT-6.0.1.5/lib
+```
+Alternatively, the downloaded TensorRT version may not match the TRT version paddle was compiled against; download a matching TensorRT and reinstall.
@@ -0,0 +1,128 @@
+# Connecting an Android phone to a computer via Termux
+
+After connecting a phone over adb, many linux commands cannot run and automated testing is blocked, so this document introduces an alternative connection method via Termux. It can run most linux commands, lets developers debug online on the phone, and even allows multiple machines to connect to the phone at the same time. Termux is not a real Linux environment, but it can install a real Linux without losing performance, and it does not require root. Before configuring Termux, make sure adb is installed on the computer; see [Lite deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md) for installation. After running the following command, the Android device info should be shown.
+
+```
+adb devices
+```
+On success it prints:
+```
+List of devices attached
+744be294    device
+```
+
+## 1. Install the termux app on the Android phone
+
+### 1.1 Download the termux apk file
+
+Since the app is currently unavailable in the major app stores, download the apk file directly.
+
+Open a terminal on the computer and run:
+
+```
+wget http://10.12.121.133:8911/cuicheng01/fullchain/termux-v1.0.3.apk
+```
+
+### 1.2 Install termux onto the phone
+
+In the phone's developer mode, allow USB debugging and USB installation. On the computer, run the following to install the termux app onto the phone:
+
+```
+adb install termux-v1.0.3.apk
+```
+
+The phone will ask for confirmation; confirm the install.
+
+### 1.3 Verify the installation
+
+On the phone, check that termux installed correctly. If not, repeat 1.2. If the app is present, open it and you will see the following.
+
+<img src="termux.jpg" width="300" height = "300">
+
+The environment setup that follows is done in this terminal on the phone.
+
+## 2. Configure termux on the phone
+
+First connect the phone to a network, ideally with external access, since parts of the setup need it. Open the Termux terminal and install the base package `proot`; the `termux-chroot` command then simulates a root environment with a standard Linux directory layout.
+
+```
+pkg i -y proot
+termux-chroot
+```
+
+By default Termux can only access its own internal data. To access other data on the phone, run the command below; the phone pops up a permission request, which you should allow (useful for viewing some results on the phone).
+
+```
+termux-setup-storage
+```
+
+### 2.1 Configure SSH
+
+As a Linux terminal or server, SSH is a must. Whether you SSH into Termux or use Termux to connect to other hosts, openssh must be installed first. If the installation fails, re-run the command.
+
+```
+pkg i -y openssh
+```
+
+Start the SSH server; the default port is 8022:
+
+```
+sshd
+```
+
+
+### 2.2 Connect to the phone from the computer over SSH
+
+1. Make sure the phone and computer are on the same LAN.
+On the phone, get the ip address and current user:
+
+```
+# Get the ip address
+ifconfig
+
+# Get the current user
+whoami
+```
+
+Suppose the ip address and current user are `172.24.162.117` and `u0_a374`.
+
+2. Connect to the phone over SSH from the computer:
+
+```
+# The default port is 8022
+ssh u0_a374@172.24.162.117 -p 8022
+```
+
+3. Running ls then shows:
+
+```
+ls
+```
+
+<img src="ssh_termux_ls.png" width="800">
+
+
+### 2.3 Transfer data with scp
+
+1. Create a test directory in the current directory:
+
+```
+mkdir test
+```
+
+2. Test scp:
+
+Copy a file from the computer to the phone:
+```
+scp -P 8022 test.txt u0_a374@172.24.162.117:/home/storage/test
+```
+
+3. Check on the phone:
+
+Open the phone terminal and check that `test.txt` exists under `/home/storage/test`.
+
+
+## 3. More tutorials
+
+This tutorial covers the basic Termux setup. For more Termux usage, see: [Termux advanced terminal installation and configuration tutorial](https://www.sqlsec.com/2018/05/termux.html).
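The scp step above can also be scripted from the computer side; a hedged sketch with paramiko follows. The host, port, user, and file names mirror the examples above and are assumptions (key- or password-based auth must already be configured on the Termux sshd):

```python
# Hedged sketch (not part of this diff): scripting the section 2.3 file push
# with paramiko, so test bundles reach the phone without manual scp.
import paramiko

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect("172.24.162.117", port=8022, username="u0_a374")  # assumed host/user

sftp = client.open_sftp()
sftp.put("test.txt", "/home/storage/test/test.txt")  # local -> phone
stdin, stdout, stderr = client.exec_command("ls /home/storage/test")
print(stdout.read().decode())
client.close()
```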
@@ -6,7 +6,7 @@ The main program of the C++ inference function test is `test_inference_cpp.sh`, which tests

Based on whether quantization was used in training, the tested models fall into `normal models` and `quantized models`; the C++ inference coverage for the two is summarized below:

-| Model type | device | batchsize | tensorrt | mkldnn | CPU multithreading |
+| Model type | device | batchsize | tensorrt | mkldnn | CPU multithreading |
| ---- | ---- | ---- | :----: | :----: | :----: |
| Normal model | GPU | 1/6 | fp32/fp16 | - | - |
| Normal model | CPU | 1/6 | - | fp32 | supported |

@@ -15,17 +15,17 @@ The main program of the C++ inference function test is `test_inference_cpp.sh`, which tests

## 2. Test workflow
### 2.1 Function test
-First run `prepare.sh` to prepare data and models, then run `test_inference_cpp.sh` for the test; log files with the `cpp_infer_*.log` suffix end up in the ```tests/output``` directory.
+First run `prepare.sh` to prepare data and models, then run `test_inference_cpp.sh` for the test; log files with the `cpp_infer_*.log` suffix end up in the ```test_tipc/output``` directory.

```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt "cpp_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "cpp_infer"

# Usage 1:
-bash tests/test_inference_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt
+bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params.txt
# Usage 2: run inference on the specified GPU; the third argument is the GPU id
-bash tests/test_inference_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt '1'
+bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params.txt '1'
```


### 2.2 Accuracy test

@@ -37,12 +37,12 @@ bash tests/test_inference_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt '1'
#### Usage
Run:
```shell
-python3.7 tests/compare_results.py --gt_file=./tests/results/cpp_*.txt --log_file=./tests/output/cpp_*.log --atol=1e-3 --rtol=1e-3
+python3.7 test_tipc/compare_results.py --gt_file=./test_tipc/results/cpp_*.txt --log_file=./test_tipc/output/cpp_*.log --atol=1e-3 --rtol=1e-3
```

Arguments:
-- gt_file: path of the pre-saved ground-truth results; *.txt files are indexed automatically, and by default the files live under tests/result/
-- log_file: the prediction log written by the infer mode of tests/test.sh; the log contains prediction results such as text boxes, predicted text, classes, etc.; infer_*.log files are also accepted
+- gt_file: path of the pre-saved ground-truth results; *.txt files are indexed automatically, and by default the files live under test_tipc/result/
+- log_file: the prediction log written by the infer mode of test_tipc/test_inference_cpp.sh; the log contains prediction results such as text boxes, predicted text, classes, etc.; cpp_infer_*.log files are also accepted
- atol: absolute tolerance
- rtol: relative tolerance
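compare_results.py checks each parsed value against the ground truth within the given tolerances. Assuming the usual `|pred - gt| <= atol + rtol * |gt|` criterion (numpy's `isclose`), the check amounts to:

```python
# Hedged sketch of the atol/rtol comparison compare_results.py performs;
# the exact parsing is skipped and the box values below are illustrative.
import numpy as np

gt_boxes = np.array([[78.0, 642.0], [409.0, 640.0]])
pred_boxes = np.array([[78.0005, 642.0], [409.0, 639.9997]])

ok = np.allclose(pred_boxes, gt_boxes, atol=1e-3, rtol=1e-3)
print("PASS" if ok else "FAIL")
```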
@@ -0,0 +1,71 @@
+# Lite inference function test
+
+The main program of the Lite inference function test is `test_lite.sh`, which tests model inference based on the Lite inference library.
+
+## 1. Test summary
+
+The combinations currently supported on the Lite side:
+
+**Field descriptions:**
+- Input setup: C++ inference, python inference, java inference
+- Model type: normal models (FP32) and quantized models (FP16)
+- batch-size: 1 and 4
+- Number of predictors: multi-predictor and single-predictor inference
+- Power mode: high-performance mode (LITE_POWER_HIGH) and power-saving mode (LITE_POWER_LOW)
+- Inference library source: download or build; builds target the following hardware: (1) ARM CPU; (2) Linux XPU; (3) OpenCL GPU; (4) Metal GPU
+
+| Model type | batch-size | Predictors | Power mode | Library source | Language |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| Normal/quantized model | 1 | 1 | high-performance/power-saving mode | download | C++ inference |
+
+
+## 2. Test workflow
+
+### 2.1 Function test
+
+First run `prepare.sh` to prepare data and models; they are packed into test_lite.tar. Upload test_lite.tar to the phone, unpack it, enter the `test_lite` directory, then run `test_lite.sh`; log files with the `lite_*.log` suffix end up in the `test_lite/output` directory.
+
+```shell
+
+# Data and model preparation
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "lite_infer"
+
+# Test on the phone:
+bash test_lite.sh ppocr_det_mobile_params.txt
+
+```
+
+**Note**: the project needs bash and similar commands, which the traditional adb approach cannot provide well, so connecting the computer through a virtual terminal on the phone is recommended; see [Connecting an Android phone via termux](./termux_for_android.md).
+
+#### Results
+
+The status of each test is printed into `./output/`:
+On success it prints:
+
+```
+Run successfully with command - ./ocr_db_crnn det ./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb INT8 4 1 LITE_POWER_LOW ./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg ./config.txt True > ./output/lite_ch_ppocr_mobile_v2.0_det_slim_opt.nb_precision_INT8_batchsize_1_threads_4_powermode_LITE_POWER_LOW_singleimg_True.log 2>&1!
+Run successfully with command xxx
+...
+```
+
+On failure it prints:
+
+```
+Run failed with command - ./ocr_db_crnn det ./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb INT8 4 1 LITE_POWER_LOW ./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg ./config.txt True > ./output/lite_ch_ppocr_mobile_v2.0_det_slim_opt.nb_precision_INT8_batchsize_1_threads_4_powermode_LITE_POWER_LOW_singleimg_True.log 2>&1!
+Run failed with command xxx
+...
+```
+
+Under ./output/ there is one log per configuration:
+
+<img src="lite_log.png" width="1000">
+
+In each log, autolog is invoked to print the following:
+
+<img src="lite_auto_log.png" width="1000">
+
+
+
+## 3. More tutorials
+
+This document is for function testing; for a more detailed Lite inference tutorial see: [Lite deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md).
@@ -0,0 +1,78 @@
+# PaddleServing inference function test
+
+The main program of the PaddleServing inference function test is `test_serving.sh`, which tests deployment based on PaddleServing.
+
+## 1. Test summary
+
+Based on whether quantization was used in training, the tested models fall into `normal models` and `quantized models`; the inference coverage for the two is summarized below:
+
+| Model type | device | batchsize | tensorrt | mkldnn | CPU multithreading |
+| ---- | ---- | ---- | :----: | :----: | :----: |
+| Normal model | GPU | 1/6 | fp32/fp16 | - | - |
+| Normal model | CPU | 1/6 | - | fp32 | supported |
+| Quantized model | GPU | 1/6 | int8 | - | - |
+| Quantized model | CPU | 1/6 | - | int8 | supported |
+
+## 2. Test workflow
+### 2.1 Function test
+First run `prepare.sh` to prepare data and models, then run `test_serving.sh` for the test; log files with the `serving_infer_*.log` suffix end up in the ```test_tipc/output``` directory.
+
+```shell
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "serving_infer"
+
+# Usage:
+bash test_tipc/test_serving.sh ./test_tipc/configs/ppocr_det_mobile_params.txt
+```
+
+#### Results
+
+The status of each test is printed into `test_tipc/output/results_serving.log`:
+On success it prints:
+
+```
+Run successfully with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 !
+Run successfully with command - xxxxx
+...
+```
+
+On failure it prints:
+
+```
+Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 !
+Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_6_batchsize_1.log 2>&1 !
+Run failed with command - xxxxx
+...
+```
+
+Detailed prediction results are stored under the test_tipc/output/ folder; for example `server_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log` returns the detection box coordinates:
+
+```
+{'err_no': 0, 'err_msg': '', 'key': ['dt_boxes'], 'value': ['[[[ 78. 642.]\n  [409. 640.]\n  [409. 657.]\n
+ [ 78. 659.]]\n\n [[ 75. 614.]\n  [211. 614.]\n  [211. 635.]\n  [ 75. 635.]]\n\n
+ [[103. 554.]\n  [135. 554.]\n  [135. 575.]\n  [103. 575.]]\n\n [[ 75. 531.]\n
+ [347. 531.]\n  [347. 549.]\n  [ 75. 549.]]\n\n [[ 76. 503.]\n  [309. 498.]\n
+ [309. 521.]\n  [ 76. 526.]]\n\n [[163. 462.]\n  [317. 462.]\n  [317. 493.]\n
+ [163. 493.]]\n\n [[324. 431.]\n  [414. 431.]\n  [414. 452.]\n  [324. 452.]]\n\n
+ [[ 76. 412.]\n  [208. 408.]\n  [209. 424.]\n  [ 76. 428.]]\n\n [[307. 409.]\n
+ [428. 409.]\n  [428. 426.]\n  [307. 426.]]\n\n [[ 74. 385.]\n  [217. 382.]\n
+ [217. 400.]\n  [ 74. 403.]]\n\n [[308. 381.]\n  [427. 380.]\n  [427. 400.]\n
+ [308. 401.]]\n\n [[ 74. 363.]\n  [195. 362.]\n  [195. 378.]\n  [ 74. 379.]]\n\n
+ [[303. 359.]\n  [423. 357.]\n  [423. 375.]\n  [303. 377.]]\n\n [[ 70. 336.]\n
+ [239. 334.]\n  [239. 354.]\n  [ 70. 356.]]\n\n [[ 70. 312.]\n  [204. 310.]\n
+ [204. 327.]\n  [ 70. 330.]]\n\n [[303. 308.]\n  [419. 306.]\n  [419. 326.]\n
+ [303. 328.]]\n\n [[113. 272.]\n  [246. 270.]\n  [247. 299.]\n  [113. 301.]]\n\n
+ [[361. 269.]\n  [384. 269.]\n  [384. 296.]\n  [361. 296.]]\n\n [[ 70. 250.]\n
+ [243. 246.]\n  [243. 265.]\n  [ 70. 269.]]\n\n [[ 65. 221.]\n  [187. 220.]\n
+ [187. 240.]\n  [ 65. 241.]]\n\n [[337. 216.]\n  [382. 216.]\n  [382. 240.]\n
+ [337. 240.]]\n\n [[ 65. 196.]\n  [247. 193.]\n  [247. 213.]\n  [ 65. 216.]]\n\n
+ [[296. 197.]\n  [423. 191.]\n  [424. 209.]\n  [296. 215.]]\n\n [[ 65. 167.]\n  [244. 167.]\n
+ [244. 186.]\n  [ 65. 186.]]\n\n [[ 67. 139.]\n  [290. 139.]\n  [290. 159.]\n  [ 67. 159.]]\n\n
+ [[ 68. 113.]\n  [410. 113.]\n  [410. 128.]\n  [ 68. 129.]]\n\n [[277.  87.]\n  [416.  87.]\n
+ [416. 108.]\n  [277. 108.]]\n\n [[ 79.  28.]\n  [132.  28.]\n  [132.  62.]\n  [ 79.  62.]]\n\n
+ [[163.  17.]\n  [410.  14.]\n  [410.  50.]\n  [163.  53.]]]']}
+```
+
+
+## 3. More tutorials
+
+This document is for function testing; for a more detailed Serving inference tutorial see: [PPOCR service deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/deploy/pdserving/README_CN.md)
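pipeline_http_client.py posts a base64-encoded image to the pipeline service and receives the `dt_boxes` payload shown above. A hedged minimal client follows; the endpoint URL and port are assumptions based on the default serving config:

```python
# Hedged minimal version of what pipeline_http_client.py does: POST a
# base64-encoded image to the pipeline service and print the response.
# The URL/port are assumptions from the default serving configuration.
import base64
import json
import requests

url = "http://127.0.0.1:9998/ocr/prediction"
with open("../../doc/imgs/1.jpg", "rb") as f:
    image = base64.b64encode(f.read()).decode("utf8")

data = {"key": ["image"], "value": [image]}
r = requests.post(url=url, data=json.dumps(data))
print(r.json())  # e.g. {'err_no': 0, ..., 'key': ['dt_boxes'], 'value': [...]}
```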
@@ -19,7 +19,7 @@

- Inference: based on whether quantization was used in training, the produced models fall into `normal models` and `quantized models`; their inference coverage is summarized below,

-| Model type | device | batchsize | tensorrt | mkldnn | CPU multithreading |
+| Model type | device | batchsize | tensorrt | mkldnn | CPU multithreading |
| ---- | ---- | ---- | :----: | :----: | :----: |
| Normal model | GPU | 1/6 | fp32/fp16 | - | - |
| Normal model | CPU | 1/6 | - | fp32 | supported |

@@ -46,42 +46,42 @@


### 2.2 Function test
-First run `prepare.sh` to prepare data and models, then run `test_train_inference_python.sh` for the test; log files in the `python_infer_*.log` format end up in the ```tests/output``` directory.
+First run `prepare.sh` to prepare data and models, then run `test_train_inference_python.sh` for the test; log files in the `python_infer_*.log` format end up in the ```test_tipc/output``` directory.


`test_train_inference_python.sh` has 5 run modes with different data volumes, used to test speed and accuracy:

-- Mode 1: lite_train_infer, train with a small dataset to quickly verify the train-to-inference pipeline, without checking accuracy or speed;
+- Mode 1: lite_train_lite_infer, train with a small dataset to quickly verify the train-to-inference pipeline, without checking accuracy or speed;
```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'lite_train_infer'
-bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'lite_train_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_lite_infer'
```

-- Mode 2: whole_infer, train with a small dataset and predict on a moderate dataset, to verify that the trained model runs inference and that the inference speed is reasonable;
+- Mode 2: lite_train_whole_infer, train with a small dataset and predict on a moderate dataset, to verify that the trained model runs inference and that the inference speed is reasonable;
```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_infer'
-bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_whole_infer'
```

-- Mode 3: infer, no training; predict on the full dataset, walk through open-source model evaluation and dynamic-to-static conversion, and check the inference model's latency and accuracy;
+- Mode 3: whole_infer, no training; predict on the full dataset, walk through open-source model evaluation and dynamic-to-static conversion, and check the inference model's latency and accuracy;
```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_infer'
# Usage 1:
-bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_infer'
# Usage 2: run inference on the specified GPU; the third argument is the GPU id
-bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer' '1'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_infer' '1'
```

-- Mode 4: whole_train_infer, CE: train on the full dataset and predict on the full dataset, verifying training accuracy, inference accuracy, and inference speed;
+- Mode 4: whole_train_whole_infer, CE: train on the full dataset and predict on the full dataset, verifying training accuracy, inference accuracy, and inference speed;
```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_train_infer'
-bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_train_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_train_whole_infer'
```

-- Mode 5: klquant_infer, test offline (post-training) quantization;
+- Mode 5: klquant_whole_infer, test offline (post-training) quantization;
```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'klquant_infer'
-bash tests/test_train_inference_python.sh tests/configs/ppocr_det_mobile_params.txt 'klquant_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'klquant_whole_infer'
+bash test_tipc/test_train_inference_python.sh test_tipc/configs/ppocr_det_mobile_params.txt 'klquant_whole_infer'
```



@@ -95,12 +95,12 @@ bash tests/test_train_inference_python.sh tests/configs/ppocr_det_mobile_params.
#### Usage
Run:
```shell
-python3.7 tests/compare_results.py --gt_file=./tests/results/python_*.txt --log_file=./tests/output/python_*.log --atol=1e-3 --rtol=1e-3
+python3.7 test_tipc/compare_results.py --gt_file=./test_tipc/results/python_*.txt --log_file=./test_tipc/output/python_*.log --atol=1e-3 --rtol=1e-3
```

Arguments:
-- gt_file: path of the pre-saved ground-truth results; *.txt files are indexed automatically, and by default the files live under tests/result/
-- log_file: the prediction log written by the infer mode of tests/test.sh; the log contains prediction results such as text boxes, predicted text, classes, etc.; infer_*.log files are also accepted
+- gt_file: path of the pre-saved ground-truth results; *.txt files are indexed automatically, and by default the files live under test_tipc/result/
+- log_file: the prediction log written by the infer mode of test_tipc/test_train_inference_python.sh; the log contains prediction results such as text boxes, predicted text, classes, etc.; python_infer_*.log files are also accepted
- atol: absolute tolerance
- rtol: relative tolerance
@ -1,8 +1,9 @@
|
|||
#!/bin/bash
|
||||
FILENAME=$1
|
||||
|
||||
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer',
|
||||
# 'cpp_infer', 'serving_infer', 'klquant_infer']
|
||||
# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer',
|
||||
# 'whole_infer', 'klquant_whole_infer',
|
||||
# 'cpp_infer', 'serving_infer', 'lite_infer']
|
||||
|
||||
MODE=$2
|
||||
|
||||
|
@ -34,10 +35,14 @@ trainer_list=$(func_parser_value "${lines[14]}")
|
|||
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer']
|
||||
MODE=$2
|
||||
|
||||
if [ ${MODE} = "lite_train_infer" ];then
|
||||
if [ ${MODE} = "lite_train_lite_infer" ];then
|
||||
# pretrain lite train data
|
||||
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar
|
||||
if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar
|
||||
cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
|
||||
fi
|
||||
cd ./pretrain_models/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
|
||||
rm -rf ./train_data/icdar2015
|
||||
rm -rf ./train_data/ic15_data
|
||||
|
@ -50,14 +55,18 @@ if [ ${MODE} = "lite_train_infer" ];then
|
|||
ln -s ./icdar2015_lite ./icdar2015
|
||||
cd ../
|
||||
cd ./inference && tar xf rec_inference.tar && cd ../
|
||||
elif [ ${MODE} = "whole_train_infer" ];then
|
||||
elif [ ${MODE} = "whole_train_whole_infer" ];then
|
||||
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
|
||||
rm -rf ./train_data/icdar2015
|
||||
rm -rf ./train_data/ic15_data
|
||||
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar
|
||||
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
|
||||
cd ./train_data/ && tar xf icdar2015.tar && tar xf ic15_data.tar && cd ../
|
||||
elif [ ${MODE} = "whole_infer" ];then
|
||||
if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar
|
||||
cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
|
||||
fi
|
||||
elif [ ${MODE} = "lite_train_whole_infer" ];then
|
||||
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
|
||||
rm -rf ./train_data/icdar2015
|
||||
rm -rf ./train_data/ic15_data
|
||||
|
@ -66,7 +75,11 @@ elif [ ${MODE} = "whole_infer" ];then
|
|||
cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
|
||||
ln -s ./icdar2015_infer ./icdar2015
|
||||
cd ../
|
||||
elif [ ${MODE} = "infer" ];then
|
||||
if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar
|
||||
cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
|
||||
fi
|
||||
elif [ ${MODE} = "whole_infer" ];then
|
||||
if [ ${model_name} = "ocr_det" ]; then
|
||||
eval_model_name="ch_ppocr_mobile_v2.0_det_train"
|
||||
rm -rf ./train_data/icdar2015
|
||||
|
@ -100,13 +113,29 @@ elif [ ${MODE} = "infer" ];then
|
|||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
|
||||
cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../
|
||||
fi
|
||||
elif [ ${MODE} = "klquant_infer" ];then
|
||||
|
||||
elif [ ${model_name} = "PPOCRv2_ocr_det" ]; then
|
||||
eval_model_name="ch_PP-OCRv2_det_infer"
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
|
||||
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
|
||||
cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
|
||||
fi
|
||||
|
||||
if [ ${MODE} = "klquant_whole_infer" ]; then
|
||||
if [ ${model_name} = "ocr_det" ]; then
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
|
||||
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
|
||||
fi
|
||||
elif [ ${MODE} = "cpp_infer" ];then
|
||||
if [ ${model_name} = "PPOCRv2_ocr_det" ]; then
|
||||
eval_model_name="ch_PP-OCRv2_det_infer"
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
|
||||
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
|
||||
cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${MODE} = "cpp_infer" ];then
|
||||
if [ ${model_name} = "ocr_det" ]; then
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
|
||||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
|
||||
|
@ -136,3 +165,37 @@ if [ ${MODE} = "serving_infer" ];then
|
|||
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && cd ../
fi

if [ ${MODE} = "lite_infer" ];then
# prepare lite nb model and test data
current_dir=${PWD}
wget -nc -P ./models https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb
wget -nc -P ./models https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb
wget -nc -P ./test_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar
cd ./test_data && tar -xf icdar2015_lite.tar && rm icdar2015_lite.tar && cd ../
# prepare lite env
export http_proxy=http://172.19.57.45:3128
export https_proxy=http://172.19.57.45:3128
paddlelite_url=https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz
paddlelite_zipfile=$(echo $paddlelite_url | awk -F "/" '{print $NF}')
paddlelite_file=inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv
wget ${paddlelite_url}
tar -xf ${paddlelite_zipfile}
mkdir -p ${paddlelite_file}/demo/cxx/ocr/test_lite
mv models test_data ${paddlelite_file}/demo/cxx/ocr/test_lite
cp ppocr/utils/ppocr_keys_v1.txt deploy/lite/config.txt ${paddlelite_file}/demo/cxx/ocr/test_lite
cp ./deploy/lite/* ${paddlelite_file}/demo/cxx/ocr/
cp ${paddlelite_file}/cxx/lib/libpaddle_light_api_shared.so ${paddlelite_file}/demo/cxx/ocr/test_lite
cp PTDN/configs/ppocr_det_mobile_params.txt PTDN/test_lite.sh PTDN/common_func.sh ${paddlelite_file}/demo/cxx/ocr/test_lite
cd ${paddlelite_file}/demo/cxx/ocr/
git clone https://github.com/LDOUBLEV/AutoLog.git
unset http_proxy
unset https_proxy
make -j
sleep 1
make -j
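# the repeated make above acts as a retry: the second pass rebuilds anything the first attempt failed to produce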
cp ocr_db_crnn test_lite && cp test_lite/libpaddle_light_api_shared.so test_lite/libc++_shared.so
tar -cf test_lite.tar ./test_lite && cp test_lite.tar ${current_dir} && cd ${current_dir}
fi

@ -1,9 +1,9 @@

# Inference Deployment Navigation
# PaddlePaddle Training and Inference Pipeline Certification

## 1. Introduction

Beyond basic model training and prediction, PaddlePaddle also provides high-performance inference deployment tools that support many devices and platforms. This document provides the inference deployment navigation, PTDN (Paddle Train Deploy Navigation), for all PaddleOCR models, so users can look up how far each model's inference deployment pipeline has been verified and run one-click tests.
Beyond basic model training and prediction, PaddlePaddle also provides high-performance inference deployment tools that support many devices and platforms. This document provides the Training and Inference Pipeline Certification (TIPC) information and test tools for all PaddleOCR models, so users can look up how far each model's training and inference deployment pipeline has been verified and run one-click tests.
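The one-click test is driven by a data/model preparation script plus a per-feature test script. A minimal sketch of the flow, assuming the mobile detection-model config used as the running example in this document (exact paths may differ per release):

```shell
# prepare data and models for the chosen mode, then run the matching test;
# the mode string must be one of the MODE values accepted by prepare.sh
bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_lite_infer'
```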

<div align="center">
<img src="docs/guide.png" width="1000">

@ -15,20 +15,23 @@

**Field descriptions:**
- Basic training & prediction: covers model training and Paddle Inference Python prediction.
- Others: covers Paddle Inference C++ prediction, Paddle Serving deployment, Paddle-Lite deployment, etc.
- More training modes: covers multi-machine multi-GPU and mixed-precision training.
- Model compression: covers pruning, offline/online quantization, and distillation.
- Other deployment: covers Paddle Inference C++ prediction, Paddle Serving deployment, Paddle-Lite deployment, etc.

For more detail on support for inference-acceleration features such as MKL-DNN and TensorRT, see each test tool's [more tutorials](#more).

| Algorithm paper | Model name | Model type | Basic training & prediction | Others |
| :--- | :--- | :----: | :--------: | :---- |
| DB |ch_ppocr_mobile_v2.0_det | Detection | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| DB |ch_ppocr_server_v2.0_det | Detection | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| Algorithm paper | Model name | Model type | Basic<br>training & prediction | More<br>training modes | Model compression | Other deployment |
| :--- | :--- | :----: | :--------: | :---- | :---- | :---- |
| DB |ch_ppocr_mobile_v2.0_det | Detection | Supported | Multi-machine multi-GPU <br> Mixed precision | FPGM pruning <br> Offline quantization| Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| DB |ch_ppocr_server_v2.0_det | Detection | Supported | Multi-machine multi-GPU <br> Mixed precision | FPGM pruning <br> Offline quantization| Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| DB |ch_PP-OCRv2_det | Detection |
| CRNN |ch_ppocr_mobile_v2.0_rec | Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| CRNN |ch_ppocr_server_v2.0_rec | Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| CRNN |ch_ppocr_mobile_v2.0_rec | Recognition | Supported | Multi-machine multi-GPU <br> Mixed precision | PACT quantization <br> Offline quantization| Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| CRNN |ch_ppocr_server_v2.0_rec | Recognition | Supported | Multi-machine multi-GPU <br> Mixed precision | PACT quantization <br> Offline quantization| Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| CRNN |ch_PP-OCRv2_rec | Recognition |
| PP-OCR |ch_ppocr_mobile_v2.0 | Detection + Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| PP-OCR |ch_ppocr_server_v2.0 | Detection + Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
|PP-OCRv2|ch_PP-OCRv2 | Detection + Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| PP-OCR |ch_ppocr_mobile_v2.0 | Detection + Recognition | Supported | Multi-machine multi-GPU <br> Mixed precision | - | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
| PP-OCR |ch_ppocr_server_v2.0 | Detection + Recognition | Supported | Multi-machine multi-GPU <br> Mixed precision | - | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: <br> (1) ARM CPU(C++) |
|PP-OCRv2|ch_PP-OCRv2 | Detection + Recognition |
| DB |det_mv3_db_v2.0 | Detection |
| DB |det_r50_vd_db_v2.0 | Detection |
| EAST |det_mv3_east_v2.0 | Detection |

@ -55,7 +58,7 @@

### Directory overview

```shell
PTDN/
test_tipc/
├── configs/              # configuration files
├── det_mv3_db.yml        # yml config for testing training of the mobile ppocr detection model
├── det_r50_vd_db.yml     # yml config for testing training of the server ppocr detection model
@ -66,7 +69,7 @@ PTDN/
├── ppocr_sys_server_params.txt      # parameter config for testing the server ppocr detection+recognition pipeline
├── ppocr_det_server_params.txt      # parameter config for testing the server ppocr detection model
├── ppocr_rec_server_params.txt      # parameter config for testing the server ppocr recognition model
├── ...
├── ...
├── results/              # pre-saved prediction results, used for accuracy comparison against live prediction results
├── python_ppocr_det_mobile_results_fp32.txt   # pre-saved fp32 results of Python prediction with the mobile ppocr detection model
├── python_ppocr_det_mobile_results_fp16.txt   # pre-saved fp16 results of Python prediction with the mobile ppocr detection model
@ -98,6 +101,8 @@ PTDN/
- `test_serving.sh`: tests service deployment based on Paddle Serving.
- `test_lite.sh`: tests on-device inference deployment based on Paddle-Lite.

<a name="more"></a>
#### More tutorials
The tests above involve training options such as mixed precision, pruning, and quantization, as well as inference options such as MKL-DNN and TensorRT; follow the links below for details and usage tutorials of each tool:
[Using test_train_inference_python](docs/test_train_inference_python.md)
[Using test_inference_cpp](docs/test_inference_cpp.md)

@ -1,5 +1,5 @@
#!/bin/bash
source tests/common_func.sh
source test_tipc/common_func.sh

FILENAME=$1
dataline=$(awk 'NR==52, NR==66{print}' $FILENAME)
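# NR==52, NR==66 selects lines 52-66 of the params file, the block holding the C++ inference settings parsed below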

@ -35,7 +35,7 @@ cpp_benchmark_key=$(func_parser_key "${lines[14]}")
cpp_benchmark_value=$(func_parser_value "${lines[14]}")

LOG_PATH="./tests/output"
LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_cpp.log"

@ -0,0 +1,69 @@
#!/bin/bash
source ./common_func.sh
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
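# common_func.sh is assumed to provide func_parser_key/func_parser_value, which
# split a "key:value" config line and echo the part before/after the first colon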

FILENAME=$1
dataline=$(awk 'NR==101, NR==110{print}' $FILENAME)
echo $dataline
# parse params
IFS=$'\n'
lines=(${dataline})

# parse lite inference params
lite_inference_cmd=$(func_parser_value "${lines[1]}")
lite_model_dir_list=$(func_parser_value "${lines[2]}")
lite_cpu_threads_list=$(func_parser_value "${lines[3]}")
lite_batch_size_list=$(func_parser_value "${lines[4]}")
lite_power_mode_list=$(func_parser_value "${lines[5]}")
lite_infer_img_dir_list=$(func_parser_value "${lines[6]}")
lite_config_dir=$(func_parser_value "${lines[7]}")
lite_rec_dict_dir=$(func_parser_value "${lines[8]}")
lite_benchmark_value=$(func_parser_value "${lines[9]}")

LOG_PATH="./output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results.log"


function func_lite(){
    IFS='|'
    _script=$1
    _lite_model=$2
    _log_path=$3
    _img_dir=$4
    _config=$5
    if [[ $_lite_model =~ "slim" ]]; then
        precision="INT8"
    else
        precision="FP32"
    fi
    is_single_img=$(echo $_img_dir | grep -E ".jpg|.jpeg|.png|.JPEG|.JPG")
    if [[ "$is_single_img" != "" ]]; then
        single_img="True"
    else
        single_img="False"
    fi

    # lite inference
    for num_threads in ${lite_cpu_threads_list[*]}; do
        for power_mode in ${lite_power_mode_list[*]}; do
            for batchsize in ${lite_batch_size_list[*]}; do
                model_name=$(echo $_lite_model | awk -F "/" '{print $NF}')
                _save_log_path="${_log_path}/lite_${model_name}_precision_${precision}_batchsize_${batchsize}_threads_${num_threads}_powermode_${power_mode}_singleimg_${single_img}.log"
                command="${_script} ${_lite_model} ${precision} ${num_threads} ${batchsize} ${power_mode} ${_img_dir} ${_config} ${lite_benchmark_value} > ${_save_log_path} 2>&1"
                eval ${command}
                status_check $? "${command}" "${status_log}"
            done
        done
    done
}


echo "################### run test ###################"
IFS="|"
for lite_model in ${lite_model_dir_list[*]}; do
    # run lite inference
    for img_dir in ${lite_infer_img_dir_list[*]}; do
        func_lite "${lite_inference_cmd}" "${lite_model}" "${LOG_PATH}" "${img_dir}" "${lite_config_dir}"
    done
done
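# typical on-device invocation (illustrative), using the params file copied over by prepare.sh:
#   bash test_lite.sh ppocr_det_mobile_params.txt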

@ -1,5 +1,5 @@
#!/bin/bash
source tests/common_func.sh
source test_tipc/common_func.sh

FILENAME=$1
dataline=$(awk 'NR==67, NR==83{print}' $FILENAME)

@ -36,8 +36,8 @@ web_precision_key=$(func_parser_key "${lines[15]}")
web_precision_list=$(func_parser_value "${lines[15]}")
pipeline_py=$(func_parser_value "${lines[16]}")

LOG_PATH="../../tests/output"
mkdir -p ./tests/output
LOG_PATH="../../test_tipc/output"
mkdir -p ./test_tipc/output
status_log="${LOG_PATH}/results_serving.log"

function func_serving(){

@ -1,8 +1,8 @@
#!/bin/bash
source tests/common_func.sh
source test_tipc/common_func.sh

FILENAME=$1
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'klquant_infer']
# MODE must be one of ['lite_train_lite_infer', 'lite_train_whole_infer', 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
MODE=$2

dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)

@ -59,6 +59,7 @@ export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
inference_dir=$(func_parser_value "${lines[35]}")

# parse inference model params
infer_model_dir_list=$(func_parser_value "${lines[36]}")

@ -88,7 +89,7 @@ infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")

# parse klquant_infer params
if [ ${MODE} = "klquant_infer" ]; then
if [ ${MODE} = "klquant_whole_infer" ]; then
dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
lines=(${dataline})
# parse inference model params
@ -119,7 +120,7 @@ if [ ${MODE} = "klquant_infer" ]; then
infer_value1=$(func_parser_value "${lines[15]}")
fi

LOG_PATH="./tests/output"
LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"

@ -202,7 +203,7 @@ function func_inference(){
done
}

if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "

@ -245,6 +246,7 @@ else
for gpu in ${gpu_list[*]}; do
use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
ips=""
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
@ -264,6 +266,11 @@ else
env=" "
fi
for autocast in ${autocast_list[*]}; do
if [ ${autocast} = "amp" ]; then
set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
else
set_amp_config=" "
fi
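# note: these -o overrides are picked up by the training entrypoint, which builds the paddle.amp.GradScaler shown in the program.py changes below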
for trainer in ${trainer_list[*]}; do
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then

@ -290,7 +297,6 @@ else
if [ ${run_train} = "null" ]; then
continue
fi

set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")

@ -306,11 +312,11 @@ else

set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
elif [ ${#gpu} -le 15 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
elif [ ${#ips} -le 26 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"

@ -342,7 +348,13 @@ else
#run inference
eval $env
save_infer_path="${save_log}"
func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
infer_model_dir="${save_infer_path}/${inference_dir}"
else
infer_model_dir=${save_infer_path}
fi
func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"

eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do

@ -159,7 +159,8 @@ def train(config,
              eval_class,
              pre_best_model_dict,
              logger,
              vdl_writer=None):
              vdl_writer=None,
              scaler=None):
    cal_metric_during_train = config['Global'].get('cal_metric_during_train',
                                                   False)
    log_smooth_window = config['Global']['log_smooth_window']

@ -211,33 +212,49 @@ def train(config,
    for epoch in range(start_epoch, epoch_num + 1):
        train_dataloader = build_dataloader(
            config, 'Train', device, logger, seed=epoch)
        train_batch_cost = 0.0
        train_reader_cost = 0.0
        batch_sum = 0
        batch_start = time.time()
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        max_iter = len(train_dataloader) - 1 if platform.system(
        ) == "Windows" else len(train_dataloader)
        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
            train_reader_cost += time.time() - batch_start
            train_reader_cost += time.time() - reader_start
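            # time from reader_start to here is dataloader wait time; the model forward/backward time is accumulated separately in train_run_cost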
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True
            if model_type == 'table' or extra_input:
                preds = model(images, data=batch[1:])

            train_start = time.time()
            # use amp
            if scaler:
                with paddle.amp.auto_cast():
                    if model_type == 'table' or extra_input:
                        preds = model(images, data=batch[1:])
                    else:
                        preds = model(images)
            else:
                preds = model(images)
                if model_type == 'table' or extra_input:
                    preds = model(images, data=batch[1:])
                else:
                    preds = model(images)
            loss = loss_class(preds, batch)
            avg_loss = loss['loss']
            avg_loss.backward()
            optimizer.step()

            if scaler:
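                # AMP path: scale the loss so small fp16 gradients don't underflow; scaler.minimize() unscales them and applies the optimizer step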
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                avg_loss.backward()
                optimizer.step()
            optimizer.clear_grad()

            train_batch_cost += time.time() - batch_start
            batch_sum += len(images)
            train_run_cost += time.time() - train_start
            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

@ -268,12 +285,13 @@ def train(config,
                logs = train_stats.log()
                strs = 'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'.format(
                    epoch, epoch_num, global_step, logs, train_reader_cost /
                    print_batch_step, train_batch_cost / print_batch_step,
                    batch_sum, batch_sum / train_batch_cost)
                    print_batch_step, (train_reader_cost + train_run_cost) /
                    print_batch_step, total_samples,
                    total_samples / (train_reader_cost + train_run_cost))
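                # ips is now total samples divided by reader + run time, i.e. end-to-end throughput in samples/sec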
                logger.info(strs)
                train_batch_cost = 0.0
                train_reader_cost = 0.0
                batch_sum = 0
                train_run_cost = 0.0
                total_samples = 0
            # eval
            if global_step > start_eval_step and \
                    (global_step - start_eval_step) % eval_batch_step == 0 and dist.get_rank() == 0:

@ -326,7 +344,7 @@ def train(config,
                                           global_step)
            global_step += 1
            optimizer.clear_grad()
            batch_start = time.time()
            reader_start = time.time()
        if dist.get_rank() == 0:
            save_model(
                model,

@ -367,7 +385,11 @@ def eval(model,
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
        pbar = tqdm(total=len(valid_dataloader), desc='eval model:')
        pbar = tqdm(
            total=len(valid_dataloader),
            desc='eval model:',
            position=0,
            leave=True)
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        for idx, batch in enumerate(valid_dataloader):

@ -436,8 +458,6 @@ def get_center(model, eval_dataloader, post_process_class):

        batch = [item.numpy() for item in batch]
        # Obtain usable results from post-processing methods
        total_time += time.time() - start
        # Evaluate the results of the current batch
        post_result = post_process_class(preds, batch[1])

        # update char_center

@ -480,11 +500,6 @@ def preprocess(is_train=False):
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED'
    ]
    windows_not_support_list = ['PSE']
    if platform.system() == "Windows" and alg in windows_not_support_list:
        logger.warning('{} is not support in Windows now'.format(
            windows_not_support_list))
        sys.exit()

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
    device = paddle.set_device(device)

@ -102,10 +102,27 @@ def main(config, device, logger, vdl_writer):
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(
            len(valid_dataloader)))

    use_amp = config["Global"].get("use_amp", False)
    if use_amp:
        AMP_RELATED_FLAGS_SETTING = {
            'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
            'FLAGS_max_inplace_grad_add': 8,
        }
        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["Global"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["Global"].get(
            "use_dynamic_loss_scaling", False)
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
    else:
        scaler = None
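    # scaler (or None) is threaded through to program.train(); Global.use_amp, Global.scale_loss
    # and Global.use_dynamic_loss_scaling are the config keys that control it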

    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer)
                  eval_class, pre_best_model_dict, logger, vdl_writer, scaler)


def test_reader(config, device, logger):