diff --git a/deploy/cpp/readme.md b/deploy/cpp/readme.md
index a8af240c9..72ce8dacc 100644
--- a/deploy/cpp/readme.md
+++ b/deploy/cpp/readme.md
@@ -215,9 +215,9 @@ cp ../configs/inference_cls.yaml tools/
 #### 2.3.2 执行
 
 ```shell
-./build/clas_system -c inference_cls.yaml
+./build/clas_system -c tools/inference_cls.yaml
 # or
-./build/clas_system -config inference_cls.yaml
+./build/clas_system -config tools/inference_cls.yaml
 ```
 
 最终屏幕上会输出结果，如下图所示。
diff --git a/deploy/cpp_shitu/src/feature_extracter.cpp b/deploy/cpp_shitu/src/feature_extracter.cpp
index f442b51ce..37c7590ff 100644
--- a/deploy/cpp_shitu/src/feature_extracter.cpp
+++ b/deploy/cpp_shitu/src/feature_extracter.cpp
@@ -18,102 +18,102 @@
 
 namespace Feature {
 
-void FeatureExtracter::LoadModel(const std::string &model_path,
-                                 const std::string &params_path) {
-  paddle_infer::Config config;
-  config.SetModel(model_path, params_path);
+    void FeatureExtracter::LoadModel(const std::string &model_path,
+                                     const std::string &params_path) {
+        paddle_infer::Config config;
+        config.SetModel(model_path, params_path);
 
-  if (this->use_gpu_) {
-    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
-    if (this->use_tensorrt_) {
-      config.EnableTensorRtEngine(
-          1 << 20, 1, 3,
-          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
-                          : paddle_infer::Config::Precision::kFloat32,
-          false, false);
+        if (this->use_gpu_) {
+            config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+            if (this->use_tensorrt_) {
+                config.EnableTensorRtEngine(
+                        1 << 20, 1, 3,
+                        this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
+                                        : paddle_infer::Config::Precision::kFloat32,
+                        false, false);
+            }
+        } else {
+            config.DisableGpu();
+            if (this->use_mkldnn_) {
+                config.EnableMKLDNN();
+                // cache 10 different shapes for mkldnn to avoid memory leak
+                config.SetMkldnnCacheCapacity(10);
+            }
+            config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+        }
+
+        config.SwitchUseFeedFetchOps(false);
+        // true for multiple input
+        config.SwitchSpecifyInputNames(true);
+
+        config.SwitchIrOptim(true);
+
+        config.EnableMemoryOptim();
+        config.DisableGlogInfo();
+
+        this->predictor_ = CreatePredictor(config);
     }
-  } else {
-    config.DisableGpu();
-    if (this->use_mkldnn_) {
-      config.EnableMKLDNN();
-      // cache 10 different shapes for mkldnn to avoid memory leak
-      config.SetMkldnnCacheCapacity(10);
+
+    void FeatureExtracter::Run(cv::Mat &img, std::vector<float> &out_data,
+                               std::vector<double> &times) {
+        cv::Mat resize_img;
+        std::vector<double> time;
+
+        auto preprocess_start = std::chrono::steady_clock::now();
+        this->resize_op_.Run(img, resize_img, this->resize_short_,
+                             this->resize_size_);
+
+        this->normalize_op_.Run(&resize_img, this->mean_, this->std_, this->scale_);
+        std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
+        this->permute_op_.Run(&resize_img, input.data());
+
+        auto input_names = this->predictor_->GetInputNames();
+        auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+        input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+        auto preprocess_end = std::chrono::steady_clock::now();
+
+        auto infer_start = std::chrono::steady_clock::now();
+        input_t->CopyFromCpu(input.data());
+        this->predictor_->Run();
+
+        auto output_names = this->predictor_->GetOutputNames();
+        auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
+        std::vector<int> output_shape = output_t->shape();
+        int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                      std::multiplies<int>());
+
+        out_data.resize(out_num);
+        output_t->CopyToCpu(out_data.data());
+        auto infer_end = std::chrono::steady_clock::now();
+
+        auto postprocess_start = std::chrono::steady_clock::now();
+        if (this->feature_norm)
+            FeatureNorm(out_data);
+        auto postprocess_end = std::chrono::steady_clock::now();
+
+        std::chrono::duration<float> preprocess_diff =
+                preprocess_end - preprocess_start;
+        time.push_back(double(preprocess_diff.count()) * 1000);
+        std::chrono::duration<float> inference_diff = infer_end - infer_start;
+        double inference_cost_time = double(inference_diff.count() * 1000);
+        time.push_back(inference_cost_time);
+        // std::chrono::duration<float> postprocess_diff =
+        //     postprocess_end - postprocess_start;
+        time.push_back(0);
+
+        // std::cout << "result: " << std::endl;
+        // std::cout << "\tclass id: " << maxPosition << std::endl;
+        // std::cout << std::fixed << std::setprecision(10)
+        //           << "\tscore: " << double(out_data[maxPosition]) << std::endl;
+        times[0] += time[0];
+        times[1] += time[1];
+        times[2] += time[2];
     }
-    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
-  }
 
-  config.SwitchUseFeedFetchOps(false);
-  // true for multiple input
-  config.SwitchSpecifyInputNames(true);
-
-  config.SwitchIrOptim(true);
-
-  config.EnableMemoryOptim();
-  config.DisableGlogInfo();
-
-  this->predictor_ = CreatePredictor(config);
-}
-
-void FeatureExtracter::Run(cv::Mat &img, std::vector<float> &out_data,
-                           std::vector<double> &times) {
-  cv::Mat resize_img;
-  std::vector<double> time;
-
-  auto preprocess_start = std::chrono::steady_clock::now();
-  this->resize_op_.Run(img, resize_img, this->resize_short_,
-                       this->resize_size_);
-
-  this->normalize_op_.Run(&resize_img, this->mean_, this->std_, this->scale_);
-  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
-  this->permute_op_.Run(&resize_img, input.data());
-
-  auto input_names = this->predictor_->GetInputNames();
-  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
-  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
-  auto preprocess_end = std::chrono::steady_clock::now();
-
-  auto infer_start = std::chrono::steady_clock::now();
-  input_t->CopyFromCpu(input.data());
-  this->predictor_->Run();
-
-  auto output_names = this->predictor_->GetOutputNames();
-  auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
-  std::vector<int> output_shape = output_t->shape();
-  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
-                                std::multiplies<int>());
-
-  out_data.resize(out_num);
-  output_t->CopyToCpu(out_data.data());
-  auto infer_end = std::chrono::steady_clock::now();
-
-  auto postprocess_start = std::chrono::steady_clock::now();
-  if (this->feature_norm)
-    FeatureNorm(out_data);
-  auto postprocess_end = std::chrono::steady_clock::now();
-
-  std::chrono::duration<float> preprocess_diff =
-      preprocess_end - preprocess_start;
-  time.push_back(double(preprocess_diff.count()) * 1000);
-  std::chrono::duration<float> inference_diff = infer_end - infer_start;
-  double inference_cost_time = double(inference_diff.count() * 1000);
-  time.push_back(inference_cost_time);
-  // std::chrono::duration<float> postprocess_diff =
-  //     postprocess_end - postprocess_start;
-  time.push_back(0);
-
-  // std::cout << "result: " << std::endl;
-  // std::cout << "\tclass id: " << maxPosition << std::endl;
-  // std::cout << std::fixed << std::setprecision(10)
-  //           << "\tscore: " << double(out_data[maxPosition]) << std::endl;
-  times[0] += time[0];
-  times[1] += time[1];
-  times[2] += time[2];
-}
-
-void FeatureExtracter::FeatureNorm(std::vector<float> &featuer) {
-  float featuer_sqrt = std::sqrt(std::inner_product(
-      featuer.begin(), featuer.end(), featuer.begin(), 0.0f));
-  for (int i = 0; i < featuer.size(); ++i)
-    featuer[i] /= featuer_sqrt;
-}
+    void FeatureExtracter::FeatureNorm(std::vector<float> &featuer) {
+        float featuer_sqrt = std::sqrt(std::inner_product(
+                featuer.begin(), featuer.end(), featuer.begin(), 0.0f));
+        for (int i = 0; i < featuer.size(); ++i)
+            featuer[i] /= featuer_sqrt;
+    }
 } // namespace Feature
diff --git a/deploy/cpp_shitu/src/main.cpp b/deploy/cpp_shitu/src/main.cpp
index 700d9aa3e..be37d3afd 100644
--- a/deploy/cpp_shitu/src/main.cpp
+++ b/deploy/cpp_shitu/src/main.cpp
@@ -37,304 +37,306 @@
 using namespace std;
 using namespace cv;
 
-DEFINE_string(config, "", "Path of yaml file");
-DEFINE_string(c, "", "Path of yaml file");
+DEFINE_string(config,
+"", "Path of yaml file");
+DEFINE_string(c,
+"", "Path of yaml file");
 
-void DetPredictImage(const std::vector<cv::Mat> &batch_imgs,
-                     const std::vector<std::string> &all_img_paths,
+void DetPredictImage(const std::vector <cv::Mat> &batch_imgs,
+                     const std::vector <std::string> &all_img_paths,
                      const int batch_size, Detection::ObjectDetector *det,
-                     std::vector<Detection::ObjectResult> &im_result,
+                     std::vector <Detection::ObjectResult> &im_result,
                      std::vector<int> &im_bbox_num, std::vector<double> &det_t,
                      const bool visual_det = false,
                      const bool run_benchmark = false,
                      const std::string &output_dir = "output") {
-  int steps = ceil(float(all_img_paths.size()) / batch_size);
-  //   printf("total images = %d, batch_size = %d, total steps = %d\n",
-  //                 all_img_paths.size(), batch_size, steps);
-  for (int idx = 0; idx < steps; idx++) {
-    int left_image_cnt = all_img_paths.size() - idx * batch_size;
-    if (left_image_cnt > batch_size) {
-      left_image_cnt = batch_size;
-    }
-    // for (int bs = 0; bs < left_image_cnt; bs++) {
-    // std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
-    // cv::Mat im = cv::imread(image_file_path, 1);
-    // batch_imgs.insert(batch_imgs.end(), im);
-    // }
+    int steps = ceil(float(all_img_paths.size()) / batch_size);
+    //   printf("total images = %d, batch_size = %d, total steps = %d\n",
+    //                 all_img_paths.size(), batch_size, steps);
+    for (int idx = 0; idx < steps; idx++) {
+        int left_image_cnt = all_img_paths.size() - idx * batch_size;
+        if (left_image_cnt > batch_size) {
+            left_image_cnt = batch_size;
+        }
+        // for (int bs = 0; bs < left_image_cnt; bs++) {
+        // std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
+        // cv::Mat im = cv::imread(image_file_path, 1);
+        // batch_imgs.insert(batch_imgs.end(), im);
+        // }
 
-    // Store all detected result
-    std::vector<Detection::ObjectResult> result;
-    std::vector<int> bbox_num;
-    std::vector<double> det_times;
-    bool is_rbox = false;
-    if (run_benchmark) {
-      det->Predict(batch_imgs, 10, 10, &result, &bbox_num, &det_times);
-    } else {
-      det->Predict(batch_imgs, 0, 1, &result, &bbox_num, &det_times);
-      // get labels and colormap
-      auto labels = det->GetLabelList();
-      auto colormap = Detection::GenerateColorMap(labels.size());
+        // Store all detected results
+        std::vector <Detection::ObjectResult> result;
+        std::vector<int> bbox_num;
+        std::vector<double> det_times;
+        bool is_rbox = false;
+        if (run_benchmark) {
+            det->Predict(batch_imgs, 10, 10, &result, &bbox_num, &det_times);
+        } else {
+            det->Predict(batch_imgs, 0, 1, &result, &bbox_num, &det_times);
+            // get labels and colormap
+            auto labels = det->GetLabelList();
+            auto colormap = Detection::GenerateColorMap(labels.size());
 
-      int item_start_idx = 0;
-      for (int i = 0; i < left_image_cnt; i++) {
-        cv::Mat im = batch_imgs[i];
-        int detect_num = 0;
+            int item_start_idx = 0;
+            for (int i = 0; i < left_image_cnt; i++) {
+                cv::Mat im = batch_imgs[i];
+                int detect_num = 0;
 
-        for (int j = 0; j < bbox_num[i]; j++) {
-          Detection::ObjectResult item = result[item_start_idx + j];
-          if (item.confidence < det->GetThreshold() || item.class_id == -1) {
-            continue;
-          }
-          detect_num += 1;
-          im_result.push_back(item);
-          if (visual_det) {
-            if (item.rect.size() > 6) {
-              is_rbox = true;
-              printf(
-                  "class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
-                  item.class_id, item.confidence, item.rect[0], item.rect[1],
-                  item.rect[2], item.rect[3], item.rect[4], item.rect[5],
-                  item.rect[6], item.rect[7]);
-            } else {
-              printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
-                     item.class_id, item.confidence, item.rect[0], item.rect[1],
-                     item.rect[2], item.rect[3]);
+                for (int j = 0; j < bbox_num[i]; j++) {
+                    Detection::ObjectResult item = result[item_start_idx + j];
+                    if (item.confidence < det->GetThreshold() || item.class_id == -1) {
+                        continue;
+                    }
+                    detect_num += 1;
+                    im_result.push_back(item);
+                    if (visual_det) {
+                        if (item.rect.size() > 6) {
+                            is_rbox = true;
+                            printf(
+                                    "class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+                                    item.class_id, item.confidence, item.rect[0], item.rect[1],
+                                    item.rect[2], item.rect[3], item.rect[4], item.rect[5],
+                                    item.rect[6], item.rect[7]);
+                        } else {
+                            printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+                                   item.class_id, item.confidence, item.rect[0], item.rect[1],
+                                   item.rect[2], item.rect[3]);
+                        }
+                    }
+                }
+                im_bbox_num.push_back(detect_num);
+                item_start_idx = item_start_idx + bbox_num[i];
+
+                // Visualization result
+                if (visual_det) {
+                    std::cout << all_img_paths.at(idx * batch_size + i)
+                              << " The number of detected box: " << detect_num
+                              << std::endl;
+                    cv::Mat vis_img = Detection::VisualizeResult(im, im_result, labels,
+                                                                 colormap, is_rbox);
+                    std::vector<int> compression_params;
+                    compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
+                    compression_params.push_back(95);
+                    std::string output_path(output_dir);
+                    if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
+                        output_path += OS_PATH_SEP;
+                    }
+                    std::string image_file_path = all_img_paths.at(idx * batch_size + i);
+                    output_path +=
+                            image_file_path.substr(image_file_path.find_last_of('/') + 1);
+                    cv::imwrite(output_path, vis_img, compression_params);
+                    printf("Visualized output saved as %s\n", output_path.c_str());
+                }
             }
-          }
         }
-        im_bbox_num.push_back(detect_num);
-        item_start_idx = item_start_idx + bbox_num[i];
-
-        // Visualization result
-        if (visual_det) {
-          std::cout << all_img_paths.at(idx * batch_size + i)
-                    << " The number of detected box: " << detect_num
-                    << std::endl;
-          cv::Mat vis_img = Detection::VisualizeResult(im, im_result, labels,
-                                                       colormap, is_rbox);
-          std::vector<int> compression_params;
-          compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
-          compression_params.push_back(95);
-          std::string output_path(output_dir);
-          if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
-            output_path += OS_PATH_SEP;
-          }
-          std::string image_file_path = all_img_paths.at(idx * batch_size + i);
-          output_path +=
-              image_file_path.substr(image_file_path.find_last_of('/') + 1);
-          cv::imwrite(output_path, vis_img, compression_params);
-          printf("Visualized output saved as %s\n", output_path.c_str());
-        }
-      }
+        det_t[0] += det_times[0];
+        det_t[1] += det_times[1];
+        det_t[2] += det_times[2];
     }
-    det_t[0] += det_times[0];
-    det_t[1] += det_times[1];
-    det_t[2] += det_times[2];
-  }
 }
 
 void PrintResult(std::string &img_path,
-                 std::vector<Detection::ObjectResult> &det_result,
+                 std::vector <Detection::ObjectResult> &det_result,
                  std::vector<int> &indeices, VectorSearch &vector_search,
                  SearchResult &search_result) {
-  printf("%s:\n", img_path.c_str());
-  for (int i = 0; i < indeices.size(); ++i) {
-    int t = indeices[i];
-    printf("\tresult%d: bbox[%d, %d, %d, %d], score: %f, label: %s\n", i,
-           det_result[t].rect[0], det_result[t].rect[1], det_result[t].rect[2],
-           det_result[t].rect[3], det_result[t].confidence,
-           vector_search.GetLabel(search_result.I[search_result.return_k * t])
-               .c_str());
-  }
+    printf("%s:\n", img_path.c_str());
+    for (int i = 0; i < indeices.size(); ++i) {
+        int t = indeices[i];
+        printf("\tresult%d: bbox[%d, %d, %d, %d], score: %f, label: %s\n", i,
+               det_result[t].rect[0], det_result[t].rect[1], det_result[t].rect[2],
+               det_result[t].rect[3], det_result[t].confidence,
+               vector_search.GetLabel(search_result.I[search_result.return_k * t])
+                       .c_str());
+    }
 }
 
 int main(int argc, char **argv) {
-  google::ParseCommandLineFlags(&argc, &argv, true);
-  std::string yaml_path = "";
-  if (FLAGS_config == "" && FLAGS_c == "") {
-    std::cerr << "[ERROR] usage: " << std::endl
-              << argv[0] << " -c $yaml_path" << std::endl
-              << "or:" << std::endl
-              << argv[0] << " -config $yaml_path" << std::endl;
-    exit(1);
-  } else if (FLAGS_config != "") {
-    yaml_path = FLAGS_config;
-  } else {
-    yaml_path = FLAGS_c;
-  }
-
-  YamlConfig config(yaml_path);
-  config.PrintConfigInfo();
-
-  // initialize detector, rec_Model, vector_search
-  Feature::FeatureExtracter feature_extracter(config.config_file);
-  Detection::ObjectDetector detector(config.config_file);
-  VectorSearch searcher(config.config_file);
-
-  // config
-  const int batch_size = config.config_file["Global"]["batch_size"].as<int>();
-  bool visual_det = false;
-  if (config.config_file["Global"]["visual_det"].IsDefined()) {
-    visual_det = config.config_file["Global"]["visual_det"].as<bool>();
-  }
-  bool benchmark = false;
-  if (config.config_file["Global"]["benchmark"].IsDefined()) {
-    benchmark = config.config_file["Global"]["benchmark"].as<bool>();
-  }
-  int max_det_results = 5;
-  if (config.config_file["Global"]["max_det_results"].IsDefined()) {
-    max_det_results = config.config_file["Global"]["max_det_results"].as<int>();
-  }
-  float rec_nms_thresold = 0.05;
-  if (config.config_file["Global"]["rec_nms_thresold"].IsDefined()) {
-    rec_nms_thresold =
-        config.config_file["Global"]["rec_nms_thresold"].as<float>();
-  }
-
-  // load image_file_path
-  std::string path =
-      config.config_file["Global"]["infer_imgs"].as<std::string>();
-  std::vector<std::string> img_files_list;
-  if (cv::utils::fs::isDirectory(path)) {
-    std::vector<cv::String> filenames;
-    cv::glob(path, filenames);
-    for (auto f : filenames) {
-      img_files_list.push_back(f);
-    }
-  } else {
-    img_files_list.push_back(path);
-  }
-  std::cout << "img_file_list length: " << img_files_list.size() << std::endl;
-  // for time log
-  std::vector<double> cls_times = {0, 0, 0};
-  std::vector<double> det_times = {0, 0, 0};
-  std::vector<double> search_times = {0, 0, 0};
-  int instance_num = 0;
-  // for read images
-  std::vector<cv::Mat> batch_imgs;
-  std::vector<std::string> img_paths;
-  // for detection
-  std::vector<Detection::ObjectResult> det_result;
-  std::vector<int> det_bbox_num;
-  // for vector search
-  std::vector<float> features;
-  std::vector<float> feature;
-  // for nms
-  std::vector<int> indeices;
-
-  int warmup_iter = img_files_list.size() > 5 ? 5 : img_files_list.size();
-  if (benchmark) {
-    img_files_list.insert(img_files_list.begin(), img_files_list.begin(),
-                          img_files_list.begin() + warmup_iter);
-  }
-
-  for (int idx = 0; idx < img_files_list.size(); ++idx) {
-    std::string img_path = img_files_list[idx];
-    cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
-    if (!srcimg.data) {
-      std::cerr << "[ERROR] image read failed! image path: " << img_path
-                << "\n";
-      exit(-1);
-    }
-    cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
-
-    batch_imgs.push_back(srcimg);
-    img_paths.push_back(img_path);
-
-    // step1: get all detection results
-    DetPredictImage(batch_imgs, img_paths, batch_size, &detector, det_result,
-                    det_bbox_num, det_times, visual_det, false);
-
-    // select max_det_results bbox
-    if (det_result.size() > max_det_results) {
-      det_result.resize(max_det_results);
-    }
-    instance_num += det_result.size();
-
-    // step2: add the whole image for recognition to improve recall
-    Detection::ObjectResult result_whole_img = {
-        {0, 0, srcimg.cols - 1, srcimg.rows - 1}, 0, 1.0};
-    det_result.push_back(result_whole_img);
-    det_bbox_num[0] = det_result.size() + 1;
-
-    // step3: extract feature for all boxes in an inmage
-    SearchResult search_result;
-    for (int j = 0; j < det_result.size(); ++j) {
-      int w = det_result[j].rect[2] - det_result[j].rect[0];
-      int h = det_result[j].rect[3] - det_result[j].rect[1];
-      cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
-      cv::Mat crop_img = srcimg(rect);
-      feature_extracter.Run(crop_img, feature, cls_times);
-      features.insert(features.end(), feature.begin(), feature.end());
+    google::ParseCommandLineFlags(&argc, &argv, true);
+    std::string yaml_path = "";
+    if (FLAGS_config == "" && FLAGS_c == "") {
+        std::cerr << "[ERROR] usage: " << std::endl
+                  << argv[0] << " -c $yaml_path" << std::endl
+                  << "or:" << std::endl
+                  << argv[0] << " -config $yaml_path" << std::endl;
+        exit(1);
+    } else if (FLAGS_config != "") {
+        yaml_path = FLAGS_config;
+    } else {
+        yaml_path = FLAGS_c;
     }
 
-    // step4: get search result
-    auto search_start = std::chrono::steady_clock::now();
-    search_result = searcher.Search(features.data(), det_result.size());
-    auto search_end = std::chrono::steady_clock::now();
+    YamlConfig config(yaml_path);
+    config.PrintConfigInfo();
 
-    // nms for search result
-    for (int i = 0; i < det_result.size(); ++i) {
-      det_result[i].confidence = search_result.D[search_result.return_k * i];
+    // initialize detector, rec_Model, vector_search
+    Feature::FeatureExtracter feature_extracter(config.config_file);
+    Detection::ObjectDetector detector(config.config_file);
+    VectorSearch searcher(config.config_file);
+
+    // config
+    const int batch_size = config.config_file["Global"]["batch_size"].as<int>();
+    bool visual_det = false;
+    if (config.config_file["Global"]["visual_det"].IsDefined()) {
+        visual_det = config.config_file["Global"]["visual_det"].as<bool>();
     }
-    NMSBoxes(det_result, searcher.GetThreshold(), rec_nms_thresold, indeices);
-    auto nms_end = std::chrono::steady_clock::now();
-    std::chrono::duration<float> search_diff = search_end - search_start;
-    search_times[1] += double(search_diff.count() * 1000);
-
-    std::chrono::duration<float> nms_diff = nms_end - search_end;
-    search_times[2] += double(nms_diff.count() * 1000);
-
-    // print result
-    if (not benchmark or (benchmark and idx >= warmup_iter))
-      PrintResult(img_path, det_result, indeices, searcher, search_result);
-
-    // for postprocess
-    batch_imgs.clear();
-    img_paths.clear();
-    det_bbox_num.clear();
-    det_result.clear();
-    feature.clear();
-    features.clear();
-    indeices.clear();
-    if (benchmark and warmup_iter == idx + 1) {
-      det_times = {0, 0, 0};
-      cls_times = {0, 0, 0};
-      search_times = {0, 0, 0};
-      instance_num = 0;
+    bool benchmark = false;
+    if (config.config_file["Global"]["benchmark"].IsDefined()) {
+        benchmark = config.config_file["Global"]["benchmark"].as<bool>();
+    }
+    int max_det_results = 5;
+    if (config.config_file["Global"]["max_det_results"].IsDefined()) {
+        max_det_results = config.config_file["Global"]["max_det_results"].as<int>();
+    }
+    float rec_nms_thresold = 0.05;
+    if (config.config_file["Global"]["rec_nms_thresold"].IsDefined()) {
+        rec_nms_thresold =
+                config.config_file["Global"]["rec_nms_thresold"].as<float>();
     }
-  }
 
-  if (benchmark) {
-    std::string presion = "fp32";
-    if (config.config_file["Global"]["use_fp16"].IsDefined() and
-        config.config_file["Global"]["use_fp16"].as<bool>())
-      presion = "fp16";
-    bool use_gpu = config.config_file["Global"]["use_gpu"].as<bool>();
-    bool use_tensorrt = config.config_file["Global"]["use_tensorrt"].as<bool>();
-    bool enable_mkldnn =
-        config.config_file["Global"]["enable_mkldnn"].as<bool>();
-    int cpu_num_threads =
-        config.config_file["Global"]["cpu_num_threads"].as<int>();
-    int batch_size = config.config_file["Global"]["batch_size"].as<int>();
-    std::vector<int> shape =
-        config.config_file["Global"]["image_shape"].as<std::vector<int>>();
-    std::string det_shape = std::to_string(shape[0]);
-    for (int i = 1; i < shape.size(); ++i)
-      det_shape = det_shape + ", " + std::to_string(shape[i]);
+    // load image_file_path
+    std::string path =
+            config.config_file["Global"]["infer_imgs"].as<std::string>();
+    std::vector <std::string> img_files_list;
+    if (cv::utils::fs::isDirectory(path)) {
+        std::vector <cv::String> filenames;
+        cv::glob(path, filenames);
+        for (auto f : filenames) {
+            img_files_list.push_back(f);
+        }
+    } else {
+        img_files_list.push_back(path);
+    }
+    std::cout << "img_file_list length: " << img_files_list.size() << std::endl;
+    // for time log
+    std::vector<double> cls_times = {0, 0, 0};
+    std::vector<double> det_times = {0, 0, 0};
+    std::vector<double> search_times = {0, 0, 0};
+    int instance_num = 0;
+    // for read images
+    std::vector <cv::Mat> batch_imgs;
+    std::vector <std::string> img_paths;
+    // for detection
+    std::vector <Detection::ObjectResult> det_result;
+    std::vector<int> det_bbox_num;
+    // for vector search
+    std::vector<float> features;
+    std::vector<float> feature;
+    // for nms
+    std::vector<int> indeices;
 
-    AutoLogger autolog_det("Det", use_gpu, use_tensorrt, enable_mkldnn,
-                           cpu_num_threads, batch_size, det_shape, presion,
-                           det_times, img_files_list.size() - warmup_iter);
-    autolog_det.report();
-    AutoLogger autolog_rec("Rec", use_gpu, use_tensorrt, enable_mkldnn,
-                           cpu_num_threads, batch_size, "3, 224, 224", presion,
-                           cls_times, instance_num);
-    autolog_rec.report();
-    AutoLogger autolog_search("Search", false, use_tensorrt, enable_mkldnn,
-                              cpu_num_threads, batch_size, "dynamic", presion,
-                              search_times, instance_num);
-    autolog_search.report();
-  }
-  return 0;
+    int warmup_iter = img_files_list.size() > 5 ? 5 : img_files_list.size();
+    if (benchmark) {
+        img_files_list.insert(img_files_list.begin(), img_files_list.begin(),
+                              img_files_list.begin() + warmup_iter);
+    }
+
+    for (int idx = 0; idx < img_files_list.size(); ++idx) {
+        std::string img_path = img_files_list[idx];
+        cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
+        if (!srcimg.data) {
+            std::cerr << "[ERROR] image read failed! image path: " << img_path
+                      << "\n";
+            exit(-1);
+        }
+        cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
+
+        batch_imgs.push_back(srcimg);
+        img_paths.push_back(img_path);
+
+        // step1: get all detection results
+        DetPredictImage(batch_imgs, img_paths, batch_size, &detector, det_result,
+                        det_bbox_num, det_times, visual_det, false);
+
+        // select max_det_results bbox
+        if (det_result.size() > max_det_results) {
+            det_result.resize(max_det_results);
+        }
+        instance_num += det_result.size();
+
+        // step2: add the whole image for recognition to improve recall
+        Detection::ObjectResult result_whole_img = {
+                {0, 0, srcimg.cols - 1, srcimg.rows - 1}, 0, 1.0};
+        det_result.push_back(result_whole_img);
+        det_bbox_num[0] = det_result.size() + 1;
+
+        // step3: extract features for all boxes in an image
+        SearchResult search_result;
+        for (int j = 0; j < det_result.size(); ++j) {
+            int w = det_result[j].rect[2] - det_result[j].rect[0];
+            int h = det_result[j].rect[3] - det_result[j].rect[1];
+            cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
+            cv::Mat crop_img = srcimg(rect);
+            feature_extracter.Run(crop_img, feature, cls_times);
+            features.insert(features.end(), feature.begin(), feature.end());
+        }
+
+        // step4: get search result
+        auto search_start = std::chrono::steady_clock::now();
+        search_result = searcher.Search(features.data(), det_result.size());
+        auto search_end = std::chrono::steady_clock::now();
+
+        // nms for search result
+        for (int i = 0; i < det_result.size(); ++i) {
+            det_result[i].confidence = search_result.D[search_result.return_k * i];
+        }
+        NMSBoxes(det_result, searcher.GetThreshold(), rec_nms_thresold, indeices);
+        auto nms_end = std::chrono::steady_clock::now();
+        std::chrono::duration<float> search_diff = search_end - search_start;
+        search_times[1] += double(search_diff.count() * 1000);
+
+        std::chrono::duration<float> nms_diff = nms_end - search_end;
+        search_times[2] += double(nms_diff.count() * 1000);
+
+        // print result
+        if (not benchmark or (benchmark and idx >= warmup_iter))
+            PrintResult(img_path, det_result, indeices, searcher, search_result);
+
+        // for postprocess
+        batch_imgs.clear();
+        img_paths.clear();
+        det_bbox_num.clear();
+        det_result.clear();
+        feature.clear();
+        features.clear();
+        indeices.clear();
+        if (benchmark and warmup_iter == idx + 1) {
+            det_times = {0, 0, 0};
+            cls_times = {0, 0, 0};
+            search_times = {0, 0, 0};
+            instance_num = 0;
+        }
+    }
+
+    if (benchmark) {
+        std::string presion = "fp32";
+        if (config.config_file["Global"]["use_fp16"].IsDefined() and
+            config.config_file["Global"]["use_fp16"].as<bool>())
+            presion = "fp16";
+        bool use_gpu = config.config_file["Global"]["use_gpu"].as<bool>();
+        bool use_tensorrt = config.config_file["Global"]["use_tensorrt"].as<bool>();
+        bool enable_mkldnn =
+                config.config_file["Global"]["enable_mkldnn"].as<bool>();
+        int cpu_num_threads =
+                config.config_file["Global"]["cpu_num_threads"].as<int>();
+        int batch_size = config.config_file["Global"]["batch_size"].as<int>();
+        std::vector<int> shape =
+                config.config_file["Global"]["image_shape"].as < std::vector < int >> ();
+        std::string det_shape = std::to_string(shape[0]);
+        for (int i = 1; i < shape.size(); ++i)
+            det_shape = det_shape + ", " + std::to_string(shape[i]);
+
+        AutoLogger autolog_det("Det", use_gpu, use_tensorrt, enable_mkldnn,
+                               cpu_num_threads, batch_size, det_shape, presion,
+                               det_times, img_files_list.size() - warmup_iter);
+        autolog_det.report();
+        AutoLogger autolog_rec("Rec", use_gpu, use_tensorrt, enable_mkldnn,
+                               cpu_num_threads, batch_size, "3, 224, 224", presion,
+                               cls_times, instance_num);
+        autolog_rec.report();
+        AutoLogger autolog_search("Search", false, use_tensorrt, enable_mkldnn,
+                                  cpu_num_threads, batch_size, "dynamic", presion,
+                                  search_times, instance_num);
+        autolog_search.report();
+    }
+    return 0;
 }