diff --git a/deploy/cpp_shitu/include/feature_extracter.h b/deploy/cpp_shitu/include/feature_extracter.h
index 43156986a..e83caee73 100644
--- a/deploy/cpp_shitu/include/feature_extracter.h
+++ b/deploy/cpp_shitu/include/feature_extracter.h
@@ -35,77 +35,76 @@ using namespace paddle_infer;
 
 namespace Feature {
 
-class FeatureExtracter {
-public:
-  explicit FeatureExtracter(const YAML::Node &config_file) {
-    this->use_gpu_ = config_file["Global"]["use_gpu"].as<bool>();
-    if (config_file["Global"]["gpu_id"].IsDefined())
-      this->gpu_id_ = config_file["Global"]["gpu_id"].as<int>();
-    else
-      this->gpu_id_ = 0;
-    this->gpu_mem_ = config_file["Global"]["gpu_mem"].as<int>();
-    this->cpu_math_library_num_threads_ =
-        config_file["Global"]["cpu_num_threads"].as<int>();
-    this->use_mkldnn_ = config_file["Global"]["enable_mkldnn"].as<bool>();
-    this->use_tensorrt_ = config_file["Global"]["use_tensorrt"].as<bool>();
-    this->use_fp16_ = config_file["Global"]["use_fp16"].as<bool>();
+    class FeatureExtracter {
+    public:
+        explicit FeatureExtracter(const YAML::Node &config_file) {
+            this->use_gpu_ = config_file["Global"]["use_gpu"].as<bool>();
+            if (config_file["Global"]["gpu_id"].IsDefined())
+                this->gpu_id_ = config_file["Global"]["gpu_id"].as<int>();
+            else
+                this->gpu_id_ = 0;
+            this->gpu_mem_ = config_file["Global"]["gpu_mem"].as<int>();
+            this->cpu_math_library_num_threads_ =
+                    config_file["Global"]["cpu_num_threads"].as<int>();
+            this->use_mkldnn_ = config_file["Global"]["enable_mkldnn"].as<bool>();
+            this->use_tensorrt_ = config_file["Global"]["use_tensorrt"].as<bool>();
+            this->use_fp16_ = config_file["Global"]["use_fp16"].as<bool>();
 
-    this->cls_model_path_ =
-        config_file["Global"]["rec_inference_model_dir"].as<std::string>() +
-        OS_PATH_SEP + "inference.pdmodel";
-    this->cls_params_path_ =
-        config_file["Global"]["rec_inference_model_dir"].as<std::string>() +
-        OS_PATH_SEP + "inference.pdiparams";
-    this->resize_size_ =
-        config_file["RecPreProcess"]["transform_ops"][0]["ResizeImage"]["size"]
-            .as<int>();
-    this->scale_ = config_file["RecPreProcess"]["transform_ops"][1]
-                              ["NormalizeImage"]["scale"]
-                                  .as<float>();
-    this->mean_ = config_file["RecPreProcess"]["transform_ops"][1]
-                             ["NormalizeImage"]["mean"]
-                                 .as<std::vector<float>>();
-    this->std_ = config_file["RecPreProcess"]["transform_ops"][1]
-                            ["NormalizeImage"]["std"]
-                                .as<std::vector<float>>();
-    if (config_file["Global"]["rec_feature_normlize"].IsDefined())
-      this->feature_norm =
-          config_file["Global"]["rec_feature_normlize"].as<bool>();
+            this->cls_model_path_ =
+                    config_file["Global"]["rec_inference_model_dir"].as<std::string>() +
+                    OS_PATH_SEP + "inference.pdmodel";
+            this->cls_params_path_ =
+                    config_file["Global"]["rec_inference_model_dir"].as<std::string>() +
+                    OS_PATH_SEP + "inference.pdiparams";
+            this->resize_size_ =
+                    config_file["RecPreProcess"]["transform_ops"][0]["ResizeImage"]["size"]
+                            .as<int>();
+            this->scale_ = config_file["RecPreProcess"]["transform_ops"][1]["NormalizeImage"]["scale"].as<float>();
+            this->mean_ = config_file["RecPreProcess"]["transform_ops"][1]
+                          ["NormalizeImage"]["mean"]
+                                  .as < std::vector < float >> ();
+            this->std_ = config_file["RecPreProcess"]["transform_ops"][1]
+                         ["NormalizeImage"]["std"]
+                                 .as < std::vector < float >> ();
+            if (config_file["Global"]["rec_feature_normlize"].IsDefined())
+                this->feature_norm =
+                        config_file["Global"]["rec_feature_normlize"].as<bool>();
 
-    LoadModel(cls_model_path_, cls_params_path_);
-  }
+            LoadModel(cls_model_path_, cls_params_path_);
+        }
 
-  // Load Paddle inference model
-  void LoadModel(const std::string &model_path, const std::string &params_path);
+        // Load Paddle inference model
+        void LoadModel(const std::string &model_path, const std::string &params_path);
 
-  // Run predictor
-  void Run(cv::Mat &img, std::vector<float> &out_data,
-           std::vector<double> &times);
-  void FeatureNorm(std::vector<float> &feature);
+        // Run predictor
+        void Run(cv::Mat &img, std::vector<float> &out_data,
+                 std::vector<double> &times);
 
-  std::shared_ptr<Predictor> predictor_;
+        void FeatureNorm(std::vector<float> &feature);
 
-private:
-  bool use_gpu_ = false;
-  int gpu_id_ = 0;
-  int gpu_mem_ = 4000;
-  int cpu_math_library_num_threads_ = 4;
-  bool use_mkldnn_ = false;
-  bool use_tensorrt_ = false;
-  bool feature_norm = true;
-  bool use_fp16_ = false;
-  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
-  std::vector<float> std_ = {0.229f, 0.224f, 0.225f};
-  float scale_ = 0.00392157;
-  int resize_size_ = 224;
-  int resize_short_ = 224;
-  std::string cls_model_path_;
-  std::string cls_params_path_;
+        std::shared_ptr <Predictor> predictor_;
 
-  // pre-process
-  ResizeImg resize_op_;
-  Normalize normalize_op_;
-  Permute permute_op_;
-};
+    private:
+        bool use_gpu_ = false;
+        int gpu_id_ = 0;
+        int gpu_mem_ = 4000;
+        int cpu_math_library_num_threads_ = 4;
+        bool use_mkldnn_ = false;
+        bool use_tensorrt_ = false;
+        bool feature_norm = true;
+        bool use_fp16_ = false;
+        std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
+        std::vector<float> std_ = {0.229f, 0.224f, 0.225f};
+        float scale_ = 0.00392157;
+        int resize_size_ = 224;
+        int resize_short_ = 224;
+        std::string cls_model_path_;
+        std::string cls_params_path_;
+
+        // pre-process
+        ResizeImg resize_op_;
+        Normalize normalize_op_;
+        Permute permute_op_;
+    };
 
 } // namespace Feature
diff --git a/deploy/cpp_shitu/include/nms.h b/deploy/cpp_shitu/include/nms.h
index 52d800550..02956246a 100644
--- a/deploy/cpp_shitu/include/nms.h
+++ b/deploy/cpp_shitu/include/nms.h
@@ -17,21 +17,21 @@
 #include <algorithm>
 #include <include/object_detector.h>
 
-template <typename T>
+template<typename T>
 static inline bool SortScorePairDescend(const std::pair<float, T> &pair1,
                                         const std::pair<float, T> &pair2) {
-  return pair1.first > pair2.first;
+    return pair1.first > pair2.first;
 }
 
 float RectOverlap(const Detection::ObjectResult &a,
                   const Detection::ObjectResult &b) {
-  float Aa = (a.rect[2] - a.rect[0] + 1) * (a.rect[3] - a.rect[1] + 1);
-  float Ab = (b.rect[2] - b.rect[0] + 1) * (b.rect[3] - b.rect[1] + 1);
+    float Aa = (a.rect[2] - a.rect[0] + 1) * (a.rect[3] - a.rect[1] + 1);
+    float Ab = (b.rect[2] - b.rect[0] + 1) * (b.rect[3] - b.rect[1] + 1);
 
-  int iou_w = max(min(a.rect[2], b.rect[2]) - max(a.rect[0], b.rect[0]) + 1, 0);
-  int iou_h = max(min(a.rect[3], b.rect[3]) - max(a.rect[1], b.rect[1]) + 1, 0);
-  float Aab = iou_w * iou_h;
-  return Aab / (Aa + Ab - Aab);
+    int iou_w = max(min(a.rect[2], b.rect[2]) - max(a.rect[0], b.rect[0]) + 1, 0);
+    int iou_h = max(min(a.rect[3], b.rect[3]) - max(a.rect[1], b.rect[1]) + 1, 0);
+    float Aab = iou_w * iou_h;
+    return Aab / (Aa + Ab - Aab);
 }
 
 // Get max scores with corresponding indices.
@@ -40,46 +40,46 @@ float RectOverlap(const Detection::ObjectResult &a,
 //    top_k: if -1, keep all; otherwise, keep at most top_k.
 //    score_index_vec: store the sorted (score, index) pair.
 inline void
-GetMaxScoreIndex(const std::vector<Detection::ObjectResult> &det_result,
+GetMaxScoreIndex(const std::vector <Detection::ObjectResult> &det_result,
                  const float threshold,
-                 std::vector<std::pair<float, int>> &score_index_vec) {
-  // Generate index score pairs.
-  for (size_t i = 0; i < det_result.size(); ++i) {
-    if (det_result[i].confidence > threshold) {
-      score_index_vec.push_back(std::make_pair(det_result[i].confidence, i));
+                 std::vector <std::pair<float, int>> &score_index_vec) {
+    // Generate index score pairs.
+    for (size_t i = 0; i < det_result.size(); ++i) {
+        if (det_result[i].confidence > threshold) {
+            score_index_vec.push_back(std::make_pair(det_result[i].confidence, i));
+        }
     }
-  }
 
-  // Sort the score pair according to the scores in descending order
-  std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
-                   SortScorePairDescend<int>);
+    // Sort the score pair according to the scores in descending order
+    std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
+                     SortScorePairDescend<int>);
 
-  // // Keep top_k scores if needed.
-  // if (top_k > 0 && top_k < (int)score_index_vec.size())
-  // {
-  //     score_index_vec.resize(top_k);
-  // }
+    // // Keep top_k scores if needed.
+    // if (top_k > 0 && top_k < (int)score_index_vec.size())
+    // {
+    //     score_index_vec.resize(top_k);
+    // }
 }
 
-void NMSBoxes(const std::vector<Detection::ObjectResult> det_result,
+void NMSBoxes(const std::vector <Detection::ObjectResult> det_result,
               const float score_threshold, const float nms_threshold,
               std::vector<int> &indices) {
-  int a = 1;
-  // Get top_k scores (with corresponding indices).
-  std::vector<std::pair<float, int>> score_index_vec;
-  GetMaxScoreIndex(det_result, score_threshold, score_index_vec);
+    int a = 1;
+    // Collect (score, index) pairs above score_threshold, sorted by descending score.
+    std::vector <std::pair<float, int>> score_index_vec;
+    GetMaxScoreIndex(det_result, score_threshold, score_index_vec);
 
-  // Do nms
-  indices.clear();
-  for (size_t i = 0; i < score_index_vec.size(); ++i) {
-    const int idx = score_index_vec[i].second;
-    bool keep = true;
-    for (int k = 0; k < (int)indices.size() && keep; ++k) {
-      const int kept_idx = indices[k];
-      float overlap = RectOverlap(det_result[idx], det_result[kept_idx]);
-      keep = overlap <= nms_threshold;
+    // Do nms
+    indices.clear();
+    for (size_t i = 0; i < score_index_vec.size(); ++i) {
+        const int idx = score_index_vec[i].second;
+        bool keep = true;
+        for (int k = 0; k < (int) indices.size() && keep; ++k) {
+            const int kept_idx = indices[k];
+            float overlap = RectOverlap(det_result[idx], det_result[kept_idx]);
+            keep = overlap <= nms_threshold;
+        }
+        if (keep)
+            indices.push_back(idx);
     }
-    if (keep)
-      indices.push_back(idx);
-  }
 }
diff --git a/deploy/cpp_shitu/include/object_detector.h b/deploy/cpp_shitu/include/object_detector.h
index bc5f0c06f..5bfc56253 100644
--- a/deploy/cpp_shitu/include/object_detector.h
+++ b/deploy/cpp_shitu/include/object_detector.h
@@ -33,103 +33,106 @@ using namespace paddle_infer;
 
 namespace Detection {
 // Object Detection Result
-struct ObjectResult {
-  // Rectangle coordinates of detected object: left, right, top, down
-  std::vector<int> rect;
-  // Class id of detected object
-  int class_id;
-  // Confidence of detected object
-  float confidence;
-};
+    struct ObjectResult {
+        // Rectangle coordinates of detected object: left, top, right, bottom
+        std::vector<int> rect;
+        // Class id of detected object
+        int class_id;
+        // Confidence of detected object
+        float confidence;
+    };
 
 // Generate visualization colormap for each class
-std::vector<int> GenerateColorMap(int num_class);
+    std::vector<int> GenerateColorMap(int num_class);
 
 // Visualiztion Detection Result
-cv::Mat VisualizeResult(const cv::Mat &img,
-                        const std::vector<ObjectResult> &results,
-                        const std::vector<std::string> &lables,
-                        const std::vector<int> &colormap, const bool is_rbox);
+    cv::Mat VisualizeResult(const cv::Mat &img,
+                            const std::vector <ObjectResult> &results,
+                            const std::vector <std::string> &lables,
+                            const std::vector<int> &colormap, const bool is_rbox);
 
-class ObjectDetector {
-public:
-  explicit ObjectDetector(const YAML::Node &config_file) {
-    this->use_gpu_ = config_file["Global"]["use_gpu"].as<bool>();
-    if (config_file["Global"]["gpu_id"].IsDefined())
-      this->gpu_id_ = config_file["Global"]["gpu_id"].as<int>();
-    this->gpu_mem_ = config_file["Global"]["gpu_mem"].as<int>();
-    this->cpu_math_library_num_threads_ =
-        config_file["Global"]["cpu_num_threads"].as<int>();
-    this->use_mkldnn_ = config_file["Global"]["enable_mkldnn"].as<bool>();
-    this->use_tensorrt_ = config_file["Global"]["use_tensorrt"].as<bool>();
-    this->use_fp16_ = config_file["Global"]["use_fp16"].as<bool>();
-    this->model_dir_ =
-        config_file["Global"]["det_inference_model_dir"].as<std::string>();
-    this->threshold_ = config_file["Global"]["threshold"].as<float>();
-    this->max_det_results_ = config_file["Global"]["max_det_results"].as<int>();
-    this->image_shape_ =
-        config_file["Global"]["image_shape"].as<std::vector<int>>();
-    this->label_list_ =
-        config_file["Global"]["labe_list"].as<std::vector<std::string>>();
-    this->ir_optim_ = config_file["Global"]["ir_optim"].as<bool>();
-    this->batch_size_ = config_file["Global"]["batch_size"].as<int>();
+    class ObjectDetector {
+    public:
+        explicit ObjectDetector(const YAML::Node &config_file) {
+            this->use_gpu_ = config_file["Global"]["use_gpu"].as<bool>();
+            if (config_file["Global"]["gpu_id"].IsDefined())
+                this->gpu_id_ = config_file["Global"]["gpu_id"].as<int>();
+            this->gpu_mem_ = config_file["Global"]["gpu_mem"].as<int>();
+            this->cpu_math_library_num_threads_ =
+                    config_file["Global"]["cpu_num_threads"].as<int>();
+            this->use_mkldnn_ = config_file["Global"]["enable_mkldnn"].as<bool>();
+            this->use_tensorrt_ = config_file["Global"]["use_tensorrt"].as<bool>();
+            this->use_fp16_ = config_file["Global"]["use_fp16"].as<bool>();
+            this->model_dir_ =
+                    config_file["Global"]["det_inference_model_dir"].as<std::string>();
+            this->threshold_ = config_file["Global"]["threshold"].as<float>();
+            this->max_det_results_ = config_file["Global"]["max_det_results"].as<int>();
+            this->image_shape_ =
+                    config_file["Global"]["image_shape"].as < std::vector < int >> ();
+            this->label_list_ =
+                    config_file["Global"]["labe_list"].as < std::vector < std::string >> ();
+            this->ir_optim_ = config_file["Global"]["ir_optim"].as<bool>();
+            this->batch_size_ = config_file["Global"]["batch_size"].as<int>();
 
-    preprocessor_.Init(config_file["DetPreProcess"]["transform_ops"]);
-    LoadModel(model_dir_, batch_size_, run_mode);
-  }
+            preprocessor_.Init(config_file["DetPreProcess"]["transform_ops"]);
+            LoadModel(model_dir_, batch_size_, run_mode);
+        }
 
-  // Load Paddle inference model
-  void LoadModel(const std::string &model_dir, const int batch_size = 1,
-                 const std::string &run_mode = "fluid");
+        // Load Paddle inference model
+        void LoadModel(const std::string &model_dir, const int batch_size = 1,
+                       const std::string &run_mode = "fluid");
 
-  // Run predictor
-  void Predict(const std::vector<cv::Mat> imgs, const int warmup = 0,
-               const int repeats = 1,
-               std::vector<ObjectResult> *result = nullptr,
-               std::vector<int> *bbox_num = nullptr,
-               std::vector<double> *times = nullptr);
-  const std::vector<std::string> &GetLabelList() const {
-    return this->label_list_;
-  }
-  const float &GetThreshold() const { return this->threshold_; }
+        // Run predictor
+        void Predict(const std::vector <cv::Mat> imgs, const int warmup = 0,
+                     const int repeats = 1,
+                     std::vector <ObjectResult> *result = nullptr,
+                     std::vector<int> *bbox_num = nullptr,
+                     std::vector<double> *times = nullptr);
 
-private:
-  bool use_gpu_ = true;
-  int gpu_id_ = 0;
-  int gpu_mem_ = 800;
-  int cpu_math_library_num_threads_ = 6;
-  std::string run_mode = "fluid";
-  bool use_mkldnn_ = false;
-  bool use_tensorrt_ = false;
-  bool batch_size_ = 1;
-  bool use_fp16_ = false;
-  std::string model_dir_;
-  float threshold_ = 0.5;
-  float max_det_results_ = 5;
-  std::vector<int> image_shape_ = {3, 640, 640};
-  std::vector<std::string> label_list_;
-  bool ir_optim_ = true;
-  bool det_permute_ = true;
-  bool det_postprocess_ = true;
-  int min_subgraph_size_ = 30;
-  bool use_dynamic_shape_ = false;
-  int trt_min_shape_ = 1;
-  int trt_max_shape_ = 1280;
-  int trt_opt_shape_ = 640;
-  bool trt_calib_mode_ = false;
+        const std::vector <std::string> &GetLabelList() const {
+            return this->label_list_;
+        }
 
-  // Preprocess image and copy data to input buffer
-  void Preprocess(const cv::Mat &image_mat);
-  // Postprocess result
-  void Postprocess(const std::vector<cv::Mat> mats,
-                   std::vector<ObjectResult> *result, std::vector<int> bbox_num,
-                   bool is_rbox);
+        const float &GetThreshold() const { return this->threshold_; }
 
-  std::shared_ptr<Predictor> predictor_;
-  Preprocessor preprocessor_;
-  ImageBlob inputs_;
-  std::vector<float> output_data_;
-  std::vector<int> out_bbox_num_data_;
-};
+    private:
+        bool use_gpu_ = true;
+        int gpu_id_ = 0;
+        int gpu_mem_ = 800;
+        int cpu_math_library_num_threads_ = 6;
+        std::string run_mode = "fluid";
+        bool use_mkldnn_ = false;
+        bool use_tensorrt_ = false;
+        bool batch_size_ = 1;
+        bool use_fp16_ = false;
+        std::string model_dir_;
+        float threshold_ = 0.5;
+        float max_det_results_ = 5;
+        std::vector<int> image_shape_ = {3, 640, 640};
+        std::vector <std::string> label_list_;
+        bool ir_optim_ = true;
+        bool det_permute_ = true;
+        bool det_postprocess_ = true;
+        int min_subgraph_size_ = 30;
+        bool use_dynamic_shape_ = false;
+        int trt_min_shape_ = 1;
+        int trt_max_shape_ = 1280;
+        int trt_opt_shape_ = 640;
+        bool trt_calib_mode_ = false;
+
+        // Preprocess image and copy data to input buffer
+        void Preprocess(const cv::Mat &image_mat);
+
+        // Postprocess result
+        void Postprocess(const std::vector <cv::Mat> mats,
+                         std::vector <ObjectResult> *result, std::vector<int> bbox_num,
+                         bool is_rbox);
+
+        std::shared_ptr <Predictor> predictor_;
+        Preprocessor preprocessor_;
+        ImageBlob inputs_;
+        std::vector<float> output_data_;
+        std::vector<int> out_bbox_num_data_;
+    };
 
 } // namespace Detection
diff --git a/deploy/cpp_shitu/include/preprocess_op.h b/deploy/cpp_shitu/include/preprocess_op.h
index 93de9332c..db4c6b2c5 100644
--- a/deploy/cpp_shitu/include/preprocess_op.h
+++ b/deploy/cpp_shitu/include/preprocess_op.h
@@ -31,27 +31,27 @@ using namespace std;
 
 namespace Feature {
 
-class Normalize {
-public:
-  virtual void Run(cv::Mat *im, const std::vector<float> &mean,
-                   const std::vector<float> &std, float scale);
-};
+    class Normalize {
+    public:
+        virtual void Run(cv::Mat *im, const std::vector<float> &mean,
+                         const std::vector<float> &std, float scale);
+    };
 
 // RGB -> CHW
-class Permute {
-public:
-  virtual void Run(const cv::Mat *im, float *data);
-};
+    class Permute {
+    public:
+        virtual void Run(const cv::Mat *im, float *data);
+    };
 
-class CenterCropImg {
-public:
-  virtual void Run(cv::Mat &im, const int crop_size = 224);
-};
+    class CenterCropImg {
+    public:
+        virtual void Run(cv::Mat &im, const int crop_size = 224);
+    };
 
-class ResizeImg {
-public:
-  virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
-                   int size = 0);
-};
+    class ResizeImg {
+    public:
+        virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
+                         int size = 0);
+    };
 
 } // namespace Feature
diff --git a/deploy/cpp_shitu/include/preprocess_op_det.h b/deploy/cpp_shitu/include/preprocess_op_det.h
index 57d9ad724..cd663409c 100644
--- a/deploy/cpp_shitu/include/preprocess_op_det.h
+++ b/deploy/cpp_shitu/include/preprocess_op_det.h
@@ -31,125 +31,128 @@
 namespace Detection {
 
 // Object for storing all preprocessed data
-class ImageBlob {
-public:
-  // image width and height
-  std::vector<float> im_shape_;
-  // Buffer for image data after preprocessing
-  std::vector<float> im_data_;
-  // in net data shape(after pad)
-  std::vector<float> in_net_shape_;
-  // Evaluation image width and height
-  // std::vector<float>  eval_im_size_f_;
-  // Scale factor for image size to origin image size
-  std::vector<float> scale_factor_;
-};
+    class ImageBlob {
+    public:
+        // image width and height
+        std::vector<float> im_shape_;
+        // Buffer for image data after preprocessing
+        std::vector<float> im_data_;
+        // in net data shape(after pad)
+        std::vector<float> in_net_shape_;
+        // Evaluation image width and height
+        // std::vector<float>  eval_im_size_f_;
+        // Scale factor for image size to origin image size
+        std::vector<float> scale_factor_;
+    };
 
 // Abstraction of preprocessing opration class
-class PreprocessOp {
-public:
-  virtual void Init(const YAML::Node &item) = 0;
-  virtual void Run(cv::Mat *im, ImageBlob *data) = 0;
-};
+    class PreprocessOp {
+    public:
+        virtual void Init(const YAML::Node &item) = 0;
 
-class InitInfo : public PreprocessOp {
-public:
-  virtual void Init(const YAML::Node &item) {}
-  virtual void Run(cv::Mat *im, ImageBlob *data);
-};
+        virtual void Run(cv::Mat *im, ImageBlob *data) = 0;
+    };
 
-class NormalizeImage : public PreprocessOp {
-public:
-  virtual void Init(const YAML::Node &item) {
-    mean_ = item["mean"].as<std::vector<float>>();
-    scale_ = item["std"].as<std::vector<float>>();
-    is_scale_ = item["is_scale"].as<bool>();
-  }
+    class InitInfo : public PreprocessOp {
+    public:
+        virtual void Init(const YAML::Node &item) {}
 
-  virtual void Run(cv::Mat *im, ImageBlob *data);
+        virtual void Run(cv::Mat *im, ImageBlob *data);
+    };
 
-private:
-  // CHW or HWC
-  std::vector<float> mean_;
-  std::vector<float> scale_;
-  bool is_scale_;
-};
+    class NormalizeImage : public PreprocessOp {
+    public:
+        virtual void Init(const YAML::Node &item) {
+            mean_ = item["mean"].as < std::vector < float >> ();
+            scale_ = item["std"].as < std::vector < float >> ();
+            is_scale_ = item["is_scale"].as<bool>();
+        }
 
-class Permute : public PreprocessOp {
-public:
-  virtual void Init(const YAML::Node &item) {}
-  virtual void Run(cv::Mat *im, ImageBlob *data);
-};
+        virtual void Run(cv::Mat *im, ImageBlob *data);
 
-class Resize : public PreprocessOp {
-public:
-  virtual void Init(const YAML::Node &item) {
-    interp_ = item["interp"].as<int>();
-    // max_size_ = item["target_size"].as<int>();
-    keep_ratio_ = item["keep_ratio"].as<bool>();
-    target_size_ = item["target_size"].as<std::vector<int>>();
-  }
+    private:
+        // CHW or HWC
+        std::vector<float> mean_;
+        std::vector<float> scale_;
+        bool is_scale_;
+    };
 
-  // Compute best resize scale for x-dimension, y-dimension
-  std::pair<double, double> GenerateScale(const cv::Mat &im);
+    class Permute : public PreprocessOp {
+    public:
+        virtual void Init(const YAML::Node &item) {}
 
-  virtual void Run(cv::Mat *im, ImageBlob *data);
+        virtual void Run(cv::Mat *im, ImageBlob *data);
+    };
 
-private:
-  int interp_ = 2;
-  bool keep_ratio_;
-  std::vector<int> target_size_;
-  std::vector<int> in_net_shape_;
-};
+    class Resize : public PreprocessOp {
+    public:
+        virtual void Init(const YAML::Node &item) {
+            interp_ = item["interp"].as<int>();
+            // max_size_ = item["target_size"].as<int>();
+            keep_ratio_ = item["keep_ratio"].as<bool>();
+            target_size_ = item["target_size"].as < std::vector < int >> ();
+        }
+
+        // Compute best resize scale for x-dimension, y-dimension
+        std::pair<double, double> GenerateScale(const cv::Mat &im);
+
+        virtual void Run(cv::Mat *im, ImageBlob *data);
+
+    private:
+        int interp_ = 2;
+        bool keep_ratio_;
+        std::vector<int> target_size_;
+        std::vector<int> in_net_shape_;
+    };
 
 // Models with FPN need input shape % stride == 0
-class PadStride : public PreprocessOp {
-public:
-  virtual void Init(const YAML::Node &item) {
-    stride_ = item["stride"].as<int>();
-  }
+    class PadStride : public PreprocessOp {
+    public:
+        virtual void Init(const YAML::Node &item) {
+            stride_ = item["stride"].as<int>();
+        }
 
-  virtual void Run(cv::Mat *im, ImageBlob *data);
+        virtual void Run(cv::Mat *im, ImageBlob *data);
 
-private:
-  int stride_;
-};
+    private:
+        int stride_;
+    };
 
-class Preprocessor {
-public:
-  void Init(const YAML::Node &config_node) {
-    // initialize image info at first
-    ops_["InitInfo"] = std::make_shared<InitInfo>();
-    for (int i = 0; i < config_node.size(); ++i) {
-      if (config_node[i]["DetResize"].IsDefined()) {
-        ops_["Resize"] = std::make_shared<Resize>();
-        ops_["Resize"]->Init(config_node[i]["DetResize"]);
-      }
+    class Preprocessor {
+    public:
+        void Init(const YAML::Node &config_node) {
+            // initialize image info at first
+            ops_["InitInfo"] = std::make_shared<InitInfo>();
+            for (int i = 0; i < config_node.size(); ++i) {
+                if (config_node[i]["DetResize"].IsDefined()) {
+                    ops_["Resize"] = std::make_shared<Resize>();
+                    ops_["Resize"]->Init(config_node[i]["DetResize"]);
+                }
 
-      if (config_node[i]["DetNormalizeImage"].IsDefined()) {
-        ops_["NormalizeImage"] = std::make_shared<NormalizeImage>();
-        ops_["NormalizeImage"]->Init(config_node[i]["DetNormalizeImage"]);
-      }
+                if (config_node[i]["DetNormalizeImage"].IsDefined()) {
+                    ops_["NormalizeImage"] = std::make_shared<NormalizeImage>();
+                    ops_["NormalizeImage"]->Init(config_node[i]["DetNormalizeImage"]);
+                }
 
-      if (config_node[i]["DetPermute"].IsDefined()) {
-        ops_["Permute"] = std::make_shared<Permute>();
-        ops_["Permute"]->Init(config_node[i]["DetPermute"]);
-      }
+                if (config_node[i]["DetPermute"].IsDefined()) {
+                    ops_["Permute"] = std::make_shared<Permute>();
+                    ops_["Permute"]->Init(config_node[i]["DetPermute"]);
+                }
 
-      if (config_node[i]["DetPadStrid"].IsDefined()) {
-        ops_["PadStride"] = std::make_shared<PadStride>();
-        ops_["PadStride"]->Init(config_node[i]["DetPadStrid"]);
-      }
-    }
-  }
+                if (config_node[i]["DetPadStrid"].IsDefined()) {
+                    ops_["PadStride"] = std::make_shared<PadStride>();
+                    ops_["PadStride"]->Init(config_node[i]["DetPadStrid"]);
+                }
+            }
+        }
 
-  void Run(cv::Mat *im, ImageBlob *data);
+        void Run(cv::Mat *im, ImageBlob *data);
 
-public:
-  static const std::vector<std::string> RUN_ORDER;
+    public:
+        static const std::vector <std::string> RUN_ORDER;
 
-private:
-  std::unordered_map<std::string, std::shared_ptr<PreprocessOp>> ops_;
-};
+    private:
+        std::unordered_map <std::string, std::shared_ptr<PreprocessOp>> ops_;
+    };
 
 } // namespace Detection
diff --git a/deploy/cpp_shitu/include/vector_search.h b/deploy/cpp_shitu/include/vector_search.h
index 7e42c6050..4b354f78d 100644
--- a/deploy/cpp_shitu/include/vector_search.h
+++ b/deploy/cpp_shitu/include/vector_search.h
@@ -26,40 +26,45 @@
 #include <map>
 
 struct SearchResult {
-  std::vector<faiss::Index::idx_t> I;
-  std::vector<float> D;
-  int return_k;
+    std::vector <faiss::Index::idx_t> I;
+    std::vector<float> D;
+    int return_k;
 };
 
 class VectorSearch {
 public:
-  explicit VectorSearch(const YAML::Node &config_file) {
-    // IndexProcess
-    this->index_dir =
-        config_file["IndexProcess"]["index_dir"].as<std::string>();
-    this->return_k = config_file["IndexProcess"]["return_k"].as<int>();
-    this->score_thres = config_file["IndexProcess"]["score_thres"].as<float>();
-    this->max_query_number =
-        config_file["Global"]["max_det_results"].as<int>() + 1;
-    LoadIdMap();
-    LoadIndexFile();
-    this->I.resize(this->return_k * this->max_query_number);
-    this->D.resize(this->return_k * this->max_query_number);
-  };
-  void LoadIdMap();
-  void LoadIndexFile();
-  const SearchResult &Search(float *feature, int query_number);
-  const std::string &GetLabel(faiss::Index::idx_t ind);
-  const float &GetThreshold() { return this->score_thres; }
+    explicit VectorSearch(const YAML::Node &config_file) {
+        // IndexProcess
+        this->index_dir =
+                config_file["IndexProcess"]["index_dir"].as<std::string>();
+        this->return_k = config_file["IndexProcess"]["return_k"].as<int>();
+        this->score_thres = config_file["IndexProcess"]["score_thres"].as<float>();
+        this->max_query_number =
+                config_file["Global"]["max_det_results"].as<int>() + 1;
+        LoadIdMap();
+        LoadIndexFile();
+        this->I.resize(this->return_k * this->max_query_number);
+        this->D.resize(this->return_k * this->max_query_number);
+    };
+
+    void LoadIdMap();
+
+    void LoadIndexFile();
+
+    const SearchResult &Search(float *feature, int query_number);
+
+    const std::string &GetLabel(faiss::Index::idx_t ind);
+
+    const float &GetThreshold() { return this->score_thres; }
 
 private:
-  std::string index_dir;
-  int return_k = 5;
-  float score_thres = 0.5;
-  std::map<long int, std::string> id_map;
-  faiss::Index *index;
-  int max_query_number = 6;
-  std::vector<float> D;
-  std::vector<faiss::Index::idx_t> I;
-  SearchResult sr;
+    std::string index_dir;
+    int return_k = 5;
+    float score_thres = 0.5;
+    std::map<long int, std::string> id_map;
+    faiss::Index *index;
+    int max_query_number = 6;
+    std::vector<float> D;
+    std::vector <faiss::Index::idx_t> I;
+    SearchResult sr;
 };
diff --git a/deploy/cpp_shitu/include/yaml_config.h b/deploy/cpp_shitu/include/yaml_config.h
index 23e6aa1b7..78243e2fe 100644
--- a/deploy/cpp_shitu/include/yaml_config.h
+++ b/deploy/cpp_shitu/include/yaml_config.h
@@ -42,12 +42,17 @@
 
 class YamlConfig {
 public:
-  explicit YamlConfig(const std::string &path) {
-    config_file = ReadYamlConfig(path);
-  }
-  static std::vector<std::string> ReadDict(const std::string &path);
-  static std::map<int, std::string> ReadIndexId(const std::string &path);
-  static YAML::Node ReadYamlConfig(const std::string &path);
-  void PrintConfigInfo();
-  YAML::Node config_file;
+    explicit YamlConfig(const std::string &path) {
+        config_file = ReadYamlConfig(path);
+    }
+
+    static std::vector <std::string> ReadDict(const std::string &path);
+
+    static std::map<int, std::string> ReadIndexId(const std::string &path);
+
+    static YAML::Node ReadYamlConfig(const std::string &path);
+
+    void PrintConfigInfo();
+
+    YAML::Node config_file;
 };
diff --git a/deploy/cpp_shitu/readme.md b/deploy/cpp_shitu/readme.md
index b65103a65..d63a0e393 100644
--- a/deploy/cpp_shitu/readme.md
+++ b/deploy/cpp_shitu/readme.md
@@ -6,10 +6,7 @@
 ## 1. 准备环境
 
 ### 运行准备
-- Linux环境，推荐使用docker。
-- Windows环境，目前支持基于`Visual Studio 2019 Community`进行编译；此外，如果您希望通过生成`sln解决方案`的方式进行编译，可以参考该文档：[https://zhuanlan.zhihu.com/p/145446681](https://zhuanlan.zhihu.com/p/145446681)
-
-* 该文档主要介绍基于Linux环境下的PaddleClas C++预测流程，如果需要在Windows环境下使用预测库进行C++预测，具体编译方法请参考[Windows下编译教程](./docs/windows_vs2019_build.md)。
+- Linux环境，推荐使用ubuntu docker。
 
 ### 1.1 编译opencv库
 
@@ -103,7 +100,7 @@ make -j
 make inference_lib_dist
 ```
 
-更多编译参数选项可以参考Paddle C++预测库官网：[https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#id16](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#id16)。
+更多编译参数选项可以参考[Paddle C++预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#id16)。
 
 
 * 编译完成之后，可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。
@@ -137,29 +134,27 @@ tar -xvf paddle_inference.tgz
 ### 1.3 安装faiss库
 
 ```shell
+ # 下载 faiss
  git clone https://github.com/facebookresearch/faiss.git
  cd faiss 
- cmake -B build . -DFAISS_ENABLE_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=${faiss_install_path}
+ cmake -B build . -DFAISS_ENABLE_PYTHON=OFF  -DCMAKE_INSTALL_PREFIX=${faiss_install_path}
  make -C build -j faiss
  make -C build install
 ```
 
-## 2 开始运行
-
-### 2.1 将模型导出为inference model
-
-* 可以参考[模型导出](../../tools/export_model.py)，导出`inference model`，用于模型预测。得到预测模型后，假设模型文件放在`inference`目录下，则目录结构如下。
+在安装`faiss`前，请安装`openblas`，`ubuntu`系统中安装命令如下：
 
+```shell
+apt-get install libopenblas-dev
 ```
-inference/
-|--cls_infer.pdmodel
-|--cls_infer.pdiparams
-```
-**注意**：上述文件中，`cls_infer.pdmodel`文件存储了模型结构信息，`cls_infer.pdiparams`文件存储了模型参数信息。注意两个文件的路径需要与配置文件`tools/config.txt`中的`cls_model_path`和`cls_params_path`参数对应一致。
+
+注意本教程以安装faiss cpu版本为例，安装时请参考[faiss](https://github.com/facebookresearch/faiss)官网文档，根据需求自行安装。
+
+## 2 代码编译
 
 ### 2.2 编译PaddleClas C++预测demo
 
-* 编译命令如下，其中Paddle C++预测库、opencv等其他依赖库的地址需要换成自己机器上的实际地址。
+编译命令如下，其中Paddle C++预测库、opencv等其他依赖库的地址需要换成自己机器上的实际地址。同时，编译过程中需要下载编译`yaml-cpp`等C++库，请保持联网环境。
 
 
 ```shell
@@ -169,11 +164,12 @@ sh tools/build.sh
 具体地，`tools/build.sh`中内容如下。
 
 ```shell
-OPENCV_DIR=your_opencv_dir
-LIB_DIR=your_paddle_inference_dir
-CUDA_LIB_DIR=your_cuda_lib_dir
-CUDNN_LIB_DIR=your_cudnn_lib_dir
-TENSORRT_DIR=your_tensorrt_lib_dir
+OPENCV_DIR=${opencv_install_dir}
+LIB_DIR=${paddle_inference_dir}
+CUDA_LIB_DIR=/usr/local/cuda/lib64
+CUDNN_LIB_DIR=/usr/lib/x86_64-linux-gnu/
+FAISS_DIR=${faiss_install_dir}
+FAISS_WITH_MKL=OFF
 
 BUILD_DIR=build
 rm -rf ${BUILD_DIR}
@@ -182,14 +178,14 @@ cd ${BUILD_DIR}
 cmake .. \
     -DPADDLE_LIB=${LIB_DIR} \
     -DWITH_MKL=ON \
-    -DDEMO_NAME=clas_system \
     -DWITH_GPU=OFF \
     -DWITH_STATIC_LIB=OFF \
-    -DWITH_TENSORRT=OFF \
-    -DTENSORRT_DIR=${TENSORRT_DIR} \
+    -DUSE_TENSORRT=OFF \
     -DOPENCV_DIR=${OPENCV_DIR} \
     -DCUDNN_LIB=${CUDNN_LIB_DIR} \
     -DCUDA_LIB=${CUDA_LIB_DIR} \
+    -DFAISS_DIR=${FAISS_DIR} \
+    -DFAISS_WITH_MKL=${FAISS_WITH_MKL}
 
 make -j
 ```
@@ -197,47 +193,75 @@ make -j
 上述命令中，
 
 * `OPENCV_DIR`为opencv编译安装的地址（本例中为`opencv-3.4.7/opencv3`文件夹的路径）；
-
 * `LIB_DIR`为下载的Paddle预测库（`paddle_inference`文件夹），或编译生成的Paddle预测库（`build/paddle_inference_install_dir`文件夹）的路径；
-
 * `CUDA_LIB_DIR`为cuda库文件地址，在docker中为`/usr/local/cuda/lib64`；
-
 * `CUDNN_LIB_DIR`为cudnn库文件地址，在docker中为`/usr/lib/x86_64-linux-gnu/`。
-
 * `TENSORRT_DIR`是tensorrt库文件地址，在dokcer中为`/usr/local/TensorRT6-cuda10.0-cudnn7/`，TensorRT需要结合GPU使用。
-
-在执行上述命令，编译完成之后，会在当前路径下生成`build`文件夹，其中生成一个名为`clas_system`的可执行文件。
+* `FAISS_DIR`是faiss的安装地址；
+* `FAISS_WITH_MKL`表示编译faiss时是否使用了MKL：本文档编译faiss时使用的是openblas而非MKL，故设置为`OFF`；若编译faiss时使用了MKL，则设置为`ON`。
 
 
-### 运行demo
-* 首先修改`tools/config.txt`中对应字段：
-  * use_gpu：是否使用GPU；
-  * gpu_id：使用的GPU卡号；
-  * gpu_mem：显存；
-  * cpu_math_library_num_threads：底层科学计算库所用线程的数量；
-  * use_mkldnn：是否使用MKLDNN加速；
-  * use_tensorrt: 是否使用tensorRT进行加速；
-  * use_fp16：是否使用半精度浮点数进行计算，该选项仅在use_tensorrt为true时有效；
-  * cls_model_path：预测模型结构文件路径；
-  * cls_params_path：预测模型参数文件路径；
-  * resize_short_size：预处理时图像缩放大小；
-  * crop_size：预处理时图像裁剪后的大小。
+在执行上述命令，编译完成之后，会在当前路径下生成`build`文件夹，其中生成一个名为`pp_shitu`的可执行文件。
 
-* 然后修改`tools/run.sh`：
-  * `./build/clas_system ./tools/config.txt ./docs/imgs/ILSVRC2012_val_00000666.JPEG`
-  * 上述命令中分别为：编译得到的可执行文件`clas_system`；运行时的配置文件`config.txt`；待预测的图像。
+## 3 运行demo
 
-* 最后执行以下命令，完成对一幅图像的分类。
+- 请参考[识别快速开始文档](../../docs/zh_CN/quick_start/quick_start_recognition.md)，下载相应的轻量级通用主体检测模型、轻量级通用识别模型及瓶装饮料测试数据，并解压。
 
-```shell
-sh tools/run.sh
-```
+  ```shell
+  mkdir models
+  cd models
+  wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/picodet_PPLCNet_x2_5_mainbody_lite_v1.0_infer.tar
+  tar -xf picodet_PPLCNet_x2_5_mainbody_lite_v1.0_infer.tar
+  wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/general_PPLCNet_x2_5_lite_v1.0_infer.tar
+  tar -xf general_PPLCNet_x2_5_lite_v1.0_infer.tar
+  cd ..
+  
+  mkdir data 
+  cd data
+  wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/drink_dataset_v1.0.tar
+  tar -xf drink_dataset_v1.0.tar
+  cd ..
+  ```
 
-* 最终屏幕上会输出结果，如下图所示。
+- 将相应的yaml文件拷贝到当前目录下
 
-<div align="center">
-    <img src="./docs/imgs/cpp_infer_result.png" width="600">
-</div>
+  ```shell
+  cp ../configs/inference_drink.yaml .
+  ```
 
+- 将`inference_drink.yaml`中的相对路径，改成基于本目录的路径或者绝对路径。涉及到的参数有：
 
-其中`class id`表示置信度最高的类别对应的id，score表示图片属于该类别的概率。
+  - Global.infer_imgs ：此参数可以是具体的图像地址，也可以是图像集所在的目录
+  - Global.det_inference_model_dir ： 检测模型存储目录
+  - Global.rec_inference_model_dir ： 识别模型存储目录
+  - IndexProcess.index_dir ： 检索库的存储目录，在示例中，检索库在下载的demo数据中。
+
+- 字典转换
+
+  由于python检索库的字典使用`pickle`进行序列化存储，C++不方便直接读取，因此需要先进行转换：
+
+  ```shell
+  python tools/transform_id_map.py -c inference_drink.yaml
+  ```
+
+  转换成功后，会在`IndexProcess.index_dir`目录下生成`id_map.txt`，方便C++读取。
+
+- 执行程序
+
+  ```shell
+  ./build/pp_shitu -c inference_drink.yaml
+  # or
+  ./build/pp_shitu -config inference_drink.yaml
+  ```
+
+  若对图像集进行检索，则可能得到如下结果。注意，此结果仅作展示，具体以实际运行结果为准。
+
+  同时需注意，由于opencv版本不同，图像在预处理过程中resize的结果会有细微差别，导致python和C++的结果轻微不同，如bbox相差几个像素、检索得分在小数点后3位存在差异等，但不会改变最终的检索label。
+
+  ![](../../docs/images/quick_start/shitu_c++_result.png)
+
+## 4 使用自己的模型
+
+使用自己训练的模型，可以参考[模型导出](../../docs/zh_CN/inference_deployment/export_model.md)，导出`inference model`，用于模型预测。
+
+同时注意修改`yaml`文件中具体参数。
diff --git a/deploy/cpp_shitu/src/feature_extracter.cpp b/deploy/cpp_shitu/src/feature_extracter.cpp
index f7410dca6..9588ff57e 100644
--- a/deploy/cpp_shitu/src/feature_extracter.cpp
+++ b/deploy/cpp_shitu/src/feature_extracter.cpp
@@ -18,102 +18,102 @@
 
 namespace Feature {
 
-void FeatureExtracter::LoadModel(const std::string &model_path,
-                                 const std::string &params_path) {
-  paddle_infer::Config config;
-  config.SetModel(model_path, params_path);
+    void FeatureExtracter::LoadModel(const std::string &model_path,
+                                     const std::string &params_path) {
+        paddle_infer::Config config;
+        config.SetModel(model_path, params_path);
 
-  if (this->use_gpu_) {
-    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
-    if (this->use_tensorrt_) {
-      config.EnableTensorRtEngine(
-          1 << 20, 1, 3,
-          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
-                          : paddle_infer::Config::Precision::kFloat32,
-          false, false);
+        if (this->use_gpu_) {
+            config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+            if (this->use_tensorrt_) {
+                config.EnableTensorRtEngine(
+                        1 << 20, 1, 3,
+                        this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
+                                        : paddle_infer::Config::Precision::kFloat32,
+                        false, false);
+            }
+        } else {
+            config.DisableGpu();
+            if (this->use_mkldnn_) {
+                config.EnableMKLDNN();
+                // cache 10 different shapes for mkldnn to avoid memory leak
+                config.SetMkldnnCacheCapacity(10);
+            }
+            config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+        }
+
+        config.SwitchUseFeedFetchOps(false);
+        // true for multiple input
+        config.SwitchSpecifyInputNames(true);
+
+        config.SwitchIrOptim(true);
+
+        config.EnableMemoryOptim();
+        config.DisableGlogInfo();
+
+        this->predictor_ = CreatePredictor(config);
     }
-  } else {
-    config.DisableGpu();
-    if (this->use_mkldnn_) {
-      config.EnableMKLDNN();
-      // cache 10 different shapes for mkldnn to avoid memory leak
-      config.SetMkldnnCacheCapacity(10);
+
+    void FeatureExtracter::Run(cv::Mat &img, std::vector<float> &out_data,
+                               std::vector<double> &times) {
+        cv::Mat resize_img;
+        std::vector<double> time;
+
+        auto preprocess_start = std::chrono::system_clock::now();
+        this->resize_op_.Run(img, resize_img, this->resize_short_,
+                             this->resize_size_);
+
+        this->normalize_op_.Run(&resize_img, this->mean_, this->std_, this->scale_);
+        std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
+        this->permute_op_.Run(&resize_img, input.data());
+
+        auto input_names = this->predictor_->GetInputNames();
+        auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+        input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+        auto preprocess_end = std::chrono::system_clock::now();
+
+        auto infer_start = std::chrono::system_clock::now();
+        input_t->CopyFromCpu(input.data());
+        this->predictor_->Run();
+
+        auto output_names = this->predictor_->GetOutputNames();
+        auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
+        std::vector<int> output_shape = output_t->shape();
+        int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                      std::multiplies<int>());
+
+        out_data.resize(out_num);
+        output_t->CopyToCpu(out_data.data());
+        auto infer_end = std::chrono::system_clock::now();
+
+        auto postprocess_start = std::chrono::system_clock::now();
+        if (this->feature_norm)
+            FeatureNorm(out_data);
+        auto postprocess_end = std::chrono::system_clock::now();
+
+        std::chrono::duration<float> preprocess_diff =
+                preprocess_end - preprocess_start;
+        time.push_back(double(preprocess_diff.count()));
+        std::chrono::duration<float> inference_diff = infer_end - infer_start;
+        double inference_cost_time = double(inference_diff.count());
+        time.push_back(inference_cost_time);
+        // std::chrono::duration<float> postprocess_diff =
+        //     postprocess_end - postprocess_start;
+        time.push_back(0);
+
+        // std::cout << "result: " << std::endl;
+        // std::cout << "\tclass id: " << maxPosition << std::endl;
+        // std::cout << std::fixed << std::setprecision(10)
+        //           << "\tscore: " << double(out_data[maxPosition]) << std::endl;
+        times[0] += time[0];
+        times[1] += time[1];
+        times[2] += time[2];
     }
-    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
-  }
 
-  config.SwitchUseFeedFetchOps(false);
-  // true for multiple input
-  config.SwitchSpecifyInputNames(true);
-
-  config.SwitchIrOptim(true);
-
-  config.EnableMemoryOptim();
-  config.DisableGlogInfo();
-
-  this->predictor_ = CreatePredictor(config);
-}
-
-void FeatureExtracter::Run(cv::Mat &img, std::vector<float> &out_data,
-                           std::vector<double> &times) {
-  cv::Mat resize_img;
-  std::vector<double> time;
-
-  auto preprocess_start = std::chrono::system_clock::now();
-  this->resize_op_.Run(img, resize_img, this->resize_short_,
-                       this->resize_size_);
-
-  this->normalize_op_.Run(&resize_img, this->mean_, this->std_, this->scale_);
-  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
-  this->permute_op_.Run(&resize_img, input.data());
-
-  auto input_names = this->predictor_->GetInputNames();
-  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
-  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
-  auto preprocess_end = std::chrono::system_clock::now();
-
-  auto infer_start = std::chrono::system_clock::now();
-  input_t->CopyFromCpu(input.data());
-  this->predictor_->Run();
-
-  auto output_names = this->predictor_->GetOutputNames();
-  auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
-  std::vector<int> output_shape = output_t->shape();
-  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
-                                std::multiplies<int>());
-
-  out_data.resize(out_num);
-  output_t->CopyToCpu(out_data.data());
-  auto infer_end = std::chrono::system_clock::now();
-
-  auto postprocess_start = std::chrono::system_clock::now();
-  if (this->feature_norm)
-    FeatureNorm(out_data);
-  auto postprocess_end = std::chrono::system_clock::now();
-
-  std::chrono::duration<float> preprocess_diff =
-      preprocess_end - preprocess_start;
-  time.push_back(double(preprocess_diff.count()));
-  std::chrono::duration<float> inference_diff = infer_end - infer_start;
-  double inference_cost_time = double(inference_diff.count());
-  time.push_back(inference_cost_time);
-  // std::chrono::duration<float> postprocess_diff =
-  //     postprocess_end - postprocess_start;
-  time.push_back(0);
-
-  // std::cout << "result: " << std::endl;
-  // std::cout << "\tclass id: " << maxPosition << std::endl;
-  // std::cout << std::fixed << std::setprecision(10)
-  //           << "\tscore: " << double(out_data[maxPosition]) << std::endl;
-  times[0] += time[0];
-  times[1] += time[1];
-  times[2] += time[2];
-}
-
-void FeatureExtracter::FeatureNorm(std::vector<float> &featuer) {
-  float featuer_sqrt = std::sqrt(std::inner_product(
-      featuer.begin(), featuer.end(), featuer.begin(), 0.0f));
-  for (int i = 0; i < featuer.size(); ++i)
-    featuer[i] /= featuer_sqrt;
-}
+    void FeatureExtracter::FeatureNorm(std::vector<float> &featuer) {
+        float featuer_sqrt = std::sqrt(std::inner_product(
+                featuer.begin(), featuer.end(), featuer.begin(), 0.0f));
+        for (int i = 0; i < featuer.size(); ++i)
+            featuer[i] /= featuer_sqrt;
+    }
 } // namespace Feature
diff --git a/deploy/cpp_shitu/src/main.cpp b/deploy/cpp_shitu/src/main.cpp
index 9176b7262..d89ba14d5 100644
--- a/deploy/cpp_shitu/src/main.cpp
+++ b/deploy/cpp_shitu/src/main.cpp
@@ -37,260 +37,262 @@
 using namespace std;
 using namespace cv;
 
-DEFINE_string(config, "", "Path of yaml file");
-DEFINE_string(c, "", "Path of yaml file");
+DEFINE_string(config,
+"", "Path of yaml file");
+DEFINE_string(c,
+"", "Path of yaml file");
 
-void DetPredictImage(const std::vector<cv::Mat> &batch_imgs,
-                     const std::vector<std::string> &all_img_paths,
+void DetPredictImage(const std::vector <cv::Mat> &batch_imgs,
+                     const std::vector <std::string> &all_img_paths,
                      const int batch_size, Detection::ObjectDetector *det,
-                     std::vector<Detection::ObjectResult> &im_result,
+                     std::vector <Detection::ObjectResult> &im_result,
                      std::vector<int> &im_bbox_num, std::vector<double> &det_t,
                      const bool visual_det = false,
                      const bool run_benchmark = false,
                      const std::string &output_dir = "output") {
-  int steps = ceil(float(all_img_paths.size()) / batch_size);
-  //   printf("total images = %d, batch_size = %d, total steps = %d\n",
-  //                 all_img_paths.size(), batch_size, steps);
-  for (int idx = 0; idx < steps; idx++) {
-    int left_image_cnt = all_img_paths.size() - idx * batch_size;
-    if (left_image_cnt > batch_size) {
-      left_image_cnt = batch_size;
-    }
-    // for (int bs = 0; bs < left_image_cnt; bs++) {
-    // std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
-    // cv::Mat im = cv::imread(image_file_path, 1);
-    // batch_imgs.insert(batch_imgs.end(), im);
-    // }
+    int steps = ceil(float(all_img_paths.size()) / batch_size);
+    //   printf("total images = %d, batch_size = %d, total steps = %d\n",
+    //                 all_img_paths.size(), batch_size, steps);
+    for (int idx = 0; idx < steps; idx++) {
+        int left_image_cnt = all_img_paths.size() - idx * batch_size;
+        if (left_image_cnt > batch_size) {
+            left_image_cnt = batch_size;
+        }
+        // for (int bs = 0; bs < left_image_cnt; bs++) {
+        // std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
+        // cv::Mat im = cv::imread(image_file_path, 1);
+        // batch_imgs.insert(batch_imgs.end(), im);
+        // }
 
-    // Store all detected result
-    std::vector<Detection::ObjectResult> result;
-    std::vector<int> bbox_num;
-    std::vector<double> det_times;
-    bool is_rbox = false;
-    if (run_benchmark) {
-      det->Predict(batch_imgs, 10, 10, &result, &bbox_num, &det_times);
-    } else {
-      det->Predict(batch_imgs, 0, 1, &result, &bbox_num, &det_times);
-      // get labels and colormap
-      auto labels = det->GetLabelList();
-      auto colormap = Detection::GenerateColorMap(labels.size());
+        // Store all detected result
+        std::vector <Detection::ObjectResult> result;
+        std::vector<int> bbox_num;
+        std::vector<double> det_times;
+        bool is_rbox = false;
+        if (run_benchmark) {
+            det->Predict(batch_imgs, 10, 10, &result, &bbox_num, &det_times);
+        } else {
+            det->Predict(batch_imgs, 0, 1, &result, &bbox_num, &det_times);
+            // get labels and colormap
+            auto labels = det->GetLabelList();
+            auto colormap = Detection::GenerateColorMap(labels.size());
 
-      int item_start_idx = 0;
-      for (int i = 0; i < left_image_cnt; i++) {
-        cv::Mat im = batch_imgs[i];
-        int detect_num = 0;
+            int item_start_idx = 0;
+            for (int i = 0; i < left_image_cnt; i++) {
+                cv::Mat im = batch_imgs[i];
+                int detect_num = 0;
 
-        for (int j = 0; j < bbox_num[i]; j++) {
-          Detection::ObjectResult item = result[item_start_idx + j];
-          if (item.confidence < det->GetThreshold() || item.class_id == -1) {
-            continue;
-          }
-          detect_num += 1;
-          im_result.push_back(item);
-          if (visual_det) {
-            if (item.rect.size() > 6) {
-              is_rbox = true;
-              printf(
-                  "class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
-                  item.class_id, item.confidence, item.rect[0], item.rect[1],
-                  item.rect[2], item.rect[3], item.rect[4], item.rect[5],
-                  item.rect[6], item.rect[7]);
-            } else {
-              printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
-                     item.class_id, item.confidence, item.rect[0], item.rect[1],
-                     item.rect[2], item.rect[3]);
+                for (int j = 0; j < bbox_num[i]; j++) {
+                    Detection::ObjectResult item = result[item_start_idx + j];
+                    if (item.confidence < det->GetThreshold() || item.class_id == -1) {
+                        continue;
+                    }
+                    detect_num += 1;
+                    im_result.push_back(item);
+                    if (visual_det) {
+                        if (item.rect.size() > 6) {
+                            is_rbox = true;
+                            printf(
+                                    "class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+                                    item.class_id, item.confidence, item.rect[0], item.rect[1],
+                                    item.rect[2], item.rect[3], item.rect[4], item.rect[5],
+                                    item.rect[6], item.rect[7]);
+                        } else {
+                            printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+                                   item.class_id, item.confidence, item.rect[0], item.rect[1],
+                                   item.rect[2], item.rect[3]);
+                        }
+                    }
+                }
+                im_bbox_num.push_back(detect_num);
+                item_start_idx = item_start_idx + bbox_num[i];
+
+                // Visualization result
+                if (visual_det) {
+                    std::cout << all_img_paths.at(idx * batch_size + i)
+                              << " The number of detected box: " << detect_num
+                              << std::endl;
+                    cv::Mat vis_img = Detection::VisualizeResult(im, im_result, labels,
+                                                                 colormap, is_rbox);
+                    std::vector<int> compression_params;
+                    compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
+                    compression_params.push_back(95);
+                    std::string output_path(output_dir);
+                    if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
+                        output_path += OS_PATH_SEP;
+                    }
+                    std::string image_file_path = all_img_paths.at(idx * batch_size + i);
+                    output_path +=
+                            image_file_path.substr(image_file_path.find_last_of('/') + 1);
+                    cv::imwrite(output_path, vis_img, compression_params);
+                    printf("Visualized output saved as %s\n", output_path.c_str());
+                }
             }
-          }
         }
-        im_bbox_num.push_back(detect_num);
-        item_start_idx = item_start_idx + bbox_num[i];
-
-        // Visualization result
-        if (visual_det) {
-          std::cout << all_img_paths.at(idx * batch_size + i)
-                    << " The number of detected box: " << detect_num
-                    << std::endl;
-          cv::Mat vis_img = Detection::VisualizeResult(im, im_result, labels,
-                                                       colormap, is_rbox);
-          std::vector<int> compression_params;
-          compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
-          compression_params.push_back(95);
-          std::string output_path(output_dir);
-          if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
-            output_path += OS_PATH_SEP;
-          }
-          std::string image_file_path = all_img_paths.at(idx * batch_size + i);
-          output_path +=
-              image_file_path.substr(image_file_path.find_last_of('/') + 1);
-          cv::imwrite(output_path, vis_img, compression_params);
-          printf("Visualized output saved as %s\n", output_path.c_str());
-        }
-      }
+        det_t[0] += det_times[0];
+        det_t[1] += det_times[1];
+        det_t[2] += det_times[2];
     }
-    det_t[0] += det_times[0];
-    det_t[1] += det_times[1];
-    det_t[2] += det_times[2];
-  }
 }
 
 void PrintResult(std::string &img_path,
-                 std::vector<Detection::ObjectResult> &det_result,
+                 std::vector <Detection::ObjectResult> &det_result,
                  std::vector<int> &indeices, VectorSearch &vector_search,
                  SearchResult &search_result) {
-  printf("%s:\n", img_path.c_str());
-  for (int i = 0; i < indeices.size(); ++i) {
-    int t = indeices[i];
-    printf("\tresult%d: bbox[%d, %d, %d, %d], score: %f, label: %s\n", i,
-           det_result[t].rect[0], det_result[t].rect[1], det_result[t].rect[2],
-           det_result[t].rect[3], det_result[t].confidence,
-           vector_search.GetLabel(search_result.I[search_result.return_k * t])
-               .c_str());
-  }
+    printf("%s:\n", img_path.c_str());
+    for (int i = 0; i < indeices.size(); ++i) {
+        int t = indeices[i];
+        printf("\tresult%d: bbox[%d, %d, %d, %d], score: %f, label: %s\n", i,
+               det_result[t].rect[0], det_result[t].rect[1], det_result[t].rect[2],
+               det_result[t].rect[3], det_result[t].confidence,
+               vector_search.GetLabel(search_result.I[search_result.return_k * t])
+                       .c_str());
+    }
 }
 
 int main(int argc, char **argv) {
-  google::ParseCommandLineFlags(&argc, &argv, true);
-  std::string yaml_path = "";
-  if (FLAGS_config == "" && FLAGS_c == "") {
-    std::cerr << "[ERROR] usage: " << std::endl
-              << argv[0] << " -c $yaml_path" << std::endl
-              << "or:" << std::endl
-              << argv[0] << " -config $yaml_path" << std::endl;
-    exit(1);
-  } else if (FLAGS_config != "") {
-    yaml_path = FLAGS_config;
-  } else {
-    yaml_path = FLAGS_c;
-  }
-
-  YamlConfig config(yaml_path);
-  config.PrintConfigInfo();
-
-  // initialize detector, rec_Model, vector_search
-  Feature::FeatureExtracter feature_extracter(config.config_file);
-  Detection::ObjectDetector detector(config.config_file);
-  VectorSearch searcher(config.config_file);
-
-  // config
-  const int batch_size = config.config_file["Global"]["batch_size"].as<int>();
-  bool visual_det = false;
-  if (config.config_file["Global"]["visual_det"].IsDefined()) {
-    visual_det = config.config_file["Global"]["visual_det"].as<bool>();
-  }
-  bool run_benchmark = false;
-  if (config.config_file["Global"]["benchmark"].IsDefined()) {
-    run_benchmark = config.config_file["Global"]["benchmark"].as<bool>();
-  }
-  int max_det_results = 5;
-  if (config.config_file["Global"]["max_det_results"].IsDefined()) {
-    max_det_results = config.config_file["Global"]["max_det_results"].as<int>();
-  }
-  float rec_nms_thresold = 0.05;
-  if (config.config_file["Global"]["rec_nms_thresold"].IsDefined()) {
-    rec_nms_thresold =
-        config.config_file["Global"]["rec_nms_thresold"].as<float>();
-  }
-
-  // load image_file_path
-  std::string path =
-      config.config_file["Global"]["infer_imgs"].as<std::string>();
-  std::vector<std::string> img_files_list;
-  if (cv::utils::fs::isDirectory(path)) {
-    std::vector<cv::String> filenames;
-    cv::glob(path, filenames);
-    for (auto f : filenames) {
-      img_files_list.push_back(f);
-    }
-  } else {
-    img_files_list.push_back(path);
-  }
-  std::cout << "img_file_list length: " << img_files_list.size() << std::endl;
-  // for time log
-  std::vector<double> cls_times = {0, 0, 0};
-  std::vector<double> det_times = {0, 0, 0};
-  // for read images
-  std::vector<cv::Mat> batch_imgs;
-  std::vector<std::string> img_paths;
-  // for detection
-  std::vector<Detection::ObjectResult> det_result;
-  std::vector<int> det_bbox_num;
-  // for vector search
-  std::vector<float> features;
-  std::vector<float> feature;
-  // for nms
-  std::vector<int> indeices;
-
-  int warmup_iter = img_files_list.size() > 5 ? 5 : 0;
-  for (int idx = 0; idx < img_files_list.size(); ++idx) {
-    std::string img_path = img_files_list[idx];
-    cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
-    if (!srcimg.data) {
-      std::cerr << "[ERROR] image read failed! image path: " << img_path
-                << "\n";
-      exit(-1);
-    }
-    cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
-
-    batch_imgs.push_back(srcimg);
-    img_paths.push_back(img_path);
-
-    // step1: get all detection results
-    DetPredictImage(batch_imgs, img_paths, batch_size, &detector, det_result,
-                    det_bbox_num, det_times, visual_det, run_benchmark);
-
-    // select max_det_results bbox
-    if (det_result.size() > max_det_results) {
-      det_result.resize(max_det_results);
-    }
-    // step2: add the whole image for recognition to improve recall
-    Detection::ObjectResult result_whole_img = {
-        {0, 0, srcimg.cols - 1, srcimg.rows - 1}, 0, 1.0};
-    det_result.push_back(result_whole_img);
-    det_bbox_num[0] = det_result.size() + 1;
-
-    // step3: extract feature for all boxes in an inmage
-    SearchResult search_result;
-    for (int j = 0; j < det_result.size(); ++j) {
-      int w = det_result[j].rect[2] - det_result[j].rect[0];
-      int h = det_result[j].rect[3] - det_result[j].rect[1];
-      cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
-      cv::Mat crop_img = srcimg(rect);
-      feature_extracter.Run(crop_img, feature, cls_times);
-      features.insert(features.end(), feature.begin(), feature.end());
+    google::ParseCommandLineFlags(&argc, &argv, true);
+    std::string yaml_path = "";
+    if (FLAGS_config == "" && FLAGS_c == "") {
+        std::cerr << "[ERROR] usage: " << std::endl
+                  << argv[0] << " -c $yaml_path" << std::endl
+                  << "or:" << std::endl
+                  << argv[0] << " -config $yaml_path" << std::endl;
+        exit(1);
+    } else if (FLAGS_config != "") {
+        yaml_path = FLAGS_config;
+    } else {
+        yaml_path = FLAGS_c;
     }
 
-    // step4: get search result
-    search_result = searcher.Search(features.data(), det_result.size());
+    YamlConfig config(yaml_path);
+    config.PrintConfigInfo();
 
-    // nms for search result
-    for (int i = 0; i < det_result.size(); ++i) {
-      det_result[i].confidence = search_result.D[search_result.return_k * i];
+    // initialize detector, rec_Model, vector_search
+    Feature::FeatureExtracter feature_extracter(config.config_file);
+    Detection::ObjectDetector detector(config.config_file);
+    VectorSearch searcher(config.config_file);
+
+    // config
+    const int batch_size = config.config_file["Global"]["batch_size"].as<int>();
+    bool visual_det = false;
+    if (config.config_file["Global"]["visual_det"].IsDefined()) {
+        visual_det = config.config_file["Global"]["visual_det"].as<bool>();
+    }
+    bool run_benchmark = false;
+    if (config.config_file["Global"]["benchmark"].IsDefined()) {
+        run_benchmark = config.config_file["Global"]["benchmark"].as<bool>();
+    }
+    int max_det_results = 5;
+    if (config.config_file["Global"]["max_det_results"].IsDefined()) {
+        max_det_results = config.config_file["Global"]["max_det_results"].as<int>();
+    }
+    float rec_nms_thresold = 0.05;
+    if (config.config_file["Global"]["rec_nms_thresold"].IsDefined()) {
+        rec_nms_thresold =
+                config.config_file["Global"]["rec_nms_thresold"].as<float>();
     }
-    NMSBoxes(det_result, searcher.GetThreshold(), rec_nms_thresold, indeices);
 
-    // print result
-    PrintResult(img_path, det_result, indeices, searcher, search_result);
+    // load image_file_path
+    std::string path =
+            config.config_file["Global"]["infer_imgs"].as<std::string>();
+    std::vector <std::string> img_files_list;
+    if (cv::utils::fs::isDirectory(path)) {
+        std::vector <cv::String> filenames;
+        cv::glob(path, filenames);
+        for (auto f : filenames) {
+            img_files_list.push_back(f);
+        }
+    } else {
+        img_files_list.push_back(path);
+    }
+    std::cout << "img_file_list length: " << img_files_list.size() << std::endl;
+    // for time log
+    std::vector<double> cls_times = {0, 0, 0};
+    std::vector<double> det_times = {0, 0, 0};
+    // for read images
+    std::vector <cv::Mat> batch_imgs;
+    std::vector <std::string> img_paths;
+    // for detection
+    std::vector <Detection::ObjectResult> det_result;
+    std::vector<int> det_bbox_num;
+    // for vector search
+    std::vector<float> features;
+    std::vector<float> feature;
+    // for nms
+    std::vector<int> indeices;
 
-    // for postprocess
-    batch_imgs.clear();
-    img_paths.clear();
-    det_bbox_num.clear();
-    det_result.clear();
-    feature.clear();
-    features.clear();
-    indeices.clear();
-  }
+    int warmup_iter = img_files_list.size() > 5 ? 5 : 0;
+    for (int idx = 0; idx < img_files_list.size(); ++idx) {
+        std::string img_path = img_files_list[idx];
+        cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
+        if (!srcimg.data) {
+            std::cerr << "[ERROR] image read failed! image path: " << img_path
+                      << "\n";
+            exit(-1);
+        }
+        cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
 
-  std::string presion = "fp32";
+        batch_imgs.push_back(srcimg);
+        img_paths.push_back(img_path);
 
-  // if (config.use_fp16)
-  //   presion = "fp16";
-  // if (config.benchmark) {
-  //   AutoLogger autolog("Classification", config.use_gpu, config.use_tensorrt,
-  //                      config.use_mkldnn, config.cpu_threads, 1,
-  //                      "1, 3, 224, 224", presion, cls_times,
-  //                      img_files_list.size());
-  //   autolog.report();
-  // }
-  return 0;
+        // step1: get all detection results
+        DetPredictImage(batch_imgs, img_paths, batch_size, &detector, det_result,
+                        det_bbox_num, det_times, visual_det, run_benchmark);
+
+        // select max_det_results bbox
+        if (det_result.size() > max_det_results) {
+            det_result.resize(max_det_results);
+        }
+        // step2: add the whole image for recognition to improve recall
+        Detection::ObjectResult result_whole_img = {
+                {0, 0, srcimg.cols - 1, srcimg.rows - 1}, 0, 1.0};
+        det_result.push_back(result_whole_img);
+        det_bbox_num[0] = det_result.size() + 1;
+
+        // step3: extract feature for all boxes in an image
+        SearchResult search_result;
+        for (int j = 0; j < det_result.size(); ++j) {
+            int w = det_result[j].rect[2] - det_result[j].rect[0];
+            int h = det_result[j].rect[3] - det_result[j].rect[1];
+            cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
+            cv::Mat crop_img = srcimg(rect);
+            feature_extracter.Run(crop_img, feature, cls_times);
+            features.insert(features.end(), feature.begin(), feature.end());
+        }
+
+        // step4: get search result
+        search_result = searcher.Search(features.data(), det_result.size());
+
+        // nms for search result
+        for (int i = 0; i < det_result.size(); ++i) {
+            det_result[i].confidence = search_result.D[search_result.return_k * i];
+        }
+        NMSBoxes(det_result, searcher.GetThreshold(), rec_nms_thresold, indeices);
+
+        // print result
+        PrintResult(img_path, det_result, indeices, searcher, search_result);
+
+        // for postprocess
+        batch_imgs.clear();
+        img_paths.clear();
+        det_bbox_num.clear();
+        det_result.clear();
+        feature.clear();
+        features.clear();
+        indeices.clear();
+    }
+
+    std::string presion = "fp32";
+
+    // if (config.use_fp16)
+    //   presion = "fp16";
+    // if (config.benchmark) {
+    //   AutoLogger autolog("Classification", config.use_gpu, config.use_tensorrt,
+    //                      config.use_mkldnn, config.cpu_threads, 1,
+    //                      "1, 3, 224, 224", presion, cls_times,
+    //                      img_files_list.size());
+    //   autolog.report();
+    // }
+    return 0;
 }
diff --git a/deploy/cpp_shitu/src/object_detector.cpp b/deploy/cpp_shitu/src/object_detector.cpp
index 369f88606..03b02b973 100644
--- a/deploy/cpp_shitu/src/object_detector.cpp
+++ b/deploy/cpp_shitu/src/object_detector.cpp
@@ -22,344 +22,344 @@ using namespace paddle_infer;
 namespace Detection {
 
 // Load Model and create model predictor
-void ObjectDetector::LoadModel(const std::string &model_dir,
-                               const int batch_size,
-                               const std::string &run_mode) {
-  paddle_infer::Config config;
-  std::string prog_file = model_dir + OS_PATH_SEP + "inference.pdmodel";
-  std::string params_file = model_dir + OS_PATH_SEP + "inference.pdiparams";
-  config.SetModel(prog_file, params_file);
-  if (this->use_gpu_) {
-    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
-    config.SwitchIrOptim(this->ir_optim_);
-    //     // use tensorrt
-    // if (run_mode != "fluid") {
-    //   auto precision = paddle_infer::Config::Precision::kFloat32;
-    //   if (run_mode == "trt_fp32") {
-    //     precision = paddle_infer::Config::Precision::kFloat32;
-    //   }
-    //   else if (run_mode == "trt_fp16") {
-    //     precision = paddle_infer::Config::Precision::kHalf;
-    //   }
-    //   else if (run_mode == "trt_int8") {
-    //     precision = paddle_infer::Config::Precision::kInt8;
-    //   } else {
-    //       printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or
-    //       'trt_int8'");
-    //       }
-    // set tensorrt
+    void ObjectDetector::LoadModel(const std::string &model_dir,  // directory holding inference.pdmodel / inference.pdiparams
+                                   const int batch_size,  // forwarded to EnableTensorRtEngine below
+                                   const std::string &run_mode) {  // only referenced by the disabled code below
+        paddle_infer::Config config;
+        std::string prog_file = model_dir + OS_PATH_SEP + "inference.pdmodel";
+        std::string params_file = model_dir + OS_PATH_SEP + "inference.pdiparams";
+        config.SetModel(prog_file, params_file);
+        if (this->use_gpu_) {
+            config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+            config.SwitchIrOptim(this->ir_optim_);
+            //     // use tensorrt
+            // if (run_mode != "fluid") {
+            //   auto precision = paddle_infer::Config::Precision::kFloat32;
+            //   if (run_mode == "trt_fp32") {
+            //     precision = paddle_infer::Config::Precision::kFloat32;
+            //   }
+            //   else if (run_mode == "trt_fp16") {
+            //     precision = paddle_infer::Config::Precision::kHalf;
+            //   }
+            //   else if (run_mode == "trt_int8") {
+            //     precision = paddle_infer::Config::Precision::kInt8;
+            //   } else {
+            //       printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or
+            //       'trt_int8'");
+            //       }
+            // Enable TensorRT only when use_tensorrt_ is set (precision picked from use_fp16_)
 
-    if (this->use_tensorrt_) {
-      config.EnableTensorRtEngine(
-          1 << 30, batch_size, this->min_subgraph_size_,
-          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
-                          : paddle_infer::Config::Precision::kFloat32,
-          false, this->trt_calib_mode_);
-      // set use dynamic shape
-      if (this->use_dynamic_shape_) {
-        // set DynamicShsape for image tensor
-        const std::vector<int> min_input_shape = {1, 3, this->trt_min_shape_,
-                                                  this->trt_min_shape_};
-        const std::vector<int> max_input_shape = {1, 3, this->trt_max_shape_,
-                                                  this->trt_max_shape_};
-        const std::vector<int> opt_input_shape = {1, 3, this->trt_opt_shape_,
-                                                  this->trt_opt_shape_};
-        const std::map<std::string, std::vector<int>> map_min_input_shape = {
-            {"image", min_input_shape}};
-        const std::map<std::string, std::vector<int>> map_max_input_shape = {
-            {"image", max_input_shape}};
-        const std::map<std::string, std::vector<int>> map_opt_input_shape = {
-            {"image", opt_input_shape}};
+            if (this->use_tensorrt_) {
+                config.EnableTensorRtEngine(
+                        1 << 30, batch_size, this->min_subgraph_size_,  // NOTE(review): 1 << 30 is presumably the workspace size in bytes — confirm
+                        this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
+                                        : paddle_infer::Config::Precision::kFloat32,
+                        false, this->trt_calib_mode_);
+                // Optionally register dynamic input shapes
+                if (this->use_dynamic_shape_) {
+                    // min / max / opt shapes {1, 3, S, S} for the "image" input tensor
+                    const std::vector<int> min_input_shape = {1, 3, this->trt_min_shape_,
+                                                              this->trt_min_shape_};
+                    const std::vector<int> max_input_shape = {1, 3, this->trt_max_shape_,
+                                                              this->trt_max_shape_};
+                    const std::vector<int> opt_input_shape = {1, 3, this->trt_opt_shape_,
+                                                              this->trt_opt_shape_};
+                    const std::map <std::string, std::vector<int>> map_min_input_shape = {
+                            {"image", min_input_shape}};
+                    const std::map <std::string, std::vector<int>> map_max_input_shape = {
+                            {"image", max_input_shape}};
+                    const std::map <std::string, std::vector<int>> map_opt_input_shape = {
+                            {"image", opt_input_shape}};
 
-        config.SetTRTDynamicShapeInfo(map_min_input_shape, map_max_input_shape,
-                                      map_opt_input_shape);
-        std::cout << "TensorRT dynamic shape enabled" << std::endl;
-      }
+                    config.SetTRTDynamicShapeInfo(map_min_input_shape, map_max_input_shape,
+                                                  map_opt_input_shape);
+                    std::cout << "TensorRT dynamic shape enabled" << std::endl;
+                }
+            }
+
+            // } else if (this->device_ == "XPU"){
+            //   config.EnableXpu(10*1024*1024);
+        } else {
+            config.DisableGpu();
+            if (this->use_mkldnn_) {
+                config.EnableMKLDNN();
+                // cache 10 different shapes for mkldnn to avoid memory leak
+                config.SetMkldnnCacheCapacity(10);
+            }
+            config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+        }
+        config.SwitchUseFeedFetchOps(false);
+        config.SwitchIrOptim(this->ir_optim_);  // set for both branches; the GPU branch above already set the same value
+        config.DisableGlogInfo();
+        // Memory optimization
+        config.EnableMemoryOptim();
+        predictor_ = std::move(CreatePredictor(config));  // predictor_ is what Predict() later runs
     }
 
-    // } else if (this->device_ == "XPU"){
-    //   config.EnableXpu(10*1024*1024);
-  } else {
-    config.DisableGpu();
-    if (this->use_mkldnn_) {
-      config.EnableMKLDNN();
-      // cache 10 different shapes for mkldnn to avoid memory leak
-      config.SetMkldnnCacheCapacity(10);
-    }
-    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
-  }
-  config.SwitchUseFeedFetchOps(false);
-  config.SwitchIrOptim(this->ir_optim_);
-  config.DisableGlogInfo();
-  // Memory optimization
-  config.EnableMemoryOptim();
-  predictor_ = std::move(CreatePredictor(config));
-}
-
 // Visualiztion MaskDetector results
-cv::Mat VisualizeResult(const cv::Mat &img,
-                        const std::vector<ObjectResult> &results,
-                        const std::vector<std::string> &lables,
-                        const std::vector<int> &colormap,
-                        const bool is_rbox = false) {
-  cv::Mat vis_img = img.clone();
-  for (int i = 0; i < results.size(); ++i) {
-    // Configure color and text size
-    std::ostringstream oss;
-    oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
-    oss << lables[results[i].class_id] << " ";
-    oss << results[i].confidence;
-    std::string text = oss.str();
-    int c1 = colormap[3 * results[i].class_id + 0];
-    int c2 = colormap[3 * results[i].class_id + 1];
-    int c3 = colormap[3 * results[i].class_id + 2];
-    cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
-    int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
-    double font_scale = 0.5f;
-    float thickness = 0.5;
-    cv::Size text_size =
-        cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
-    cv::Point origin;
+    cv::Mat VisualizeResult(const cv::Mat &img,
+                            const std::vector <ObjectResult> &results,
+                            const std::vector <std::string> &lables,  // indexed by class_id
+                            const std::vector<int> &colormap,  // read as 3 ints per class_id
+                            const bool is_rbox = false) {
+        cv::Mat vis_img = img.clone();  // draw on a copy; img itself is not modified
+        for (int i = 0; i < results.size(); ++i) {
+            // Build the "<label> <confidence>" caption (4 decimals) and look up the class color
+            std::ostringstream oss;
+            oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
+            oss << lables[results[i].class_id] << " ";
+            oss << results[i].confidence;
+            std::string text = oss.str();
+            int c1 = colormap[3 * results[i].class_id + 0];
+            int c2 = colormap[3 * results[i].class_id + 1];
+            int c3 = colormap[3 * results[i].class_id + 2];
+            cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
+            int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
+            double font_scale = 0.5f;
+            float thickness = 0.5;  // NOTE(review): OpenCV thickness params are int, so 0.5 converts to 0 — confirm intended
+            cv::Size text_size =
+                    cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
+            cv::Point origin;
 
-    if (is_rbox) {
-      // Draw object, text, and background
-      for (int k = 0; k < 4; k++) {
-        cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
-                                  results[i].rect[(k * 2 + 1) % 8]);
-        cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
-                                  results[i].rect[(k * 2 + 3) % 8]);
-        cv::line(vis_img, pt1, pt2, roi_color, 2);
-      }
-    } else {
-      int w = results[i].rect[2] - results[i].rect[0];
-      int h = results[i].rect[3] - results[i].rect[1];
-      cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h);
-      // Draw roi object, text, and background
-      cv::rectangle(vis_img, roi, roi_color, 2);
+            if (is_rbox) {
+                // Rotated box: rect is read as 4 (x, y) corners; join each corner to the next (wrapping)
+                for (int k = 0; k < 4; k++) {
+                    cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
+                                              results[i].rect[(k * 2 + 1) % 8]);
+                    cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
+                                              results[i].rect[(k * 2 + 3) % 8]);
+                    cv::line(vis_img, pt1, pt2, roi_color, 2);
+                }
+            } else {
+                int w = results[i].rect[2] - results[i].rect[0];
+                int h = results[i].rect[3] - results[i].rect[1];
+                cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h);
+                // Draw the box outline (rect was read as {x0, y0, x1, y1} above)
+                cv::rectangle(vis_img, roi, roi_color, 2);
+            }
+
+            origin.x = results[i].rect[0];
+            origin.y = results[i].rect[1];
+
+            // Caption background: caption-sized rectangle placed directly above (rect[0], rect[1])
+            cv::Rect text_back =
+                    cv::Rect(results[i].rect[0], results[i].rect[1] - text_size.height,
+                             text_size.width, text_size.height);
+            // Draw the background first, then the white caption on top of it
+            cv::rectangle(vis_img, text_back, roi_color, -1);  // negative thickness => filled
+            cv::putText(vis_img, text, origin, font_face, font_scale,
+                        cv::Scalar(255, 255, 255), thickness);
+        }
+        return vis_img;
     }
 
-    origin.x = results[i].rect[0];
-    origin.y = results[i].rect[1];
-
-    // Configure text background
-    cv::Rect text_back =
-        cv::Rect(results[i].rect[0], results[i].rect[1] - text_size.height,
-                 text_size.width, text_size.height);
-    // Draw text, and background
-    cv::rectangle(vis_img, text_back, roi_color, -1);
-    cv::putText(vis_img, text, origin, font_face, font_scale,
-                cv::Scalar(255, 255, 255), thickness);
-  }
-  return vis_img;
-}
-
-void ObjectDetector::Preprocess(const cv::Mat &ori_im) {
-  // Clone the image : keep the original mat for postprocess
-  cv::Mat im = ori_im.clone();
-  cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
-  preprocessor_.Run(&im, &inputs_);
-}
-
-void ObjectDetector::Postprocess(const std::vector<cv::Mat> mats,
-                                 std::vector<ObjectResult> *result,
-                                 std::vector<int> bbox_num,
-                                 bool is_rbox = false) {
-  result->clear();
-  int start_idx = 0;
-  for (int im_id = 0; im_id < mats.size(); im_id++) {
-    cv::Mat raw_mat = mats[im_id];
-    int rh = 1;
-    int rw = 1;
-    //     if (config_.arch_ == "Face") {
-    // rh = raw_mat.rows;
-    // rw = raw_mat.cols;
-    //     }
-    for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) {
-      if (is_rbox) {
-        // Class id
-        int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
-        // Confidence score
-        float score = output_data_[1 + j * 10];
-        int x1 = (output_data_[2 + j * 10] * rw);
-        int y1 = (output_data_[3 + j * 10] * rh);
-        int x2 = (output_data_[4 + j * 10] * rw);
-        int y2 = (output_data_[5 + j * 10] * rh);
-        int x3 = (output_data_[6 + j * 10] * rw);
-        int y3 = (output_data_[7 + j * 10] * rh);
-        int x4 = (output_data_[8 + j * 10] * rw);
-        int y4 = (output_data_[9 + j * 10] * rh);
-
-        ObjectResult result_item;
-        result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
-        result_item.class_id = class_id;
-        result_item.confidence = score;
-        result->push_back(result_item);
-      } else {
-        // Class id
-        int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
-        // Confidence score
-        float score = output_data_[1 + j * 6];
-        int xmin = (output_data_[2 + j * 6] * rw);
-        int ymin = (output_data_[3 + j * 6] * rh);
-        int xmax = (output_data_[4 + j * 6] * rw);
-        int ymax = (output_data_[5 + j * 6] * rh);
-        int wd = xmax - xmin;
-        int hd = ymax - ymin;
-
-        ObjectResult result_item;
-        result_item.rect = {xmin, ymin, xmax, ymax};
-        result_item.class_id = class_id;
-        result_item.confidence = score;
-        result->push_back(result_item);
-      }
-    }
-    start_idx += bbox_num[im_id];
-  }
-}
-
-void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, const int warmup,
-                             const int repeats,
-                             std::vector<ObjectResult> *result,
-                             std::vector<int> *bbox_num,
-                             std::vector<double> *times) {
-  auto preprocess_start = std::chrono::steady_clock::now();
-  int batch_size = imgs.size();
-
-  // in_data_batch
-  std::vector<float> in_data_all;
-  std::vector<float> im_shape_all(batch_size * 2);
-  std::vector<float> scale_factor_all(batch_size * 2);
-
-  // Preprocess image
-  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
-    cv::Mat im = imgs.at(bs_idx);
-    Preprocess(im);
-    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
-    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
-
-    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
-    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
-
-    // TODO: reduce cost time
-    in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(),
-                       inputs_.im_data_.end());
-  }
-
-  // Prepare input tensor
-  auto input_names = predictor_->GetInputNames();
-  for (const auto &tensor_name : input_names) {
-    auto in_tensor = predictor_->GetInputHandle(tensor_name);
-    if (tensor_name == "image") {
-      int rh = inputs_.in_net_shape_[0];
-      int rw = inputs_.in_net_shape_[1];
-      in_tensor->Reshape({batch_size, 3, rh, rw});
-      in_tensor->CopyFromCpu(in_data_all.data());
-    } else if (tensor_name == "im_shape") {
-      in_tensor->Reshape({batch_size, 2});
-      in_tensor->CopyFromCpu(im_shape_all.data());
-    } else if (tensor_name == "scale_factor") {
-      in_tensor->Reshape({batch_size, 2});
-      in_tensor->CopyFromCpu(scale_factor_all.data());
-    }
-  }
-
-  auto preprocess_end = std::chrono::steady_clock::now();
-  // Run predictor
-  // warmup
-  for (int i = 0; i < warmup; i++) {
-    predictor_->Run();
-    // Get output tensor
-    auto output_names = predictor_->GetOutputNames();
-    auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
-    std::vector<int> output_shape = out_tensor->shape();
-    auto out_bbox_num = predictor_->GetOutputHandle(output_names[1]);
-    std::vector<int> out_bbox_num_shape = out_bbox_num->shape();
-    // Calculate output length
-    int output_size = 1;
-    for (int j = 0; j < output_shape.size(); ++j) {
-      output_size *= output_shape[j];
+    void ObjectDetector::Preprocess(const cv::Mat &ori_im) {
+        // Work on a clone so the caller's mat (ori_im) is left unmodified
+        cv::Mat im = ori_im.clone();
+        cv::cvtColor(im, im, cv::COLOR_BGR2RGB);  // swap channel order in place on the clone
+        preprocessor_.Run(&im, &inputs_);  // presumably fills inputs_, which Predict() reads afterwards
     }
 
-    if (output_size < 6) {
-      std::cerr << "[WARNING] No object detected." << std::endl;
-    }
-    output_data_.resize(output_size);
-    out_tensor->CopyToCpu(output_data_.data());
+    void ObjectDetector::Postprocess(const std::vector <cv::Mat> mats,
+                                     std::vector <ObjectResult> *result,  // cleared, then one entry appended per box
+                                     std::vector<int> bbox_num,  // box count for each image in mats
+                                     bool is_rbox = false) {
+        result->clear();
+        int start_idx = 0;  // index of the current image's first record in output_data_
+        for (int im_id = 0; im_id < mats.size(); im_id++) {
+            cv::Mat raw_mat = mats[im_id];  // only referenced by the disabled code below
+            int rh = 1;
+            int rw = 1;
+            //     if (config_.arch_ == "Face") {
+            // rh = raw_mat.rows;
+            // rw = raw_mat.cols;
+            //     }
+            for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) {
+                if (is_rbox) {
+                    // Rotated-box record, 10 floats: [0] class id
+                    int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
+                    // [1] confidence score, [2..9] four (x, y) corner pairs
+                    float score = output_data_[1 + j * 10];
+                    int x1 = (output_data_[2 + j * 10] * rw);
+                    int y1 = (output_data_[3 + j * 10] * rh);
+                    int x2 = (output_data_[4 + j * 10] * rw);
+                    int y2 = (output_data_[5 + j * 10] * rh);
+                    int x3 = (output_data_[6 + j * 10] * rw);
+                    int y3 = (output_data_[7 + j * 10] * rh);
+                    int x4 = (output_data_[8 + j * 10] * rw);
+                    int y4 = (output_data_[9 + j * 10] * rh);
 
-    int out_bbox_num_size = 1;
-    for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
-      out_bbox_num_size *= out_bbox_num_shape[j];
-    }
-    out_bbox_num_data_.resize(out_bbox_num_size);
-    out_bbox_num->CopyToCpu(out_bbox_num_data_.data());
-  }
+                    ObjectResult result_item;
+                    result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
+                    result_item.class_id = class_id;
+                    result_item.confidence = score;
+                    result->push_back(result_item);
+                } else {
+                    // Axis-aligned record, 6 floats: [0] class id
+                    int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
+                    // [1] confidence score, [2..5] xmin, ymin, xmax, ymax
+                    float score = output_data_[1 + j * 6];
+                    int xmin = (output_data_[2 + j * 6] * rw);
+                    int ymin = (output_data_[3 + j * 6] * rh);
+                    int xmax = (output_data_[4 + j * 6] * rw);
+                    int ymax = (output_data_[5 + j * 6] * rh);
+                    int wd = xmax - xmin;  // unused in this function
+                    int hd = ymax - ymin;  // unused in this function
 
-  bool is_rbox = false;
-  auto inference_start = std::chrono::steady_clock::now();
-  for (int i = 0; i < repeats; i++) {
-    predictor_->Run();
-    // Get output tensor
-    auto output_names = predictor_->GetOutputNames();
-    auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
-    std::vector<int> output_shape = out_tensor->shape();
-    auto out_bbox_num = predictor_->GetOutputHandle(output_names[1]);
-    std::vector<int> out_bbox_num_shape = out_bbox_num->shape();
-    // Calculate output length
-    int output_size = 1;
-    for (int j = 0; j < output_shape.size(); ++j) {
-      output_size *= output_shape[j];
+                    ObjectResult result_item;
+                    result_item.rect = {xmin, ymin, xmax, ymax};
+                    result_item.class_id = class_id;
+                    result_item.confidence = score;
+                    result->push_back(result_item);
+                }
+            }
+            start_idx += bbox_num[im_id];  // advance to the next image's records
+        }
     }
-    is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;
 
-    if (output_size < 6) {
-      std::cerr << "[WARNING] No object detected." << std::endl;
+    void ObjectDetector::Predict(const std::vector <cv::Mat> imgs, const int warmup,
+                                 const int repeats,
+                                 std::vector <ObjectResult> *result,  // cleared and refilled with the detected boxes
+                                 std::vector<int> *bbox_num,  // cleared and refilled from out_bbox_num_data_
+                                 std::vector<double> *times) {  // three timings are appended (see the end of the function)
+        auto preprocess_start = std::chrono::steady_clock::now();
+        int batch_size = imgs.size();
+
+        // Flattened input buffers for the whole batch
+        std::vector<float> in_data_all;
+        std::vector<float> im_shape_all(batch_size * 2);  // 2 values per image
+        std::vector<float> scale_factor_all(batch_size * 2);  // 2 values per image
+
+        // Preprocess each image and append its data to the batch buffers
+        for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
+            cv::Mat im = imgs.at(bs_idx);
+            Preprocess(im);  // inputs_ is read back right below
+            im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
+            im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
+
+            scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
+            scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
+
+            // TODO: reduce cost time
+            in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(),
+                               inputs_.im_data_.end());
+        }
+
+        // Feed the input tensors by name; names other than the three handled below are left untouched
+        auto input_names = predictor_->GetInputNames();
+        for (const auto &tensor_name : input_names) {
+            auto in_tensor = predictor_->GetInputHandle(tensor_name);
+            if (tensor_name == "image") {
+                int rh = inputs_.in_net_shape_[0];  // inputs_ holds the values of the last preprocessed image
+                int rw = inputs_.in_net_shape_[1];
+                in_tensor->Reshape({batch_size, 3, rh, rw});
+                in_tensor->CopyFromCpu(in_data_all.data());
+            } else if (tensor_name == "im_shape") {
+                in_tensor->Reshape({batch_size, 2});
+                in_tensor->CopyFromCpu(im_shape_all.data());
+            } else if (tensor_name == "scale_factor") {
+                in_tensor->Reshape({batch_size, 2});
+                in_tensor->CopyFromCpu(scale_factor_all.data());
+            }
+        }
+
+        auto preprocess_end = std::chrono::steady_clock::now();
+        // Run predictor
+        // Warmup: same steps as the timed loop below, executed before inference_start is taken
+        for (int i = 0; i < warmup; i++) {
+            predictor_->Run();
+            // Get output tensor
+            auto output_names = predictor_->GetOutputNames();
+            auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
+            std::vector<int> output_shape = out_tensor->shape();
+            auto out_bbox_num = predictor_->GetOutputHandle(output_names[1]);
+            std::vector<int> out_bbox_num_shape = out_bbox_num->shape();
+            // Calculate output length
+            int output_size = 1;
+            for (int j = 0; j < output_shape.size(); ++j) {
+                output_size *= output_shape[j];
+            }
+
+            if (output_size < 6) {
+                std::cerr << "[WARNING] No object detected." << std::endl;
+            }
+            output_data_.resize(output_size);
+            out_tensor->CopyToCpu(output_data_.data());
+
+            int out_bbox_num_size = 1;
+            for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
+                out_bbox_num_size *= out_bbox_num_shape[j];
+            }
+            out_bbox_num_data_.resize(out_bbox_num_size);
+            out_bbox_num->CopyToCpu(out_bbox_num_data_.data());
+        }
+
+        bool is_rbox = false;  // stays false when repeats == 0
+        auto inference_start = std::chrono::steady_clock::now();
+        for (int i = 0; i < repeats; i++) {
+            predictor_->Run();
+            // Get output tensor
+            auto output_names = predictor_->GetOutputNames();
+            auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
+            std::vector<int> output_shape = out_tensor->shape();
+            auto out_bbox_num = predictor_->GetOutputHandle(output_names[1]);
+            std::vector<int> out_bbox_num_shape = out_bbox_num->shape();
+            // Calculate output length
+            int output_size = 1;
+            for (int j = 0; j < output_shape.size(); ++j) {
+                output_size *= output_shape[j];
+            }
+            is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;  // last output dim divisible by 10 => rotated-box records
+
+            if (output_size < 6) {
+                std::cerr << "[WARNING] No object detected." << std::endl;
+            }
+            output_data_.resize(output_size);
+            out_tensor->CopyToCpu(output_data_.data());
+
+            int out_bbox_num_size = 1;
+            for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
+                out_bbox_num_size *= out_bbox_num_shape[j];
+            }
+            out_bbox_num_data_.resize(out_bbox_num_size);
+            out_bbox_num->CopyToCpu(out_bbox_num_data_.data());
+        }
+        auto inference_end = std::chrono::steady_clock::now();
+        auto postprocess_start = std::chrono::steady_clock::now();
+        // Decode output_data_ into ObjectResult entries
+        result->clear();
+        Postprocess(imgs, result, out_bbox_num_data_, is_rbox);
+        bbox_num->clear();
+        for (int k = 0; k < out_bbox_num_data_.size(); k++) {  // copy the per-image box counts out to the caller
+            int tmp = out_bbox_num_data_[k];
+            bbox_num->push_back(tmp);
+        }
+        auto postprocess_end = std::chrono::steady_clock::now();
+
+        std::chrono::duration<float> preprocess_diff =
+                preprocess_end - preprocess_start;
+        times->push_back(double(preprocess_diff.count() * 1000));  // preprocess time, ms
+        std::chrono::duration<float> inference_diff = inference_end - inference_start;
+        times->push_back(double(inference_diff.count() / repeats * 1000));  // mean ms per repeat; NOTE(review): divides by zero when repeats == 0
+        std::chrono::duration<float> postprocess_diff =
+                postprocess_end - postprocess_start;
+        times->push_back(double(postprocess_diff.count() * 1000));  // postprocess time, ms
     }
-    output_data_.resize(output_size);
-    out_tensor->CopyToCpu(output_data_.data());
 
-    int out_bbox_num_size = 1;
-    for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
-      out_bbox_num_size *= out_bbox_num_shape[j];
+    std::vector<int> GenerateColorMap(int num_class) {  // returns 3 * num_class ints: one color triple per class id
+        auto colormap = std::vector<int>(3 * num_class, 0);  // class 0 keeps {0, 0, 0} (the while loop is skipped)
+        for (int i = 0; i < num_class; ++i) {
+            int j = 0;  // number of 3-bit groups of the class id consumed so far
+            int lab = i;
+            while (lab) {  // consume the class id three bits at a time
+                colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));  // group bit 0 -> component 0, placed at bit 7 - j
+                colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));  // group bit 1 -> component 1
+                colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));  // group bit 2 -> component 2
+                ++j;
+                lab >>= 3;
+            }
+        }
+        return colormap;
     }
-    out_bbox_num_data_.resize(out_bbox_num_size);
-    out_bbox_num->CopyToCpu(out_bbox_num_data_.data());
-  }
-  auto inference_end = std::chrono::steady_clock::now();
-  auto postprocess_start = std::chrono::steady_clock::now();
-  // Postprocessing result
-  result->clear();
-  Postprocess(imgs, result, out_bbox_num_data_, is_rbox);
-  bbox_num->clear();
-  for (int k = 0; k < out_bbox_num_data_.size(); k++) {
-    int tmp = out_bbox_num_data_[k];
-    bbox_num->push_back(tmp);
-  }
-  auto postprocess_end = std::chrono::steady_clock::now();
-
-  std::chrono::duration<float> preprocess_diff =
-      preprocess_end - preprocess_start;
-  times->push_back(double(preprocess_diff.count() * 1000));
-  std::chrono::duration<float> inference_diff = inference_end - inference_start;
-  times->push_back(double(inference_diff.count() / repeats * 1000));
-  std::chrono::duration<float> postprocess_diff =
-      postprocess_end - postprocess_start;
-  times->push_back(double(postprocess_diff.count() * 1000));
-}
-
-std::vector<int> GenerateColorMap(int num_class) {
-  auto colormap = std::vector<int>(3 * num_class, 0);
-  for (int i = 0; i < num_class; ++i) {
-    int j = 0;
-    int lab = i;
-    while (lab) {
-      colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
-      colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
-      colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
-      ++j;
-      lab >>= 3;
-    }
-  }
-  return colormap;
-}
 
 } // namespace Detection
diff --git a/deploy/cpp_shitu/src/preprocess_op.cpp b/deploy/cpp_shitu/src/preprocess_op.cpp
index 13200deda..6bf1ec91f 100644
--- a/deploy/cpp_shitu/src/preprocess_op.cpp
+++ b/deploy/cpp_shitu/src/preprocess_op.cpp
@@ -32,60 +32,60 @@
 
 namespace Feature {
 
-void Permute::Run(const cv::Mat *im, float *data) {
-  int rh = im->rows;
-  int rw = im->cols;
-  int rc = im->channels();
-  for (int i = 0; i < rc; ++i) {
-    cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
-  }
-}
-
-void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
-                    const std::vector<float> &std, float scale) {
-  (*im).convertTo(*im, CV_32FC3, scale);
-  for (int h = 0; h < im->rows; h++) {
-    for (int w = 0; w < im->cols; w++) {
-      im->at<cv::Vec3f>(h, w)[0] =
-          (im->at<cv::Vec3f>(h, w)[0] - mean[0]) / std[0];
-      im->at<cv::Vec3f>(h, w)[1] =
-          (im->at<cv::Vec3f>(h, w)[1] - mean[1]) / std[1];
-      im->at<cv::Vec3f>(h, w)[2] =
-          (im->at<cv::Vec3f>(h, w)[2] - mean[2]) / std[2];
+    void Permute::Run(const cv::Mat *im, float *data) {
+        int rh = im->rows;
+        int rw = im->cols;
+        int rc = im->channels();
+        for (int i = 0; i < rc; ++i) {
+            cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
+        }
     }
-  }
-}
 
-void CenterCropImg::Run(cv::Mat &img, const int crop_size) {
-  int resize_w = img.cols;
-  int resize_h = img.rows;
-  int w_start = int((resize_w - crop_size) / 2);
-  int h_start = int((resize_h - crop_size) / 2);
-  cv::Rect rect(w_start, h_start, crop_size, crop_size);
-  img = img(rect);
-}
-
-void ResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
-                    int resize_short_size, int size) {
-  int resize_h = 0;
-  int resize_w = 0;
-  if (size > 0) {
-    resize_h = size;
-    resize_w = size;
-  } else {
-    int w = img.cols;
-    int h = img.rows;
-
-    float ratio = 1.f;
-    if (h < w) {
-      ratio = float(resize_short_size) / float(h);
-    } else {
-      ratio = float(resize_short_size) / float(w);
+    void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
+                        const std::vector<float> &std, float scale) {
+        (*im).convertTo(*im, CV_32FC3, scale);
+        for (int h = 0; h < im->rows; h++) {
+            for (int w = 0; w < im->cols; w++) {
+                im->at<cv::Vec3f>(h, w)[0] =
+                        (im->at<cv::Vec3f>(h, w)[0] - mean[0]) / std[0];
+                im->at<cv::Vec3f>(h, w)[1] =
+                        (im->at<cv::Vec3f>(h, w)[1] - mean[1]) / std[1];
+                im->at<cv::Vec3f>(h, w)[2] =
+                        (im->at<cv::Vec3f>(h, w)[2] - mean[2]) / std[2];
+            }
+        }
+    }
+
+    void CenterCropImg::Run(cv::Mat &img, const int crop_size) {
+        int resize_w = img.cols;
+        int resize_h = img.rows;
+        int w_start = int((resize_w - crop_size) / 2);
+        int h_start = int((resize_h - crop_size) / 2);
+        cv::Rect rect(w_start, h_start, crop_size, crop_size);
+        img = img(rect);
+    }
+
+    void ResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
+                        int resize_short_size, int size) {
+        int resize_h = 0;
+        int resize_w = 0;
+        if (size > 0) {
+            resize_h = size;
+            resize_w = size;
+        } else {
+            int w = img.cols;
+            int h = img.rows;
+
+            float ratio = 1.f;
+            if (h < w) {
+                ratio = float(resize_short_size) / float(h);
+            } else {
+                ratio = float(resize_short_size) / float(w);
+            }
+            resize_h = round(float(h) * ratio);
+            resize_w = round(float(w) * ratio);
+        }
+        cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
     }
-    resize_h = round(float(h) * ratio);
-    resize_w = round(float(w) * ratio);
-  }
-  cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
-}
 
 } // namespace Feature
diff --git a/deploy/cpp_shitu/src/preprocess_op_det.cpp b/deploy/cpp_shitu/src/preprocess_op_det.cpp
index a61112f60..e32b03a47 100644
--- a/deploy/cpp_shitu/src/preprocess_op_det.cpp
+++ b/deploy/cpp_shitu/src/preprocess_op_det.cpp
@@ -19,112 +19,112 @@
 
 namespace Detection {
 
-void InitInfo::Run(cv::Mat *im, ImageBlob *data) {
-  data->im_shape_ = {static_cast<float>(im->rows),
-                     static_cast<float>(im->cols)};
-  data->scale_factor_ = {1., 1.};
-  data->in_net_shape_ = {static_cast<float>(im->rows),
-                         static_cast<float>(im->cols)};
-}
-
-void NormalizeImage::Run(cv::Mat *im, ImageBlob *data) {
-  double e = 1.0;
-  if (is_scale_) {
-    e /= 255.0;
-  }
-  (*im).convertTo(*im, CV_32FC3, e);
-  for (int h = 0; h < im->rows; h++) {
-    for (int w = 0; w < im->cols; w++) {
-      im->at<cv::Vec3f>(h, w)[0] =
-          (im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
-      im->at<cv::Vec3f>(h, w)[1] =
-          (im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
-      im->at<cv::Vec3f>(h, w)[2] =
-          (im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
+    void InitInfo::Run(cv::Mat *im, ImageBlob *data) {
+        data->im_shape_ = {static_cast<float>(im->rows),
+                           static_cast<float>(im->cols)};
+        data->scale_factor_ = {1., 1.};
+        data->in_net_shape_ = {static_cast<float>(im->rows),
+                               static_cast<float>(im->cols)};
     }
-  }
-}
 
-void Permute::Run(cv::Mat *im, ImageBlob *data) {
-  int rh = im->rows;
-  int rw = im->cols;
-  int rc = im->channels();
-  (data->im_data_).resize(rc * rh * rw);
-  float *base = (data->im_data_).data();
-  for (int i = 0; i < rc; ++i) {
-    cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
-  }
-}
+    void NormalizeImage::Run(cv::Mat *im, ImageBlob *data) {
+        double e = 1.0;
+        if (is_scale_) {
+            e /= 255.0;
+        }
+        (*im).convertTo(*im, CV_32FC3, e);
+        for (int h = 0; h < im->rows; h++) {
+            for (int w = 0; w < im->cols; w++) {
+                im->at<cv::Vec3f>(h, w)[0] =
+                        (im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
+                im->at<cv::Vec3f>(h, w)[1] =
+                        (im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
+                im->at<cv::Vec3f>(h, w)[2] =
+                        (im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
+            }
+        }
+    }
 
-void Resize::Run(cv::Mat *im, ImageBlob *data) {
-  auto resize_scale = GenerateScale(*im);
-  data->im_shape_ = {static_cast<float>(im->cols * resize_scale.first),
-                     static_cast<float>(im->rows * resize_scale.second)};
-  data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
-                         static_cast<float>(im->rows * resize_scale.second)};
-  cv::resize(*im, *im, cv::Size(), resize_scale.first, resize_scale.second,
-             interp_);
-  data->im_shape_ = {
-      static_cast<float>(im->rows), static_cast<float>(im->cols),
-  };
-  data->scale_factor_ = {
-      resize_scale.second, resize_scale.first,
-  };
-}
+    void Permute::Run(cv::Mat *im, ImageBlob *data) {
+        int rh = im->rows;
+        int rw = im->cols;
+        int rc = im->channels();
+        (data->im_data_).resize(rc * rh * rw);
+        float *base = (data->im_data_).data();
+        for (int i = 0; i < rc; ++i) {
+            cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
+        }
+    }
 
-std::pair<double, double> Resize::GenerateScale(const cv::Mat &im) {
-  std::pair<double, double> resize_scale;
-  int origin_w = im.cols;
-  int origin_h = im.rows;
+    void Resize::Run(cv::Mat *im, ImageBlob *data) {
+        auto resize_scale = GenerateScale(*im);
+        data->im_shape_ = {static_cast<float>(im->cols * resize_scale.first),
+                           static_cast<float>(im->rows * resize_scale.second)};
+        data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
+                               static_cast<float>(im->rows * resize_scale.second)};
+        cv::resize(*im, *im, cv::Size(), resize_scale.first, resize_scale.second,
+                   interp_);
+        data->im_shape_ = {
+                static_cast<float>(im->rows), static_cast<float>(im->cols),
+        };
+        data->scale_factor_ = {
+                resize_scale.second, resize_scale.first,
+        };
+    }
 
-  if (keep_ratio_) {
-    int im_size_max = std::max(origin_w, origin_h);
-    int im_size_min = std::min(origin_w, origin_h);
-    int target_size_max =
-        *std::max_element(target_size_.begin(), target_size_.end());
-    int target_size_min =
-        *std::min_element(target_size_.begin(), target_size_.end());
-    double scale_min =
-        static_cast<double>(target_size_min) / static_cast<double>(im_size_min);
-    double scale_max =
-        static_cast<double>(target_size_max) / static_cast<double>(im_size_max);
-    double scale_ratio = std::min(scale_min, scale_max);
-    resize_scale = {scale_ratio, scale_ratio};
-  } else {
-    resize_scale.first =
-        static_cast<double>(target_size_[1]) / static_cast<double>(origin_w);
-    resize_scale.second =
-        static_cast<double>(target_size_[0]) / static_cast<double>(origin_h);
-  }
-  return resize_scale;
-}
+    std::pair<double, double> Resize::GenerateScale(const cv::Mat &im) {
+        std::pair<double, double> resize_scale;
+        int origin_w = im.cols;
+        int origin_h = im.rows;
 
-void PadStride::Run(cv::Mat *im, ImageBlob *data) {
-  if (stride_ <= 0) {
-    return;
-  }
-  int rc = im->channels();
-  int rh = im->rows;
-  int rw = im->cols;
-  int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
-  int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
-  cv::copyMakeBorder(*im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT,
-                     cv::Scalar(0));
-  data->in_net_shape_ = {
-      static_cast<float>(im->rows), static_cast<float>(im->cols),
-  };
-}
+        if (keep_ratio_) {
+            int im_size_max = std::max(origin_w, origin_h);
+            int im_size_min = std::min(origin_w, origin_h);
+            int target_size_max =
+                    *std::max_element(target_size_.begin(), target_size_.end());
+            int target_size_min =
+                    *std::min_element(target_size_.begin(), target_size_.end());
+            double scale_min =
+                    static_cast<double>(target_size_min) / static_cast<double>(im_size_min);
+            double scale_max =
+                    static_cast<double>(target_size_max) / static_cast<double>(im_size_max);
+            double scale_ratio = std::min(scale_min, scale_max);
+            resize_scale = {scale_ratio, scale_ratio};
+        } else {
+            resize_scale.first =
+                    static_cast<double>(target_size_[1]) / static_cast<double>(origin_w);
+            resize_scale.second =
+                    static_cast<double>(target_size_[0]) / static_cast<double>(origin_h);
+        }
+        return resize_scale;
+    }
+
+    void PadStride::Run(cv::Mat *im, ImageBlob *data) {
+        if (stride_ <= 0) {
+            return;
+        }
+        int rc = im->channels();
+        int rh = im->rows;
+        int rw = im->cols;
+        int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
+        int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
+        cv::copyMakeBorder(*im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT,
+                           cv::Scalar(0));
+        data->in_net_shape_ = {
+                static_cast<float>(im->rows), static_cast<float>(im->cols),
+        };
+    }
 
 // Preprocessor op running order
-const std::vector<std::string> Preprocessor::RUN_ORDER = {
-    "InitInfo", "Resize", "NormalizeImage", "PadStride", "Permute"};
+    const std::vector <std::string> Preprocessor::RUN_ORDER = {
+            "InitInfo", "Resize", "NormalizeImage", "PadStride", "Permute"};
 
-void Preprocessor::Run(cv::Mat *im, ImageBlob *data) {
-  for (const auto &name : RUN_ORDER) {
-    if (ops_.find(name) != ops_.end()) {
-      ops_[name]->Run(im, data);
+    void Preprocessor::Run(cv::Mat *im, ImageBlob *data) {
+        for (const auto &name : RUN_ORDER) {
+            if (ops_.find(name) != ops_.end()) {
+                ops_[name]->Run(im, data);
+            }
+        }
     }
-  }
-}
 
 } // namespace Detection
diff --git a/deploy/cpp_shitu/src/vector_search.cpp b/deploy/cpp_shitu/src/vector_search.cpp
index 19b637a74..85c487a77 100644
--- a/deploy/cpp_shitu/src/vector_search.cpp
+++ b/deploy/cpp_shitu/src/vector_search.cpp
@@ -20,43 +20,43 @@
 #include <regex>
 
 void VectorSearch::LoadIndexFile() {
-  std::string file_path = this->index_dir + OS_PATH_SEP + "vector.index";
-  const char *fname = file_path.c_str();
-  this->index = faiss::read_index(fname, 0);
+    std::string file_path = this->index_dir + OS_PATH_SEP + "vector.index";
+    const char *fname = file_path.c_str();
+    this->index = faiss::read_index(fname, 0);
 }
 
 void VectorSearch::LoadIdMap() {
-  std::string file_path = this->index_dir + OS_PATH_SEP + "id_map.txt";
-  std::ifstream in(file_path);
-  std::string line;
-  std::vector<std::string> m_vec;
-  if (in) {
-    while (getline(in, line)) {
-      std::regex ws_re("\\s+");
-      std::vector<std::string> v(
-          std::sregex_token_iterator(line.begin(), line.end(), ws_re, -1),
-          std::sregex_token_iterator());
-      if (v.size() != 2) {
-        std::cout << "The number of element for each line in : " << file_path
-                  << "must be 2, exit the program..." << std::endl;
-        exit(1);
-      } else
-        this->id_map.insert(std::pair<long int, std::string>(
-            std::stol(v[0], nullptr, 10), v[1]));
+    std::string file_path = this->index_dir + OS_PATH_SEP + "id_map.txt";
+    std::ifstream in(file_path);
+    std::string line;
+    std::vector <std::string> m_vec;
+    if (in) {
+        while (getline(in, line)) {
+            std::regex ws_re("\\s+");
+            std::vector <std::string> v(
+                    std::sregex_token_iterator(line.begin(), line.end(), ws_re, -1),
+                    std::sregex_token_iterator());
+            if (v.size() != 2) {
+                std::cout << "The number of element for each line in : " << file_path
+                          << "must be 2, exit the program..." << std::endl;
+                exit(1);
+            } else
+                this->id_map.insert(std::pair<long int, std::string>(
+                        std::stol(v[0], nullptr, 10), v[1]));
+        }
     }
-  }
 }
 
 const SearchResult &VectorSearch::Search(float *feature, int query_number) {
-  this->D.resize(this->return_k * query_number);
-  this->I.resize(this->return_k * query_number);
-  this->index->search(query_number, feature, return_k, D.data(), I.data());
-  this->sr.return_k = this->return_k;
-  this->sr.D = this->D;
-  this->sr.I = this->I;
-  return this->sr;
+    this->D.resize(this->return_k * query_number);
+    this->I.resize(this->return_k * query_number);
+    this->index->search(query_number, feature, return_k, D.data(), I.data());
+    this->sr.return_k = this->return_k;
+    this->sr.D = this->D;
+    this->sr.I = this->I;
+    return this->sr;
 }
 
 const std::string &VectorSearch::GetLabel(faiss::Index::idx_t ind) {
-  return this->id_map.at(ind);
+    return this->id_map.at(ind);
 }
diff --git a/deploy/cpp_shitu/src/yaml_config.cpp b/deploy/cpp_shitu/src/yaml_config.cpp
index dd877d227..b20fcde9e 100644
--- a/deploy/cpp_shitu/src/yaml_config.cpp
+++ b/deploy/cpp_shitu/src/yaml_config.cpp
@@ -19,60 +19,60 @@
 #include <include/yaml_config.h>
 
 
-std::vector<std::string> YamlConfig::ReadDict(const std::string &path) {
-  std::ifstream in(path);
-  std::string line;
-  std::vector<std::string> m_vec;
-  if (in) {
-    while (getline(in, line)) {
-      m_vec.push_back(line);
+std::vector <std::string> YamlConfig::ReadDict(const std::string &path) {
+    std::ifstream in(path);
+    std::string line;
+    std::vector <std::string> m_vec;
+    if (in) {
+        while (getline(in, line)) {
+            m_vec.push_back(line);
+        }
+    } else {
+        std::cout << "no such label file: " << path << ", exit the program..."
+                  << std::endl;
+        exit(1);
     }
-  } else {
-    std::cout << "no such label file: " << path << ", exit the program..."
-              << std::endl;
-    exit(1);
-  }
-  return m_vec;
+    return m_vec;
 }
 
 std::map<int, std::string> YamlConfig::ReadIndexId(const std::string &path) {
-  std::ifstream in(path);
-  std::string line;
-  std::map<int, std::string> m_vec;
-  if (in) {
-    while (getline(in, line)) {
-      std::regex ws_re("\\s+");
-      std::vector<std::string> v(
-          std::sregex_token_iterator(line.begin(), line.end(), ws_re, -1),
-          std::sregex_token_iterator());
-      if (v.size() != 3) {
-        std::cout << "The number of element for each line in : " << path
-                  << "must be 3, exit the program..." << std::endl;
-        exit(1);
-      } else
-        m_vec.insert(std::pair<int, std::string>(stoi(v[0]), v[2]));
+    std::ifstream in(path);
+    std::string line;
+    std::map<int, std::string> m_vec;
+    if (in) {
+        while (getline(in, line)) {
+            std::regex ws_re("\\s+");
+            std::vector <std::string> v(
+                    std::sregex_token_iterator(line.begin(), line.end(), ws_re, -1),
+                    std::sregex_token_iterator());
+            if (v.size() != 3) {
+                std::cout << "The number of element for each line in : " << path
+                          << "must be 3, exit the program..." << std::endl;
+                exit(1);
+            } else
+                m_vec.insert(std::pair<int, std::string>(stoi(v[0]), v[2]));
+        }
     }
-  }
-  return m_vec;
+    return m_vec;
 }
 
 YAML::Node YamlConfig::ReadYamlConfig(const std::string &path) {
-  YAML::Node config;
-  try {
-    config = YAML::LoadFile(path);
-  } catch (YAML::BadFile &e) {
-    std::cout << "Something wrong in yaml file, please check yaml file"
-              << std::endl;
-    exit(1);
-  }
-  return config;
+    YAML::Node config;
+    try {
+        config = YAML::LoadFile(path);
+    } catch (YAML::BadFile &e) {
+        std::cout << "Something wrong in yaml file, please check yaml file"
+                  << std::endl;
+        exit(1);
+    }
+    return config;
 }
 
 void YamlConfig::PrintConfigInfo() {
-  std::cout << this->config_file << std::endl;
-  //   for (YAML::const_iterator
-  //   it=config_file.begin();it!=config_file.end();++it)
-  // {
-  //   std::cout << it->as<std::string>() << "\n";
-  //   }
+    std::cout << this->config_file << std::endl;
+    //   for (YAML::const_iterator
+    //   it=config_file.begin();it!=config_file.end();++it)
+    // {
+    //   std::cout << it->as<std::string>() << "\n";
+    //   }
 }
diff --git a/deploy/cpp_shitu/tools/build.sh b/deploy/cpp_shitu/tools/build.sh
index a7ecaa30f..5307af061 100755
--- a/deploy/cpp_shitu/tools/build.sh
+++ b/deploy/cpp_shitu/tools/build.sh
@@ -1,8 +1,8 @@
-OPENCV_DIR=/work/project/project/cpp_infer/opencv-3.4.7/opencv3
-LIB_DIR=/work/project/project/cpp_infer/paddle_inference/
+OPENCV_DIR=${opencv_install_dir}
+LIB_DIR=${paddle_inference_dir}
 CUDA_LIB_DIR=/usr/local/cuda/lib64
 CUDNN_LIB_DIR=/usr/lib/x86_64-linux-gnu/
-FAISS_DIR=/work/project/project/cpp_infer/faiss/faiss_install
+FAISS_DIR=${faiss_install_dir}
 FAISS_WITH_MKL=OFF
 
 BUILD_DIR=build
@@ -21,4 +21,4 @@ cmake .. \
     -DFAISS_DIR=${FAISS_DIR} \
     -DFAISS_WITH_MKL=${FAISS_WITH_MKL}
 
-make -j
+make -j
\ No newline at end of file
diff --git a/deploy/cpp_shitu/tools/config.txt b/deploy/cpp_shitu/tools/config.txt
deleted file mode 100755
index 0d915a91a..000000000
--- a/deploy/cpp_shitu/tools/config.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# model load config
-use_gpu  0
-gpu_id  0
-gpu_mem  4000
-cpu_threads  10
-use_mkldnn 1
-use_tensorrt 0
-use_fp16 0
-
-# cls config
-cls_model_path  /PaddleClas/inference/cls_infer.pdmodel
-cls_params_path /PaddleClas/inference/cls_infer.pdiparams
-resize_short_size 256
-crop_size 224
-
-# for log env info
-benchmark 0
diff --git a/deploy/cpp_shitu/tools/run.sh b/deploy/cpp_shitu/tools/run.sh
deleted file mode 100755
index a8102a825..000000000
--- a/deploy/cpp_shitu/tools/run.sh
+++ /dev/null
@@ -1 +0,0 @@
-./build/clas_system ../configs/inference_rec.yaml
diff --git a/docs/images/quick_start/shitu_c++_result.png b/docs/images/quick_start/shitu_c++_result.png
new file mode 100644
index 000000000..f2cbee5ef
Binary files /dev/null and b/docs/images/quick_start/shitu_c++_result.png differ