From 47c3b093a28f1e7ae520a96f173f96d8fde238ee Mon Sep 17 00:00:00 2001
From: lubin <lubin10@baidu.com>
Date: Tue, 8 Mar 2022 06:23:21 +0000
Subject: [PATCH] do feature extraction

---
 deploy/lite_shitu/Makefile                    |   6 +-
 deploy/lite_shitu/generate_json_config.py     |   2 +
 .../{recognition.h => feature_extractor.h}    |  36 ++-----
 deploy/lite_shitu/include/utils.h             |   2 +-
 deploy/lite_shitu/shitu_config.json           | 101 ++++++++++++++++++
 .../{recognition.cc => feature_extractor.cc}  |  68 ++++--------
 deploy/lite_shitu/src/main.cc                 |  21 ++--
 7 files changed, 146 insertions(+), 90 deletions(-)
 rename deploy/lite_shitu/include/{recognition.h => feature_extractor.h} (72%)
 create mode 100644 deploy/lite_shitu/shitu_config.json
 rename deploy/lite_shitu/src/{recognition.cc => feature_extractor.cc} (65%)
diff --git a/deploy/lite_shitu/Makefile b/deploy/lite_shitu/Makefile
index 64ee6aed8..53cc7d7aa 100644
--- a/deploy/lite_shitu/Makefile
+++ b/deploy/lite_shitu/Makefile
@@ -9,15 +9,13 @@ endif
 ${info ARM_ABI: ${ARM_ABI}}
 ${info ARM_PLAT: ${ARM_PLAT}; option[arm7/arm8]}
 
-include ../Makefile.def
-
-LITE_ROOT=../../../
+LITE_ROOT=libs/inference_lite_lib.android.armv8
+include ${LITE_ROOT}/demo/cxx/Makefile.def
 ${info LITE_ROOT: $(abspath ${LITE_ROOT})}
 
 THIRD_PARTY_DIR=third_party
 ${info THIRD_PARTY_DIR: $(abspath ${THIRD_PARTY_DIR})}
 
-
 OPENCV_VERSION=opencv4.1.0
 OPENCV_LIBS = ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PLAT}/libs/libopencv_imgcodecs.a \
               ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PLAT}/libs/libopencv_imgproc.a \
diff --git a/deploy/lite_shitu/generate_json_config.py b/deploy/lite_shitu/generate_json_config.py
index 1525cdab9..37d06c47e 100644
--- a/deploy/lite_shitu/generate_json_config.py
+++ b/deploy/lite_shitu/generate_json_config.py
@@ -130,6 +130,8 @@ def main():
             y["type"] = k
             config_json["RecPreProcess"]["transform_ops"].append(y)
 
+    # set IndexProcess
+    config_json["IndexProcess"] = config_yaml["IndexProcess"]
     with open('shitu_config.json', 'w') as fd:
         json.dump(config_json, fd, indent=4)
 
diff --git a/deploy/lite_shitu/include/recognition.h b/deploy/lite_shitu/include/feature_extractor.h
similarity index 72%
rename from deploy/lite_shitu/include/recognition.h
rename to deploy/lite_shitu/include/feature_extractor.h
index 0c45e946e..1961459ec 100644
--- a/deploy/lite_shitu/include/recognition.h
+++ b/deploy/lite_shitu/include/feature_extractor.h
@@ -36,10 +36,9 @@ struct RESULT {
   float score;
 };
 
-class Recognition {
-
+class FeatureExtract {
 public:
-  explicit Recognition(const Json::Value &config_file) {
+  explicit FeatureExtract(const Json::Value &config_file) {
     MobileConfig config;
     if (config_file["Global"]["rec_model_path"].as<std::string>().empty()) {
       std::cout << "Please set [rec_model_path] in config file" << std::endl;
@@ -53,29 +52,8 @@ public:
       std::cout << "Please set [rec_label_path] in config file" << std::endl;
       exit(-1);
     }
-    LoadLabel(config_file["Global"]["rec_label_path"].as<std::string>());
     SetPreProcessParam(config_file["RecPreProcess"]["transform_ops"]);
-    if (!config_file["Global"].isMember("return_k")){
-      this->topk = config_file["Global"]["return_k"].as<int>();
-    }
-    printf("rec model create!\n");
-  }
-
-  void LoadLabel(std::string path) {
-    std::ifstream file;
-    std::vector<std::string> label_list;
-    file.open(path);
-    while (file) {
-      std::string line;
-      std::getline(file, line);
-      std::string::size_type pos = line.find(" ");
-      if (pos != std::string::npos) {
-        line = line.substr(pos);
-      }
-      this->label_list.push_back(line);
-    }
-    file.clear();
-    file.close();
+    printf("feature extract model create!\n");
   }
 
   void SetPreProcessParam(const Json::Value &config_file) {
@@ -97,19 +75,17 @@ public:
     }
   }
 
-  std::vector<RESULT> RunRecModel(const cv::Mat &img, double &cost_time);
-  std::vector<RESULT> PostProcess(const float *output_data, int output_size,
-                                  cv::Mat &output_image);
+  void RunRecModel(const cv::Mat &img, double &cost_time, std::vector<float> &feature);
+  //void PostProcess(std::vector<float> &feature);
   cv::Mat ResizeImage(const cv::Mat &img);
   void NeonMeanScale(const float *din, float *dout, int size);
 
 private:
   std::shared_ptr<PaddlePredictor> predictor;
-  std::vector<std::string> label_list;
+  //std::vector<std::string> label_list;
   std::vector<float> mean = {0.485f, 0.456f, 0.406f};
   std::vector<float> std = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
   double scale = 0.00392157;
   float size = 224;
-  int topk = 5;
 };
 } // namespace PPShiTu
diff --git a/deploy/lite_shitu/include/utils.h b/deploy/lite_shitu/include/utils.h
index 18a04cf34..a3b57c882 100644
--- a/deploy/lite_shitu/include/utils.h
+++ b/deploy/lite_shitu/include/utils.h
@@ -16,7 +16,7 @@
 
 #include <algorithm>
 #include <ctime>
-#include <include/recognition.h>
+#include <include/feature_extractor.h>
 #include <memory>
 #include <numeric>
 #include <string>
diff --git a/deploy/lite_shitu/shitu_config.json b/deploy/lite_shitu/shitu_config.json
new file mode 100644
index 000000000..346f77cae
--- /dev/null
+++ b/deploy/lite_shitu/shitu_config.json
@@ -0,0 +1,101 @@
+{
+    "Global": {
+        "infer_imgs": "images/demo.jpg",
+        "batch_size": 1,
+        "cpu_num_threads": 4,
+        "image_shape": [
+            3,
+            640,
+            640
+        ],
+        "det_model_path": "ppshitu_lite_models_v1.0/mainbody_PPLCNet_x2_5_640_quant_v1.0_lite.nb",
+        "rec_model_path": "ppshitu_lite_models_v1.0/general_PPLCNet_x2_5_lite_v1.0_infer.nb",
+        "rec_label_path": "ppshitu_lite_models_v1.0/label.txt",
+        "label_list": [
+            "foreground"
+        ],
+        "rec_nms_thresold": 0.05,
+        "max_det_results": 5,
+        "det_fpn_stride": [
+            8,
+            16,
+            32,
+            64
+        ],
+        "det_arch": "PicoDet",
+        "return_k": 5
+    },
+    "DetPreProcess": {
+        "transform_ops": [
+            {
+                "interp": 2,
+                "keep_ratio": false,
+                "target_size": [
+                    640,
+                    640
+                ],
+                "type": "DetResize"
+            },
+            {
+                "is_scale": true,
+                "mean": [
+                    0.485,
+                    0.456,
+                    0.406
+                ],
+                "std": [
+                    0.229,
+                    0.224,
+                    0.225
+                ],
+                "type": "DetNormalizeImage"
+            },
+            {
+                "type": "DetPermute"
+            }
+        ]
+    },
+    "DetPostProcess": {
+        "keep_top_k": 100,
+        "name": "MultiClassNMS",
+        "nms_threshold": 0.5,
+        "nms_top_k": 1000,
+        "score_threshold": 0.3
+    },
+    "RecPreProcess": {
+        "transform_ops": [
+            {
+                "size": 224,
+                "type": "ResizeImage"
+            },
+            {
+                "scale": 0.00392157,
+                "mean": [
+                    0.485,
+                    0.456,
+                    0.406
+                ],
+                "std": [
+                    0.229,
+                    0.224,
+                    0.225
+                ],
+                "order": "",
+                "type": "NormalizeImage"
+            }
+        ]
+    },
+    "IndexProcess": {
+        "index_method": "HNSW32",
+        "index_dir": "./drink_dataset_v1.0/index",
+        "image_root": "./drink_dataset_v1.0/gallery",
+        "data_file": "./drink_dataset_v1.0/gallery/drink_label.txt",
+        "index_operation": "new",
+        "delimiter": " ",
+        "dist_type": "IP",
+        "embedding_size": 512,
+        "batch_size": 32,
+        "return_k": 5,
+        "score_thres": 0.4
+    }
+}
\ No newline at end of file
diff --git a/deploy/lite_shitu/src/recognition.cc b/deploy/lite_shitu/src/feature_extractor.cc
similarity index 65%
rename from deploy/lite_shitu/src/recognition.cc
rename to deploy/lite_shitu/src/feature_extractor.cc
index 0e711f386..407ffd9d7 100644
--- a/deploy/lite_shitu/src/recognition.cc
+++ b/deploy/lite_shitu/src/feature_extractor.cc
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "include/recognition.h"
+#include "include/feature_extractor.h"
 
 namespace PPShiTu {
-std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
-                                             double &cost_time) {
-
+void FeatureExtract::RunRecModel(const cv::Mat &img,
+                                 double &cost_time,
+                                 std::vector<float> &feature) {
   // Read img
   cv::Mat resize_image = ResizeImage(img);
 
@@ -38,8 +38,7 @@ std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
 
   // Get output and post process
   std::unique_ptr<const Tensor> output_tensor(
-      std::move(this->predictor->GetOutput(1)));
-  auto *output_data = output_tensor->data<float>();
+      std::move(this->predictor->GetOutput(0)));  //only one output
   auto end = std::chrono::system_clock::now();
   auto duration =
       std::chrono::duration_cast<std::chrono::microseconds>(end - start);
@@ -47,17 +46,28 @@ std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
               std::chrono::microseconds::period::num /
               std::chrono::microseconds::period::den;
 
+  // Post-process: copy the output tensor into the feature vector.
   int output_size = 1;
   for (auto dim : output_tensor->shape()) {
     output_size *= dim;
   }
+  std::cout << "output len is:  " << output_size << std::endl;
+  feature.resize(output_size);
+  output_tensor->CopyToCpu(feature.data());
 
-  cv::Mat output_image;
-  auto results = PostProcess(output_data, output_size, output_image);
-  return results;
+  // Optional post-processing (sqrt normalization or binarization); currently disabled.
+  //PostProcess(feature);
+  return;
 }
 
-void Recognition::NeonMeanScale(const float *din, float *dout, int size) {
+// void FeatureExtract::PostProcess(std::vector<float> &feature){
+//     float feature_sqrt = std::sqrt(std::inner_product(
+//             feature.begin(), feature.end(), feature.begin(), 0.0f));
+//     for (int i = 0; i < feature.size(); ++i)
+//         feature[i] /= feature_sqrt;
+// }
+
+void FeatureExtract::NeonMeanScale(const float *din, float *dout, int size) {
 
   if (this->mean.size() != 3 || this->std.size() != 3) {
     std::cerr << "[ERROR] mean or scale size must equal to 3\n";
@@ -99,45 +109,9 @@ void Recognition::NeonMeanScale(const float *din, float *dout, int size) {
   }
 }
 
-cv::Mat Recognition::ResizeImage(const cv::Mat &img) {
+cv::Mat FeatureExtract::ResizeImage(const cv::Mat &img) {
   cv::Mat resize_img;
   cv::resize(img, resize_img, cv::Size(this->size, this->size));
   return resize_img;
 }
-std::vector<RESULT> Recognition::PostProcess(const float *output_data,
-                                             int output_size,
-                                             cv::Mat &output_image) {
-
-  int max_indices[this->topk];
-  double max_scores[this->topk];
-  for (int i = 0; i < this->topk; i++) {
-    max_indices[i] = 0;
-    max_scores[i] = 0;
-  }
-  for (int i = 0; i < output_size; i++) {
-    float score = output_data[i];
-    int index = i;
-    for (int j = 0; j < this->topk; j++) {
-      if (score > max_scores[j]) {
-        index += max_indices[j];
-        max_indices[j] = index - max_indices[j];
-        index -= max_indices[j];
-        score += max_scores[j];
-        max_scores[j] = score - max_scores[j];
-        score -= max_scores[j];
-      }
-    }
-  }
-
-  std::vector<RESULT> results(this->topk);
-  for (int i = 0; i < results.size(); i++) {
-    results[i].class_name = "Unknown";
-    if (max_indices[i] >= 0 && max_indices[i] < this->label_list.size()) {
-      results[i].class_name = this->label_list[max_indices[i]];
-    }
-    results[i].score = max_scores[i];
-    results[i].class_id = max_indices[i];
-  }
-  return results;
-}
 }
diff --git a/deploy/lite_shitu/src/main.cc b/deploy/lite_shitu/src/main.cc
index 690162a49..1223a7053 100644
--- a/deploy/lite_shitu/src/main.cc
+++ b/deploy/lite_shitu/src/main.cc
@@ -26,7 +26,7 @@
 #include "include/config_parser.h"
 #include "include/object_detector.h"
 #include "include/preprocess_op.h"
-#include "include/recognition.h"
+#include "include/feature_extractor.h"
 #include "json/json.h"
 
 Json::Value RT_Config;
@@ -159,11 +159,15 @@ int main(int argc, char **argv) {
       RT_Config["Global"]["cpu_num_threads"].as<int>(),
       RT_Config["Global"]["batch_size"].as<int>());
   // create rec model
-  PPShiTu::Recognition rec(RT_Config);
+  PPShiTu::FeatureExtract rec(RT_Config);
   // Do inference on input image
 
   std::vector<PPShiTu::ObjectResult> det_result;
   std::vector<cv::Mat> batch_imgs;
+
+  //for vector search
+  std::vector<float> feature;
+  std::vector<float> features;
   double rec_time;
   if (!RT_Config["Global"]["infer_imgs"].as<std::string>().empty() ||
       !img_dir.empty()) {
@@ -209,14 +213,15 @@ int main(int argc, char **argv) {
         int h = det_result[j].rect[3] - det_result[j].rect[1];
         cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
         cv::Mat crop_img = srcimg(rect);
-        std::vector<PPShiTu::RESULT> result =
-            rec.RunRecModel(crop_img, rec_time);
-        det_result[j].rec_result.assign(result.begin(), result.end());
+        rec.RunRecModel(crop_img, rec_time, feature);
+        features.insert(features.end(), feature.begin(), feature.end());
       }
+
+      std::cout << "feature len is:  " << features.size() << std::endl;
       // rec nms
-      PPShiTu::nms(det_result,
-                   RT_Config["Global"]["rec_nms_thresold"].as<float>(), true);
-      PrintResult(img_path, det_result);
+      // PPShiTu::nms(det_result,
+      //              RT_Config["Global"]["rec_nms_thresold"].as<float>(), true);
+      // PrintResult(img_path, det_result);
       batch_imgs.clear();
       det_result.clear();
     }