Do feature extraction: refactor Recognition into FeatureExtract so the rec model returns raw feature vectors

This commit is contained in:
lubin 2022-03-08 06:23:21 +00:00
parent 8d52387a83
commit 47c3b093a2
7 changed files with 146 additions and 90 deletions

View File

@@ -9,15 +9,13 @@ endif
${info ARM_ABI: ${ARM_ABI}}
${info ARM_PLAT: ${ARM_PLAT}; option[arm7/arm8]}
include ../Makefile.def
LITE_ROOT=../../../
LITE_ROOT=libs/inference_lite_lib.android.armv8
include ${LITE_ROOT}/demo/cxx/Makefile.def
${info LITE_ROOT: $(abspath ${LITE_ROOT})}
THIRD_PARTY_DIR=third_party
${info THIRD_PARTY_DIR: $(abspath ${THIRD_PARTY_DIR})}
OPENCV_VERSION=opencv4.1.0
OPENCV_LIBS = ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PLAT}/libs/libopencv_imgcodecs.a \
${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PLAT}/libs/libopencv_imgproc.a \

View File

@@ -130,6 +130,8 @@ def main():
y["type"] = k
config_json["RecPreProcess"]["transform_ops"].append(y)
# set IndexProcess
config_json["IndexProcess"] = config_yaml["IndexProcess"]
with open('shitu_config.json', 'w') as fd:
json.dump(config_json, fd, indent=4)
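The generated shitu_config.json is what the C++ lite demo loads at startup. A minimal parsing sketch, with the accessor style copied from the demo sources below; the stream-extraction parse is an assumption about the bundled JSON library (jsoncpp-compatible json/json.h), and the variable names are illustrative:

#include <fstream>
#include <iostream>
#include "json/json.h"

int main() {
  // Parse the JSON emitted by the Python converter above.
  std::ifstream ifs("shitu_config.json");
  Json::Value config;
  ifs >> config;  // jsoncpp stream extraction

  // Accessor style mirrors the demo code in this commit.
  std::cout << config["Global"]["rec_model_path"].as<std::string>() << std::endl;
  std::cout << config["IndexProcess"]["index_dir"].as<std::string>() << std::endl;
  return 0;
}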

View File

@@ -36,10 +36,9 @@ struct RESULT {
float score;
};
class Recognition {
class FeatureExtract {
public:
explicit Recognition(const Json::Value &config_file) {
explicit FeatureExtract(const Json::Value &config_file) {
MobileConfig config;
if (config_file["Global"]["rec_model_path"].as<std::string>().empty()) {
std::cout << "Please set [rec_model_path] in config file" << std::endl;
@@ -53,29 +52,8 @@ public:
std::cout << "Please set [rec_label_path] in config file" << std::endl;
exit(-1);
}
LoadLabel(config_file["Global"]["rec_label_path"].as<std::string>());
SetPreProcessParam(config_file["RecPreProcess"]["transform_ops"]);
if (!config_file["Global"].isMember("return_k")){
this->topk = config_file["Global"]["return_k"].as<int>();
}
printf("rec model create!\n");
}
void LoadLabel(std::string path) {
std::ifstream file;
std::vector<std::string> label_list;
file.open(path);
while (file) {
std::string line;
std::getline(file, line);
std::string::size_type pos = line.find(" ");
if (pos != std::string::npos) {
line = line.substr(pos);
}
this->label_list.push_back(line);
}
file.clear();
file.close();
printf("feature extract model create!\n");
}
void SetPreProcessParam(const Json::Value &config_file) {
@@ -97,19 +75,17 @@ public:
}
}
std::vector<RESULT> RunRecModel(const cv::Mat &img, double &cost_time);
std::vector<RESULT> PostProcess(const float *output_data, int output_size,
cv::Mat &output_image);
void RunRecModel(const cv::Mat &img, double &cost_time, std::vector<float> &feature);
//void PostProcess(std::vector<float> &feature);
cv::Mat ResizeImage(const cv::Mat &img);
void NeonMeanScale(const float *din, float *dout, int size);
private:
std::shared_ptr<PaddlePredictor> predictor;
std::vector<std::string> label_list;
//std::vector<std::string> label_list;
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> std = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
double scale = 0.00392157;
float size = 224;
int topk = 5;
};
} // namespace PPShiTu
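Taken together, the new header boils down to: build a FeatureExtract from the parsed config, then call RunRecModel to fill an embedding. A minimal usage sketch based only on the signatures above (the image path comes from the config's infer_imgs; the helper name is illustrative):

#include <opencv2/opencv.hpp>
#include <vector>
#include "include/feature_extractor.h"
#include "json/json.h"

void ExtractOne(const Json::Value &config) {
  PPShiTu::FeatureExtract extractor(config);

  cv::Mat img = cv::imread("images/demo.jpg", cv::IMREAD_COLOR);
  double cost_time = 0.0;
  std::vector<float> feature;

  // New signature: the embedding is written into `feature`
  // instead of returning top-k RESULT entries.
  extractor.RunRecModel(img, cost_time, feature);
}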

View File

@@ -16,7 +16,7 @@
#include <algorithm>
#include <ctime>
#include <include/recognition.h>
#include <include/feature_extractor.h>
#include <memory>
#include <numeric>
#include <string>

View File

@@ -0,0 +1,101 @@
{
"Global": {
"infer_imgs": "images/demo.jpg",
"batch_size": 1,
"cpu_num_threads": 4,
"image_shape": [
3,
640,
640
],
"det_model_path": "ppshitu_lite_models_v1.0/mainbody_PPLCNet_x2_5_640_quant_v1.0_lite.nb",
"rec_model_path": "ppshitu_lite_models_v1.0/general_PPLCNet_x2_5_lite_v1.0_infer.nb",
"rec_label_path": "ppshitu_lite_models_v1.0/label.txt",
"label_list": [
"foreground"
],
"rec_nms_thresold": 0.05,
"max_det_results": 5,
"det_fpn_stride": [
8,
16,
32,
64
],
"det_arch": "PicoDet",
"return_k": 5
},
"DetPreProcess": {
"transform_ops": [
{
"interp": 2,
"keep_ratio": false,
"target_size": [
640,
640
],
"type": "DetResize"
},
{
"is_scale": true,
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
],
"type": "DetNormalizeImage"
},
{
"type": "DetPermute"
}
]
},
"DetPostProcess": {
"keep_top_k": 100,
"name": "MultiClassNMS",
"nms_threshold": 0.5,
"nms_top_k": 1000,
"score_threshold": 0.3
},
"RecPreProcess": {
"transform_ops": [
{
"size": 224,
"type": "ResizeImage"
},
{
"scale": 0.00392157,
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
],
"order": "",
"type": "NormalizeImage"
}
]
},
"IndexProcess": {
"index_method": "HNSW32",
"index_dir": "./drink_dataset_v1.0/index",
"image_root": "./drink_dataset_v1.0/gallery",
"data_file": "./drink_dataset_v1.0/gallery/drink_label.txt",
"index_operation": "new",
"delimiter": " ",
"dist_type": "IP",
"embedding_size": 512,
"batch_size": 32,
"return_k": 5,
"score_thres": 0.4
}
}
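The RecPreProcess block matches the defaults declared in feature_extractor.h (size 224, scale 1/255, ImageNet mean and std, with std stored as reciprocals). SetPreProcessParam's body is not part of this diff, so the following is only a sketch of how these ops could be mapped onto the class members; the accessor style is copied from the demo code and all field handling is assumed:

#include <string>
#include <vector>
#include "json/json.h"

// Hypothetical mapping of RecPreProcess transform_ops onto FeatureExtract members.
void SetPreProcessParamSketch(const Json::Value &ops, float &size, double &scale,
                              std::vector<float> &mean, std::vector<float> &std_inv) {
  for (const auto &op : ops) {
    if (op["type"].as<std::string>() == "ResizeImage") {
      size = op["size"].as<float>();        // 224
    } else if (op["type"].as<std::string>() == "NormalizeImage") {
      scale = op["scale"].as<float>();      // 0.00392157 == 1/255
      mean.clear();
      std_inv.clear();
      for (const auto &m : op["mean"]) mean.push_back(m.as<float>());
      for (const auto &s : op["std"]) std_inv.push_back(1.0f / s.as<float>());  // header keeps 1/std
    }
  }
}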

View File

@@ -12,12 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/recognition.h"
#include "include/feature_extractor.h"
namespace PPShiTu {
std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
double &cost_time) {
void FeatureExtract::RunRecModel(const cv::Mat &img,
double &cost_time,
std::vector<float> &feature) {
// Read img
cv::Mat resize_image = ResizeImage(img);
@@ -38,8 +38,7 @@ std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
// Get output and post process
std::unique_ptr<const Tensor> output_tensor(
std::move(this->predictor->GetOutput(1)));
auto *output_data = output_tensor->data<float>();
std::move(this->predictor->GetOutput(0))); //only one output
auto end = std::chrono::system_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
@@ -47,17 +46,28 @@ std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den;
//do postprocess
int output_size = 1;
for (auto dim : output_tensor->shape()) {
output_size *= dim;
}
std::cout << "output len is: " << output_size << std::endl;
feature.resize(output_size);
output_tensor->CopyToCpu(feature.data());
cv::Mat output_image;
auto results = PostProcess(output_data, output_size, output_image);
return results;
// postprocess includes sqrt or binarize.
//PostProcess(feature);
return;
}
void Recognition::NeonMeanScale(const float *din, float *dout, int size) {
// void FeatureExtract::PostProcess(std::vector<float> &feature){
// float feature_sqrt = std::sqrt(std::inner_product(
// feature.begin(), feature.end(), feature.begin(), 0.0f));
// for (int i = 0; i < feature.size(); ++i)
// feature[i] /= feature_sqrt;
// }
void FeatureExtract::NeonMeanScale(const float *din, float *dout, int size) {
if (this->mean.size() != 3 || this->std.size() != 3) {
std::cerr << "[ERROR] mean or scale size must equal to 3\n";
@@ -99,45 +109,9 @@ void Recognition::NeonMeanScale(const float *din, float *dout, int size) {
}
}
cv::Mat Recognition::ResizeImage(const cv::Mat &img) {
cv::Mat FeatureExtract::ResizeImage(const cv::Mat &img) {
cv::Mat resize_img;
cv::resize(img, resize_img, cv::Size(this->size, this->size));
return resize_img;
}
std::vector<RESULT> Recognition::PostProcess(const float *output_data,
int output_size,
cv::Mat &output_image) {
int max_indices[this->topk];
double max_scores[this->topk];
for (int i = 0; i < this->topk; i++) {
max_indices[i] = 0;
max_scores[i] = 0;
}
for (int i = 0; i < output_size; i++) {
float score = output_data[i];
int index = i;
for (int j = 0; j < this->topk; j++) {
if (score > max_scores[j]) {
index += max_indices[j];
max_indices[j] = index - max_indices[j];
index -= max_indices[j];
score += max_scores[j];
max_scores[j] = score - max_scores[j];
score -= max_scores[j];
}
}
}
std::vector<RESULT> results(this->topk);
for (int i = 0; i < results.size(); i++) {
results[i].class_name = "Unknown";
if (max_indices[i] >= 0 && max_indices[i] < this->label_list.size()) {
results[i].class_name = this->label_list[max_indices[i]];
}
results[i].score = max_scores[i];
results[i].class_id = max_indices[i];
}
return results;
}
}
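The commit ships with feature post-processing disabled; the commented body above L2-normalizes the embedding, which makes the "IP" (inner product) distance configured in IndexProcess behave like cosine similarity. A standalone sketch of that step and of a pairwise score, assuming already-extracted features of equal length (helper names are illustrative):

#include <cmath>
#include <numeric>
#include <vector>

// L2-normalize a feature vector, mirroring the commented-out PostProcess above.
void L2Normalize(std::vector<float> &feature) {
  float norm = std::sqrt(std::inner_product(
      feature.begin(), feature.end(), feature.begin(), 0.0f));
  if (norm > 0.0f) {
    for (auto &v : feature) v /= norm;
  }
}

// On normalized features, the inner product equals cosine similarity.
float InnerProduct(const std::vector<float> &a, const std::vector<float> &b) {
  return std::inner_product(a.begin(), a.end(), b.begin(), 0.0f);
}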

View File

@@ -26,7 +26,7 @@
#include "include/config_parser.h"
#include "include/object_detector.h"
#include "include/preprocess_op.h"
#include "include/recognition.h"
#include "include/feature_extractor.h"
#include "json/json.h"
Json::Value RT_Config;
@@ -159,11 +159,15 @@ int main(int argc, char **argv) {
RT_Config["Global"]["cpu_num_threads"].as<int>(),
RT_Config["Global"]["batch_size"].as<int>());
// create rec model
PPShiTu::Recognition rec(RT_Config);
PPShiTu::FeatureExtract rec(RT_Config);
// Do inference on input image
std::vector<PPShiTu::ObjectResult> det_result;
std::vector<cv::Mat> batch_imgs;
// for vector search
std::vector<float> feature;   // embedding of the current crop
std::vector<float> features;  // embeddings of all crops, concatenated
double rec_time;
if (!RT_Config["Global"]["infer_imgs"].as<std::string>().empty() ||
!img_dir.empty()) {
@@ -209,14 +213,15 @@ int main(int argc, char **argv) {
int h = det_result[j].rect[3] - det_result[j].rect[1];
cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
cv::Mat crop_img = srcimg(rect);
std::vector<PPShiTu::RESULT> result =
rec.RunRecModel(crop_img, rec_time);
det_result[j].rec_result.assign(result.begin(), result.end());
rec.RunRecModel(crop_img, rec_time, feature);
features.insert(features.end(), feature.begin(), feature.end());
}
std::cout << "feature len is: " << features.size() << std::endl;
// rec nms
PPShiTu::nms(det_result,
RT_Config["Global"]["rec_nms_thresold"].as<float>(), true);
PrintResult(img_path, det_result);
// PPShiTu::nms(det_result,
// RT_Config["Global"]["rec_nms_thresold"].as<float>(), true);
// PrintResult(img_path, det_result);
batch_imgs.clear();
det_result.clear();
}
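main.cc now appends every crop's embedding to the flat features buffer and only prints its length; the vector-search step that would consume it is not part of this commit. A small sketch of splitting that buffer back into per-crop embeddings, using the embedding_size from IndexProcess (512 here); the helper is illustrative:

#include <vector>

// Split the flat buffer filled in main() into one embedding per detected crop.
std::vector<std::vector<float>> SplitFeatures(const std::vector<float> &features,
                                              size_t embedding_size) {
  std::vector<std::vector<float>> rows;
  for (size_t i = 0; i + embedding_size <= features.size(); i += embedding_size) {
    rows.emplace_back(features.begin() + i, features.begin() + i + embedding_size);
  }
  return rows;
}

// e.g. auto per_crop = SplitFeatures(features, 512);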