Do feature extraction: refactor Recognition into FeatureExtract so the rec model returns raw feature vectors

This commit is contained in:
lubin 2022-03-08 06:23:21 +00:00
parent 8d52387a83
commit 47c3b093a2
7 changed files with 146 additions and 90 deletions

View File

@@ -9,15 +9,13 @@ endif
${info ARM_ABI: ${ARM_ABI}}
${info ARM_PLAT: ${ARM_PLAT}; option[arm7/arm8]}
include ../Makefile.def
LITE_ROOT=../../../
LITE_ROOT=libs/inference_lite_lib.android.armv8
include ${LITE_ROOT}/demo/cxx/Makefile.def
${info LITE_ROOT: $(abspath ${LITE_ROOT})}
THIRD_PARTY_DIR=third_party
${info THIRD_PARTY_DIR: $(abspath ${THIRD_PARTY_DIR})}
OPENCV_VERSION=opencv4.1.0
OPENCV_LIBS = ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PLAT}/libs/libopencv_imgcodecs.a \
${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PLAT}/libs/libopencv_imgproc.a \

View File

@@ -130,6 +130,8 @@ def main():
y["type"] = k
config_json["RecPreProcess"]["transform_ops"].append(y)
# set IndexProcess
config_json["IndexProcess"] = config_yaml["IndexProcess"]
with open('shitu_config.json', 'w') as fd:
json.dump(config_json, fd, indent=4)
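The generated shitu_config.json is what the C++ lite demo loads at startup. A minimal parsing sketch, with the accessor style copied from the demo sources below; the stream-extraction parse is an assumption about the bundled JSON library (jsoncpp-compatible json/json.h), and the variable names are illustrative:

#include <fstream>
#include <iostream>
#include "json/json.h"

int main() {
  // Parse the JSON emitted by the Python converter above.
  std::ifstream ifs("shitu_config.json");
  Json::Value config;
  ifs >> config;  // jsoncpp stream extraction

  // Accessor style mirrors the demo code in this commit.
  std::cout << config["Global"]["rec_model_path"].as<std::string>() << std::endl;
  std::cout << config["IndexProcess"]["index_dir"].as<std::string>() << std::endl;
  return 0;
}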

View File

@@ -36,10 +36,9 @@ struct RESULT {
float score;
};
class Recognition {
class FeatureExtract {
public:
explicit Recognition(const Json::Value &config_file) {
explicit FeatureExtract(const Json::Value &config_file) {
MobileConfig config;
if (config_file["Global"]["rec_model_path"].as<std::string>().empty()) {
std::cout << "Please set [rec_model_path] in config file" << std::endl;
@@ -53,29 +52,8 @@ public:
std::cout << "Please set [rec_label_path] in config file" << std::endl;
exit(-1);
}
LoadLabel(config_file["Global"]["rec_label_path"].as<std::string>());
SetPreProcessParam(config_file["RecPreProcess"]["transform_ops"]);
if (!config_file["Global"].isMember("return_k")){
this->topk = config_file["Global"]["return_k"].as<int>();
}
printf("rec model create!\n");
}
void LoadLabel(std::string path) {
std::ifstream file;
std::vector<std::string> label_list;
file.open(path);
while (file) {
std::string line;
std::getline(file, line);
std::string::size_type pos = line.find(" ");
if (pos != std::string::npos) {
line = line.substr(pos);
}
this->label_list.push_back(line);
}
file.clear();
file.close();
printf("feature extract model create!\n");
}
void SetPreProcessParam(const Json::Value &config_file) {
@@ -97,19 +75,17 @@ public:
}
}
std::vector<RESULT> RunRecModel(const cv::Mat &img, double &cost_time);
std::vector<RESULT> PostProcess(const float *output_data, int output_size,
cv::Mat &output_image);
void RunRecModel(const cv::Mat &img, double &cost_time, std::vector<float> &feature);
//void PostProcess(std::vector<float> &feature);
cv::Mat ResizeImage(const cv::Mat &img);
void NeonMeanScale(const float *din, float *dout, int size);
private:
std::shared_ptr<PaddlePredictor> predictor;
std::vector<std::string> label_list;
//std::vector<std::string> label_list;
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> std = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
double scale = 0.00392157;
float size = 224;
int topk = 5;
};
} // namespace PPShiTu
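Taken together, the new header boils down to: build a FeatureExtract from the parsed config, then call RunRecModel to fill an embedding. A minimal usage sketch based only on the signatures above (the image path comes from the config's infer_imgs; the helper name is illustrative):

#include <opencv2/opencv.hpp>
#include <vector>
#include "include/feature_extractor.h"
#include "json/json.h"

void ExtractOne(const Json::Value &config) {
  PPShiTu::FeatureExtract extractor(config);

  cv::Mat img = cv::imread("images/demo.jpg", cv::IMREAD_COLOR);
  double cost_time = 0.0;
  std::vector<float> feature;

  // New signature: the embedding is written into `feature`
  // instead of returning top-k RESULT entries.
  extractor.RunRecModel(img, cost_time, feature);
}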

View File

@@ -16,7 +16,7 @@
#include <algorithm>
#include <ctime>
#include <include/recognition.h>
#include <include/feature_extractor.h>
#include <memory>
#include <numeric>
#include <string>

View File

@@ -0,0 +1,101 @@
{
"Global": {
"infer_imgs": "images/demo.jpg",
"batch_size": 1,
"cpu_num_threads": 4,
"image_shape": [
3,
640,
640
],
"det_model_path": "ppshitu_lite_models_v1.0/mainbody_PPLCNet_x2_5_640_quant_v1.0_lite.nb",
"rec_model_path": "ppshitu_lite_models_v1.0/general_PPLCNet_x2_5_lite_v1.0_infer.nb",
"rec_label_path": "ppshitu_lite_models_v1.0/label.txt",
"label_list": [
"foreground"
],
"rec_nms_thresold": 0.05,
"max_det_results": 5,
"det_fpn_stride": [
8,
16,
32,
64
],
"det_arch": "PicoDet",
"return_k": 5
},
"DetPreProcess": {
"transform_ops": [
{
"interp": 2,
"keep_ratio": false,
"target_size": [
640,
640
],
"type": "DetResize"
},
{
"is_scale": true,
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
],
"type": "DetNormalizeImage"
},
{
"type": "DetPermute"
}
]
},
"DetPostProcess": {
"keep_top_k": 100,
"name": "MultiClassNMS",
"nms_threshold": 0.5,
"nms_top_k": 1000,
"score_threshold": 0.3
},
"RecPreProcess": {
"transform_ops": [
{
"size": 224,
"type": "ResizeImage"
},
{
"scale": 0.00392157,
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
],
"order": "",
"type": "NormalizeImage"
}
]
},
"IndexProcess": {
"index_method": "HNSW32",
"index_dir": "./drink_dataset_v1.0/index",
"image_root": "./drink_dataset_v1.0/gallery",
"data_file": "./drink_dataset_v1.0/gallery/drink_label.txt",
"index_operation": "new",
"delimiter": " ",
"dist_type": "IP",
"embedding_size": 512,
"batch_size": 32,
"return_k": 5,
"score_thres": 0.4
}
}
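The RecPreProcess block matches the defaults declared in feature_extractor.h (size 224, scale 1/255, ImageNet mean and std, with std stored as reciprocals). SetPreProcessParam's body is not part of this diff, so the following is only a sketch of how these ops could be mapped onto the class members; the accessor style is copied from the demo code and all field handling is assumed:

#include <string>
#include <vector>
#include "json/json.h"

// Hypothetical mapping of RecPreProcess transform_ops onto FeatureExtract members.
void SetPreProcessParamSketch(const Json::Value &ops, float &size, double &scale,
                              std::vector<float> &mean, std::vector<float> &std_inv) {
  for (const auto &op : ops) {
    if (op["type"].as<std::string>() == "ResizeImage") {
      size = op["size"].as<float>();        // 224
    } else if (op["type"].as<std::string>() == "NormalizeImage") {
      scale = op["scale"].as<float>();      // 0.00392157 == 1/255
      mean.clear();
      std_inv.clear();
      for (const auto &m : op["mean"]) mean.push_back(m.as<float>());
      for (const auto &s : op["std"]) std_inv.push_back(1.0f / s.as<float>());  // header keeps 1/std
    }
  }
}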

View File

@@ -12,12 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/recognition.h"
#include "include/feature_extractor.h"
namespace PPShiTu {
std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
double &cost_time) {
void FeatureExtract::RunRecModel(const cv::Mat &img,
double &cost_time,
std::vector<float> &feature) {
// Read img
cv::Mat resize_image = ResizeImage(img);
@@ -38,8 +38,7 @@ std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
// Get output and post process
std::unique_ptr<const Tensor> output_tensor(
std::move(this->predictor->GetOutput(1)));
auto *output_data = output_tensor->data<float>();
std::move(this->predictor->GetOutput(0))); //only one output
auto end = std::chrono::system_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
@@ -47,17 +46,28 @@ std::vector<RESULT> Recognition::RunRecModel(const cv::Mat &img,
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den;
//do postprocess
int output_size = 1;
for (auto dim : output_tensor->shape()) {
output_size *= dim;
}
std::cout << "output len is: " << output_size << std::endl;
feature.resize(output_size);
output_tensor->CopyToCpu(feature.data());
cv::Mat output_image;
auto results = PostProcess(output_data, output_size, output_image);
return results;
// postprocess includes sqrt or binarize.
//PostProcess(feature);
return;
}
void Recognition::NeonMeanScale(const float *din, float *dout, int size) {
// void FeatureExtract::PostProcess(std::vector<float> &feature){
// float feature_sqrt = std::sqrt(std::inner_product(
// feature.begin(), feature.end(), feature.begin(), 0.0f));
// for (int i = 0; i < feature.size(); ++i)
// feature[i] /= feature_sqrt;
// }
void FeatureExtract::NeonMeanScale(const float *din, float *dout, int size) {
if (this->mean.size() != 3 || this->std.size() != 3) {
std::cerr << "[ERROR] mean or scale size must equal to 3\n";
@@ -99,45 +109,9 @@ void Recognition::NeonMeanScale(const float *din, float *dout, int size) {
}
}
cv::Mat Recognition::ResizeImage(const cv::Mat &img) {
cv::Mat FeatureExtract::ResizeImage(const cv::Mat &img) {
cv::Mat resize_img;
cv::resize(img, resize_img, cv::Size(this->size, this->size));
return resize_img;
}
std::vector<RESULT> Recognition::PostProcess(const float *output_data,
int output_size,
cv::Mat &output_image) {
int max_indices[this->topk];
double max_scores[this->topk];
for (int i = 0; i < this->topk; i++) {
max_indices[i] = 0;
max_scores[i] = 0;
}
for (int i = 0; i < output_size; i++) {
float score = output_data[i];
int index = i;
for (int j = 0; j < this->topk; j++) {
if (score > max_scores[j]) {
index += max_indices[j];
max_indices[j] = index - max_indices[j];
index -= max_indices[j];
score += max_scores[j];
max_scores[j] = score - max_scores[j];
score -= max_scores[j];
}
}
}
std::vector<RESULT> results(this->topk);
for (int i = 0; i < results.size(); i++) {
results[i].class_name = "Unknown";
if (max_indices[i] >= 0 && max_indices[i] < this->label_list.size()) {
results[i].class_name = this->label_list[max_indices[i]];
}
results[i].score = max_scores[i];
results[i].class_id = max_indices[i];
}
return results;
}
}
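The commit ships with feature post-processing disabled; the commented body above L2-normalizes the embedding, which makes the "IP" (inner product) distance configured in IndexProcess behave like cosine similarity. A standalone sketch of that step and of a pairwise score, assuming already-extracted features of equal length (helper names are illustrative):

#include <cmath>
#include <numeric>
#include <vector>

// L2-normalize a feature vector, mirroring the commented-out PostProcess above.
void L2Normalize(std::vector<float> &feature) {
  float norm = std::sqrt(std::inner_product(
      feature.begin(), feature.end(), feature.begin(), 0.0f));
  if (norm > 0.0f) {
    for (auto &v : feature) v /= norm;
  }
}

// On normalized features, the inner product equals cosine similarity.
float InnerProduct(const std::vector<float> &a, const std::vector<float> &b) {
  return std::inner_product(a.begin(), a.end(), b.begin(), 0.0f);
}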

View File

@@ -26,7 +26,7 @@
#include "include/config_parser.h"
#include "include/object_detector.h"
#include "include/preprocess_op.h"
#include "include/recognition.h"
#include "include/feature_extractor.h"
#include "json/json.h"
Json::Value RT_Config;
@@ -159,11 +159,15 @@ int main(int argc, char **argv) {
RT_Config["Global"]["cpu_num_threads"].as<int>(),
RT_Config["Global"]["batch_size"].as<int>());
// create rec model
PPShiTu::Recognition rec(RT_Config);
PPShiTu::FeatureExtract rec(RT_Config);
// Do inference on input image
std::vector<PPShiTu::ObjectResult> det_result;
std::vector<cv::Mat> batch_imgs;
// for vector search
std::vector<float> feature;   // embedding of the current crop
std::vector<float> features;  // embeddings of all crops, concatenated
double rec_time;
if (!RT_Config["Global"]["infer_imgs"].as<std::string>().empty() ||
!img_dir.empty()) {
@@ -209,14 +213,15 @@ int main(int argc, char **argv) {
int h = det_result[j].rect[3] - det_result[j].rect[1];
cv::Rect rect(det_result[j].rect[0], det_result[j].rect[1], w, h);
cv::Mat crop_img = srcimg(rect);
std::vector<PPShiTu::RESULT> result =
rec.RunRecModel(crop_img, rec_time);
det_result[j].rec_result.assign(result.begin(), result.end());
rec.RunRecModel(crop_img, rec_time, feature);
features.insert(features.end(), feature.begin(), feature.end());
}
std::cout << "feature len is: " << features.size() << std::endl;
// rec nms
PPShiTu::nms(det_result,
RT_Config["Global"]["rec_nms_thresold"].as<float>(), true);
PrintResult(img_path, det_result);
// PPShiTu::nms(det_result,
// RT_Config["Global"]["rec_nms_thresold"].as<float>(), true);
// PrintResult(img_path, det_result);
batch_imgs.clear();
det_result.clear();
}
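main.cc now appends every crop's embedding to the flat features buffer and only prints its length; the vector-search step that would consume it is not part of this commit. A small sketch of splitting that buffer back into per-crop embeddings, using the embedding_size from IndexProcess (512 here); the helper is illustrative:

#include <vector>

// Split the flat buffer filled in main() into one embedding per detected crop.
std::vector<std::vector<float>> SplitFeatures(const std::vector<float> &features,
                                              size_t embedding_size) {
  std::vector<std::vector<float>> rows;
  for (size_t i = 0; i + embedding_size <= features.size(); i += embedding_size) {
    rows.emplace_back(features.begin() + i, features.begin() + i + embedding_size);
  }
  return rows;
}

// e.g. auto per_crop = SplitFeatures(features, 512);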