mmdeploy/csrc/codebase/mmocr/pixel_group.cpp
lzhangzz 5e51739485
[Feature] Support DBNet, PANet and PSENet for SDK, with GPU aided post-processing (#526)
* add SDK support for PANet

* fix panet

* fix panet

* simplify panet

* add PSENet support

* fix-psenet

* add CUDA post-processing for DBNet

* fix dbnet

* fix dbnet

* add cpu support for PANet

* fix panet

* add CUDA support for PANet

* fix panet

* format

* add cpu impls for PSENet

* fix psenet

* add cuda impl for PSENet

* fix psenet

* add param parsing

* simplify impls

* simplify impls

* clean-up

* fix lint

* fix cuda-10 build

* fix cuda-10 build
2022-05-31 21:24:09 +08:00

126 lines
4.7 KiB
C++

// Copyright (c) OpenMMLab. All rights reserved.
// Modified from https://github.com/WenmuZhou/PAN.pytorch
// and
// https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cpu/pixel_group.cpp
#include <cmath>
#include <queue>
#include <vector>
#include "core/tensor.h"
#include "opencv2/opencv.hpp"
namespace mmdeploy::mmocr {
std::vector<std::vector<float>> estimate_confidence(const int32_t* label, const float* score,
int label_num, int height, int width) {
std::vector<std::vector<float>> point_vector;
for (int i = 0; i < label_num; i++) {
std::vector<float> point;
point.push_back(0);
point.push_back(0);
point_vector.push_back(point);
}
for (int y = 0; y < height; y++) {
auto label_tmp = label + y * width;
auto score_tmp = score + y * width;
for (int x = 0; x < width; x++) {
auto l = label_tmp[x];
if (l > 0) {
float confidence = score_tmp[x];
point_vector[l].push_back(x);
point_vector[l].push_back(y);
point_vector[l][0] += confidence;
point_vector[l][1] += 1;
}
}
}
for (size_t l = 0; l < point_vector.size(); l++)
if (point_vector[l][1] > 0) {
point_vector[l][0] /= point_vector[l][1];
}
return point_vector;
}
std::vector<std::vector<float>> pixel_group_cpu(const cv::Mat_<float>& score,
const cv::Mat_<uint8_t>& mask,
const cv::Mat_<float>& embedding,
const cv::Mat_<int32_t>& kernel_label,
const cv::Mat_<uint8_t>& kernel_contour,
int kernel_region_num, float dis_threshold) {
int height = score.rows;
int width = score.cols;
assert(embedding.rows == height * width);
assert(height == mask.rows);
assert(width == mask.cols);
auto threshold_square = dis_threshold * dis_threshold;
auto ptr_score = score.ptr<float>();
auto ptr_mask = mask.ptr<uint8_t>();
auto ptr_kernel_contour = kernel_contour.ptr<uint8_t>();
auto ptr_embedding = embedding.ptr<float>();
auto ptr_kernel_label = kernel_label.ptr<int32_t>();
std::queue<std::tuple<int, int, int32_t>> contour_pixels;
auto embedding_dim = embedding.cols;
std::vector<std::vector<float>> kernel_vector(kernel_region_num,
std::vector<float>(embedding_dim + 1, 0));
cv::Mat_<int32_t> text_label = kernel_label.clone();
auto ptr_text_label = text_label.ptr<int32_t>();
for (int i = 0; i < height; i++) {
auto ptr_embedding_tmp = ptr_embedding + i * width * embedding_dim;
auto ptr_kernel_label_tmp = ptr_kernel_label + i * width;
auto ptr_kernel_contour_tmp = ptr_kernel_contour + i * width;
for (int j = 0, k = 0; j < width && k < width * embedding_dim; j++, k += embedding_dim) {
int32_t label = ptr_kernel_label_tmp[j];
if (label > 0) {
for (int d = 0; d < embedding_dim; d++) kernel_vector[label][d] += ptr_embedding_tmp[k + d];
kernel_vector[label][embedding_dim] += 1;
// kernel pixel number
if (ptr_kernel_contour_tmp[j]) {
contour_pixels.push(std::make_tuple(i, j, label));
}
}
}
}
for (int i = 0; i < kernel_region_num; i++) {
for (int j = 0; j < embedding_dim; j++) {
kernel_vector[i][j] /= kernel_vector[i][embedding_dim];
}
}
int dx[4] = {-1, 1, 0, 0};
int dy[4] = {0, 0, -1, 1};
while (!contour_pixels.empty()) {
auto query_pixel = contour_pixels.front();
contour_pixels.pop();
int y = std::get<0>(query_pixel);
int x = std::get<1>(query_pixel);
int32_t l = std::get<2>(query_pixel);
auto kernel_cv = kernel_vector[l];
for (int idx = 0; idx < 4; idx++) {
int tmpy = y + dy[idx];
int tmpx = x + dx[idx];
auto ptr_text_label_tmp = ptr_text_label + tmpy * width;
if (tmpy < 0 || tmpy >= height || tmpx < 0 || tmpx >= width) continue;
if (!ptr_mask[tmpy * width + tmpx] || ptr_text_label_tmp[tmpx] > 0) continue;
float dis = 0;
auto ptr_embedding_tmp = ptr_embedding + tmpy * width * embedding_dim;
for (size_t i = 0; i < embedding_dim; i++) {
dis += std::pow(kernel_cv[i] - ptr_embedding_tmp[tmpx * embedding_dim + i], 2);
// ignore further computing if dis is big enough
if (dis >= threshold_square) break;
}
if (dis >= threshold_square) continue;
contour_pixels.push(std::make_tuple(tmpy, tmpx, l));
ptr_text_label_tmp[tmpx] = l;
}
}
return estimate_confidence(ptr_text_label, ptr_score, kernel_region_num, height, width);
}
} // namespace mmdeploy::mmocr