mirror of
https://github.com/open-mmlab/mmdeploy.git
synced 2025-01-14 08:09:43 +08:00
* minor changes * support windows * fix GCC build * fix lint * reformat * fix Windows build * fix GCC build * search backend ops for onnxruntime * fix lint * fix lint * code clean-up * code clean-up * fix clang build * fix trt support * fix cmake for ncnn * fix cmake for openvino * fix SDK Python API * handle ops for other backends (ncnn, trt) * handle SDK Python API library location * robustify linkage * fix cuda * minor fix for openvino & ncnn * use CMAKE_CUDA_ARCHITECTURES if set * fix cuda preprocessor * fix misc * fix pplnn & pplcv, drop support for pplcv<0.6.0 * robustify cmake * update build.md (#2) * build dynamic modules as module library & fix demo (partially) * fix candidate path for mmdeploy_python * move "enable CUDA" to cmake config for demo * refine demo cmake * add comment * fix ubuntu build * revert docs/en/build.md * fix C API * fix lint * Windows build doc (#3) * check in docs related to mmdeploy build on windows * update build guide on windows platform * update build guide on windows platform * make path of thirdparty libraries consistent * make path consistency * correct build command for custom ops * correct build command for sdk * update sdk build instructions * update doc * correct build command * fix lint * correct build command and fix lint Co-authored-by: lvhan <lvhan@pjlab.org> * trailing whitespace (#4) * minor fix * fix sr sdk model * fix type deduction * fix cudaFree after driver shutting down * update ppl.cv installation warning (#5) * fix device allocator threshold & fix lint * update doc (#6) * update ppl.cv installation warning * missing 'git clone' Co-authored-by: chenxin <chenxin2@sensetime.com> Co-authored-by: zhangli <zhangli@sensetime.com> Co-authored-by: lvhan028 <lvhan_028@163.com> Co-authored-by: lvhan <lvhan@pjlab.org>
183 lines
5.6 KiB
C++
183 lines
5.6 KiB
C++
// Copyright (c) OpenMMLab. All rights reserved.
|
|
|
|
#include <opencv2/imgcodecs.hpp>
|
|
#include <opencv2/imgproc.hpp>
|
|
|
|
#include "clipper.hpp"
|
|
#include "core/device.h"
|
|
#include "core/registry.h"
|
|
#include "core/serialization.h"
|
|
#include "core/tensor.h"
|
|
#include "core/utils/device_utils.h"
|
|
#include "core/utils/formatter.h"
|
|
#include "core/value.h"
|
|
#include "experimental/module_adapter.h"
|
|
#include "mmocr.h"
|
|
#include "opencv_utils.h"
|
|
|
|
namespace mmdeploy::mmocr {
|
|
|
|
using std::string;
|
|
using std::vector;
|
|
|
|
class DBHead : public MMOCR {
|
|
public:
|
|
explicit DBHead(const Value& config) : MMOCR(config) {
|
|
if (config.contains("params")) {
|
|
auto& params = config["params"];
|
|
text_repr_type_ = params.value("text_repr_type", string{"quad"});
|
|
mask_thr_ = params.value("mask_thr", 0.3f);
|
|
min_text_score_ = params.value("min_text_score", 0.3f);
|
|
min_text_width_ = params.value("min_text_width", 5);
|
|
unclip_ratio_ = params.value("unclip_ratio", 1.5f);
|
|
max_candidates_ = params.value("max_candidate", 3000);
|
|
rescale_ = params.value("rescale", true);
|
|
downsample_ratio_ = params.value("downsample_ratio", 1.0f);
|
|
}
|
|
}
|
|
|
|
Result<Value> operator()(const Value& _data, const Value& _prob) {
|
|
MMDEPLOY_DEBUG("preprocess_result: {}", _data);
|
|
MMDEPLOY_DEBUG("inference_result: {}", _prob);
|
|
|
|
auto img = _data["img"].get<Tensor>();
|
|
MMDEPLOY_DEBUG("img shape: {}", img.shape());
|
|
|
|
Device cpu_device{"cpu"};
|
|
OUTCOME_TRY(auto conf,
|
|
MakeAvailableOnDevice(_prob["output"].get<Tensor>(), cpu_device, stream_));
|
|
OUTCOME_TRY(stream_.Wait());
|
|
MMDEPLOY_DEBUG("shape: {}", conf.shape());
|
|
|
|
if (!(conf.shape().size() == 4 && conf.data_type() == DataType::kFLOAT)) {
|
|
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", conf.shape(),
|
|
(int)conf.data_type());
|
|
return Status(eNotSupported);
|
|
}
|
|
|
|
auto h = conf.shape(2);
|
|
auto w = conf.shape(3);
|
|
auto data = conf.buffer().GetNative();
|
|
|
|
cv::Mat score_map((int)h, (int)w, CV_32F, data);
|
|
|
|
// cv::imwrite("conf.png", score_map * 255.);
|
|
|
|
cv::Mat text_mask;
|
|
cv::threshold(score_map, text_mask, mask_thr_, 1.f, cv::THRESH_BINARY);
|
|
|
|
text_mask.convertTo(text_mask, CV_8U, 255);
|
|
// cv::imwrite("text_mask.png", text_mask);
|
|
|
|
std::vector<std::vector<cv::Point>> contours;
|
|
cv::findContours(text_mask, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE);
|
|
|
|
if (contours.size() > max_candidates_) {
|
|
contours.resize(max_candidates_);
|
|
}
|
|
|
|
TextDetectorOutput output;
|
|
for (auto& poly : contours) {
|
|
auto epsilon = 0.01 * cv::arcLength(poly, true);
|
|
std::vector<cv::Point> approx;
|
|
cv::approxPolyDP(poly, approx, epsilon, true);
|
|
if (approx.size() < 4) {
|
|
continue;
|
|
}
|
|
auto score = box_score_fast(score_map, approx);
|
|
if (score < min_text_score_) {
|
|
continue;
|
|
}
|
|
approx = unclip(approx, unclip_ratio_);
|
|
if (approx.empty()) {
|
|
continue;
|
|
}
|
|
|
|
if (text_repr_type_ == "quad") {
|
|
auto rect = cv::minAreaRect(approx);
|
|
if ((int)rect.size.width <= min_text_width_) continue;
|
|
std::vector<cv::Point2f> box_points(4);
|
|
rect.points(box_points.data());
|
|
approx.assign(begin(box_points), end(box_points));
|
|
} else if (text_repr_type_ == "poly") {
|
|
} else {
|
|
assert(0);
|
|
}
|
|
MMDEPLOY_DEBUG("score: {}", score);
|
|
// cv::drawContours(score_map, vector<vector<cv::Point>>{approx}, -1, 1);
|
|
|
|
vector<cv::Point2f> scaled(begin(approx), end(approx));
|
|
|
|
if (rescale_) {
|
|
auto scale_w = _data["img_metas"]["scale_factor"][0].get<float>();
|
|
auto scale_h = _data["img_metas"]["scale_factor"][1].get<float>();
|
|
for (auto& p : scaled) {
|
|
p.x /= scale_w * downsample_ratio_;
|
|
p.y /= scale_h * downsample_ratio_;
|
|
}
|
|
}
|
|
|
|
auto& bbox = output.boxes.emplace_back();
|
|
for (int i = 0; i < 4; ++i) {
|
|
bbox[i * 2] = scaled[i].x;
|
|
bbox[i * 2 + 1] = scaled[i].y;
|
|
}
|
|
output.scores.push_back(score);
|
|
}
|
|
|
|
return to_value(output);
|
|
}
|
|
|
|
static float box_score_fast(const cv::Mat& bitmap, const std::vector<cv::Point>& box) noexcept {
|
|
auto rect = cv::boundingRect(box) & cv::Rect({}, bitmap.size());
|
|
|
|
cv::Mat mask(rect.size(), CV_8U, cv::Scalar(0));
|
|
|
|
cv::fillPoly(mask, std::vector<std::vector<cv::Point>>{box}, 1, cv::LINE_8, 0, -rect.tl());
|
|
auto mean = cv::mean(bitmap(rect), mask)[0];
|
|
return static_cast<float>(mean);
|
|
}
|
|
|
|
static std::vector<cv::Point> unclip(std::vector<cv::Point>& box, float unclip_ratio) {
|
|
namespace cl = ClipperLib;
|
|
|
|
auto area = cv::contourArea(box);
|
|
auto length = cv::arcLength(box, true);
|
|
auto distance = area * unclip_ratio / length;
|
|
|
|
cl::Path src;
|
|
transform(begin(box), end(box), back_inserter(src), [](auto p) {
|
|
return cl::IntPoint{p.x, p.y};
|
|
});
|
|
|
|
cl::ClipperOffset offset;
|
|
offset.AddPath(src, cl::jtRound, cl::etClosedPolygon);
|
|
|
|
std::vector<cl::Path> dst;
|
|
offset.Execute(dst, distance);
|
|
if (dst.size() != 1) {
|
|
return {};
|
|
}
|
|
|
|
std::vector<cv::Point> ret;
|
|
transform(begin(dst[0]), end(dst[0]), back_inserter(ret), [](auto p) {
|
|
return cv::Point{static_cast<int>(p.X), static_cast<int>(p.Y)};
|
|
});
|
|
return ret;
|
|
}
|
|
|
|
private:
|
|
std::string text_repr_type_{"quad"};
|
|
float mask_thr_{.3};
|
|
float min_text_score_{.3};
|
|
int min_text_width_{5};
|
|
float unclip_ratio_{1.5};
|
|
int max_candidates_{3000};
|
|
bool rescale_{true};
|
|
float downsample_ratio_{1.};
|
|
};
|
|
|
|
REGISTER_CODEBASE_COMPONENT(MMOCR, DBHead);
|
|
|
|
} // namespace mmdeploy::mmocr
|