// Copyright (c) OpenMMLab. All rights reserved. #include #include #include "clipper.hpp" #include "core/device.h" #include "core/registry.h" #include "core/serialization.h" #include "core/tensor.h" #include "core/utils/device_utils.h" #include "core/utils/formatter.h" #include "core/value.h" #include "experimental/module_adapter.h" #include "mmocr.h" #include "preprocess/cpu/opencv_utils.h" namespace mmdeploy::mmocr { using std::string; using std::vector; class DBHead : public MMOCR { public: explicit DBHead(const Value& config) : MMOCR(config) { if (config.contains("params")) { auto& params = config["params"]; text_repr_type_ = params.value("text_repr_type", string{"quad"}); mask_thr_ = params.value("mask_thr", 0.3f); min_text_score_ = params.value("min_text_score", 0.3f); min_text_width_ = params.value("min_text_width", 5); unclip_ratio_ = params.value("unclip_ratio", 1.5f); max_candidates_ = params.value("max_candidate", 3000); rescale_ = params.value("rescale", true); downsample_ratio_ = params.value("downsample_ratio", 1.0f); } } Result operator()(const Value& _data, const Value& _prob) { DEBUG("preprocess_result: {}", _data); DEBUG("inference_result: {}", _prob); auto img = _data["img"].get(); DEBUG("img shape: {}", img.shape()); Device cpu_device{"cpu"}; OUTCOME_TRY(auto conf, MakeAvailableOnDevice(_prob["output"].get(), cpu_device, stream_)); OUTCOME_TRY(stream_.Wait()); DEBUG("shape: {}", conf.shape()); if (!(conf.shape().size() == 4 && conf.data_type() == DataType::kFLOAT)) { ERROR("unsupported `output` tensor, shape: {}, dtype: {}", conf.shape(), (int)conf.data_type()); return Status(eNotSupported); } auto h = conf.shape(2); auto w = conf.shape(3); auto data = conf.buffer().GetNative(); cv::Mat score_map((int)h, (int)w, CV_32F, data); // cv::imwrite("conf.png", score_map * 255.); cv::Mat text_mask; cv::threshold(score_map, text_mask, mask_thr_, 1.f, cv::THRESH_BINARY); text_mask.convertTo(text_mask, CV_8U, 255); // cv::imwrite("text_mask.png", text_mask); std::vector> contours; cv::findContours(text_mask, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE); if (contours.size() > max_candidates_) { contours.resize(max_candidates_); } TextDetectorOutput output; for (auto& poly : contours) { auto epsilon = 0.01 * cv::arcLength(poly, true); std::vector approx; cv::approxPolyDP(poly, approx, epsilon, true); if (approx.size() < 4) { continue; } auto score = box_score_fast(score_map, approx); if (score < min_text_score_) { continue; } approx = unclip(approx, unclip_ratio_); if (approx.empty()) { continue; } if (text_repr_type_ == "quad") { auto rect = cv::minAreaRect(approx); if ((int)rect.size.width <= min_text_width_) continue; std::vector box_points(4); rect.points(box_points.data()); approx.assign(begin(box_points), end(box_points)); } else if (text_repr_type_ == "poly") { } else { assert(0); } DEBUG("score: {}", score); // cv::drawContours(score_map, vector>{approx}, -1, 1); vector scaled(begin(approx), end(approx)); if (rescale_) { auto scale_w = _data["img_metas"]["scale_factor"][0].get(); auto scale_h = _data["img_metas"]["scale_factor"][1].get(); for (auto& p : scaled) { p.x /= scale_w * downsample_ratio_; p.y /= scale_h * downsample_ratio_; } } auto& bbox = output.boxes.emplace_back(); for (int i = 0; i < 4; ++i) { bbox[i * 2] = scaled[i].x; bbox[i * 2 + 1] = scaled[i].y; } output.scores.push_back(score); } return to_value(output); } static float box_score_fast(const cv::Mat& bitmap, const std::vector& box) noexcept { auto rect = cv::boundingRect(box) & cv::Rect({}, bitmap.size()); cv::Mat mask(rect.size(), CV_8U, cv::Scalar(0)); cv::fillPoly(mask, std::vector{box}, 1, cv::LINE_8, 0, -rect.tl()); auto mean = cv::mean(bitmap(rect), mask)[0]; return static_cast(mean); } static std::vector unclip(std::vector& box, float unclip_ratio) { namespace cl = ClipperLib; auto area = cv::contourArea(box); auto length = cv::arcLength(box, true); auto distance = area * unclip_ratio / length; cl::Path src; transform(begin(box), end(box), back_inserter(src), [](auto p) { return cl::IntPoint{p.x, p.y}; }); cl::ClipperOffset offset; offset.AddPath(src, cl::jtRound, cl::etClosedPolygon); std::vector dst; offset.Execute(dst, distance); if (dst.size() != 1) { return {}; } std::vector ret; transform(begin(dst[0]), end(dst[0]), back_inserter(ret), [](auto p) { return cv::Point{static_cast(p.X), static_cast(p.Y)}; }); return ret; } private: std::string text_repr_type_{"quad"}; float mask_thr_{.3}; float min_text_score_{.3}; int min_text_width_{5}; float unclip_ratio_{1.5}; int max_candidates_{3000}; bool rescale_{true}; float downsample_ratio_{1.}; }; REGISTER_CODEBASE_COMPONENT(MMOCR, DBHead); } // namespace mmdeploy::mmocr