428 lines
14 KiB
C++
428 lines
14 KiB
C++
|
|
|
|
#include "mmdeploy/archive/json_archive.h"
|
|
#include "mmdeploy/archive/value_archive.h"
|
|
#include "mmdeploy/common.hpp"
|
|
#include "mmdeploy/core/mat.h"
|
|
#include "mmdeploy/core/module.h"
|
|
#include "mmdeploy/core/utils/formatter.h"
|
|
#include "mmdeploy/experimental/module_adapter.h"
|
|
#include "mmdeploy/pipeline.hpp"
|
|
#include "opencv2/highgui.hpp"
|
|
#include "opencv2/imgproc.hpp"
|
|
#include "opencv2/videoio.hpp"
|
|
|
|
const auto config_json = R"(
|
|
{
|
|
"type": "Pipeline",
|
|
"input": ["data", "use_det", "state"],
|
|
"output": "targets",
|
|
"tasks": [
|
|
{
|
|
"type": "Cond",
|
|
"input": ["use_det", "data"],
|
|
"output": "dets",
|
|
"body": {
|
|
"name": "detection",
|
|
"type": "Inference",
|
|
"params": { "model": "detection" }
|
|
}
|
|
},
|
|
{
|
|
"type": "Task",
|
|
"module": "ProcessBboxes",
|
|
"input": ["dets", "data", "state"],
|
|
"output": "rois"
|
|
},
|
|
{
|
|
"input": "*rois",
|
|
"output": "*keypoints",
|
|
"name": "pose",
|
|
"type": "Inference",
|
|
"params": { "model": "pose" }
|
|
},
|
|
{
|
|
"type": "Task",
|
|
"module": "TrackPose",
|
|
"scheduler": "pool",
|
|
"input": ["keypoints", "state"],
|
|
"output": "targets"
|
|
}
|
|
]
|
|
}
|
|
)"_json;
|
|
|
|
namespace mmdeploy {
|
|
|
|
#define REGISTER_SIMPLE_MODULE(name, fn) \
|
|
class name##_Creator : public ::mmdeploy::Creator<Module> { \
|
|
const char* GetName() const override { return #name; } \
|
|
std::unique_ptr<Module> Create(const Value&) override { return CreateTask(fn); } \
|
|
}; \
|
|
REGISTER_MODULE(Module, name##_Creator)
|
|
|
|
std::optional<std::array<float, 4>> keypoints_to_bbox(const std::vector<cv::Point2f>& keypoints,
|
|
const std::vector<float>& scores, float img_h,
|
|
float img_w, float scale = 1.5,
|
|
float kpt_thr = 0.3) {
|
|
auto valid = false;
|
|
auto x1 = static_cast<float>(img_w);
|
|
auto y1 = static_cast<float>(img_h);
|
|
auto x2 = 0.f;
|
|
auto y2 = 0.f;
|
|
for (size_t i = 0; i < keypoints.size(); ++i) {
|
|
auto& kpt = keypoints[i];
|
|
if (scores[i] > kpt_thr) {
|
|
x1 = std::min(x1, kpt.x);
|
|
y1 = std::min(y1, kpt.y);
|
|
x2 = std::max(x2, kpt.x);
|
|
y2 = std::max(y2, kpt.y);
|
|
valid = true;
|
|
}
|
|
}
|
|
if (!valid) {
|
|
return std::nullopt;
|
|
}
|
|
auto xc = .5f * (x1 + x2);
|
|
auto yc = .5f * (y1 + y2);
|
|
auto w = (x2 - x1) * scale;
|
|
auto h = (y2 - y1) * scale;
|
|
|
|
return std::array<float, 4>{
|
|
std::max(0.f, std::min(img_w, xc - .5f * w)),
|
|
std::max(0.f, std::min(img_h, yc - .5f * h)),
|
|
std::max(0.f, std::min(img_w, xc + .5f * w)),
|
|
std::max(0.f, std::min(img_h, yc + .5f * h)),
|
|
};
|
|
}
|
|
|
|
struct Track {
|
|
std::vector<std::vector<cv::Point2f>> keypoints;
|
|
std::vector<std::vector<float>> scores;
|
|
std::vector<std::array<float, 4>> bboxes;
|
|
int64_t track_id{-1};
|
|
};
|
|
|
|
struct TrackInfo {
|
|
std::vector<Track> tracks;
|
|
int64_t next_id{0};
|
|
};
|
|
|
|
MMDEPLOY_REGISTER_TYPE_ID(TrackInfo, 0xcfe87980aa895d3a); // randomly generated type id
|
|
|
|
Value::Array GetObjectsByTracking(Value& state, int img_h, int img_w) {
|
|
Value::Array objs;
|
|
auto& track_info = state["track_info"].get_ref<TrackInfo&>();
|
|
for (auto& track : track_info.tracks) {
|
|
auto bbox = keypoints_to_bbox(track.keypoints.back(), track.scores.back(),
|
|
static_cast<float>(img_h), static_cast<float>(img_w));
|
|
if (bbox) {
|
|
objs.push_back({{"bbox", to_value(*bbox)}});
|
|
}
|
|
}
|
|
return objs;
|
|
}
|
|
|
|
Value ProcessBboxes(const Value& detections, const Value& data, Value state) {
|
|
assert(state.is_pointer());
|
|
Value::Array bboxes;
|
|
if (detections.is_array()) { // has detections
|
|
auto& dets = detections.array();
|
|
for (const auto& det : dets) {
|
|
if (det["label_id"].get<int>() == 0 && det["score"].get<float>() >= .3f) {
|
|
bboxes.push_back(det);
|
|
}
|
|
}
|
|
MMDEPLOY_INFO("bboxes by detection: {}", bboxes.size());
|
|
state["bboxes"] = bboxes;
|
|
} else { // no detections, use tracked results
|
|
auto img_h = state["img_shape"][0].get<int>();
|
|
auto img_w = state["img_shape"][1].get<int>();
|
|
bboxes = GetObjectsByTracking(state, img_h, img_w);
|
|
MMDEPLOY_INFO("GetObjectsByTracking: {}", bboxes.size());
|
|
}
|
|
// attach bboxes to image data
|
|
for (auto& bbox : bboxes) {
|
|
auto img = data["ori_img"].get<framework::Mat>();
|
|
auto box = from_value<std::array<float, 4>>(bbox["bbox"]);
|
|
cv::Rect rect(cv::Rect2f(cv::Point2f(box[0], box[1]), cv::Point2f(box[2], box[3])));
|
|
bbox = Value::Object{
|
|
{"ori_img", img}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}, {"rotation", 0.f}};
|
|
};
|
|
return bboxes;
|
|
}
|
|
REGISTER_SIMPLE_MODULE(ProcessBboxes, ProcessBboxes);
|
|
|
|
// xyxy format
|
|
float ComputeIoU(const std::array<float, 4>& a, const std::array<float, 4>& b) {
|
|
auto x1 = std::max(a[0], b[0]);
|
|
auto y1 = std::max(a[1], b[1]);
|
|
auto x2 = std::min(a[2], b[2]);
|
|
auto y2 = std::min(a[3], b[3]);
|
|
|
|
auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1);
|
|
|
|
auto a_area = (a[2] - a[0]) * (a[3] - a[1]);
|
|
auto b_area = (b[2] - b[0]) * (b[3] - b[1]);
|
|
auto union_area = a_area + b_area - inter_area;
|
|
|
|
if (union_area == 0.f) {
|
|
return 0;
|
|
}
|
|
|
|
return inter_area / union_area;
|
|
}
|
|
|
|
void UpdateTrack(Track& track, std::vector<cv::Point2f>& keypoints, std::vector<float>& score,
|
|
const std::array<float, 4>& bbox, int n_history) {
|
|
if (track.scores.size() == n_history) {
|
|
std::rotate(track.keypoints.begin(), track.keypoints.begin() + 1, track.keypoints.end());
|
|
std::rotate(track.scores.begin(), track.scores.begin() + 1, track.scores.end());
|
|
std::rotate(track.bboxes.begin(), track.bboxes.begin() + 1, track.bboxes.end());
|
|
track.keypoints.back() = std::move(keypoints);
|
|
track.scores.back() = std::move(score);
|
|
track.bboxes.back() = bbox;
|
|
} else {
|
|
track.keypoints.push_back(std::move(keypoints));
|
|
track.scores.push_back(std::move(score));
|
|
track.bboxes.push_back(bbox);
|
|
}
|
|
}
|
|
|
|
std::vector<std::tuple<int, int, float>> GreedyAssignment(const std::vector<float>& scores,
|
|
int n_rows, int n_cols, float thr) {
|
|
std::vector<int> used_rows(n_rows);
|
|
std::vector<int> used_cols(n_cols);
|
|
std::vector<std::tuple<int, int, float>> assignment;
|
|
assignment.reserve(std::max(n_rows, n_cols));
|
|
while (true) {
|
|
auto max_score = 0.f;
|
|
int max_row = -1;
|
|
int max_col = -1;
|
|
for (int i = 0; i < n_rows; ++i) {
|
|
if (!used_rows[i]) {
|
|
for (int j = 0; j < n_cols; ++j) {
|
|
if (!used_cols[j]) {
|
|
if (scores[i * n_cols + j] > max_score) {
|
|
max_score = scores[i * n_cols + j];
|
|
max_row = i;
|
|
max_col = j;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (max_score < thr) {
|
|
break;
|
|
}
|
|
used_rows[max_row] = 1;
|
|
used_cols[max_col] = 1;
|
|
assignment.emplace_back(max_row, max_col, max_score);
|
|
}
|
|
return assignment;
|
|
}
|
|
|
|
void TrackStep(std::vector<std::vector<cv::Point2f>>& keypoints,
|
|
std::vector<std::vector<float>>& scores, TrackInfo& track_info, int img_h, int img_w,
|
|
float iou_thr, int min_keypoints, int n_history) {
|
|
auto& tracks = track_info.tracks;
|
|
|
|
std::vector<Track> new_tracks;
|
|
new_tracks.reserve(tracks.size());
|
|
|
|
std::vector<std::array<float, 4>> bboxes;
|
|
bboxes.reserve(keypoints.size());
|
|
|
|
std::vector<int> indices;
|
|
indices.reserve(keypoints.size());
|
|
|
|
for (size_t i = 0; i < keypoints.size(); ++i) {
|
|
if (auto bbox = keypoints_to_bbox(keypoints[i], scores[i], img_h, img_w, 1.f, 0.f)) {
|
|
bboxes.push_back(*bbox);
|
|
indices.push_back(i);
|
|
}
|
|
}
|
|
|
|
const auto n_rows = static_cast<int>(bboxes.size());
|
|
const auto n_cols = static_cast<int>(tracks.size());
|
|
|
|
std::vector<float> similarities(n_rows * n_cols);
|
|
for (size_t i = 0; i < n_rows; ++i) {
|
|
for (size_t j = 0; j < n_cols; ++j) {
|
|
similarities[i * n_cols + j] = ComputeIoU(bboxes[i], tracks[j].bboxes.back());
|
|
}
|
|
}
|
|
|
|
const auto assignment = GreedyAssignment(similarities, n_rows, n_cols, iou_thr);
|
|
|
|
std::vector<int> used(n_rows);
|
|
for (auto [i, j, _] : assignment) {
|
|
auto k = indices[i];
|
|
UpdateTrack(tracks[j], keypoints[k], scores[k], bboxes[i], n_history);
|
|
new_tracks.push_back(std::move(tracks[j]));
|
|
used[i] = true;
|
|
}
|
|
|
|
for (size_t i = 0; i < used.size(); ++i) {
|
|
if (used[i] == 0) {
|
|
auto k = indices[i];
|
|
auto count = std::count_if(scores[k].begin(), scores[k].end(), [](auto x) { return x > 0; });
|
|
if (count >= min_keypoints) {
|
|
auto& track = new_tracks.emplace_back();
|
|
track.track_id = track_info.next_id++;
|
|
UpdateTrack(track, keypoints[k], scores[k], bboxes[i], n_history);
|
|
}
|
|
}
|
|
}
|
|
|
|
tracks = std::move(new_tracks);
|
|
}
|
|
|
|
Value TrackPose(const Value& result, Value state) {
|
|
assert(state.is_pointer());
|
|
assert(result.is_array());
|
|
std::vector<std::vector<cv::Point2f>> keypoints;
|
|
std::vector<std::vector<float>> scores;
|
|
for (auto& output : result.array()) {
|
|
auto& k = keypoints.emplace_back();
|
|
auto& s = scores.emplace_back();
|
|
for (auto& kpt : output["key_points"].array()) {
|
|
k.push_back(cv::Point2f{kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>()});
|
|
s.push_back(kpt["score"].get<float>());
|
|
}
|
|
}
|
|
auto& track_info = state["track_info"].get_ref<TrackInfo&>();
|
|
auto img_h = state["img_shape"][0].get<int>();
|
|
auto img_w = state["img_shape"][1].get<int>();
|
|
auto iou_thr = state["iou_thr"].get<float>();
|
|
auto min_keypoints = state["min_keypoints"].get<int>();
|
|
auto n_history = state["n_history"].get<int>();
|
|
TrackStep(keypoints, scores, track_info, img_h, img_w, iou_thr, min_keypoints, n_history);
|
|
|
|
Value::Array targets;
|
|
for (const auto& track : track_info.tracks) {
|
|
if (auto bbox = keypoints_to_bbox(track.keypoints.back(), track.scores.back(), img_h, img_w)) {
|
|
Value::Array kpts;
|
|
kpts.reserve(track.keypoints.back().size());
|
|
for (const auto& kpt : track.keypoints.back()) {
|
|
kpts.push_back(kpt.x);
|
|
kpts.push_back(kpt.y);
|
|
}
|
|
targets.push_back({{"bbox", to_value(*bbox)}, {"keypoints", std::move(kpts)}});
|
|
}
|
|
}
|
|
return targets;
|
|
}
|
|
REGISTER_SIMPLE_MODULE(TrackPose, TrackPose);
|
|
|
|
class PoseTracker {
|
|
public:
|
|
using State = Value;
|
|
|
|
public:
|
|
PoseTracker(const Model& det_model, const Model& pose_model, Context context)
|
|
: pipeline_([&] {
|
|
context.Add("detection", det_model);
|
|
context.Add("pose", pose_model);
|
|
auto config = from_json<Value>(config_json);
|
|
return Pipeline{config, context};
|
|
}()) {}
|
|
|
|
State CreateState() { // NOLINT
|
|
return make_pointer({{"frame_id", 0},
|
|
{"n_history", 10},
|
|
{"iou_thr", .3f},
|
|
{"min_keypoints", 3},
|
|
{"track_info", TrackInfo{}}});
|
|
}
|
|
|
|
Value Track(const Mat& img, State& state, int use_detector = -1) {
|
|
assert(state.is_pointer());
|
|
framework::Mat mat(img.desc().height, img.desc().width,
|
|
static_cast<PixelFormat>(img.desc().format),
|
|
static_cast<DataType>(img.desc().type), {img.desc().data, [](void*) {}});
|
|
// TODO: get_ref<int&> is not working
|
|
auto frame_id = state["frame_id"].get<int>();
|
|
if (use_detector < 0) {
|
|
use_detector = frame_id % 10 == 0;
|
|
if (use_detector) {
|
|
MMDEPLOY_WARN("use detector");
|
|
}
|
|
}
|
|
state["frame_id"] = frame_id + 1;
|
|
state["img_shape"] = {mat.height(), mat.width()};
|
|
Value::Object data{{"ori_img", mat}};
|
|
Value input{{data}, {use_detector}, {state}};
|
|
return pipeline_.Apply(input)[0][0];
|
|
}
|
|
|
|
private:
|
|
Pipeline pipeline_;
|
|
};
|
|
|
|
} // namespace mmdeploy
|
|
|
|
using namespace mmdeploy;
|
|
|
|
void Visualize(cv::Mat& frame, const Value& result) {
|
|
static std::vector<std::pair<int, int>> skeleton{
|
|
{15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, {5, 6}, {5, 7}, {6, 8},
|
|
{7, 9}, {8, 10}, {1, 2}, {0, 1}, {0, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}};
|
|
const auto& targets = result.array();
|
|
for (const auto& target : targets) {
|
|
auto bbox = from_value<std::array<float, 4>>(target["bbox"]);
|
|
auto kpts = from_value<std::vector<float>>(target["keypoints"]);
|
|
cv::Point p1(bbox[0], bbox[1]);
|
|
cv::Point p2(bbox[2], bbox[3]);
|
|
cv::rectangle(frame, p1, p2, cv::Scalar(0, 255, 0));
|
|
for (int i = 0; i < kpts.size(); i += 2) {
|
|
cv::Point p(kpts[i], kpts[i + 1]);
|
|
cv::circle(frame, p, 1, cv::Scalar(0, 255, 255), 2, cv::LINE_AA);
|
|
}
|
|
for (int i = 0; i < skeleton.size(); ++i) {
|
|
auto [u, v] = skeleton[i];
|
|
cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]);
|
|
cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]);
|
|
cv::line(frame, p_u, p_v, cv::Scalar(0, 255, 255), 1, cv::LINE_AA);
|
|
}
|
|
}
|
|
cv::imshow("", frame);
|
|
cv::waitKey(10);
|
|
}
|
|
|
|
int main(int argc, char* argv[]) {
|
|
const auto device_name = argv[1];
|
|
const auto det_model_path = argv[2];
|
|
const auto pose_model_path = argv[3];
|
|
const auto video_path = argv[4];
|
|
Device device(device_name);
|
|
Context context(device);
|
|
auto pool = Scheduler::ThreadPool(4);
|
|
auto infer = Scheduler::Thread();
|
|
context.Add("pool", pool);
|
|
context.Add("infer", infer);
|
|
PoseTracker tracker(Model(det_model_path), Model(pose_model_path), context);
|
|
auto state = tracker.CreateState();
|
|
|
|
cv::Mat frame;
|
|
std::chrono::duration<double, std::milli> dt{};
|
|
|
|
int frame_id{};
|
|
|
|
cv::VideoCapture video(video_path);
|
|
while (true) {
|
|
video >> frame;
|
|
if (!frame.data) {
|
|
break;
|
|
}
|
|
auto t0 = std::chrono::high_resolution_clock::now();
|
|
auto result = tracker.Track(frame, state);
|
|
auto t1 = std::chrono::high_resolution_clock::now();
|
|
dt += t1 - t0;
|
|
++frame_id;
|
|
Visualize(frame, result);
|
|
}
|
|
|
|
MMDEPLOY_INFO("frames: {}, time {} ms", frame_id, dt.count());
|
|
}
|