// mmdeploy/demo/csrc/cpp/pose_tracker.cpp
//
// 428 lines
// 14 KiB
// C++

#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <tuple>
#include <utility>
#include <vector>

#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/common.hpp"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "mmdeploy/pipeline.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/videoio.hpp"
// Pipeline description used by PoseTracker (converted with from_json<Value> in its constructor).
// The literal is parsed by the `_json` user-defined literal (presumably nlohmann::json made
// available through json_archive.h -- TODO confirm).
//
// Inputs : "data", "use_det", "state"      Output: "targets"
// Tasks, in order:
//   1. "Cond" on `use_det` wrapping the "detection" inference; produces `dets`.
//      (Presumably the body is skipped when `use_det` is false; ProcessBboxes below handles the
//      case where `dets` is not an array.)
//   2. "ProcessBboxes" module: (dets, data, state) -> rois.
//   3. "pose" inference: "*rois" -> "*keypoints" (the `*` prefix presumably maps the task over
//      each element -- verify against the pipeline documentation).
//   4. "TrackPose" module on the "pool" scheduler: (keypoints, state) -> targets.
// The model names "detection" and "pose" and the scheduler name "pool" must be registered in
// the Context handed to the Pipeline.
const auto config_json = R"(
{
"type": "Pipeline",
"input": ["data", "use_det", "state"],
"output": "targets",
"tasks": [
{
"type": "Cond",
"input": ["use_det", "data"],
"output": "dets",
"body": {
"name": "detection",
"type": "Inference",
"params": { "model": "detection" }
}
},
{
"type": "Task",
"module": "ProcessBboxes",
"input": ["dets", "data", "state"],
"output": "rois"
},
{
"input": "*rois",
"output": "*keypoints",
"name": "pose",
"type": "Inference",
"params": { "model": "pose" }
},
{
"type": "Task",
"module": "TrackPose",
"scheduler": "pool",
"input": ["keypoints", "state"],
"output": "targets"
}
]
}
)"_json;
namespace mmdeploy {
// Registers a free function as a pipeline module.
//
// Expands to a class `<name>_Creator` deriving from ::mmdeploy::Creator<Module> whose
//   - GetName() returns the stringified `name`, and
//   - Create() ignores its Value argument and returns CreateTask(fn),
// followed by REGISTER_MODULE(Module, <name>_Creator).
//
// `name` is the string the pipeline config refers to in its "module" field; `fn` is the callable
// wrapped by CreateTask. The expansion does not end with a semicolon, so call sites supply it.
#define REGISTER_SIMPLE_MODULE(name, fn) \
class name##_Creator : public ::mmdeploy::Creator<Module> { \
const char* GetName() const override { return #name; } \
std::unique_ptr<Module> Create(const Value&) override { return CreateTask(fn); } \
}; \
REGISTER_MODULE(Module, name##_Creator)
// Builds an xyxy bounding box around the keypoints whose score is strictly above `kpt_thr`.
//
// @param keypoints  keypoint coordinates of one target
// @param scores     per-keypoint scores; indexed in parallel with `keypoints`, so it must hold
//                   at least keypoints.size() entries
// @param img_h      image height used to clip the box
// @param img_w      image width used to clip the box
// @param scale      factor applied to the box width/height around its center
// @param kpt_thr    keypoints with score <= kpt_thr are ignored
// @return {x1, y1, x2, y2} clipped to [0, img_w] x [0, img_h], or std::nullopt when no keypoint
//         passes the threshold.
std::optional<std::array<float, 4>> keypoints_to_bbox(const std::vector<cv::Point2f>& keypoints,
                                                      const std::vector<float>& scores, float img_h,
                                                      float img_w, float scale = 1.5,
                                                      float kpt_thr = 0.3) {
  // The extents start at the opposite image borders, so the minimum never exceeds the image
  // size and the maximum never drops below zero.
  float left = img_w;
  float top = img_h;
  float right = 0.f;
  float bottom = 0.f;
  bool any_confident = false;

  for (size_t idx = 0; idx < keypoints.size(); ++idx) {
    if (!(scores[idx] > kpt_thr)) {
      continue;
    }
    const auto& pt = keypoints[idx];
    left = std::min(left, pt.x);
    top = std::min(top, pt.y);
    right = std::max(right, pt.x);
    bottom = std::max(bottom, pt.y);
    any_confident = true;
  }

  if (!any_confident) {
    return std::nullopt;
  }

  const float center_x = .5f * (left + right);
  const float center_y = .5f * (top + bottom);
  const float half_w = .5f * ((right - left) * scale);
  const float half_h = .5f * ((bottom - top) * scale);

  // Clip a coordinate to the range [0, limit].
  const auto clip = [](float value, float limit) { return std::max(0.f, std::min(limit, value)); };

  return std::array<float, 4>{
      clip(center_x - half_w, img_w),
      clip(center_y - half_h, img_h),
      clip(center_x + half_w, img_w),
      clip(center_y + half_h, img_h),
  };
}
// History of one tracked target. UpdateTrack appends to the three vectors in lock-step, so
// index i of each refers to the same frame and back() is the most recent observation.
struct Track {
  std::vector<std::vector<cv::Point2f>> keypoints;  // keypoint coordinates, one set per frame
  std::vector<std::vector<float>> scores;           // keypoint scores, parallel to `keypoints`
  std::vector<std::array<float, 4>> bboxes;         // xyxy box per frame
  int64_t track_id = -1;                            // -1 until TrackStep assigns an id
};
// Tracker state kept inside the pipeline `state` value under the "track_info" key.
struct TrackInfo {
  std::vector<Track> tracks;  // tracks alive after the latest TrackStep
  int64_t next_id = 0;        // id handed to the next newly created track
};
// Lets a TrackInfo be stored in / retrieved from a Value (see get_ref<TrackInfo&> below).
MMDEPLOY_REGISTER_TYPE_ID(TrackInfo, 0xcfe87980aa895d3a);  // randomly generated type id
// Derives one ROI per existing track from the track's most recent keypoints.
//
// @param state  pipeline state; must hold a TrackInfo under "track_info"
// @param img_h  image height used to clip the boxes
// @param img_w  image width used to clip the boxes
// @return array of objects of the form {"bbox": [x1, y1, x2, y2]}; tracks whose latest keypoints
//         yield no box (keypoints_to_bbox returned nullopt) are skipped.
Value::Array GetObjectsByTracking(Value& state, int img_h, int img_w) {
  auto& info = state["track_info"].get_ref<TrackInfo&>();
  const auto height = static_cast<float>(img_h);
  const auto width = static_cast<float>(img_w);

  Value::Array rois;
  for (auto& trk : info.tracks) {
    auto box = keypoints_to_bbox(trk.keypoints.back(), trk.scores.back(), height, width);
    if (!box) {
      continue;
    }
    rois.push_back({{"bbox", to_value(*box)}});
  }
  return rois;
}
// Pipeline module: selects the ROIs that are fed to the pose model.
//
// @param detections  detector output; when it is an array, entries with label_id == 0
//                    (presumably the "person" class -- confirm against the detector's label map)
//                    and score >= 0.3 are kept. Otherwise ROIs come from the tracked targets.
// @param data        must contain the input image under "ori_img"
// @param state       pointer-valued pipeline state; "bboxes" is written when detections are
//                    used, "img_shape" / "track_info" are read otherwise
// @return array of {"ori_img": Mat, "bbox": [x, y, width, height], "rotation": 0}
Value ProcessBboxes(const Value& detections, const Value& data, Value state) {
  assert(state.is_pointer());
  Value::Array rois;

  if (!detections.is_array()) {
    // no detections, use tracked results
    const auto height = state["img_shape"][0].get<int>();
    const auto width = state["img_shape"][1].get<int>();
    rois = GetObjectsByTracking(state, height, width);
    MMDEPLOY_INFO("GetObjectsByTracking: {}", rois.size());
  } else {
    // has detections
    for (const auto& candidate : detections.array()) {
      if (candidate["label_id"].get<int>() != 0) {
        continue;
      }
      if (!(candidate["score"].get<float>() >= .3f)) {
        continue;
      }
      rois.push_back(candidate);
    }
    MMDEPLOY_INFO("bboxes by detection: {}", rois.size());
    state["bboxes"] = rois;
  }

  // attach bboxes to image data; the xyxy float box becomes an integer x/y/width/height rect
  for (auto& roi : rois) {
    auto image = data["ori_img"].get<framework::Mat>();
    auto corners = from_value<std::array<float, 4>>(roi["bbox"]);
    const cv::Point2f top_left(corners[0], corners[1]);
    const cv::Point2f bottom_right(corners[2], corners[3]);
    cv::Rect rect(cv::Rect2f(top_left, bottom_right));
    roi = Value::Object{
        {"ori_img", image}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}, {"rotation", 0.f}};
  }
  return rois;
}
REGISTER_SIMPLE_MODULE(ProcessBboxes, ProcessBboxes);
// Intersection-over-union of two boxes in xyxy format ({x1, y1, x2, y2}).
// Returns 0 when the union area is exactly zero (e.g. both boxes are degenerate).
float ComputeIoU(const std::array<float, 4>& a, const std::array<float, 4>& b) {
  // Overlap extents are clamped at zero so disjoint boxes give an empty intersection.
  const float overlap_w = std::max(0.f, std::min(a[2], b[2]) - std::max(a[0], b[0]));
  const float overlap_h = std::max(0.f, std::min(a[3], b[3]) - std::max(a[1], b[1]));
  const float intersection = overlap_w * overlap_h;

  const float area_a = (a[2] - a[0]) * (a[3] - a[1]);
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  const float total = area_a + area_b - intersection;

  return total == 0.f ? 0.f : intersection / total;
}
// Appends one observation to `track`, keeping at most `n_history` entries per history vector
// (the oldest entries are discarded first).
//
// @param track      track to extend
// @param keypoints  keypoints of the new observation; moved from (left in a moved-from state)
// @param score      per-keypoint scores of the new observation; moved from
// @param bbox       xyxy box of the new observation; copied
// @param n_history  maximum number of observations to retain. Values below 1 are treated as 1
//                   so that back() on the history vectors stays valid for callers.
//
// Unlike an exact-size check, the `>=` comparison also trims a history that is already longer
// than `n_history` (e.g. after the limit was lowered).
void UpdateTrack(Track& track, std::vector<cv::Point2f>& keypoints, std::vector<float>& score,
                 const std::array<float, 4>& bbox, int n_history) {
  const auto capacity = static_cast<size_t>(std::max(n_history, 1));

  // Drops the oldest entries so that, after the append, at most `capacity` remain.
  const auto append_bounded = [capacity](auto& history, auto&& entry) {
    if (history.size() >= capacity) {
      const auto surplus = history.size() - capacity + 1;
      history.erase(history.begin(), history.begin() + static_cast<std::ptrdiff_t>(surplus));
    }
    history.push_back(std::forward<decltype(entry)>(entry));
  };

  append_bounded(track.keypoints, std::move(keypoints));
  append_bounded(track.scores, std::move(score));
  append_bounded(track.bboxes, bbox);
}
// Greedy one-to-one matching on a row-major score matrix.
//
// Repeatedly picks the highest remaining score among unused rows/columns and pairs them, until
// no candidate is left or the best remaining score falls below `thr`.
//
// @param scores  row-major n_rows x n_cols matrix; must hold at least n_rows * n_cols values
// @param n_rows  number of rows
// @param n_cols  number of columns
// @param thr     minimum score for a pair to be accepted
// @return accepted pairs as (row, col, score), in the order they were picked (descending score).
//
// Only strictly positive scores are ever selected, so zero/negative entries are never matched
// regardless of `thr`.
std::vector<std::tuple<int, int, float>> GreedyAssignment(const std::vector<float>& scores,
                                                          int n_rows, int n_cols, float thr) {
  std::vector<std::tuple<int, int, float>> assignment;
  if (n_rows <= 0 || n_cols <= 0) {
    return assignment;  // nothing to match
  }
  assert(scores.size() >= static_cast<size_t>(n_rows) * static_cast<size_t>(n_cols));

  std::vector<int> used_rows(n_rows);
  std::vector<int> used_cols(n_cols);
  // A one-to-one matching cannot contain more pairs than the smaller dimension.
  assignment.reserve(std::min(n_rows, n_cols));

  while (true) {
    auto max_score = 0.f;
    int max_row = -1;
    int max_col = -1;
    for (int i = 0; i < n_rows; ++i) {
      if (used_rows[i]) {
        continue;
      }
      for (int j = 0; j < n_cols; ++j) {
        if (used_cols[j]) {
          continue;
        }
        const float score = scores[i * n_cols + j];
        if (score > max_score) {
          max_score = score;
          max_row = i;
          max_col = j;
        }
      }
    }
    // max_row < 0 means no candidate was found. Without this check a threshold <= 0 would let
    // the loop continue and index the bookkeeping vectors with -1.
    if (max_row < 0 || max_score < thr) {
      break;
    }
    used_rows[max_row] = 1;
    used_cols[max_col] = 1;
    assignment.emplace_back(max_row, max_col, max_score);
  }
  return assignment;
}
// Advances the tracker by one frame.
//
// Each pose that yields a box (keypoints with score > 0, no scaling) is matched against the
// latest box of the existing tracks by IoU using GreedyAssignment. Matched tracks are extended
// with the new observation; unmatched poses start a new track when they have at least
// `min_keypoints` keypoints with a positive score. Existing tracks that receive no match are
// dropped.
//
// @param keypoints      per-pose keypoints of the current frame; entries that end up in a track
//                       are moved from
// @param scores         per-pose keypoint scores, parallel to `keypoints`; moved from likewise
// @param track_info     tracker state, updated in place
// @param img_h, img_w   image size used to clip the boxes
// @param iou_thr        minimum IoU for a pose/track match
// @param min_keypoints  minimum number of positive-score keypoints to open a new track
// @param n_history      history length forwarded to UpdateTrack
void TrackStep(std::vector<std::vector<cv::Point2f>>& keypoints,
               std::vector<std::vector<float>>& scores, TrackInfo& track_info, int img_h, int img_w,
               float iou_thr, int min_keypoints, int n_history) {
  auto& tracks = track_info.tracks;

  // Boxes of the poses that have at least one positive-score keypoint; `pose_of_box[b]` is the
  // index into `keypoints` / `scores` that box `b` was computed from.
  std::vector<std::array<float, 4>> pose_boxes;
  std::vector<int> pose_of_box;
  pose_boxes.reserve(keypoints.size());
  pose_of_box.reserve(keypoints.size());
  for (size_t pose = 0; pose < keypoints.size(); ++pose) {
    auto box = keypoints_to_bbox(keypoints[pose], scores[pose], img_h, img_w, 1.f, 0.f);
    if (!box) {
      continue;
    }
    pose_boxes.push_back(*box);
    pose_of_box.push_back(static_cast<int>(pose));
  }

  // Row-major IoU matrix: rows are pose boxes, columns are existing tracks.
  const auto n_boxes = static_cast<int>(pose_boxes.size());
  const auto n_tracks = static_cast<int>(tracks.size());
  std::vector<float> iou_matrix;
  iou_matrix.reserve(n_boxes * n_tracks);
  for (const auto& box : pose_boxes) {
    for (const auto& trk : tracks) {
      iou_matrix.push_back(ComputeIoU(box, trk.bboxes.back()));
    }
  }

  const auto matches = GreedyAssignment(iou_matrix, n_boxes, n_tracks, iou_thr);

  std::vector<Track> survivors;
  survivors.reserve(tracks.size());
  std::vector<int> matched(n_boxes);

  // Matched tracks: append the observation and carry the track over.
  for (const auto& match : matches) {
    const int box_idx = std::get<0>(match);
    const int track_idx = std::get<1>(match);
    const auto pose = pose_of_box[box_idx];
    UpdateTrack(tracks[track_idx], keypoints[pose], scores[pose], pose_boxes[box_idx], n_history);
    survivors.push_back(std::move(tracks[track_idx]));
    matched[box_idx] = 1;
  }

  // Unmatched poses: open a new track if enough keypoints have a positive score.
  for (size_t box_idx = 0; box_idx < matched.size(); ++box_idx) {
    if (matched[box_idx] != 0) {
      continue;
    }
    const auto pose = pose_of_box[box_idx];
    const auto n_positive =
        std::count_if(scores[pose].begin(), scores[pose].end(), [](auto x) { return x > 0; });
    if (n_positive < min_keypoints) {
      continue;
    }
    Track fresh;
    fresh.track_id = track_info.next_id++;
    UpdateTrack(fresh, keypoints[pose], scores[pose], pose_boxes[box_idx], n_history);
    survivors.push_back(std::move(fresh));
  }

  tracks = std::move(survivors);
}
// Pipeline module: feeds the pose results of the current frame into the tracker and reports
// the tracked targets.
//
// @param result  array of pose outputs; each has a "key_points" array whose items carry the
//                point in "bbox"[0..1] and its confidence in "score"
// @param state   pointer-valued pipeline state; reads "track_info", "img_shape", "iou_thr",
//                "min_keypoints" and "n_history", and updates the TrackInfo in place
// @return array of {"bbox": [x1, y1, x2, y2], "keypoints": [x0, y0, x1, y1, ...]}, one entry per
//         track whose latest keypoints produce a box with the default score threshold.
Value TrackPose(const Value& result, Value state) {
  assert(state.is_pointer());
  assert(result.is_array());

  // Unpack the pose outputs into parallel coordinate / score lists.
  std::vector<std::vector<cv::Point2f>> keypoints;
  std::vector<std::vector<float>> scores;
  for (auto& output : result.array()) {
    std::vector<cv::Point2f> points;
    std::vector<float> confidences;
    for (auto& kpt : output["key_points"].array()) {
      points.push_back(cv::Point2f{kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>()});
      confidences.push_back(kpt["score"].get<float>());
    }
    keypoints.push_back(std::move(points));
    scores.push_back(std::move(confidences));
  }

  auto& track_info = state["track_info"].get_ref<TrackInfo&>();
  const auto img_h = state["img_shape"][0].get<int>();
  const auto img_w = state["img_shape"][1].get<int>();
  const auto iou_thr = state["iou_thr"].get<float>();
  const auto min_keypoints = state["min_keypoints"].get<int>();
  const auto n_history = state["n_history"].get<int>();

  TrackStep(keypoints, scores, track_info, img_h, img_w, iou_thr, min_keypoints, n_history);

  Value::Array targets;
  for (const auto& track : track_info.tracks) {
    const auto& latest = track.keypoints.back();
    auto bbox = keypoints_to_bbox(latest, track.scores.back(), img_h, img_w);
    if (!bbox) {
      continue;
    }
    // Flatten the points to [x0, y0, x1, y1, ...].
    Value::Array kpts;
    kpts.reserve(latest.size());
    for (const auto& kpt : latest) {
      kpts.push_back(kpt.x);
      kpts.push_back(kpt.y);
    }
    targets.push_back({{"bbox", to_value(*bbox)}, {"keypoints", std::move(kpts)}});
  }
  return targets;
}
REGISTER_SIMPLE_MODULE(TrackPose, TrackPose);
class PoseTracker {
public:
using State = Value;
public:
PoseTracker(const Model& det_model, const Model& pose_model, Context context)
: pipeline_([&] {
context.Add("detection", det_model);
context.Add("pose", pose_model);
auto config = from_json<Value>(config_json);
return Pipeline{config, context};
}()) {}
State CreateState() { // NOLINT
return make_pointer({{"frame_id", 0},
{"n_history", 10},
{"iou_thr", .3f},
{"min_keypoints", 3},
{"track_info", TrackInfo{}}});
}
Value Track(const Mat& img, State& state, int use_detector = -1) {
assert(state.is_pointer());
framework::Mat mat(img.desc().height, img.desc().width,
static_cast<PixelFormat>(img.desc().format),
static_cast<DataType>(img.desc().type), {img.desc().data, [](void*) {}});
// TODO: get_ref<int&> is not working
auto frame_id = state["frame_id"].get<int>();
if (use_detector < 0) {
use_detector = frame_id % 10 == 0;
if (use_detector) {
MMDEPLOY_WARN("use detector");
}
}
state["frame_id"] = frame_id + 1;
state["img_shape"] = {mat.height(), mat.width()};
Value::Object data{{"ori_img", mat}};
Value input{{data}, {use_detector}, {state}};
return pipeline_.Apply(input)[0][0];
}
private:
Pipeline pipeline_;
};
} // namespace mmdeploy
// The demo code below (Visualize, main) refers to mmdeploy names such as Value, Device and
// PoseTracker without qualification.
using namespace mmdeploy;
// Draws the tracked targets onto `frame` and shows it in a window.
//
// @param frame   image to draw on (modified in place)
// @param result  array of targets as produced by TrackPose: each has "bbox" ([x1, y1, x2, y2])
//                and "keypoints" (flattened [x0, y0, x1, y1, ...])
//
// Skeleton links whose endpoints are not present in a target's keypoints are skipped, so a
// model with fewer keypoints than the skeleton expects no longer reads out of bounds.
void Visualize(cv::Mat& frame, const Value& result) {
  // Pairs of keypoint indices to connect (looks like the COCO 17-keypoint layout -- confirm
  // against the pose model in use).
  static const std::vector<std::pair<int, int>> skeleton{
      {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, {5, 6}, {5, 7}, {6, 8},
      {7, 9},   {8, 10},  {1, 2},   {0, 1},   {0, 2},   {1, 3},  {2, 4},  {3, 5}, {4, 6}};

  const auto& targets = result.array();
  for (const auto& target : targets) {
    const auto bbox = from_value<std::array<float, 4>>(target["bbox"]);
    const auto kpts = from_value<std::vector<float>>(target["keypoints"]);
    // Number of complete (x, y) pairs; a trailing unpaired value is ignored.
    const size_t n_points = kpts.size() / 2;
    const auto point_at = [&kpts](size_t idx) {
      return cv::Point(static_cast<int>(kpts[2 * idx]), static_cast<int>(kpts[2 * idx + 1]));
    };

    const cv::Point p1(static_cast<int>(bbox[0]), static_cast<int>(bbox[1]));
    const cv::Point p2(static_cast<int>(bbox[2]), static_cast<int>(bbox[3]));
    cv::rectangle(frame, p1, p2, cv::Scalar(0, 255, 0));

    for (size_t i = 0; i < n_points; ++i) {
      cv::circle(frame, point_at(i), 1, cv::Scalar(0, 255, 255), 2, cv::LINE_AA);
    }

    for (const auto& link : skeleton) {
      const auto u = static_cast<size_t>(link.first);
      const auto v = static_cast<size_t>(link.second);
      if (u >= n_points || v >= n_points) {
        continue;  // this target has no such keypoint
      }
      cv::line(frame, point_at(u), point_at(v), cv::Scalar(0, 255, 255), 1, cv::LINE_AA);
    }
  }
  cv::imshow("", frame);
  cv::waitKey(10);
}
// Demo entry point.
//
// Usage: <program> <device_name> <det_model_path> <pose_model_path> <video_path>
//
// Runs the pose tracker on every frame of the video, displays the result and finally logs the
// number of frames and the accumulated time spent in PoseTracker::Track.
// Returns 1 when arguments are missing or the video cannot be opened, 0 otherwise.
int main(int argc, char* argv[]) {
  if (argc < 5) {
    std::fprintf(stderr,
                 "usage: %s <device_name> <det_model_path> <pose_model_path> <video_path>\n",
                 argc > 0 ? argv[0] : "pose_tracker");
    return 1;
  }
  const auto device_name = argv[1];
  const auto det_model_path = argv[2];
  const auto pose_model_path = argv[3];
  const auto video_path = argv[4];

  // Open the video first so a bad path fails before the models are loaded.
  cv::VideoCapture video(video_path);
  if (!video.isOpened()) {
    std::fprintf(stderr, "failed to open video: %s\n", video_path);
    return 1;
  }

  Device device(device_name);
  Context context(device);
  // Schedulers are registered under the names used by the pipeline config ("pool") and,
  // presumably, by the inference tasks ("infer").
  auto pool = Scheduler::ThreadPool(4);
  auto infer = Scheduler::Thread();
  context.Add("pool", pool);
  context.Add("infer", infer);

  PoseTracker tracker(Model(det_model_path), Model(pose_model_path), context);
  auto state = tracker.CreateState();

  cv::Mat frame;
  std::chrono::duration<double, std::milli> dt{};
  int frame_id{};
  while (true) {
    video >> frame;
    if (!frame.data) {
      break;  // end of stream
    }
    // Only the tracking call is timed; decoding and visualization are excluded.
    const auto t0 = std::chrono::high_resolution_clock::now();
    auto result = tracker.Track(frame, state);
    const auto t1 = std::chrono::high_resolution_clock::now();
    dt += t1 - t0;
    ++frame_id;
    Visualize(frame, result);
  }
  MMDEPLOY_INFO("frames: {}, time {} ms", frame_id, dt.count());
  return 0;
}