[Feature] Pose tracker C/C++/Python API&demos (#1663)

* add PoseTracker API

* add mahalanobis distance, add det_pose demo

* simplify api

* simplify api

* fix cmake & fix `CropResizePad`

* ignore out of frame bboxes

* clean-up

* fix lint

* add c api docs

* add c++ api docs/comments

* fix gcc7 build

* fix gcc7+opencv3

* fix stupid lint

* fix ci

* add help info & webcam support for C++ pose tracker demo

* add webcam support for Python pose tracker demo

* fix lint

* minor

* minor

* fix MSVC build

* fix python binding

* simplify module adapter

* fix module adapter

* minor fix
pull/1688/head
Li Zhang 2023-01-31 11:24:24 +08:00 committed by GitHub
parent 093badf90c
commit 3d425bbb9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
55 changed files with 2724 additions and 1202 deletions

View File

@ -12,6 +12,9 @@ macro(add_object name)
target_compile_options(${name} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
endif ()
target_link_libraries(${name} PRIVATE mmdeploy::core)
target_include_directories(${name} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>)
set(CAPI_OBJS ${CAPI_OBJS} ${name})
mmdeploy_export(${name})
endmacro()
@ -31,6 +34,7 @@ foreach (TASK ${COMMON_LIST})
mmdeploy_add_library(${TARGET_NAME})
target_link_libraries(${TARGET_NAME} PRIVATE ${OBJECT_NAME})
target_include_directories(${TARGET_NAME} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mmdeploy/${TASK}.h
DESTINATION include/mmdeploy)

View File

@ -1,17 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "classifier.h"
#include "mmdeploy/classifier.h"
#include <numeric>
#include "common_internal.h"
#include "handle.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/codebase/mmcls/mmcls.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/graph.h"
#include "mmdeploy/core/utils/formatter.h"
#include "pipeline.h"
#include "mmdeploy/handle.h"
#include "mmdeploy/pipeline.h"
using namespace mmdeploy;
using namespace std;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_CLASSIFIER_H
#define MMDEPLOY_CLASSIFIER_H
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,9 +1,9 @@
#include "common.h"
#include "mmdeploy/common.h"
#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/profiler.h"
#include "mmdeploy/executor_internal.h"
mmdeploy_value_t mmdeploy_value_copy(mmdeploy_value_t value) {
if (!value) {

View File

@ -3,12 +3,12 @@
#ifndef MMDEPLOY_CSRC_APIS_C_COMMON_INTERNAL_H_
#define MMDEPLOY_CSRC_APIS_C_COMMON_INTERNAL_H_
#include "common.h"
#include "handle.h"
#include "mmdeploy/common.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/value.h"
#include "model.h"
#include "pipeline.h"
#include "mmdeploy/handle.h"
#include "mmdeploy/model.h"
#include "mmdeploy/pipeline.h"
using namespace mmdeploy;

View File

@ -1,6 +1,6 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "detector.h"
#include "mmdeploy/detector.h"
#include <deque>
#include <numeric>

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_DETECTOR_H
#define MMDEPLOY_DETECTOR_H
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,11 +1,11 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "executor.h"
#include "mmdeploy/executor.h"
#include "common.h"
#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/common.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/execution/when_all_value.h"
#include "mmdeploy/executor_internal.h"
using namespace mmdeploy;

View File

@ -3,7 +3,7 @@
#ifndef MMDEPLOY_CSRC_APIS_C_EXECUTOR_H_
#define MMDEPLOY_CSRC_APIS_C_EXECUTOR_H_
#include "common.h"
#include "mmdeploy/common.h"
#if __cplusplus
extern "C" {

View File

@ -3,8 +3,8 @@
#ifndef MMDEPLOY_CSRC_APIS_C_EXECUTOR_INTERNAL_H_
#define MMDEPLOY_CSRC_APIS_C_EXECUTOR_INTERNAL_H_
#include "executor.h"
#include "mmdeploy/execution/schedulers/registry.h"
#include "mmdeploy/executor.h"
using namespace mmdeploy;

View File

@ -1,11 +1,11 @@
// Copyright (c) OpenMMLab. All rights reserved.
// clang-format off
#include "model.h"
#include "mmdeploy/model.h"
#include <memory>
#include "common_internal.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/model.h"
// clang-format on

View File

@ -8,7 +8,7 @@
#ifndef MMDEPLOY_SRC_APIS_C_MODEL_H_
#define MMDEPLOY_SRC_APIS_C_MODEL_H_
#include "common.h"
#include "mmdeploy/common.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,10 +1,10 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "pipeline.h"
#include "mmdeploy/pipeline.h"
#include "common_internal.h"
#include "executor_internal.h"
#include "handle.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/executor_internal.h"
#include "mmdeploy/handle.h"
int mmdeploy_pipeline_create_v3(mmdeploy_value_t config, mmdeploy_context_t context,
mmdeploy_pipeline_t* pipeline) {

View File

@ -3,8 +3,8 @@
#ifndef MMDEPLOY_CSRC_APIS_C_PIPELINE_H_
#define MMDEPLOY_CSRC_APIS_C_PIPELINE_H_
#include "common.h"
#include "executor.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,17 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_detector.h"
#include "mmdeploy/pose_detector.h"
#include <numeric>
#include "common_internal.h"
#include "handle.h"
#include "mmdeploy/codebase/mmpose/mmpose.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/graph.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/utils/formatter.h"
#include "pipeline.h"
#include "mmdeploy/handle.h"
#include "mmdeploy/pipeline.h"
using namespace std;
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_
#define MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -0,0 +1,225 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/pose_tracker.h"
#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/codebase/mmpose/pose_tracker/common.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/mpl/structure.h"
#include "mmdeploy/pipeline.h"
namespace mmdeploy {
using namespace framework;
} // namespace mmdeploy
using namespace mmdeploy;
namespace {

// Builds the static pipeline configuration used by every pose tracker
// instance. The pipeline stages are: state preparation -> image preload ->
// conditional detector inference -> bbox merging/filtering -> pose inference
// -> tracking step. The model names "detection" and "pose" referenced in the
// "params" sections are registered on the context by
// mmdeploy_pose_tracker_create().
Value config_template() {
  // Raw JSON literal parsed once via the nlohmann `_json` UDL; the module
  // names (pose_tracker::Prepare / ProcessBboxes / TrackStep) must match the
  // modules registered elsewhere in the codebase.
  static const auto json = R"(
{
  "type": "Pipeline",
  "input": ["img", "force_det", "state"],
  "output": "targets",
  "tasks": [
    {
      "type": "Task",
      "name": "prepare",
      "module": "pose_tracker::Prepare",
      "input": ["img", "force_det", "state"],
      "output": "use_det"
    },
    {
      "type": "Task",
      "module": "Transform",
      "name": "preload",
      "input": "img",
      "output": "data",
      "transforms": [ { "type": "LoadImageFromFile" } ]
    },
    {
      "type": "Cond",
      "name": "cond",
      "input": ["use_det", "data"],
      "output": "dets",
      "body": {
        "name": "detection",
        "type": "Inference",
        "params": { "model": "detection" }
      }
    },
    {
      "type": "Task",
      "name": "process_bboxes",
      "module": "pose_tracker::ProcessBboxes",
      "input": ["dets", "data", "state"],
      "output": ["rois", "track_ids"]
    },
    {
      "input": "*rois",
      "output": "*keypoints",
      "name": "pose",
      "type": "Inference",
      "params": { "model": "pose" }
    },
    {
      "type": "Task",
      "name": "track_step",
      "module": "pose_tracker::TrackStep",
      "scheduler": "pool",
      "input": ["keypoints", "track_ids", "state"],
      "output": "targets"
    }
  ]
}
)"_json;
  // Parsed once and cached for the process lifetime; returned by value so
  // callers get an independent copy they may mutate.
  static const auto config = from_json<Value>(json);
  return config;
}

}  // namespace
// Fill `params` with the default tracker parameters (the defaults documented
// field-by-field in mmdeploy/pose_tracker.h). Always succeeds.
int mmdeploy_pose_tracker_default_params(mmdeploy_pose_tracker_param_t* params) {
  mmpose::_pose_tracker::SetDefaultParams(*params);
  return 0;
}
// Create a pose tracker pipeline from a detection model and a pose model.
// NOTE(review): the return codes of mmdeploy_context_add are ignored here —
// a registration failure would only surface later in pipeline creation.
int mmdeploy_pose_tracker_create(mmdeploy_model_t det_model, mmdeploy_model_t pose_model,
                                 mmdeploy_context_t context, mmdeploy_pose_tracker_t* pipeline) {
  // Register the models under the names referenced by config_template()
  // ("detection" and "pose"), then build the pipeline on that context.
  mmdeploy_context_add(context, MMDEPLOY_TYPE_MODEL, "detection", det_model);
  mmdeploy_context_add(context, MMDEPLOY_TYPE_MODEL, "pose", pose_model);
  auto config = config_template();
  return mmdeploy_pipeline_create_v3(Cast(&config), context, (mmdeploy_pipeline_t*)pipeline);
}
// Destroy the pipeline; a pose tracker handle is just a type-punned
// mmdeploy_pipeline_t, so destruction delegates to the pipeline API.
void mmdeploy_pose_tracker_destroy(mmdeploy_pose_tracker_t pipeline) {
  mmdeploy_pipeline_destroy((mmdeploy_pipeline_t)pipeline);
}
// Create a per-video-stream tracker state from `params`.
// NOTE(review): the `pipeline` argument is currently unused — the state is
// produced solely by the registered "pose_tracker::Create" module.
int mmdeploy_pose_tracker_create_state(mmdeploy_pose_tracker_t pipeline,
                                       const mmdeploy_pose_tracker_param_t* params,
                                       mmdeploy_pose_tracker_state_t* state) {
  try {
    // The state is a heap-allocated Value (the module's first output),
    // exposed to C callers as an opaque handle; freed by destroy_state().
    auto create_fn = gRegistry<Module>().Create("pose_tracker::Create", Value()).value();
    *state = reinterpret_cast<mmdeploy_pose_tracker_state_t>(new Value(
        create_fn->Process({const_cast<mmdeploy_pose_tracker_param_t*>(params)}).value()[0]));
    return MMDEPLOY_SUCCESS;
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
  } catch (...) {
    MMDEPLOY_ERROR("unknown exception caught");
  }
  return MMDEPLOY_E_FAIL;
}
// Release a tracker state created by mmdeploy_pose_tracker_create_state
// (the handle is an owning pointer to a heap-allocated Value).
void mmdeploy_pose_tracker_destroy_state(mmdeploy_pose_tracker_state_t state) {
  delete reinterpret_cast<Value*>(state);
}
// Pack a batch of (state, frame, use_detect) triples into the pipeline's
// input Value layout: [images, use_dets, trackers], each an array of
// `batch_size` entries. `use_detect` may be null, in which case every frame
// gets -1 (meaning: follow params.det_interval).
int mmdeploy_pose_tracker_create_input(mmdeploy_pose_tracker_state_t* states,
                                       const mmdeploy_mat_t* frames, const int32_t* use_detect,
                                       int batch_size, mmdeploy_value_t* value) {
  try {
    Value::Array images;
    Value::Array use_dets;
    Value::Array trackers;
    for (int i = 0; i < batch_size; ++i) {
      images.push_back({{"ori_img", Cast(frames[i])}});
      use_dets.emplace_back(use_detect ? use_detect[i] : -1);
      // The state Values are copied into the input; the opaque handles stay
      // owned by the caller.
      trackers.push_back(*reinterpret_cast<Value*>(states[i]));
    }
    *value = Take(Value{std::move(images), std::move(use_dets), std::move(trackers)});
    return MMDEPLOY_SUCCESS;
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
  } catch (...) {
    MMDEPLOY_ERROR("unknown exception caught");
  }
  return MMDEPLOY_E_FAIL;
}
// Owning storage for one batch of results: a flat array of targets, a
// per-frame count vector, and the holder keeping the per-target keypoint /
// score vectors alive. Structure<> allocates all three together; release
// happens in mmdeploy_pose_tracker_release_result.
using ResultType = mmdeploy::Structure<mmdeploy_pose_tracker_target_t, std::vector<int32_t>,
                                       std::vector<mmpose::_pose_tracker::TrackerResult>>;

// Convert the pipeline's output Value into C structs.
// On success, *results is a flat buffer of all targets across the batch and
// *result_count[i] is the number of targets in frame i; both must be freed
// with mmdeploy_pose_tracker_release_result.
int mmdeploy_pose_tracker_get_result(mmdeploy_value_t output,
                                     mmdeploy_pose_tracker_target_t** results,
                                     int32_t** result_count) {
  // Fix: `result_count` is written through below, so it must be validated
  // alongside `output` and `results` (it was previously unchecked).
  if (!output || !results || !result_count) {
    return MMDEPLOY_E_INVALID_ARG;
  }
  try {
    // Deserialize the tracker results from the pipeline output Value.
    std::vector<mmpose::_pose_tracker::TrackerResult> res;
    from_value(Cast(output)->front(), res);
    size_t total = 0;
    for (const auto& r : res) {
      total += r.bboxes.size();
    }
    // Reserve space for the output structure: `total` targets, one count
    // vector, one result holder.
    ResultType result_type({total, 1, 1});
    auto [result_data, result_cnt, result_holder] = result_type.pointers();
    auto result_ptr = result_data;
    // Move the deserialized results into the holder so the keypoint/score
    // arrays pointed to below outlive this function.
    result_holder->swap(res);
    // Build the flat output structure.
    for (auto& r : *result_holder) {
      for (size_t j = 0; j < r.bboxes.size(); ++j) {
        auto& p = *result_ptr++;
        p.keypoint_count = static_cast<int32_t>(r.keypoints[j].size());
        p.keypoints = r.keypoints[j].data();
        p.scores = r.scores[j].data();
        p.bbox = r.bboxes[j];
        p.target_id = r.track_ids[j];
      }
      result_cnt->push_back(static_cast<int32_t>(r.bboxes.size()));
    }
    *results = result_data;
    *result_count = result_cnt->data();
    // Ownership transfers to the caller; release() prevents the Structure
    // from freeing the buffers here.
    result_type.release();
    return MMDEPLOY_SUCCESS;
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
  } catch (...) {
    MMDEPLOY_ERROR("unknown exception caught");
  }
  return MMDEPLOY_E_FAIL;
}
// Batched end-to-end entry point: pack inputs, run the pipeline, unpack
// results. The `wrapped<>` RAII guards release the intermediate Values on
// every exit path; the final results are owned by the caller (see
// mmdeploy_pose_tracker_release_result).
int mmdeploy_pose_tracker_apply(mmdeploy_pose_tracker_t pipeline,
                                mmdeploy_pose_tracker_state_t* states, const mmdeploy_mat_t* frames,
                                const int32_t* use_detect, int32_t count,
                                mmdeploy_pose_tracker_target_t** results, int32_t** result_count) {
  wrapped<mmdeploy_value_t> input;
  if (auto ec =
          mmdeploy_pose_tracker_create_input(states, frames, use_detect, count, input.ptr())) {
    return ec;
  }
  wrapped<mmdeploy_value_t> output;
  if (auto ec = mmdeploy_pipeline_apply((mmdeploy_pipeline_t)pipeline, input, output.ptr())) {
    return ec;
  }
  if (auto ec = mmdeploy_pose_tracker_get_result(output, results, result_count)) {
    return ec;
  }
  return MMDEPLOY_SUCCESS;
}
// Free buffers produced by mmdeploy_pose_tracker_get_result/_apply:
// reconstruct the owning Structure over the released pointers and let its
// destructor free the targets, the count vector and the result holder.
void mmdeploy_pose_tracker_release_result(mmdeploy_pose_tracker_target_t* results,
                                          const int32_t* result_count, int count) {
  // Total number of targets across the batch = sum of per-frame counts.
  auto total = std::accumulate(result_count, result_count + count, 0);
  ResultType deleter({static_cast<size_t>(total), 1, 1}, results);
}

View File

@ -0,0 +1,158 @@
// Copyright (c) OpenMMLab. All rights reserved.
/**
* @file pose_tracker.h
* @brief Pose tracker C API
*/
#ifndef MMDEPLOY_POSE_TRACKER_H
#define MMDEPLOY_POSE_TRACKER_H
#include "mmdeploy/common.h"
#include "mmdeploy/detector.h"
#include "mmdeploy/model.h"
#include "mmdeploy/pose_detector.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct mmdeploy_pose_tracker* mmdeploy_pose_tracker_t;
typedef struct mmdeploy_pose_tracker_state* mmdeploy_pose_tracker_state_t;
// Tunable parameters for a tracker state; obtain defaults with
// mmdeploy_pose_tracker_default_params before overriding individual fields.
typedef struct mmdeploy_pose_tracker_param_t {
  // detection interval, default = 1
  int32_t det_interval;
  // detection label use for pose estimation, default = 0
  int32_t det_label;
  // detection score threshold, default = 0.5
  float det_thr;
  // detection minimum bbox size (compute as sqrt(area)), default = -1
  float det_min_bbox_size;
  // nms iou threshold for merging detected bboxes and bboxes from tracked targets, default = 0.7
  float det_nms_thr;
  // max number of bboxes used for pose estimation per frame, default = -1
  int32_t pose_max_num_bboxes;
  // threshold for visible key-points, default = 0.5
  float pose_kpt_thr;
  // min number of key-points for valid poses, default = -1
  int32_t pose_min_keypoints;
  // scale for expanding key-points to bbox, default = 1.25
  float pose_bbox_scale;
  // min pose bbox size, tracks with bbox size smaller than the threshold will be dropped,
  // default = -1
  float pose_min_bbox_size;
  // nms oks/iou threshold for suppressing overlapped poses, useful when multiple pose estimations
  // collapse to the same target, default = 0.5
  float pose_nms_thr;
  // keypoint sigmas for computing OKS, will use IOU if not set, default = nullptr
  // (not owned; must stay valid while the tracker state is in use)
  float* keypoint_sigmas;
  // size of keypoint sigma array, must be consistent with the number of key-points, default = 0
  int32_t keypoint_sigmas_size;
  // iou threshold for associating missing tracks, default = 0.4
  float track_iou_thr;
  // max number of missing frames before a missing tracks is removed, default = 10
  int32_t track_max_missing;
  // track history size, default = 1
  int32_t track_history_size;
  // weight of position for setting covariance matrices of kalman filters, default = 0.05
  float std_weight_position;
  // weight of velocity for setting covariance matrices of kalman filters, default = 0.00625
  float std_weight_velocity;
  // params for the one-euro filter for smoothing the outputs - (beta, fc_min, fc_derivative)
  // default = (0.007, 1, 1)
  float smooth_params[3];
} mmdeploy_pose_tracker_param_t;
// One tracked target in one frame. The `keypoints` and `scores` arrays are
// owned by the result buffer returned from mmdeploy_pose_tracker_apply and
// are freed by mmdeploy_pose_tracker_release_result.
typedef struct mmdeploy_pose_tracker_target_t {
  mmdeploy_point_t* keypoints;  // key-points of the target
  int32_t keypoint_count;       // size of `keypoints` array
  float* scores;                // scores of each key-point
  mmdeploy_rect_t bbox;         // estimated bbox from key-points
  uint32_t target_id;           // target id from internal tracker
} mmdeploy_pose_tracker_target_t;
/**
* @brief Fill params with default parameters
* @param[in,out] params
* @return status of the operation
*/
MMDEPLOY_API int mmdeploy_pose_tracker_default_params(mmdeploy_pose_tracker_param_t* params);
/**
* @brief Create pose tracker pipeline
* @param[in] det_model detection model object, created by \ref mmdeploy_model_create
* @param[in] pose_model pose model object
* @param[in] context context object describing execution environment (device, profiler, etc...),
* created by \ref mmdeploy_context_create
* @param[out] pipeline handle of the created pipeline
* @return status of the operation
*/
MMDEPLOY_API int mmdeploy_pose_tracker_create(mmdeploy_model_t det_model,
mmdeploy_model_t pose_model,
mmdeploy_context_t context,
mmdeploy_pose_tracker_t* pipeline);
/**
* @brief Destroy pose tracker pipeline
* @param[in] pipeline
*/
MMDEPLOY_API void mmdeploy_pose_tracker_destroy(mmdeploy_pose_tracker_t pipeline);
/**
* @brief Create a tracker state handle corresponds to a video stream
* @param[in] pipeline handle of a pose tracker pipeline
* @param[in] params params for creating the tracker state
* @param[out] state handle of the created tracker state
* @return status of the operation
*/
MMDEPLOY_API int mmdeploy_pose_tracker_create_state(mmdeploy_pose_tracker_t pipeline,
const mmdeploy_pose_tracker_param_t* params,
mmdeploy_pose_tracker_state_t* state);
/**
* @brief Destroy tracker state
* @param[in] state handle of the tracker state
*/
MMDEPLOY_API void mmdeploy_pose_tracker_destroy_state(mmdeploy_pose_tracker_state_t state);
/**
* @brief Apply pose tracker pipeline, notice that this function supports batch operation by feeding
* arrays of size \p count to \p states, \p frames and \p use_detect
* @param[in] pipeline handle of a pose tracker pipeline
* @param[in] states tracker states handles, array of size \p count
* @param[in] frames input frames of size \p count
* @param[in] use_detect control the use of detector, array of size \p count
* -1: use params.det_interval, 0: don't use detector, 1: force use detector
* @param[in] count batch size
* @param[out] results a linear buffer contains the tracked targets of input frames. Should be
* released by \ref mmdeploy_pose_tracker_release_result
* @param[out] result_count a linear buffer of size \p count contains the number of tracked
* targets of the frames. Should be released by \ref mmdeploy_pose_tracker_release_result
* @return status of the operation
*/
MMDEPLOY_API int mmdeploy_pose_tracker_apply(mmdeploy_pose_tracker_t pipeline,
mmdeploy_pose_tracker_state_t* states,
const mmdeploy_mat_t* frames,
const int32_t* use_detect, int32_t count,
mmdeploy_pose_tracker_target_t** results,
int32_t** result_count);
/**
* @brief Release result objects
* @param[in] results
* @param[in] result_count
* @param[in] count
*/
MMDEPLOY_API void mmdeploy_pose_tracker_release_result(mmdeploy_pose_tracker_target_t* results,
const int32_t* result_count, int count);
#ifdef __cplusplus
}
#endif
#endif // MMDEPLOY_POSE_TRACKER_H

View File

@ -1,16 +1,16 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "restorer.h"
#include "mmdeploy/restorer.h"
#include "common_internal.h"
#include "executor_internal.h"
#include "handle.h"
#include "mmdeploy/codebase/mmedit/mmedit.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/graph.h"
#include "mmdeploy/core/mpl/structure.h"
#include "mmdeploy/core/utils/formatter.h"
#include "pipeline.h"
#include "mmdeploy/executor_internal.h"
#include "mmdeploy/handle.h"
#include "mmdeploy/pipeline.h"
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_SRC_APIS_C_RESTORER_H_
#define MMDEPLOY_SRC_APIS_C_RESTORER_H_
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,16 +1,16 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "rotated_detector.h"
#include "mmdeploy/rotated_detector.h"
#include <numeric>
#include "common_internal.h"
#include "handle.h"
#include "mmdeploy/codebase/mmrotate/mmrotate.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/graph.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/utils/formatter.h"
#include "pipeline.h"
#include "mmdeploy/handle.h"
#include "mmdeploy/pipeline.h"
using namespace std;
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_SRC_APIS_C_ROTATED_DETECTOR_H_
#define MMDEPLOY_SRC_APIS_C_ROTATED_DETECTOR_H_
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,17 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "segmentor.h"
#include "mmdeploy/segmentor.h"
#include "common_internal.h"
#include "handle.h"
#include "mmdeploy/codebase/mmseg/mmseg.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/graph.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/mpl/structure.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/formatter.h"
#include "pipeline.h"
#include "mmdeploy/handle.h"
#include "mmdeploy/pipeline.h"
using namespace std;
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_SEGMENTOR_H
#define MMDEPLOY_SEGMENTOR_H
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,17 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "text_detector.h"
#include "mmdeploy/text_detector.h"
#include <numeric>
#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/formatter.h"
#include "model.h"
#include "pipeline.h"
#include "mmdeploy/executor_internal.h"
#include "mmdeploy/model.h"
#include "mmdeploy/pipeline.h"
using namespace std;
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_TEXT_DETECTOR_H
#define MMDEPLOY_TEXT_DETECTOR_H
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,21 +1,21 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "text_recognizer.h"
#include "mmdeploy/text_recognizer.h"
#include <numeric>
#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/codebase/mmocr/mmocr.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "model.h"
#include "pipeline.h"
#include "mmdeploy/executor_internal.h"
#include "mmdeploy/model.h"
#include "mmdeploy/pipeline.h"
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_
#define MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_
#include "common.h"
#include "executor.h"
#include "text_detector.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/text_detector.h"
#ifdef __cplusplus
extern "C" {

View File

@ -1,22 +1,22 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "video_recognizer.h"
#include "mmdeploy/video_recognizer.h"
#include <numeric>
#include <vector>
#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/codebase/mmaction/mmaction.h"
#include "mmdeploy/common_internal.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "model.h"
#include "pipeline.h"
#include "mmdeploy/executor_internal.h"
#include "mmdeploy/model.h"
#include "mmdeploy/pipeline.h"
using namespace mmdeploy;

View File

@ -8,9 +8,9 @@
#ifndef MMDEPLOY_VIDEO_RECOGNIZER_H
#define MMDEPLOY_VIDEO_RECOGNIZER_H
#include "common.h"
#include "executor.h"
#include "model.h"
#include "mmdeploy/common.h"
#include "mmdeploy/executor.h"
#include "mmdeploy/model.h"
#ifdef __cplusplus
extern "C" {

View File

@ -28,6 +28,30 @@ namespace cxx {
using Rect = mmdeploy_rect_t;
// Move-only owner of an opaque C handle of type T (a raw pointer type).
// Base class for the C++ wrapper types; provides move construction /
// assignment that transfers the handle and nulls the source.
template <typename T>
class UniqueHandle : public NonCopyable {
 public:
  UniqueHandle() = default;
  explicit UniqueHandle(T handle) : handle_(handle) {}

  // derived class must destroy the object and reset `handle_`;
  // this assert catches derived destructors that forgot to do so
  ~UniqueHandle() { assert(handle_ == nullptr); }

  UniqueHandle(UniqueHandle&& o) noexcept : handle_(std::exchange(o.handle_, nullptr)) {}

  // NOTE(review): move-assignment overwrites `handle_` without destroying a
  // previously held handle — relies on derived classes only assigning into
  // empty objects; confirm before reusing this base elsewhere.
  UniqueHandle& operator=(UniqueHandle&& o) noexcept {
    if (this != &o) {
      handle_ = std::exchange(o.handle_, nullptr);
    }
    return *this;
  }

  // Explicit conversion to the raw C handle (non-owning view).
  explicit operator T() const noexcept { return handle_; }
  T operator->() const noexcept { return handle_; }

 protected:
  T handle_{};
};
class Model {
public:
explicit Model(const char* path) {
@ -39,6 +63,8 @@ class Model {
model_.reset(model, [](auto p) { mmdeploy_model_destroy(p); });
}
explicit Model(const std::string& path) : Model(path.c_str()) {}
Model(const void* buffer, size_t size) {
mmdeploy_model_t model{};
auto ec = mmdeploy_model_create(buffer, static_cast<int>(size), &model);
@ -102,6 +128,8 @@ class Mat {
mmdeploy_data_type_t type, uint8_t* data, mmdeploy_device_t device = nullptr)
: desc_{data, height, width, channels, format, type, device} {}
Mat(const mmdeploy_mat_t& desc) : desc_(desc) {} // NOLINT
const mmdeploy_mat_t& desc() const noexcept { return desc_; }
#if MMDEPLOY_CXX_USE_OPENCV
@ -146,6 +174,16 @@ class Mat {
template <typename T>
class Result_ {
public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using reference = T&;
using const_reference = const T&;
using pointer = T*;
using const_pointer = const T*;
using iterator = T*;
using const_iterator = T*;
Result_(size_t offset, size_t size, std::shared_ptr<T> data)
: offset_(offset), size_(size), data_(std::move(data)) {}

View File

@ -0,0 +1,151 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_POSE_TRACKER_HPP
#define MMDEPLOY_POSE_TRACKER_HPP
#include "mmdeploy/common.hpp"
#include "mmdeploy/pose_tracker.h"
namespace mmdeploy {
namespace cxx {
// C++ wrapper over the C pose tracker API (mmdeploy/pose_tracker.h).
// Owns an mmdeploy_pose_tracker_t pipeline handle; move-only via UniqueHandle.
class PoseTracker : public UniqueHandle<mmdeploy_pose_tracker_t> {
 public:
  // Per-frame result view: a span of targets backed by a shared buffer.
  using Result = Result_<mmdeploy_pose_tracker_target_t>;
  class State;
  class Params;

 public:
  /**
   * @brief Create pose tracker pipeline
   * @param detect object detection model
   * @param pose pose estimation model
   * @param context execution context
   */
  PoseTracker(const Model& detect, const Model& pose, const Context& context) {
    auto ec = mmdeploy_pose_tracker_create(detect, pose, context, &handle_);
    if (ec != MMDEPLOY_SUCCESS) {
      throw_exception(static_cast<ErrorCode>(ec));
    }
  }

  ~PoseTracker() {
    if (handle_) {
      mmdeploy_pose_tracker_destroy(handle_);
      handle_ = {};
    }
  }

  PoseTracker(PoseTracker&&) noexcept = default;

  /**
   * @brief Create a tracker state corresponds to a video stream
   * @param params params for creating the tracker state
   * @return created tracker state
   */
  State CreateState(const Params& params);

  /**
   * @brief Apply pose tracker pipeline
   * @param state tracker state
   * @param frame input video frame
   * @param detect control the use of detector
   *   -1: use params.det_interval, 0: don't use detector, 1: force use detector
   * @return tracked targets for the frame
   */
  Result Apply(State& state, const Mat& frame, int detect = -1);

  /**
   * @brief batched version of Apply; all spans must have the same size
   * @param states tracker states, one per stream
   * @param frames input frames, one per stream
   * @param detects detector-control flags (empty span allowed at C++ level
   *   only when forwarded by the single-frame overload)
   * @return per-frame results
   */
  std::vector<Result> Apply(const Span<State>& states, const Span<const Mat>& frames,
                            const Span<const int>& detects = {});

 public:
  /**
   * Owning wrapper over mmdeploy_pose_tracker_param_t, initialized with the
   * library defaults; see \ref mmdeploy/pose_tracker.h for field meanings.
   */
  class Params : public UniqueHandle<mmdeploy_pose_tracker_param_t*> {
   public:
    explicit Params() {
      handle_ = new mmdeploy_pose_tracker_param_t{};
      mmdeploy_pose_tracker_default_params(handle_);
    }
    ~Params() {
      if (handle_) {
        delete handle_;
        handle_ = {};
      }
    }
  };

  // Per-video-stream tracker state; move-only, destroyed via the C API.
  class State : public UniqueHandle<mmdeploy_pose_tracker_state_t> {
   public:
    explicit State(mmdeploy_pose_tracker_t pipeline, const mmdeploy_pose_tracker_param_t* params) {
      auto ec = mmdeploy_pose_tracker_create_state(pipeline, params, &handle_);
      if (ec != MMDEPLOY_SUCCESS) {
        throw_exception(static_cast<ErrorCode>(ec));
      }
    }
    ~State() {
      if (handle_) {
        mmdeploy_pose_tracker_destroy_state(handle_);
        handle_ = {};
      }
    }
    State(State&&) noexcept = default;
  };
};
// Create a stream state; `params` is only read during construction and need
// not outlive the returned State.
inline PoseTracker::State PoseTracker::CreateState(const PoseTracker::Params& params) {
  return State(handle_, static_cast<mmdeploy_pose_tracker_param_t*>(params));
}
inline std::vector<PoseTracker::Result> PoseTracker::Apply(const Span<State>& states,
const Span<const Mat>& frames,
const Span<const int32_t>& detects) {
if (frames.empty()) {
return {};
}
mmdeploy_pose_tracker_target_t* results{};
int32_t* result_count{};
auto ec = mmdeploy_pose_tracker_apply(
handle_, reinterpret_cast<mmdeploy_pose_tracker_state_t*>(states.data()),
reinterpret(frames.data()), detects.data(), static_cast<int32_t>(frames.size()), &results,
&result_count);
if (ec != MMDEPLOY_SUCCESS) {
throw_exception(static_cast<ErrorCode>(ec));
}
std::shared_ptr<mmdeploy_pose_tracker_target_t> data(
results, [result_count, count = frames.size()](auto p) {
mmdeploy_pose_tracker_release_result(p, result_count, count);
});
std::vector<Result> rets;
rets.reserve(frames.size());
size_t offset = 0;
for (size_t i = 0; i < frames.size(); ++i) {
offset += rets.emplace_back(offset, result_count[i], data).size();
}
return rets;
}
// Single-frame convenience overload: wraps the arguments in one-element
// spans and returns the first (only) per-frame result.
inline PoseTracker::Result PoseTracker::Apply(PoseTracker::State& state, const Mat& frame,
                                              int32_t detect) {
  return Apply(Span(&state, 1), Span{frame}, Span{detect})[0];
}
} // namespace cxx
using cxx::PoseTracker;
} // namespace mmdeploy
#endif // MMDEPLOY_POSE_TRACKER_HPP

View File

@ -0,0 +1,149 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/pose_tracker.hpp"
#include "common.h"
#include "mmdeploy/common.hpp"
namespace mmdeploy::python {
namespace {
// Run the pose tracker on a batch of frames and convert the results into
// python tuples of (keypoints[N,K,3], bboxes[N,4], track_ids[N]) per frame,
// where the last keypoint channel is the score.
std::vector<py::tuple> Apply(mmdeploy::PoseTracker* self,
                             const std::vector<mmdeploy::PoseTracker::State*>& _states,
                             const std::vector<PyImage>& _frames, std::vector<int> detect) {
  // Re-pack the state pointers into a contiguous array of C handles so the
  // batched C++ Apply can view them as a span of State objects.
  std::vector<mmdeploy_pose_tracker_state_t> tmp;
  for (const auto& s : _states) {
    tmp.push_back(static_cast<mmdeploy_pose_tracker_state_t>(*s));
  }
  mmdeploy::Span states(reinterpret_cast<mmdeploy::PoseTracker::State*>(tmp.data()), tmp.size());
  std::vector<mmdeploy::Mat> frames;
  for (const auto& f : _frames) {
    frames.emplace_back(GetMat(f));
  }
  if (detect.empty()) {
    detect.resize(frames.size(), -1);  // -1: follow params.det_interval
  }
  assert(states.size() == frames.size());
  assert(states.size() == detect.size());
  auto results = self->Apply(states, frames, detect);
  std::vector<py::tuple> batch_ret;
  batch_ret.reserve(frames.size());
  for (const auto& rs : results) {
    const int num_targets = static_cast<int>(rs.size());
    // Fix: the original read rs[0].keypoint_count unconditionally, which is
    // out of bounds for a frame with zero tracked targets; fall back to 0
    // keypoints so empty frames yield (0, 0, 3) / (0, 4) / (0,) arrays.
    const int num_kpts = rs.empty() ? 0 : rs[0].keypoint_count;
    py::array_t<float> keypoints({num_targets, num_kpts, 3});
    py::array_t<float> bboxes({num_targets, 4});
    py::array_t<uint32_t> track_ids(num_targets);
    auto kpts_ptr = keypoints.mutable_data();
    auto bbox_ptr = bboxes.mutable_data();
    auto track_id_ptr = track_ids.mutable_data();
    for (const auto& r : rs) {
      // Interleave (x, y, score) triplets per keypoint.
      for (int i = 0; i < r.keypoint_count; ++i) {
        kpts_ptr[0] = r.keypoints[i].x;
        kpts_ptr[1] = r.keypoints[i].y;
        kpts_ptr[2] = r.scores[i];
        kpts_ptr += 3;
      }
      {
        // mmdeploy_rect_t is 4 contiguous floats; copy field-wise via an
        // array view.
        auto tmp_bbox = (std::array<float, 4>&)r.bbox;
        bbox_ptr[0] = tmp_bbox[0];
        bbox_ptr[1] = tmp_bbox[1];
        bbox_ptr[2] = tmp_bbox[2];
        bbox_ptr[3] = tmp_bbox[3];
        bbox_ptr += 4;
      }
      *track_id_ptr++ = r.target_id;
    }
    batch_ret.push_back(
        py::make_tuple(std::move(keypoints), std::move(bboxes), std::move(track_ids)));
  }
  return batch_ret;
}
// Copy a python float array into a fixed-size C array of exactly N elements.
template <typename T, size_t N>
void Copy(const py::handle& h, T (&a)[N]) {
  auto array = h.cast<py::array_t<float>>();
  // Fix: cast so the assert compares like-signed values (array.size() is
  // signed, N is size_t).
  assert(static_cast<size_t>(array.size()) == N);
  auto data = array.data();
  // Fix: size_t index to match N and avoid a signed/unsigned comparison.
  for (size_t i = 0; i < N; ++i) {
    a[i] = data[i];
  }
}
// Translate a python kwargs dict into tracker params. `sigmas` keeps the
// keypoint-sigma array alive for the caller, since params->keypoint_sigmas
// is a non-owning pointer into it. Unknown keys are reported, not fatal.
void Parse(const py::dict& dict, PoseTracker::Params& params, py::array_t<float>& sigmas) {
  for (const auto& [_name, value] : dict) {
    auto name = _name.cast<std::string>();
    if (name == "det_interval") {
      params->det_interval = value.cast<int32_t>();
    } else if (name == "det_label") {
      params->det_label = value.cast<int32_t>();
    } else if (name == "det_thr") {
      params->det_thr = value.cast<float>();
    } else if (name == "det_min_bbox_size") {
      params->det_min_bbox_size = value.cast<float>();
    } else if (name == "det_nms_thr") {
      params->det_nms_thr = value.cast<float>();
    } else if (name == "pose_max_num_bboxes") {
      params->pose_max_num_bboxes = value.cast<int32_t>();
    } else if (name == "pose_min_keypoints") {
      params->pose_min_keypoints = value.cast<int32_t>();
    } else if (name == "pose_min_bbox_size") {
      params->pose_min_bbox_size = value.cast<float>();
    } else if (name == "pose_nms_thr") {
      params->pose_nms_thr = value.cast<float>();
    } else if (name == "track_kpt_thr") {
      // NOTE(review): key name "track_kpt_thr" sets pose_kpt_thr — confirm
      // this alias is intentional before renaming either side.
      params->pose_kpt_thr = value.cast<float>();
    } else if (name == "track_iou_thr") {
      params->track_iou_thr = value.cast<float>();
    } else if (name == "pose_bbox_scale") {
      params->pose_bbox_scale = value.cast<float>();
    } else if (name == "track_max_missing") {
      // Fix: the field is int32_t; the original cast<float>() truncated
      // through an implicit float->int conversion.
      params->track_max_missing = value.cast<int32_t>();
    } else if (name == "track_history_size") {
      params->track_history_size = value.cast<int32_t>();
    } else if (name == "keypoint_sigmas") {
      sigmas = value.cast<py::array_t<float>>();
      params->keypoint_sigmas = const_cast<float*>(sigmas.data());
      // Fix: explicit narrowing from ssize_t to the int32_t field.
      params->keypoint_sigmas_size = static_cast<int32_t>(sigmas.size());
    } else if (name == "std_weight_position") {
      params->std_weight_position = value.cast<float>();
    } else if (name == "std_weight_velocity") {
      params->std_weight_velocity = value.cast<float>();
    } else if (name == "smooth_params") {
      Copy(value, params->smooth_params);
    } else {
      MMDEPLOY_ERROR("unused argument: {}", name);
    }
  }
}
} // namespace
// Python bindings for PoseTracker: exposes __call__ (single state/frame),
// batch (parallel states/frames/detect flags) and create_state (tracker
// parameters as keyword arguments).
static PythonBindingRegisterer register_pose_tracker{[](py::module& m) {
  // opaque state handle; created by PoseTracker.create_state
  py::class_<mmdeploy::PoseTracker::State>(m, "PoseTracker.State");
  py::class_<mmdeploy::PoseTracker>(m, "PoseTracker")
      .def(py::init([](const char* det_model_path, const char* pose_model_path,
                       const char* device_name, int device_id) {
             return mmdeploy::PoseTracker(
                 mmdeploy::Model(det_model_path), mmdeploy::Model(pose_model_path),
                 mmdeploy::Context(mmdeploy::Device(device_name, device_id)));
           }),
           py::arg("det_model"), py::arg("pose_model"), py::arg("device_name"),
           py::arg("device_id") = 0)
      .def(
          "__call__",
          // single-frame convenience wrapper over the batch Apply above
          [](mmdeploy::PoseTracker* self, mmdeploy::PoseTracker::State* state, const PyImage& img,
             int detect) { return Apply(self, {state}, {img}, {detect})[0]; },
          py::arg("state"), py::arg("frame"), py::arg("detect") = -1)
      .def("batch", &Apply, py::arg("states"), py::arg("frames"),
           py::arg("detects") = std::vector<int>{})
      .def("create_state", [](mmdeploy::PoseTracker* self, const py::kwargs& kwargs) {
        PoseTracker::Params params;
        // `sigmas` keeps kwarg-provided sigma storage alive during CreateState
        py::array_t<float> sigmas;
        if (kwargs) {
          Parse(kwargs, params, sigmas);
        }
        return self->CreateState(params);
      });
}};
} // namespace mmdeploy::python

View File

@ -5,12 +5,17 @@ project(mmdeploy_mmpose)
find_package(OpenCV REQUIRED)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} MMPOSE_SRCS)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/pose_tracker POSE_TRACKER_SRCS)
mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS})
mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS} ${POSE_TRACKER_SRCS})
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy::transform
mmdeploy_operation
mmdeploy_opencv_utils
${OpenCV_LIBS})
target_include_directories(${PROJECT_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../../apis/c)
add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME})
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector CACHE INTERNAL "")
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector pose_tracker CACHE INTERNAL "")

View File

@ -0,0 +1,57 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
#include <array>
#include <vector>
#include "mmdeploy/core/mpl/type_traits.h"
#include "mmdeploy/pose_tracker.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Per-frame tracking output exported to the C API: one entry per reported
// track, with the first four vectors parallel to each other.
struct TrackerResult {
  std::vector<std::vector<mmdeploy_point_t>> keypoints;  // smoothed key-points per track
  std::vector<std::vector<float>> scores;                // key-point scores per track
  std::vector<mmdeploy_rect_t> bboxes;                   // smoothed bbox per track
  std::vector<uint32_t> track_ids;                       // stable track identifier
  // debug info
  std::vector<std::array<float, 4>> pose_input_bboxes;   // bboxes fed to the pose model
  std::vector<std::array<float, 4>> pose_output_bboxes;  // bboxes derived from pose output
};
// Populate `p` with the default pose tracker parameters.
// Negative values mean "disabled"/"auto" for the respective threshold.
inline void SetDefaultParams(mmdeploy_pose_tracker_param_t& p) {
  // detection stage
  p.det_interval = 1;
  p.det_label = 0;
  p.det_thr = .5f;
  p.det_min_bbox_size = -1;
  p.det_nms_thr = .7f;
  // pose estimation stage
  p.pose_max_num_bboxes = -1;
  p.pose_min_keypoints = -1;
  p.pose_min_bbox_size = 0;
  p.pose_kpt_thr = .5f;
  p.pose_nms_thr = .5f;
  p.pose_bbox_scale = 1.25f;
  p.keypoint_sigmas = nullptr;
  p.keypoint_sigmas_size = 0;
  // tracking stage
  p.track_iou_thr = .4f;
  p.track_max_missing = 10;
  p.track_history_size = 1;
  p.std_weight_position = 1 / 20.f;
  p.std_weight_velocity = 1 / 160.f;
  // smoothing filter parameters: [beta, fc_min, fc_v]
  p.smooth_params[0] = 0.007f;
  p.smooth_params[1] = 1.f;
  p.smooth_params[2] = 1.f;
}
} // namespace mmdeploy::mmpose::_pose_tracker
namespace mmdeploy {
MMDEPLOY_REGISTER_TYPE_ID(mmdeploy_pose_tracker_param_t*, 0x32bc6919d5287035);
MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::TrackerResult, 0xacb6ddb7dc1ffbca);
} // namespace mmdeploy
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H

View File

@ -0,0 +1,117 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "pose_tracker/common.h"
#include "pose_tracker/pose_tracker.h"
namespace mmdeploy {
MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::Tracker, 0xcfe87980aa895d3a);
}
namespace mmdeploy::mmpose::_pose_tracker {
#define REGISTER_SIMPLE_MODULE(name, fn) \
MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (name, 0), [](const Value&) { return CreateTask(fn); });
// Decide whether the detector should run on the current frame.
// `use_det`: -1 = auto (use det_interval), otherwise the caller's choice wins.
Value Prepare(const Value& data, const Value& use_det, Value state) {
  auto& tracker = state.get_ref<Tracker&>();
  // on the very first frame, record the frame size for later bbox clipping
  if (tracker.frame_id() == 0) {
    auto& frame = data["ori_img"].get_ref<const framework::Mat&>();
    tracker.SetFrameSize(frame.height(), frame.width());
  }
  const auto mode = use_det.get<int>();
  if (mode != -1) {
    // explicit override from the caller
    return use_det;
  }
  // auto mode: run detection every `det_interval` frames
  const auto interval = tracker.params().det_interval;
  const bool run_det = interval > 0 && tracker.frame_id() % interval == 0;
  return run_det;
}
REGISTER_SIMPLE_MODULE(pose_tracker::Prepare, Prepare);
// Merge detector outputs (when present) with bboxes derived from existing
// tracks, then attach each surviving bbox to the image data for the pose
// model. Returns (array of {img, bbox} items, array of track ids; -1 marks a
// fresh detection without an associated track).
std::tuple<Value, Value> ProcessBboxes(const Value& det_val, const Value& data,
                                       Value state) noexcept {
  auto& tracker = state.get_ref<Tracker&>();
  std::optional<Tracker::Detections> dets;
  if (det_val.is_array()) {  // has detections
    auto& [bboxes, scores, labels] = dets.emplace();
    for (const auto& det : det_val.array()) {
      bboxes.push_back(from_value<Bbox>(det["bbox"]));
      scores.push_back(det["score"].get<float>());
      labels.push_back(det["label_id"].get<int>());
    }
  }
  auto [bboxes, ids] = tracker.ProcessBboxes(dets);
  Value::Array bbox_array;
  Value track_ids_array;
  // attach bboxes to image data
  for (auto& bbox : bboxes) {
    // (l, t, r, b) floats -> integer cv::Rect via the Rect2f conversion
    cv::Rect rect(cv::Rect2f(cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3])));
    bbox_array.push_back({
        {"img", data["img"]},                                 // img
        {"bbox", {rect.x, rect.y, rect.width, rect.height}},  // bbox
    });
  }
  track_ids_array = to_value(ids);
  return {std::move(bbox_array), std::move(track_ids_array)};
}
REGISTER_SIMPLE_MODULE(pose_tracker::ProcessBboxes, ProcessBboxes);
// Convert pose model outputs to key-points/scores, advance the tracker by one
// step, and export the currently visible tracks (plus debug bboxes) as a
// TrackerResult value.
Value TrackStep(const Value& poses, const Value& track_indices, Value state) noexcept {
  assert(poses.is_array());
  vector<Points> keypoints;
  vector<Scores> scores;
  for (auto& output : poses.array()) {
    auto& k = keypoints.emplace_back();
    auto& s = scores.emplace_back();
    // each key-point comes as {"bbox": [x, y, ...], "score": s}
    for (auto& kpt : output["key_points"].array()) {
      k.emplace_back(kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>());
      s.push_back(kpt["score"].get<float>());
    }
    // (removed: a local score accumulator `avg` that was never read)
  }
  vector<int64_t> track_ids;
  from_value(track_indices, track_ids);
  auto& tracker = state.get_ref<Tracker&>();
  tracker.TrackStep(keypoints, scores, track_ids);
  TrackerResult result;
  for (const auto& track : tracker.tracks()) {
    if (track->missing()) {
      continue;  // report only tracks observed in the current frame
    }
    vector<mmdeploy_point_t> kpts;
    kpts.reserve(track->smoothed_kpts().size());
    for (const auto& kpt : track->smoothed_kpts()) {
      kpts.push_back({kpt.x, kpt.y});
    }
    result.keypoints.push_back(std::move(kpts));
    result.scores.push_back(track->scores());
    auto& bbox = track->smoothed_bbox();
    result.bboxes.push_back({bbox[0], bbox[1], bbox[2], bbox[3]});
    result.track_ids.push_back(track->track_id());
  }
  result.pose_input_bboxes = tracker.pose_input_bboxes();
  result.pose_output_bboxes = tracker.pose_output_bboxes();
  return result;
}
REGISTER_SIMPLE_MODULE(pose_tracker::TrackStep, TrackStep);
// MSVC toolset v143 keeps ICEing when using a lambda here
// Factory for the "pose_tracker::Create" module: wraps a Tracker constructed
// from the C parameter struct into a pointer-typed Value (the shared state).
static Value CreateTracker(mmdeploy_pose_tracker_param_t* param) {
  return make_pointer(Tracker{*param});
}
REGISTER_SIMPLE_MODULE(pose_tracker::Create, CreateTracker);
} // namespace mmdeploy::mmpose::_pose_tracker

View File

@ -0,0 +1,399 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_tracker/pose_tracker.h"
#include <array>
#include <cmath>
#include <numeric>
#include "mmdeploy/core/utils/formatter.h"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Construct a tracker from C params; key-point sigmas are copied into owned
// storage so `params_` never points into caller-owned memory.
Tracker::Tracker(const mmdeploy_pose_tracker_param_t& _params) : params_(_params) {
  if (params_.keypoint_sigmas && params_.keypoint_sigmas_size) {
    key_point_sigmas_.assign(params_.keypoint_sigmas,
                             params_.keypoint_sigmas + params_.keypoint_sigmas_size);
    params_.keypoint_sigmas = key_point_sigmas_.data();
  }
}
// Build the symmetric pairwise IoU matrix for `bboxes` and run NMS with
// `det_nms_thr`; suppressed entries are cleared in `is_valid_bbox`.
// NOTE(review): `iou` is sized by ranks.size() but indexed by bboxes.size();
// the only caller passes equal-sized vectors — confirm before reusing.
void Tracker::SuppressOverlappingBoxes(const vector<Bbox>& bboxes,
                                       vector<std::pair<int, float>>& ranks,
                                       vector<int>& is_valid_bbox) const {
  vector<float> iou(ranks.size() * ranks.size());
  for (int i = 0; i < bboxes.size(); ++i) {
    for (int j = 0; j < i; ++j) {
      // symmetric; diagonal entries stay 0
      iou[i * bboxes.size() + j] = iou[j * bboxes.size() + i] =
          intersection_over_union(bboxes[i], bboxes[j]);
    }
  }
  suppress_non_maximum(ranks, iou, is_valid_bbox, params_.det_nms_thr);
}
// Pose-level NMS: suppress poses whose pairwise similarity (OKS or IoU, see
// GetPosePoseSimilarity) exceeds `oks_thr`. Poses belonging to currently
// visible tracks are ranked above others so that poses from missing tracks
// cannot suppress them; ties are broken by total key-point score.
void Tracker::SuppressOverlappingPoses(const vector<Points>& keypoints,
                                       const vector<Scores>& scores, const vector<Bbox>& bboxes,
                                       const vector<int64_t>& track_ids, vector<int>& is_valid,
                                       float oks_thr) {
  assert(keypoints.size() == is_valid.size());
  assert(scores.size() == is_valid.size());
  assert(bboxes.size() == is_valid.size());
  const auto size = is_valid.size();
  // symmetric similarity matrix over currently valid entries; others stay 0
  vector<float> similarity(size * size);
  for (int i = 0; i < size; ++i) {
    if (is_valid[i]) {
      for (int j = 0; j < i; ++j) {
        if (is_valid[j]) {
          similarity[i * size + j] = similarity[j * size + i] =
              GetPosePoseSimilarity(bboxes[i], keypoints[i], bboxes[j], keypoints[j]);
        }
      }
    }
  }
  vector<std::pair<bool, float>> ranks;
  ranks.reserve(size);
  for (int i = 0; i < size; ++i) {
    // a pose counts as "visible" if its track exists and was not missing
    bool is_visible = false;
    for (const auto& track : tracks_) {
      if (track->track_id() == track_ids[i]) {
        is_visible = track->missing() == 0;
        break;
      }
    }
    auto score = std::accumulate(scores[i].begin(), scores[i].end(), 0.f);
    // prevents bboxes from missing tracks to suppress visible tracks
    ranks.emplace_back(is_visible, score);
  }
  suppress_non_maximum(ranks, similarity, is_valid, oks_thr);
}
// Gather candidate bboxes for pose estimation: filtered detections plus bboxes
// predicted from visible and missing tracks. Candidates are NMS'd, ranked by
// (source type, area) and truncated to `pose_max_num_bboxes`.
// Returns (bboxes, track id per bbox; -1 for fresh detections).
std::tuple<vector<Bbox>, vector<int64_t>> Tracker::ProcessBboxes(
    const std::optional<Detections>& dets) {
  vector<Bbox> bboxes;
  vector<int64_t> prev_ids;
  // 2 - visible tracks
  // 1 - detection
  // 0 - missing tracks
  vector<int> types;
  GetDetectedObjects(dets, bboxes, prev_ids, types);
  GetTrackedObjects(bboxes, prev_ids, types);
  vector<int> is_valid_bboxes(bboxes.size(), 1);
  // counts valid bboxes per type — used only for debug logging
  auto count = [&] {
    std::array<int, 3> acc{};
    for (size_t i = 0; i < is_valid_bboxes.size(); ++i) {
      if (is_valid_bboxes[i]) {
        ++acc[types[i]];
      }
    }
    return acc;
  };
  POSE_TRACKER_DEBUG("frame {}, bboxes {}", frame_id_, count());
  // rank by (type, area): track-derived bboxes beat raw detections,
  // larger bboxes beat smaller ones
  vector<std::pair<int, float>> ranks;
  ranks.reserve(bboxes.size());
  for (int i = 0; i < bboxes.size(); ++i) {
    ranks.emplace_back(types[i], get_area(bboxes[i]));
  }
  SuppressOverlappingBoxes(bboxes, ranks, is_valid_bboxes);
  POSE_TRACKER_DEBUG("frame {}, bboxes after nms: {}", frame_id_, count());
  vector<int> idxs;
  idxs.reserve(bboxes.size());
  for (int i = 0; i < bboxes.size(); ++i) {
    if (is_valid_bboxes[i]) {
      idxs.push_back(i);
    }
  }
  std::stable_sort(idxs.begin(), idxs.end(), [&](int i, int j) { return ranks[i] > ranks[j]; });
  std::fill(is_valid_bboxes.begin(), is_valid_bboxes.end(), 0);
  {
    // keep the top-ranked bboxes up to pose_max_num_bboxes
    vector<Bbox> tmp_bboxes;
    vector<int64_t> tmp_track_ids;
    for (const auto& i : idxs) {
      // NOTE(review): pose_max_num_bboxes defaults to -1 ("no limit"); the
      // signed->unsigned conversion in this comparison makes -1 effectively
      // infinite — confirm this reliance on wraparound is intended.
      if (tmp_bboxes.size() >= params_.pose_max_num_bboxes) {
        break;
      }
      tmp_bboxes.push_back(bboxes[i]);
      tmp_track_ids.push_back(prev_ids[i]);
      is_valid_bboxes[i] = 1;
    }
    bboxes = std::move(tmp_bboxes);
    prev_ids = std::move(tmp_track_ids);
  }
  pose_input_bboxes_ = bboxes;
  POSE_TRACKER_DEBUG("frame {}, bboxes after sort: {}", frame_id_, count());
  return {bboxes, prev_ids};
}
// Advance the tracker one frame given pose outputs for the candidate bboxes.
// Two-phase assignment: #1 matches poses to visible tracks by gated
// Mahalanobis distance; #2 matches the remainder to missing tracks by IoU.
// Unmatched detections spawn new tracks; unmatched tracks accrue `missing`.
void Tracker::TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
                        const vector<int64_t>& prev_ids) noexcept {
  vector<Bbox> bboxes(keypoints.size());
  vector<int> is_unused_bbox(keypoints.size(), 1);
  // key-points to bboxes
  for (size_t i = 0; i < keypoints.size(); ++i) {
    if (auto bbox = KeypointsToBbox(keypoints[i], scores[i])) {
      bboxes[i] = *bbox;
    } else {
      // too few confident key-points; exclude this pose from matching
      is_unused_bbox[i] = false;
    }
  }
  pose_output_bboxes_ = bboxes;
  SuppressOverlappingPoses(keypoints, scores, bboxes, prev_ids, is_unused_bbox,
                           params_.pose_nms_thr);
  assert(is_unused_bbox.size() == bboxes.size());
  vector<float> similarity0;    // average mahalanobis dist - proportion of tracked key-points
  vector<float> similarity1;    // iou
  vector<vector<bool>> gating;  // key-point gating result
  GetSimilarityMatrices(bboxes, keypoints, prev_ids, similarity0, similarity1, gating);
  vector<int> is_unused_track(tracks_.size(), 1);
  // disable missing tracks in the #1 assignment
  for (int i = 0; i < tracks_.size(); ++i) {
    if (tracks_[i]->missing()) {
      is_unused_track[i] = 0;
    }
  }
  const auto assignment_visible =
      greedy_assignment(similarity0, is_unused_bbox, is_unused_track, -kInf / 10);
  POSE_TRACKER_DEBUG("frame {}, assignment for visible {}", frame_id_, assignment_visible);
  // enable missing tracks in the #2 assignment
  for (int i = 0; i < tracks_.size(); ++i) {
    if (tracks_[i]->missing()) {
      is_unused_track[i] = 1;
    }
  }
  const auto assignment_missing =
      greedy_assignment(similarity1, is_unused_bbox, is_unused_track, params_.track_iou_thr);
  POSE_TRACKER_DEBUG("frame {}, assignment for missing {}", frame_id_, assignment_missing);
  // update assigned tracks
  for (auto [i, j, _] : assignment_visible) {
    tracks_[j]->UpdateVisible(bboxes[i], keypoints[i], scores[i], gating[i * tracks_.size() + j]);
  }
  // update recovered tracks
  for (auto [i, j, _] : assignment_missing) {
    tracks_[j]->UpdateRecovered(bboxes[i], keypoints[i], scores[i]);
  }
  // generating new tracks from detected bboxes
  for (size_t i = 0; i < is_unused_bbox.size(); ++i) {
    if (is_unused_bbox[i] && prev_ids[i] == -1) {
      CreateTrack(bboxes[i], keypoints[i], scores[i]);
    }
  }
  // update missing tracks
  for (size_t i = 0; i < is_unused_track.size(); ++i) {
    if (is_unused_track[i]) {
      tracks_[i]->UpdateMissing();
    }
  }
  // diagnostic for missing tracks
  DiagnosticMissingTracks(is_unused_track, is_unused_bbox, similarity0, similarity1);
  RemoveMissingTracks();
  for (auto& track : tracks_) {
    track->Predict();
  }
  ++frame_id_;
  // print track summary
  // SummaryTracks();
}
// Append one candidate bbox per track: missing tracks use the Kalman-predicted
// bbox directly; visible tracks derive one from their predicted key-points.
// Bboxes that do not intersect the frame, or fall below pose_min_bbox_size,
// are dropped.
void Tracker::GetTrackedObjects(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
                                vector<int>& types) const {
  for (auto& track : tracks_) {
    std::optional<Bbox> bbox;
    if (track->missing()) {
      bbox = track->predicted_bbox();
    } else {
      bbox = keypoints_to_bbox(track->predicted_kpts(), track->scores(), frame_h_, frame_w_,
                               params_.pose_bbox_scale, params_.pose_kpt_thr,
                               params_.pose_min_keypoints);
    }
    if (bbox) {
      auto& b = *bbox;
      // intersect with the frame rect to reject fully out-of-frame bboxes
      cv::Rect_<float> img_rect(0, 0, frame_w_, frame_h_);
      cv::Rect_<float> box_rect(b[0], b[1], b[2] - b[0], b[3] - b[1]);
      auto roi = img_rect & box_rect;
      if (roi.area() > 0 && get_area(b) > params_.pose_min_bbox_size * params_.pose_min_bbox_size) {
        bboxes.push_back(*bbox);
        track_ids.push_back(track->track_id());
        types.push_back(track->missing() ? 0 : 2);  // 0 - missing, 2 - visible
      }
    }
  }
}
// Append detector outputs that pass the label / score / minimum-size filters.
// Appended entries get track id -1 (unassociated) and type 1 (detection).
void Tracker::GetDetectedObjects(const std::optional<Detections>& dets, vector<Bbox>& _bboxes,
                                 vector<int64_t>& track_ids, vector<int>& types) const {
  if (!dets) {
    return;  // detection was skipped for this frame
  }
  const auto& [bboxes, scores, labels] = *dets;
  const auto min_size = params_.det_min_bbox_size * params_.det_min_bbox_size;
  for (size_t i = 0; i < bboxes.size(); ++i) {
    const bool keep = labels[i] == params_.det_label && scores[i] > params_.det_thr &&
                      get_area(bboxes[i]) >= min_size;
    if (keep) {
      _bboxes.push_back(bboxes[i]);
      track_ids.push_back(-1);
      types.push_back(1);
    }
  }
}
// Track-vs-pose similarity: per-key-point Mahalanobis distances gated at the
// chi-square 95% quantile for 2 DoF. Returns (distance score, IoU with the
// track's predicted bbox, per-key-point gating mask). The distance is kInf
// when too few key-points pass the gate.
std::tuple<float, float, vector<bool>> Tracker::GetTrackPoseSimilarity(Track& track,
                                                                       const Bbox& bbox,
                                                                       const Points& kpts) const {
  // chi-square 0.95 quantiles for 1..9 degrees of freedom (index 0 unused)
  static constexpr const std::array chi2inv95{0.f, 3.8415f, 5.9915f, 7.8147f, 9.4877f,
                                              11.070f, 12.592f, 14.067f, 15.507f, 16.919f};
  auto dists = track.KeyPointDistance(kpts);
  vector<bool> gating;
  gating.reserve(dists.size());
  float dist = 0.f;
  int count = 0;
  for (const auto& d : dists) {
    // gate each key-point at the 2-DoF quantile (x, y measurement)
    if (d < chi2inv95[2]) {
      dist += d;
      ++count;
      gating.push_back(true);
    } else {
      gating.push_back(false);
    }
  }
  auto count_thr =
      params_.pose_min_keypoints >= 0 ? params_.pose_min_keypoints : (dists.size() + 1) / 2;
  if (count >= count_thr) {
    auto fcount = static_cast<float>(count);
    // mean gated distance minus a bonus for the fraction of tracked key-points
    dist = dist / fcount - fcount / static_cast<float>(dists.size());
  } else {
    dist = kInf;  // too few gated key-points: treat as dissimilar
  }
  auto iou = intersection_over_union(track.predicted_bbox(), bbox);
  // NOTE(review): the original contained an early return guarded by
  // `key_point_sigmas_.empty()` that returned the exact same tuple as the
  // fall-through; the dead branch was removed with no behavior change.
  return {dist, iou, gating};
}
// Fill the (n_bboxes x n_tracks) similarity matrices used by the assignment
// steps: similarity0 = negative gated Mahalanobis score, similarity1 = IoU,
// gating = per-pair key-point gating masks. Pairs are skipped when the bbox
// already carries a track id different from the track's.
void Tracker::GetSimilarityMatrices(const vector<Bbox>& bboxes, const vector<Points>& keypoints,
                                    const vector<int64_t>& prev_ids, vector<float>& similarity0,
                                    vector<float>& similarity1, vector<vector<bool>>& gating) {
  const auto n_rows = static_cast<int>(bboxes.size());
  const auto n_cols = static_cast<int>(tracks_.size());
  // generate similarity matrix
  similarity0 = vector<float>(n_rows * n_cols, -kInf);
  similarity1 = vector<float>(n_rows * n_cols, -kInf);
  gating = vector<vector<bool>>(n_rows * n_cols);
  for (size_t i = 0; i < n_rows; ++i) {
    const auto& bbox = bboxes[i];
    const auto& kpts = keypoints[i];
    for (size_t j = 0; j < n_cols; ++j) {
      auto& track = *tracks_[j];
      if (prev_ids[i] != -1 && prev_ids[i] != track.track_id()) {
        continue;  // bbox is pre-bound to a different track
      }
      const auto index = i * n_cols + j;
      auto&& [dist, iou, gate] = GetTrackPoseSimilarity(track, bbox, kpts);
      similarity0[index] = -dist;
      similarity1[index] = iou;
      // BUG FIX: `gating` is pre-sized to n_rows * n_cols and consumed via
      // gating[i * n_cols + j] in TrackStep; the original push_back appended
      // past the pre-sized region, leaving the indexed entries empty.
      gating[index] = std::move(gate);
    }
  }
}
// Pose-pose similarity used by pose NMS: OKS when key-point sigmas are
// configured, plain bbox IoU otherwise. Both measures are symmetric.
float Tracker::GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1,
                                     const Points& kpts1) {
  return key_point_sigmas_.empty()
             ? intersection_over_union(bbox0, bbox1)
             : object_keypoint_similarity(kpts0, bbox0, kpts1, bbox1, key_point_sigmas_);
}
// Start a new track from an unmatched pose; consumes the next unused id.
void Tracker::CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores) {
  tracks_.emplace_back(std::make_unique<Track>(&params_, bbox, kpts, scores, next_id_++));
}
// Derive a bbox from confident key-points using the tracker's frame size and
// pose parameters; std::nullopt when too few key-points pass pose_kpt_thr.
std::optional<Bbox> Tracker::KeypointsToBbox(const Points& kpts, const Scores& scores) const {
  return keypoints_to_bbox(kpts, scores, frame_h_, frame_w_, params_.pose_bbox_scale,
                           params_.pose_kpt_thr, params_.pose_min_keypoints);
}
// Drop tracks missing for more than `track_max_missing` frames, preserving
// the relative order of the survivors (in-place stable compaction).
void Tracker::RemoveMissingTracks() {
  size_t n_kept{};
  for (auto& t : tracks_) {
    if (t->missing() > params_.track_max_missing) {
      continue;  // expired; the unique_ptr is released by resize below
    }
    tracks_[n_kept++] = std::move(t);
  }
  tracks_.resize(n_kept);
}
// Debug-only diagnostics: for every track that stayed unmatched this step,
// log the best remaining similarity against unmatched bboxes; when any track
// is missing, also dump both full similarity matrices. No state is modified.
void Tracker::DiagnosticMissingTracks(const vector<int>& is_unused_track,
                                      const vector<int>& is_unused_bbox,
                                      const vector<float>& similarity0,
                                      const vector<float>& similarity1) {
  int missing = 0;
  const auto n_cols = static_cast<int>(is_unused_track.size());
  const auto n_rows = static_cast<int>(is_unused_bbox.size());
  for (int i = 0; i < is_unused_track.size(); ++i) {
    if (is_unused_track[i]) {
      float best_s0 = 0.f;
      float best_s1 = 0.f;
      // scan the i-th column (bboxes are rows, tracks are columns)
      for (int j = 0; j < is_unused_bbox.size(); ++j) {
        if (is_unused_bbox[j]) {
          best_s0 = std::max(similarity0[j * n_cols + i], best_s0);
          best_s1 = std::max(similarity1[j * n_cols + i], best_s1);
        }
      }
      POSE_TRACKER_DEBUG("frame {}: track missing {}, best_s0={}, best_s1={}", frame_id_,
                         tracks_[i]->track_id(), best_s0, best_s1);
      ++missing;
    }
  }
  if (missing) {
    // render the matrices via cv::Mat's stream operator (wraps, not copies)
    std::stringstream ss;
    ss << cv::Mat_<float>(n_rows, n_cols, const_cast<float*>(similarity0.data()));
    POSE_TRACKER_DEBUG("frame {}, similarity#0: \n{}", frame_id_, ss.str());
    ss = std::stringstream{};
    ss << cv::Mat_<float>(n_rows, n_cols, const_cast<float*>(similarity1.data()));
    POSE_TRACKER_DEBUG("frame {}, similarity#1: \n{}", frame_id_, ss.str());
  }
}
// Debug helper (currently disabled at the call site in TrackStep): logs
// (track_id, missing) for all tracks, then per-track scores for visible ones.
void Tracker::SummaryTracks() {
  vector<std::tuple<int64_t, int>> summary;
  for (const auto& track : tracks_) {
    summary.emplace_back(track->track_id(), track->missing());
  }
  POSE_TRACKER_DEBUG("frame {}, track summary {}", frame_id_, summary);
  for (const auto& track : tracks_) {
    if (!track->missing()) {
      POSE_TRACKER_DEBUG("frame {}, track {}, scores {}", frame_id_, track->track_id(),
                         track->scores());
    }
  }
}
} // namespace mmdeploy::mmpose::_pose_tracker

View File

@ -0,0 +1,101 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
#include "mmdeploy/pose_tracker.h"
#include "pose_tracker/common.h"
#include "pose_tracker/track.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Pose tracking state machine: fuses detector bboxes with per-track Kalman
// predictions (ProcessBboxes), then associates pose outputs back to tracks
// (TrackStep), maintaining the set of Track objects across frames.
class Tracker {
 public:
  explicit Tracker(const mmdeploy_pose_tracker_param_t& _params);
  // copying is not supported; this asserting copy-ctor only exists so the
  // type satisfies container/Value requirements — never call it
  Tracker(const Tracker&) { assert(0); }
  Tracker(Tracker&& o) noexcept = default;
  // raw detector output for one frame (parallel vectors)
  struct Detections {
    Bboxes bboxes;
    Scores scores;
    vector<int> labels;
  };
  // Record the frame size; used to clip/validate bboxes against the image.
  void SetFrameSize(int height, int width) {
    frame_h_ = static_cast<float>(height);
    frame_w_ = static_cast<float>(width);
  }
  const mmdeploy_pose_tracker_param_t& params() const noexcept { return params_; }
  int64_t frame_id() const noexcept { return frame_id_; }
  const vector<std::unique_ptr<Track>>& tracks() const noexcept { return tracks_; }
  // Produce the bboxes (and their track ids; -1 = detection) to be fed to the
  // pose model for the current frame.
  std::tuple<vector<Bbox>, vector<int64_t>> ProcessBboxes(const std::optional<Detections>& dets);
  // Associate pose outputs with tracks and advance the tracker one frame.
  void TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
                 const vector<int64_t>& prev_ids) noexcept;

 private:
  void GetDetectedObjects(const std::optional<Detections>& dets, vector<Bbox>& _bboxes,
                          vector<int64_t>& track_ids, vector<int>& types) const;
  void GetTrackedObjects(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
                         vector<int>& types) const;
  void SuppressOverlappingBoxes(const vector<Bbox>& bboxes, vector<std::pair<int, float>>& ranks,
                                vector<int>& is_valid_bbox) const;
  void SuppressOverlappingPoses(const vector<Points>& keypoints, const vector<Scores>& scores,
                                const vector<Bbox>& bboxes, const vector<int64_t>& track_ids,
                                vector<int>& is_valid, float oks_thr);
  std::optional<Bbox> KeypointsToBbox(const Points& kpts, const Scores& scores) const;
  float GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1,
                              const Points& kpts1);
  void GetSimilarityMatrices(const vector<Bbox>& bboxes, const vector<Points>& keypoints,
                             const vector<int64_t>& prev_ids, vector<float>& similarity0,
                             vector<float>& similarity1, vector<vector<bool>>& gating);
  std::tuple<float, float, vector<bool>> GetTrackPoseSimilarity(Track& track, const Bbox& bbox,
                                                                const Points& kpts) const;
  void CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores);
  void RemoveMissingTracks();
  void DiagnosticMissingTracks(const vector<int>& is_unused_track,
                               const vector<int>& is_unused_bbox, const vector<float>& similarity0,
                               const vector<float>& similarity1);
  void SummaryTracks();

 private:
  static constexpr const auto kInf = 1000.f;  // sentinel "infinite" distance
  float frame_h_ = 0;
  float frame_w_ = 0;
  vector<std::unique_ptr<Track>> tracks_;
  int64_t next_id_{0};                  // next track id to hand out
  std::vector<float> key_point_sigmas_; // owned copy of params' sigma array
  mmdeploy_pose_tracker_param_t params_;
  vector<Bbox> pose_input_bboxes_;      // debug: bboxes fed to the pose model
  vector<Bbox> pose_output_bboxes_;     // debug: bboxes derived from pose output
  int64_t frame_id_ = 0;

 public:
  const vector<Bbox>& pose_input_bboxes() const noexcept { return pose_input_bboxes_; }
  const vector<Bbox>& pose_output_bboxes() const noexcept { return pose_output_bboxes_; }
};
} // namespace mmdeploy::mmpose::_pose_tracker
#endif // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP

View File

@ -0,0 +1,49 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "smoothing_filter.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Seed the filter state from the first observation: positions take the
// observed key-points / bbox center / bbox scale, all velocities start at 0.
SmoothingFilter::SmoothingFilter(const Bbox& bbox, const Points& pts,
                                 const SmoothingFilter::Params& params)
    : params_(params),
      pts_v_(pts.size()),
      pts_x_(pts),
      center_v_{},
      center_x_{get_center(bbox)},
      scale_v_{},
      scale_x_{get_scale(bbox)} {}
// Apply one smoothing step to the key-points, bbox center and bbox scale;
// returns the smoothed (bbox, key-points). Point speeds use the Euclidean
// norm; scalar scales use the identity.
std::pair<Bbox, Points> SmoothingFilter::Step(const Bbox& bbox, const Points& kpts) {
  constexpr auto abs = [](const Point& p) { return std::sqrt(p.dot(p)); };
  // filter key-points
  step<Point>(pts_x_, pts_v_, kpts, params_, abs);
  // filter bbox center
  std::array c{get_center(bbox)};
  step<Point>(center_x_, center_v_, c, params_, abs);
  // filter bbox scales
  auto s = get_scale(bbox);
  step<float>(scale_x_, scale_v_, s, params_, [](auto x) { return x; });
  return {get_bbox(center_x_[0], scale_x_), pts_x_};
}
void SmoothingFilter::Reset(const Bbox& bbox, const Points& pts) {
pts_v_ = Points(pts_v_.size());
center_v_ = {};
scale_v_ = {};
pts_x_ = pts;
center_v_ = {get_center(bbox)};
scale_v_ = get_scale(bbox);
}
// Low-pass blend factor for a given cutoff frequency (unit timestep):
// alpha = r / (r + 1) with r = 2*pi*cutoff.
float SmoothingFilter::smoothing_factor(float cutoff) {
  static constexpr float kPi = 3.1415926;
  const float omega = 2.f * kPi * cutoff;
  return omega / (omega + 1.f);
}
} // namespace mmdeploy::mmpose::_pose_tracker

View File

@ -0,0 +1,58 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
#include "mmdeploy/core/mpl/span.h"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
template <typename T>
using span = mmdeploy::Span<T>;
// Adaptive low-pass filter over key-points and bbox (center + scale): the
// cutoff frequency grows with the measured speed (fc = fc_min + beta * |v|),
// so fast motion is tracked closely while slow motion is smoothed strongly.
class SmoothingFilter {
 public:
  struct Params {
    float beta;    // speed coefficient: larger -> less smoothing on fast motion
    float fc_min;  // minimum cutoff frequency for positions
    float fc_v;    // cutoff frequency for the velocity low-pass
  };
  explicit SmoothingFilter(const Bbox& bbox, const Points& pts, const Params& params);
  // Smooth one observation; returns the filtered (bbox, key-points).
  std::pair<Bbox, Points> Step(const Bbox& bbox, const Points& kpts);
  // Re-seed the filter state from a fresh observation.
  void Reset(const Bbox& bbox, const Points& pts);

 private:
  static float smoothing_factor(float cutoff);
  // One filter step over parallel spans: updates velocities `v` and states `x`
  // towards new samples `x1`; `norm` maps a value to a scalar speed.
  template <typename T, typename Norm>
  static void step(span<T> x, span<T> v, span<const T> x1, const Params& params, Norm norm) {
    auto a_v = smoothing_factor(params.fc_v);
    for (int i = 0; i < v.size(); ++i) {
      v[i] = smooth(a_v, v[i], x1[i] - x[i]);
      auto fc = params.fc_min + params.beta * norm(v[i]);  // speed-adaptive cutoff
      auto a_x = smoothing_factor(fc);
      x[i] = smooth(a_x, x[i], x1[i]);
    }
  }
  // Exponential blend: (1 - a) * x0 + a * x1.
  template <typename T>
  static T smooth(float a, const T& x0, const T& x1) {
    return (1.f - a) * x0 + a * x1;
  }

 private:
  Params params_;
  std::vector<Point> pts_v_;       // key-point velocities
  std::vector<Point> pts_x_;       // filtered key-point positions
  std::array<Point, 1> center_v_;  // bbox center velocity
  std::array<Point, 1> center_x_;  // filtered bbox center
  std::array<float, 2> scale_v_;   // bbox scale velocities (w, h)
  std::array<float, 2> scale_x_;   // filtered bbox scale (w, h)
};
} // namespace mmdeploy::mmpose::_pose_tracker
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H

View File

@ -0,0 +1,70 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_tracker/track.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Create a track from its first observation: seeds the Kalman tracking filter
// and the smoothing filter, then records the observation via Add().
// NOTE(review): members initialize in declaration order (track_id_ before
// filter_/smoother_ per the header), not the order written here; harmless
// since the initializers are independent, but expect a -Wreorder warning.
Track::Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts,
             const Scores& ss, int64_t id)
    : params_(params),
      filter_(CreateFilter(bbox, kpts)),
      smoother_(CreateSmoother(bbox, kpts)),
      track_id_(id) {
  POSE_TRACKER_DEBUG("new track {}", track_id_);
  Add(bbox, kpts, ss);
}
// Logs when a track object is destroyed (e.g. pruned after exceeding the
// missing-frame budget).
Track::~Track() { POSE_TRACKER_DEBUG("track lost {}", track_id_); }
// Update a matched visible track: run the Kalman correction with the new
// observation (`tracked` holds the per-key-point gating mask) and record the
// corrected state.
void Track::UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores,
                          const vector<bool>& tracked) {
  auto [bbox_corr, kpts_corr] = filter_.Correct(bbox, kpts, tracked);
  Add(bbox_corr, kpts_corr, scores);
}
// Re-acquire a previously missing track: rebuild the Kalman filter and reset
// the smoother from the new observation, then clear the missing counter.
void Track::UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores) {
  POSE_TRACKER_DEBUG("track recovered {}", track_id_);
  filter_ = CreateFilter(bbox, kpts);
  smoother_.Reset(bbox, kpts);
  Add(bbox, kpts, scores);
  missing_ = 0;
}
// Called when the track got no observation this frame: bump the missing
// counter and, while within the track_max_missing budget, feed the predicted
// state back as a pseudo observation with zero scores.
void Track::UpdateMissing() {
  missing_++;
  if (missing_ <= params_->track_max_missing) {
    // use predicted state to update the missing tracks
    Add(bbox_predict_, kpts_predict_, vector<float>(kpts_predict_.size()));
  }
}
// Advance the Kalman filter one step and cache the predicted bbox/key-points.
void Track::Predict() {
  // TODO: velocity decay for missing tracks
  std::tie(bbox_predict_, kpts_predict_) = filter_.Predict();
}
// Append an observation to the track history (bounded by track_history_size)
// and refresh the smoothed bbox/key-points from the raw observation.
void Track::Add(const Bbox& bbox, const Points& kpts, const Scores& ss) {
  bboxes_.push_back(bbox);
  keypoints_.push_back(kpts);
  scores_.push_back(ss);
  // bound the history by dropping the oldest entry of each parallel vector
  if (bboxes_.size() > params_->track_history_size) {
    bboxes_.erase(bboxes_.begin());
    keypoints_.erase(keypoints_.begin());
    scores_.erase(scores_.begin());
  }
  std::tie(bbox_smooth_, kpts_smooth_) = smoother_.Step(bbox, kpts);
}
// Build a Kalman tracking filter seeded from an observation, using the
// tracker-wide noise weights.
TrackingFilter Track::CreateFilter(const Bbox& bbox, const Points& pts) {
  return {bbox, pts, params_->std_weight_position, params_->std_weight_velocity};
}
// Build a smoothing filter seeded from an observation; smooth_params packs
// (beta, fc_min, fc_v) in that order.
SmoothingFilter Track::CreateSmoother(const Bbox& bbox, const Points& pts) {
  return SmoothingFilter(
      bbox, pts, {params_->smooth_params[0], params_->smooth_params[1], params_->smooth_params[2]});
}
} // namespace mmdeploy::mmpose::_pose_tracker

View File

@ -0,0 +1,65 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
#include "pose_tracker/common.h"
#include "pose_tracker/smoothing_filter.h"
#include "pose_tracker/tracking_filter.h"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
// A single tracked person: bounded observation history plus a Kalman filter
// (prediction/correction) and a smoothing filter (display output). Lifecycle:
// constructed from a detection, updated via UpdateVisible / UpdateRecovered /
// UpdateMissing, advanced by Predict(), destroyed when pruned by the tracker.
class Track {
 public:
  Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts,
        const Scores& ss, int64_t id);
  ~Track();
  // matched while visible: Kalman-correct with per-key-point gating
  void UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores,
                     const vector<bool>& tracked);
  // matched after being missing: rebuild filters from the new observation
  void UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores);
  // no match this frame: bump the missing counter, reuse the prediction
  void UpdateMissing();
  void Predict();
  // Mahalanobis distances against the filter's predicted state
  float BboxDistance(const Bbox& bbox) { return filter_.BboxDistance(bbox); }
  vector<float> KeyPointDistance(const Points& kpts) { return filter_.KeyPointDistance(kpts); }
  uint32_t track_id() const noexcept { return track_id_; }
  // number of consecutive frames without an observation (0 = visible)
  uint32_t missing() const noexcept { return missing_; }
  const Bbox& predicted_bbox() const noexcept { return bbox_predict_; }
  const Bbox& smoothed_bbox() const noexcept { return bbox_smooth_; }
  const Points& predicted_kpts() const noexcept { return kpts_predict_; }
  const Points& smoothed_kpts() const noexcept { return kpts_smooth_; }
  // scores of the most recent observation
  const Scores& scores() const noexcept { return scores_.back(); }

 private:
  void Add(const Bbox& bbox, const Points& kpts, const Scores& ss);
  TrackingFilter CreateFilter(const Bbox& bbox, const Points& pts);
  SmoothingFilter CreateSmoother(const Bbox& bbox, const Points& pts);

 private:
  const mmdeploy_pose_tracker_param_t* params_;
  vector<Bbox> bboxes_;      // bounded history, parallel with keypoints_/scores_
  vector<Points> keypoints_;
  vector<Scores> scores_;
  uint32_t track_id_{};
  Bbox bbox_predict_{};      // latest Kalman prediction
  Bbox bbox_smooth_{};       // latest smoothed output
  Points kpts_predict_;
  Points kpts_smooth_;
  uint32_t missing_{0};
  TrackingFilter filter_;
  SmoothingFilter smoother_;
};
} // namespace mmdeploy::mmpose::_pose_tracker
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H

View File

@ -0,0 +1,199 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_tracker/tracking_filter.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Geometric mean of the bbox scale components; used to size the Kalman
// process/measurement noise relative to the object's apparent size.
float get_mean_scale(float scale_w, float scale_h) {
  return std::sqrt(scale_w * scale_h);
}
// Build one constant-velocity Kalman filter per key-point (4 state dims,
// 2 measured) and one for the bbox (8 state dims, 4 measured); initial error
// covariance and later noise terms are scaled by the mean bbox scale.
TrackingFilter::TrackingFilter(const Bbox& bbox, const vector<Point>& kpts,
                               float std_weight_position, float std_weight_velocity)
    : std_weight_position_(std_weight_position), std_weight_velocity_(std_weight_velocity) {
  auto center = get_center(bbox);
  auto scale = get_scale(bbox);
  auto mean_scale = get_mean_scale(scale[0], scale[1]);
  const auto n = kpts.size();
  pt_filters_.resize(n);
  for (int i = 0; i < n; ++i) {
    auto& f = pt_filters_[i];
    f.init(4, 2);  // state [x, y, dx, dy], measurement [x, y]
    SetKeyPointTransitionMat(i);
    SetKeyPointMeasurementMat(i);
    ResetKeyPoint(i, kpts[i], mean_scale);
  }
  {
    // [x, y, w, h, dx, dy, dw, dh]
    auto& f = bbox_filter_;
    f.init(8, 4);
    SetBboxTransitionMat();
    SetBboxMeasurementMat();
    SetBboxErrorCov(2 * std_weight_position * mean_scale,  //
                    10 * std_weight_velocity * mean_scale);
    // seed the posterior state from the observed center and scale
    f.statePost.at<float>(0) = center.x;
    f.statePost.at<float>(1) = center.y;
    f.statePost.at<float>(2) = scale[0];
    f.statePost.at<float>(3) = scale[1];
  }
}
// Runs the prediction step of every filter and returns the predicted
// (bbox, key-points). Process noise is re-scaled by the current (posterior)
// bbox size before each predict() call.
std::pair<Bbox, Points> TrackingFilter::Predict() {
  auto mean_scale = get_mean_scale(bbox_filter_.statePost.at<float>(2),  //
                                   bbox_filter_.statePost.at<float>(3));
  const auto n = pt_filters_.size();
  Points pts(n);
  for (int i = 0; i < n; ++i) {
    // the covariance must be updated before predict() uses it
    SetKeyPointProcessCov(i, std_weight_position_ * mean_scale, std_weight_velocity_ * mean_scale);
    auto mat = pt_filters_[i].predict();
    pts[i].x = mat.at<float>(0);
    pts[i].y = mat.at<float>(1);
  }
  Bbox bbox;
  {
    SetBboxProcessCov(std_weight_position_ * mean_scale, std_weight_velocity_ * mean_scale);
    auto mat = bbox_filter_.predict();
    auto x = mat.ptr<float>();
    // state is [cx, cy, w, h, ...]; convert back to corner (xyxy) form
    bbox = get_bbox({x[0], x[1]}, {x[2], x[3]});
  }
  return {bbox, pts};
}
// Fuses a measured (bbox, key-points) pair into the filters and returns the
// corrected estimates. Key-points flagged as not tracked are re-seeded at the
// measurement instead of being fused.
std::pair<Bbox, Points> TrackingFilter::Correct(const Bbox& bbox, const Points& kpts,
                                                const vector<bool>& tracked) {
  // scale the measurement noise by the size predicted for this frame
  auto mean_scale = get_mean_scale(bbox_filter_.statePre.at<float>(2),  //
                                   bbox_filter_.statePre.at<float>(3));
  const auto n = pt_filters_.size();
  Points corr_kpts(n);
  for (int i = 0; i < n; ++i) {
    if (!tracked.empty() && tracked[i]) {
      SetKeyPointMeasurementCov(i, std_weight_position_ * mean_scale);
      std::array<float, 2> m{kpts[i].x, kpts[i].y};
      auto mat = pt_filters_[i].correct(as_mat(m));
      corr_kpts[i].x = mat.at<float>(0);
      corr_kpts[i].y = mat.at<float>(1);
    } else {
      // lost point: restart its filter at the measurement with inflated cov
      ResetKeyPoint(i, kpts[i], mean_scale);
      corr_kpts[i] = kpts[i];
    }
  }
  Bbox corr_bbox;
  {
    SetBboxMeasurementCov(std_weight_position_ * mean_scale);
    auto c = get_center(bbox);
    auto s = get_scale(bbox);
    // measurement vector z = [cx, cy, w, h]
    std::array<float, 4> m{c.x, c.y, s[0], s[1]};
    auto mat = bbox_filter_.correct(as_mat(m));
    auto x = mat.ptr<float>();
    corr_bbox = get_bbox({x[0], x[1]}, {x[2], x[3]});
  }
  return {corr_bbox, corr_kpts};
}
// Squared Mahalanobis distance between the measured bbox and the filter's
// prediction, under the innovation covariance S = H * P_pre * H^T + R.
float TrackingFilter::BboxDistance(const Bbox& bbox) {
  auto mean_scale = get_mean_scale(bbox_filter_.statePre.at<float>(2),  //
                                   bbox_filter_.statePre.at<float>(3));
  SetBboxMeasurementCov(std_weight_position_ * mean_scale);
  auto c = get_center(bbox);
  auto s = get_scale(bbox);
  // measurement vector z = [cx, cy, w, h]
  std::array<float, 4> m{c.x, c.y, s[0], s[1]};
  cv::Mat z = as_mat(m);
  auto& f = bbox_filter_;
  cv::Mat sigma;
  // sigma = 1 * (H * P_pre) * H^T + 1 * R   (GEMM_2_T transposes operand 2)
  cv::gemm(f.measurementMatrix * f.errorCovPre, f.measurementMatrix, 1, f.measurementNoiseCov, 1,
           sigma, cv::GEMM_2_T);
  // innovation r = z - H * x_pre
  cv::Mat r = z - f.measurementMatrix * f.statePre;
  // ignore contribution of scales as it is unstable when inferred from key-points
  r.at<float>(2) = 0;
  r.at<float>(3) = 0;
  // d = r^T * sigma^-1 * r  (1x1 matrix)
  cv::Mat d = r.t() * sigma.inv() * r;
  return d.at<float>();
}
// Per-key-point squared Mahalanobis distance between each measured key-point
// and the corresponding filter's prediction.
vector<float> TrackingFilter::KeyPointDistance(const Points& kpts) {
  auto mean_scale = get_mean_scale(bbox_filter_.statePre.at<float>(2),  //
                                   bbox_filter_.statePre.at<float>(3));
  const auto n = pt_filters_.size();
  vector<float> dists(n);
  for (int i = 0; i < n; ++i) {
    SetKeyPointMeasurementCov(i, std_weight_position_ * mean_scale);
    std::array<float, 2> m{kpts[i].x, kpts[i].y};
    cv::Mat z = as_mat(m);
    auto& f = pt_filters_[i];
    cv::Mat sigma;
    // innovation covariance S = H * P_pre * H^T + R
    cv::gemm(f.measurementMatrix * f.errorCovPre, f.measurementMatrix, 1, f.measurementNoiseCov, 1,
             sigma, cv::GEMM_2_T);
    cv::Mat r = z - f.measurementMatrix * f.statePre;
    cv::Mat d = r.t() * sigma.inv() * r;
    dists[i] = d.at<float>();
  }
  return dists;
}
// Process noise: position variance on the [cx, cy, w, h] block, velocity
// variance on the remaining 4 state dims.
void TrackingFilter::SetBboxProcessCov(float sigma_p, float sigma_v) {
  const float var_p = sigma_p * sigma_p;
  const float var_v = sigma_v * sigma_v;
  auto& cov = bbox_filter_.processNoiseCov;
  cv::setIdentity(cov(cv::Rect(0, 0, 4, 4)), var_p);
  cv::setIdentity(cov(cv::Rect(4, 4, 4, 4)), var_v);
}
// Isotropic measurement noise over the 4 measured bbox dims.
void TrackingFilter::SetBboxMeasurementCov(float sigma_p) {
  const float var_p = sigma_p * sigma_p;
  cv::setIdentity(bbox_filter_.measurementNoiseCov, var_p);
}
// (Re)initializes the posterior error covariance of the bbox filter.
void TrackingFilter::SetBboxErrorCov(float sigma_p, float sigma_v) {
  const float var_p = sigma_p * sigma_p;
  const float var_v = sigma_v * sigma_v;
  auto& cov = bbox_filter_.errorCovPost;
  cv::setIdentity(cov(cv::Rect(0, 0, 4, 4)), var_p);
  cv::setIdentity(cov(cv::Rect(4, 4, 4, 4)), var_v);
}
// Constant-velocity transition: on top of the identity set by init(), place an
// identity in the upper-right 4x4 block so position integrates velocity
// (scale velocity included).
void TrackingFilter::SetBboxTransitionMat() {
  auto& trans = bbox_filter_.transitionMatrix;
  cv::setIdentity(trans(cv::Rect(4, 0, 4, 4)));
}
// Measurement matrix H selects [cx, cy, w, h] out of the 8-dim state.
void TrackingFilter::SetBboxMeasurementMat() {
  auto& meas = bbox_filter_.measurementMatrix;
  cv::setIdentity(meas(cv::Rect(0, 0, 4, 4)));
}
// Diagonal process noise of one key-point filter: position variance on
// [x, y], velocity variance on [dx, dy].
void TrackingFilter::SetKeyPointProcessCov(int index, float sigma_p, float sigma_v) {
  const float var_p = sigma_p * sigma_p;
  const float var_v = sigma_v * sigma_v;
  auto& cov = pt_filters_[index].processNoiseCov;
  cov.at<float>(0, 0) = var_p;
  cov.at<float>(1, 1) = var_p;
  cov.at<float>(2, 2) = var_v;
  cov.at<float>(3, 3) = var_v;
}
// Diagonal measurement noise of one key-point filter (x and y).
void TrackingFilter::SetKeyPointMeasurementCov(int index, float sigma_p) {
  const float var_p = sigma_p * sigma_p;
  auto& cov = pt_filters_[index].measurementNoiseCov;
  cov.at<float>(0, 0) = var_p;
  cov.at<float>(1, 1) = var_p;
}
// (Re)initializes the posterior error covariance of one key-point filter.
void TrackingFilter::SetKeyPointErrorCov(int index, float sigma_p, float sigma_v) {
  const float var_p = sigma_p * sigma_p;
  const float var_v = sigma_v * sigma_v;
  auto& cov = pt_filters_[index].errorCovPost;
  cov.at<float>(0, 0) = var_p;
  cov.at<float>(1, 1) = var_p;
  cov.at<float>(2, 2) = var_v;
  cov.at<float>(3, 3) = var_v;
}
// Constant-velocity coupling for one key-point: x += dx, y += dy
// (identity placed in the upper-right 2x2 block).
void TrackingFilter::SetKeyPointTransitionMat(int index) {
  auto& trans = pt_filters_[index].transitionMatrix;
  cv::setIdentity(trans(cv::Rect(2, 0, 2, 2)));
}
// Measurement matrix H selects [x, y] out of the 4-dim key-point state.
void TrackingFilter::SetKeyPointMeasurementMat(int index) {
  auto& meas = pt_filters_[index].measurementMatrix;
  cv::setIdentity(meas(cv::Rect(0, 0, 2, 2)));
}
// Re-seeds one key-point filter at the measured position with inflated
// uncertainty (2x position / 10x velocity), mirroring the constructor.
void TrackingFilter::ResetKeyPoint(int index, const Point& kpt, float scale) {
  SetKeyPointErrorCov(index, 2 * std_weight_position_ * scale, 10 * std_weight_velocity_ * scale);
  auto& state = pt_filters_[index].statePost;
  state.at<float>(0) = kpt.x;
  state.at<float>(1) = kpt.y;
}
} // namespace mmdeploy::mmpose::_pose_tracker

View File

@ -0,0 +1,50 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H
#include "opencv2/video/video.hpp"
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
// use Kalman filter to estimate and predict true states
// Holds one constant-velocity Kalman filter per key-point (state [x, y, dx, dy])
// plus one for the bbox (state [cx, cy, w, h] followed by the velocities). All
// noise covariances are re-scaled by the current bbox size on every step.
class TrackingFilter {
 public:
  // `std_weight_*` scale process/measurement noise relative to the bbox size
  TrackingFilter(const Bbox& bbox, const vector<Point>& kpts, float std_weight_position,
                 float std_weight_velocity);
  // advance all filters one step; returns the predicted (bbox, key-points)
  std::pair<Bbox, Points> Predict();
  // squared Mahalanobis distance of each measured key-point from its prediction
  vector<float> KeyPointDistance(const Points& kpts);
  // squared Mahalanobis distance of the measured bbox from its prediction
  float BboxDistance(const Bbox& bbox);
  // fuse measurements into the filters; untracked key-points are re-seeded
  std::pair<Bbox, Points> Correct(const Bbox& bbox, const Points& kpts,
                                  const vector<bool>& tracked);

 private:
  void SetBboxProcessCov(float sigma_p, float sigma_v);
  void SetBboxMeasurementCov(float sigma_p);
  void SetBboxErrorCov(float sigma_p, float sigma_v);
  void SetBboxTransitionMat();
  void SetBboxMeasurementMat();
  void SetKeyPointProcessCov(int index, float sigma_p, float sigma_v);
  void SetKeyPointMeasurementCov(int index, float sigma_p);
  void SetKeyPointErrorCov(int index, float sigma_p, float sigma_v);
  void SetKeyPointTransitionMat(int index);
  void SetKeyPointMeasurementMat(int index);
  // re-seed one key-point filter at `kpt` with inflated uncertainty
  void ResetKeyPoint(int index, const Point& kpt, float scale);

 private:
  std::vector<cv::KalmanFilter> pt_filters_;  // one filter per key-point
  cv::KalmanFilter bbox_filter_;
  float std_weight_position_;
  float std_weight_velocity_;
};
}  // namespace mmdeploy::mmpose::_pose_tracker
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H

View File

@ -0,0 +1,136 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "pose_tracker/utils.h"
namespace mmdeploy::mmpose::_pose_tracker {
// Greedy bipartite matching: repeatedly pick the highest-scoring still-valid
// (row, col) pair whose score reaches `thr`, invalidating the chosen row and
// column; returns the picked (row, col, score) triples.
vector<std::tuple<int, int, float>> greedy_assignment(const vector<float>& scores,
                                                      vector<int>& is_valid_row,
                                                      vector<int>& is_valid_col, float thr) {
  const auto n_rows = is_valid_row.size();
  const auto n_cols = is_valid_col.size();
  vector<std::tuple<int, int, float>> matches;
  matches.reserve(std::max(n_rows, n_cols));
  for (;;) {
    int best_row = -1;
    int best_col = -1;
    auto best_score = std::numeric_limits<float>::lowest();
    // exhaustive scan of the remaining valid pairs
    for (int r = 0; r < n_rows; ++r) {
      if (!is_valid_row[r]) {
        continue;
      }
      for (int c = 0; c < n_cols; ++c) {
        if (!is_valid_col[c]) {
          continue;
        }
        const auto s = scores[r * n_cols + c];
        if (s > best_score) {
          best_score = s;
          best_row = r;
          best_col = c;
        }
      }
    }
    // also terminates when no valid pair is left (best_score stays lowest())
    if (best_score < thr) {
      break;
    }
    is_valid_row[best_row] = 0;
    is_valid_col[best_col] = 0;
    matches.emplace_back(best_row, best_col, best_score);
  }
  return matches;
}
float intersection_over_union(const Bbox& a, const Bbox& b) {
auto x1 = std::max(a[0], b[0]);
auto y1 = std::max(a[1], b[1]);
auto x2 = std::min(a[2], b[2]);
auto y2 = std::min(a[3], b[3]);
auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1);
auto a_area = get_area(a);
auto b_area = get_area(b);
auto union_area = a_area + b_area - inter_area;
if (union_area == 0.f) {
return 0;
}
return inter_area / union_area;
}
// Mean OKS (object key-point similarity) between two poses; the key-point
// distances are normalized by the average bbox diagonal and the per-key-point
// `sigmas` constants.
float object_keypoint_similarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b,
                                 const Bbox& box_b, const vector<float>& sigmas) {
  assert(pts_a.size() == sigmas.size());
  assert(pts_b.size() == sigmas.size());
  auto diagonal = [](const Bbox& bbox) -> float {
    const auto w = bbox[2] - bbox[0];
    const auto h = bbox[3] - bbox[1];
    return std::sqrt(w * w + h * h);
  };
  const auto s = .5f * (diagonal(box_a) + diagonal(box_b));
  auto sum = 0.f;
  for (int i = 0; i < sigmas.size(); ++i) {
    const auto d = pts_a[i] - pts_b[i];
    const auto k = sigmas[i];
    sum += std::exp(-d.dot(d) / (2.f * s * s * k * k));
  }
  return sum / static_cast<float>(sigmas.size());
}
std::optional<Bbox> keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h,
float img_w, float scale, float kpt_thr, int min_keypoints) {
int valid = 0;
auto x1 = static_cast<float>(img_w);
auto y1 = static_cast<float>(img_h);
auto x2 = 0.f;
auto y2 = 0.f;
for (size_t i = 0; i < keypoints.size(); ++i) {
auto& kpt = keypoints[i];
if (scores[i] >= kpt_thr) {
x1 = std::min(x1, kpt.x);
y1 = std::min(y1, kpt.y);
x2 = std::max(x2, kpt.x);
y2 = std::max(y2, kpt.y);
++valid;
}
}
if (min_keypoints < 0) {
min_keypoints = (static_cast<int>(scores.size()) + 1) / 2;
}
if (valid < min_keypoints) {
return std::nullopt;
}
auto xc = .5f * (x1 + x2);
auto yc = .5f * (y1 + y2);
auto w = (x2 - x1) * scale;
auto h = (y2 - y1) * scale;
return std::array<float, 4>{
std::max(0.f, std::min(img_w, xc - .5f * w)),
std::max(0.f, std::min(img_h, yc - .5f * h)),
std::max(0.f, std::min(img_w, xc + .5f * w)),
std::max(0.f, std::min(img_h, yc + .5f * h)),
};
}
// Mimics TopDownAffine's bbox preprocessing for the pose model: fixes the
// aspect ratio to the model input size (192x256) by growing the smaller side,
// then expands the box by 1.25x around its center. Input/output are xyxy.
std::array<float, 4> map_bbox(const std::array<float, 4>& box) {
  static constexpr std::array image_size{192.f, 256.f};
  // keep the arithmetic in float; the previous `* 1.0 /` promoted to double
  // and narrowed back to float
  constexpr float aspect_ratio = image_size[0] / image_size[1];
  const float cx = .5f * (box[0] + box[2]);
  const float cy = .5f * (box[1] + box[3]);
  float w = box[2] - box[0];
  float h = box[3] - box[1];
  // grow the smaller dimension so that w / h == aspect_ratio
  if (w > aspect_ratio * h) {
    h = w / aspect_ratio;
  } else if (w < aspect_ratio * h) {
    w = h * aspect_ratio;
  }
  // 1.25x context padding used by the top-down pose pipeline
  w *= 1.25f;
  h *= 1.25f;
  return {cx - .5f * w, cy - .5f * h, cx + .5f * w, cy + .5f * h};
}
} // namespace mmdeploy::mmpose::_pose_tracker

View File

@ -0,0 +1,96 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H
#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H
#include <array>
#include <numeric>
#include <optional>
#include <vector>
#include "mmdeploy/core/utils/formatter.h"
#include "opencv2/core/core.hpp"
#include "pose_tracker/common.h"
namespace mmdeploy::mmpose::_pose_tracker {
using std::vector;
// geometry / score aliases shared across the pose tracker implementation
using Bbox = std::array<float, 4>;  // (x1, y1, x2, y2)
using Bboxes = vector<Bbox>;
using Point = cv::Point2f;
using Points = vector<cv::Point2f>;
using Score = float;
using Scores = vector<float>;
#define POSE_TRACKER_DEBUG(...) MMDEPLOY_INFO(__VA_ARGS__)
// opencv3 can't construct cv::Mat from std::array
// NOTE: the returned Mat aliases `a`'s storage; keep `a` alive while the Mat is used
template <size_t N>
cv::Mat as_mat(const std::array<float, N>& a) {
  return cv::Mat_<float>(a.size(), 1, const_cast<float*>(a.data()));
}
// Derive a bbox covering the key-points with score >= kpt_thr, expanded by
// `scale` and clamped to the image; std::nullopt when too few points qualify.
// scale = 1.5, kpt_thr = 0.3
std::optional<Bbox> keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h,
                                      float img_w, float scale, float kpt_thr, int min_keypoints);
// xyxy format
float intersection_over_union(const Bbox& a, const Bbox& b);
// mean OKS (object key-point similarity) of two poses, normalized by bbox size
float object_keypoint_similarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b,
                                 const Bbox& box_b, const vector<float>& sigmas);
// invalidate samples too similar to a higher-scored valid sample (defined below)
template <typename T>
void suppress_non_maximum(const vector<T>& scores, const vector<float>& similarities,
                          vector<int>& is_valid, float thresh);
inline float get_area(const Bbox& bbox) { return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]); }
inline Point get_center(const Bbox& bbox) {
  return {.5f * (bbox[0] + bbox[2]), .5f * (bbox[1] + bbox[3])};
}
// (width, height) of an xyxy bbox
inline std::array<float, 2> get_scale(const Bbox& bbox) {
  return {bbox[2] - bbox[0], bbox[3] - bbox[1]};
}
// inverse of get_center/get_scale: corner-form bbox from center + (w, h)
inline Bbox get_bbox(const Point& center, const std::array<float, 2>& scale) {
  return {
      center.x - .5f * scale[0],
      center.y - .5f * scale[1],
      center.x + .5f * scale[0],
      center.y + .5f * scale[1],
  };
}
// Greedy bipartite matching: repeatedly pick the highest-scoring valid
// (row, col) pair with score >= thr, invalidating the chosen row and column;
// returns the picked (row, col, score) triples.
vector<std::tuple<int, int, float>> greedy_assignment(const vector<float>& scores,
                                                      vector<int>& is_valid_row,
                                                      vector<int>& is_valid_col, float thr);
// `similarities` is a flattened NxN matrix (N == scores.size()); entry [u][v]
// >= thresh marks v as suppressed when u outranks it.
template <typename T>
inline void suppress_non_maximum(const vector<T>& scores, const vector<float>& similarities,
                                 vector<int>& is_valid, float thresh) {
  assert(is_valid.size() == scores.size());
  // visit samples in descending score order
  vector<int> indices(scores.size());
  std::iota(indices.begin(), indices.end(), 0);
  std::sort(indices.begin(), indices.end(), [&](int i, int j) { return scores[i] > scores[j]; });
  // suppress similar samples
  for (int i = 0; i < indices.size(); ++i) {
    if (auto u = indices[i]; is_valid[u]) {
      for (int j = i + 1; j < indices.size(); ++j) {
        if (auto v = indices[j]; is_valid[v]) {
          if (similarities[u * scores.size() + v] >= thresh) {
            is_valid[v] = false;
          }
        }
      }
    }
  }
}
// TopDownAffine's internal logic for mapping pose model inputs
Bbox map_bbox(const Bbox& box);
}  // namespace mmdeploy::mmpose::_pose_tracker
#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H

View File

@ -9,7 +9,7 @@
namespace mmdeploy {
namespace module_detail {
namespace module_adapter {
template <typename T>
struct is_tuple : std::false_type {};
@ -20,16 +20,13 @@ struct is_tuple<std::tuple<Ts...>> : std::true_type {};
template <typename T>
inline constexpr auto is_tuple_v = is_tuple<T>::value;
template <typename Ret, typename... Args>
template <typename... Args>
struct InvokeImpl {
template <typename F, typename... Ts>
static Result<Value> apply(F&& f, const Value& params, Ts&&... ts) {
std::tuple<uncvref_t<Args>...> args;
template <typename F>
static Result<Value> apply(F&& f, const Value& args) {
try {
from_value(params, args);
auto ret = apply_impl(std::forward<F>(f), std::move(args), std::index_sequence_for<Args...>{},
std::forward<Ts>(ts)...);
return make_ret_val(std::move(ret));
using ArgsType = std::tuple<uncvref_t<Args>...>;
return make_ret_val(std::apply((F &&) f, from_value<ArgsType>(args)));
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception: {}", e.what());
return Status(eFail);
@ -38,15 +35,9 @@ struct InvokeImpl {
}
}
template <typename F, typename Tuple, size_t... Is, typename... Ts>
static decltype(auto) apply_impl(F&& f, Tuple&& tuple, std::index_sequence<Is...>, Ts&&... ts) {
return std::invoke(std::forward<F>(f), std::forward<Ts>(ts)...,
std::get<Is>(std::forward<Tuple>(tuple))...);
}
template <typename T, typename T0 = uncvref_t<T>>
static Result<Value> make_ret_val(T&& ret) {
if constexpr (module_detail::is_tuple_v<T0>) {
if constexpr (is_tuple_v<T0>) {
return to_value(std::forward<T>(ret));
} else if constexpr (is_result_v<T0>) {
return ret ? make_ret_val(std::forward<T>(ret).value()) : std::forward<T>(ret).as_failure();
@ -56,60 +47,49 @@ struct InvokeImpl {
}
};
// function pointer
// match function pointer
template <typename Ret, typename... Args>
Result<Value> Invoke(Ret (*f)(Args...), const Value& args) {
return InvokeImpl<Ret, Args...>::apply(f, args);
return InvokeImpl<Args...>::apply(f, args);
}
// member function pointer
template <typename Ret, typename C0, typename C1, typename... Args>
Result<Value> Invoke(Ret (C0::*f)(Args...) const, C1* inst, const Value& args) {
return InvokeImpl<Ret, Args...>::apply(f, args, inst);
// match member function pointer `&C::operator()`
template <typename Ret, typename C, typename F, typename... Args>
Result<Value> Invoke(Ret (C::*)(Args...) const, const F& f, const Value& args) {
return InvokeImpl<Args...>::apply(f, args);
}
template <typename Ret, typename C0, typename C1, typename... Args>
Result<Value> Invoke(Ret (C0::*f)(Args...), C1* inst, const Value& args) {
return InvokeImpl<Ret, Args...>::apply(f, args, inst);
template <typename Ret, typename C, typename F, typename... Args>
Result<Value> Invoke(Ret (C::*)(Args...), F& f, const Value& args) {
return InvokeImpl<Args...>::apply(f, args);
}
// function object
template <typename T, typename C = std::remove_reference_t<T>,
// match function object
template <typename F, typename C = std::remove_reference_t<F>,
typename = std::void_t<decltype(&C::operator())>>
Result<Value> Invoke(T&& t, const Value& args) {
return Invoke(&C::operator(), &t, args);
Result<Value> Invoke(F&& f, const Value& args) {
return Invoke(&C::operator(), (F &&) f, args);
}
template <typename T>
struct IsPointer : std::false_type {};
template <typename R, typename... Args>
struct IsPointer<R (*)(Args...)> : std::false_type {};
template <typename T>
struct IsPointer<std::shared_ptr<T>> : std::true_type {};
template <typename T, typename D>
struct IsPointer<std::unique_ptr<T, D>> : std::true_type {};
template <typename T>
struct IsPointer<T*> : std::true_type {};
template <typename F>
Result<Value> Invoke(const std::unique_ptr<F>& f, const Value& args) {
return Invoke(*f, args);
}
template <typename F>
Result<Value> Invoke(const std::shared_ptr<F>& f, const Value& args) {
return Invoke(*f, args);
}
template <typename T, typename SFINAE = void>
struct AccessPolicy {
static constexpr auto apply = [](auto& x) -> decltype(auto) { return x; };
};
template <typename T>
struct AccessPolicy<T, std::enable_if_t<IsPointer<T>::value>> {
static constexpr auto apply = [](auto& x) -> decltype(auto) { return *x; };
};
template <typename T, typename A = AccessPolicy<T>>
template <typename Func>
class Task : public Module {
public:
explicit Task(T task) : task_(std::move(task)) {}
explicit Task(Func func) : func_(std::move(func)) {}
Result<Value> Process(const Value& arg) override {
return module_detail::Invoke(A::apply(task_), arg);
return ::mmdeploy::module_adapter::Invoke(func_, arg);
}
private:
T task_;
Func func_;
};
template <typename T>
@ -122,11 +102,10 @@ auto MakeTask(T&& x) {
return Task(std::forward<T>(x));
}
} // namespace module_detail
} // namespace module_adapter
using module_detail::CreateTask;
using module_detail::MakeTask;
using module_adapter::CreateTask;
using module_adapter::MakeTask;
} // namespace mmdeploy

View File

@ -41,7 +41,7 @@ Result<unique_ptr<Node>> InferenceBuilder::BuildImpl() {
}
pipeline_config["context"] = context;
MMDEPLOY_INFO("{}", pipeline_config);
MMDEPLOY_DEBUG("{}", pipeline_config);
OUTCOME_TRY(auto pipeline_builder, Builder::CreateFromConfig(pipeline_config));
OUTCOME_TRY(auto node, pipeline_builder->Build());

View File

@ -21,15 +21,17 @@ class CropResizePadImpl : public CropResizePad {
Tensor dst_tensor(desc);
cudaMemsetAsync(dst_tensor.data<uint8_t>(), 0, dst_tensor.byte_size(), cuda_stream);
if (src.data_type() == DataType::kINT8) {
OUTCOME_TRY(
ResizeDispatch<uint8_t>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
} else if (src.data_type() == DataType::kFLOAT) {
OUTCOME_TRY(
ResizeDispatch<float>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
} else {
MMDEPLOY_ERROR("unsupported data type {}", src.data_type());
return Status(eNotSupported);
if (crop_rect[2] - crop_rect[0] + 1 > 0 && crop_rect[3] - crop_rect[1] + 1 > 0) {
if (src.data_type() == DataType::kINT8) {
OUTCOME_TRY(ResizeDispatch<uint8_t>(src, crop_rect, target_size, pad_rect, dst_tensor,
cuda_stream));
} else if (src.data_type() == DataType::kFLOAT) {
OUTCOME_TRY(
ResizeDispatch<float>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
} else {
MMDEPLOY_ERROR("unsupported data type {}", src.data_type());
return Status(eNotSupported);
}
}
dst = std::move(dst_tensor);

View File

@ -295,10 +295,12 @@ cv::Mat CropResizePad(const cv::Mat& src, const std::vector<int>& crop_rect,
int width = target_size[0] + pad_rect[1] + pad_rect[3];
int height = target_size[1] + pad_rect[0] + pad_rect[2];
cv::Mat dst = cv::Mat::zeros(height, width, src.type());
cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1,
crop_rect[2] - crop_rect[0] + 1};
cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]};
cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]});
if (crop_rect[2] - crop_rect[0] + 1 > 0 && crop_rect[3] - crop_rect[1] + 1 > 0) {
cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1,
crop_rect[2] - crop_rect[0] + 1};
cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]};
cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]});
}
return dst;
}

View File

@ -41,7 +41,6 @@ add_example(rotated_detector c rotated_object_detection)
add_example(video_recognizer c video_recognition)
# TODO: figure out a better way
# add_example("" c det_cls)
# add_example("" c det_pose)
if (MMDEPLOY_BUILD_SDK_CXX_API)
add_example(classifier cpp classifier)
@ -52,6 +51,7 @@ if (MMDEPLOY_BUILD_SDK_CXX_API)
add_example(text_detector cpp text_det_recog)
add_example(pose_detector cpp pose_detector)
add_example(rotated_detector cpp rotated_detector)
add_example(pose_detector cpp pose_tracker)
add_example(pose_tracker cpp pose_tracker)
add_example(pose_detector cpp det_pose)
add_example(video_recognizer cpp video_cls)
endif ()

View File

@ -0,0 +1,113 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <iostream>
#include "mmdeploy/detector.hpp"
#include "mmdeploy/pose_detector.hpp"
#include "opencv2/imgcodecs/imgcodecs.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using std::vector;
cv::Mat Visualize(cv::Mat frame, const std::vector<mmdeploy_pose_detection_t>& poses, int size);
// det_pose demo: run an object detector, feed confident person boxes to a
// top-down pose model and write the visualization to det_pose_output.jpg.
int main(int argc, char* argv[]) {
  // validate the argument count BEFORE touching argv; the original read
  // argv[1..4] first, which is undefined behavior when too few args are given
  if (argc != 5) {
    std::cerr << "usage:\n\tdet_pose device_name det_model_path pose_model_path image_path\n";
    return -1;
  }
  const auto device_name = argv[1];
  const auto det_model_path = argv[2];
  const auto pose_model_path = argv[3];
  const auto image_path = argv[4];
  auto img = cv::imread(image_path);
  if (!img.data) {
    std::cerr << "failed to load image: " << image_path << "\n";
    return -1;
  }
  using namespace mmdeploy;
  Context context(Device{device_name});  // create context for holding the device handle
  Detector detector(Model(det_model_path), context);   // create object detector
  PoseDetector pose(Model(pose_model_path), context);  // create pose detector
  // apply detector
  auto dets = detector.Apply(img);
  // filter detections and extract bboxes for pose model: keep confident
  // label-0 detections (presumably "person" -- verify against the det model)
  std::vector<mmdeploy_rect_t> bboxes;
  for (const auto& det : dets) {
    if (det.label_id == 0 && det.score > .6f) {
      bboxes.push_back(det.bbox);
    }
  }
  // apply pose detector
  auto poses = pose.Apply(img, bboxes);
  // visualize
  auto vis = Visualize(img, {poses.begin(), poses.end()}, 1280);
  cv::imwrite("det_pose_output.jpg", vis);
  return 0;
}
// Static skeleton description for visualization: bone end-point index pairs,
// a color palette, and per-bone / per-key-point palette indices.
struct Skeleton {
  vector<std::pair<int, int>> skeleton;  // bones as (keypoint u, keypoint v)
  vector<cv::Scalar> palette;            // colors passed to cv::line/cv::circle
  vector<int> link_color;                // palette index per bone
  vector<int> point_color;               // palette index per key-point
};
// COCO 17-key-point skeleton definition shared by the visualization code.
const Skeleton& gCocoSkeleton() {
  static const Skeleton inst{
      // bones as (keypoint, keypoint) index pairs
      {
          {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12},
          {5, 6},   {5, 7},   {6, 8},   {7, 9},   {8, 10},  {1, 2},  {0, 1},
          {0, 2},   {1, 3},   {2, 4},   {3, 5},   {4, 6},
      },
      // color palette
      {
          {255, 128, 0},   {255, 153, 51},  {255, 178, 102}, {230, 230, 0},   {255, 153, 255},
          {153, 204, 255}, {255, 102, 255}, {255, 51, 255},  {102, 178, 255}, {51, 153, 255},
          {255, 153, 153}, {255, 102, 102}, {255, 51, 51},   {153, 255, 153}, {102, 255, 102},
          {51, 255, 51},   {0, 255, 0},     {0, 0, 255},     {255, 0, 0},     {255, 255, 255},
      },
      // palette index per bone
      {0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16},
      // palette index per key-point
      {16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0},
  };
  return inst;
}
// Draws the COCO skeleton of each pose onto a copy of `frame` resized so its
// longer side equals `size`; returns the annotated image.
cv::Mat Visualize(cv::Mat frame, const vector<mmdeploy_pose_detection_t>& poses, int size) {
  auto& [skeleton, palette, link_color, point_color] = gCocoSkeleton();
  auto scale = (float)size / (float)std::max(frame.cols, frame.rows);
  if (scale != 1) {
    cv::resize(frame, frame, {}, scale, scale);
  } else {
    // don't draw on the caller's buffer when no resize happens
    frame = frame.clone();
  }
  for (const auto& pose : poses) {
    // flatten key-points to [x0, y0, x1, y1, ...] and copy the scores
    vector<float> kpts(&pose.point[0].x, &pose.point[pose.length - 1].y + 1);
    vector<float> scores(pose.score, pose.score + pose.length);
    std::for_each(kpts.begin(), kpts.end(), [&](auto& x) { x *= scale; });
    constexpr auto score_thr = .5f;
    // marks key-points that participate in at least one drawn bone
    // (sized by coordinate count, i.e. 2x what's needed; extra slots unused)
    vector<int> used(kpts.size());
    for (size_t i = 0; i < skeleton.size(); ++i) {
      auto [u, v] = skeleton[i];
      if (scores[u] > score_thr && scores[v] > score_thr) {
        used[u] = used[v] = 1;
        cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]);
        cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]);
        cv::line(frame, p_u, p_v, palette[link_color[i]], 1, cv::LINE_AA);
      }
    }
    // draw only the key-points attached to a drawn bone
    for (size_t i = 0; i < kpts.size(); i += 2) {
      if (used[i / 2]) {
        cv::Point p(kpts[i], kpts[i + 1]);
        cv::circle(frame, p, 1, palette[point_color[i / 2]], 2, cv::LINE_AA);
      }
    }
  }
  return frame;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import cv2
import numpy as np
from mmdeploy_python import Detector, PoseDetector
def parse_args():
    """Parse command line arguments for the detection + pose demo."""
    ap = argparse.ArgumentParser(description='show how to use SDK Python API')
    ap.add_argument('device_name', help='name of device, cuda or cpu')
    ap.add_argument(
        'det_model_path',
        help='path of mmdeploy SDK model dumped by model converter')
    ap.add_argument(
        'pose_model_path',
        help='path of mmdeploy SDK model dumped by model converter')
    ap.add_argument('image_path', help='path of input image')
    return ap.parse_args()
def visualize(frame, keypoints, filename, thr=0.5, resize=1280):
    """Draw COCO-17 skeletons on a resized copy of `frame` and save it.

    Args:
        frame: input image (as loaded by cv2.imread).
        keypoints: per-person key-point array; last axis is (x, y, score)
            (assumed shape (num_person, num_kpt, 3) -- matches the pose
            detector output used by this demo).
        filename: output image path.
        thr: score threshold; lower-scored key-points are not drawn.
        resize: the longer image side is scaled to this many pixels.
    """
    skeleton = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11),
                (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2),
                (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6)]
    palette = [(255, 128, 0), (255, 153, 51), (255, 178, 102), (230, 230, 0),
               (255, 153, 255), (153, 204, 255), (255, 102, 255),
               (255, 51, 255), (102, 178, 255),
               (51, 153, 255), (255, 153, 153), (255, 102, 102), (255, 51, 51),
               (153, 255, 153), (102, 255, 102), (51, 255, 51), (0, 255, 0),
               (0, 0, 255), (255, 0, 0), (255, 255, 255)]
    link_color = [
        0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
    ]
    point_color = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
    scale = resize / max(frame.shape[0], frame.shape[1])
    scores = keypoints[..., 2]
    keypoints = (keypoints[..., :2] * scale).astype(int)
    img = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
    for kpts, score in zip(keypoints, scores):
        # a key-point is drawn only if it belongs to at least one drawn bone
        shown = [0] * len(kpts)
        for (u, v), color in zip(skeleton, link_color):
            if score[u] > thr and score[v] > thr:
                shown[u] = shown[v] = 1
                cv2.line(img, kpts[u], tuple(kpts[v]), palette[color], 1,
                         cv2.LINE_AA)
        for kpt, visible, color in zip(kpts, shown, point_color):
            if visible:
                cv2.circle(img, kpt, 1, palette[color], 2, cv2.LINE_AA)
    cv2.imwrite(filename, img)
def main():
    """Run detector + pose model on one image and save the visualization."""
    args = parse_args()
    # load image
    img = cv2.imread(args.image_path)
    # create object detector
    detector = Detector(
        model_path=args.det_model_path, device_name=args.device_name)
    # create pose detector
    pose_detector = PoseDetector(
        model_path=args.pose_model_path, device_name=args.device_name)
    # apply detector
    bboxes, labels, _ = detector(img)
    # keep label-0 detections with score > 0.6 (presumably "person";
    # verify against the detection model's label map)
    keep = np.logical_and(labels == 0, bboxes[..., 4] > 0.6)
    bboxes = bboxes[keep, :4]
    # apply pose detector
    poses = pose_detector(img, bboxes)
    visualize(img, poses, 'det_pose_output.jpg', 0.5, 1280)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,98 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import cv2
from mmdeploy_python import PoseTracker
def parse_args():
    """Parse CLI arguments; a numeric `video` argument becomes a camera index."""
    ap = argparse.ArgumentParser(description='show how to use SDK Python API')
    ap.add_argument('device_name', help='name of device, cuda or cpu')
    ap.add_argument(
        'det_model',
        help='path of mmdeploy SDK model dumped by model converter')
    ap.add_argument(
        'pose_model',
        help='path of mmdeploy SDK model dumped by model converter')
    ap.add_argument('video', help='video path or camera index')
    ap.add_argument('--output_dir', help='output directory', default=None)
    parsed = ap.parse_args()
    if parsed.video.isnumeric():
        # cv2.VideoCapture treats an int as a webcam index
        parsed.video = int(parsed.video)
    return parsed
def visualize(frame, results, output_dir, frame_id, thr=0.5, resize=1280):
    """Draw tracking results on `frame`; save to `output_dir` or display.

    Args:
        frame: current video frame.
        results: (keypoints, bboxes, track_ids) tuple from the tracker; the
            key-point last axis is (x, y, score).
        output_dir: when set, frames are written there; otherwise shown live.
        frame_id: index used to name the output file.
        thr: key-point score threshold.
        resize: the longer image side is scaled to this many pixels.

    Returns:
        False when the user pressed 'q' in the display window, True otherwise,
        so the caller can stop the capture loop.
    """
    skeleton = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11),
                (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2),
                (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6)]
    palette = [(255, 128, 0), (255, 153, 51), (255, 178, 102), (230, 230, 0),
               (255, 153, 255), (153, 204, 255), (255, 102, 255),
               (255, 51, 255), (102, 178, 255),
               (51, 153, 255), (255, 153, 153), (255, 102, 102), (255, 51, 51),
               (153, 255, 153), (102, 255, 102), (51, 255, 51), (0, 255, 0),
               (0, 0, 255), (255, 0, 0), (255, 255, 255)]
    link_color = [
        0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
    ]
    point_color = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
    scale = resize / max(frame.shape[0], frame.shape[1])
    keypoints, bboxes, _ = results
    scores = keypoints[..., 2]
    keypoints = (keypoints[..., :2] * scale).astype(int)
    bboxes *= scale
    img = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
    for kpts, score, bbox in zip(keypoints, scores, bboxes):
        show = [0] * len(kpts)
        for (u, v), color in zip(skeleton, link_color):
            if score[u] > thr and score[v] > thr:
                cv2.line(img, kpts[u], tuple(kpts[v]), palette[color], 1,
                         cv2.LINE_AA)
                show[u] = show[v] = 1
        for kpt, show, color in zip(kpts, show, point_color):
            if show:
                cv2.circle(img, kpt, 1, palette[color], 2, cv2.LINE_AA)
    if output_dir:
        cv2.imwrite(f'{output_dir}/{str(frame_id).zfill(6)}.jpg', img)
    else:
        cv2.imshow('pose_tracker', img)
        # cv2.waitKey returns an int key code; the original compared it to the
        # string 'q' (always unequal), so pressing 'q' could never quit
        return cv2.waitKey(1) != ord('q')
    return True
def main():
    """Track poses over a video file or camera stream and visualize frames."""
    args = parse_args()
    video = cv2.VideoCapture(args.video)
    tracker = PoseTracker(
        det_model=args.det_model,
        pose_model=args.pose_model,
        device_name=args.device_name)
    # optionally use OKS for keypoints similarity comparison
    coco_sigmas = [
        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
    ]
    state = tracker.create_state(
        det_interval=1, det_min_bbox_size=100, keypoint_sigmas=coco_sigmas)
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)
    frame_id = 0
    while True:
        success, frame = video.read()
        if not success:
            # end of stream or read failure
            break
        # NOTE(review): detect=-1 presumably lets the tracker decide when to
        # run detection -- confirm against the PoseTracker API docs
        results = tracker(state, frame, detect=-1)
        # visualize() returns False when the user requested to quit
        if not visualize(frame, results, args.output_dir, frame_id):
            break
        frame_id += 1
if __name__ == '__main__':
main()