[Sync] Sync csharp api to dev-1.x (#1889)

* [Feature] Sync csharp apis with newly added c apis && demo (#1718) * sync c api to c# * fix typo * add pose tracker c# demo * udpate gitignore * remove print * fix lint * update rotated detection api * update rotated detection demo * rename pose_tracking -> pose_tracker * use input size as default * fix clang-format
2023-03-21 11:54:32 +08:00 · 2023-03-21 11:54:32 +08:00 · 423e27a4fe
parent 2fcc8dc19d
commit 423e27a4fe
22 changed files with 1419 additions and 280 deletions
--- a/.gitignore
+++ b/.gitignore
@ -166,3 +166,6 @@ service/snpe/grpc_cpp_plugin
 csrc/mmdeploy/preprocess/elena/json
 csrc/mmdeploy/preprocess/elena/cpu_kernel/*
 csrc/mmdeploy/preprocess/elena/cuda_kernel/*
+
+# c#
+demo/csharp/*/Properties
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Context.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Context.cs
@ -0,0 +1,69 @@
+namespace MMDeploy
+{
+    /// <summary>
+    /// Context.
+    /// </summary>
+    public class Context : DisposableObject
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Context"/> class.
+        /// </summary>
+        public Context()
+        {
+            ThrowException(NativeMethods.mmdeploy_context_create(out _handle));
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Context"/> class with device.
+        /// </summary>
+        /// <param name="device">device.</param>
+        public Context(Device device) : this()
+        {
+            Add(device);
+        }
+
+        /// <summary>
+        /// Add model to the context.
+        /// </summary>
+        /// <param name="name">name.</param>
+        /// <param name="model">model.</param>
+        public void Add(string name, Model model)
+        {
+            ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.MODEL, name, model));
+        }
+
+        /// <summary>
+        /// Add scheduler to the context.
+        /// </summary>
+        /// <param name="name">name.</param>
+        /// <param name="scheduler">scheduler.</param>
+        public void Add(string name, Scheduler scheduler)
+        {
+            ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.SCHEDULER, name, scheduler));
+        }
+
+        /// <summary>
+        /// Add device to the context.
+        /// </summary>
+        /// <param name="device">device.</param>
+        public void Add(Device device)
+        {
+            ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.DEVICE, "", device));
+        }
+
+        /// <summary>
+        /// Add profiler to the context.
+        /// </summary>
+        /// <param name="profiler">profiler.</param>
+        public void Add(Profiler profiler)
+        {
+            ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.PROFILER, "", profiler));
+        }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            NativeMethods.mmdeploy_model_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Device.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Device.cs
@ -0,0 +1,39 @@
+namespace MMDeploy
+{
+    /// <summary>
+    /// Device.
+    /// </summary>
+    public class Device : DisposableObject
+    {
+        private readonly string _name;
+        private readonly int _index;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Device"/> class.
+        /// </summary>
+        /// <param name="name">device name.</param>
+        /// <param name="index">device index.</param>
+        public Device(string name, int index = 0)
+        {
+            this._name = name;
+            this._index = index;
+            ThrowException(NativeMethods.mmdeploy_device_create(name, index, out _handle));
+        }
+
+        /// <summary>
+        /// Gets device name.
+        /// </summary>
+        public string Name { get => _name; }
+
+        /// <summary>
+        /// Gets device index.
+        /// </summary>
+        public int Index { get => _index; }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            NativeMethods.mmdeploy_device_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/DisposableObject.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/DisposableObject.cs
@ -92,5 +92,11 @@ namespace MMDeploy
                throw new Exception(result.ToString());
            }
        }
+
+        /// <summary>
+        /// Gets internal handle.
+        /// </summary>
+        /// <param name="obj">instance.</param>
+        public static implicit operator IntPtr(DisposableObject obj) => obj._handle;
    }
 }
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Model.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Model.cs
@ -0,0 +1,23 @@
+namespace MMDeploy
+{
+    /// <summary>
+    /// model.
+    /// </summary>
+    public class Model : DisposableObject
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Model"/> class.
+        /// </summary>
+        /// <param name="modelPath">model path.</param>
+        public Model(string modelPath)
+        {
+            ThrowException(NativeMethods.mmdeploy_model_create_by_path(modelPath, out _handle));
+        }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            NativeMethods.mmdeploy_model_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/PoseTracker.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/PoseTracker.cs
@ -0,0 +1,350 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace MMDeploy
+{
+#pragma warning disable 0649
+    internal unsafe struct CPoseTrack
+    {
+        public Pointf* Keypoints;
+        public int KeypointCount;
+        public float* Scores;
+        public Rect Bbox;
+        public int TargetId;
+    }
+#pragma warning restore 0649
+
+    /// <summary>
+    /// Single tracking result of a bbox.
+    /// A picture may contains multiple reuslts.
+    /// </summary>
+    public struct PoseTrack
+    {
+        /// <summary>
+        /// Keypoints.
+        /// </summary>
+        public List<Pointf> Keypoints;
+
+        /// <summary>
+        /// Scores.
+        /// </summary>
+        public List<float> Scores;
+
+        /// <summary>
+        /// Bbox.
+        /// </summary>
+        public Rect Bbox;
+
+        /// <summary>
+        /// TargetId.
+        /// </summary>
+        public int TargetId;
+
+        /// <summary>
+        /// Init data.
+        /// </summary>
+        private void Init()
+        {
+            if (Keypoints == null || Scores == null)
+            {
+                Keypoints = new List<Pointf>();
+                Scores = new List<float>();
+            }
+        }
+
+        internal unsafe void Add(CPoseTrack* result)
+        {
+            Init();
+            for (int i = 0; i < result->KeypointCount; i++)
+            {
+                Keypoints.Add(new Pointf(result->Keypoints[i].X, result->Keypoints[i].Y));
+                Scores.Add(result->Scores[i]);
+            }
+
+            Bbox = result->Bbox;
+            TargetId = result->TargetId;
+        }
+    }
+
+    /// <summary>
+    /// Output of PoseTracker.
+    /// </summary>
+    public struct PoseTrackerOutput
+    {
+        /// <summary>
+        /// Tracking results for single image.
+        /// </summary>
+        public List<PoseTrack> Results;
+
+        /// <summary>
+        /// Gets number of output.
+        /// </summary>
+        public int Count
+        {
+            get { return (Results == null) ? 0 : Results.Count; }
+        }
+
+        /// <summary>
+        /// Result for box level.
+        /// </summary>
+        /// <param name="boxRes">Box res.</param>
+        public void Add(PoseTrack boxRes)
+        {
+            if (Results == null)
+            {
+                Results = new List<PoseTrack>();
+            }
+
+            Results.Add(boxRes);
+        }
+    }
+
+    /// <summary>
+    /// PoseTracker.
+    /// </summary>
+    public class PoseTracker : DisposableObject
+    {
+        /// <summary>
+        /// Params.
+        /// </summary>
+        public struct Params
+        {
+            /// <summary>
+            /// init with default value.
+            /// </summary>
+            public void Init()
+            {
+                IntPtr ptr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(Params)));
+                NativeMethods.mmdeploy_pose_tracker_default_params(ptr);
+                this = Marshal.PtrToStructure<Params>(ptr);
+                Marshal.DestroyStructure<Params>(ptr);
+                Marshal.FreeHGlobal(ptr);
+            }
+
+            /// <summary>
+            /// Sets keypoint sigmas.
+            /// </summary>
+            /// <param name="array">keypoint sigmas.</param>
+            public void SetKeypointSigmas(float[] array)
+            {
+                this.KeypointSigmasSize = array.Length;
+                this.KeypointSigmas = Marshal.AllocHGlobal(sizeof(float) * array.Length);
+                Marshal.Copy(array, 0, this.KeypointSigmas, array.Length);
+            }
+
+            /// <summary>
+            /// Release ptr.
+            /// </summary>
+            public void DeleteKeypointSigmas()
+            {
+                if (this.KeypointSigmas != null)
+                {
+                    Marshal.FreeHGlobal(this.KeypointSigmas);
+                    this.KeypointSigmasSize = 0;
+                }
+            }
+
+            /// <summary>
+            /// detection interval, default = 1.
+            /// </summary>
+            public int DetInterval;
+
+            /// <summary>
+            /// detection label use for pose estimation, default = 0.
+            /// </summary>
+            public int DetLabel;
+
+            /// <summary>
+            /// detection score threshold, default = 0.5.
+            /// </summary>
+            public float DetThr;
+
+            /// <summary>
+            /// detection minimum bbox size (compute as sqrt(area)), default = -1.
+            /// </summary>
+            public float DetMinBboxSize;
+
+            /// <summary>
+            /// nms iou threshold for merging detected bboxes and bboxes from tracked targets, default = 0.7.
+            /// </summary>
+            public float DetNmsThr;
+
+            /// <summary>
+            /// max number of bboxes used for pose estimation per frame, default = -1.
+            /// </summary>
+            public int PoseMaxNumBboxes;
+
+            /// <summary>
+            /// threshold for visible key-points, default = 0.5.
+            /// </summary>
+            public float PoseKptThr;
+
+            /// <summary>
+            /// min number of key-points for valid poses, default = -1.
+            /// </summary>
+            public int PoseMinKeypoints;
+
+            /// <summary>
+            /// scale for expanding key-points to bbox, default = 1.25.
+            /// </summary>
+            public float PoseBboxScale;
+
+            /// <summary>
+            /// min pose bbox size, tracks with bbox size smaller than the threshold will be dropped,default = -1.
+            /// </summary>
+            public float PoseMinBboxSize;
+
+            /// <summary>
+            /// nms oks/iou threshold for suppressing overlapped poses, useful when multiple pose estimations
+            /// collapse to the same target, default = 0.5.
+            /// </summary>
+            public float PoseNmsThr;
+
+            /// <summary>
+            /// keypoint sigmas for computing OKS, will use IOU if not set, default = nullptr.
+            /// </summary>
+            public IntPtr KeypointSigmas;
+
+            /// <summary>
+            /// size of keypoint sigma array, must be consistent with the number of key-points, default = 0.
+            /// </summary>
+            public int KeypointSigmasSize;
+
+            /// <summary>
+            /// iou threshold for associating missing tracks, default = 0.4.
+            /// </summary>
+            public float TrackIouThr;
+
+            /// <summary>
+            /// max number of missing frames before a missing tracks is removed, default = 10.
+            /// </summary>
+            public int TrackMaxMissing;
+
+            /// <summary>
+            /// track history size, default = 1.
+            /// </summary>
+            public int TrackHistorySize;
+
+            /// <summary>
+            /// weight of position for setting covariance matrices of kalman filters, default = 0.05.
+            /// </summary>
+            public float StdWeightPosition;
+
+            /// <summary>
+            /// weight of velocity for setting covariance matrices of kalman filters, default = 0.00625.
+            /// </summary>
+            public float StdWeightVelocity;
+
+            /// <summary>
+            /// params for the one-euro filter for smoothing the outputs - (beta, fc_min, fc_derivative)
+            /// default = (0.007, 1, 1).
+            /// </summary>
+            [MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)]
+            public float[] SmoothParams;
+        }
+
+        /// <summary>
+        /// tracking state.
+        /// </summary>
+        public class State : DisposableObject
+        {
+            /// <summary>
+            /// Initializes a new instance of the <see cref="State"/> class.
+            /// </summary>
+            /// <param name="pipeline">pipeline.</param>
+            /// <param name="param">param.</param>
+            public State(IntPtr pipeline, Params param)
+            {
+                ThrowException(NativeMethods.mmdeploy_pose_tracker_create_state(pipeline, param, out _handle));
+            }
+
+            /// <inheritdoc/>
+            protected override void ReleaseHandle()
+            {
+                NativeMethods.mmdeploy_pose_tracker_destroy_state(_handle);
+            }
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="PoseTracker"/> class.
+        /// </summary>
+        /// <param name="detect">detect model.</param>
+        /// <param name="pose">pose model.</param>
+        /// <param name="context">context.</param>
+        public PoseTracker(Model detect, Model pose, Context context)
+        {
+            ThrowException(NativeMethods.mmdeploy_pose_tracker_create(detect, pose, context, out _handle));
+        }
+
+        /// <summary>
+        /// Get track information of image.
+        /// </summary>
+        /// <param name="state">state for video.</param>
+        /// <param name="mat">input mat.</param>
+        /// <param name="detect">control the use of detector.
+        /// -1: use params.DetInterval, 0: don't use detector, 1: force use detector.</param>
+        /// <returns>results of this frame.</returns>
+        public PoseTrackerOutput Apply(State state, Mat mat, int detect = -1)
+        {
+            PoseTrackerOutput output = default;
+
+            IntPtr[] states = new IntPtr[1] { state };
+            Mat[] mats = new Mat[1] { mat };
+            int[] detects = new int[1] { -1 };
+
+            unsafe
+            {
+                CPoseTrack* results = null;
+                int* resultCount = null;
+                fixed (Mat* _mats = mats)
+                fixed (IntPtr* _states = states)
+                fixed (int* _detects = detects)
+                {
+                    ThrowException(NativeMethods.mmdeploy_pose_tracker_apply(_handle, _states, _mats, _detects,
+                        mats.Length, &results, &resultCount));
+
+                    FormatResult(resultCount, results, ref output, out var total);
+                    ReleaseResult(results, resultCount, mats.Length);
+                }
+            }
+
+            return output;
+        }
+
+        private unsafe void FormatResult(int* resultCount, CPoseTrack* results, ref PoseTrackerOutput output, out int total)
+        {
+            total = resultCount[0];
+            for (int i = 0; i < total; i++)
+            {
+                PoseTrack outi = default;
+                outi.Add(results);
+                output.Add(outi);
+                results++;
+            }
+        }
+
+        private unsafe void ReleaseResult(CPoseTrack* results, int* resultCount, int count)
+        {
+            NativeMethods.mmdeploy_pose_tracker_release_result(results, resultCount, count);
+        }
+
+        /// <summary>
+        /// Create internal state.
+        /// </summary>
+        /// <param name="param">instance of Params.</param>
+        /// <returns>instance of State.</returns>
+        public State CreateState(Params param)
+        {
+            State state = new State(_handle, param);
+            return state;
+        }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            // _state.Dispose();
+            NativeMethods.mmdeploy_pose_tracker_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Profiler.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Profiler.cs
@ -0,0 +1,23 @@
+namespace MMDeploy
+{
+    /// <summary>
+    /// Profiler.
+    /// </summary>
+    public class Profiler : DisposableObject
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Profiler"/> class.
+        /// </summary>
+        /// <param name="path">path.</param>
+        public Profiler(string path)
+        {
+            ThrowException(NativeMethods.mmdeploy_profiler_create(path, out _handle));
+        }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            NativeMethods.mmdeploy_profiler_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/RotatedDetector.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/RotatedDetector.cs
@ -0,0 +1,157 @@
+using System;
+using System.Collections.Generic;
+
+namespace MMDeploy
+{
+    /// <summary>
+    /// Single detection result of a picture.
+    /// A picture may contains multiple reuslts.
+    /// </summary>
+    public struct RDetect
+    {
+        /// <summary>
+        /// Label id.
+        /// </summary>
+        public int LabelId;
+
+        /// <summary>
+        /// Score.
+        /// </summary>
+        public float Score;
+
+        /// <summary>
+        /// Center x.
+        /// </summary>
+        public float Cx;
+
+        /// <summary>
+        /// Center y.
+        /// </summary>
+        public float Cy;
+
+        /// <summary>
+        /// Width.
+        /// </summary>
+        public float Width;
+
+        /// <summary>
+        /// Height.
+        /// </summary>
+        public float Height;
+
+        /// <summary>
+        /// Angle.
+        /// </summary>
+        public float Angle;
+
+        internal unsafe RDetect(RDetect* result) : this()
+        {
+            this = *result;
+        }
+    }
+
+    /// <summary>
+    /// Output of RotatedDetector.
+    /// </summary>
+    public struct RotatedDetectorOutput
+    {
+        /// <summary>
+        /// Rotated detection results for single image.
+        /// </summary>
+        public List<RDetect> Results;
+
+        private void Init()
+        {
+            if (Results == null)
+            {
+                Results = new List<RDetect>();
+            }
+        }
+
+        internal unsafe void Add(RDetect* result)
+        {
+            Init();
+            Results.Add(new RDetect(result));
+        }
+
+        /// <summary>
+        /// Gets number of output.
+        /// </summary>
+        public int Count
+        {
+            get { return (Results == null) ? 0 : Results.Count; }
+        }
+    }
+
+    /// <summary>
+    /// RotatedDetector.
+    /// </summary>
+    public class RotatedDetector : DisposableObject
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="RotatedDetector"/> class.
+        /// </summary>
+        /// <param name="modelPath">model path.</param>
+        /// <param name="deviceName">device name.</param>
+        /// <param name="deviceId">device id.</param>
+        public RotatedDetector(string modelPath, string deviceName, int deviceId)
+        {
+            ThrowException(NativeMethods.mmdeploy_rotated_detector_create_by_path(modelPath,
+                deviceName, deviceId, out _handle));
+        }
+
+        /// <summary>
+        /// Get information of each image in a batch.
+        /// </summary>
+        /// <param name="mats">input mats.</param>
+        /// <returns>Results of each input mat.</returns>
+        public List<RotatedDetectorOutput> Apply(Mat[] mats)
+        {
+            List<RotatedDetectorOutput> output = new List<RotatedDetectorOutput>();
+
+            unsafe
+            {
+                RDetect* results = null;
+                int* resultCount = null;
+                fixed (Mat* _mats = mats)
+                {
+                    ThrowException(NativeMethods.mmdeploy_rotated_detector_apply(_handle,
+                        _mats, mats.Length, &results, &resultCount));
+                }
+
+                FormatResult(mats.Length, resultCount, results, ref output, out var total);
+                ReleaseResult(results, resultCount);
+            }
+
+            return output;
+        }
+
+        private unsafe void FormatResult(int matCount, int* resultCount, RDetect* results,
+            ref List<RotatedDetectorOutput> output, out int total)
+        {
+            total = matCount;
+            for (int i = 0; i < matCount; i++)
+            {
+                RotatedDetectorOutput outi = default;
+                for (int j = 0; j < resultCount[i]; j++)
+                {
+                    outi.Add(results);
+                    results++;
+                }
+
+                output.Add(outi);
+            }
+        }
+
+        private unsafe void ReleaseResult(RDetect* results, int* resultCount)
+        {
+            NativeMethods.mmdeploy_rotated_detector_release_result(results, resultCount);
+        }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            NativeMethods.mmdeploy_rotated_detector_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Scheduler.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/Scheduler.cs
@ -0,0 +1,51 @@
+using System;
+
+namespace MMDeploy
+{
+    /// <summary>
+    /// Scheduler.
+    /// </summary>
+    public class Scheduler : DisposableObject
+    {
+        private Scheduler()
+        {
+        }
+
+        /// <summary>
+        /// Create thread pool scheduler.
+        /// </summary>
+        /// <param name="num_threads">thread number.</param>
+        /// <returns>scheduler.</returns>
+        public static Scheduler ThreadPool(int num_threads)
+        {
+            Scheduler result = new Scheduler();
+            unsafe
+            {
+                result._handle = (IntPtr)NativeMethods.mmdeploy_executor_create_thread_pool(num_threads);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Create single thread scheduler.
+        /// </summary>
+        /// <returns>scheduler.</returns>
+        public static Scheduler Thread()
+        {
+            Scheduler result = new Scheduler();
+            unsafe
+            {
+                result._handle = (IntPtr)NativeMethods.mmdeploy_executor_create_thread();
+            }
+
+            return result;
+        }
+
+        /// <inheritdoc/>
+        protected override void ReleaseHandle()
+        {
+            NativeMethods.mmdeploy_scheduler_destroy(_handle);
+        }
+    }
+}
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/TextDetector.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/TextDetector.cs
@ -99,7 +99,7 @@ namespace MMDeploy
    }

    /// <summary>
-    /// Output of DetectorOutput.
+    /// Output of TextDetector.
    /// </summary>
    public struct TextDetectorOutput
    {
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/common.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/common.cs
@ -89,4 +89,17 @@ namespace MMDeploy
            Y = y;
        }
    }
+
+    /// <summary>
+    /// Context type.
+    /// </summary>
+    public enum ContextType
+    {
+        DEVICE = 0,
+        STREAM = 1,
+        MODEL = 2,
+        SCHEDULER = 3,
+        MAT = 4,
+        PROFILER = 5,
+    }
 }
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/NativeAPIs/NativeMethods_c_api.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/NativeAPIs/NativeMethods_c_api.cs
@ -9,6 +9,37 @@ namespace MMDeploy
    /// </summary>
    internal static partial class NativeMethods
    {
+        #region common.h
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_context_create(out IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_context_create_by_device(string deviceName, int deviceId,
+            out IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern void mmdeploy_context_destroy(IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_context_add(IntPtr handle, int type, string name,
+            IntPtr obj);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_device_create(string device_name, int device_id,
+            out IntPtr device);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern void mmdeploy_device_destroy(IntPtr device);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_profiler_create(string path, out IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe void mmdeploy_profiler_destroy(IntPtr handle);
+        #endregion
+
+        #region scheduler.h
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe void* mmdeploy_executor_create_thread();
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe void* mmdeploy_executor_create_thread_pool(int num_threads);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern void mmdeploy_scheduler_destroy(IntPtr handle);
+        #endregion
+
        #region model.h
        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
        public static extern int mmdeploy_model_create_by_path(string path, out IntPtr handle);
@ -38,6 +69,27 @@ namespace MMDeploy
        public static extern void mmdeploy_pose_detector_destroy(IntPtr handle);
        #endregion

+        #region pose_tracker.h
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_pose_tracker_create(IntPtr det_model, IntPtr pose_model,
+            IntPtr context, out IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_pose_tracker_destroy(IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_pose_tracker_default_params(IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_pose_tracker_create_state(IntPtr pipeline,
+            PoseTracker.Params param, out IntPtr state);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern void mmdeploy_pose_tracker_destroy_state(IntPtr state);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe int mmdeploy_pose_tracker_apply(IntPtr handle, IntPtr* state,
+            Mat* mats, int* useDet, int count, CPoseTrack** results, int** resultCount);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe void mmdeploy_pose_tracker_release_result(CPoseTrack* results,
+            int* resultCount, int count);
+        #endregion
+
        #region classifier.h
        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
        public static extern int mmdeploy_classifier_create(IntPtr model, string deviceName,
@ -55,6 +107,23 @@ namespace MMDeploy
        public static extern void mmdeploy_classifier_destroy(IntPtr handle);
        #endregion

+        #region rotated_detector.h
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_rotated_detector_create(IntPtr model,
+            string deviceName, int deviceId, out IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern int mmdeploy_rotated_detector_create_by_path(string modelPath,
+            string deviceName, int deviceId, out IntPtr handle);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe int mmdeploy_rotated_detector_apply(IntPtr handle, Mat* mats,
+            int matCount, RDetect** results, int** resultCount);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern unsafe void mmdeploy_rotated_detector_release_result(RDetect* results,
+            int* resultCount);
+        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+        public static extern void mmdeploy_rotated_detector_destroy(IntPtr handle);
+        #endregion
+
        #region detector.h
        [Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
        public static extern int mmdeploy_detector_create(IntPtr model, string deviceName,
--- a/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn.cpp
+++ b/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn.cpp
@ -16,17 +16,19 @@ static const char *PLUGIN_VERSION{"1"};
 static const char *PLUGIN_NAME{"MMCVMultiScaleDeformableAttention"};
 }  // namespace

-MultiScaleDeformableAttnPluginDynamic::MultiScaleDeformableAttnPluginDynamic(const std::string &name)
+MultiScaleDeformableAttnPluginDynamic::MultiScaleDeformableAttnPluginDynamic(
+    const std::string &name)
    : TRTPluginBase(name) {}

 MultiScaleDeformableAttnPluginDynamic::MultiScaleDeformableAttnPluginDynamic(const std::string name,
-                                                                           const void *data,
-                                                                           size_t length)
+                                                                             const void *data,
+                                                                             size_t length)
    : TRTPluginBase(name) {}
 MultiScaleDeformableAttnPluginDynamic::~MultiScaleDeformableAttnPluginDynamic() {}

 nvinfer1::IPluginV2DynamicExt *MultiScaleDeformableAttnPluginDynamic::clone() const TRT_NOEXCEPT {
-  MultiScaleDeformableAttnPluginDynamic *plugin = new MultiScaleDeformableAttnPluginDynamic(mLayerName);
+  MultiScaleDeformableAttnPluginDynamic *plugin =
+      new MultiScaleDeformableAttnPluginDynamic(mLayerName);
  plugin->setPluginNamespace(getPluginNamespace());

  return plugin;
@ -40,37 +42,29 @@ nvinfer1::DimsExprs MultiScaleDeformableAttnPluginDynamic::getOutputDimensions(
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[3].d[1];

-  ret.d[2] = exprBuilder.operation(DimensionOperation::kPROD,
-                                   *inputs[0].d[2], *inputs[0].d[3]);
+  ret.d[2] = exprBuilder.operation(DimensionOperation::kPROD, *inputs[0].d[2], *inputs[0].d[3]);

  return ret;
 }

 bool MultiScaleDeformableAttnPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs, int nbOutputs) TRT_NOEXCEPT {
-
-    if (ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR)
-    {
-        if ((pos == 1) || (pos == 2))
-        {
-            return (ioDesc[pos].type == nvinfer1::DataType::kINT32);
-        }
-        else
-        {
-            return ((ioDesc[pos].type == ioDesc[0].type) &&
-                  ((ioDesc[pos].type == nvinfer1::DataType::kFLOAT) || (ioDesc[pos].type == nvinfer1::DataType::kHALF)));
-        }
-    }
-    else
-    {
-        return false;
+  if (ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) {
+    if ((pos == 1) || (pos == 2)) {
+      return (ioDesc[pos].type == nvinfer1::DataType::kINT32);
+    } else {
+      return ((ioDesc[pos].type == ioDesc[0].type) &&
+              ((ioDesc[pos].type == nvinfer1::DataType::kFLOAT) ||
+               (ioDesc[pos].type == nvinfer1::DataType::kHALF)));
    }
+  } else {
+    return false;
+  }
 }

 void MultiScaleDeformableAttnPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
-    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) TRT_NOEXCEPT {
-}
+    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) TRT_NOEXCEPT {}

 size_t MultiScaleDeformableAttnPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
@ -79,44 +73,43 @@ size_t MultiScaleDeformableAttnPluginDynamic::getWorkspaceSize(
 }

 int MultiScaleDeformableAttnPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
-                                                  const nvinfer1::PluginTensorDesc *outputDesc,
-                                                  const void *const *inputs, void *const *outputs,
-                                                  void *workSpace,
-                                                  cudaStream_t stream) TRT_NOEXCEPT {
-    int32_t const batch = inputDesc[0].dims.d[0];
-    int32_t spatial_size = inputDesc[0].dims.d[1];
-    int32_t num_heads = inputDesc[0].dims.d[2];
-    int32_t channels = inputDesc[0].dims.d[3];
-    int32_t num_levels = inputDesc[1].dims.d[0];
-    int32_t num_query = inputDesc[3].dims.d[1];
-    int32_t num_point = inputDesc[3].dims.d[4];
-    int32_t rc = 0;
-    if (inputDesc[0].type == nvinfer1::DataType::kFLOAT)
-    {
-        float const* value = static_cast<float const*>(inputs[0]);
-        int32_t const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
-        int32_t const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
-        float const* samplingLoc = static_cast<float const*>(inputs[3]);
-        float const* attnWeight = static_cast<float const*>(inputs[4]);
-        float* output = static_cast<float*>(outputs[0]);
+                                                   const nvinfer1::PluginTensorDesc *outputDesc,
+                                                   const void *const *inputs, void *const *outputs,
+                                                   void *workSpace,
+                                                   cudaStream_t stream) TRT_NOEXCEPT {
+  int32_t const batch = inputDesc[0].dims.d[0];
+  int32_t spatial_size = inputDesc[0].dims.d[1];
+  int32_t num_heads = inputDesc[0].dims.d[2];
+  int32_t channels = inputDesc[0].dims.d[3];
+  int32_t num_levels = inputDesc[1].dims.d[0];
+  int32_t num_query = inputDesc[3].dims.d[1];
+  int32_t num_point = inputDesc[3].dims.d[4];
+  int32_t rc = 0;
+  if (inputDesc[0].type == nvinfer1::DataType::kFLOAT) {
+    float const *value = static_cast<float const *>(inputs[0]);
+    int32_t const *spatialShapes = static_cast<int32_t const *>(inputs[1]);
+    int32_t const *levelStartIndex = static_cast<int32_t const *>(inputs[2]);
+    float const *samplingLoc = static_cast<float const *>(inputs[3]);
+    float const *attnWeight = static_cast<float const *>(inputs[4]);
+    float *output = static_cast<float *>(outputs[0]);

-        rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output,
-            batch, spatial_size, num_heads, channels, num_levels, num_query, num_point, stream);
-    }
-    else if (inputDesc[0].type == nvinfer1::DataType::kHALF)
-    {
-        const __half* value = static_cast<const __half*>(inputs[0]);
-        int32_t const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
-        int32_t const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
-        const __half* samplingLoc = static_cast<const __half*>(inputs[3]);
-        const __half* attnWeight = static_cast<const __half*>(inputs[4]);
-        __half* output = static_cast<__half*>(outputs[0]);
+    rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight,
+                                     output, batch, spatial_size, num_heads, channels, num_levels,
+                                     num_query, num_point, stream);
+  } else if (inputDesc[0].type == nvinfer1::DataType::kHALF) {
+    const __half *value = static_cast<const __half *>(inputs[0]);
+    int32_t const *spatialShapes = static_cast<int32_t const *>(inputs[1]);
+    int32_t const *levelStartIndex = static_cast<int32_t const *>(inputs[2]);
+    const __half *samplingLoc = static_cast<const __half *>(inputs[3]);
+    const __half *attnWeight = static_cast<const __half *>(inputs[4]);
+    __half *output = static_cast<__half *>(outputs[0]);

-        rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output,
-            batch, spatial_size, num_heads, channels, num_levels, num_query, num_point, stream);
-    }
+    rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight,
+                                     output, batch, spatial_size, num_heads, channels, num_levels,
+                                     num_query, num_point, stream);
+  }

-    return rc;
+  return rc;
 }

 nvinfer1::DataType MultiScaleDeformableAttnPluginDynamic::getOutputDataType(
@ -165,7 +158,6 @@ const char *MultiScaleDeformableAttnPluginDynamicCreator::getPluginVersion() con

 nvinfer1::IPluginV2 *MultiScaleDeformableAttnPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT {
-
  MultiScaleDeformableAttnPluginDynamic *plugin = new MultiScaleDeformableAttnPluginDynamic(name);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
--- a/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn.hpp
+++ b/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn.hpp
@ -12,7 +12,6 @@
 namespace mmdeploy {
 class MultiScaleDeformableAttnPluginDynamic : public TRTPluginBase {
 public:
-
  MultiScaleDeformableAttnPluginDynamic(const std::string &name);

  MultiScaleDeformableAttnPluginDynamic(const std::string name, const void *data, size_t length);
--- a/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn_kernel.cu
+++ b/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn_kernel.cu
@ -8,50 +8,57 @@
 #include "trt_plugin_helper.hpp"

 template <typename scalar_t>
-void ms_deformable_im2col_cuda(cudaStream_t stream, scalar_t const* dataValue, int32_t const* dataSpatialShapes,
-    int32_t const* dataLevelStartIndex, scalar_t const* dataSamplingLoc, scalar_t const* dataAttnWeight,
-    int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads, int32_t const channels, int32_t const numLevels,
-    int32_t const numQuery, int32_t const numPoint, scalar_t* dataCol)
-{
-    int32_t const numKernels = batchSize * numQuery * numHeads * channels;
-    int32_t const numActualKernels = batchSize * numQuery * numHeads * channels;
+void ms_deformable_im2col_cuda(cudaStream_t stream, scalar_t const* dataValue,
+                               int32_t const* dataSpatialShapes, int32_t const* dataLevelStartIndex,
+                               scalar_t const* dataSamplingLoc, scalar_t const* dataAttnWeight,
+                               int32_t const batchSize, int32_t const spatialSize,
+                               int32_t const numHeads, int32_t const channels,
+                               int32_t const numLevels, int32_t const numQuery,
+                               int32_t const numPoint, scalar_t* dataCol) {
+  int32_t const numKernels = batchSize * numQuery * numHeads * channels;
+  int32_t const numActualKernels = batchSize * numQuery * numHeads * channels;

-    ms_deformable_im2col_gpu_kernel<scalar_t><<<GET_BLOCKS(numActualKernels), THREADS_PER_BLOCK, 0, stream>>>(
-        numKernels, dataValue, dataSpatialShapes, dataLevelStartIndex, dataSamplingLoc, dataAttnWeight, batchSize,
-        spatialSize, numHeads, channels, numLevels, numQuery, numPoint, dataCol);
+  ms_deformable_im2col_gpu_kernel<scalar_t>
+      <<<GET_BLOCKS(numActualKernels), THREADS_PER_BLOCK, 0, stream>>>(
+          numKernels, dataValue, dataSpatialShapes, dataLevelStartIndex, dataSamplingLoc,
+          dataAttnWeight, batchSize, spatialSize, numHeads, channels, numLevels, numQuery, numPoint,
+          dataCol);
 }

-
 template <typename scalar_t>
 int32_t ms_deform_attn_cuda_forward(const scalar_t* value, const int32_t* spatialShapes,
-    const int32_t* levelStartIndex, const scalar_t* samplingLoc, const scalar_t* attnWeight, scalar_t* output, int32_t batch,
-    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
-    cudaStream_t stream)
-{
-    auto perValueSize = mSpatialSize * mNumHeads * mChannels;
-    auto perSampleLocSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint * 2;
-    auto perAttnWeightSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint;
-    auto perOutputSize = mNumQuery * mNumHeads * mChannels;
+                                    const int32_t* levelStartIndex, const scalar_t* samplingLoc,
+                                    const scalar_t* attnWeight, scalar_t* output, int32_t batch,
+                                    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels,
+                                    int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
+                                    cudaStream_t stream) {
+  auto perValueSize = mSpatialSize * mNumHeads * mChannels;
+  auto perSampleLocSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint * 2;
+  auto perAttnWeightSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint;
+  auto perOutputSize = mNumQuery * mNumHeads * mChannels;

-    int32_t mIm2colStep = batch;
+  int32_t mIm2colStep = batch;

-    for (int32_t n = 0; n < batch / mIm2colStep; ++n)
-    {
-        auto columns = output +  n * mIm2colStep * perOutputSize;
-        ms_deformable_im2col_cuda<scalar_t>(stream, value + n * mIm2colStep * perValueSize, spatialShapes, levelStartIndex,
-            samplingLoc + n * mIm2colStep * perSampleLocSize, attnWeight + n * mIm2colStep * perAttnWeightSize, mIm2colStep,
-            mSpatialSize, mNumHeads, mChannels, mNumLevels, mNumQuery, mNumPoint, columns);
-    }
+  for (int32_t n = 0; n < batch / mIm2colStep; ++n) {
+    auto columns = output + n * mIm2colStep * perOutputSize;
+    ms_deformable_im2col_cuda<scalar_t>(
+        stream, value + n * mIm2colStep * perValueSize, spatialShapes, levelStartIndex,
+        samplingLoc + n * mIm2colStep * perSampleLocSize,
+        attnWeight + n * mIm2colStep * perAttnWeightSize, mIm2colStep, mSpatialSize, mNumHeads,
+        mChannels, mNumLevels, mNumQuery, mNumPoint, columns);
+  }

-    return 0;
+  return 0;
 }

-template int32_t ms_deform_attn_cuda_forward<float>(const float* value, const int32_t* spatialShapes,
-    const int32_t* levelStartIndex, const float* samplingLoc, const float* attnWeight, float* output, int32_t batch,
-    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
-    cudaStream_t stream);
+template int32_t ms_deform_attn_cuda_forward<float>(
+    const float* value, const int32_t* spatialShapes, const int32_t* levelStartIndex,
+    const float* samplingLoc, const float* attnWeight, float* output, int32_t batch,
+    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels,
+    int32_t mNumQuery, int32_t mNumPoint, cudaStream_t stream);

-template int32_t ms_deform_attn_cuda_forward<__half>(const __half* value, const int32_t* spatialShapes,
-    const int32_t* levelStartIndex, const __half* samplingLoc, const __half* attnWeight, __half* output, int32_t batch,
-    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
-    cudaStream_t stream);
+template int32_t ms_deform_attn_cuda_forward<__half>(
+    const __half* value, const int32_t* spatialShapes, const int32_t* levelStartIndex,
+    const __half* samplingLoc, const __half* attnWeight, __half* output, int32_t batch,
+    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels,
+    int32_t mNumQuery, int32_t mNumPoint, cudaStream_t stream);
--- a/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn_kernel.cuh
+++ b/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn_kernel.cuh
@ -5,10 +5,10 @@
 #include "common_cuda_helper.hpp"

 template <typename scalar_t>
-__device__ scalar_t ms_deform_attn_im2col_bilinear(
-    const scalar_t *&bottom_data, const int &height, const int &width,
-    const int &nheads, const int &channels, const scalar_t &h,
-    const scalar_t &w, const int &m, const int &c) {
+__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t*& bottom_data, const int& height,
+                                                   const int& width, const int& nheads,
+                                                   const int& channels, const scalar_t& h,
+                                                   const scalar_t& w, const int& m, const int& c) {
  const int h_low = floorf(h);
  const int w_low = floorf(w);
  const int h_high = h_low + 1;
@ -54,210 +54,204 @@ __device__ scalar_t ms_deform_attn_im2col_bilinear(
 }

 template <>
-__device__ __half ms_deform_attn_im2col_bilinear<__half>(const __half*& bottomData, int32_t const& height, int32_t const& width,
-    int32_t const& nHeads, int32_t const& channels, const __half& h, const __half& w, int32_t const& m, int32_t const& c)
-{
-    int32_t const hLow = __half2int_rd(h);
-    int32_t const wLow = __half2int_rd(w);
-    int32_t const hHigh = hLow + 1;
-    int32_t const wHigh = wLow + 1;
+__device__ __half ms_deform_attn_im2col_bilinear<__half>(
+    const __half*& bottomData, int32_t const& height, int32_t const& width, int32_t const& nHeads,
+    int32_t const& channels, const __half& h, const __half& w, int32_t const& m, int32_t const& c) {
+  int32_t const hLow = __half2int_rd(h);
+  int32_t const wLow = __half2int_rd(w);
+  int32_t const hHigh = hLow + 1;
+  int32_t const wHigh = wLow + 1;

-    const __half kZERO = __int2half_rz(0);
-    const __half one = __int2half_rz(1);
+  const __half kZERO = __int2half_rz(0);
+  const __half one = __int2half_rz(1);

-#if __CUDA_ARCH__>=530
-    const __half lh = __hsub(h, __int2half_rd(hLow));
-    const __half lw = __hsub(w, __int2half_rd(wLow));
-    const __half hh = __hsub(one, lh), hw = __hsub(one, lw);
+#if __CUDA_ARCH__ >= 530
+  const __half lh = __hsub(h, __int2half_rd(hLow));
+  const __half lw = __hsub(w, __int2half_rd(wLow));
+  const __half hh = __hsub(one, lh), hw = __hsub(one, lw);
 #else
-    const __half lh = __float2half(__half2float(h) - hLow);
-    const __half lw = __float2half(__half2float(w) - wLow);
-    const __half hh = __float2half(__half2float(one) - __half2float(lh));
-    const __half hw = __float2half(__half2float(one) - __half2float(lw));
+  const __half lh = __float2half(__half2float(h) - hLow);
+  const __half lw = __float2half(__half2float(w) - wLow);
+  const __half hh = __float2half(__half2float(one) - __half2float(lh));
+  const __half hw = __float2half(__half2float(one) - __half2float(lw));
 #endif
-    int32_t const wStride = nHeads * channels;
-    int32_t const hStride = width * wStride;
-    int32_t const hLowPtrOffset = hLow * hStride;
-    int32_t const hHighPtrOffset = hLowPtrOffset + hStride;
-    int32_t const wLowPtrOffset = wLow * wStride;
-    int32_t const wHighPtrOffset = wLowPtrOffset + wStride;
-    int32_t const basePtr = m * channels + c;
+  int32_t const wStride = nHeads * channels;
+  int32_t const hStride = width * wStride;
+  int32_t const hLowPtrOffset = hLow * hStride;
+  int32_t const hHighPtrOffset = hLowPtrOffset + hStride;
+  int32_t const wLowPtrOffset = wLow * wStride;
+  int32_t const wHighPtrOffset = wLowPtrOffset + wStride;
+  int32_t const basePtr = m * channels + c;

-    __half v1 = kZERO;
-    if (hLow >= 0 && wLow >= 0)
-    {
-        int32_t const ptr1 = hLowPtrOffset + wLowPtrOffset + basePtr;
-        v1 = bottomData[ptr1];
-    }
-    __half v2 = kZERO;
-    if (hLow >= 0 && wHigh <= width - 1)
-    {
-        int32_t const ptr2 = hLowPtrOffset + wHighPtrOffset + basePtr;
-        v2 = bottomData[ptr2];
-    }
-    __half v3 = kZERO;
-    if (hHigh <= height - 1 && wLow >= 0)
-    {
-        int32_t const ptr3 = hHighPtrOffset + wLowPtrOffset + basePtr;
-        v3 = bottomData[ptr3];
-    }
-    __half v4 = kZERO;
-    if (hHigh <= height - 1 && wHigh <= width - 1)
-    {
-        int32_t const ptr4 = hHighPtrOffset + wHighPtrOffset + basePtr;
-        v4 = bottomData[ptr4];
-    }
+  __half v1 = kZERO;
+  if (hLow >= 0 && wLow >= 0) {
+    int32_t const ptr1 = hLowPtrOffset + wLowPtrOffset + basePtr;
+    v1 = bottomData[ptr1];
+  }
+  __half v2 = kZERO;
+  if (hLow >= 0 && wHigh <= width - 1) {
+    int32_t const ptr2 = hLowPtrOffset + wHighPtrOffset + basePtr;
+    v2 = bottomData[ptr2];
+  }
+  __half v3 = kZERO;
+  if (hHigh <= height - 1 && wLow >= 0) {
+    int32_t const ptr3 = hHighPtrOffset + wLowPtrOffset + basePtr;
+    v3 = bottomData[ptr3];
+  }
+  __half v4 = kZERO;
+  if (hHigh <= height - 1 && wHigh <= width - 1) {
+    int32_t const ptr4 = hHighPtrOffset + wHighPtrOffset + basePtr;
+    v4 = bottomData[ptr4];
+  }

-#if __CUDA_ARCH__>=530
-    __half w1 = __hmul(__hmul(hh, hw), v1);
-    __half w2 = __hmul(__hmul(hh, lw), v2);
-    __half w3 = __hmul(__hmul(lh, hw), v3);
-    __half w4 = __hmul(__hmul(lh, lw), v4);
+#if __CUDA_ARCH__ >= 530
+  __half w1 = __hmul(__hmul(hh, hw), v1);
+  __half w2 = __hmul(__hmul(hh, lw), v2);
+  __half w3 = __hmul(__hmul(lh, hw), v3);
+  __half w4 = __hmul(__hmul(lh, lw), v4);

-    w1 = __hadd(w1, w2);
-    w3 = __hadd(w3, w4);
+  w1 = __hadd(w1, w2);
+  w3 = __hadd(w3, w4);

-    const __half val = __hadd(w1, w3);
+  const __half val = __hadd(w1, w3);
 #else
-    __half w1 = __float2half((__half2float(hh) * __half2float(hw)) * __half2float(v1));
-    __half w2 = __float2half((__half2float(hh) * __half2float(lw)) * __half2float(v2));
-    __half w3 = __float2half((__half2float(lh) * __half2float(hw)) * __half2float(v3));
-    __half w4 = __float2half((__half2float(lh) * __half2float(lw)) * __half2float(v4));
+  __half w1 = __float2half((__half2float(hh) * __half2float(hw)) * __half2float(v1));
+  __half w2 = __float2half((__half2float(hh) * __half2float(lw)) * __half2float(v2));
+  __half w3 = __float2half((__half2float(lh) * __half2float(hw)) * __half2float(v3));
+  __half w4 = __float2half((__half2float(lh) * __half2float(lw)) * __half2float(v4));

-    w1 = __float2half(__half2float(w1) + __half2float(w2));
-    w3 = __float2half(__half2float(w3) + __half2float(w4));
+  w1 = __float2half(__half2float(w1) + __half2float(w2));
+  w3 = __float2half(__half2float(w3) + __half2float(w4));

-    const __half val = __float2half(__half2float(w1) + __half2float(w3));
+  const __half val = __float2half(__half2float(w1) + __half2float(w3));
 #endif
-    return val;
+  return val;
 }

 #if 1
 template <typename scalar_t>
-__global__ void ms_deformable_im2col_gpu_kernel(int32_t const n, scalar_t const* dataValue,
-    int32_t const* dataSpatialShapes, int32_t const* dataLevelStartIndex, scalar_t const* dataSamplingLoc,
-    scalar_t const* dataAttnWeight, int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads, int32_t const channels,
-    int32_t const numLevels, int32_t const numQuery, int32_t const numPoint, scalar_t* dataCol)
-{
-    CUDA_1D_KERNEL_LOOP(index, n)
-    {
-        int32_t _temp = index;
-        int32_t const cCol = _temp % channels;
-        _temp /= channels;
-        int32_t const samplingIndex = _temp;
-        int32_t const mCol = _temp % numHeads;
-        _temp /= numHeads;
-        _temp /= numQuery;
-        int32_t const bCol = _temp;
+__global__ void ms_deformable_im2col_gpu_kernel(
+    int32_t const n, scalar_t const* dataValue, int32_t const* dataSpatialShapes,
+    int32_t const* dataLevelStartIndex, scalar_t const* dataSamplingLoc,
+    scalar_t const* dataAttnWeight, int32_t const batchSize, int32_t const spatialSize,
+    int32_t const numHeads, int32_t const channels, int32_t const numLevels, int32_t const numQuery,
+    int32_t const numPoint, scalar_t* dataCol) {
+  CUDA_1D_KERNEL_LOOP(index, n) {
+    int32_t _temp = index;
+    int32_t const cCol = _temp % channels;
+    _temp /= channels;
+    int32_t const samplingIndex = _temp;
+    int32_t const mCol = _temp % numHeads;
+    _temp /= numHeads;
+    _temp /= numQuery;
+    int32_t const bCol = _temp;

-        scalar_t* dataColPtr = dataCol + index;
-        int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
-        int32_t dataLocWPtr = dataWeightPtr << 1;
-        int32_t const qidStride = numHeads * channels;
-        int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
-        scalar_t col = 0;
+    scalar_t* dataColPtr = dataCol + index;
+    int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
+    int32_t dataLocWPtr = dataWeightPtr << 1;
+    int32_t const qidStride = numHeads * channels;
+    int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
+    scalar_t col = 0;

-        for (int32_t lCol = 0; lCol < numLevels; ++lCol)
-        {
-            int32_t const levelStartId = dataLevelStartIndex[lCol];
-            int32_t const spatialHPtr = lCol << 1;
-            int32_t const spatialH = dataSpatialShapes[spatialHPtr];
-            int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
-            scalar_t const* dataValuePtr = dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
-            for (int32_t pCol = 0; pCol < numPoint; ++pCol)
-            {
-                scalar_t const locW = dataSamplingLoc[dataLocWPtr];
-                scalar_t const locH = dataSamplingLoc[dataLocWPtr + 1];
-                scalar_t const weight = dataAttnWeight[dataWeightPtr];
+    for (int32_t lCol = 0; lCol < numLevels; ++lCol) {
+      int32_t const levelStartId = dataLevelStartIndex[lCol];
+      int32_t const spatialHPtr = lCol << 1;
+      int32_t const spatialH = dataSpatialShapes[spatialHPtr];
+      int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
+      scalar_t const* dataValuePtr =
+          dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
+      for (int32_t pCol = 0; pCol < numPoint; ++pCol) {
+        scalar_t const locW = dataSamplingLoc[dataLocWPtr];
+        scalar_t const locH = dataSamplingLoc[dataLocWPtr + 1];
+        scalar_t const weight = dataAttnWeight[dataWeightPtr];

-                scalar_t const hIm = locH * spatialH - 0.5;
-                scalar_t const wIm = locW * spatialW - 0.5;
+        scalar_t const hIm = locH * spatialH - 0.5;
+        scalar_t const wIm = locW * spatialW - 0.5;

-                if (hIm > -1 && wIm > -1 && hIm < spatialH && wIm < spatialW)
-                {
-                    col += ms_deform_attn_im2col_bilinear(
-                               dataValuePtr, spatialH, spatialW, numHeads, channels, hIm, wIm, mCol, cCol)
-                        * weight;
-                }
-
-                dataWeightPtr += 1;
-                dataLocWPtr += 2;
-            }
+        if (hIm > -1 && wIm > -1 && hIm < spatialH && wIm < spatialW) {
+          col += ms_deform_attn_im2col_bilinear(dataValuePtr, spatialH, spatialW, numHeads,
+                                                channels, hIm, wIm, mCol, cCol) *
+                 weight;
        }
-        *dataColPtr = col;
+
+        dataWeightPtr += 1;
+        dataLocWPtr += 2;
+      }
    }
+    *dataColPtr = col;
+  }
 }

 template <>
-__global__ void ms_deformable_im2col_gpu_kernel<__half>(int32_t const n, const __half* dataValue,
-    int32_t const* dataSpatialShapes, int32_t const* dataLevelStartIndex, const __half* dataSamplingLoc,
-    const __half* dataAttnWeight, int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads, int32_t const channels,
-    int32_t const numLevels, int32_t const numQuery, int32_t const numPoint, __half* dataCol)
-{
-    CUDA_1D_KERNEL_LOOP(index, n)
-    {
-        int32_t _temp = index;
-        int32_t const cCol = _temp % channels;
-        _temp /= channels;
-        int32_t const samplingIndex = _temp;
-        int32_t const mCol = _temp % numHeads;
-        _temp /= numHeads;
-        _temp /= numQuery;
-        int32_t const bCol = _temp;
+__global__ void ms_deformable_im2col_gpu_kernel<__half>(
+    int32_t const n, const __half* dataValue, int32_t const* dataSpatialShapes,
+    int32_t const* dataLevelStartIndex, const __half* dataSamplingLoc, const __half* dataAttnWeight,
+    int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads,
+    int32_t const channels, int32_t const numLevels, int32_t const numQuery, int32_t const numPoint,
+    __half* dataCol) {
+  CUDA_1D_KERNEL_LOOP(index, n) {
+    int32_t _temp = index;
+    int32_t const cCol = _temp % channels;
+    _temp /= channels;
+    int32_t const samplingIndex = _temp;
+    int32_t const mCol = _temp % numHeads;
+    _temp /= numHeads;
+    _temp /= numQuery;
+    int32_t const bCol = _temp;

-        __half* dataColPtr = dataCol + index;
-        int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
-        int32_t dataLocWPtr = dataWeightPtr << 1;
-        int32_t const qidStride = numHeads * channels;
-        int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
-        const __half kZERO_POINT_FIVE = __float2half(0.5f);
-        const __half kMINUS_ONE = __float2half(-1.0f);
-        const __half kZERO = __int2half_rz(0);
-        __half tpVal = kZERO;
-        __half col = kZERO;
+    __half* dataColPtr = dataCol + index;
+    int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
+    int32_t dataLocWPtr = dataWeightPtr << 1;
+    int32_t const qidStride = numHeads * channels;
+    int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
+    const __half kZERO_POINT_FIVE = __float2half(0.5f);
+    const __half kMINUS_ONE = __float2half(-1.0f);
+    const __half kZERO = __int2half_rz(0);
+    __half tpVal = kZERO;
+    __half col = kZERO;

-        for (int32_t lCol = 0; lCol < numLevels; ++lCol)
-        {
-            int32_t const levelStartId = dataLevelStartIndex[lCol];
-            int32_t const spatialHPtr = lCol << 1;
-            int32_t const spatialH = dataSpatialShapes[spatialHPtr];
-            int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
-            const __half spatialHHalf = __int2half_rd(spatialH);
-            const __half spatialWHalf = __int2half_rd(spatialW);
-            const __half* dataValuePtr = dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
-            for (int32_t pCol = 0; pCol < numPoint; ++pCol)
-            {
-                const __half locW = dataSamplingLoc[dataLocWPtr];
-                const __half locH = dataSamplingLoc[dataLocWPtr + 1];
-                const __half weight = dataAttnWeight[dataWeightPtr];
+    for (int32_t lCol = 0; lCol < numLevels; ++lCol) {
+      int32_t const levelStartId = dataLevelStartIndex[lCol];
+      int32_t const spatialHPtr = lCol << 1;
+      int32_t const spatialH = dataSpatialShapes[spatialHPtr];
+      int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
+      const __half spatialHHalf = __int2half_rd(spatialH);
+      const __half spatialWHalf = __int2half_rd(spatialW);
+      const __half* dataValuePtr = dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
+      for (int32_t pCol = 0; pCol < numPoint; ++pCol) {
+        const __half locW = dataSamplingLoc[dataLocWPtr];
+        const __half locH = dataSamplingLoc[dataLocWPtr + 1];
+        const __half weight = dataAttnWeight[dataWeightPtr];
 #if __CUDA_ARCH__ >= 530
-                const __half hIm = __hsub(__hmul(locH, spatialHHalf), kZERO_POINT_FIVE);
-                const __half wIm = __hsub(__hmul(locW, spatialWHalf), kZERO_POINT_FIVE);
+        const __half hIm = __hsub(__hmul(locH, spatialHHalf), kZERO_POINT_FIVE);
+        const __half wIm = __hsub(__hmul(locW, spatialWHalf), kZERO_POINT_FIVE);

-                if (__hgt(hIm, kMINUS_ONE) && __hgt(wIm, kMINUS_ONE) && __hlt(hIm, spatialHHalf)
-                    && __hlt(wIm, spatialWHalf))
-                {
-                    tpVal = ms_deform_attn_im2col_bilinear(
-                        dataValuePtr, spatialH, spatialW, numHeads, channels, hIm, wIm, mCol, cCol);
-                    col = __hadd(col, __hmul(tpVal, weight));
-                }
-#else
-                const __half hIm = __float2half(__half2float(locH) * __half2float(spatialHHalf) - __half2float(kZERO_POINT_FIVE));
-                const __half wIm = __float2half(__half2float(locW) * __half2float(spatialWHalf) - __half2float(kZERO_POINT_FIVE));
-
-                if((__half2float(hIm)>__half2float(kMINUS_ONE)) && (__half2float(wIm)>__half2float(kMINUS_ONE))
-                    && (__half2float(hIm)<__half2float(spatialHHalf)) && (__half2float(wIm)<__half2float(spatialWHalf)))
-                {
-                    tpVal = ms_deform_attn_im2col_bilinear(
-                        dataValuePtr, spatialH, spatialW, numHeads, channels, hIm, wIm, mCol, cCol);
-                    col = __float2half(__half2float(col) + (__half2float(tpVal) * __half2float(weight)));
-                }
-#endif
-                dataWeightPtr += 1;
-                dataLocWPtr += 2;
-            }
+        if (__hgt(hIm, kMINUS_ONE) && __hgt(wIm, kMINUS_ONE) && __hlt(hIm, spatialHHalf) &&
+            __hlt(wIm, spatialWHalf)) {
+          tpVal = ms_deform_attn_im2col_bilinear(dataValuePtr, spatialH, spatialW, numHeads,
+                                                 channels, hIm, wIm, mCol, cCol);
+          col = __hadd(col, __hmul(tpVal, weight));
        }
-        *dataColPtr = col;
+#else
+        const __half hIm = __float2half(__half2float(locH) * __half2float(spatialHHalf) -
+                                        __half2float(kZERO_POINT_FIVE));
+        const __half wIm = __float2half(__half2float(locW) * __half2float(spatialWHalf) -
+                                        __half2float(kZERO_POINT_FIVE));
+
+        if ((__half2float(hIm) > __half2float(kMINUS_ONE)) &&
+            (__half2float(wIm) > __half2float(kMINUS_ONE)) &&
+            (__half2float(hIm) < __half2float(spatialHHalf)) &&
+            (__half2float(wIm) < __half2float(spatialWHalf))) {
+          tpVal = ms_deform_attn_im2col_bilinear(dataValuePtr, spatialH, spatialW, numHeads,
+                                                 channels, hIm, wIm, mCol, cCol);
+          col = __float2half(__half2float(col) + (__half2float(tpVal) * __half2float(weight)));
+        }
+#endif
+        dataWeightPtr += 1;
+        dataLocWPtr += 2;
+      }
    }
+    *dataColPtr = col;
+  }
 }
 #endif
--- a/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn_kernel.hpp
+++ b/csrc/mmdeploy/backend_ops/tensorrt/multi_scale_deform_attn/trt_ms_deform_attn_kernel.hpp
@ -5,9 +5,11 @@
 #include <cuda_runtime.h>

 template <typename scalar_t>
-int32_t ms_deform_attn_cuda_forward(const scalar_t* value, const int32_t* spatialShapes, const int32_t* levelStartIndex,
-    const scalar_t* samplingLoc, const scalar_t* attnWeight, scalar_t* output, int32_t batch, int32_t mSpatialSize,
-    int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
-    cudaStream_t stream);
+int32_t ms_deform_attn_cuda_forward(const scalar_t* value, const int32_t* spatialShapes,
+                                    const int32_t* levelStartIndex, const scalar_t* samplingLoc,
+                                    const scalar_t* attnWeight, scalar_t* output, int32_t batch,
+                                    int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels,
+                                    int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
+                                    cudaStream_t stream);

 #endif
--- a/demo/csharp/Demo.sln
+++ b/demo/csharp/Demo.sln
@ -17,6 +17,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ocr_recognition", "ocr_reco
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "pose_detection", "pose_detection\pose_detection.csproj", "{10E3B87C-7544-4F4D-90A3-65D5654CBF94}"
 EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "pose_tracker", "pose_tracker\pose_tracker.csproj", "{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "rotated_detection", "rotated_detection\rotated_detection.csproj", "{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@ -51,6 +55,14 @@ Global
 		{10E3B87C-7544-4F4D-90A3-65D5654CBF94}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{10E3B87C-7544-4F4D-90A3-65D5654CBF94}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{10E3B87C-7544-4F4D-90A3-65D5654CBF94}.Release|Any CPU.Build.0 = Release|Any CPU
+		{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Release|Any CPU.Build.0 = Release|Any CPU
+		{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
--- a/demo/csharp/pose_tracker/Program.cs
+++ b/demo/csharp/pose_tracker/Program.cs
@ -0,0 +1,203 @@
+using System;
+using System.Collections.Generic;
+using OpenCvSharp;
+using MMDeploy;
+using System.Linq;
+
+namespace pose_tracker
+{
+    internal class Program
+    {
+        static class CocoSkeleton
+        {
+            public static List<(int, int)> Skeleton = new List<(int, int)>
+            {
+                (15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11), (6, 12),
+                (5, 6),   (5, 7),   (6, 8),   (7, 9),   (8, 10),  (1, 2),  (0, 1),
+                (0, 2),   (1, 3),   (2, 4),   (3, 5),   (4, 6)
+            };
+
+            public static List<Scalar> Palette = new List<Scalar>
+            {
+                new Scalar(255, 128, 0),   new Scalar(255, 153, 51),  new Scalar(255, 178, 102),
+                new Scalar(230, 230, 0),   new Scalar(255, 153, 255), new Scalar(153, 204, 255),
+                new Scalar(255, 102, 255), new Scalar(255, 51, 255),  new Scalar(102, 178, 255),
+                new Scalar(51, 153, 255),  new Scalar(255, 153, 153), new Scalar(255, 102, 102),
+                new Scalar(255, 51, 51),   new Scalar(153, 255, 153), new Scalar(102, 255, 102),
+                new Scalar(51, 255, 51),   new Scalar(0, 255, 0),     new Scalar(0, 0, 255),
+                new Scalar(255, 0, 0),     new Scalar(255, 255, 255),
+            };
+
+            public static List<int> LinkColor = new List<int>
+            {
+                0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+            };
+
+            public static List<int> PointColor = new List<int>
+            {
+                16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0
+            };
+        }
+
+        static bool Visualize(OpenCvSharp.Mat frame, PoseTrackerOutput result, int long_edge,
+            int frame_id, bool with_bbox)
+        {
+            var skeleton = CocoSkeleton.Skeleton;
+            var palette = CocoSkeleton.Palette;
+            var link_color = CocoSkeleton.LinkColor;
+            var point_color = CocoSkeleton.PointColor;
+            float scale = 1;
+            if (long_edge != 0)
+            {
+                scale = (float)long_edge / (float)Math.Max(frame.Cols, frame.Rows);
+            }
+            if (scale != 1)
+            {
+                Cv2.Resize(frame, frame, new Size(), scale, scale);
+            }
+            else
+            {
+                frame = frame.Clone();
+            }
+
+            Action<List<float>, Scalar> drawBbox = (bbox, color) =>
+            {
+                for (int i = 0; i < bbox.Count; i++)
+                {
+                    bbox[i] *= scale;
+                }
+                Cv2.Rectangle(frame, new OpenCvSharp.Point(bbox[0], bbox[1]),
+                    new OpenCvSharp.Point(bbox[2], bbox[3]), color);
+            };
+
+            for (int i = 0; i < result.Count; i++)
+            {
+                PoseTrack pt = result.Results[i];
+                for (int j = 0; j < pt.Keypoints.Count; j++)
+                {
+                    Pointf p = pt.Keypoints[j];
+                    p.X *= scale;
+                    p.Y *= scale;
+                    pt.Keypoints[j] = p;
+                }
+                float score_thr = 0.5f;
+                int[] used = new int[pt.Keypoints.Count * 2];
+                for (int j = 0; j < skeleton.Count; j++)
+                {
+                    int u = skeleton[j].Item1;
+                    int v = skeleton[j].Item2;
+                    if (pt.Scores[u] > score_thr && pt.Scores[v] > score_thr)
+                    {
+                        used[u] = used[v] = 1;
+                        var p_u = new OpenCvSharp.Point(pt.Keypoints[u].X, pt.Keypoints[u].Y);
+                        var p_v = new OpenCvSharp.Point(pt.Keypoints[v].X, pt.Keypoints[v].Y);
+                        Cv2.Line(frame, p_u, p_v, palette[link_color[j]], 1, LineTypes.AntiAlias);
+                    }
+                }
+                for (int j = 0; j < pt.Keypoints.Count; j++)
+                {
+                    if (used[j] == 1)
+                    {
+                        var p = new OpenCvSharp.Point(pt.Keypoints[j].X, pt.Keypoints[j].Y);
+                        Cv2.Circle(frame, p, 1, palette[point_color[j]], 2, LineTypes.AntiAlias);
+                    }
+                }
+                if (with_bbox)
+                {
+                    var bbox = new List<float> { pt.Bbox.Left, pt.Bbox.Top, pt.Bbox.Right, pt.Bbox.Bottom };
+                    drawBbox(bbox, new Scalar(0, 255, 0));
+                }
+            }
+
+            Cv2.ImShow("pose_tracker", frame);
+            return Cv2.WaitKey(1) != 'q';
+        }
+        static void CvMatToMat(OpenCvSharp.Mat cvMat, out MMDeploy.Mat mat)
+        {
+            mat = new MMDeploy.Mat();
+            unsafe
+            {
+                mat.Data = cvMat.DataPointer;
+                mat.Height = cvMat.Height;
+                mat.Width = cvMat.Width;
+                mat.Channel = cvMat.Dims;
+                mat.Format = PixelFormat.BGR;
+                mat.Type = DataType.Int8;
+                mat.Device = null;
+            }
+        }
+
+        static void PrintHelperMessage()
+        {
+            string message = "usage:\n pose_tracker device det_model pose_model video";
+            Console.WriteLine(message);
+        }
+
+        static void Main(string[] args)
+        {
+            if (args.Length != 4)
+            {
+                PrintHelperMessage();
+                Environment.Exit(1);
+            }
+
+            string device_ = args[0];
+            string det_model_ = args[1];
+            string pose_model_ = args[2];
+            string video = args[3];
+
+            Model det_model = new Model(det_model_);
+            Model pose_model = new Model(pose_model_);
+            Device device = new Device(device_);
+            Context context = new Context(device);
+
+            // initialize tracker
+            PoseTracker tracker = new PoseTracker(det_model, pose_model, context);
+
+            PoseTracker.Params param = new PoseTracker.Params();
+            // set default param
+            param.Init();
+            // set custom param
+            param.DetMinBboxSize  = 100;
+            param.DetInterval = 1;
+            param.PoseMaxNumBboxes = 6;
+            // optionally use OKS for keypoints similarity comparison
+            float[] sigmas = {0.026f, 0.025f, 0.025f, 0.035f, 0.035f, 0.079f, 0.079f, 0.072f, 0.072f,
+                              0.062f, 0.062f, 0.107f, 0.107f, 0.087f, 0.087f, 0.089f, 0.089f };
+            param.SetKeypointSigmas(sigmas);
+
+            // create state
+            PoseTracker.State state = tracker.CreateState(param);
+
+            VideoCapture cap = new VideoCapture(video);
+            if (!cap.IsOpened())
+            {
+                Console.WriteLine("failed to open video: " + video);
+                Environment.Exit(1);
+            }
+
+            int frame_id = 0;
+            OpenCvSharp.Mat frame = new OpenCvSharp.Mat();
+            while (true)
+            {
+                cap.Read(frame);
+                if (frame.Empty())
+                {
+                    break;
+                }
+                CvMatToMat(frame, out var mat);
+                // process
+                PoseTrackerOutput result = tracker.Apply(state, mat);
+
+                // visualize
+                if (!Visualize(frame, result, 0, frame_id++, true))
+                {
+                    break;
+                }
+            }
+
+            param.DeleteKeypointSigmas();
+            tracker.Close();
+        }
+    }
+}
--- a/demo/csharp/pose_tracker/pose_tracker.csproj
+++ b/demo/csharp/pose_tracker/pose_tracker.csproj
@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="MMDeployCSharp" Version="0.12.0" />
+    <PackageReference Include="OpenCvSharp4" Version="4.5.5.20211231" />
+    <PackageReference Include="OpenCvSharp4.runtime.win" Version="4.5.5.20211231" />
+  </ItemGroup>
+
+</Project>
--- a/demo/csharp/rotated_detection/Program.cs
+++ b/demo/csharp/rotated_detection/Program.cs
@ -0,0 +1,83 @@
+using System;
+using System.Collections.Generic;
+using OpenCvSharp;
+using MMDeploy;
+
+namespace object_detection
+{
+    class Program
+    {
+        static void CvMatToMat(OpenCvSharp.Mat[] cvMats, out MMDeploy.Mat[] mats)
+        {
+            mats = new MMDeploy.Mat[cvMats.Length];
+            unsafe
+            {
+                for (int i = 0; i < cvMats.Length; i++)
+                {
+                    mats[i].Data = cvMats[i].DataPointer;
+                    mats[i].Height = cvMats[i].Height;
+                    mats[i].Width = cvMats[i].Width;
+                    mats[i].Channel = cvMats[i].Dims;
+                    mats[i].Format = PixelFormat.BGR;
+                    mats[i].Type = DataType.Int8;
+                    mats[i].Device = null;
+                }
+            }
+        }
+
+        static void CvWaitKey()
+        {
+            Cv2.WaitKey();
+        }
+
+        static void Main(string[] args)
+        {
+            if (args.Length != 3)
+            {
+                Console.WriteLine("usage:\n  object_detection deviceName modelPath imagePath\n");
+                Environment.Exit(1);
+            }
+
+            string deviceName = args[0];
+            string modelPath = args[1];
+            string imagePath = args[2];
+
+            // 1. create handle
+            RotatedDetector handle = new RotatedDetector(modelPath, deviceName, 0);
+
+            // 2. prepare input
+            OpenCvSharp.Mat[] imgs = new OpenCvSharp.Mat[1] { Cv2.ImRead(imagePath, ImreadModes.Color) };
+            CvMatToMat(imgs, out var mats);
+
+            // 3. process
+            List<RotatedDetectorOutput> output = handle.Apply(mats);
+
+            // 4. show result
+            foreach (var obj in output[0].Results)
+            {
+                if (obj.Score < 0.1)
+                {
+                    continue;
+                }
+                float xc = obj.Cx;
+                float yc = obj.Cy;
+                float wx = obj.Width / 2 * (float)Math.Cos(obj.Angle);
+                float wy = obj.Width / 2 * (float)Math.Sin(obj.Angle);
+                float hx = -obj.Height / 2 * (float)Math.Sin(obj.Angle);
+                float hy = obj.Height / 2 * (float)Math.Cos(obj.Angle);
+                OpenCvSharp.Point p1 = new OpenCvSharp.Point(xc - wx - hx, yc - wy - hy);
+                OpenCvSharp.Point p2 = new OpenCvSharp.Point(xc + wx - hx, yc + wy - hy);
+                OpenCvSharp.Point p3 = new OpenCvSharp.Point(xc + wx + hx, yc + wy + hy);
+                OpenCvSharp.Point p4 = new OpenCvSharp.Point(xc - wx + hx, yc - wy + hy);
+                var contours = new OpenCvSharp.Point[1][];
+                contours[0] = new OpenCvSharp.Point[4] { p1, p2, p3, p4 };
+                Cv2.DrawContours(imgs[0], contours, -1, new Scalar(0, 255, 0), 2);
+            }
+            Cv2.NamedWindow("mmrotate", WindowFlags.GuiExpanded);
+            Cv2.ImShow("mmrotate", imgs[0]);
+            CvWaitKey();
+
+            handle.Close();
+        }
+    }
+}
--- a/demo/csharp/rotated_detection/rotated_detection.csproj
+++ b/demo/csharp/rotated_detection/rotated_detection.csproj
@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="MMDeployCSharp" Version="0.12.0" />
+    <PackageReference Include="OpenCvSharp4" Version="4.5.5.20211231" />
+    <PackageReference Include="OpenCvSharp4.runtime.win" Version="4.5.5.20211231" />
+  </ItemGroup>
+
+</Project>