[Sync] Sync csharp api to dev-1.x (#1889)
* [Feature] Sync csharp apis with newly added c apis && demo (#1718) * sync c api to c# * fix typo * add pose tracker c# demo * udpate gitignore * remove print * fix lint * update rotated detection api * update rotated detection demo * rename pose_tracking -> pose_tracker * use input size as default * fix clang-formatpull/1901/head
parent
2fcc8dc19d
commit
423e27a4fe
csrc/mmdeploy
backend_ops/tensorrt/multi_scale_deform_attn
demo/csharp
pose_tracker
rotated_detection
|
@ -166,3 +166,6 @@ service/snpe/grpc_cpp_plugin
|
|||
csrc/mmdeploy/preprocess/elena/json
|
||||
csrc/mmdeploy/preprocess/elena/cpu_kernel/*
|
||||
csrc/mmdeploy/preprocess/elena/cuda_kernel/*
|
||||
|
||||
# c#
|
||||
demo/csharp/*/Properties
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
namespace MMDeploy
|
||||
{
|
||||
/// <summary>
|
||||
/// Context.
|
||||
/// </summary>
|
||||
public class Context : DisposableObject
|
||||
{
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Context"/> class.
|
||||
/// </summary>
|
||||
public Context()
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_context_create(out _handle));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Context"/> class with device.
|
||||
/// </summary>
|
||||
/// <param name="device">device.</param>
|
||||
public Context(Device device) : this()
|
||||
{
|
||||
Add(device);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Add model to the context.
|
||||
/// </summary>
|
||||
/// <param name="name">name.</param>
|
||||
/// <param name="model">model.</param>
|
||||
public void Add(string name, Model model)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.MODEL, name, model));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Add scheduler to the context.
|
||||
/// </summary>
|
||||
/// <param name="name">name.</param>
|
||||
/// <param name="scheduler">scheduler.</param>
|
||||
public void Add(string name, Scheduler scheduler)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.SCHEDULER, name, scheduler));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Add device to the context.
|
||||
/// </summary>
|
||||
/// <param name="device">device.</param>
|
||||
public void Add(Device device)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.DEVICE, "", device));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Add profiler to the context.
|
||||
/// </summary>
|
||||
/// <param name="profiler">profiler.</param>
|
||||
public void Add(Profiler profiler)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_context_add(this, (int)ContextType.PROFILER, "", profiler));
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_model_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
namespace MMDeploy
|
||||
{
|
||||
/// <summary>
|
||||
/// Device.
|
||||
/// </summary>
|
||||
public class Device : DisposableObject
|
||||
{
|
||||
private readonly string _name;
|
||||
private readonly int _index;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Device"/> class.
|
||||
/// </summary>
|
||||
/// <param name="name">device name.</param>
|
||||
/// <param name="index">device index.</param>
|
||||
public Device(string name, int index = 0)
|
||||
{
|
||||
this._name = name;
|
||||
this._index = index;
|
||||
ThrowException(NativeMethods.mmdeploy_device_create(name, index, out _handle));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets device name.
|
||||
/// </summary>
|
||||
public string Name { get => _name; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets device index.
|
||||
/// </summary>
|
||||
public int Index { get => _index; }
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_device_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -92,5 +92,11 @@ namespace MMDeploy
|
|||
throw new Exception(result.ToString());
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets internal handle.
|
||||
/// </summary>
|
||||
/// <param name="obj">instance.</param>
|
||||
public static implicit operator IntPtr(DisposableObject obj) => obj._handle;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
namespace MMDeploy
|
||||
{
|
||||
/// <summary>
|
||||
/// model.
|
||||
/// </summary>
|
||||
public class Model : DisposableObject
|
||||
{
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Model"/> class.
|
||||
/// </summary>
|
||||
/// <param name="modelPath">model path.</param>
|
||||
public Model(string modelPath)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_model_create_by_path(modelPath, out _handle));
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_model_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,350 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace MMDeploy
|
||||
{
|
||||
#pragma warning disable 0649
|
||||
internal unsafe struct CPoseTrack
|
||||
{
|
||||
public Pointf* Keypoints;
|
||||
public int KeypointCount;
|
||||
public float* Scores;
|
||||
public Rect Bbox;
|
||||
public int TargetId;
|
||||
}
|
||||
#pragma warning restore 0649
|
||||
|
||||
/// <summary>
|
||||
/// Single tracking result of a bbox.
|
||||
/// A picture may contains multiple reuslts.
|
||||
/// </summary>
|
||||
public struct PoseTrack
|
||||
{
|
||||
/// <summary>
|
||||
/// Keypoints.
|
||||
/// </summary>
|
||||
public List<Pointf> Keypoints;
|
||||
|
||||
/// <summary>
|
||||
/// Scores.
|
||||
/// </summary>
|
||||
public List<float> Scores;
|
||||
|
||||
/// <summary>
|
||||
/// Bbox.
|
||||
/// </summary>
|
||||
public Rect Bbox;
|
||||
|
||||
/// <summary>
|
||||
/// TargetId.
|
||||
/// </summary>
|
||||
public int TargetId;
|
||||
|
||||
/// <summary>
|
||||
/// Init data.
|
||||
/// </summary>
|
||||
private void Init()
|
||||
{
|
||||
if (Keypoints == null || Scores == null)
|
||||
{
|
||||
Keypoints = new List<Pointf>();
|
||||
Scores = new List<float>();
|
||||
}
|
||||
}
|
||||
|
||||
internal unsafe void Add(CPoseTrack* result)
|
||||
{
|
||||
Init();
|
||||
for (int i = 0; i < result->KeypointCount; i++)
|
||||
{
|
||||
Keypoints.Add(new Pointf(result->Keypoints[i].X, result->Keypoints[i].Y));
|
||||
Scores.Add(result->Scores[i]);
|
||||
}
|
||||
|
||||
Bbox = result->Bbox;
|
||||
TargetId = result->TargetId;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Output of PoseTracker.
|
||||
/// </summary>
|
||||
public struct PoseTrackerOutput
|
||||
{
|
||||
/// <summary>
|
||||
/// Tracking results for single image.
|
||||
/// </summary>
|
||||
public List<PoseTrack> Results;
|
||||
|
||||
/// <summary>
|
||||
/// Gets number of output.
|
||||
/// </summary>
|
||||
public int Count
|
||||
{
|
||||
get { return (Results == null) ? 0 : Results.Count; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Result for box level.
|
||||
/// </summary>
|
||||
/// <param name="boxRes">Box res.</param>
|
||||
public void Add(PoseTrack boxRes)
|
||||
{
|
||||
if (Results == null)
|
||||
{
|
||||
Results = new List<PoseTrack>();
|
||||
}
|
||||
|
||||
Results.Add(boxRes);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// PoseTracker.
|
||||
/// </summary>
|
||||
public class PoseTracker : DisposableObject
|
||||
{
|
||||
/// <summary>
|
||||
/// Params.
|
||||
/// </summary>
|
||||
public struct Params
|
||||
{
|
||||
/// <summary>
|
||||
/// init with default value.
|
||||
/// </summary>
|
||||
public void Init()
|
||||
{
|
||||
IntPtr ptr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(Params)));
|
||||
NativeMethods.mmdeploy_pose_tracker_default_params(ptr);
|
||||
this = Marshal.PtrToStructure<Params>(ptr);
|
||||
Marshal.DestroyStructure<Params>(ptr);
|
||||
Marshal.FreeHGlobal(ptr);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Sets keypoint sigmas.
|
||||
/// </summary>
|
||||
/// <param name="array">keypoint sigmas.</param>
|
||||
public void SetKeypointSigmas(float[] array)
|
||||
{
|
||||
this.KeypointSigmasSize = array.Length;
|
||||
this.KeypointSigmas = Marshal.AllocHGlobal(sizeof(float) * array.Length);
|
||||
Marshal.Copy(array, 0, this.KeypointSigmas, array.Length);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Release ptr.
|
||||
/// </summary>
|
||||
public void DeleteKeypointSigmas()
|
||||
{
|
||||
if (this.KeypointSigmas != null)
|
||||
{
|
||||
Marshal.FreeHGlobal(this.KeypointSigmas);
|
||||
this.KeypointSigmasSize = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// detection interval, default = 1.
|
||||
/// </summary>
|
||||
public int DetInterval;
|
||||
|
||||
/// <summary>
|
||||
/// detection label use for pose estimation, default = 0.
|
||||
/// </summary>
|
||||
public int DetLabel;
|
||||
|
||||
/// <summary>
|
||||
/// detection score threshold, default = 0.5.
|
||||
/// </summary>
|
||||
public float DetThr;
|
||||
|
||||
/// <summary>
|
||||
/// detection minimum bbox size (compute as sqrt(area)), default = -1.
|
||||
/// </summary>
|
||||
public float DetMinBboxSize;
|
||||
|
||||
/// <summary>
|
||||
/// nms iou threshold for merging detected bboxes and bboxes from tracked targets, default = 0.7.
|
||||
/// </summary>
|
||||
public float DetNmsThr;
|
||||
|
||||
/// <summary>
|
||||
/// max number of bboxes used for pose estimation per frame, default = -1.
|
||||
/// </summary>
|
||||
public int PoseMaxNumBboxes;
|
||||
|
||||
/// <summary>
|
||||
/// threshold for visible key-points, default = 0.5.
|
||||
/// </summary>
|
||||
public float PoseKptThr;
|
||||
|
||||
/// <summary>
|
||||
/// min number of key-points for valid poses, default = -1.
|
||||
/// </summary>
|
||||
public int PoseMinKeypoints;
|
||||
|
||||
/// <summary>
|
||||
/// scale for expanding key-points to bbox, default = 1.25.
|
||||
/// </summary>
|
||||
public float PoseBboxScale;
|
||||
|
||||
/// <summary>
|
||||
/// min pose bbox size, tracks with bbox size smaller than the threshold will be dropped,default = -1.
|
||||
/// </summary>
|
||||
public float PoseMinBboxSize;
|
||||
|
||||
/// <summary>
|
||||
/// nms oks/iou threshold for suppressing overlapped poses, useful when multiple pose estimations
|
||||
/// collapse to the same target, default = 0.5.
|
||||
/// </summary>
|
||||
public float PoseNmsThr;
|
||||
|
||||
/// <summary>
|
||||
/// keypoint sigmas for computing OKS, will use IOU if not set, default = nullptr.
|
||||
/// </summary>
|
||||
public IntPtr KeypointSigmas;
|
||||
|
||||
/// <summary>
|
||||
/// size of keypoint sigma array, must be consistent with the number of key-points, default = 0.
|
||||
/// </summary>
|
||||
public int KeypointSigmasSize;
|
||||
|
||||
/// <summary>
|
||||
/// iou threshold for associating missing tracks, default = 0.4.
|
||||
/// </summary>
|
||||
public float TrackIouThr;
|
||||
|
||||
/// <summary>
|
||||
/// max number of missing frames before a missing tracks is removed, default = 10.
|
||||
/// </summary>
|
||||
public int TrackMaxMissing;
|
||||
|
||||
/// <summary>
|
||||
/// track history size, default = 1.
|
||||
/// </summary>
|
||||
public int TrackHistorySize;
|
||||
|
||||
/// <summary>
|
||||
/// weight of position for setting covariance matrices of kalman filters, default = 0.05.
|
||||
/// </summary>
|
||||
public float StdWeightPosition;
|
||||
|
||||
/// <summary>
|
||||
/// weight of velocity for setting covariance matrices of kalman filters, default = 0.00625.
|
||||
/// </summary>
|
||||
public float StdWeightVelocity;
|
||||
|
||||
/// <summary>
|
||||
/// params for the one-euro filter for smoothing the outputs - (beta, fc_min, fc_derivative)
|
||||
/// default = (0.007, 1, 1).
|
||||
/// </summary>
|
||||
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)]
|
||||
public float[] SmoothParams;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// tracking state.
|
||||
/// </summary>
|
||||
public class State : DisposableObject
|
||||
{
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="State"/> class.
|
||||
/// </summary>
|
||||
/// <param name="pipeline">pipeline.</param>
|
||||
/// <param name="param">param.</param>
|
||||
public State(IntPtr pipeline, Params param)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_pose_tracker_create_state(pipeline, param, out _handle));
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_pose_tracker_destroy_state(_handle);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="PoseTracker"/> class.
|
||||
/// </summary>
|
||||
/// <param name="detect">detect model.</param>
|
||||
/// <param name="pose">pose model.</param>
|
||||
/// <param name="context">context.</param>
|
||||
public PoseTracker(Model detect, Model pose, Context context)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_pose_tracker_create(detect, pose, context, out _handle));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get track information of image.
|
||||
/// </summary>
|
||||
/// <param name="state">state for video.</param>
|
||||
/// <param name="mat">input mat.</param>
|
||||
/// <param name="detect">control the use of detector.
|
||||
/// -1: use params.DetInterval, 0: don't use detector, 1: force use detector.</param>
|
||||
/// <returns>results of this frame.</returns>
|
||||
public PoseTrackerOutput Apply(State state, Mat mat, int detect = -1)
|
||||
{
|
||||
PoseTrackerOutput output = default;
|
||||
|
||||
IntPtr[] states = new IntPtr[1] { state };
|
||||
Mat[] mats = new Mat[1] { mat };
|
||||
int[] detects = new int[1] { -1 };
|
||||
|
||||
unsafe
|
||||
{
|
||||
CPoseTrack* results = null;
|
||||
int* resultCount = null;
|
||||
fixed (Mat* _mats = mats)
|
||||
fixed (IntPtr* _states = states)
|
||||
fixed (int* _detects = detects)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_pose_tracker_apply(_handle, _states, _mats, _detects,
|
||||
mats.Length, &results, &resultCount));
|
||||
|
||||
FormatResult(resultCount, results, ref output, out var total);
|
||||
ReleaseResult(results, resultCount, mats.Length);
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private unsafe void FormatResult(int* resultCount, CPoseTrack* results, ref PoseTrackerOutput output, out int total)
|
||||
{
|
||||
total = resultCount[0];
|
||||
for (int i = 0; i < total; i++)
|
||||
{
|
||||
PoseTrack outi = default;
|
||||
outi.Add(results);
|
||||
output.Add(outi);
|
||||
results++;
|
||||
}
|
||||
}
|
||||
|
||||
private unsafe void ReleaseResult(CPoseTrack* results, int* resultCount, int count)
|
||||
{
|
||||
NativeMethods.mmdeploy_pose_tracker_release_result(results, resultCount, count);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create internal state.
|
||||
/// </summary>
|
||||
/// <param name="param">instance of Params.</param>
|
||||
/// <returns>instance of State.</returns>
|
||||
public State CreateState(Params param)
|
||||
{
|
||||
State state = new State(_handle, param);
|
||||
return state;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
// _state.Dispose();
|
||||
NativeMethods.mmdeploy_pose_tracker_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
namespace MMDeploy
|
||||
{
|
||||
/// <summary>
|
||||
/// Profiler.
|
||||
/// </summary>
|
||||
public class Profiler : DisposableObject
|
||||
{
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Profiler"/> class.
|
||||
/// </summary>
|
||||
/// <param name="path">path.</param>
|
||||
public Profiler(string path)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_profiler_create(path, out _handle));
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_profiler_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace MMDeploy
|
||||
{
|
||||
/// <summary>
|
||||
/// Single detection result of a picture.
|
||||
/// A picture may contains multiple reuslts.
|
||||
/// </summary>
|
||||
public struct RDetect
|
||||
{
|
||||
/// <summary>
|
||||
/// Label id.
|
||||
/// </summary>
|
||||
public int LabelId;
|
||||
|
||||
/// <summary>
|
||||
/// Score.
|
||||
/// </summary>
|
||||
public float Score;
|
||||
|
||||
/// <summary>
|
||||
/// Center x.
|
||||
/// </summary>
|
||||
public float Cx;
|
||||
|
||||
/// <summary>
|
||||
/// Center y.
|
||||
/// </summary>
|
||||
public float Cy;
|
||||
|
||||
/// <summary>
|
||||
/// Width.
|
||||
/// </summary>
|
||||
public float Width;
|
||||
|
||||
/// <summary>
|
||||
/// Height.
|
||||
/// </summary>
|
||||
public float Height;
|
||||
|
||||
/// <summary>
|
||||
/// Angle.
|
||||
/// </summary>
|
||||
public float Angle;
|
||||
|
||||
internal unsafe RDetect(RDetect* result) : this()
|
||||
{
|
||||
this = *result;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Output of RotatedDetector.
|
||||
/// </summary>
|
||||
public struct RotatedDetectorOutput
|
||||
{
|
||||
/// <summary>
|
||||
/// Rotated detection results for single image.
|
||||
/// </summary>
|
||||
public List<RDetect> Results;
|
||||
|
||||
private void Init()
|
||||
{
|
||||
if (Results == null)
|
||||
{
|
||||
Results = new List<RDetect>();
|
||||
}
|
||||
}
|
||||
|
||||
internal unsafe void Add(RDetect* result)
|
||||
{
|
||||
Init();
|
||||
Results.Add(new RDetect(result));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets number of output.
|
||||
/// </summary>
|
||||
public int Count
|
||||
{
|
||||
get { return (Results == null) ? 0 : Results.Count; }
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// RotatedDetector.
|
||||
/// </summary>
|
||||
public class RotatedDetector : DisposableObject
|
||||
{
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="RotatedDetector"/> class.
|
||||
/// </summary>
|
||||
/// <param name="modelPath">model path.</param>
|
||||
/// <param name="deviceName">device name.</param>
|
||||
/// <param name="deviceId">device id.</param>
|
||||
public RotatedDetector(string modelPath, string deviceName, int deviceId)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_rotated_detector_create_by_path(modelPath,
|
||||
deviceName, deviceId, out _handle));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get information of each image in a batch.
|
||||
/// </summary>
|
||||
/// <param name="mats">input mats.</param>
|
||||
/// <returns>Results of each input mat.</returns>
|
||||
public List<RotatedDetectorOutput> Apply(Mat[] mats)
|
||||
{
|
||||
List<RotatedDetectorOutput> output = new List<RotatedDetectorOutput>();
|
||||
|
||||
unsafe
|
||||
{
|
||||
RDetect* results = null;
|
||||
int* resultCount = null;
|
||||
fixed (Mat* _mats = mats)
|
||||
{
|
||||
ThrowException(NativeMethods.mmdeploy_rotated_detector_apply(_handle,
|
||||
_mats, mats.Length, &results, &resultCount));
|
||||
}
|
||||
|
||||
FormatResult(mats.Length, resultCount, results, ref output, out var total);
|
||||
ReleaseResult(results, resultCount);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private unsafe void FormatResult(int matCount, int* resultCount, RDetect* results,
|
||||
ref List<RotatedDetectorOutput> output, out int total)
|
||||
{
|
||||
total = matCount;
|
||||
for (int i = 0; i < matCount; i++)
|
||||
{
|
||||
RotatedDetectorOutput outi = default;
|
||||
for (int j = 0; j < resultCount[i]; j++)
|
||||
{
|
||||
outi.Add(results);
|
||||
results++;
|
||||
}
|
||||
|
||||
output.Add(outi);
|
||||
}
|
||||
}
|
||||
|
||||
private unsafe void ReleaseResult(RDetect* results, int* resultCount)
|
||||
{
|
||||
NativeMethods.mmdeploy_rotated_detector_release_result(results, resultCount);
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_rotated_detector_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
using System;
|
||||
|
||||
namespace MMDeploy
|
||||
{
|
||||
/// <summary>
|
||||
/// Scheduler.
|
||||
/// </summary>
|
||||
public class Scheduler : DisposableObject
|
||||
{
|
||||
private Scheduler()
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create thread pool scheduler.
|
||||
/// </summary>
|
||||
/// <param name="num_threads">thread number.</param>
|
||||
/// <returns>scheduler.</returns>
|
||||
public static Scheduler ThreadPool(int num_threads)
|
||||
{
|
||||
Scheduler result = new Scheduler();
|
||||
unsafe
|
||||
{
|
||||
result._handle = (IntPtr)NativeMethods.mmdeploy_executor_create_thread_pool(num_threads);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create single thread scheduler.
|
||||
/// </summary>
|
||||
/// <returns>scheduler.</returns>
|
||||
public static Scheduler Thread()
|
||||
{
|
||||
Scheduler result = new Scheduler();
|
||||
unsafe
|
||||
{
|
||||
result._handle = (IntPtr)NativeMethods.mmdeploy_executor_create_thread();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
protected override void ReleaseHandle()
|
||||
{
|
||||
NativeMethods.mmdeploy_scheduler_destroy(_handle);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -99,7 +99,7 @@ namespace MMDeploy
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Output of DetectorOutput.
|
||||
/// Output of TextDetector.
|
||||
/// </summary>
|
||||
public struct TextDetectorOutput
|
||||
{
|
||||
|
|
|
@ -89,4 +89,17 @@ namespace MMDeploy
|
|||
Y = y;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Context type.
|
||||
/// </summary>
|
||||
public enum ContextType
|
||||
{
|
||||
DEVICE = 0,
|
||||
STREAM = 1,
|
||||
MODEL = 2,
|
||||
SCHEDULER = 3,
|
||||
MAT = 4,
|
||||
PROFILER = 5,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,37 @@ namespace MMDeploy
|
|||
/// </summary>
|
||||
internal static partial class NativeMethods
|
||||
{
|
||||
#region common.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_context_create(out IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_context_create_by_device(string deviceName, int deviceId,
|
||||
out IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern void mmdeploy_context_destroy(IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_context_add(IntPtr handle, int type, string name,
|
||||
IntPtr obj);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_device_create(string device_name, int device_id,
|
||||
out IntPtr device);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern void mmdeploy_device_destroy(IntPtr device);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_profiler_create(string path, out IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe void mmdeploy_profiler_destroy(IntPtr handle);
|
||||
#endregion
|
||||
|
||||
#region scheduler.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe void* mmdeploy_executor_create_thread();
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe void* mmdeploy_executor_create_thread_pool(int num_threads);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern void mmdeploy_scheduler_destroy(IntPtr handle);
|
||||
#endregion
|
||||
|
||||
#region model.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_model_create_by_path(string path, out IntPtr handle);
|
||||
|
@ -38,6 +69,27 @@ namespace MMDeploy
|
|||
public static extern void mmdeploy_pose_detector_destroy(IntPtr handle);
|
||||
#endregion
|
||||
|
||||
#region pose_tracker.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_pose_tracker_create(IntPtr det_model, IntPtr pose_model,
|
||||
IntPtr context, out IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_pose_tracker_destroy(IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_pose_tracker_default_params(IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_pose_tracker_create_state(IntPtr pipeline,
|
||||
PoseTracker.Params param, out IntPtr state);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern void mmdeploy_pose_tracker_destroy_state(IntPtr state);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe int mmdeploy_pose_tracker_apply(IntPtr handle, IntPtr* state,
|
||||
Mat* mats, int* useDet, int count, CPoseTrack** results, int** resultCount);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe void mmdeploy_pose_tracker_release_result(CPoseTrack* results,
|
||||
int* resultCount, int count);
|
||||
#endregion
|
||||
|
||||
#region classifier.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_classifier_create(IntPtr model, string deviceName,
|
||||
|
@ -55,6 +107,23 @@ namespace MMDeploy
|
|||
public static extern void mmdeploy_classifier_destroy(IntPtr handle);
|
||||
#endregion
|
||||
|
||||
#region rotated_detector.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_rotated_detector_create(IntPtr model,
|
||||
string deviceName, int deviceId, out IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_rotated_detector_create_by_path(string modelPath,
|
||||
string deviceName, int deviceId, out IntPtr handle);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe int mmdeploy_rotated_detector_apply(IntPtr handle, Mat* mats,
|
||||
int matCount, RDetect** results, int** resultCount);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern unsafe void mmdeploy_rotated_detector_release_result(RDetect* results,
|
||||
int* resultCount);
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern void mmdeploy_rotated_detector_destroy(IntPtr handle);
|
||||
#endregion
|
||||
|
||||
#region detector.h
|
||||
[Pure, DllImport(DllExtern, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
|
||||
public static extern int mmdeploy_detector_create(IntPtr model, string deviceName,
|
||||
|
|
|
@ -16,17 +16,19 @@ static const char *PLUGIN_VERSION{"1"};
|
|||
static const char *PLUGIN_NAME{"MMCVMultiScaleDeformableAttention"};
|
||||
} // namespace
|
||||
|
||||
MultiScaleDeformableAttnPluginDynamic::MultiScaleDeformableAttnPluginDynamic(const std::string &name)
|
||||
MultiScaleDeformableAttnPluginDynamic::MultiScaleDeformableAttnPluginDynamic(
|
||||
const std::string &name)
|
||||
: TRTPluginBase(name) {}
|
||||
|
||||
MultiScaleDeformableAttnPluginDynamic::MultiScaleDeformableAttnPluginDynamic(const std::string name,
|
||||
const void *data,
|
||||
size_t length)
|
||||
const void *data,
|
||||
size_t length)
|
||||
: TRTPluginBase(name) {}
|
||||
MultiScaleDeformableAttnPluginDynamic::~MultiScaleDeformableAttnPluginDynamic() {}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *MultiScaleDeformableAttnPluginDynamic::clone() const TRT_NOEXCEPT {
|
||||
MultiScaleDeformableAttnPluginDynamic *plugin = new MultiScaleDeformableAttnPluginDynamic(mLayerName);
|
||||
MultiScaleDeformableAttnPluginDynamic *plugin =
|
||||
new MultiScaleDeformableAttnPluginDynamic(mLayerName);
|
||||
plugin->setPluginNamespace(getPluginNamespace());
|
||||
|
||||
return plugin;
|
||||
|
@ -40,37 +42,29 @@ nvinfer1::DimsExprs MultiScaleDeformableAttnPluginDynamic::getOutputDimensions(
|
|||
ret.d[0] = inputs[0].d[0];
|
||||
ret.d[1] = inputs[3].d[1];
|
||||
|
||||
ret.d[2] = exprBuilder.operation(DimensionOperation::kPROD,
|
||||
*inputs[0].d[2], *inputs[0].d[3]);
|
||||
ret.d[2] = exprBuilder.operation(DimensionOperation::kPROD, *inputs[0].d[2], *inputs[0].d[3]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool MultiScaleDeformableAttnPluginDynamic::supportsFormatCombination(
|
||||
int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs, int nbOutputs) TRT_NOEXCEPT {
|
||||
|
||||
if (ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR)
|
||||
{
|
||||
if ((pos == 1) || (pos == 2))
|
||||
{
|
||||
return (ioDesc[pos].type == nvinfer1::DataType::kINT32);
|
||||
}
|
||||
else
|
||||
{
|
||||
return ((ioDesc[pos].type == ioDesc[0].type) &&
|
||||
((ioDesc[pos].type == nvinfer1::DataType::kFLOAT) || (ioDesc[pos].type == nvinfer1::DataType::kHALF)));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
if (ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) {
|
||||
if ((pos == 1) || (pos == 2)) {
|
||||
return (ioDesc[pos].type == nvinfer1::DataType::kINT32);
|
||||
} else {
|
||||
return ((ioDesc[pos].type == ioDesc[0].type) &&
|
||||
((ioDesc[pos].type == nvinfer1::DataType::kFLOAT) ||
|
||||
(ioDesc[pos].type == nvinfer1::DataType::kHALF)));
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void MultiScaleDeformableAttnPluginDynamic::configurePlugin(
|
||||
const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
|
||||
const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) TRT_NOEXCEPT {
|
||||
}
|
||||
const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) TRT_NOEXCEPT {}
|
||||
|
||||
size_t MultiScaleDeformableAttnPluginDynamic::getWorkspaceSize(
|
||||
const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
|
||||
|
@ -79,44 +73,43 @@ size_t MultiScaleDeformableAttnPluginDynamic::getWorkspaceSize(
|
|||
}
|
||||
|
||||
int MultiScaleDeformableAttnPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs,
|
||||
void *workSpace,
|
||||
cudaStream_t stream) TRT_NOEXCEPT {
|
||||
int32_t const batch = inputDesc[0].dims.d[0];
|
||||
int32_t spatial_size = inputDesc[0].dims.d[1];
|
||||
int32_t num_heads = inputDesc[0].dims.d[2];
|
||||
int32_t channels = inputDesc[0].dims.d[3];
|
||||
int32_t num_levels = inputDesc[1].dims.d[0];
|
||||
int32_t num_query = inputDesc[3].dims.d[1];
|
||||
int32_t num_point = inputDesc[3].dims.d[4];
|
||||
int32_t rc = 0;
|
||||
if (inputDesc[0].type == nvinfer1::DataType::kFLOAT)
|
||||
{
|
||||
float const* value = static_cast<float const*>(inputs[0]);
|
||||
int32_t const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
|
||||
int32_t const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
|
||||
float const* samplingLoc = static_cast<float const*>(inputs[3]);
|
||||
float const* attnWeight = static_cast<float const*>(inputs[4]);
|
||||
float* output = static_cast<float*>(outputs[0]);
|
||||
const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs,
|
||||
void *workSpace,
|
||||
cudaStream_t stream) TRT_NOEXCEPT {
|
||||
int32_t const batch = inputDesc[0].dims.d[0];
|
||||
int32_t spatial_size = inputDesc[0].dims.d[1];
|
||||
int32_t num_heads = inputDesc[0].dims.d[2];
|
||||
int32_t channels = inputDesc[0].dims.d[3];
|
||||
int32_t num_levels = inputDesc[1].dims.d[0];
|
||||
int32_t num_query = inputDesc[3].dims.d[1];
|
||||
int32_t num_point = inputDesc[3].dims.d[4];
|
||||
int32_t rc = 0;
|
||||
if (inputDesc[0].type == nvinfer1::DataType::kFLOAT) {
|
||||
float const *value = static_cast<float const *>(inputs[0]);
|
||||
int32_t const *spatialShapes = static_cast<int32_t const *>(inputs[1]);
|
||||
int32_t const *levelStartIndex = static_cast<int32_t const *>(inputs[2]);
|
||||
float const *samplingLoc = static_cast<float const *>(inputs[3]);
|
||||
float const *attnWeight = static_cast<float const *>(inputs[4]);
|
||||
float *output = static_cast<float *>(outputs[0]);
|
||||
|
||||
rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output,
|
||||
batch, spatial_size, num_heads, channels, num_levels, num_query, num_point, stream);
|
||||
}
|
||||
else if (inputDesc[0].type == nvinfer1::DataType::kHALF)
|
||||
{
|
||||
const __half* value = static_cast<const __half*>(inputs[0]);
|
||||
int32_t const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
|
||||
int32_t const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
|
||||
const __half* samplingLoc = static_cast<const __half*>(inputs[3]);
|
||||
const __half* attnWeight = static_cast<const __half*>(inputs[4]);
|
||||
__half* output = static_cast<__half*>(outputs[0]);
|
||||
rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight,
|
||||
output, batch, spatial_size, num_heads, channels, num_levels,
|
||||
num_query, num_point, stream);
|
||||
} else if (inputDesc[0].type == nvinfer1::DataType::kHALF) {
|
||||
const __half *value = static_cast<const __half *>(inputs[0]);
|
||||
int32_t const *spatialShapes = static_cast<int32_t const *>(inputs[1]);
|
||||
int32_t const *levelStartIndex = static_cast<int32_t const *>(inputs[2]);
|
||||
const __half *samplingLoc = static_cast<const __half *>(inputs[3]);
|
||||
const __half *attnWeight = static_cast<const __half *>(inputs[4]);
|
||||
__half *output = static_cast<__half *>(outputs[0]);
|
||||
|
||||
rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output,
|
||||
batch, spatial_size, num_heads, channels, num_levels, num_query, num_point, stream);
|
||||
}
|
||||
rc = ms_deform_attn_cuda_forward(value, spatialShapes, levelStartIndex, samplingLoc, attnWeight,
|
||||
output, batch, spatial_size, num_heads, channels, num_levels,
|
||||
num_query, num_point, stream);
|
||||
}
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
nvinfer1::DataType MultiScaleDeformableAttnPluginDynamic::getOutputDataType(
|
||||
|
@ -165,7 +158,6 @@ const char *MultiScaleDeformableAttnPluginDynamicCreator::getPluginVersion() con
|
|||
|
||||
nvinfer1::IPluginV2 *MultiScaleDeformableAttnPluginDynamicCreator::createPlugin(
|
||||
const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT {
|
||||
|
||||
MultiScaleDeformableAttnPluginDynamic *plugin = new MultiScaleDeformableAttnPluginDynamic(name);
|
||||
plugin->setPluginNamespace(getPluginNamespace());
|
||||
return plugin;
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
namespace mmdeploy {
|
||||
class MultiScaleDeformableAttnPluginDynamic : public TRTPluginBase {
|
||||
public:
|
||||
|
||||
MultiScaleDeformableAttnPluginDynamic(const std::string &name);
|
||||
|
||||
MultiScaleDeformableAttnPluginDynamic(const std::string name, const void *data, size_t length);
|
||||
|
|
|
@ -8,50 +8,57 @@
|
|||
#include "trt_plugin_helper.hpp"
|
||||
|
||||
template <typename scalar_t>
|
||||
void ms_deformable_im2col_cuda(cudaStream_t stream, scalar_t const* dataValue, int32_t const* dataSpatialShapes,
|
||||
int32_t const* dataLevelStartIndex, scalar_t const* dataSamplingLoc, scalar_t const* dataAttnWeight,
|
||||
int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads, int32_t const channels, int32_t const numLevels,
|
||||
int32_t const numQuery, int32_t const numPoint, scalar_t* dataCol)
|
||||
{
|
||||
int32_t const numKernels = batchSize * numQuery * numHeads * channels;
|
||||
int32_t const numActualKernels = batchSize * numQuery * numHeads * channels;
|
||||
void ms_deformable_im2col_cuda(cudaStream_t stream, scalar_t const* dataValue,
|
||||
int32_t const* dataSpatialShapes, int32_t const* dataLevelStartIndex,
|
||||
scalar_t const* dataSamplingLoc, scalar_t const* dataAttnWeight,
|
||||
int32_t const batchSize, int32_t const spatialSize,
|
||||
int32_t const numHeads, int32_t const channels,
|
||||
int32_t const numLevels, int32_t const numQuery,
|
||||
int32_t const numPoint, scalar_t* dataCol) {
|
||||
int32_t const numKernels = batchSize * numQuery * numHeads * channels;
|
||||
int32_t const numActualKernels = batchSize * numQuery * numHeads * channels;
|
||||
|
||||
ms_deformable_im2col_gpu_kernel<scalar_t><<<GET_BLOCKS(numActualKernels), THREADS_PER_BLOCK, 0, stream>>>(
|
||||
numKernels, dataValue, dataSpatialShapes, dataLevelStartIndex, dataSamplingLoc, dataAttnWeight, batchSize,
|
||||
spatialSize, numHeads, channels, numLevels, numQuery, numPoint, dataCol);
|
||||
ms_deformable_im2col_gpu_kernel<scalar_t>
|
||||
<<<GET_BLOCKS(numActualKernels), THREADS_PER_BLOCK, 0, stream>>>(
|
||||
numKernels, dataValue, dataSpatialShapes, dataLevelStartIndex, dataSamplingLoc,
|
||||
dataAttnWeight, batchSize, spatialSize, numHeads, channels, numLevels, numQuery, numPoint,
|
||||
dataCol);
|
||||
}
|
||||
|
||||
|
||||
template <typename scalar_t>
|
||||
int32_t ms_deform_attn_cuda_forward(const scalar_t* value, const int32_t* spatialShapes,
|
||||
const int32_t* levelStartIndex, const scalar_t* samplingLoc, const scalar_t* attnWeight, scalar_t* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
auto perValueSize = mSpatialSize * mNumHeads * mChannels;
|
||||
auto perSampleLocSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint * 2;
|
||||
auto perAttnWeightSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint;
|
||||
auto perOutputSize = mNumQuery * mNumHeads * mChannels;
|
||||
const int32_t* levelStartIndex, const scalar_t* samplingLoc,
|
||||
const scalar_t* attnWeight, scalar_t* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels,
|
||||
int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
|
||||
cudaStream_t stream) {
|
||||
auto perValueSize = mSpatialSize * mNumHeads * mChannels;
|
||||
auto perSampleLocSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint * 2;
|
||||
auto perAttnWeightSize = mNumQuery * mNumHeads * mNumLevels * mNumPoint;
|
||||
auto perOutputSize = mNumQuery * mNumHeads * mChannels;
|
||||
|
||||
int32_t mIm2colStep = batch;
|
||||
int32_t mIm2colStep = batch;
|
||||
|
||||
for (int32_t n = 0; n < batch / mIm2colStep; ++n)
|
||||
{
|
||||
auto columns = output + n * mIm2colStep * perOutputSize;
|
||||
ms_deformable_im2col_cuda<scalar_t>(stream, value + n * mIm2colStep * perValueSize, spatialShapes, levelStartIndex,
|
||||
samplingLoc + n * mIm2colStep * perSampleLocSize, attnWeight + n * mIm2colStep * perAttnWeightSize, mIm2colStep,
|
||||
mSpatialSize, mNumHeads, mChannels, mNumLevels, mNumQuery, mNumPoint, columns);
|
||||
}
|
||||
for (int32_t n = 0; n < batch / mIm2colStep; ++n) {
|
||||
auto columns = output + n * mIm2colStep * perOutputSize;
|
||||
ms_deformable_im2col_cuda<scalar_t>(
|
||||
stream, value + n * mIm2colStep * perValueSize, spatialShapes, levelStartIndex,
|
||||
samplingLoc + n * mIm2colStep * perSampleLocSize,
|
||||
attnWeight + n * mIm2colStep * perAttnWeightSize, mIm2colStep, mSpatialSize, mNumHeads,
|
||||
mChannels, mNumLevels, mNumQuery, mNumPoint, columns);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
template int32_t ms_deform_attn_cuda_forward<float>(const float* value, const int32_t* spatialShapes,
|
||||
const int32_t* levelStartIndex, const float* samplingLoc, const float* attnWeight, float* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
|
||||
cudaStream_t stream);
|
||||
template int32_t ms_deform_attn_cuda_forward<float>(
|
||||
const float* value, const int32_t* spatialShapes, const int32_t* levelStartIndex,
|
||||
const float* samplingLoc, const float* attnWeight, float* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels,
|
||||
int32_t mNumQuery, int32_t mNumPoint, cudaStream_t stream);
|
||||
|
||||
template int32_t ms_deform_attn_cuda_forward<__half>(const __half* value, const int32_t* spatialShapes,
|
||||
const int32_t* levelStartIndex, const __half* samplingLoc, const __half* attnWeight, __half* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
|
||||
cudaStream_t stream);
|
||||
template int32_t ms_deform_attn_cuda_forward<__half>(
|
||||
const __half* value, const int32_t* spatialShapes, const int32_t* levelStartIndex,
|
||||
const __half* samplingLoc, const __half* attnWeight, __half* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels,
|
||||
int32_t mNumQuery, int32_t mNumPoint, cudaStream_t stream);
|
||||
|
|
|
@ -5,10 +5,10 @@
|
|||
#include "common_cuda_helper.hpp"
|
||||
|
||||
template <typename scalar_t>
|
||||
__device__ scalar_t ms_deform_attn_im2col_bilinear(
|
||||
const scalar_t *&bottom_data, const int &height, const int &width,
|
||||
const int &nheads, const int &channels, const scalar_t &h,
|
||||
const scalar_t &w, const int &m, const int &c) {
|
||||
__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t*& bottom_data, const int& height,
|
||||
const int& width, const int& nheads,
|
||||
const int& channels, const scalar_t& h,
|
||||
const scalar_t& w, const int& m, const int& c) {
|
||||
const int h_low = floorf(h);
|
||||
const int w_low = floorf(w);
|
||||
const int h_high = h_low + 1;
|
||||
|
@ -54,210 +54,204 @@ __device__ scalar_t ms_deform_attn_im2col_bilinear(
|
|||
}
|
||||
|
||||
template <>
|
||||
__device__ __half ms_deform_attn_im2col_bilinear<__half>(const __half*& bottomData, int32_t const& height, int32_t const& width,
|
||||
int32_t const& nHeads, int32_t const& channels, const __half& h, const __half& w, int32_t const& m, int32_t const& c)
|
||||
{
|
||||
int32_t const hLow = __half2int_rd(h);
|
||||
int32_t const wLow = __half2int_rd(w);
|
||||
int32_t const hHigh = hLow + 1;
|
||||
int32_t const wHigh = wLow + 1;
|
||||
__device__ __half ms_deform_attn_im2col_bilinear<__half>(
|
||||
const __half*& bottomData, int32_t const& height, int32_t const& width, int32_t const& nHeads,
|
||||
int32_t const& channels, const __half& h, const __half& w, int32_t const& m, int32_t const& c) {
|
||||
int32_t const hLow = __half2int_rd(h);
|
||||
int32_t const wLow = __half2int_rd(w);
|
||||
int32_t const hHigh = hLow + 1;
|
||||
int32_t const wHigh = wLow + 1;
|
||||
|
||||
const __half kZERO = __int2half_rz(0);
|
||||
const __half one = __int2half_rz(1);
|
||||
const __half kZERO = __int2half_rz(0);
|
||||
const __half one = __int2half_rz(1);
|
||||
|
||||
#if __CUDA_ARCH__>=530
|
||||
const __half lh = __hsub(h, __int2half_rd(hLow));
|
||||
const __half lw = __hsub(w, __int2half_rd(wLow));
|
||||
const __half hh = __hsub(one, lh), hw = __hsub(one, lw);
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
const __half lh = __hsub(h, __int2half_rd(hLow));
|
||||
const __half lw = __hsub(w, __int2half_rd(wLow));
|
||||
const __half hh = __hsub(one, lh), hw = __hsub(one, lw);
|
||||
#else
|
||||
const __half lh = __float2half(__half2float(h) - hLow);
|
||||
const __half lw = __float2half(__half2float(w) - wLow);
|
||||
const __half hh = __float2half(__half2float(one) - __half2float(lh));
|
||||
const __half hw = __float2half(__half2float(one) - __half2float(lw));
|
||||
const __half lh = __float2half(__half2float(h) - hLow);
|
||||
const __half lw = __float2half(__half2float(w) - wLow);
|
||||
const __half hh = __float2half(__half2float(one) - __half2float(lh));
|
||||
const __half hw = __float2half(__half2float(one) - __half2float(lw));
|
||||
#endif
|
||||
int32_t const wStride = nHeads * channels;
|
||||
int32_t const hStride = width * wStride;
|
||||
int32_t const hLowPtrOffset = hLow * hStride;
|
||||
int32_t const hHighPtrOffset = hLowPtrOffset + hStride;
|
||||
int32_t const wLowPtrOffset = wLow * wStride;
|
||||
int32_t const wHighPtrOffset = wLowPtrOffset + wStride;
|
||||
int32_t const basePtr = m * channels + c;
|
||||
int32_t const wStride = nHeads * channels;
|
||||
int32_t const hStride = width * wStride;
|
||||
int32_t const hLowPtrOffset = hLow * hStride;
|
||||
int32_t const hHighPtrOffset = hLowPtrOffset + hStride;
|
||||
int32_t const wLowPtrOffset = wLow * wStride;
|
||||
int32_t const wHighPtrOffset = wLowPtrOffset + wStride;
|
||||
int32_t const basePtr = m * channels + c;
|
||||
|
||||
__half v1 = kZERO;
|
||||
if (hLow >= 0 && wLow >= 0)
|
||||
{
|
||||
int32_t const ptr1 = hLowPtrOffset + wLowPtrOffset + basePtr;
|
||||
v1 = bottomData[ptr1];
|
||||
}
|
||||
__half v2 = kZERO;
|
||||
if (hLow >= 0 && wHigh <= width - 1)
|
||||
{
|
||||
int32_t const ptr2 = hLowPtrOffset + wHighPtrOffset + basePtr;
|
||||
v2 = bottomData[ptr2];
|
||||
}
|
||||
__half v3 = kZERO;
|
||||
if (hHigh <= height - 1 && wLow >= 0)
|
||||
{
|
||||
int32_t const ptr3 = hHighPtrOffset + wLowPtrOffset + basePtr;
|
||||
v3 = bottomData[ptr3];
|
||||
}
|
||||
__half v4 = kZERO;
|
||||
if (hHigh <= height - 1 && wHigh <= width - 1)
|
||||
{
|
||||
int32_t const ptr4 = hHighPtrOffset + wHighPtrOffset + basePtr;
|
||||
v4 = bottomData[ptr4];
|
||||
}
|
||||
__half v1 = kZERO;
|
||||
if (hLow >= 0 && wLow >= 0) {
|
||||
int32_t const ptr1 = hLowPtrOffset + wLowPtrOffset + basePtr;
|
||||
v1 = bottomData[ptr1];
|
||||
}
|
||||
__half v2 = kZERO;
|
||||
if (hLow >= 0 && wHigh <= width - 1) {
|
||||
int32_t const ptr2 = hLowPtrOffset + wHighPtrOffset + basePtr;
|
||||
v2 = bottomData[ptr2];
|
||||
}
|
||||
__half v3 = kZERO;
|
||||
if (hHigh <= height - 1 && wLow >= 0) {
|
||||
int32_t const ptr3 = hHighPtrOffset + wLowPtrOffset + basePtr;
|
||||
v3 = bottomData[ptr3];
|
||||
}
|
||||
__half v4 = kZERO;
|
||||
if (hHigh <= height - 1 && wHigh <= width - 1) {
|
||||
int32_t const ptr4 = hHighPtrOffset + wHighPtrOffset + basePtr;
|
||||
v4 = bottomData[ptr4];
|
||||
}
|
||||
|
||||
#if __CUDA_ARCH__>=530
|
||||
__half w1 = __hmul(__hmul(hh, hw), v1);
|
||||
__half w2 = __hmul(__hmul(hh, lw), v2);
|
||||
__half w3 = __hmul(__hmul(lh, hw), v3);
|
||||
__half w4 = __hmul(__hmul(lh, lw), v4);
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
__half w1 = __hmul(__hmul(hh, hw), v1);
|
||||
__half w2 = __hmul(__hmul(hh, lw), v2);
|
||||
__half w3 = __hmul(__hmul(lh, hw), v3);
|
||||
__half w4 = __hmul(__hmul(lh, lw), v4);
|
||||
|
||||
w1 = __hadd(w1, w2);
|
||||
w3 = __hadd(w3, w4);
|
||||
w1 = __hadd(w1, w2);
|
||||
w3 = __hadd(w3, w4);
|
||||
|
||||
const __half val = __hadd(w1, w3);
|
||||
const __half val = __hadd(w1, w3);
|
||||
#else
|
||||
__half w1 = __float2half((__half2float(hh) * __half2float(hw)) * __half2float(v1));
|
||||
__half w2 = __float2half((__half2float(hh) * __half2float(lw)) * __half2float(v2));
|
||||
__half w3 = __float2half((__half2float(lh) * __half2float(hw)) * __half2float(v3));
|
||||
__half w4 = __float2half((__half2float(lh) * __half2float(lw)) * __half2float(v4));
|
||||
__half w1 = __float2half((__half2float(hh) * __half2float(hw)) * __half2float(v1));
|
||||
__half w2 = __float2half((__half2float(hh) * __half2float(lw)) * __half2float(v2));
|
||||
__half w3 = __float2half((__half2float(lh) * __half2float(hw)) * __half2float(v3));
|
||||
__half w4 = __float2half((__half2float(lh) * __half2float(lw)) * __half2float(v4));
|
||||
|
||||
w1 = __float2half(__half2float(w1) + __half2float(w2));
|
||||
w3 = __float2half(__half2float(w3) + __half2float(w4));
|
||||
w1 = __float2half(__half2float(w1) + __half2float(w2));
|
||||
w3 = __float2half(__half2float(w3) + __half2float(w4));
|
||||
|
||||
const __half val = __float2half(__half2float(w1) + __half2float(w3));
|
||||
const __half val = __float2half(__half2float(w1) + __half2float(w3));
|
||||
#endif
|
||||
return val;
|
||||
return val;
|
||||
}
|
||||
|
||||
#if 1
|
||||
template <typename scalar_t>
|
||||
__global__ void ms_deformable_im2col_gpu_kernel(int32_t const n, scalar_t const* dataValue,
|
||||
int32_t const* dataSpatialShapes, int32_t const* dataLevelStartIndex, scalar_t const* dataSamplingLoc,
|
||||
scalar_t const* dataAttnWeight, int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads, int32_t const channels,
|
||||
int32_t const numLevels, int32_t const numQuery, int32_t const numPoint, scalar_t* dataCol)
|
||||
{
|
||||
CUDA_1D_KERNEL_LOOP(index, n)
|
||||
{
|
||||
int32_t _temp = index;
|
||||
int32_t const cCol = _temp % channels;
|
||||
_temp /= channels;
|
||||
int32_t const samplingIndex = _temp;
|
||||
int32_t const mCol = _temp % numHeads;
|
||||
_temp /= numHeads;
|
||||
_temp /= numQuery;
|
||||
int32_t const bCol = _temp;
|
||||
__global__ void ms_deformable_im2col_gpu_kernel(
|
||||
int32_t const n, scalar_t const* dataValue, int32_t const* dataSpatialShapes,
|
||||
int32_t const* dataLevelStartIndex, scalar_t const* dataSamplingLoc,
|
||||
scalar_t const* dataAttnWeight, int32_t const batchSize, int32_t const spatialSize,
|
||||
int32_t const numHeads, int32_t const channels, int32_t const numLevels, int32_t const numQuery,
|
||||
int32_t const numPoint, scalar_t* dataCol) {
|
||||
CUDA_1D_KERNEL_LOOP(index, n) {
|
||||
int32_t _temp = index;
|
||||
int32_t const cCol = _temp % channels;
|
||||
_temp /= channels;
|
||||
int32_t const samplingIndex = _temp;
|
||||
int32_t const mCol = _temp % numHeads;
|
||||
_temp /= numHeads;
|
||||
_temp /= numQuery;
|
||||
int32_t const bCol = _temp;
|
||||
|
||||
scalar_t* dataColPtr = dataCol + index;
|
||||
int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
|
||||
int32_t dataLocWPtr = dataWeightPtr << 1;
|
||||
int32_t const qidStride = numHeads * channels;
|
||||
int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
|
||||
scalar_t col = 0;
|
||||
scalar_t* dataColPtr = dataCol + index;
|
||||
int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
|
||||
int32_t dataLocWPtr = dataWeightPtr << 1;
|
||||
int32_t const qidStride = numHeads * channels;
|
||||
int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
|
||||
scalar_t col = 0;
|
||||
|
||||
for (int32_t lCol = 0; lCol < numLevels; ++lCol)
|
||||
{
|
||||
int32_t const levelStartId = dataLevelStartIndex[lCol];
|
||||
int32_t const spatialHPtr = lCol << 1;
|
||||
int32_t const spatialH = dataSpatialShapes[spatialHPtr];
|
||||
int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
|
||||
scalar_t const* dataValuePtr = dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
|
||||
for (int32_t pCol = 0; pCol < numPoint; ++pCol)
|
||||
{
|
||||
scalar_t const locW = dataSamplingLoc[dataLocWPtr];
|
||||
scalar_t const locH = dataSamplingLoc[dataLocWPtr + 1];
|
||||
scalar_t const weight = dataAttnWeight[dataWeightPtr];
|
||||
for (int32_t lCol = 0; lCol < numLevels; ++lCol) {
|
||||
int32_t const levelStartId = dataLevelStartIndex[lCol];
|
||||
int32_t const spatialHPtr = lCol << 1;
|
||||
int32_t const spatialH = dataSpatialShapes[spatialHPtr];
|
||||
int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
|
||||
scalar_t const* dataValuePtr =
|
||||
dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
|
||||
for (int32_t pCol = 0; pCol < numPoint; ++pCol) {
|
||||
scalar_t const locW = dataSamplingLoc[dataLocWPtr];
|
||||
scalar_t const locH = dataSamplingLoc[dataLocWPtr + 1];
|
||||
scalar_t const weight = dataAttnWeight[dataWeightPtr];
|
||||
|
||||
scalar_t const hIm = locH * spatialH - 0.5;
|
||||
scalar_t const wIm = locW * spatialW - 0.5;
|
||||
scalar_t const hIm = locH * spatialH - 0.5;
|
||||
scalar_t const wIm = locW * spatialW - 0.5;
|
||||
|
||||
if (hIm > -1 && wIm > -1 && hIm < spatialH && wIm < spatialW)
|
||||
{
|
||||
col += ms_deform_attn_im2col_bilinear(
|
||||
dataValuePtr, spatialH, spatialW, numHeads, channels, hIm, wIm, mCol, cCol)
|
||||
* weight;
|
||||
}
|
||||
|
||||
dataWeightPtr += 1;
|
||||
dataLocWPtr += 2;
|
||||
}
|
||||
if (hIm > -1 && wIm > -1 && hIm < spatialH && wIm < spatialW) {
|
||||
col += ms_deform_attn_im2col_bilinear(dataValuePtr, spatialH, spatialW, numHeads,
|
||||
channels, hIm, wIm, mCol, cCol) *
|
||||
weight;
|
||||
}
|
||||
*dataColPtr = col;
|
||||
|
||||
dataWeightPtr += 1;
|
||||
dataLocWPtr += 2;
|
||||
}
|
||||
}
|
||||
*dataColPtr = col;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
__global__ void ms_deformable_im2col_gpu_kernel<__half>(int32_t const n, const __half* dataValue,
|
||||
int32_t const* dataSpatialShapes, int32_t const* dataLevelStartIndex, const __half* dataSamplingLoc,
|
||||
const __half* dataAttnWeight, int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads, int32_t const channels,
|
||||
int32_t const numLevels, int32_t const numQuery, int32_t const numPoint, __half* dataCol)
|
||||
{
|
||||
CUDA_1D_KERNEL_LOOP(index, n)
|
||||
{
|
||||
int32_t _temp = index;
|
||||
int32_t const cCol = _temp % channels;
|
||||
_temp /= channels;
|
||||
int32_t const samplingIndex = _temp;
|
||||
int32_t const mCol = _temp % numHeads;
|
||||
_temp /= numHeads;
|
||||
_temp /= numQuery;
|
||||
int32_t const bCol = _temp;
|
||||
__global__ void ms_deformable_im2col_gpu_kernel<__half>(
|
||||
int32_t const n, const __half* dataValue, int32_t const* dataSpatialShapes,
|
||||
int32_t const* dataLevelStartIndex, const __half* dataSamplingLoc, const __half* dataAttnWeight,
|
||||
int32_t const batchSize, int32_t const spatialSize, int32_t const numHeads,
|
||||
int32_t const channels, int32_t const numLevels, int32_t const numQuery, int32_t const numPoint,
|
||||
__half* dataCol) {
|
||||
CUDA_1D_KERNEL_LOOP(index, n) {
|
||||
int32_t _temp = index;
|
||||
int32_t const cCol = _temp % channels;
|
||||
_temp /= channels;
|
||||
int32_t const samplingIndex = _temp;
|
||||
int32_t const mCol = _temp % numHeads;
|
||||
_temp /= numHeads;
|
||||
_temp /= numQuery;
|
||||
int32_t const bCol = _temp;
|
||||
|
||||
__half* dataColPtr = dataCol + index;
|
||||
int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
|
||||
int32_t dataLocWPtr = dataWeightPtr << 1;
|
||||
int32_t const qidStride = numHeads * channels;
|
||||
int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
|
||||
const __half kZERO_POINT_FIVE = __float2half(0.5f);
|
||||
const __half kMINUS_ONE = __float2half(-1.0f);
|
||||
const __half kZERO = __int2half_rz(0);
|
||||
__half tpVal = kZERO;
|
||||
__half col = kZERO;
|
||||
__half* dataColPtr = dataCol + index;
|
||||
int32_t dataWeightPtr = samplingIndex * numLevels * numPoint;
|
||||
int32_t dataLocWPtr = dataWeightPtr << 1;
|
||||
int32_t const qidStride = numHeads * channels;
|
||||
int32_t const dataValuePtrInitOffset = bCol * spatialSize * qidStride;
|
||||
const __half kZERO_POINT_FIVE = __float2half(0.5f);
|
||||
const __half kMINUS_ONE = __float2half(-1.0f);
|
||||
const __half kZERO = __int2half_rz(0);
|
||||
__half tpVal = kZERO;
|
||||
__half col = kZERO;
|
||||
|
||||
for (int32_t lCol = 0; lCol < numLevels; ++lCol)
|
||||
{
|
||||
int32_t const levelStartId = dataLevelStartIndex[lCol];
|
||||
int32_t const spatialHPtr = lCol << 1;
|
||||
int32_t const spatialH = dataSpatialShapes[spatialHPtr];
|
||||
int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
|
||||
const __half spatialHHalf = __int2half_rd(spatialH);
|
||||
const __half spatialWHalf = __int2half_rd(spatialW);
|
||||
const __half* dataValuePtr = dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
|
||||
for (int32_t pCol = 0; pCol < numPoint; ++pCol)
|
||||
{
|
||||
const __half locW = dataSamplingLoc[dataLocWPtr];
|
||||
const __half locH = dataSamplingLoc[dataLocWPtr + 1];
|
||||
const __half weight = dataAttnWeight[dataWeightPtr];
|
||||
for (int32_t lCol = 0; lCol < numLevels; ++lCol) {
|
||||
int32_t const levelStartId = dataLevelStartIndex[lCol];
|
||||
int32_t const spatialHPtr = lCol << 1;
|
||||
int32_t const spatialH = dataSpatialShapes[spatialHPtr];
|
||||
int32_t const spatialW = dataSpatialShapes[spatialHPtr + 1];
|
||||
const __half spatialHHalf = __int2half_rd(spatialH);
|
||||
const __half spatialWHalf = __int2half_rd(spatialW);
|
||||
const __half* dataValuePtr = dataValue + (dataValuePtrInitOffset + levelStartId * qidStride);
|
||||
for (int32_t pCol = 0; pCol < numPoint; ++pCol) {
|
||||
const __half locW = dataSamplingLoc[dataLocWPtr];
|
||||
const __half locH = dataSamplingLoc[dataLocWPtr + 1];
|
||||
const __half weight = dataAttnWeight[dataWeightPtr];
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
const __half hIm = __hsub(__hmul(locH, spatialHHalf), kZERO_POINT_FIVE);
|
||||
const __half wIm = __hsub(__hmul(locW, spatialWHalf), kZERO_POINT_FIVE);
|
||||
const __half hIm = __hsub(__hmul(locH, spatialHHalf), kZERO_POINT_FIVE);
|
||||
const __half wIm = __hsub(__hmul(locW, spatialWHalf), kZERO_POINT_FIVE);
|
||||
|
||||
if (__hgt(hIm, kMINUS_ONE) && __hgt(wIm, kMINUS_ONE) && __hlt(hIm, spatialHHalf)
|
||||
&& __hlt(wIm, spatialWHalf))
|
||||
{
|
||||
tpVal = ms_deform_attn_im2col_bilinear(
|
||||
dataValuePtr, spatialH, spatialW, numHeads, channels, hIm, wIm, mCol, cCol);
|
||||
col = __hadd(col, __hmul(tpVal, weight));
|
||||
}
|
||||
#else
|
||||
const __half hIm = __float2half(__half2float(locH) * __half2float(spatialHHalf) - __half2float(kZERO_POINT_FIVE));
|
||||
const __half wIm = __float2half(__half2float(locW) * __half2float(spatialWHalf) - __half2float(kZERO_POINT_FIVE));
|
||||
|
||||
if((__half2float(hIm)>__half2float(kMINUS_ONE)) && (__half2float(wIm)>__half2float(kMINUS_ONE))
|
||||
&& (__half2float(hIm)<__half2float(spatialHHalf)) && (__half2float(wIm)<__half2float(spatialWHalf)))
|
||||
{
|
||||
tpVal = ms_deform_attn_im2col_bilinear(
|
||||
dataValuePtr, spatialH, spatialW, numHeads, channels, hIm, wIm, mCol, cCol);
|
||||
col = __float2half(__half2float(col) + (__half2float(tpVal) * __half2float(weight)));
|
||||
}
|
||||
#endif
|
||||
dataWeightPtr += 1;
|
||||
dataLocWPtr += 2;
|
||||
}
|
||||
if (__hgt(hIm, kMINUS_ONE) && __hgt(wIm, kMINUS_ONE) && __hlt(hIm, spatialHHalf) &&
|
||||
__hlt(wIm, spatialWHalf)) {
|
||||
tpVal = ms_deform_attn_im2col_bilinear(dataValuePtr, spatialH, spatialW, numHeads,
|
||||
channels, hIm, wIm, mCol, cCol);
|
||||
col = __hadd(col, __hmul(tpVal, weight));
|
||||
}
|
||||
*dataColPtr = col;
|
||||
#else
|
||||
const __half hIm = __float2half(__half2float(locH) * __half2float(spatialHHalf) -
|
||||
__half2float(kZERO_POINT_FIVE));
|
||||
const __half wIm = __float2half(__half2float(locW) * __half2float(spatialWHalf) -
|
||||
__half2float(kZERO_POINT_FIVE));
|
||||
|
||||
if ((__half2float(hIm) > __half2float(kMINUS_ONE)) &&
|
||||
(__half2float(wIm) > __half2float(kMINUS_ONE)) &&
|
||||
(__half2float(hIm) < __half2float(spatialHHalf)) &&
|
||||
(__half2float(wIm) < __half2float(spatialWHalf))) {
|
||||
tpVal = ms_deform_attn_im2col_bilinear(dataValuePtr, spatialH, spatialW, numHeads,
|
||||
channels, hIm, wIm, mCol, cCol);
|
||||
col = __float2half(__half2float(col) + (__half2float(tpVal) * __half2float(weight)));
|
||||
}
|
||||
#endif
|
||||
dataWeightPtr += 1;
|
||||
dataLocWPtr += 2;
|
||||
}
|
||||
}
|
||||
*dataColPtr = col;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -5,9 +5,11 @@
|
|||
#include <cuda_runtime.h>
|
||||
|
||||
template <typename scalar_t>
|
||||
int32_t ms_deform_attn_cuda_forward(const scalar_t* value, const int32_t* spatialShapes, const int32_t* levelStartIndex,
|
||||
const scalar_t* samplingLoc, const scalar_t* attnWeight, scalar_t* output, int32_t batch, int32_t mSpatialSize,
|
||||
int32_t mNumHeads, int32_t mChannels, int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
|
||||
cudaStream_t stream);
|
||||
int32_t ms_deform_attn_cuda_forward(const scalar_t* value, const int32_t* spatialShapes,
|
||||
const int32_t* levelStartIndex, const scalar_t* samplingLoc,
|
||||
const scalar_t* attnWeight, scalar_t* output, int32_t batch,
|
||||
int32_t mSpatialSize, int32_t mNumHeads, int32_t mChannels,
|
||||
int32_t mNumLevels, int32_t mNumQuery, int32_t mNumPoint,
|
||||
cudaStream_t stream);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -17,6 +17,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ocr_recognition", "ocr_reco
|
|||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "pose_detection", "pose_detection\pose_detection.csproj", "{10E3B87C-7544-4F4D-90A3-65D5654CBF94}"
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "pose_tracker", "pose_tracker\pose_tracker.csproj", "{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "rotated_detection", "rotated_detection\rotated_detection.csproj", "{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
|
@ -51,6 +55,14 @@ Global
|
|||
{10E3B87C-7544-4F4D-90A3-65D5654CBF94}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{10E3B87C-7544-4F4D-90A3-65D5654CBF94}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{10E3B87C-7544-4F4D-90A3-65D5654CBF94}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{42FC54A1-73D5-429D-AF5E-09BAEC4F0D9E}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{1957C2D2-F6D1-4E28-920C-2B7DE98EAF50}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
|
|
@ -0,0 +1,203 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using OpenCvSharp;
|
||||
using MMDeploy;
|
||||
using System.Linq;
|
||||
|
||||
namespace pose_tracker
|
||||
{
|
||||
internal class Program
|
||||
{
|
||||
static class CocoSkeleton
|
||||
{
|
||||
public static List<(int, int)> Skeleton = new List<(int, int)>
|
||||
{
|
||||
(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11), (6, 12),
|
||||
(5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2), (0, 1),
|
||||
(0, 2), (1, 3), (2, 4), (3, 5), (4, 6)
|
||||
};
|
||||
|
||||
public static List<Scalar> Palette = new List<Scalar>
|
||||
{
|
||||
new Scalar(255, 128, 0), new Scalar(255, 153, 51), new Scalar(255, 178, 102),
|
||||
new Scalar(230, 230, 0), new Scalar(255, 153, 255), new Scalar(153, 204, 255),
|
||||
new Scalar(255, 102, 255), new Scalar(255, 51, 255), new Scalar(102, 178, 255),
|
||||
new Scalar(51, 153, 255), new Scalar(255, 153, 153), new Scalar(255, 102, 102),
|
||||
new Scalar(255, 51, 51), new Scalar(153, 255, 153), new Scalar(102, 255, 102),
|
||||
new Scalar(51, 255, 51), new Scalar(0, 255, 0), new Scalar(0, 0, 255),
|
||||
new Scalar(255, 0, 0), new Scalar(255, 255, 255),
|
||||
};
|
||||
|
||||
public static List<int> LinkColor = new List<int>
|
||||
{
|
||||
0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
|
||||
};
|
||||
|
||||
public static List<int> PointColor = new List<int>
|
||||
{
|
||||
16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
}
|
||||
|
||||
static bool Visualize(OpenCvSharp.Mat frame, PoseTrackerOutput result, int long_edge,
|
||||
int frame_id, bool with_bbox)
|
||||
{
|
||||
var skeleton = CocoSkeleton.Skeleton;
|
||||
var palette = CocoSkeleton.Palette;
|
||||
var link_color = CocoSkeleton.LinkColor;
|
||||
var point_color = CocoSkeleton.PointColor;
|
||||
float scale = 1;
|
||||
if (long_edge != 0)
|
||||
{
|
||||
scale = (float)long_edge / (float)Math.Max(frame.Cols, frame.Rows);
|
||||
}
|
||||
if (scale != 1)
|
||||
{
|
||||
Cv2.Resize(frame, frame, new Size(), scale, scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
frame = frame.Clone();
|
||||
}
|
||||
|
||||
Action<List<float>, Scalar> drawBbox = (bbox, color) =>
|
||||
{
|
||||
for (int i = 0; i < bbox.Count; i++)
|
||||
{
|
||||
bbox[i] *= scale;
|
||||
}
|
||||
Cv2.Rectangle(frame, new OpenCvSharp.Point(bbox[0], bbox[1]),
|
||||
new OpenCvSharp.Point(bbox[2], bbox[3]), color);
|
||||
};
|
||||
|
||||
for (int i = 0; i < result.Count; i++)
|
||||
{
|
||||
PoseTrack pt = result.Results[i];
|
||||
for (int j = 0; j < pt.Keypoints.Count; j++)
|
||||
{
|
||||
Pointf p = pt.Keypoints[j];
|
||||
p.X *= scale;
|
||||
p.Y *= scale;
|
||||
pt.Keypoints[j] = p;
|
||||
}
|
||||
float score_thr = 0.5f;
|
||||
int[] used = new int[pt.Keypoints.Count * 2];
|
||||
for (int j = 0; j < skeleton.Count; j++)
|
||||
{
|
||||
int u = skeleton[j].Item1;
|
||||
int v = skeleton[j].Item2;
|
||||
if (pt.Scores[u] > score_thr && pt.Scores[v] > score_thr)
|
||||
{
|
||||
used[u] = used[v] = 1;
|
||||
var p_u = new OpenCvSharp.Point(pt.Keypoints[u].X, pt.Keypoints[u].Y);
|
||||
var p_v = new OpenCvSharp.Point(pt.Keypoints[v].X, pt.Keypoints[v].Y);
|
||||
Cv2.Line(frame, p_u, p_v, palette[link_color[j]], 1, LineTypes.AntiAlias);
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < pt.Keypoints.Count; j++)
|
||||
{
|
||||
if (used[j] == 1)
|
||||
{
|
||||
var p = new OpenCvSharp.Point(pt.Keypoints[j].X, pt.Keypoints[j].Y);
|
||||
Cv2.Circle(frame, p, 1, palette[point_color[j]], 2, LineTypes.AntiAlias);
|
||||
}
|
||||
}
|
||||
if (with_bbox)
|
||||
{
|
||||
var bbox = new List<float> { pt.Bbox.Left, pt.Bbox.Top, pt.Bbox.Right, pt.Bbox.Bottom };
|
||||
drawBbox(bbox, new Scalar(0, 255, 0));
|
||||
}
|
||||
}
|
||||
|
||||
Cv2.ImShow("pose_tracker", frame);
|
||||
return Cv2.WaitKey(1) != 'q';
|
||||
}
|
||||
static void CvMatToMat(OpenCvSharp.Mat cvMat, out MMDeploy.Mat mat)
|
||||
{
|
||||
mat = new MMDeploy.Mat();
|
||||
unsafe
|
||||
{
|
||||
mat.Data = cvMat.DataPointer;
|
||||
mat.Height = cvMat.Height;
|
||||
mat.Width = cvMat.Width;
|
||||
mat.Channel = cvMat.Dims;
|
||||
mat.Format = PixelFormat.BGR;
|
||||
mat.Type = DataType.Int8;
|
||||
mat.Device = null;
|
||||
}
|
||||
}
|
||||
|
||||
static void PrintHelperMessage()
|
||||
{
|
||||
string message = "usage:\n pose_tracker device det_model pose_model video";
|
||||
Console.WriteLine(message);
|
||||
}
|
||||
|
||||
static void Main(string[] args)
|
||||
{
|
||||
if (args.Length != 4)
|
||||
{
|
||||
PrintHelperMessage();
|
||||
Environment.Exit(1);
|
||||
}
|
||||
|
||||
string device_ = args[0];
|
||||
string det_model_ = args[1];
|
||||
string pose_model_ = args[2];
|
||||
string video = args[3];
|
||||
|
||||
Model det_model = new Model(det_model_);
|
||||
Model pose_model = new Model(pose_model_);
|
||||
Device device = new Device(device_);
|
||||
Context context = new Context(device);
|
||||
|
||||
// initialize tracker
|
||||
PoseTracker tracker = new PoseTracker(det_model, pose_model, context);
|
||||
|
||||
PoseTracker.Params param = new PoseTracker.Params();
|
||||
// set default param
|
||||
param.Init();
|
||||
// set custom param
|
||||
param.DetMinBboxSize = 100;
|
||||
param.DetInterval = 1;
|
||||
param.PoseMaxNumBboxes = 6;
|
||||
// optionally use OKS for keypoints similarity comparison
|
||||
float[] sigmas = {0.026f, 0.025f, 0.025f, 0.035f, 0.035f, 0.079f, 0.079f, 0.072f, 0.072f,
|
||||
0.062f, 0.062f, 0.107f, 0.107f, 0.087f, 0.087f, 0.089f, 0.089f };
|
||||
param.SetKeypointSigmas(sigmas);
|
||||
|
||||
// create state
|
||||
PoseTracker.State state = tracker.CreateState(param);
|
||||
|
||||
VideoCapture cap = new VideoCapture(video);
|
||||
if (!cap.IsOpened())
|
||||
{
|
||||
Console.WriteLine("failed to open video: " + video);
|
||||
Environment.Exit(1);
|
||||
}
|
||||
|
||||
int frame_id = 0;
|
||||
OpenCvSharp.Mat frame = new OpenCvSharp.Mat();
|
||||
while (true)
|
||||
{
|
||||
cap.Read(frame);
|
||||
if (frame.Empty())
|
||||
{
|
||||
break;
|
||||
}
|
||||
CvMatToMat(frame, out var mat);
|
||||
// process
|
||||
PoseTrackerOutput result = tracker.Apply(state, mat);
|
||||
|
||||
// visualize
|
||||
if (!Visualize(frame, result, 0, frame_id++, true))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
param.DeleteKeypointSigmas();
|
||||
tracker.Close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="MMDeployCSharp" Version="0.12.0" />
|
||||
<PackageReference Include="OpenCvSharp4" Version="4.5.5.20211231" />
|
||||
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.5.5.20211231" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
|
@ -0,0 +1,83 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using OpenCvSharp;
|
||||
using MMDeploy;
|
||||
|
||||
namespace object_detection
|
||||
{
|
||||
class Program
|
||||
{
|
||||
static void CvMatToMat(OpenCvSharp.Mat[] cvMats, out MMDeploy.Mat[] mats)
|
||||
{
|
||||
mats = new MMDeploy.Mat[cvMats.Length];
|
||||
unsafe
|
||||
{
|
||||
for (int i = 0; i < cvMats.Length; i++)
|
||||
{
|
||||
mats[i].Data = cvMats[i].DataPointer;
|
||||
mats[i].Height = cvMats[i].Height;
|
||||
mats[i].Width = cvMats[i].Width;
|
||||
mats[i].Channel = cvMats[i].Dims;
|
||||
mats[i].Format = PixelFormat.BGR;
|
||||
mats[i].Type = DataType.Int8;
|
||||
mats[i].Device = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void CvWaitKey()
|
||||
{
|
||||
Cv2.WaitKey();
|
||||
}
|
||||
|
||||
static void Main(string[] args)
|
||||
{
|
||||
if (args.Length != 3)
|
||||
{
|
||||
Console.WriteLine("usage:\n object_detection deviceName modelPath imagePath\n");
|
||||
Environment.Exit(1);
|
||||
}
|
||||
|
||||
string deviceName = args[0];
|
||||
string modelPath = args[1];
|
||||
string imagePath = args[2];
|
||||
|
||||
// 1. create handle
|
||||
RotatedDetector handle = new RotatedDetector(modelPath, deviceName, 0);
|
||||
|
||||
// 2. prepare input
|
||||
OpenCvSharp.Mat[] imgs = new OpenCvSharp.Mat[1] { Cv2.ImRead(imagePath, ImreadModes.Color) };
|
||||
CvMatToMat(imgs, out var mats);
|
||||
|
||||
// 3. process
|
||||
List<RotatedDetectorOutput> output = handle.Apply(mats);
|
||||
|
||||
// 4. show result
|
||||
foreach (var obj in output[0].Results)
|
||||
{
|
||||
if (obj.Score < 0.1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
float xc = obj.Cx;
|
||||
float yc = obj.Cy;
|
||||
float wx = obj.Width / 2 * (float)Math.Cos(obj.Angle);
|
||||
float wy = obj.Width / 2 * (float)Math.Sin(obj.Angle);
|
||||
float hx = -obj.Height / 2 * (float)Math.Sin(obj.Angle);
|
||||
float hy = obj.Height / 2 * (float)Math.Cos(obj.Angle);
|
||||
OpenCvSharp.Point p1 = new OpenCvSharp.Point(xc - wx - hx, yc - wy - hy);
|
||||
OpenCvSharp.Point p2 = new OpenCvSharp.Point(xc + wx - hx, yc + wy - hy);
|
||||
OpenCvSharp.Point p3 = new OpenCvSharp.Point(xc + wx + hx, yc + wy + hy);
|
||||
OpenCvSharp.Point p4 = new OpenCvSharp.Point(xc - wx + hx, yc - wy + hy);
|
||||
var contours = new OpenCvSharp.Point[1][];
|
||||
contours[0] = new OpenCvSharp.Point[4] { p1, p2, p3, p4 };
|
||||
Cv2.DrawContours(imgs[0], contours, -1, new Scalar(0, 255, 0), 2);
|
||||
}
|
||||
Cv2.NamedWindow("mmrotate", WindowFlags.GuiExpanded);
|
||||
Cv2.ImShow("mmrotate", imgs[0]);
|
||||
CvWaitKey();
|
||||
|
||||
handle.Close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="MMDeployCSharp" Version="0.12.0" />
|
||||
<PackageReference Include="OpenCvSharp4" Version="4.5.5.20211231" />
|
||||
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.5.5.20211231" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
Loading…
Reference in New Issue