mirror of
https://github.com/open-mmlab/mmdeploy.git
synced 2025-01-14 08:09:43 +08:00
* check in cmake * move backend_ops to csrc/backend_ops * check in preprocess, model, some codebase and their c-apis * check in CMakeLists.txt * check in parts of test_csrc * commit everything else * add readme * update core's BUILD_INTERFACE directory * skip codespell on third_party * update trt_net and ort_net's CMakeLists * ignore clion's build directory * check in pybind11 * add onnx.proto. Remove MMDeploy's dependency on ncnn's source code * export MMDeployTargets only when MMDEPLOY_BUILD_SDK is ON * remove useless message * target include directory is wrong * change target name from mmdeploy_ppl_net to mmdeploy_pplnn_net * skip install directory * update project's cmake * remove useless code * set CMAKE_BUILD_TYPE to Release by force if it isn't set by user * update custom ops CMakeLists * pass object target's source lists * fix lint end-of-file * fix lint: trailing whitespace * fix codespell hook * remove bicubic_interpolate to csrc/backend_ops/ * set MMDEPLOY_BUILD_SDK OFF * change custom ops build command * add spdlog installation command * update docs on how to checkout pybind11 * move bicubic_interpolate to backend_ops/tensorrt directory * remove useless code * correct cmake * fix typo * fix typo * fix install directory * correct sdk's readme * set cub dir when cuda version < 11.0 * change directory where clang-format will apply to * fix build command * add .clang-format * change clang-format style from google to file * reformat csrc/backend_ops * format sdk's code * turn off clang-format for some files * add -Xcompiler=-fno-gnu-unique * fix trt topk initialize * check in config for sdk demo * update cmake script and csrc's readme * correct config's path * add cuda include directory, otherwise compile failed in case of tensorrt8.2 * clang-format onnx2ncnn.cpp Co-authored-by: zhangli <lzhang329@gmail.com> Co-authored-by: grimoire <yaoqian@sensetime.com>
96 lines
4.2 KiB
C++
96 lines
4.2 KiB
C++
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
|
// Modified from
|
|
// https://github.com/NVIDIA/TensorRT/tree/master/plugin/batchedNMSPlugin
|
|
#ifndef TRT_KERNEL_H
|
|
#define TRT_KERNEL_H
|
|
|
|
#include <cuda_runtime.h>
|
|
|
|
#include <cassert>
|
|
#include <cstdio>
|
|
|
|
#include "cublas_v2.h"
|
|
#include "trt_plugin_helper.hpp"
|
|
|
|
// NOTE(review): a using-directive at header scope is exported to every
// translation unit that includes this file. The unqualified `DataType` used by
// the declarations in this header presumably resolves through it, so it cannot
// be dropped without qualifying those names first — confirm before changing.
using namespace nvinfer1;
|
|
// Compile-time debug switch, set to 0 here. Nothing in this header reads it;
// presumably it is consumed by the sources that include this file — confirm.
#define DEBUG_ENABLE 0
|
|
|
|
// Holds four coordinates of type T, named after the min/max corners of a box.
// The struct itself imposes no ordering between the min and max members and
// performs no validation.
template <typename T>
struct Bbox {
  // Declaration order is kept as xmin, ymin, xmax, ymax so the object layout
  // is unchanged.
  T xmin, ymin, xmax, ymax;

  // Stores each argument in the member of the same name.
  Bbox(T xmin, T ymin, T xmax, T ymax) : xmin(xmin), ymin(ymin), xmax(xmax), ymax(ymax) {}

  // Explicitly defaulted so the type keeps a trivial default constructor;
  // a default-initialized Bbox therefore has unspecified coordinate values
  // for arithmetic T.
  Bbox() = default;
};
|
|
|
|
// Per-box record: a score of type T plus an integer label, an integer box
// index and a boolean flag. How the fields are interpreted is decided by the
// code that uses this struct, which is not visible in this header; judging by
// the names they are presumably a confidence score, a class label, an index
// into a box array and an NMS keep flag — confirm against the users.
template <typename T>
struct BboxInfo {
  T conf_score;
  int label;
  int bbox_idx;
  bool kept;

  // Stores each argument in the member of the same name.
  BboxInfo(T conf_score, int label, int bbox_idx, bool kept)
      : conf_score(conf_score), label(label), bbox_idx(bbox_idx), kept(kept) {}

  // Explicitly defaulted so the type keeps a trivial default constructor;
  // a default-initialized BboxInfo has unspecified member values for
  // arithmetic T.
  BboxInfo() = default;
};
|
|
|
|
// Declaration only; the definition lives outside this header.
// Takes a byte pointer and an unsigned integer `to`, and returns a byte pointer.
// Presumably rounds `ptr` up to the next multiple of `to` — confirm against
// the definition.
int8_t* alignPtr(int8_t* ptr, uintptr_t to);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Takes a byte pointer and the size of the previous workspace, and returns a
// byte pointer. Presumably yields the start of the next sub-buffer after
// `previousWorkspaceSize` bytes; whether alignment padding is applied cannot
// be told from here — confirm against the definition.
int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Receives a CUDA stream, a segment count, an integer offset and an int
// output pointer. Presumably fills `d_offsets` with evenly spaced segment
// offsets on `stream`; the `d_` prefix suggests a device buffer — confirm
// both against the definition.
void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Judging by the name and parameters, this presumably runs per-class
// non-maximum suppression for `num` images on `stream`, reading the
// beforeNMS_* buffers and writing the afterNMS_* buffers — confirm against
// the definition.
// DT_SCORE and DT_BBOX presumably describe the element types behind the
// void* buffers; buffer sizes, layout and memory space are not specified here.
// `flipXY` is optional and defaults to false.
// Returns a pluginStatus_t, a type declared outside this file.
pluginStatus_t allClassNMS(cudaStream_t stream, int num, int num_classes, int num_preds_per_class,
                           int top_k, float nms_threshold, bool share_location, bool isNormalized,
                           DataType DT_SCORE, DataType DT_BBOX, void* bbox_data,
                           void* beforeNMS_scores, void* beforeNMS_index_array,
                           void* afterNMS_scores, void* afterNMS_index_array, bool flipXY = false);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Returns a size_t computed from N, C1 and the box data type. Presumably the
// byte size of a box-data buffer; the exact meaning of N and C1 is not
// visible here — confirm against the definition.
size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Returns a size_t computed from shareLocation, N, C1 and the box data type.
// Presumably the byte size of a scratch buffer for permuted box data, with
// the result depending on `shareLocation` — confirm against the definition.
size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX);
|
|
|
|
size_t sortScoresPerClassWorkspaceSize(int num, int num_classes, int num_preds_per_class,
|
|
DataType DT_CONF);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Returns a size_t computed from the image count, items per image and the
// score data type. Presumably the workspace size needed by
// sortScoresPerImage — confirm against the definition.
size_t sortScoresPerImageWorkspaceSize(int num_images, int num_items_per_image, DataType DT_SCORE);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Judging by the name and parameters, this presumably sorts the scores of
// each image on `stream`, reading the unsorted_* buffers, writing the
// sorted_* buffers and using `workspace` as scratch — confirm against the
// definition. Sort direction, buffer sizes and memory space are not
// specified here.
// Returns a pluginStatus_t, a type declared outside this file.
pluginStatus_t sortScoresPerImage(cudaStream_t stream, int num_images, int num_items_per_image,
                                  DataType DT_SCORE, void* unsorted_scores,
                                  void* unsorted_bbox_indices, void* sorted_scores,
                                  void* sorted_bbox_indices, void* workspace);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Judging by the name and parameters, this presumably sorts scores within
// each class on `stream`, taking a background label id and a confidence
// threshold into account, and operates on `conf_scores_gpu` /
// `index_array_gpu` with `workspace` as scratch — confirm against the
// definition. The `_gpu` suffix suggests device buffers; that is not
// established by this header.
// Returns a pluginStatus_t, a type declared outside this file.
pluginStatus_t sortScoresPerClass(cudaStream_t stream, int num, int num_classes,
                                  int num_preds_per_class, int background_label_id,
                                  float confidence_threshold, DataType DT_SCORE,
                                  void* conf_scores_gpu, void* index_array_gpu, void* workspace);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Takes a pointer to size_t values and an element count, and returns a
// size_t. Presumably combines the `count` entries of `workspaces` into one
// total; whether alignment padding is added cannot be told from here —
// confirm against the definition.
size_t calculateTotalWorkspaceSize(size_t* workspaces, int count);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Reads from `data` (const) and writes to `new_data`. Judging by the name and
// parameters, this presumably rearranges the input on `stream` and, when
// `confSigmoid` is set, applies a sigmoid to the values — confirm against the
// definition. The source and destination layouts are not specified here.
// Returns a pluginStatus_t, a type declared outside this file.
pluginStatus_t permuteData(cudaStream_t stream, int nthreads, int num_classes, int num_data,
                           int num_dim, DataType DT_DATA, bool confSigmoid, const void* data,
                           void* new_data);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Returns a size_t computed from N and C2. Presumably the byte size of a
// pre-NMS scratch buffer; the meaning of N and C2 is not visible here —
// confirm against the definition.
size_t detectionForwardPreNMSSize(int N, int C2);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Returns a size_t computed from N, the class count and topK. Presumably the
// byte size of a post-NMS scratch buffer — confirm against the definition.
size_t detectionForwardPostNMSSize(int N, int numClasses, int topK);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Reads `indices`, `scores` and `bboxData` (all const) and writes `nmsedDets`
// and `nmsedLabels`. Judging by the name and parameters, this presumably
// collects up to `keepTopK` results per image from the NMS output on
// `stream` — confirm against the definition. The layout of the output
// buffers is not specified here.
// `clipBoxes` is optional and defaults to true.
// Returns a pluginStatus_t, a type declared outside this file.
pluginStatus_t gatherNMSOutputs(cudaStream_t stream, bool shareLocation, int numImages,
                                int numPredsPerClass, int numClasses, int topK, int keepTopK,
                                DataType DT_BBOX, DataType DT_SCORE, const void* indices,
                                const void* scores, const void* bboxData, void* nmsedDets,
                                void* nmsedLabels, bool clipBoxes = true);
|
|
|
|
size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses,
|
|
int numPredsPerClass, int topK, DataType DT_BBOX,
|
|
DataType DT_SCORE);
|
|
|
|
// Declaration only; the definition lives outside this header.
// Reads `locData` and `confData` (both const) and writes `nmsedDets` and
// `nmsedLabels`, with `workspace` available as scratch. Judging by the name
// and parameters, this is presumably the top-level batched-NMS entry point
// that runs on `stream` and applies `scoreThreshold` and `iouThreshold` —
// confirm against the definition. Buffer sizes, layouts and memory space are
// not specified here.
// Optional trailing parameters and their defaults: isNormalized = true,
// confSigmoid = false, clipBoxes = true.
// Returns a pluginStatus_t, a type declared outside this file.
pluginStatus_t nmsInference(cudaStream_t stream, int N, int boxesSize, int scoresSize,
                            bool shareLocation, int backgroundLabelId, int numPredsPerClass,
                            int numClasses, int topK, int keepTopK, float scoreThreshold,
                            float iouThreshold, DataType DT_BBOX, const void* locData,
                            DataType DT_SCORE, const void* confData, void* nmsedDets,
                            void* nmsedLabels, void* workspace, bool isNormalized = true,
                            bool confSigmoid = false, bool clipBoxes = true);
|
|
|
|
#endif
|