[Feature] Multi level roi align (#9)
* add multi level roi align * better trt plugin code * fix lint * optimizer trt_multi_level_roi_align.cpp * fix lintpull/12/head
parent
dae6a8ccf9
commit
8408ae2bcf
|
@ -51,7 +51,8 @@ INCLUDE_DIRECTORIES(${CUB_ROOT_DIR})
|
|||
set(PLUGIN_LISTS scatternd
|
||||
nms
|
||||
roi_align
|
||||
batched_nms)
|
||||
batched_nms
|
||||
multi_level_roi_align)
|
||||
|
||||
foreach(PLUGIN_ITER ${PLUGIN_LISTS})
|
||||
file(GLOB PLUGIN_OPS_SRCS ${PLUGIN_ITER}/*.cpp ${PLUGIN_ITER}/*.cu)
|
||||
|
@ -59,11 +60,10 @@ foreach(PLUGIN_ITER ${PLUGIN_LISTS})
|
|||
set(BACKEND_OPS_SRCS ${BACKEND_OPS_SRCS} ${PLUGIN_OPS_SRCS} ${PLUGIN_OPS_HEADS})
|
||||
endforeach(PLUGIN_ITER)
|
||||
|
||||
list(APPEND BACKEND_OPS_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/trt_plugin.cpp")
|
||||
list(APPEND BACKEND_OPS_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/common_impl/trt_cuda_helper.cu")
|
||||
|
||||
set(INFER_PLUGIN_LIB ${TENSORRT_LIBRARY})
|
||||
|
||||
cuda_add_library(${SHARED_TARGET} SHARED ${BACKEND_OPS_SRCS})
|
||||
cuda_add_library(${SHARED_TARGET} MODULE ${BACKEND_OPS_SRCS})
|
||||
target_link_libraries(${SHARED_TARGET} ${INFER_PLUGIN_LIB})
|
||||
target_include_directories(${SHARED_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common)
|
||||
|
|
|
@ -16,9 +16,6 @@ static const char* NMS_PLUGIN_VERSION{"1"};
|
|||
static const char* NMS_PLUGIN_NAME{"TRTBatchedNMS"};
|
||||
} // namespace
|
||||
|
||||
PluginFieldCollection TRTBatchedNMSPluginDynamicCreator::mFC{};
|
||||
std::vector<PluginField> TRTBatchedNMSPluginDynamicCreator::mPluginAttributes;
|
||||
|
||||
TRTBatchedNMSPluginDynamic::TRTBatchedNMSPluginDynamic(NMSParameters params)
|
||||
: param(params) {}
|
||||
|
||||
|
@ -268,3 +265,5 @@ void TRTBatchedNMSPluginDynamicCreator::setPluginNamespace(
|
|||
const char* TRTBatchedNMSPluginDynamicCreator::getPluginNamespace() const {
|
||||
return mNamespace.c_str();
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(TRTBatchedNMSPluginDynamicCreator);
|
||||
|
|
|
@ -109,9 +109,9 @@ class TRTBatchedNMSPluginDynamicCreator : public nvinfer1::IPluginCreator {
|
|||
const char* getPluginNamespace() const override;
|
||||
|
||||
private:
|
||||
static nvinfer1::PluginFieldCollection mFC;
|
||||
nvinfer1::PluginFieldCollection mFC;
|
||||
nvinfer1::plugin::NMSParameters params;
|
||||
static std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
std::string mNamespace;
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,260 @@
|
|||
|
||||
#include "trt_multi_level_roi_align.hpp"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <chrono>
|
||||
|
||||
#include "trt_multi_level_roi_align_kernel.hpp"
|
||||
#include "trt_serialize.hpp"
|
||||
|
||||
namespace {
|
||||
static const char *PLUGIN_VERSION{"1"};
|
||||
static const char *PLUGIN_NAME{"MMCVMultiLevelRoiAlign"};
|
||||
} // namespace
|
||||
|
||||
MultiLevelRoiAlignPluginDynamic::MultiLevelRoiAlignPluginDynamic(
|
||||
const std::string &name, int alignedHeight, int alignedWidth, int sampleNum,
|
||||
const std::vector<float> &featmapStrides, float roiScaleFactor,
|
||||
int finestScale, bool aligned)
|
||||
: mLayerName(name),
|
||||
mAlignedHeight(alignedHeight),
|
||||
mAlignedWidth(alignedWidth),
|
||||
mSampleNum(sampleNum),
|
||||
mFeatmapStrides(featmapStrides),
|
||||
mRoiScaleFactor(roiScaleFactor),
|
||||
mFinestScale(finestScale),
|
||||
mAligned(aligned) {}
|
||||
|
||||
MultiLevelRoiAlignPluginDynamic::MultiLevelRoiAlignPluginDynamic(
|
||||
const std::string name, const void *data, size_t length)
|
||||
: mLayerName(name) {
|
||||
deserialize_value(&data, &length, &mAlignedHeight);
|
||||
deserialize_value(&data, &length, &mAlignedWidth);
|
||||
deserialize_value(&data, &length, &mSampleNum);
|
||||
deserialize_value(&data, &length, &mRoiScaleFactor);
|
||||
deserialize_value(&data, &length, &mFinestScale);
|
||||
deserialize_value(&data, &length, &mAligned);
|
||||
deserialize_value(&data, &length, &mFeatmapStrides);
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *MultiLevelRoiAlignPluginDynamic::clone() const {
|
||||
MultiLevelRoiAlignPluginDynamic *plugin = new MultiLevelRoiAlignPluginDynamic(
|
||||
mLayerName, mAlignedHeight, mAlignedWidth, mSampleNum, mFeatmapStrides,
|
||||
mRoiScaleFactor, mFinestScale, mAligned);
|
||||
plugin->setPluginNamespace(getPluginNamespace());
|
||||
|
||||
return plugin;
|
||||
}
|
||||
|
||||
nvinfer1::DimsExprs MultiLevelRoiAlignPluginDynamic::getOutputDimensions(
|
||||
int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
|
||||
nvinfer1::IExprBuilder &exprBuilder) {
|
||||
assert(nbInputs == mFeatmapStrides.size() + 1);
|
||||
|
||||
nvinfer1::DimsExprs ret;
|
||||
ret.nbDims = 4;
|
||||
ret.d[0] = inputs[0].d[0];
|
||||
ret.d[1] = inputs[1].d[1];
|
||||
ret.d[2] = exprBuilder.constant(mAlignedHeight);
|
||||
ret.d[3] = exprBuilder.constant(mAlignedWidth);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool MultiLevelRoiAlignPluginDynamic::supportsFormatCombination(
|
||||
int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
|
||||
int nbOutputs) {
|
||||
const auto *in = inOut;
|
||||
const auto *out = inOut + nbInputs;
|
||||
return inOut[pos].type == nvinfer1::DataType::kFLOAT &&
|
||||
inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
|
||||
}
|
||||
|
||||
void MultiLevelRoiAlignPluginDynamic::configurePlugin(
|
||||
const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
|
||||
const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
|
||||
// Validate input arguments
|
||||
assert(nbOutputs == 1);
|
||||
assert(nbInputs == mFeatmapStrides.size() + 1);
|
||||
}
|
||||
|
||||
size_t MultiLevelRoiAlignPluginDynamic::getWorkspaceSize(
|
||||
const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
|
||||
const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int MultiLevelRoiAlignPluginDynamic::enqueue(
|
||||
const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
|
||||
void *const *outputs, void *workSpace, cudaStream_t stream) {
|
||||
int num_rois = inputDesc[0].dims.d[0];
|
||||
int batch_size = inputDesc[1].dims.d[0];
|
||||
int channels = inputDesc[1].dims.d[1];
|
||||
|
||||
const int kMaxFeatMap = 10;
|
||||
int heights[kMaxFeatMap];
|
||||
int widths[kMaxFeatMap];
|
||||
float strides[kMaxFeatMap];
|
||||
|
||||
int num_feats = mFeatmapStrides.size();
|
||||
for (int i = 0; i < num_feats; ++i) {
|
||||
heights[i] = inputDesc[i + 1].dims.d[2];
|
||||
widths[i] = inputDesc[i + 1].dims.d[3];
|
||||
strides[i] = mFeatmapStrides[i];
|
||||
}
|
||||
|
||||
const void *rois = inputs[0];
|
||||
const void *const *feats = inputs + 1;
|
||||
|
||||
multi_level_roi_align<float>((float *)outputs[0], (const float *)rois,
|
||||
num_rois, feats, num_feats, batch_size, channels,
|
||||
&heights[0], &widths[0], &strides[0],
|
||||
mAlignedHeight, mAlignedWidth, mSampleNum,
|
||||
mRoiScaleFactor, mFinestScale, mAligned, stream);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvinfer1::DataType MultiLevelRoiAlignPluginDynamic::getOutputDataType(
|
||||
int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
|
||||
return nvinfer1::DataType::kFLOAT;
|
||||
}
|
||||
|
||||
// IPluginV2 Methods
|
||||
const char *MultiLevelRoiAlignPluginDynamic::getPluginType() const {
|
||||
return PLUGIN_NAME;
|
||||
}
|
||||
|
||||
const char *MultiLevelRoiAlignPluginDynamic::getPluginVersion() const {
|
||||
return PLUGIN_VERSION;
|
||||
}
|
||||
|
||||
int MultiLevelRoiAlignPluginDynamic::getNbOutputs() const { return 1; }
|
||||
|
||||
int MultiLevelRoiAlignPluginDynamic::initialize() { return 0; }
|
||||
|
||||
void MultiLevelRoiAlignPluginDynamic::terminate() {}
|
||||
|
||||
size_t MultiLevelRoiAlignPluginDynamic::getSerializationSize() const {
|
||||
return serialized_size(mFeatmapStrides) + serialized_size(mAlignedHeight) +
|
||||
serialized_size(mAlignedWidth) + serialized_size(mSampleNum) +
|
||||
serialized_size(mRoiScaleFactor) + serialized_size(mFinestScale) +
|
||||
serialized_size(mAligned);
|
||||
}
|
||||
|
||||
void MultiLevelRoiAlignPluginDynamic::serialize(void *buffer) const {
|
||||
serialize_value(&buffer, mAlignedHeight);
|
||||
serialize_value(&buffer, mAlignedWidth);
|
||||
serialize_value(&buffer, mSampleNum);
|
||||
serialize_value(&buffer, mRoiScaleFactor);
|
||||
serialize_value(&buffer, mFinestScale);
|
||||
serialize_value(&buffer, mAligned);
|
||||
serialize_value(&buffer, mFeatmapStrides);
|
||||
}
|
||||
|
||||
void MultiLevelRoiAlignPluginDynamic::destroy() {
|
||||
// This gets called when the network containing plugin is destroyed
|
||||
delete this;
|
||||
}
|
||||
|
||||
void MultiLevelRoiAlignPluginDynamic::setPluginNamespace(
|
||||
const char *libNamespace) {
|
||||
mNamespace = libNamespace;
|
||||
}
|
||||
|
||||
const char *MultiLevelRoiAlignPluginDynamic::getPluginNamespace() const {
|
||||
return mNamespace.c_str();
|
||||
}
|
||||
|
||||
MultiLevelRoiAlignPluginDynamicCreator::
|
||||
MultiLevelRoiAlignPluginDynamicCreator() {
|
||||
mPluginAttributes = std::vector<nvinfer1::PluginField>(
|
||||
{nvinfer1::PluginField("output_height"),
|
||||
nvinfer1::PluginField("output_width"),
|
||||
nvinfer1::PluginField("sampling_ratio"),
|
||||
nvinfer1::PluginField("featmap_strides"),
|
||||
nvinfer1::PluginField("roi_scale_factor"),
|
||||
nvinfer1::PluginField("finest_scale"),
|
||||
nvinfer1::PluginField("aligned")});
|
||||
mFC.nbFields = mPluginAttributes.size();
|
||||
mFC.fields = mPluginAttributes.data();
|
||||
}
|
||||
|
||||
const char *MultiLevelRoiAlignPluginDynamicCreator::getPluginName() const {
|
||||
return PLUGIN_NAME;
|
||||
}
|
||||
|
||||
const char *MultiLevelRoiAlignPluginDynamicCreator::getPluginVersion() const {
|
||||
return PLUGIN_VERSION;
|
||||
}
|
||||
|
||||
const nvinfer1::PluginFieldCollection *
|
||||
MultiLevelRoiAlignPluginDynamicCreator::getFieldNames() {
|
||||
return &mFC;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2 *MultiLevelRoiAlignPluginDynamicCreator::createPlugin(
|
||||
const char *name, const nvinfer1::PluginFieldCollection *fc) {
|
||||
int alignedHeight = 7;
|
||||
int alignedWidth = 7;
|
||||
int sampleNum = 2;
|
||||
std::vector<float> featmapStrides;
|
||||
float roiScaleFactor = -1;
|
||||
int finestScale = 56;
|
||||
bool aligned = false;
|
||||
|
||||
for (int i = 0; i < fc->nbFields; i++) {
|
||||
if (fc->fields[i].data == nullptr) {
|
||||
continue;
|
||||
}
|
||||
std::string field_name(fc->fields[i].name);
|
||||
|
||||
if (field_name.compare("output_height") == 0) {
|
||||
alignedHeight = static_cast<const int *>(fc->fields[i].data)[0];
|
||||
} else if (field_name.compare("output_width") == 0) {
|
||||
alignedWidth = static_cast<const int *>(fc->fields[i].data)[0];
|
||||
} else if (field_name.compare("sampling_ratio") == 0) {
|
||||
sampleNum = static_cast<const int *>(fc->fields[i].data)[0];
|
||||
} else if (field_name.compare("roi_scale_factor") == 0) {
|
||||
roiScaleFactor = static_cast<const float *>(fc->fields[i].data)[0];
|
||||
} else if (field_name.compare("finest_scale") == 0) {
|
||||
finestScale = static_cast<const int *>(fc->fields[i].data)[0];
|
||||
} else if (field_name.compare("featmap_strides") == 0) {
|
||||
int data_size = (fc->fields[i].length) / sizeof(float);
|
||||
const float *data_start = static_cast<const float *>(fc->fields[i].data);
|
||||
featmapStrides = std::vector<float>(data_start, data_start + data_size);
|
||||
} else if (field_name.compare("aligned") == 0) {
|
||||
int aligned_int = static_cast<const int *>(fc->fields[i].data)[0];
|
||||
aligned = aligned_int != 0;
|
||||
}
|
||||
}
|
||||
|
||||
assert(featmapStrides.size() != 0);
|
||||
|
||||
MultiLevelRoiAlignPluginDynamic *plugin = new MultiLevelRoiAlignPluginDynamic(
|
||||
name, alignedHeight, alignedWidth, sampleNum, featmapStrides,
|
||||
roiScaleFactor, finestScale, aligned);
|
||||
plugin->setPluginNamespace(getPluginNamespace());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2 *MultiLevelRoiAlignPluginDynamicCreator::deserializePlugin(
|
||||
const char *name, const void *serialData, size_t serialLength) {
|
||||
auto plugin =
|
||||
new MultiLevelRoiAlignPluginDynamic(name, serialData, serialLength);
|
||||
plugin->setPluginNamespace(getPluginNamespace());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
void MultiLevelRoiAlignPluginDynamicCreator::setPluginNamespace(
|
||||
const char *libNamespace) {
|
||||
mNamespace = libNamespace;
|
||||
}
|
||||
|
||||
const char *MultiLevelRoiAlignPluginDynamicCreator::getPluginNamespace() const {
|
||||
return mNamespace.c_str();
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(MultiLevelRoiAlignPluginDynamicCreator);
|
|
@ -0,0 +1,115 @@
|
|||
#ifndef TRT_MULTI_LEVEL_ROI_ALIGN_HPP
|
||||
#define TRT_MULTI_LEVEL_ROI_ALIGN_HPP
|
||||
|
||||
#include <cublas_v2.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "trt_plugin_helper.hpp"
|
||||
|
||||
class MultiLevelRoiAlignPluginDynamic : public nvinfer1::IPluginV2DynamicExt {
|
||||
public:
|
||||
MultiLevelRoiAlignPluginDynamic(const std::string &name, int alignedHeight,
|
||||
int alignedWidth, int sampleNum,
|
||||
const std::vector<float> &featmapStrides,
|
||||
float roiScaleFactor = -1,
|
||||
int finestScale = 56, bool aligned = false);
|
||||
|
||||
MultiLevelRoiAlignPluginDynamic(const std::string name, const void *data,
|
||||
size_t length);
|
||||
|
||||
// It doesn't make sense to make MultiLevelRoiAlignPluginDynamic without
|
||||
// arguments, so we delete default constructor.
|
||||
MultiLevelRoiAlignPluginDynamic() = delete;
|
||||
|
||||
// IPluginV2DynamicExt Methods
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const override;
|
||||
nvinfer1::DimsExprs getOutputDimensions(
|
||||
int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
|
||||
nvinfer1::IExprBuilder &exprBuilder) override;
|
||||
bool supportsFormatCombination(int pos,
|
||||
const nvinfer1::PluginTensorDesc *inOut,
|
||||
int nbInputs, int nbOutputs) override;
|
||||
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
|
||||
int nbInputs,
|
||||
const nvinfer1::DynamicPluginTensorDesc *out,
|
||||
int nbOutputs) override;
|
||||
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
|
||||
int nbInputs,
|
||||
const nvinfer1::PluginTensorDesc *outputs,
|
||||
int nbOutputs) const override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace,
|
||||
cudaStream_t stream) override;
|
||||
|
||||
// IPluginV2Ext Methods
|
||||
nvinfer1::DataType getOutputDataType(int index,
|
||||
const nvinfer1::DataType *inputTypes,
|
||||
int nbInputs) const override;
|
||||
|
||||
// IPluginV2 Methods
|
||||
const char *getPluginType() const override;
|
||||
const char *getPluginVersion() const override;
|
||||
int getNbOutputs() const override;
|
||||
int initialize() override;
|
||||
void terminate() override;
|
||||
size_t getSerializationSize() const override;
|
||||
void serialize(void *buffer) const override;
|
||||
void destroy() override;
|
||||
void setPluginNamespace(const char *pluginNamespace) override;
|
||||
const char *getPluginNamespace() const override;
|
||||
|
||||
private:
|
||||
const std::string mLayerName;
|
||||
std::string mNamespace;
|
||||
|
||||
int mAlignedHeight;
|
||||
int mAlignedWidth;
|
||||
int mSampleNum;
|
||||
std::vector<float> mFeatmapStrides;
|
||||
float mRoiScaleFactor;
|
||||
int mFinestScale;
|
||||
bool mAligned;
|
||||
|
||||
protected:
|
||||
// To prevent compiler warnings.
|
||||
using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
|
||||
using nvinfer1::IPluginV2DynamicExt::configurePlugin;
|
||||
using nvinfer1::IPluginV2DynamicExt::enqueue;
|
||||
using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
|
||||
using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
|
||||
using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
|
||||
using nvinfer1::IPluginV2DynamicExt::supportsFormat;
|
||||
};
|
||||
|
||||
class MultiLevelRoiAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
|
||||
public:
|
||||
MultiLevelRoiAlignPluginDynamicCreator();
|
||||
|
||||
const char *getPluginName() const override;
|
||||
|
||||
const char *getPluginVersion() const override;
|
||||
|
||||
const nvinfer1::PluginFieldCollection *getFieldNames() override;
|
||||
|
||||
nvinfer1::IPluginV2 *createPlugin(
|
||||
const char *name, const nvinfer1::PluginFieldCollection *fc) override;
|
||||
|
||||
nvinfer1::IPluginV2 *deserializePlugin(const char *name,
|
||||
const void *serialData,
|
||||
size_t serialLength) override;
|
||||
|
||||
void setPluginNamespace(const char *pluginNamespace) override;
|
||||
|
||||
const char *getPluginNamespace() const override;
|
||||
|
||||
private:
|
||||
nvinfer1::PluginFieldCollection mFC;
|
||||
std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
std::string mNamespace;
|
||||
};
|
||||
|
||||
#endif // TRT_ROI_ALIGN_HPP
|
|
@ -0,0 +1,212 @@
|
|||
#include <stdio.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "common_cuda_helper.hpp"
|
||||
#include "trt_cuda_helper.cuh"
|
||||
#include "trt_multi_level_roi_align_kernel.hpp"
|
||||
#include "trt_plugin_helper.hpp"
|
||||
|
||||
const int kMAX_FEATMAP_SIZE = 10;
|
||||
struct FeatData {
|
||||
const void *data[kMAX_FEATMAP_SIZE];
|
||||
int batch_size;
|
||||
int channels;
|
||||
int h[kMAX_FEATMAP_SIZE];
|
||||
int w[kMAX_FEATMAP_SIZE];
|
||||
float spatial_scale[kMAX_FEATMAP_SIZE];
|
||||
int num_featmap;
|
||||
};
|
||||
|
||||
template <typename scalar_t>
|
||||
__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,
|
||||
const int height, const int width,
|
||||
scalar_t y, scalar_t x) {
|
||||
// deal with cases that inverse elements are out of feature map boundary
|
||||
if (y < -1.0 || y > height || x < -1.0 || x > width) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (y <= 0) y = 0;
|
||||
if (x <= 0) x = 0;
|
||||
|
||||
int y_low = (int)y;
|
||||
int x_low = (int)x;
|
||||
int y_high;
|
||||
int x_high;
|
||||
|
||||
if (y_low >= height - 1) {
|
||||
y_high = y_low = height - 1;
|
||||
y = (scalar_t)y_low;
|
||||
} else {
|
||||
y_high = y_low + 1;
|
||||
}
|
||||
|
||||
if (x_low >= width - 1) {
|
||||
x_high = x_low = width - 1;
|
||||
x = (scalar_t)x_low;
|
||||
} else {
|
||||
x_high = x_low + 1;
|
||||
}
|
||||
|
||||
scalar_t ly = y - y_low;
|
||||
scalar_t lx = x - x_low;
|
||||
scalar_t hy = 1. - ly;
|
||||
scalar_t hx = 1. - lx;
|
||||
// do bilinear interpolation
|
||||
scalar_t lt = bottom_data[y_low * width + x_low];
|
||||
scalar_t rt = bottom_data[y_low * width + x_high];
|
||||
scalar_t lb = bottom_data[y_high * width + x_low];
|
||||
scalar_t rb = bottom_data[y_high * width + x_high];
|
||||
scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
|
||||
|
||||
scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
__device__ scalar_t roi_align_single(
|
||||
const scalar_t *bottom_data, const int roi_batch_ind,
|
||||
const scalar_t roi_start_w, const scalar_t roi_start_h,
|
||||
const scalar_t roi_end_w, const scalar_t roi_end_h,
|
||||
const scalar_t spatial_scale, const int pw, const int ph, const int c,
|
||||
const int sample_num, const int channels, const int height, const int width,
|
||||
const int pooled_height, const int pooled_width, const bool aligned) {
|
||||
// Force malformed ROIs to be 1x1
|
||||
scalar_t roi_width = fmaxf((scalar_t)roi_end_w - (scalar_t)roi_start_w, 0.);
|
||||
scalar_t roi_height = fmaxf((scalar_t)roi_end_h - (scalar_t)roi_start_h, 0.);
|
||||
if (!aligned) {
|
||||
roi_width = max(roi_width, (scalar_t)1.);
|
||||
roi_height = max(roi_height, (scalar_t)1.);
|
||||
}
|
||||
|
||||
const scalar_t bin_size_h = roi_height / pooled_height;
|
||||
const scalar_t bin_size_w = roi_width / pooled_width;
|
||||
|
||||
const scalar_t *offset_bottom_data =
|
||||
bottom_data + (roi_batch_ind * channels + c) * height * width;
|
||||
|
||||
int sample_num_h = (sample_num > 0)
|
||||
? sample_num
|
||||
: ceil(roi_height / pooled_height); // e.g., = 2
|
||||
int sample_num_w =
|
||||
(sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
|
||||
|
||||
scalar_t output_val = 0;
|
||||
#pragma unroll
|
||||
for (int iy = 0; iy < sample_num_h; iy++) {
|
||||
const scalar_t y =
|
||||
roi_start_h + ph * bin_size_h +
|
||||
(scalar_t)(iy + scalar_t(.5f)) * bin_size_h / (scalar_t)(sample_num_h);
|
||||
#pragma unroll
|
||||
for (int ix = 0; ix < sample_num_w; ix++) {
|
||||
const scalar_t x = roi_start_w + pw * bin_size_w +
|
||||
(scalar_t)(ix + scalar_t(.5f)) * bin_size_w /
|
||||
(scalar_t)(sample_num_w);
|
||||
scalar_t val = bilinear_interpolate<scalar_t>(offset_bottom_data, height,
|
||||
width, y, x);
|
||||
output_val += val;
|
||||
}
|
||||
}
|
||||
output_val /= max(sample_num_h * sample_num_w, 1);
|
||||
|
||||
return output_val;
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
__global__ void roi_extractor_kernel(
|
||||
scalar_t *output, const scalar_t *bottom_rois, FeatData feat_data,
|
||||
const int sample_num, const float roi_scale_factor, const int finest_scale,
|
||||
const int pooled_height, const int pooled_width, const bool aligned,
|
||||
int nThreads) {
|
||||
CUDA_1D_KERNEL_LOOP(index, nThreads) {
|
||||
const int channels = feat_data.channels;
|
||||
const int pw = index % pooled_width;
|
||||
const int ph = (index / pooled_width) % pooled_height;
|
||||
const int c = (index / pooled_width / pooled_height) % channels;
|
||||
const int n = index / pooled_width / pooled_height / channels;
|
||||
|
||||
const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
|
||||
|
||||
scalar_t roi_offset_x0 = offset_bottom_rois[1];
|
||||
scalar_t roi_offset_y0 = offset_bottom_rois[2];
|
||||
scalar_t roi_offset_x1 = offset_bottom_rois[3];
|
||||
scalar_t roi_offset_y1 = offset_bottom_rois[4];
|
||||
|
||||
const scalar_t scale = sqrtf((roi_offset_y1 - roi_offset_y0) *
|
||||
(roi_offset_x1 - roi_offset_x0));
|
||||
|
||||
const int target_lvls =
|
||||
fminf(feat_data.num_featmap - 1,
|
||||
fmaxf(0, floorf(log2f(scale / (scalar_t)(finest_scale) + 1e-6))));
|
||||
|
||||
if (roi_scale_factor > 0.) {
|
||||
const scalar_t roi_off_cx = (roi_offset_x0 + roi_offset_x1) * 0.5;
|
||||
const scalar_t roi_off_cy = (roi_offset_y0 + roi_offset_y1) * 0.5;
|
||||
const scalar_t roi_off_w =
|
||||
(roi_offset_x1 - roi_offset_x0 + 1) * roi_scale_factor;
|
||||
const scalar_t roi_off_h =
|
||||
(roi_offset_y1 - roi_offset_y0 + 1) * roi_scale_factor;
|
||||
|
||||
roi_offset_x0 = roi_off_cx - roi_off_w * 0.5 + 0.5;
|
||||
roi_offset_x1 = roi_off_cx + roi_off_w * 0.5 - 0.5;
|
||||
roi_offset_y0 = roi_off_cy - roi_off_h * 0.5 + 0.5;
|
||||
roi_offset_y1 = roi_off_cy + roi_off_h * 0.5 - 0.5;
|
||||
}
|
||||
|
||||
const scalar_t spatial_scale =
|
||||
(scalar_t)feat_data.spatial_scale[target_lvls];
|
||||
const int height = feat_data.h[target_lvls];
|
||||
const int width = feat_data.w[target_lvls];
|
||||
const scalar_t *bottom_data = (scalar_t *)feat_data.data[target_lvls];
|
||||
|
||||
const int roi_batch_ind = offset_bottom_rois[0];
|
||||
const scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
|
||||
const scalar_t roi_start_w = roi_offset_x0 * spatial_scale - offset;
|
||||
const scalar_t roi_start_h = roi_offset_y0 * spatial_scale - offset;
|
||||
const scalar_t roi_end_w = (roi_offset_x1)*spatial_scale - offset;
|
||||
const scalar_t roi_end_h = (roi_offset_y1)*spatial_scale - offset;
|
||||
|
||||
const scalar_t output_val = roi_align_single<scalar_t>(
|
||||
bottom_data, roi_batch_ind, roi_start_w, roi_start_h, roi_end_w,
|
||||
roi_end_h, spatial_scale, pw, ph, c, sample_num, channels, height,
|
||||
width, pooled_height, pooled_width, aligned);
|
||||
|
||||
output[index] = output_val;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void multi_level_roi_align(T *output, const T *rois, int num_rois,
|
||||
const void *const *feats, int num_feats, int n,
|
||||
int c, int *h, int *w, float *strides,
|
||||
int aligned_height, int aligned_width,
|
||||
int sample_num, float roi_scale_factor,
|
||||
int finest_scale, bool aligned,
|
||||
cudaStream_t stream) {
|
||||
FeatData feat_data;
|
||||
feat_data.batch_size = n;
|
||||
feat_data.channels = c;
|
||||
feat_data.num_featmap = num_feats;
|
||||
for (int i = 0; i < num_feats; ++i) {
|
||||
feat_data.data[i] = feats[i];
|
||||
feat_data.h[i] = h[i];
|
||||
feat_data.w[i] = w[i];
|
||||
feat_data.spatial_scale[i] = 1. / float(strides[i]);
|
||||
}
|
||||
int nThreads = num_rois * c * aligned_height * aligned_width;
|
||||
// bool aligned = true;
|
||||
roi_extractor_kernel<T>
|
||||
<<<GET_BLOCKS(nThreads), THREADS_PER_BLOCK, 0, stream>>>(
|
||||
output, rois, feat_data, sample_num, roi_scale_factor, finest_scale,
|
||||
aligned_height, aligned_width, aligned, nThreads);
|
||||
}
|
||||
|
||||
template void multi_level_roi_align<float>(
|
||||
float *output, const float *rois, int num_rois, const void *const *feats,
|
||||
int num_feats, int n, int c, int *h, int *w, float *strides,
|
||||
int aligned_height, int aligned_width, int sample_num,
|
||||
float roi_scale_factor, int finest_scale, bool aligned,
|
||||
cudaStream_t stream);
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef TRT_MULTI_LEVEL_ROI_ALIGN_KERNEL_HPP
|
||||
#define TRT_MULTI_LEVEL_ROI_ALIGN_KERNEL_HPP
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
template <typename T>
|
||||
void multi_level_roi_align(T *output, const T *rois, int num_rois,
|
||||
const void *const *feats, int num_feats, int n,
|
||||
int c, int *h, int *w, float *strides,
|
||||
int aligned_height, int aligned_width,
|
||||
int sample_num, float roi_scale_factor,
|
||||
int finest_scale, bool aligned, cudaStream_t stream);
|
||||
|
||||
#endif // TRT_MULTI_LEVEL_ROI_ALIGN_KERNEL_HPP
|
|
@ -24,10 +24,6 @@ static const char *PLUGIN_VERSION{"1"};
|
|||
static const char *PLUGIN_NAME{"NonMaxSuppression"};
|
||||
} // namespace
|
||||
|
||||
nvinfer1::PluginFieldCollection NonMaxSuppressionDynamicCreator::mFC{};
|
||||
std::vector<nvinfer1::PluginField>
|
||||
NonMaxSuppressionDynamicCreator::mPluginAttributes;
|
||||
|
||||
NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(
|
||||
const std::string &name, int centerPointBox, int maxOutputBoxesPerClass,
|
||||
float iouThreshold, float scoreThreshold, int offset)
|
||||
|
@ -168,8 +164,10 @@ int NonMaxSuppressionDynamic::initialize() { return 0; }
|
|||
void NonMaxSuppressionDynamic::terminate() {}
|
||||
|
||||
size_t NonMaxSuppressionDynamic::getSerializationSize() const {
|
||||
return sizeof(mCenterPointBox) + sizeof(mMaxOutputBoxesPerClass) +
|
||||
sizeof(mIouThreshold) + sizeof(mScoreThreshold) + sizeof(mOffset);
|
||||
return serialized_size(mCenterPointBox) +
|
||||
serialized_size(mMaxOutputBoxesPerClass) +
|
||||
serialized_size(mIouThreshold) + serialized_size(mScoreThreshold) +
|
||||
serialized_size(mOffset);
|
||||
}
|
||||
|
||||
void NonMaxSuppressionDynamic::serialize(void *buffer) const {
|
||||
|
@ -276,3 +274,5 @@ void NonMaxSuppressionDynamicCreator::setPluginNamespace(
|
|||
const char *NonMaxSuppressionDynamicCreator::getPluginNamespace() const {
|
||||
return mNamespace.c_str();
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(NonMaxSuppressionDynamicCreator);
|
||||
|
|
|
@ -100,8 +100,8 @@ class NonMaxSuppressionDynamicCreator : public nvinfer1::IPluginCreator {
|
|||
const char *getPluginNamespace() const override;
|
||||
|
||||
private:
|
||||
static nvinfer1::PluginFieldCollection mFC;
|
||||
static std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
nvinfer1::PluginFieldCollection mFC;
|
||||
std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
std::string mNamespace;
|
||||
};
|
||||
#endif // TRT_NMS_HPP
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common_cuda_helper.hpp"
|
||||
#include "nms_cuda_kernel.cuh"
|
||||
#include "trt_cuda_helper.cuh"
|
||||
#include "trt_nms_kernel.cuh"
|
||||
#include "trt_plugin_helper.hpp"
|
||||
|
||||
struct NMSBox {
|
||||
|
@ -72,8 +72,8 @@ __global__ void mask_to_output_kernel(const unsigned long long* dev_mask,
|
|||
int start = *output_count;
|
||||
int out_per_class_count = 0;
|
||||
for (int i = 0; i < spatial_dimension; i++) {
|
||||
const int nblock = i / threadsPerBlock;
|
||||
const int inblock = i % threadsPerBlock;
|
||||
const int nblock = i / THREADS_PER_BLOCK;
|
||||
const int inblock = i % THREADS_PER_BLOCK;
|
||||
if (!(remv[nblock] & (1ULL << inblock))) {
|
||||
if (threadIdx.x == 0) {
|
||||
output[start * 3 + 0] = batch_id;
|
||||
|
@ -113,7 +113,7 @@ size_t get_onnxnms_workspace_size(size_t num_batches, size_t spatial_dimension,
|
|||
size_t scores_workspace = getAlignedSize(spatial_dimension * boxes_word_size);
|
||||
size_t boxes_workspace =
|
||||
getAlignedSize(spatial_dimension * 4 * boxes_word_size);
|
||||
const int col_blocks = DIVUP(spatial_dimension, threadsPerBlock);
|
||||
const int col_blocks = DIVUP(spatial_dimension, THREADS_PER_BLOCK);
|
||||
size_t mask_workspace = getAlignedSize(spatial_dimension * col_blocks *
|
||||
sizeof(unsigned long long));
|
||||
size_t index_template_workspace =
|
||||
|
@ -162,7 +162,7 @@ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
|
|||
size_t output_length, void* workspace,
|
||||
cudaStream_t stream) {
|
||||
using mmlab::getAlignedSize;
|
||||
const int col_blocks = DIVUP(spatial_dimension, threadsPerBlock);
|
||||
const int col_blocks = DIVUP(spatial_dimension, THREADS_PER_BLOCK);
|
||||
float* boxes_sorted = (float*)workspace;
|
||||
workspace = static_cast<char*>(workspace) +
|
||||
getAlignedSize(spatial_dimension * 4 * sizeof(float));
|
||||
|
@ -214,7 +214,7 @@ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
|
|||
cudaCheckError();
|
||||
|
||||
dim3 blocks(col_blocks, col_blocks);
|
||||
dim3 threads(threadsPerBlock);
|
||||
dim3 threads(THREADS_PER_BLOCK);
|
||||
|
||||
for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
|
||||
for (int cls_id = 0; cls_id < num_classes; ++cls_id) {
|
||||
|
@ -261,7 +261,7 @@ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
|
|||
offset, boxes_sorted, dev_mask);
|
||||
|
||||
// will be performed when dev_mask is full.
|
||||
mask_to_output_kernel<<<1, threadsPerBlock,
|
||||
mask_to_output_kernel<<<1, THREADS_PER_BLOCK,
|
||||
col_blocks * sizeof(unsigned long long),
|
||||
stream>>>(
|
||||
dev_mask, index_cache, output, output_count, batch_id, cls_id,
|
||||
|
|
|
@ -17,10 +17,6 @@ static const char *PLUGIN_VERSION{"1"};
|
|||
static const char *PLUGIN_NAME{"MMCVRoiAlign"};
|
||||
} // namespace
|
||||
|
||||
nvinfer1::PluginFieldCollection RoIAlignPluginDynamicCreator::mFC{};
|
||||
std::vector<nvinfer1::PluginField>
|
||||
RoIAlignPluginDynamicCreator::mPluginAttributes;
|
||||
|
||||
RoIAlignPluginDynamic::RoIAlignPluginDynamic(const std::string &name,
|
||||
int outWidth, int outHeight,
|
||||
float spatialScale,
|
||||
|
@ -162,8 +158,9 @@ int RoIAlignPluginDynamic::initialize() { return 0; }
|
|||
void RoIAlignPluginDynamic::terminate() {}
|
||||
|
||||
size_t RoIAlignPluginDynamic::getSerializationSize() const {
|
||||
return sizeof(mOutWidth) + sizeof(mOutHeight) + sizeof(mSpatialScale) +
|
||||
sizeof(mSampleRatio) + sizeof(mPoolMode) + sizeof(mAligned);
|
||||
return serialized_size(mOutWidth) + serialized_size(mOutHeight) +
|
||||
serialized_size(mSpatialScale) + serialized_size(mSampleRatio) +
|
||||
serialized_size(mPoolMode) + serialized_size(mAligned);
|
||||
}
|
||||
|
||||
void RoIAlignPluginDynamic::serialize(void *buffer) const {
|
||||
|
@ -291,3 +288,5 @@ void RoIAlignPluginDynamicCreator::setPluginNamespace(
|
|||
const char *RoIAlignPluginDynamicCreator::getPluginNamespace() const {
|
||||
return mNamespace.c_str();
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);
|
||||
|
|
|
@ -101,8 +101,8 @@ class RoIAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
|
|||
const char *getPluginNamespace() const override;
|
||||
|
||||
private:
|
||||
static nvinfer1::PluginFieldCollection mFC;
|
||||
static std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
nvinfer1::PluginFieldCollection mFC;
|
||||
std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
std::string mNamespace;
|
||||
};
|
||||
#endif // TRT_ROI_ALIGN_HPP
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include "common_cuda_helper.hpp"
|
||||
#include "roi_align_cuda_kernel.cuh"
|
||||
#include "trt_roi_align_kernel.cuh"
|
||||
|
||||
template <typename scalar_t>
|
||||
void TRTRoIAlignForwardCUDAKernelLauncher(
|
||||
|
|
|
@ -5,27 +5,14 @@
|
|||
|
||||
#include <chrono>
|
||||
|
||||
#include "trt_scatternd_kernel.hpp"
|
||||
#include "trt_serialize.hpp"
|
||||
|
||||
extern void TRTONNXScatterNDKernelLauncher_float(
|
||||
const float *data, const int *indices, const float *update, const int *dims,
|
||||
int nbDims, const int *indices_dims, int indice_nbDims, float *output,
|
||||
cudaStream_t stream);
|
||||
|
||||
extern void TRTONNXScatterNDKernelLauncher_int32(
|
||||
const int *data, const int *indices, const int *update, const int *dims,
|
||||
int nbDims, const int *indices_dims, int indice_nbDims, int *output,
|
||||
cudaStream_t stream);
|
||||
|
||||
namespace {
|
||||
static const char *PLUGIN_VERSION{"1"};
|
||||
static const char *PLUGIN_NAME{"ScatterND"};
|
||||
} // namespace
|
||||
|
||||
nvinfer1::PluginFieldCollection ONNXScatterNDDynamicCreator::mFC{};
|
||||
std::vector<nvinfer1::PluginField>
|
||||
ONNXScatterNDDynamicCreator::mPluginAttributes;
|
||||
|
||||
ONNXScatterNDDynamic::ONNXScatterNDDynamic(const std::string &name)
|
||||
: mLayerName(name) {}
|
||||
|
||||
|
@ -110,13 +97,13 @@ int ONNXScatterNDDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
|||
|
||||
switch (data_type) {
|
||||
case nvinfer1::DataType::kFLOAT:
|
||||
TRTONNXScatterNDKernelLauncher_float(
|
||||
TRTONNXScatterNDKernelLauncher<float>(
|
||||
(float *)data, (int *)indices, (float *)update, dims, nbDims,
|
||||
indices_dims, indice_nbDims, (float *)output, stream);
|
||||
break;
|
||||
|
||||
case nvinfer1::DataType::kINT32:
|
||||
TRTONNXScatterNDKernelLauncher_int32(
|
||||
TRTONNXScatterNDKernelLauncher<int>(
|
||||
(int *)data, (int *)indices, (int *)update, dims, nbDims,
|
||||
indices_dims, indice_nbDims, (int *)output, stream);
|
||||
break;
|
||||
|
@ -204,3 +191,5 @@ void ONNXScatterNDDynamicCreator::setPluginNamespace(const char *libNamespace) {
|
|||
const char *ONNXScatterNDDynamicCreator::getPluginNamespace() const {
|
||||
return mNamespace.c_str();
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(ONNXScatterNDDynamicCreator);
|
||||
|
|
|
@ -91,8 +91,8 @@ class ONNXScatterNDDynamicCreator : public nvinfer1::IPluginCreator {
|
|||
const char *getPluginNamespace() const override;
|
||||
|
||||
private:
|
||||
static nvinfer1::PluginFieldCollection mFC;
|
||||
static std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
nvinfer1::PluginFieldCollection mFC;
|
||||
std::vector<nvinfer1::PluginField> mPluginAttributes;
|
||||
std::string mNamespace;
|
||||
};
|
||||
#endif // TRT_SCATTERND_HPP
|
||||
|
|
|
@ -6,8 +6,6 @@
|
|||
#include "trt_cuda_helper.cuh"
|
||||
#include "trt_plugin_helper.hpp"
|
||||
|
||||
static int const threadsPerBlock = sizeof(unsigned long long int) * 8;
|
||||
|
||||
using mmlab::TensorDesc;
|
||||
|
||||
template <typename T>
|
||||
|
@ -66,27 +64,17 @@ void TRTONNXScatterNDKernelLauncher(const T* data, const int* indices,
|
|||
num_update_indice *= indice_desc.shape[i];
|
||||
}
|
||||
// scatter
|
||||
const int col_block = DIVUP(num_update_indice, threadsPerBlock);
|
||||
onnx_scatternd_kernel<<<col_block, threadsPerBlock, 0, stream>>>(
|
||||
const int col_block = DIVUP(num_update_indice, THREADS_PER_BLOCK);
|
||||
onnx_scatternd_kernel<<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
|
||||
num_update_indice, indices, update, output, tensor_desc, indice_desc);
|
||||
}
|
||||
|
||||
void TRTONNXScatterNDKernelLauncher_float(const float* data, const int* indices,
|
||||
const float* update, const int* dims,
|
||||
int nbDims, const int* indices_dims,
|
||||
int indice_nbDims, float* output,
|
||||
cudaStream_t stream) {
|
||||
TRTONNXScatterNDKernelLauncher<float>(data, indices, update, dims, nbDims,
|
||||
indices_dims, indice_nbDims, output,
|
||||
stream);
|
||||
}
|
||||
template void TRTONNXScatterNDKernelLauncher<float>(
|
||||
const float* data, const int* indices, const float* update, const int* dims,
|
||||
int nbDims, const int* indices_dims, int indice_nbDims, float* output,
|
||||
cudaStream_t stream);
|
||||
|
||||
void TRTONNXScatterNDKernelLauncher_int32(const int* data, const int* indices,
|
||||
const int* update, const int* dims,
|
||||
int nbDims, const int* indices_dims,
|
||||
int indice_nbDims, int* output,
|
||||
cudaStream_t stream) {
|
||||
TRTONNXScatterNDKernelLauncher<int>(data, indices, update, dims, nbDims,
|
||||
indices_dims, indice_nbDims, output,
|
||||
stream);
|
||||
}
|
||||
template void TRTONNXScatterNDKernelLauncher<int>(
|
||||
const int* data, const int* indices, const int* update, const int* dims,
|
||||
int nbDims, const int* indices_dims, int indice_nbDims, int* output,
|
||||
cudaStream_t stream);
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
#ifndef TRT_SCATTERND_KERNEL_HPP
|
||||
#define TRT_SCATTERND_KERNEL_HPP
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
template <typename T>
|
||||
void TRTONNXScatterNDKernelLauncher(const T* data, const int* indices,
|
||||
const T* update, const int* dims,
|
||||
int nbDims, const int* indices_dims,
|
||||
int indice_nbDims, T* output,
|
||||
cudaStream_t stream);
|
||||
|
||||
#endif // TRT_SCATTERND_KERNEL_HPP
|
|
@ -1,13 +0,0 @@
|
|||
#include "batched_nms/trt_batched_nms.hpp"
|
||||
#include "nms/trt_nms.hpp"
|
||||
#include "roi_align/trt_roi_align.hpp"
|
||||
#include "scatternd/trt_scatternd.hpp"
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(TRTBatchedNMSPluginDynamicCreator);
|
||||
REGISTER_TENSORRT_PLUGIN(NonMaxSuppressionDynamicCreator);
|
||||
REGISTER_TENSORRT_PLUGIN(ONNXScatterNDDynamicCreator);
|
||||
REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);
|
||||
|
||||
extern "C" {
|
||||
bool initLibMMCVInferPlugins() { return true; }
|
||||
} // extern "C"
|
|
@ -0,0 +1 @@
|
|||
from .roi_extractors import * # noqa: F401, F403
|
|
@ -0,0 +1 @@
|
|||
from .base_roi_extractor import * # noqa: F401, F403
|
|
@ -0,0 +1,72 @@
|
|||
from torch.autograd import Function
|
||||
|
||||
from mmdeploy.utils import FUNCTION_REWRITERS
|
||||
|
||||
|
||||
class MultiLevelRoiAlign(Function):
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
|
||||
@staticmethod
|
||||
def symbolic(g, *args):
|
||||
aligned = args[-1]
|
||||
featmap_strides = args[-2]
|
||||
finest_scale = args[-3]
|
||||
roi_scale_factor = args[-4]
|
||||
sampling_ratio = args[-5]
|
||||
output_size = args[-6]
|
||||
inputs = args[:len(featmap_strides)]
|
||||
rois = args[len(featmap_strides)]
|
||||
return g.op(
|
||||
'mmlab::MMCVMultiLevelRoiAlign',
|
||||
rois,
|
||||
*inputs,
|
||||
output_height_i=output_size[1],
|
||||
output_width_i=output_size[0],
|
||||
sampling_ratio_i=sampling_ratio,
|
||||
roi_scale_factor_f=roi_scale_factor,
|
||||
finest_scale_i=finest_scale,
|
||||
featmap_strides_f=featmap_strides,
|
||||
aligned_i=aligned)
|
||||
|
||||
@staticmethod
|
||||
def forward(g, *args):
|
||||
# aligned = args[-1]
|
||||
featmap_strides = args[-2]
|
||||
# finest_scale = args[-3]
|
||||
# roi_scale_factor = args[-4]
|
||||
# sampling_ratio = args[-5]
|
||||
output_size = args[-6]
|
||||
inputs = args[:len(featmap_strides)]
|
||||
rois = args[len(featmap_strides)]
|
||||
|
||||
num_proposals = rois.shape[0]
|
||||
channel = inputs[0].shape[1]
|
||||
|
||||
return rois.new_zeros(
|
||||
(num_proposals, channel, output_size[1], output_size[0]))
|
||||
|
||||
|
||||
@FUNCTION_REWRITERS.register_rewriter(
|
||||
func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward',
|
||||
backend='tensorrt')
|
||||
def SingleRoIExtractor_forward_static(rewriter,
|
||||
self,
|
||||
feats,
|
||||
rois,
|
||||
roi_scale_factor=None):
|
||||
featmap_strides = self.featmap_strides
|
||||
finest_scale = self.finest_scale
|
||||
|
||||
roi_layer = self.roi_layers[0]
|
||||
out_size = roi_layer.output_size
|
||||
sampling_ratio = roi_layer.sampling_ratio
|
||||
aligned = roi_layer.aligned
|
||||
if roi_scale_factor is None:
|
||||
roi_scale_factor = 1.0
|
||||
|
||||
featmap_strides = [float(s) for s in featmap_strides]
|
||||
return MultiLevelRoiAlign.apply(*feats, rois, out_size, sampling_ratio,
|
||||
roi_scale_factor, finest_scale,
|
||||
featmap_strides, aligned)
|
Loading…
Reference in New Issue