[Feature] Multi level roi align (#9)

* add multi level roi align

* better trt plugin code

* fix lint

* optimizer trt_multi_level_roi_align.cpp

* fix lint
pull/12/head
q.yao 2021-07-09 13:56:50 +08:00 committed by GitHub
parent dae6a8ccf9
commit 8408ae2bcf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 733 additions and 85 deletions

View File

@ -51,7 +51,8 @@ INCLUDE_DIRECTORIES(${CUB_ROOT_DIR})
set(PLUGIN_LISTS scatternd
nms
roi_align
batched_nms)
batched_nms
multi_level_roi_align)
foreach(PLUGIN_ITER ${PLUGIN_LISTS})
file(GLOB PLUGIN_OPS_SRCS ${PLUGIN_ITER}/*.cpp ${PLUGIN_ITER}/*.cu)
@ -59,11 +60,10 @@ foreach(PLUGIN_ITER ${PLUGIN_LISTS})
set(BACKEND_OPS_SRCS ${BACKEND_OPS_SRCS} ${PLUGIN_OPS_SRCS} ${PLUGIN_OPS_HEADS})
endforeach(PLUGIN_ITER)
list(APPEND BACKEND_OPS_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/trt_plugin.cpp")
list(APPEND BACKEND_OPS_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/common_impl/trt_cuda_helper.cu")
set(INFER_PLUGIN_LIB ${TENSORRT_LIBRARY})
cuda_add_library(${SHARED_TARGET} SHARED ${BACKEND_OPS_SRCS})
cuda_add_library(${SHARED_TARGET} MODULE ${BACKEND_OPS_SRCS})
target_link_libraries(${SHARED_TARGET} ${INFER_PLUGIN_LIB})
target_include_directories(${SHARED_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common)

View File

@ -16,9 +16,6 @@ static const char* NMS_PLUGIN_VERSION{"1"};
static const char* NMS_PLUGIN_NAME{"TRTBatchedNMS"};
} // namespace
PluginFieldCollection TRTBatchedNMSPluginDynamicCreator::mFC{};
std::vector<PluginField> TRTBatchedNMSPluginDynamicCreator::mPluginAttributes;
TRTBatchedNMSPluginDynamic::TRTBatchedNMSPluginDynamic(NMSParameters params)
: param(params) {}
@ -268,3 +265,5 @@ void TRTBatchedNMSPluginDynamicCreator::setPluginNamespace(
const char* TRTBatchedNMSPluginDynamicCreator::getPluginNamespace() const {
return mNamespace.c_str();
}
REGISTER_TENSORRT_PLUGIN(TRTBatchedNMSPluginDynamicCreator);

View File

@ -109,9 +109,9 @@ class TRTBatchedNMSPluginDynamicCreator : public nvinfer1::IPluginCreator {
const char* getPluginNamespace() const override;
private:
static nvinfer1::PluginFieldCollection mFC;
nvinfer1::PluginFieldCollection mFC;
nvinfer1::plugin::NMSParameters params;
static std::vector<nvinfer1::PluginField> mPluginAttributes;
std::vector<nvinfer1::PluginField> mPluginAttributes;
std::string mNamespace;
};

View File

@ -0,0 +1,260 @@
#include "trt_multi_level_roi_align.hpp"
#include <assert.h>
#include <chrono>
#include "trt_multi_level_roi_align_kernel.hpp"
#include "trt_serialize.hpp"
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"MMCVMultiLevelRoiAlign"};
} // namespace
MultiLevelRoiAlignPluginDynamic::MultiLevelRoiAlignPluginDynamic(
const std::string &name, int alignedHeight, int alignedWidth, int sampleNum,
const std::vector<float> &featmapStrides, float roiScaleFactor,
int finestScale, bool aligned)
: mLayerName(name),
mAlignedHeight(alignedHeight),
mAlignedWidth(alignedWidth),
mSampleNum(sampleNum),
mFeatmapStrides(featmapStrides),
mRoiScaleFactor(roiScaleFactor),
mFinestScale(finestScale),
mAligned(aligned) {}
MultiLevelRoiAlignPluginDynamic::MultiLevelRoiAlignPluginDynamic(
const std::string name, const void *data, size_t length)
: mLayerName(name) {
deserialize_value(&data, &length, &mAlignedHeight);
deserialize_value(&data, &length, &mAlignedWidth);
deserialize_value(&data, &length, &mSampleNum);
deserialize_value(&data, &length, &mRoiScaleFactor);
deserialize_value(&data, &length, &mFinestScale);
deserialize_value(&data, &length, &mAligned);
deserialize_value(&data, &length, &mFeatmapStrides);
}
nvinfer1::IPluginV2DynamicExt *MultiLevelRoiAlignPluginDynamic::clone() const {
MultiLevelRoiAlignPluginDynamic *plugin = new MultiLevelRoiAlignPluginDynamic(
mLayerName, mAlignedHeight, mAlignedWidth, mSampleNum, mFeatmapStrides,
mRoiScaleFactor, mFinestScale, mAligned);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
nvinfer1::DimsExprs MultiLevelRoiAlignPluginDynamic::getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) {
assert(nbInputs == mFeatmapStrides.size() + 1);
nvinfer1::DimsExprs ret;
ret.nbDims = 4;
ret.d[0] = inputs[0].d[0];
ret.d[1] = inputs[1].d[1];
ret.d[2] = exprBuilder.constant(mAlignedHeight);
ret.d[3] = exprBuilder.constant(mAlignedWidth);
return ret;
}
bool MultiLevelRoiAlignPluginDynamic::supportsFormatCombination(
int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
int nbOutputs) {
const auto *in = inOut;
const auto *out = inOut + nbInputs;
return inOut[pos].type == nvinfer1::DataType::kFLOAT &&
inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
}
void MultiLevelRoiAlignPluginDynamic::configurePlugin(
const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
// Validate input arguments
assert(nbOutputs == 1);
assert(nbInputs == mFeatmapStrides.size() + 1);
}
size_t MultiLevelRoiAlignPluginDynamic::getWorkspaceSize(
const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
return 0;
}
int MultiLevelRoiAlignPluginDynamic::enqueue(
const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workSpace, cudaStream_t stream) {
int num_rois = inputDesc[0].dims.d[0];
int batch_size = inputDesc[1].dims.d[0];
int channels = inputDesc[1].dims.d[1];
const int kMaxFeatMap = 10;
int heights[kMaxFeatMap];
int widths[kMaxFeatMap];
float strides[kMaxFeatMap];
int num_feats = mFeatmapStrides.size();
for (int i = 0; i < num_feats; ++i) {
heights[i] = inputDesc[i + 1].dims.d[2];
widths[i] = inputDesc[i + 1].dims.d[3];
strides[i] = mFeatmapStrides[i];
}
const void *rois = inputs[0];
const void *const *feats = inputs + 1;
multi_level_roi_align<float>((float *)outputs[0], (const float *)rois,
num_rois, feats, num_feats, batch_size, channels,
&heights[0], &widths[0], &strides[0],
mAlignedHeight, mAlignedWidth, mSampleNum,
mRoiScaleFactor, mFinestScale, mAligned, stream);
return 0;
}
nvinfer1::DataType MultiLevelRoiAlignPluginDynamic::getOutputDataType(
int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
return nvinfer1::DataType::kFLOAT;
}
// IPluginV2 Methods
const char *MultiLevelRoiAlignPluginDynamic::getPluginType() const {
return PLUGIN_NAME;
}
const char *MultiLevelRoiAlignPluginDynamic::getPluginVersion() const {
return PLUGIN_VERSION;
}
int MultiLevelRoiAlignPluginDynamic::getNbOutputs() const { return 1; }
int MultiLevelRoiAlignPluginDynamic::initialize() { return 0; }
void MultiLevelRoiAlignPluginDynamic::terminate() {}
size_t MultiLevelRoiAlignPluginDynamic::getSerializationSize() const {
return serialized_size(mFeatmapStrides) + serialized_size(mAlignedHeight) +
serialized_size(mAlignedWidth) + serialized_size(mSampleNum) +
serialized_size(mRoiScaleFactor) + serialized_size(mFinestScale) +
serialized_size(mAligned);
}
void MultiLevelRoiAlignPluginDynamic::serialize(void *buffer) const {
serialize_value(&buffer, mAlignedHeight);
serialize_value(&buffer, mAlignedWidth);
serialize_value(&buffer, mSampleNum);
serialize_value(&buffer, mRoiScaleFactor);
serialize_value(&buffer, mFinestScale);
serialize_value(&buffer, mAligned);
serialize_value(&buffer, mFeatmapStrides);
}
void MultiLevelRoiAlignPluginDynamic::destroy() {
// This gets called when the network containing plugin is destroyed
delete this;
}
void MultiLevelRoiAlignPluginDynamic::setPluginNamespace(
const char *libNamespace) {
mNamespace = libNamespace;
}
const char *MultiLevelRoiAlignPluginDynamic::getPluginNamespace() const {
return mNamespace.c_str();
}
MultiLevelRoiAlignPluginDynamicCreator::
MultiLevelRoiAlignPluginDynamicCreator() {
mPluginAttributes = std::vector<nvinfer1::PluginField>(
{nvinfer1::PluginField("output_height"),
nvinfer1::PluginField("output_width"),
nvinfer1::PluginField("sampling_ratio"),
nvinfer1::PluginField("featmap_strides"),
nvinfer1::PluginField("roi_scale_factor"),
nvinfer1::PluginField("finest_scale"),
nvinfer1::PluginField("aligned")});
mFC.nbFields = mPluginAttributes.size();
mFC.fields = mPluginAttributes.data();
}
const char *MultiLevelRoiAlignPluginDynamicCreator::getPluginName() const {
return PLUGIN_NAME;
}
const char *MultiLevelRoiAlignPluginDynamicCreator::getPluginVersion() const {
return PLUGIN_VERSION;
}
const nvinfer1::PluginFieldCollection *
MultiLevelRoiAlignPluginDynamicCreator::getFieldNames() {
return &mFC;
}
nvinfer1::IPluginV2 *MultiLevelRoiAlignPluginDynamicCreator::createPlugin(
const char *name, const nvinfer1::PluginFieldCollection *fc) {
int alignedHeight = 7;
int alignedWidth = 7;
int sampleNum = 2;
std::vector<float> featmapStrides;
float roiScaleFactor = -1;
int finestScale = 56;
bool aligned = false;
for (int i = 0; i < fc->nbFields; i++) {
if (fc->fields[i].data == nullptr) {
continue;
}
std::string field_name(fc->fields[i].name);
if (field_name.compare("output_height") == 0) {
alignedHeight = static_cast<const int *>(fc->fields[i].data)[0];
} else if (field_name.compare("output_width") == 0) {
alignedWidth = static_cast<const int *>(fc->fields[i].data)[0];
} else if (field_name.compare("sampling_ratio") == 0) {
sampleNum = static_cast<const int *>(fc->fields[i].data)[0];
} else if (field_name.compare("roi_scale_factor") == 0) {
roiScaleFactor = static_cast<const float *>(fc->fields[i].data)[0];
} else if (field_name.compare("finest_scale") == 0) {
finestScale = static_cast<const int *>(fc->fields[i].data)[0];
} else if (field_name.compare("featmap_strides") == 0) {
int data_size = (fc->fields[i].length) / sizeof(float);
const float *data_start = static_cast<const float *>(fc->fields[i].data);
featmapStrides = std::vector<float>(data_start, data_start + data_size);
} else if (field_name.compare("aligned") == 0) {
int aligned_int = static_cast<const int *>(fc->fields[i].data)[0];
aligned = aligned_int != 0;
}
}
assert(featmapStrides.size() != 0);
MultiLevelRoiAlignPluginDynamic *plugin = new MultiLevelRoiAlignPluginDynamic(
name, alignedHeight, alignedWidth, sampleNum, featmapStrides,
roiScaleFactor, finestScale, aligned);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
nvinfer1::IPluginV2 *MultiLevelRoiAlignPluginDynamicCreator::deserializePlugin(
const char *name, const void *serialData, size_t serialLength) {
auto plugin =
new MultiLevelRoiAlignPluginDynamic(name, serialData, serialLength);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
void MultiLevelRoiAlignPluginDynamicCreator::setPluginNamespace(
const char *libNamespace) {
mNamespace = libNamespace;
}
const char *MultiLevelRoiAlignPluginDynamicCreator::getPluginNamespace() const {
return mNamespace.c_str();
}
REGISTER_TENSORRT_PLUGIN(MultiLevelRoiAlignPluginDynamicCreator);

View File

@ -0,0 +1,115 @@
#ifndef TRT_MULTI_LEVEL_ROI_ALIGN_HPP
#define TRT_MULTI_LEVEL_ROI_ALIGN_HPP
#include <cublas_v2.h>
#include <memory>
#include <string>
#include <vector>
#include "trt_plugin_helper.hpp"
class MultiLevelRoiAlignPluginDynamic : public nvinfer1::IPluginV2DynamicExt {
public:
MultiLevelRoiAlignPluginDynamic(const std::string &name, int alignedHeight,
int alignedWidth, int sampleNum,
const std::vector<float> &featmapStrides,
float roiScaleFactor = -1,
int finestScale = 56, bool aligned = false);
MultiLevelRoiAlignPluginDynamic(const std::string name, const void *data,
size_t length);
// It doesn't make sense to make MultiLevelRoiAlignPluginDynamic without
// arguments, so we delete default constructor.
MultiLevelRoiAlignPluginDynamic() = delete;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt *clone() const override;
nvinfer1::DimsExprs getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) override;
bool supportsFormatCombination(int pos,
const nvinfer1::PluginTensorDesc *inOut,
int nbInputs, int nbOutputs) override;
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *out,
int nbOutputs) override;
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
int nbInputs,
const nvinfer1::PluginTensorDesc *outputs,
int nbOutputs) const override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) override;
// IPluginV2Ext Methods
nvinfer1::DataType getOutputDataType(int index,
const nvinfer1::DataType *inputTypes,
int nbInputs) const override;
// IPluginV2 Methods
const char *getPluginType() const override;
const char *getPluginVersion() const override;
int getNbOutputs() const override;
int initialize() override;
void terminate() override;
size_t getSerializationSize() const override;
void serialize(void *buffer) const override;
void destroy() override;
void setPluginNamespace(const char *pluginNamespace) override;
const char *getPluginNamespace() const override;
private:
const std::string mLayerName;
std::string mNamespace;
int mAlignedHeight;
int mAlignedWidth;
int mSampleNum;
std::vector<float> mFeatmapStrides;
float mRoiScaleFactor;
int mFinestScale;
bool mAligned;
protected:
// To prevent compiler warnings.
using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
using nvinfer1::IPluginV2DynamicExt::configurePlugin;
using nvinfer1::IPluginV2DynamicExt::enqueue;
using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};
class MultiLevelRoiAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
public:
MultiLevelRoiAlignPluginDynamicCreator();
const char *getPluginName() const override;
const char *getPluginVersion() const override;
const nvinfer1::PluginFieldCollection *getFieldNames() override;
nvinfer1::IPluginV2 *createPlugin(
const char *name, const nvinfer1::PluginFieldCollection *fc) override;
nvinfer1::IPluginV2 *deserializePlugin(const char *name,
const void *serialData,
size_t serialLength) override;
void setPluginNamespace(const char *pluginNamespace) override;
const char *getPluginNamespace() const override;
private:
nvinfer1::PluginFieldCollection mFC;
std::vector<nvinfer1::PluginField> mPluginAttributes;
std::string mNamespace;
};
#endif // TRT_ROI_ALIGN_HPP

View File

@ -0,0 +1,212 @@
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_multi_level_roi_align_kernel.hpp"
#include "trt_plugin_helper.hpp"
const int kMAX_FEATMAP_SIZE = 10;
struct FeatData {
const void *data[kMAX_FEATMAP_SIZE];
int batch_size;
int channels;
int h[kMAX_FEATMAP_SIZE];
int w[kMAX_FEATMAP_SIZE];
float spatial_scale[kMAX_FEATMAP_SIZE];
int num_featmap;
};
template <typename scalar_t>
__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,
const int height, const int width,
scalar_t y, scalar_t x) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
return 0;
}
if (y <= 0) y = 0;
if (x <= 0) x = 0;
int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (scalar_t)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (scalar_t)x_low;
} else {
x_high = x_low + 1;
}
scalar_t ly = y - y_low;
scalar_t lx = x - x_low;
scalar_t hy = 1. - ly;
scalar_t hx = 1. - lx;
// do bilinear interpolation
scalar_t lt = bottom_data[y_low * width + x_low];
scalar_t rt = bottom_data[y_low * width + x_high];
scalar_t lb = bottom_data[y_high * width + x_low];
scalar_t rb = bottom_data[y_high * width + x_high];
scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb);
return val;
}
template <typename scalar_t>
__device__ scalar_t roi_align_single(
const scalar_t *bottom_data, const int roi_batch_ind,
const scalar_t roi_start_w, const scalar_t roi_start_h,
const scalar_t roi_end_w, const scalar_t roi_end_h,
const scalar_t spatial_scale, const int pw, const int ph, const int c,
const int sample_num, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, const bool aligned) {
// Force malformed ROIs to be 1x1
scalar_t roi_width = fmaxf((scalar_t)roi_end_w - (scalar_t)roi_start_w, 0.);
scalar_t roi_height = fmaxf((scalar_t)roi_end_h - (scalar_t)roi_start_h, 0.);
if (!aligned) {
roi_width = max(roi_width, (scalar_t)1.);
roi_height = max(roi_height, (scalar_t)1.);
}
const scalar_t bin_size_h = roi_height / pooled_height;
const scalar_t bin_size_w = roi_width / pooled_width;
const scalar_t *offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
int sample_num_h = (sample_num > 0)
? sample_num
: ceil(roi_height / pooled_height); // e.g., = 2
int sample_num_w =
(sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
scalar_t output_val = 0;
#pragma unroll
for (int iy = 0; iy < sample_num_h; iy++) {
const scalar_t y =
roi_start_h + ph * bin_size_h +
(scalar_t)(iy + scalar_t(.5f)) * bin_size_h / (scalar_t)(sample_num_h);
#pragma unroll
for (int ix = 0; ix < sample_num_w; ix++) {
const scalar_t x = roi_start_w + pw * bin_size_w +
(scalar_t)(ix + scalar_t(.5f)) * bin_size_w /
(scalar_t)(sample_num_w);
scalar_t val = bilinear_interpolate<scalar_t>(offset_bottom_data, height,
width, y, x);
output_val += val;
}
}
output_val /= max(sample_num_h * sample_num_w, 1);
return output_val;
}
template <typename scalar_t>
__global__ void roi_extractor_kernel(
scalar_t *output, const scalar_t *bottom_rois, FeatData feat_data,
const int sample_num, const float roi_scale_factor, const int finest_scale,
const int pooled_height, const int pooled_width, const bool aligned,
int nThreads) {
CUDA_1D_KERNEL_LOOP(index, nThreads) {
const int channels = feat_data.channels;
const int pw = index % pooled_width;
const int ph = (index / pooled_width) % pooled_height;
const int c = (index / pooled_width / pooled_height) % channels;
const int n = index / pooled_width / pooled_height / channels;
const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
scalar_t roi_offset_x0 = offset_bottom_rois[1];
scalar_t roi_offset_y0 = offset_bottom_rois[2];
scalar_t roi_offset_x1 = offset_bottom_rois[3];
scalar_t roi_offset_y1 = offset_bottom_rois[4];
const scalar_t scale = sqrtf((roi_offset_y1 - roi_offset_y0) *
(roi_offset_x1 - roi_offset_x0));
const int target_lvls =
fminf(feat_data.num_featmap - 1,
fmaxf(0, floorf(log2f(scale / (scalar_t)(finest_scale) + 1e-6))));
if (roi_scale_factor > 0.) {
const scalar_t roi_off_cx = (roi_offset_x0 + roi_offset_x1) * 0.5;
const scalar_t roi_off_cy = (roi_offset_y0 + roi_offset_y1) * 0.5;
const scalar_t roi_off_w =
(roi_offset_x1 - roi_offset_x0 + 1) * roi_scale_factor;
const scalar_t roi_off_h =
(roi_offset_y1 - roi_offset_y0 + 1) * roi_scale_factor;
roi_offset_x0 = roi_off_cx - roi_off_w * 0.5 + 0.5;
roi_offset_x1 = roi_off_cx + roi_off_w * 0.5 - 0.5;
roi_offset_y0 = roi_off_cy - roi_off_h * 0.5 + 0.5;
roi_offset_y1 = roi_off_cy + roi_off_h * 0.5 - 0.5;
}
const scalar_t spatial_scale =
(scalar_t)feat_data.spatial_scale[target_lvls];
const int height = feat_data.h[target_lvls];
const int width = feat_data.w[target_lvls];
const scalar_t *bottom_data = (scalar_t *)feat_data.data[target_lvls];
const int roi_batch_ind = offset_bottom_rois[0];
const scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
const scalar_t roi_start_w = roi_offset_x0 * spatial_scale - offset;
const scalar_t roi_start_h = roi_offset_y0 * spatial_scale - offset;
const scalar_t roi_end_w = (roi_offset_x1)*spatial_scale - offset;
const scalar_t roi_end_h = (roi_offset_y1)*spatial_scale - offset;
const scalar_t output_val = roi_align_single<scalar_t>(
bottom_data, roi_batch_ind, roi_start_w, roi_start_h, roi_end_w,
roi_end_h, spatial_scale, pw, ph, c, sample_num, channels, height,
width, pooled_height, pooled_width, aligned);
output[index] = output_val;
}
}
template <typename T>
void multi_level_roi_align(T *output, const T *rois, int num_rois,
const void *const *feats, int num_feats, int n,
int c, int *h, int *w, float *strides,
int aligned_height, int aligned_width,
int sample_num, float roi_scale_factor,
int finest_scale, bool aligned,
cudaStream_t stream) {
FeatData feat_data;
feat_data.batch_size = n;
feat_data.channels = c;
feat_data.num_featmap = num_feats;
for (int i = 0; i < num_feats; ++i) {
feat_data.data[i] = feats[i];
feat_data.h[i] = h[i];
feat_data.w[i] = w[i];
feat_data.spatial_scale[i] = 1. / float(strides[i]);
}
int nThreads = num_rois * c * aligned_height * aligned_width;
// bool aligned = true;
roi_extractor_kernel<T>
<<<GET_BLOCKS(nThreads), THREADS_PER_BLOCK, 0, stream>>>(
output, rois, feat_data, sample_num, roi_scale_factor, finest_scale,
aligned_height, aligned_width, aligned, nThreads);
}
template void multi_level_roi_align<float>(
float *output, const float *rois, int num_rois, const void *const *feats,
int num_feats, int n, int c, int *h, int *w, float *strides,
int aligned_height, int aligned_width, int sample_num,
float roi_scale_factor, int finest_scale, bool aligned,
cudaStream_t stream);

View File

@ -0,0 +1,13 @@
#ifndef TRT_MULTI_LEVEL_ROI_ALIGN_KERNEL_HPP
#define TRT_MULTI_LEVEL_ROI_ALIGN_KERNEL_HPP
#include <cuda_runtime.h>
template <typename T>
void multi_level_roi_align(T *output, const T *rois, int num_rois,
const void *const *feats, int num_feats, int n,
int c, int *h, int *w, float *strides,
int aligned_height, int aligned_width,
int sample_num, float roi_scale_factor,
int finest_scale, bool aligned, cudaStream_t stream);
#endif // TRT_MULTI_LEVEL_ROI_ALIGN_KERNEL_HPP

View File

@ -24,10 +24,6 @@ static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"NonMaxSuppression"};
} // namespace
nvinfer1::PluginFieldCollection NonMaxSuppressionDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
NonMaxSuppressionDynamicCreator::mPluginAttributes;
NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(
const std::string &name, int centerPointBox, int maxOutputBoxesPerClass,
float iouThreshold, float scoreThreshold, int offset)
@ -168,8 +164,10 @@ int NonMaxSuppressionDynamic::initialize() { return 0; }
void NonMaxSuppressionDynamic::terminate() {}
size_t NonMaxSuppressionDynamic::getSerializationSize() const {
return sizeof(mCenterPointBox) + sizeof(mMaxOutputBoxesPerClass) +
sizeof(mIouThreshold) + sizeof(mScoreThreshold) + sizeof(mOffset);
return serialized_size(mCenterPointBox) +
serialized_size(mMaxOutputBoxesPerClass) +
serialized_size(mIouThreshold) + serialized_size(mScoreThreshold) +
serialized_size(mOffset);
}
void NonMaxSuppressionDynamic::serialize(void *buffer) const {
@ -276,3 +274,5 @@ void NonMaxSuppressionDynamicCreator::setPluginNamespace(
const char *NonMaxSuppressionDynamicCreator::getPluginNamespace() const {
return mNamespace.c_str();
}
REGISTER_TENSORRT_PLUGIN(NonMaxSuppressionDynamicCreator);

View File

@ -100,8 +100,8 @@ class NonMaxSuppressionDynamicCreator : public nvinfer1::IPluginCreator {
const char *getPluginNamespace() const override;
private:
static nvinfer1::PluginFieldCollection mFC;
static std::vector<nvinfer1::PluginField> mPluginAttributes;
nvinfer1::PluginFieldCollection mFC;
std::vector<nvinfer1::PluginField> mPluginAttributes;
std::string mNamespace;
};
#endif // TRT_NMS_HPP

View File

@ -9,8 +9,8 @@
#include <vector>
#include "common_cuda_helper.hpp"
#include "nms_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_nms_kernel.cuh"
#include "trt_plugin_helper.hpp"
struct NMSBox {
@ -72,8 +72,8 @@ __global__ void mask_to_output_kernel(const unsigned long long* dev_mask,
int start = *output_count;
int out_per_class_count = 0;
for (int i = 0; i < spatial_dimension; i++) {
const int nblock = i / threadsPerBlock;
const int inblock = i % threadsPerBlock;
const int nblock = i / THREADS_PER_BLOCK;
const int inblock = i % THREADS_PER_BLOCK;
if (!(remv[nblock] & (1ULL << inblock))) {
if (threadIdx.x == 0) {
output[start * 3 + 0] = batch_id;
@ -113,7 +113,7 @@ size_t get_onnxnms_workspace_size(size_t num_batches, size_t spatial_dimension,
size_t scores_workspace = getAlignedSize(spatial_dimension * boxes_word_size);
size_t boxes_workspace =
getAlignedSize(spatial_dimension * 4 * boxes_word_size);
const int col_blocks = DIVUP(spatial_dimension, threadsPerBlock);
const int col_blocks = DIVUP(spatial_dimension, THREADS_PER_BLOCK);
size_t mask_workspace = getAlignedSize(spatial_dimension * col_blocks *
sizeof(unsigned long long));
size_t index_template_workspace =
@ -162,7 +162,7 @@ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
size_t output_length, void* workspace,
cudaStream_t stream) {
using mmlab::getAlignedSize;
const int col_blocks = DIVUP(spatial_dimension, threadsPerBlock);
const int col_blocks = DIVUP(spatial_dimension, THREADS_PER_BLOCK);
float* boxes_sorted = (float*)workspace;
workspace = static_cast<char*>(workspace) +
getAlignedSize(spatial_dimension * 4 * sizeof(float));
@ -214,7 +214,7 @@ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
cudaCheckError();
dim3 blocks(col_blocks, col_blocks);
dim3 threads(threadsPerBlock);
dim3 threads(THREADS_PER_BLOCK);
for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
for (int cls_id = 0; cls_id < num_classes; ++cls_id) {
@ -261,7 +261,7 @@ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
offset, boxes_sorted, dev_mask);
// will be performed when dev_mask is full.
mask_to_output_kernel<<<1, threadsPerBlock,
mask_to_output_kernel<<<1, THREADS_PER_BLOCK,
col_blocks * sizeof(unsigned long long),
stream>>>(
dev_mask, index_cache, output, output_count, batch_id, cls_id,

View File

@ -17,10 +17,6 @@ static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"MMCVRoiAlign"};
} // namespace
nvinfer1::PluginFieldCollection RoIAlignPluginDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
RoIAlignPluginDynamicCreator::mPluginAttributes;
RoIAlignPluginDynamic::RoIAlignPluginDynamic(const std::string &name,
int outWidth, int outHeight,
float spatialScale,
@ -162,8 +158,9 @@ int RoIAlignPluginDynamic::initialize() { return 0; }
void RoIAlignPluginDynamic::terminate() {}
size_t RoIAlignPluginDynamic::getSerializationSize() const {
return sizeof(mOutWidth) + sizeof(mOutHeight) + sizeof(mSpatialScale) +
sizeof(mSampleRatio) + sizeof(mPoolMode) + sizeof(mAligned);
return serialized_size(mOutWidth) + serialized_size(mOutHeight) +
serialized_size(mSpatialScale) + serialized_size(mSampleRatio) +
serialized_size(mPoolMode) + serialized_size(mAligned);
}
void RoIAlignPluginDynamic::serialize(void *buffer) const {
@ -291,3 +288,5 @@ void RoIAlignPluginDynamicCreator::setPluginNamespace(
const char *RoIAlignPluginDynamicCreator::getPluginNamespace() const {
return mNamespace.c_str();
}
REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);

View File

@ -101,8 +101,8 @@ class RoIAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
const char *getPluginNamespace() const override;
private:
static nvinfer1::PluginFieldCollection mFC;
static std::vector<nvinfer1::PluginField> mPluginAttributes;
nvinfer1::PluginFieldCollection mFC;
std::vector<nvinfer1::PluginField> mPluginAttributes;
std::string mNamespace;
};
#endif // TRT_ROI_ALIGN_HPP

View File

@ -1,5 +1,5 @@
#include "common_cuda_helper.hpp"
#include "roi_align_cuda_kernel.cuh"
#include "trt_roi_align_kernel.cuh"
template <typename scalar_t>
void TRTRoIAlignForwardCUDAKernelLauncher(

View File

@ -5,27 +5,14 @@
#include <chrono>
#include "trt_scatternd_kernel.hpp"
#include "trt_serialize.hpp"
extern void TRTONNXScatterNDKernelLauncher_float(
const float *data, const int *indices, const float *update, const int *dims,
int nbDims, const int *indices_dims, int indice_nbDims, float *output,
cudaStream_t stream);
extern void TRTONNXScatterNDKernelLauncher_int32(
const int *data, const int *indices, const int *update, const int *dims,
int nbDims, const int *indices_dims, int indice_nbDims, int *output,
cudaStream_t stream);
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"ScatterND"};
} // namespace
nvinfer1::PluginFieldCollection ONNXScatterNDDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
ONNXScatterNDDynamicCreator::mPluginAttributes;
ONNXScatterNDDynamic::ONNXScatterNDDynamic(const std::string &name)
: mLayerName(name) {}
@ -110,13 +97,13 @@ int ONNXScatterNDDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
switch (data_type) {
case nvinfer1::DataType::kFLOAT:
TRTONNXScatterNDKernelLauncher_float(
TRTONNXScatterNDKernelLauncher<float>(
(float *)data, (int *)indices, (float *)update, dims, nbDims,
indices_dims, indice_nbDims, (float *)output, stream);
break;
case nvinfer1::DataType::kINT32:
TRTONNXScatterNDKernelLauncher_int32(
TRTONNXScatterNDKernelLauncher<int>(
(int *)data, (int *)indices, (int *)update, dims, nbDims,
indices_dims, indice_nbDims, (int *)output, stream);
break;
@ -204,3 +191,5 @@ void ONNXScatterNDDynamicCreator::setPluginNamespace(const char *libNamespace) {
const char *ONNXScatterNDDynamicCreator::getPluginNamespace() const {
return mNamespace.c_str();
}
REGISTER_TENSORRT_PLUGIN(ONNXScatterNDDynamicCreator);

View File

@ -91,8 +91,8 @@ class ONNXScatterNDDynamicCreator : public nvinfer1::IPluginCreator {
const char *getPluginNamespace() const override;
private:
static nvinfer1::PluginFieldCollection mFC;
static std::vector<nvinfer1::PluginField> mPluginAttributes;
nvinfer1::PluginFieldCollection mFC;
std::vector<nvinfer1::PluginField> mPluginAttributes;
std::string mNamespace;
};
#endif // TRT_SCATTERND_HPP

View File

@ -6,8 +6,6 @@
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
static int const threadsPerBlock = sizeof(unsigned long long int) * 8;
using mmlab::TensorDesc;
template <typename T>
@ -66,27 +64,17 @@ void TRTONNXScatterNDKernelLauncher(const T* data, const int* indices,
num_update_indice *= indice_desc.shape[i];
}
// scatter
const int col_block = DIVUP(num_update_indice, threadsPerBlock);
onnx_scatternd_kernel<<<col_block, threadsPerBlock, 0, stream>>>(
const int col_block = DIVUP(num_update_indice, THREADS_PER_BLOCK);
onnx_scatternd_kernel<<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
num_update_indice, indices, update, output, tensor_desc, indice_desc);
}
void TRTONNXScatterNDKernelLauncher_float(const float* data, const int* indices,
const float* update, const int* dims,
int nbDims, const int* indices_dims,
int indice_nbDims, float* output,
cudaStream_t stream) {
TRTONNXScatterNDKernelLauncher<float>(data, indices, update, dims, nbDims,
indices_dims, indice_nbDims, output,
stream);
}
template void TRTONNXScatterNDKernelLauncher<float>(
const float* data, const int* indices, const float* update, const int* dims,
int nbDims, const int* indices_dims, int indice_nbDims, float* output,
cudaStream_t stream);
void TRTONNXScatterNDKernelLauncher_int32(const int* data, const int* indices,
const int* update, const int* dims,
int nbDims, const int* indices_dims,
int indice_nbDims, int* output,
cudaStream_t stream) {
TRTONNXScatterNDKernelLauncher<int>(data, indices, update, dims, nbDims,
indices_dims, indice_nbDims, output,
stream);
}
template void TRTONNXScatterNDKernelLauncher<int>(
const int* data, const int* indices, const int* update, const int* dims,
int nbDims, const int* indices_dims, int indice_nbDims, int* output,
cudaStream_t stream);

View File

@ -0,0 +1,12 @@
#ifndef TRT_SCATTERND_KERNEL_HPP
#define TRT_SCATTERND_KERNEL_HPP
#include <cuda_runtime.h>
template <typename T>
void TRTONNXScatterNDKernelLauncher(const T* data, const int* indices,
const T* update, const int* dims,
int nbDims, const int* indices_dims,
int indice_nbDims, T* output,
cudaStream_t stream);
#endif // TRT_SCATTERND_KERNEL_HPP

View File

@ -1,13 +0,0 @@
#include "batched_nms/trt_batched_nms.hpp"
#include "nms/trt_nms.hpp"
#include "roi_align/trt_roi_align.hpp"
#include "scatternd/trt_scatternd.hpp"
REGISTER_TENSORRT_PLUGIN(TRTBatchedNMSPluginDynamicCreator);
REGISTER_TENSORRT_PLUGIN(NonMaxSuppressionDynamicCreator);
REGISTER_TENSORRT_PLUGIN(ONNXScatterNDDynamicCreator);
REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);
extern "C" {
bool initLibMMCVInferPlugins() { return true; }
} // extern "C"

View File

@ -0,0 +1 @@
from .roi_extractors import * # noqa: F401, F403

View File

@ -0,0 +1 @@
from .base_roi_extractor import * # noqa: F401, F403

View File

@ -0,0 +1,72 @@
from torch.autograd import Function
from mmdeploy.utils import FUNCTION_REWRITERS
class MultiLevelRoiAlign(Function):
def __init__(self) -> None:
super().__init__()
@staticmethod
def symbolic(g, *args):
aligned = args[-1]
featmap_strides = args[-2]
finest_scale = args[-3]
roi_scale_factor = args[-4]
sampling_ratio = args[-5]
output_size = args[-6]
inputs = args[:len(featmap_strides)]
rois = args[len(featmap_strides)]
return g.op(
'mmlab::MMCVMultiLevelRoiAlign',
rois,
*inputs,
output_height_i=output_size[1],
output_width_i=output_size[0],
sampling_ratio_i=sampling_ratio,
roi_scale_factor_f=roi_scale_factor,
finest_scale_i=finest_scale,
featmap_strides_f=featmap_strides,
aligned_i=aligned)
@staticmethod
def forward(g, *args):
# aligned = args[-1]
featmap_strides = args[-2]
# finest_scale = args[-3]
# roi_scale_factor = args[-4]
# sampling_ratio = args[-5]
output_size = args[-6]
inputs = args[:len(featmap_strides)]
rois = args[len(featmap_strides)]
num_proposals = rois.shape[0]
channel = inputs[0].shape[1]
return rois.new_zeros(
(num_proposals, channel, output_size[1], output_size[0]))
@FUNCTION_REWRITERS.register_rewriter(
func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward',
backend='tensorrt')
def SingleRoIExtractor_forward_static(rewriter,
self,
feats,
rois,
roi_scale_factor=None):
featmap_strides = self.featmap_strides
finest_scale = self.finest_scale
roi_layer = self.roi_layers[0]
out_size = roi_layer.output_size
sampling_ratio = roi_layer.sampling_ratio
aligned = roi_layer.aligned
if roi_scale_factor is None:
roi_scale_factor = 1.0
featmap_strides = [float(s) for s in featmap_strides]
return MultiLevelRoiAlign.apply(*feats, rois, out_size, sampling_ratio,
roi_scale_factor, finest_scale,
featmap_strides, aligned)