[Feature] Merge NCNN deployment to grimoire based on mmcls - revert [#25](https://github.com/grimoire/deploy_prototype/pull/25) (#30)
* add
* change VulkanSDK to 1.2.176.1
* add ncnn cmakelist
* add ncnn source code as third party
* add all ncnn
* ncnn compile passed
* onnx2ncnn correctly
* fix code style
* merge_as_grimoire_design, only backend_ops, manually register.
* remove data and test sh
* remove build example
* remove config ncnn
* remove onnx2ncnn intermediate files
* remove other files auto-generated
* remove vulkan tools
* remove Vulkan, gitignore new rules, __init__ new lines
* rollback __init__ to grimoire
* remove pytorch version pending
* grimoire comments reply 1, 3, 4
* reply comment 5,6,7
* add auto definer, add python register
* fix lint
* add ncnn deploy support
* add model_wrapper, fix a typo bug, and add code comment for onnx2ncnn(WIP)
* add model wrapper ncnn
* fix lint
* fix pep8
* fix pre-commit-config.yaml paths
* fix import
* fix lint
* remove sys.path.append
* remove sys
* isort fix
* fix double quoted
* fix trailing space
* try fix isort
* fix clang-format-9
* fix requests
* fix all comments
* Fix typo
* test code for grimoire
* fix ops register
* new definere
* fix visualization of mmcls
* remove temp
* fix flake8
* fix seed-isort-config
* fix thirdparty
* fix thirdparty
* fix yapf
* fix third_party_sort
* fix third party
* fix clang-format
* try fix clang-format
* try to fix clang format 9 customreshape
* try fix clang-format-9
* try fix clang-format-9
* try fix clang-format-9
* try fix ext
* fix onnx2ncnn
* Fix comments
* Fix Comments
* Fix Comments
* Fix Comments
* Fix conflict
* Fix flake8
* Update .isort.cfg
* Update ncnn_ext.cpp
* Update ncnn_ext.cpp
* fix missing ncnn backend code
* delete out of date comments of gather.cpp
* add DeployBaseClassifier
* add return -100 error
* clear out-of-date to do comments

Co-authored-by: 韩睿 <SENSETIME\hanrui1@cn0614008774l.domain.sensetime.com>
Co-authored-by: grimoire <yaoqian@sensetime.com>
Co-authored-by: grimoire <streetyao@live.com>
parent f607f1965b
commit e05521c933
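For orientation, the deployment flow this commit enables looks roughly as follows once the project has been built with -DBUILD_NCNN_OPS=ON and a valid NCNN_DIR (a sketch based on the APIs added below; the ONNX/param/bin file names are placeholders):

    import subprocess

    from mmdeploy.apis.ncnn import get_onnx2ncnn_path, is_available

    assert is_available(), 'ncnn backend ops or pyncnn are missing'

    # convert an exported ONNX model into the ncnn param/bin pair
    # using the onnx2ncnn tool built by this commit
    onnx2ncnn = get_onnx2ncnn_path()
    subprocess.call([onnx2ncnn, 'end2end.onnx', 'end2end.param', 'end2end.bin'])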
@@ -1,2 +1,2 @@
[settings]
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,onnxruntime,packaging,pytest,setuptools,tensorrt,torch
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,packaging,pytest,setuptools,tensorrt,torch
@@ -26,4 +26,18 @@ if (BUILD_TENSORRT_OPS)
    endif()
endif()

# NCNN config

# enable ncnn
option(BUILD_NCNN_OPS "enable NCNN ops" OFF)
# NCNN search path
if (BUILD_NCNN_OPS)
    if (NOT DEFINED NCNN_DIR)
        set(NCNN_DIR $ENV{NCNN_DIR})
    endif()
    if (NOT NCNN_DIR)
        message(FATAL_ERROR "NCNN_DIR is not found.")
    endif()
endif()

add_subdirectory (backend_ops)
@@ -16,3 +16,9 @@ if (BUILD_TENSORRT_OPS)
    message("Build TensorRT custom ops.")
    add_subdirectory (tensorrt)
endif()

# build NCNN ops
if (BUILD_NCNN_OPS)
    message("Build NCNN custom ops")
    add_subdirectory (ncnn)
endif()
@@ -0,0 +1,16 @@
set(TARGET_NAME mmlab_ncnn_ops)
set(SHARED_TARGET ${TARGET_NAME})

# ncnn
set(ncnn_DIR ${NCNN_DIR}/build/install/lib/cmake/ncnn)
find_package(ncnn)

if (ncnn_FOUND)
    message(STATUS "ncnn library found!")
else ()
    message(FATAL_ERROR "Could not locate ncnn" \n)
endif()

add_subdirectory (ops)
add_subdirectory (onnx2ncnn)
add_subdirectory (pyncnn_ext)
@@ -0,0 +1,15 @@

find_package(Protobuf)

if(PROTOBUF_FOUND)
    protobuf_generate_cpp(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${NCNN_DIR}/tools/onnx/onnx.proto)
    add_executable(onnx2ncnn onnx2ncnn.cpp ${ONNX_PROTO_SRCS} ${ONNX_PROTO_HDRS})
    target_include_directories(onnx2ncnn
        PRIVATE
            ${PROTOBUF_INCLUDE_DIR}
            ${CMAKE_CURRENT_BINARY_DIR})
    target_link_libraries(onnx2ncnn PRIVATE ${PROTOBUF_LIBRARIES})

else()
    message(FATAL_ERROR "Protobuf not found, onnx model convert tool won't be built")
endif()
File diff suppressed because it is too large
@@ -0,0 +1,15 @@
# add plugin source
set(PLUGIN_LISTS custom_reshape
                 gather
                 shape)

foreach(PLUGIN_ITER ${PLUGIN_LISTS})
    file(GLOB PLUGIN_OPS_SRCS ${PLUGIN_ITER}/*.cpp)
    file(GLOB PLUGIN_OPS_HEADS ${PLUGIN_ITER}/*.h)
    set(BACKEND_OPS_SRCS ${BACKEND_OPS_SRCS} ${PLUGIN_OPS_SRCS} ${PLUGIN_OPS_HEADS})
endforeach(PLUGIN_ITER)

set(BACKEND_OPS_SRCS ${BACKEND_OPS_SRCS} ncnn_ops_register.cpp)

add_library(${SHARED_TARGET} SHARED ${BACKEND_OPS_SRCS})
target_link_libraries(${SHARED_TARGET} ncnn)
@@ -0,0 +1,217 @@
#include "custom_reshape.h"

#include "../ncnn_ops_definer.h"

namespace mmlab {
using namespace ncnn;
DEFINE_LAYER_CREATOR(CustomReshape)
DEFINE_NCNN_OPS(CustomReshape, CustomReshape)
CustomReshape::CustomReshape() {
  one_blob_only = false;
  support_inplace = false;
}

int CustomReshape::load_param(const ParamDict &pd) {
  permute = pd.get(0, 0);

  return 0;
}

int CustomReshape::forward(const std::vector<Mat> &bottom_blobs,
                           std::vector<Mat> &top_blobs,
                           const Option &opt) const {
  const Mat &bottom_blob = bottom_blobs[0];
  Mat &top_blob = top_blobs[0];
  int ndim = bottom_blobs[1].w;
  int w = 0;
  int h = 0;
  int c = 0;
  if (ndim == 1) {
    w = (int)(bottom_blobs[1].row(0)[0] + 0.5);
  }
  if (ndim == 2) {
    h = (int)(bottom_blobs[1].row(0)[0] + 0.5);
    w = (int)(bottom_blobs[1].row(0)[1] + 0.5);
  }
  if (ndim == 3) {
    c = (int)(bottom_blobs[1].row(0)[0] + 0.5);
    h = (int)(bottom_blobs[1].row(0)[1] + 0.5);
    w = (int)(bottom_blobs[1].row(0)[2] + 0.5);
  }

  size_t elemsize = bottom_blob.elemsize;
  int total = bottom_blob.w * bottom_blob.h * bottom_blob.c;

  int dims = bottom_blob.dims;

  // resolve out shape
  int outw = w;
  int outh = h;
  int outc = c;

  if (ndim == 1) {
    if (outw == 0)
      outw = bottom_blob.w;

    else if (outw == -1)
      outw = total;

    else {
      fprintf(stderr,
              "Warning: custom shape memory maybe invalid, using "
              "bottom_blob shape!\n");
      outw = bottom_blob.w;
    }

    if (dims == 1 && bottom_blob.w == outw) {
      top_blob = bottom_blob;
      return 0;
    }
  }
  if (ndim == 2) {
    if (outw == 0) outw = bottom_blob.w;
    if (outh == 0) outh = bottom_blob.h;

    if (outw == -1) outw = total / outh;
    if (outh == -1) outh = total / outw;

    if (dims == 2 && bottom_blob.h == outh) {
      top_blob = bottom_blob;
      return 0;
    }
  }
  if (ndim == 3) {
    if (outw == 0) outw = bottom_blob.w;
    if (outh == 0) outh = bottom_blob.h;
    if (outc == 0) outc = bottom_blob.c;

    if (outw == -1) outw = total / outc / outh;
    if (outh == -1) outh = total / outc / outw;
    if (outc == -1) outc = total / outh / outw;

    if (dims == 3 && bottom_blob.c == outc) {
      top_blob = bottom_blob;
      top_blob.w = outw;
      top_blob.h = outh;
      return 0;
    }
  }

  bool need_permute = permute == 1;
  if (dims == 2 && ndim == 2 && bottom_blob.h == outh) need_permute = false;
  if (dims == 3 && ndim == 3 && bottom_blob.c == outc) need_permute = false;

  if (need_permute) {
    Mat bottom_blob_permuted = bottom_blob;

    if (dims == 2) {
      // hw -> wh
      int _w = bottom_blob.w;
      int _h = bottom_blob.h;

      bottom_blob_permuted.create(_h, _w, elemsize, opt.workspace_allocator);
      if (bottom_blob_permuted.empty()) return -100;
      const float *ptr = bottom_blob;
      float *outptr = bottom_blob_permuted;

      for (int i = 0; i < _w; i++) {
        for (int j = 0; j < _h; j++) {
          outptr[i * _h + j] = ptr[j * _w + i];
        }
      }
    }
    if (dims == 3) {
      // chw -> hwc
      int _w = bottom_blob.w;
      int _h = bottom_blob.h;
      int channels = bottom_blob.c;

      bottom_blob_permuted.create(channels, _w, _h, elemsize,
                                  opt.workspace_allocator);
      if (bottom_blob_permuted.empty()) return -100;

#pragma omp parallel for num_threads(opt.num_threads)
      for (int q = 0; q < _h; q++) {
        float *outptr = bottom_blob_permuted.channel(q);

        for (int i = 0; i < _w; i++) {
          for (int j = 0; j < channels; j++) {
            const float *ptr = bottom_blob.channel(j).row(q);
            outptr[i * channels + j] = ptr[i];
          }
        }
      }
    }

    if (ndim == 1) {
      top_blob = bottom_blob_permuted.reshape(outw, opt.blob_allocator);
      if (top_blob.empty()) return -100;

      return 0;
    }

    // permute on nhwc/nhc
    Mat top_blob_permuted;
    if (ndim == 2) {
      top_blob_permuted =
          bottom_blob_permuted.reshape(outh, outw, opt.workspace_allocator);
    }
    if (ndim == 3) {
      top_blob_permuted = bottom_blob_permuted.reshape(outc, outw, outh,
                                                       opt.workspace_allocator);
    }

    if (top_blob_permuted.empty()) return -100;

    if (ndim == 2) {
      // wh -> hw
      top_blob.create(outw, outh, elemsize, opt.blob_allocator);
      if (top_blob.empty()) return -100;

      const float *ptr = top_blob_permuted;
      float *outptr = top_blob;

      for (int i = 0; i < outh; i++) {
        for (int j = 0; j < outw; j++) {
          outptr[i * outw + j] = ptr[j * outh + i];
        }
      }
    }
    if (ndim == 3) {
      // chw -> hwc
      top_blob.create(outw, outh, outc, elemsize, opt.blob_allocator);
      if (top_blob.empty()) return -100;

#pragma omp parallel for num_threads(opt.num_threads)
      for (int q = 0; q < outc; q++) {
        float *outptr = top_blob.channel(q);

        for (int i = 0; i < outh; i++) {
          const float *ptr = top_blob_permuted.channel(i);

          for (int j = 0; j < outw; j++) {
            outptr[i * outw + j] = ptr[j * outc + q];
          }
        }
      }
    }

    return 0;
  }

  if (ndim == 1) {
    top_blob = bottom_blob.reshape(outw, opt.blob_allocator);
  }
  if (ndim == 2) {
    top_blob = bottom_blob.reshape(outw, outh, opt.blob_allocator);
  }
  if (ndim == 3) {
    top_blob = bottom_blob.reshape(outw, outh, outc, opt.blob_allocator);
  }

  if (top_blob.empty()) return -100;

  return 0;
}

}  // namespace mmlab
@@ -0,0 +1,30 @@
#ifndef LAYER_CUSTOMRESHAPE_H
#define LAYER_CUSTOMRESHAPE_H

#include "layer.h"

namespace mmlab {

class CustomReshape : public ncnn::Layer {
 public:
  CustomReshape();

  virtual int load_param(const ncnn::ParamDict& pd);

  virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs,
                      std::vector<ncnn::Mat>& top_blobs,
                      const ncnn::Option& opt) const;

 public:
  // reshape flag
  // 0 = copy from bottom
  // -1 = remaining
  // -233 = drop this dim (default)

  // flag permute chw->hwc or hw->wh before and after reshape
  int permute;
};

}  // namespace mmlab

#endif  // LAYER_CUSTOMRESHAPE_H
@@ -0,0 +1,246 @@
#include "gather.h"

#include "../ncnn_ops_definer.h"

namespace mmlab {
using namespace ncnn;
DEFINE_LAYER_CREATOR(Gather)
DEFINE_NCNN_OPS(Gather, Gather)
Gather::Gather() {
  one_blob_only = false;
  support_inplace = false;
}

int Gather::load_param(const ParamDict &pd) {
  axis = pd.get(0, 0);

  return 0;
}

int Gather::forward(const std::vector<Mat> &bottom_blobs,
                    std::vector<Mat> &top_blobs, const Option &opt) const {
  const Mat &bottom_blob = bottom_blobs[0];
  const Mat &indices = bottom_blobs[1];
  int dims = bottom_blob.dims;
  int indices_dims = indices.dims;
  size_t elemsize = bottom_blob.elemsize;
  int positive_axis = axis < 0 ? dims + axis : axis;
  Mat &top_blob = top_blobs[0];

  const float *indices_ptr = indices;

  if (dims == 1 && indices_dims == 1)  // positive_axis == 0
  {
    int w = indices.w;
    top_blob.create(w, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    float *outptr = top_blob;
    for (int i = 0; i < w; i++) {
      float indice = indices_ptr[i];
      outptr[i] = ptr[(int)(indice + 0.5)];
    }

    return 0;
  }

  if (dims == 1 && indices_dims == 2)  // positive_axis == 0
  {
    int w = indices.w;
    int h = indices.h;
    top_blob.create(w, h, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    float *outptr = top_blob;
    for (int j = 0; j < h; j++) {
      for (int i = 0; i < w; i++) {
        int indice = (int)(indices_ptr[j * w + i] + 0.5);
        outptr[j * w + i] = ptr[indice];
      }
    }
    return 0;
  }
  if (dims == 1 && indices_dims == 3)  // positive_axis == 0
  {
    int c = indices.c;
    int w = indices.w;
    int h = indices.h;
    top_blob.create(c, w, h, elemsize, opt.blob_allocator);
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;

    for (int page = 0; page < c; page++) {
      indices_ptr = indices.channel(page);
      float *outptr = top_blob.channel(page);
      for (int j = 0; j < h; j++) {
        for (int i = 0; i < w; i++) {
          int indice = (int)(indices_ptr[j * w + i] + 0.5);
          outptr[j * w + i] = ptr[indice];
        }
      }
    }

    return 0;
  }

  if (dims == 2 && positive_axis == 0 && indices_dims == 1) {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    top_blob.create(w, indices.w, elemsize, opt.blob_allocator);
    // w -> w
    // h -> indices.w
    // h * w -> indices.w * w
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    float *outptr = top_blob;
    for (int i = 0; i < indices.w; i++) {
      for (int j = 0; j < w; j++) {
        int selected = (int)(indices_ptr[i] + 0.5);
        outptr[i * w + j] = ptr[selected * w + j];
      }
    }

    return 0;
  }

  if (dims == 2 && positive_axis == 1 && indices_dims == 1) {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    top_blob.create(h, indices.w, elemsize, opt.blob_allocator);
    // w -> h
    // h -> indices.w
    // h * w -> indices.w * h
    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    float *outptr = top_blob;
    for (int i = 0; i < indices.w; i++) {
      for (int j = 0; j < h; j++) {
        int selected = (int)(indices_ptr[i] + 0.5);
        outptr[i * h + j] = ptr[j * w + selected];
      }
    }
    return 0;
  }

  if (dims == 2 && positive_axis == 0 && indices_dims == 2) {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    top_blob.create(w, indices.w, indices.h, elemsize, opt.blob_allocator);

    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;

    for (int k = 0; k < indices.h; k++) {
      float *outptr = top_blob.channel(k);
      for (int i = 0; i < indices.w; i++) {
        for (int j = 0; j < w; j++) {
          int selected = (int)(indices_ptr[k * indices.w + i] + 0.5);
          outptr[i * w + j] = ptr[selected * w + j];
        }
      }
    }

    return 0;
  }

  if (dims == 2 && positive_axis == 1 && indices_dims == 2) {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    top_blob.create(h, indices.w, indices.h, elemsize, opt.blob_allocator);

    if (top_blob.empty()) {
      return -100;
    }
    const float *ptr = bottom_blob;
    for (int k = 0; k < indices.h; k++) {
      float *outptr = top_blob.channel(k);
      for (int i = 0; i < indices.w; i++) {
        for (int j = 0; j < h; j++) {
          int selected = (int)(indices_ptr[k * indices.w + i] + 0.5);
          outptr[i * h + j] = ptr[j * w + selected];
        }
      }
    }

    return 0;
  }

  if (dims == 3 && positive_axis == 0 && indices_dims == 1) {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    top_blob.create(w, h, indices.w, elemsize, opt.blob_allocator);

    if (top_blob.empty()) {
      return -100;
    }
    for (int i = 0; i < indices.w; i++) {
      int selected = (int)(indices_ptr[i] + 0.5);
      const unsigned char *ptr = bottom_blob.channel(selected);
      unsigned char *outptr = top_blob.channel(i);

      memcpy(outptr, ptr, w * h * elemsize);
    }
    return 0;
  }

  if (dims == 3 && positive_axis == 1 && indices_dims == 1) {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    top_blob.create(w, channels, indices.w, elemsize, opt.blob_allocator);
#pragma omp parallel for num_threads(opt.num_threads)
    // use parallel programming
    for (int i = 0; i < indices.w; i++) {
      int selected = (int)(indices_ptr[i] + 0.5);
      float *outptr = top_blob.channel(i);
      for (int j = 0; j < channels; j++) {
        const float *ptr = bottom_blob.channel(j);
        for (int k = 0; k < w; k++) {
          outptr[j * w + k] = ptr[selected * w + k];
        }
      }
    }

    return 0;
  }

  if (dims == 3 && positive_axis == 2 && indices_dims == 1) {
    fprintf(stderr, "gather: dim = 3\n");
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    top_blob.create(h, channels, indices.w, elemsize, opt.blob_allocator);
#pragma omp parallel for num_threads(opt.num_threads)
    // use parallel programming
    for (int i = 0; i < indices.w; i++) {
      int selected = (int)(indices_ptr[i] + 0.5);
      float *outptr = top_blob.channel(i);
      for (int j = 0; j < channels; j++) {
        const float *ptr = bottom_blob.channel(j);
        for (int k = 0; k < h; k++) {
          outptr[j * h + k] = ptr[k * w + selected];
        }
      }
    }
    fprintf(stderr, "top_blob.size: (%d %d %d)\n", top_blob.c, top_blob.h,
            top_blob.w);
    return 0;
  }

  return 0;
}

}  // namespace mmlab
@@ -0,0 +1,24 @@
#ifndef LAYER_GATHER_H
#define LAYER_GATHER_H

#include "layer.h"

namespace mmlab {

class Gather : public ncnn::Layer {
 public:
  Gather();

  virtual int load_param(const ncnn::ParamDict& pd);

  virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs,
                      std::vector<ncnn::Mat>& top_blobs,
                      const ncnn::Option& opt) const;

 public:
  int axis;
};

}  // namespace mmlab

#endif  // LAYER_GATHER_H
@@ -0,0 +1,30 @@
#ifndef NCNN_OPS_DEFINER_H
#define NCNN_OPS_DEFINER_H

#include <string>

#include "layer.h"
#include "ncnn_ops_register.h"

namespace mmlab {

class NCNNOpsDefiner {
 public:
  NCNNOpsDefiner(const std::string& ops_name,
                 const ncnn::layer_creator_func& creator_func = 0,
                 const ncnn::layer_destroyer_func& destroyer_func = 0)
      : _ops_name(ops_name) {
    get_mm_layer_creator()[_ops_name.c_str()] = creator_func;
  }

 private:
  const std::string _ops_name;
};

#define DEFINE_NCNN_OPS(ops_name, OpsLayer)               \
  static mmlab::NCNNOpsDefiner NCNNOpsDefiner##ops_name{ \
      #ops_name, OpsLayer##_layer_creator};

}  // namespace mmlab

#endif
@@ -0,0 +1,35 @@
#include "ncnn_ops_register.h"

#include <iostream>

std::map<const char *, ncnn::layer_creator_func> &get_mm_layer_creator() {
  static std::map<const char *, ncnn::layer_creator_func> _layer_creator_map;
  return _layer_creator_map;
}

std::map<const char *, ncnn::layer_destroyer_func> &get_mm_layer_destroyer() {
  static std::map<const char *, ncnn::layer_destroyer_func>
      _layer_destroyer_map;
  return _layer_destroyer_map;
}

int register_mm_custom_layers(ncnn::Net &net) {
  auto &layer_creator_map = get_mm_layer_creator();
  auto &layer_destroyer_map = get_mm_layer_destroyer();

  for (auto const &creator_pair : layer_creator_map) {
    auto creator_name = creator_pair.first;
    auto creator_func = creator_pair.second;

    ncnn::layer_destroyer_func destroyer_func = 0;
    if (layer_destroyer_map.find(creator_name) != layer_destroyer_map.end()) {
      destroyer_func = layer_destroyer_map[creator_name];
    }
    int ret =
        net.register_custom_layer(creator_name, creator_func, destroyer_func);
    if (0 != ret) {
      return ret;
    }
  }
  return 0;
}
@@ -0,0 +1,16 @@
#ifndef NCNN_OPS_REGISTER_H
#define NCNN_OPS_REGISTER_H

#include <map>
#include <string>

#include "net.h"

extern "C" {
std::map<const char*, ncnn::layer_creator_func>& get_mm_layer_creator();
std::map<const char*, ncnn::layer_destroyer_func>& get_mm_layer_destroyer();

int register_mm_custom_layers(ncnn::Net& net);
}

#endif
@@ -0,0 +1,44 @@
#include "shape.h"

#include "../ncnn_ops_definer.h"

namespace mmlab {
using namespace ncnn;
DEFINE_LAYER_CREATOR(Shape)
DEFINE_NCNN_OPS(Shape, Shape)
Shape::Shape() {
  one_blob_only = true;
  support_inplace = false;
}

int Shape::forward(const Mat &bottom_blob, Mat &top_blob,
                   const Option &opt) const {
  int dims = bottom_blob.dims;
  int w = bottom_blob.w;
  size_t elemsize = sizeof(bottom_blob.w);
  top_blob.create(dims, elemsize, opt.blob_allocator);
  if (top_blob.empty()) {
    return -100;
  }
  float *outptr = top_blob;
  if (dims == 1) {
    outptr[0] = w;
    return 0;
  }
  if (dims == 2) {
    int h = bottom_blob.h;
    outptr[0] = h;
    outptr[1] = w;
    return 0;
  }
  if (dims == 3) {
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    outptr[0] = channels;
    outptr[1] = h;
    outptr[2] = w;
    return 0;
  }
}

}  // namespace mmlab
@@ -0,0 +1,18 @@
#ifndef LAYER_SHAPE_H
#define LAYER_SHAPE_H

#include "layer.h"

namespace mmlab {

class Shape : public ncnn::Layer {
 public:
  Shape();

  virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,
                      const ncnn::Option& opt) const;
};

}  // namespace mmlab

#endif  // LAYER_SHAPE_H
@@ -0,0 +1,12 @@
# ncnn
set(ncnn_DIR ${NCNN_DIR}/build/install/lib/cmake/ncnn)
find_package(ncnn)

# pybind11
set(PYBIND11_DIR ${NCNN_DIR}/python/pybind11)
add_subdirectory(${PYBIND11_DIR} pybind11)

include_directories(${pybind11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS})
pybind11_add_module(ncnn_ext ncnn_ext.cpp)
target_link_libraries(ncnn_ext PUBLIC ncnn ${SHARED_TARGET})
set_target_properties(ncnn_ext PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/mmdeploy/apis/ncnn)
@@ -0,0 +1,11 @@
#include <pybind11/pybind11.h>

#include "../ops/ncnn_ops_register.h"
#include "net.h"

PYBIND11_MODULE(ncnn_ext, m) {
  m.def(
      "register_mm_custom_layers",
      [](ncnn::Net &net) { return register_mm_custom_layers(net); },
      "register all mmlab custom ncnn layers.");
}
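The pybind11 module above is installed next to the Python API (see the LIBRARY_OUTPUT_DIRECTORY rule in the pyncnn_ext CMakeLists), so the custom layers can be registered from Python before a converted model is loaded; a minimal sketch, assuming the extension was built (file names are placeholders):

    import ncnn

    from mmdeploy.apis.ncnn import ncnn_ext

    net = ncnn.Net()
    # Shape/Gather/CustomReshape must be registered before load_param,
    # otherwise ncnn cannot resolve the custom layer types in the .param file
    ncnn_ext.register_mm_custom_layers(net)
    net.load_param('end2end.param')
    net.load_model('end2end.bin')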
@@ -0,0 +1 @@
backend = 'ncnn'
@@ -0,0 +1 @@
_base_ = ['./mmcls_base.py', '../_base_/backends/ncnn.py']
@@ -0,0 +1,17 @@
import importlib
import os.path as osp

from .init_plugins import get_onnx2ncnn_path, get_ops_path

__all__ = ['get_ops_path', 'get_onnx2ncnn_path']


def is_available():
    ncnn_ops_path = get_ops_path()
    if not osp.exists(ncnn_ops_path):
        return False
    has_pyncnn = importlib.util.find_spec('ncnn') is not None
    has_pyncnn_ext = importlib.util.find_spec(
        'mmdeploy.apis.ncnn.ncnn_ext') is not None

    return has_pyncnn and has_pyncnn_ext
@@ -0,0 +1,25 @@
import glob
import os


def get_ops_path():
    """Get NCNN custom ops library path."""
    wildcard = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            '../../../build/lib/libmmlab_ncnn_ops.so'))

    paths = glob.glob(wildcard)
    lib_path = paths[0] if len(paths) > 0 else ''
    return lib_path


def get_onnx2ncnn_path():
    """Get onnx2ncnn path."""
    wildcard = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__), '../../../build/bin/onnx2ncnn'))

    paths = glob.glob(wildcard)
    lib_path = paths[0] if len(paths) > 0 else ''
    return lib_path
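Both helpers simply glob the build tree and return an empty string when nothing was built, so callers can fail early; a small guard in that spirit (assumed usage, mirroring is_available above):

    from mmdeploy.apis.ncnn import get_onnx2ncnn_path, get_ops_path

    if not get_ops_path() or not get_onnx2ncnn_path():
        raise RuntimeError('build mmdeploy with -DBUILD_NCNN_OPS=ON and a '
                           'valid NCNN_DIR before using the ncnn backend')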
@@ -15,8 +15,8 @@ def is_available():

if is_available():
    from .onnx2tensorrt import onnx2tensorrt
    from .tensorrt_utils import (TRTWrapper, load_trt_engine,
                                 create_trt_engine, save_trt_engine)
    from .tensorrt_utils import (TRTWrapper, create_trt_engine,
                                 load_trt_engine, save_trt_engine)

    # load tensorrt plugin lib
    load_tensorrt_plugin()
@@ -104,6 +104,13 @@ def init_backend_model(model_files: Sequence[str],
        from mmdeploy.mmcls.export import TensorRTClassifier
        backend_model = TensorRTClassifier(
            model_files[0], class_names=class_names, device_id=device_id)
    elif backend == 'ncnn':
        from mmdeploy.mmcls.export import NCNNClassifier
        backend_model = NCNNClassifier(
            model_files[0],
            model_files[1],
            class_names=class_names,
            device_id=device_id)
    else:
        raise NotImplementedError(f'Unsupported backend type: {backend}')
    return backend_model

@@ -1,4 +1,8 @@
from .model_wrappers import ONNXRuntimeClassifier, TensorRTClassifier
from .model_wrappers import (NCNNClassifier, ONNXRuntimeClassifier,
                             TensorRTClassifier)
from .prepare_input import create_input

__all__ = ['create_input', 'ONNXRuntimeClassifier', 'TensorRTClassifier']
__all__ = [
    'create_input', 'NCNNClassifier', 'ONNXRuntimeClassifier',
    'TensorRTClassifier'
]
@@ -5,12 +5,34 @@ import torch
from mmcls.models import BaseClassifier


class ONNXRuntimeClassifier(BaseClassifier):
class DeployBaseClassifier(BaseClassifier):
    """Base Class of Wrapper for classifier's inference."""

    def __init__(self, class_names, device_id):
        super(DeployBaseClassifier, self).__init__()
        self.CLASSES = class_names
        self.device_id = device_id

    def simple_test(self, img, *args, **kwargs):
        raise NotImplementedError('This method is not implemented.')

    def extract_feat(self, imgs):
        raise NotImplementedError('This method is not implemented.')

    def forward_train(self, imgs, **kwargs):
        raise NotImplementedError('This method is not implemented.')

    def forward_test(self, imgs, *args, **kwargs):
        raise NotImplementedError('This method is not implemented.')


class ONNXRuntimeClassifier(DeployBaseClassifier):
    """Wrapper for classifier's inference with ONNXRuntime."""

    def __init__(self, onnx_file, class_names, device_id):
        super(ONNXRuntimeClassifier, self).__init__()
        super(ONNXRuntimeClassifier, self).__init__(class_names, device_id)
        import onnxruntime as ort

        sess = ort.InferenceSession(onnx_file)

        providers = ['CPUExecutionProvider']

@@ -22,21 +44,10 @@ class ONNXRuntimeClassifier(BaseClassifier):
        sess.set_providers(providers, options)

        self.sess = sess
        self.CLASSES = class_names
        self.device_id = device_id
        self.io_binding = sess.io_binding()
        self.output_names = [_.name for _ in sess.get_outputs()]
        self.is_cuda_available = is_cuda_available

    def simple_test(self, img, *args, **kwargs):
        raise NotImplementedError('This method is not implemented.')

    def extract_feat(self, imgs):
        raise NotImplementedError('This method is not implemented.')

    def forward_train(self, imgs, **kwargs):
        raise NotImplementedError('This method is not implemented.')

    def forward_test(self, imgs, *args, **kwargs):
        input_data = imgs
        # set io binding for inputs/outputs

@@ -59,10 +70,10 @@ class ONNXRuntimeClassifier(BaseClassifier):
        return list(results)


class TensorRTClassifier(BaseClassifier):
class TensorRTClassifier(DeployBaseClassifier):

    def __init__(self, trt_file, class_names, device_id):
        super(TensorRTClassifier, self).__init__()
        super(TensorRTClassifier, self).__init__(class_names, device_id)
        from mmdeploy.apis.tensorrt import TRTWrapper, load_tensorrt_plugin
        try:
            load_tensorrt_plugin()

@@ -72,17 +83,6 @@ class TensorRTClassifier(BaseClassifier):
        model = TRTWrapper(trt_file)

        self.model = model
        self.device_id = device_id
        self.CLASSES = class_names

    def simple_test(self, img, *args, **kwargs):
        raise NotImplementedError('This method is not implemented.')

    def extract_feat(self, imgs):
        raise NotImplementedError('This method is not implemented.')

    def forward_train(self, imgs, **kwargs):
        raise NotImplementedError('This method is not implemented.')

    def forward_test(self, imgs, *args, **kwargs):
        input_data = imgs

@@ -91,3 +91,33 @@
        results = results.detach().cpu().numpy()

        return list(results)


class NCNNClassifier(DeployBaseClassifier):

    def __init__(self, ncnn_param_file, ncnn_bin_file, class_names, device_id):
        super(NCNNClassifier, self).__init__(class_names, device_id)
        import ncnn
        from mmdeploy.apis.ncnn import ncnn_ext
        self.net = ncnn.Net()
        ncnn_ext.register_mm_custom_layers(self.net)
        self.net.load_param(ncnn_param_file)
        self.net.load_model(ncnn_bin_file)

    def forward_test(self, imgs, *args, **kwargs):
        import ncnn
        assert len(imgs.shape) == 4
        # Only for batch == 1 now.
        assert imgs.shape[0] == 1
        input_data = imgs[0].cpu().numpy()
        input_data = ncnn.Mat(input_data)
        if self.device_id == -1:
            ex = self.net.create_extractor()
            ex.input('input', input_data)
            ret, results = ex.extract('output')
            results = np.array(results)
            assert ret != -100, 'Memory allocation failed in ncnn layers'
            assert ret == 0
            return [results]
        else:
            raise NotImplementedError('GPU device is not implemented.')
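Putting the new wrapper to work only needs the converted param/bin pair; a minimal CPU-only sketch (the file names, class list, and input shape are placeholders, and only batch size 1 is supported):

    import torch

    from mmdeploy.mmcls.export import NCNNClassifier

    model = NCNNClassifier('end2end.param', 'end2end.bin',
                           class_names=['cat', 'dog'], device_id=-1)
    # forward_test expects a 4-D NCHW tensor with batch size 1
    scores = model.forward_test(torch.rand(1, 3, 224, 224))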
@@ -55,7 +55,7 @@ def instance_norm(g, input, num_groups, weight, bias, eps, cudnn_enabled):

    # Norm has shape [N, C, *] so we reshape weight and bias to [C, *]
    axes = list(range(1, input_rank - 1))
    from torch.onnx.symbolic_opset9 import mul, add
    from torch.onnx.symbolic_opset9 import add, mul
    return add(g, mul(g, norm, _unsqueeze_helper(g, weight, axes)),
               _unsqueeze_helper(g, bias, axes))

@@ -69,9 +69,10 @@ def test_function_rewriter():


def test_module_rewriter():
    from mmdeploy.core import MODULE_REWRITER, patch_model
    from torchvision.models.resnet import resnet50

    from mmdeploy.core import MODULE_REWRITER, patch_model

    @MODULE_REWRITER.register_rewrite_module(
        module_type='torchvision.models.resnet.Bottleneck', backend='tensorrt')
    class BottleneckWrapper(torch.nn.Module):

@@ -105,10 +106,11 @@ def test_module_rewriter():


def test_symbolic_register():
    import onnx
    from torch.autograd import Function

    import mmdeploy
    from mmdeploy.core import SYMBOLIC_REGISTER, register_extra_symbolics
    from torch.autograd import Function
    import onnx

    class TestFunc(Function):

@@ -1,6 +1,7 @@
import argparse
import logging
import os.path as osp
import subprocess
from functools import partial

import mmcv

@@ -140,6 +141,29 @@ def main():

            backend_files.append(osp.join(args.work_dir, save_file))

    elif backend == 'ncnn':
        from mmdeploy.apis.ncnn import get_onnx2ncnn_path
        from mmdeploy.apis.ncnn import is_available as is_available_ncnn

        if not is_available_ncnn():
            logging.error('ncnn support is not available.')
            exit(-1)

        onnx2ncnn_path = get_onnx2ncnn_path()

        backend_files = []
        for onnx_path in onnx_files:
            onnx_name = osp.splitext(osp.split(onnx_path)[1])[0]
            save_param = onnx_name + '.param'
            save_bin = onnx_name + '.bin'

            save_param = osp.join(args.work_dir, save_param)
            save_bin = osp.join(args.work_dir, save_bin)

            subprocess.call([onnx2ncnn_path, onnx_path, save_param, save_bin])

            backend_files += [save_param, save_bin]

    # check model outputs by visualization
    codebase = deploy_cfg['codebase']