[Feature] Merge NCNN deployment to grimoire based on mmcls - revert [#25](https://github.com/grimoire/deploy_prototype/pull/25) (#30)

* add

* change VulkanSDK to 1.2.176.1

* add ncnn cmakelist

* add ncnn source code as third party

* add all ncnn

* ncnn compile passed

* onnx2ncnn converts correctly

* fix code style

* merge as grimoire design: only backend_ops, registered manually

* remove data and test sh

* remove build example

* remove config ncnn

* remove onnx2ncnn intermediate files

* remove other files auto-generated

* remove vulkan tools

* remove Vulkan, gitignore new rules, __init__ new lines

* rollback __init__ to grimoire

* remove pytorch version pending

* reply to grimoire comments 1, 3, 4

* reply to comments 5, 6, 7

* add auto definer, add python register

* fix lint

* add ncnn deploy support

* add model_wrapper, fix a typo bug, and add code comments for onnx2ncnn (WIP)

* add model wrapper ncnn

* fix lint

* fix pep8

* fix pre-commit-config.yaml paths

* fix import

* fix lint

* remove sys.path.append

* remove sys

* isort fix

* fix double quoted

* fix trailing space

* try fix isort

* fix clang-format-9

* fix requests

* fix all comments

* Fix typo

* test code for grimoire

* fix ops register

* new definer

* fix visualization of mmcls

* remove temp

* fix flake8

* fix seed-isort-config

* fix thirdparty

* fix thirdparty

* fix yapf

* fix third_party_sort

* fix third party

* fix clang-format

* try fix clang-format

* try to fix clang-format-9 CustomReshape

* try fix clang-format-9

* try fix clang-format-9

* try fix clang-format-9

* try fix ext

* fix onnx2ncnn

* Fix comments

* Fix Comments

* Fix Comments

* Fix Comments

* Fix conflict

* Fix flake8

* Update .isort.cfg

* Update ncnn_ext.cpp

* Update ncnn_ext.cpp

* fix missing ncnn backend code

* delete out-of-date comments in gather.cpp

* add DeployBaseClassifier

* add return -100 error

* clear out-of-date TODO comments

Co-authored-by: 韩睿 <SENSETIME\hanrui1@cn0614008774l.domain.sensetime.com>
Co-authored-by: grimoire <yaoqian@sensetime.com>
Co-authored-by: grimoire <streetyao@live.com>
hanrui1sensetime 2021-08-05 14:06:47 +08:00 committed by GitHub
parent f607f1965b
commit e05521c933
29 changed files with 6348 additions and 35 deletions

View File

@ -1,2 +1,2 @@
[settings]
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,onnxruntime,packaging,pytest,setuptools,tensorrt,torch
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,packaging,pytest,setuptools,tensorrt,torch

View File

@ -26,4 +26,18 @@ if (BUILD_TENSORRT_OPS)
endif()
endif()
# NCNN config
# enable ncnn
option(BUILD_NCNN_OPS "enable NCNN ops" OFF)
# NCNN search path
if (BUILD_NCNN_OPS)
if (NOT DEFINED NCNN_DIR)
set(NCNN_DIR $ENV{NCNN_DIR})
endif()
if (NOT NCNN_DIR)
message(ERROR " NCNN_DIR is not found.")
endif()
endif()
add_subdirectory (backend_ops)

View File

@ -16,3 +16,9 @@ if (BUILD_TENSORRT_OPS)
message("Build TensorRT custom ops.")
add_subdirectory (tensorrt)
endif()
# build NCNN ops
if (BUILD_NCNN_OPS)
message("Build NCNN custom ops")
add_subdirectory (ncnn)
endif()

View File

@ -0,0 +1,16 @@
set(TARGET_NAME mmlab_ncnn_ops)
set(SHARED_TARGET ${TARGET_NAME})
# ncnn
set(ncnn_DIR ${NCNN_DIR}/build/install/lib/cmake/ncnn)
find_package(ncnn)
if (ncnn_FOUND)
message(STATUS "ncnn library found!")
else ()
message(FATAL_ERROR "Could not locate ncnn" \n)
endif()
add_subdirectory (ops)
add_subdirectory (onnx2ncnn)
add_subdirectory (pyncnn_ext)

View File

@ -0,0 +1,15 @@
find_package(Protobuf)
if(PROTOBUF_FOUND)
protobuf_generate_cpp(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${NCNN_DIR}/tools/onnx/onnx.proto)
add_executable(onnx2ncnn onnx2ncnn.cpp ${ONNX_PROTO_SRCS} ${ONNX_PROTO_HDRS})
target_include_directories(onnx2ncnn
PRIVATE
${PROTOBUF_INCLUDE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(onnx2ncnn PRIVATE ${PROTOBUF_LIBRARIES})
else()
message(FATAL_ERROR "Protobuf not found, onnx model convert tool won't be built")
endif()

File diff suppressed because it is too large

View File

@ -0,0 +1,15 @@
# add plugin source
set(PLUGIN_LISTS custom_reshape
gather
shape)
foreach(PLUGIN_ITER ${PLUGIN_LISTS})
file(GLOB PLUGIN_OPS_SRCS ${PLUGIN_ITER}/*.cpp)
file(GLOB PLUGIN_OPS_HEADS ${PLUGIN_ITER}/*.h)
set(BACKEND_OPS_SRCS ${BACKEND_OPS_SRCS} ${PLUGIN_OPS_SRCS} ${PLUGIN_OPS_HEADS})
endforeach(PLUGIN_ITER)
set(BACKEND_OPS_SRCS ${BACKEND_OPS_SRCS} ncnn_ops_register.cpp)
add_library(${SHARED_TARGET} SHARED ${BACKEND_OPS_SRCS})
target_link_libraries(${SHARED_TARGET} ncnn)

View File

@ -0,0 +1,217 @@
#include "custom_reshape.h"
#include "../ncnn_ops_definer.h"
namespace mmlab {
using namespace ncnn;
DEFINE_LAYER_CREATOR(CustomReshape)
DEFINE_NCNN_OPS(CustomReshape, CustomReshape)
CustomReshape::CustomReshape() {
one_blob_only = false;
support_inplace = false;
}
int CustomReshape::load_param(const ParamDict &pd) {
permute = pd.get(0, 0);
return 0;
}
int CustomReshape::forward(const std::vector<Mat> &bottom_blobs,
std::vector<Mat> &top_blobs,
const Option &opt) const {
const Mat &bottom_blob = bottom_blobs[0];
Mat &top_blob = top_blobs[0];
int ndim = bottom_blobs[1].w;
int w = 0;
int h = 0;
int c = 0;
if (ndim == 1) {
w = (int)(bottom_blobs[1].row(0)[0] + 0.5);
}
if (ndim == 2) {
h = (int)(bottom_blobs[1].row(0)[0] + 0.5);
w = (int)(bottom_blobs[1].row(0)[1] + 0.5);
}
if (ndim == 3) {
c = (int)(bottom_blobs[1].row(0)[0] + 0.5);
h = (int)(bottom_blobs[1].row(0)[1] + 0.5);
w = (int)(bottom_blobs[1].row(0)[2] + 0.5);
}
size_t elemsize = bottom_blob.elemsize;
int total = bottom_blob.w * bottom_blob.h * bottom_blob.c;
int dims = bottom_blob.dims;
// resolve out shape
int outw = w;
int outh = h;
int outc = c;
if (ndim == 1) {
if (outw == 0)
outw = bottom_blob.w;
else if (outw == -1)
outw = total;
else {
fprintf(stderr,
"Warning: custom shape memory maybe invalid, using "
"bottom_blob shape!\n");
outw = bottom_blob.w;
}
if (dims == 1 && bottom_blob.w == outw) {
top_blob = bottom_blob;
return 0;
}
}
if (ndim == 2) {
if (outw == 0) outw = bottom_blob.w;
if (outh == 0) outh = bottom_blob.h;
if (outw == -1) outw = total / outh;
if (outh == -1) outh = total / outw;
if (dims == 2 && bottom_blob.h == outh) {
top_blob = bottom_blob;
return 0;
}
}
if (ndim == 3) {
if (outw == 0) outw = bottom_blob.w;
if (outh == 0) outh = bottom_blob.h;
if (outc == 0) outc = bottom_blob.c;
if (outw == -1) outw = total / outc / outh;
if (outh == -1) outh = total / outc / outw;
if (outc == -1) outc = total / outh / outw;
if (dims == 3 && bottom_blob.c == outc) {
top_blob = bottom_blob;
top_blob.w = outw;
top_blob.h = outh;
return 0;
}
}
bool need_permute = permute == 1;
if (dims == 2 && ndim == 2 && bottom_blob.h == outh) need_permute = false;
if (dims == 3 && ndim == 3 && bottom_blob.c == outc) need_permute = false;
if (need_permute) {
Mat bottom_blob_permuted = bottom_blob;
if (dims == 2) {
// hw -> wh
int _w = bottom_blob.w;
int _h = bottom_blob.h;
bottom_blob_permuted.create(_h, _w, elemsize, opt.workspace_allocator);
if (bottom_blob_permuted.empty()) return -100;
const float *ptr = bottom_blob;
float *outptr = bottom_blob_permuted;
for (int i = 0; i < _w; i++) {
for (int j = 0; j < _h; j++) {
outptr[i * _h + j] = ptr[j * _w + i];
}
}
}
if (dims == 3) {
// chw -> hwc
int _w = bottom_blob.w;
int _h = bottom_blob.h;
int channels = bottom_blob.c;
bottom_blob_permuted.create(channels, _w, _h, elemsize,
opt.workspace_allocator);
if (bottom_blob_permuted.empty()) return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < _h; q++) {
float *outptr = bottom_blob_permuted.channel(q);
for (int i = 0; i < _w; i++) {
for (int j = 0; j < channels; j++) {
const float *ptr = bottom_blob.channel(j).row(q);
outptr[i * channels + j] = ptr[i];
}
}
}
}
if (ndim == 1) {
top_blob = bottom_blob_permuted.reshape(outw, opt.blob_allocator);
if (top_blob.empty()) return -100;
return 0;
}
// permute on nhwc/nhc
Mat top_blob_permuted;
if (ndim == 2) {
top_blob_permuted =
bottom_blob_permuted.reshape(outh, outw, opt.workspace_allocator);
}
if (ndim == 3) {
top_blob_permuted = bottom_blob_permuted.reshape(outc, outw, outh,
opt.workspace_allocator);
}
if (top_blob_permuted.empty()) return -100;
if (ndim == 2) {
// wh -> hw
top_blob.create(outw, outh, elemsize, opt.blob_allocator);
if (top_blob.empty()) return -100;
const float *ptr = top_blob_permuted;
float *outptr = top_blob;
for (int i = 0; i < outh; i++) {
for (int j = 0; j < outw; j++) {
outptr[i * outw + j] = ptr[j * outh + i];
}
}
}
if (ndim == 3) {
// chw -> hwc
top_blob.create(outw, outh, outc, elemsize, opt.blob_allocator);
if (top_blob.empty()) return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < outc; q++) {
float *outptr = top_blob.channel(q);
for (int i = 0; i < outh; i++) {
const float *ptr = top_blob_permuted.channel(i);
for (int j = 0; j < outw; j++) {
outptr[i * outw + j] = ptr[j * outc + q];
}
}
}
}
return 0;
}
if (ndim == 1) {
top_blob = bottom_blob.reshape(outw, opt.blob_allocator);
}
if (ndim == 2) {
top_blob = bottom_blob.reshape(outw, outh, opt.blob_allocator);
}
if (ndim == 3) {
top_blob = bottom_blob.reshape(outw, outh, outc, opt.blob_allocator);
}
if (top_blob.empty()) return -100;
return 0;
}
} // namespace mmlab

View File

@ -0,0 +1,30 @@
#ifndef LAYER_CUSTOMRESHAPE_H
#define LAYER_CUSTOMRESHAPE_H
#include "layer.h"
namespace mmlab {
class CustomReshape : public ncnn::Layer {
public:
CustomReshape();
virtual int load_param(const ncnn::ParamDict& pd);
virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs,
std::vector<ncnn::Mat>& top_blobs,
const ncnn::Option& opt) const;
public:
// reshape flag
// 0 = copy from bottom
// -1 = remaining
// -233 = drop this dim (default)
// flag permute chw->hwc or hw->wh before and after reshape
int permute;
};
} // namespace mmlab
#endif // LAYER_CUSTOMRESHAPE_H

View File

@ -0,0 +1,246 @@
#include "gather.h"
#include "../ncnn_ops_definer.h"
namespace mmlab {
using namespace ncnn;
DEFINE_LAYER_CREATOR(Gather)
DEFINE_NCNN_OPS(Gather, Gather)
Gather::Gather() {
one_blob_only = false;
support_inplace = false;
}
int Gather::load_param(const ParamDict &pd) {
axis = pd.get(0, 0);
return 0;
}
int Gather::forward(const std::vector<Mat> &bottom_blobs,
std::vector<Mat> &top_blobs, const Option &opt) const {
const Mat &bottom_blob = bottom_blobs[0];
const Mat &indices = bottom_blobs[1];
int dims = bottom_blob.dims;
int indices_dims = indices.dims;
size_t elemsize = bottom_blob.elemsize;
int positive_axis = axis < 0 ? dims + axis : axis;
Mat &top_blob = top_blobs[0];
const float *indices_ptr = indices;
if (dims == 1 && indices_dims == 1) // positive_axis == 0
{
int w = indices.w;
top_blob.create(w, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
float *outptr = top_blob;
for (int i = 0; i < w; i++) {
float indice = indices_ptr[i];
outptr[i] = ptr[(int)(indice + 0.5)];
}
return 0;
}
if (dims == 1 && indices_dims == 2) // positive_axis == 0
{
int w = indices.w;
int h = indices.h;
top_blob.create(w, h, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
float *outptr = top_blob;
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
int indice = (int)(indices_ptr[j * w + i] + 0.5);
outptr[j * w + i] = ptr[indice];
}
}
return 0;
}
if (dims == 1 && indices_dims == 3) // positive_axis == 0
{
int c = indices.c;
int w = indices.w;
int h = indices.h;
top_blob.create(c, w, h, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
for (int page = 0; page < c; page++) {
indices_ptr = indices.channel(page);
float *outptr = top_blob.channel(page);
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
int indice = (int)(indices_ptr[j * w + i] + 0.5);
outptr[j * w + i] = ptr[indice];
}
}
}
return 0;
}
if (dims == 2 && positive_axis == 0 && indices_dims == 1) {
int w = bottom_blob.w;
int h = bottom_blob.h;
top_blob.create(w, indices.w, elemsize, opt.blob_allocator);
// w -> w
// h -> indices.w
// h * w -> indices.w * w
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
float *outptr = top_blob;
for (int i = 0; i < indices.w; i++) {
for (int j = 0; j < w; j++) {
int selected = (int)(indices_ptr[i] + 0.5);
outptr[i * w + j] = ptr[selected * w + j];
}
}
return 0;
}
if (dims == 2 && positive_axis == 1 && indices_dims == 1) {
int w = bottom_blob.w;
int h = bottom_blob.h;
top_blob.create(h, indices.w, elemsize, opt.blob_allocator);
// w -> h
// h -> indices.w
// h * w -> indices.w * h
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
float *outptr = top_blob;
for (int i = 0; i < indices.w; i++) {
for (int j = 0; j < h; j++) {
int selected = (int)(indices_ptr[i] + 0.5);
outptr[i * h + j] = ptr[j * w + selected];
}
}
return 0;
}
if (dims == 2 && positive_axis == 0 && indices_dims == 2) {
int w = bottom_blob.w;
int h = bottom_blob.h;
top_blob.create(w, indices.w, indices.h, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
for (int k = 0; k < indices.h; k++) {
float *outptr = top_blob.channel(k);
for (int i = 0; i < indices.w; i++) {
for (int j = 0; j < w; j++) {
int selected = (int)(indices_ptr[k * indices.w + i] + 0.5);
outptr[i * w + j] = ptr[selected * w + j];
}
}
}
return 0;
}
if (dims == 2 && positive_axis == 1 && indices_dims == 2) {
int w = bottom_blob.w;
int h = bottom_blob.h;
top_blob.create(h, indices.w, indices.h, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
const float *ptr = bottom_blob;
for (int k = 0; k < indices.h; k++) {
float *outptr = top_blob.channel(k);
for (int i = 0; i < indices.w; i++) {
for (int j = 0; j < h; j++) {
int selected = (int)(indices_ptr[k * indices.w + i] + 0.5);
outptr[i * h + j] = ptr[j * w + selected];
}
}
}
return 0;
}
if (dims == 3 && positive_axis == 0 && indices_dims == 1) {
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
top_blob.create(w, h, indices.w, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
for (int i = 0; i < indices.w; i++) {
int selected = (int)(indices_ptr[i] + 0.5);
const unsigned char *ptr = bottom_blob.channel(selected);
unsigned char *outptr = top_blob.channel(i);
memcpy(outptr, ptr, w * h * elemsize);
}
return 0;
}
if (dims == 3 && positive_axis == 1 && indices_dims == 1) {
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
top_blob.create(w, channels, indices.w, elemsize, opt.blob_allocator);
#pragma omp parallel for num_threads(opt.num_threads)
// use parallel programming
for (int i = 0; i < indices.w; i++) {
int selected = (int)(indices_ptr[i] + 0.5);
float *outptr = top_blob.channel(i);
for (int j = 0; j < channels; j++) {
const float *ptr = bottom_blob.channel(j);
for (int k = 0; k < w; k++) {
outptr[j * w + k] = ptr[selected * w + k];
}
}
}
return 0;
}
if (dims == 3 && positive_axis == 2 && indices_dims == 1) {
fprintf(stderr, "gather: dim = 3\n");
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
top_blob.create(h, channels, indices.w, elemsize, opt.blob_allocator);
#pragma omp parallel for num_threads(opt.num_threads)
// use parallel programming
for (int i = 0; i < indices.w; i++) {
int selected = (int)(indices_ptr[i] + 0.5);
float *outptr = top_blob.channel(i);
for (int j = 0; j < channels; j++) {
const float *ptr = bottom_blob.channel(j);
for (int k = 0; k < h; k++) {
outptr[j * h + k] = ptr[k * w + selected];
}
}
}
fprintf(stderr, "top_blob.size: (%d %d %d)\n", top_blob.c, top_blob.h,
top_blob.w);
return 0;
}
return 0;
}
} // namespace mmlab

View File

@ -0,0 +1,24 @@
#ifndef LAYER_GATHER_H
#define LAYER_GATHER_H
#include "layer.h"
namespace mmlab {
class Gather : public ncnn::Layer {
public:
Gather();
virtual int load_param(const ncnn::ParamDict& pd);
virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs,
std::vector<ncnn::Mat>& top_blobs,
const ncnn::Option& opt) const;
public:
int axis;
};
} // namespace mmlab
#endif // LAYER_GATHER_H

View File

@ -0,0 +1,30 @@
#ifndef NCNN_OPS_DEFINER_H
#define NCNN_OPS_DEFINER_H
#include <string>
#include "layer.h"
#include "ncnn_ops_register.h"
namespace mmlab {
class NCNNOpsDefiner {
public:
NCNNOpsDefiner(const std::string& ops_name,
const ncnn::layer_creator_func& creator_func = 0,
const ncnn::layer_destroyer_func& destroyer_func = 0)
: _ops_name(ops_name) {
get_mm_layer_creator()[_ops_name.c_str()] = creator_func;
}
private:
const std::string _ops_name;
};
#define DEFINE_NCNN_OPS(ops_name, OpsLayer) \
static mmlab::NCNNOpsDefiner NCNNOpsDefiner##ops_name{ \
#ops_name, OpsLayer##_layer_creator};
} // namespace mmlab
#endif

View File

@ -0,0 +1,35 @@
#include "ncnn_ops_register.h"
#include <iostream>
std::map<const char *, ncnn::layer_creator_func> &get_mm_layer_creator() {
static std::map<const char *, ncnn::layer_creator_func> _layer_creator_map;
return _layer_creator_map;
}
std::map<const char *, ncnn::layer_destroyer_func> &get_mm_layer_destroyer() {
static std::map<const char *, ncnn::layer_destroyer_func>
_layer_destroyer_map;
return _layer_destroyer_map;
}
int register_mm_custom_layers(ncnn::Net &net) {
auto &layer_creator_map = get_mm_layer_creator();
auto &layer_destroyer_map = get_mm_layer_destroyer();
for (auto const &creator_pair : layer_creator_map) {
auto creator_name = creator_pair.first;
auto creator_func = creator_pair.second;
ncnn::layer_destroyer_func destroyer_func = 0;
if (layer_destroyer_map.find(creator_name) != layer_destroyer_map.end()) {
destroyer_func = layer_destroyer_map[creator_name];
}
int ret =
net.register_custom_layer(creator_name, creator_func, destroyer_func);
if (0 != ret) {
return ret;
}
}
return 0;
}

View File

@ -0,0 +1,16 @@
#ifndef NCNN_OPS_REGISTER_H
#define NCNN_OPS_REGISTER_H
#include <map>
#include <string>
#include "net.h"
extern "C" {
std::map<const char*, ncnn::layer_creator_func>& get_mm_layer_creator();
std::map<const char*, ncnn::layer_destroyer_func>& get_mm_layer_destroyer();
int register_mm_custom_layers(ncnn::Net& net);
}
#endif

View File

@ -0,0 +1,44 @@
#include "shape.h"
#include "../ncnn_ops_definer.h"
namespace mmlab {
using namespace ncnn;
DEFINE_LAYER_CREATOR(Shape)
DEFINE_NCNN_OPS(Shape, Shape)
Shape::Shape() {
one_blob_only = true;
support_inplace = false;
}
int Shape::forward(const Mat &bottom_blob, Mat &top_blob,
const Option &opt) const {
int dims = bottom_blob.dims;
int w = bottom_blob.w;
size_t elemsize = sizeof(bottom_blob.w);
top_blob.create(dims, elemsize, opt.blob_allocator);
if (top_blob.empty()) {
return -100;
}
float *outptr = top_blob;
if (dims == 1) {
outptr[0] = w;
return 0;
}
if (dims == 2) {
int h = bottom_blob.h;
outptr[0] = h;
outptr[1] = w;
return 0;
}
if (dims == 3) {
int h = bottom_blob.h;
int channels = bottom_blob.c;
outptr[0] = channels;
outptr[1] = h;
outptr[2] = w;
return 0;
}
}
} // namespace mmlab

View File

@ -0,0 +1,18 @@
#ifndef LAYER_SHAPE_H
#define LAYER_SHAPE_H
#include "layer.h"
namespace mmlab {
class Shape : public ncnn::Layer {
public:
Shape();
virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,
const ncnn::Option& opt) const;
};
} // namespace mmlab
#endif // LAYER_SHAPE_H

View File

@ -0,0 +1,12 @@
# ncnn
set(ncnn_DIR ${NCNN_DIR}/build/install/lib/cmake/ncnn)
find_package(ncnn)
# pybind11
set(PYBIND11_DIR ${NCNN_DIR}/python/pybind11)
add_subdirectory(${PYBIND11_DIR} pybind11)
include_directories(${pybind11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS})
pybind11_add_module(ncnn_ext ncnn_ext.cpp)
target_link_libraries(ncnn_ext PUBLIC ncnn ${SHARED_TARGET})
set_target_properties(ncnn_ext PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/mmdeploy/apis/ncnn)

View File

@ -0,0 +1,11 @@
#include <pybind11/pybind11.h>
#include "../ops/ncnn_ops_register.h"
#include "net.h"
PYBIND11_MODULE(ncnn_ext, m) {
m.def(
"register_mm_custom_layers",
[](ncnn::Net &net) { return register_mm_custom_layers(net); },
"register all mmlab custom ncnn layers.");
}
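
A minimal pyncnn-side sketch of how this extension is meant to be used: register the mmlab custom layers on an ncnn.Net before loading a converted model, otherwise loading fails on the CustomReshape/Gather/Shape layer types. The resnet18.param/.bin file names are placeholders.

import ncnn  # pyncnn
from mmdeploy.apis.ncnn import ncnn_ext

net = ncnn.Net()
# must happen before load_param, so the parser knows the custom layer types
ncnn_ext.register_mm_custom_layers(net)
net.load_param('resnet18.param')  # placeholder names for onnx2ncnn outputs
net.load_model('resnet18.bin')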

View File

@ -0,0 +1 @@
backend = 'ncnn'

View File

@ -0,0 +1 @@
_base_ = ['./mmcls_base.py', '../_base_/backends/ncnn.py']

View File

@ -0,0 +1,17 @@
import importlib
import os.path as osp
from .init_plugins import get_onnx2ncnn_path, get_ops_path
__all__ = ['get_ops_path', 'get_onnx2ncnn_path']
def is_available():
ncnn_ops_path = get_ops_path()
if not osp.exists(ncnn_ops_path):
return False
has_pyncnn = importlib.util.find_spec('ncnn') is not None
has_pyncnn_ext = importlib.util.find_spec(
'mmdeploy.apis.ncnn.ncnn_ext') is not None
return has_pyncnn and has_pyncnn_ext
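
A hedged sketch of the intended guard around NCNN-specific code paths, mirroring the check added to tools/deploy.py below; the logging message is illustrative.

import logging

from mmdeploy.apis.ncnn import get_onnx2ncnn_path, get_ops_path, is_available

if is_available():
    print('custom ops library:', get_ops_path())
    print('onnx2ncnn binary:', get_onnx2ncnn_path())
else:
    # missing libmmlab_ncnn_ops.so, pyncnn, or the ncnn_ext extension
    logging.error('ncnn backend support is not available.')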

View File

@ -0,0 +1,25 @@
import glob
import os
def get_ops_path():
"""Get NCNN custom ops library path."""
wildcard = os.path.abspath(
os.path.join(
os.path.dirname(__file__),
'../../../build/lib/libmmlab_ncnn_ops.so'))
paths = glob.glob(wildcard)
lib_path = paths[0] if len(paths) > 0 else ''
return lib_path
def get_onnx2ncnn_path():
"""Get onnx2ncnn path."""
wildcard = os.path.abspath(
os.path.join(
os.path.dirname(__file__), '../../../build/bin/onnx2ncnn'))
paths = glob.glob(wildcard)
lib_path = paths[0] if len(paths) > 0 else ''
return lib_path
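
A sketch of converting an exported ONNX model with the onnx2ncnn binary located by get_onnx2ncnn_path(), the same subprocess pattern this commit adds to tools/deploy.py; work_dir/end2end.onnx is a placeholder path.

import os.path as osp
import subprocess

from mmdeploy.apis.ncnn import get_onnx2ncnn_path

onnx_path = 'work_dir/end2end.onnx'  # placeholder ONNX export
onnx_name = osp.splitext(osp.basename(onnx_path))[0]
save_param = osp.join('work_dir', onnx_name + '.param')  # text graph
save_bin = osp.join('work_dir', onnx_name + '.bin')      # weights
subprocess.call([get_onnx2ncnn_path(), onnx_path, save_param, save_bin])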

View File

@ -15,8 +15,8 @@ def is_available():
if is_available():
from .onnx2tensorrt import onnx2tensorrt
from .tensorrt_utils import (TRTWrapper, load_trt_engine,
create_trt_engine, save_trt_engine)
from .tensorrt_utils import (TRTWrapper, create_trt_engine,
load_trt_engine, save_trt_engine)
# load tensorrt plugin lib
load_tensorrt_plugin()

View File

@ -104,6 +104,13 @@ def init_backend_model(model_files: Sequence[str],
from mmdeploy.mmcls.export import TensorRTClassifier
backend_model = TensorRTClassifier(
model_files[0], class_names=class_names, device_id=device_id)
elif backend == 'ncnn':
from mmdeploy.mmcls.export import NCNNClassifier
backend_model = NCNNClassifier(
model_files[0],
model_files[1],
class_names=class_names,
device_id=device_id)
else:
raise NotImplementedError(f'Unsupported backend type: {backend}')
return backend_model

View File

@ -1,4 +1,8 @@
from .model_wrappers import ONNXRuntimeClassifier, TensorRTClassifier
from .model_wrappers import (NCNNClassifier, ONNXRuntimeClassifier,
TensorRTClassifier)
from .prepare_input import create_input
__all__ = ['create_input', 'ONNXRuntimeClassifier', 'TensorRTClassifier']
__all__ = [
'create_input', 'NCNNClassifier', 'ONNXRuntimeClassifier',
'TensorRTClassifier'
]

View File

@ -5,12 +5,34 @@ import torch
from mmcls.models import BaseClassifier
class ONNXRuntimeClassifier(BaseClassifier):
class DeployBaseClassifier(BaseClassifier):
"""Base Class of Wrapper for classifier's inference."""
def __init__(self, class_names, device_id):
super(DeployBaseClassifier, self).__init__()
self.CLASSES = class_names
self.device_id = device_id
def simple_test(self, img, *args, **kwargs):
raise NotImplementedError('This method is not implemented.')
def extract_feat(self, imgs):
raise NotImplementedError('This method is not implemented.')
def forward_train(self, imgs, **kwargs):
raise NotImplementedError('This method is not implemented.')
def forward_test(self, imgs, *args, **kwargs):
raise NotImplementedError('This method is not implemented.')
class ONNXRuntimeClassifier(DeployBaseClassifier):
"""Wrapper for classifier's inference with ONNXRuntime."""
def __init__(self, onnx_file, class_names, device_id):
super(ONNXRuntimeClassifier, self).__init__()
super(ONNXRuntimeClassifier, self).__init__(class_names, device_id)
import onnxruntime as ort
sess = ort.InferenceSession(onnx_file)
providers = ['CPUExecutionProvider']
@ -22,21 +44,10 @@ class ONNXRuntimeClassifier(BaseClassifier):
sess.set_providers(providers, options)
self.sess = sess
self.CLASSES = class_names
self.device_id = device_id
self.io_binding = sess.io_binding()
self.output_names = [_.name for _ in sess.get_outputs()]
self.is_cuda_available = is_cuda_available
def simple_test(self, img, *args, **kwargs):
raise NotImplementedError('This method is not implemented.')
def extract_feat(self, imgs):
raise NotImplementedError('This method is not implemented.')
def forward_train(self, imgs, **kwargs):
raise NotImplementedError('This method is not implemented.')
def forward_test(self, imgs, *args, **kwargs):
input_data = imgs
# set io binding for inputs/outputs
@ -59,10 +70,10 @@ class ONNXRuntimeClassifier(BaseClassifier):
return list(results)
class TensorRTClassifier(BaseClassifier):
class TensorRTClassifier(DeployBaseClassifier):
def __init__(self, trt_file, class_names, device_id):
super(TensorRTClassifier, self).__init__()
super(TensorRTClassifier, self).__init__(class_names, device_id)
from mmdeploy.apis.tensorrt import TRTWrapper, load_tensorrt_plugin
try:
load_tensorrt_plugin()
@ -72,17 +83,6 @@ class TensorRTClassifier(BaseClassifier):
model = TRTWrapper(trt_file)
self.model = model
self.device_id = device_id
self.CLASSES = class_names
def simple_test(self, img, *args, **kwargs):
raise NotImplementedError('This method is not implemented.')
def extract_feat(self, imgs):
raise NotImplementedError('This method is not implemented.')
def forward_train(self, imgs, **kwargs):
raise NotImplementedError('This method is not implemented.')
def forward_test(self, imgs, *args, **kwargs):
input_data = imgs
@ -91,3 +91,33 @@ class TensorRTClassifier(BaseClassifier):
results = results.detach().cpu().numpy()
return list(results)
class NCNNClassifier(DeployBaseClassifier):
def __init__(self, ncnn_param_file, ncnn_bin_file, class_names, device_id):
super(NCNNClassifier, self).__init__(class_names, device_id)
import ncnn
from mmdeploy.apis.ncnn import ncnn_ext
self.net = ncnn.Net()
ncnn_ext.register_mm_custom_layers(self.net)
self.net.load_param(ncnn_param_file)
self.net.load_model(ncnn_bin_file)
def forward_test(self, imgs, *args, **kwargs):
import ncnn
assert len(imgs.shape) == 4
# Only for batch == 1 now.
assert imgs.shape[0] == 1
input_data = imgs[0].cpu().numpy()
input_data = ncnn.Mat(input_data)
if self.device_id == -1:
ex = self.net.create_extractor()
ex.input('input', input_data)
ret, results = ex.extract('output')
results = np.array(results)
assert ret != -100, 'Memory allocation failed in ncnn layers'
assert ret == 0
return [results]
else:
raise NotImplementedError('GPU device is not implemented.')
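
A CPU-only usage sketch for NCNNClassifier; the .param/.bin names, class names, and the 1x3x224x224 input are placeholders, and device_id must be -1 because the GPU path raises NotImplementedError.

import torch

from mmdeploy.mmcls.export import NCNNClassifier

model = NCNNClassifier(
    'resnet18.param',  # placeholder onnx2ncnn outputs
    'resnet18.bin',
    class_names=['cat', 'dog'],  # placeholder class names
    device_id=-1)  # CPU only
# forward_test accepts a single-image batch and returns a list of score arrays
scores = model.forward_test(torch.rand(1, 3, 224, 224))
print(scores[0].shape)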

View File

@ -55,7 +55,7 @@ def instance_norm(g, input, num_groups, weight, bias, eps, cudnn_enabled):
# Norm has shape [N, C, *] so we reshape weight and bias to [C, *]
axes = list(range(1, input_rank - 1))
from torch.onnx.symbolic_opset9 import mul, add
from torch.onnx.symbolic_opset9 import add, mul
return add(g, mul(g, norm, _unsqueeze_helper(g, weight, axes)),
_unsqueeze_helper(g, bias, axes))

View File

@ -69,9 +69,10 @@ def test_function_rewriter():
def test_module_rewriter():
from mmdeploy.core import MODULE_REWRITER, patch_model
from torchvision.models.resnet import resnet50
from mmdeploy.core import MODULE_REWRITER, patch_model
@MODULE_REWRITER.register_rewrite_module(
module_type='torchvision.models.resnet.Bottleneck', backend='tensorrt')
class BottleneckWrapper(torch.nn.Module):
@ -105,10 +106,11 @@ def test_module_rewriter():
def test_symbolic_register():
import onnx
from torch.autograd import Function
import mmdeploy
from mmdeploy.core import SYMBOLIC_REGISTER, register_extra_symbolics
from torch.autograd import Function
import onnx
class TestFunc(Function):

View File

@ -1,6 +1,7 @@
import argparse
import logging
import os.path as osp
import subprocess
from functools import partial
import mmcv
@ -140,6 +141,29 @@ def main():
backend_files.append(osp.join(args.work_dir, save_file))
elif backend == 'ncnn':
from mmdeploy.apis.ncnn import get_onnx2ncnn_path
from mmdeploy.apis.ncnn import is_available as is_available_ncnn
if not is_available_ncnn():
logging.error('ncnn support is not available.')
exit(-1)
onnx2ncnn_path = get_onnx2ncnn_path()
backend_files = []
for onnx_path in onnx_files:
onnx_name = osp.splitext(osp.split(onnx_path)[1])[0]
save_param = onnx_name + '.param'
save_bin = onnx_name + '.bin'
save_param = osp.join(args.work_dir, save_param)
save_bin = osp.join(args.work_dir, save_bin)
subprocess.call([onnx2ncnn_path, onnx_path, save_param, save_bin])
backend_files += [save_param, save_bin]
# check model outputs by visualization
codebase = deploy_cfg['codebase']