// mmdeploy/csrc/net/ppl/ppl_net.cpp
// Copyright (c) OpenMMLab. All rights reserved.
#include "ppl_net.h"
#include "archive/value_archive.h"
#include "core/logger.h"
#include "core/model.h"
#include "core/utils/formatter.h"
#include "ppl/nn/common/logger.h"
#include "ppl/nn/models/onnx/onnx_runtime_builder_factory.h"
#if PPL_NN_HAS_X86
#include "ppl/nn/engines/x86/engine_factory.h"
#include "ppl/nn/engines/x86/x86_options.h"
#endif
#if PPL_NN_HAS_CUDA
#include "ppl/nn/engines/cuda/cuda_options.h"
#include "ppl/nn/engines/cuda/engine_factory.h"
#endif
namespace mmdeploy {
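// Map PPLNN status codes and factory results onto mmdeploy's Result<>:
// a non-zero RetCode becomes Status(eFail), a null pointer likewise, and a
// valid pointer is wrapped in a std::unique_ptr that assumes ownership.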
Result<void> ppl_try(int code) {
if (code == 0) {
return success();
}
ERROR("ppl error: {}", ppl::common::GetRetCodeStr(code));
return Status(eFail);
}
template <typename T>
Result<std::unique_ptr<T>> ppl_try(T* v) {
if (v) {
return success(v);
}
return Status(eFail);
}
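// Build an mmdeploy Tensor that mirrors a PPLNN tensor's name on the given
// device. The data type defaults to float; a shape containing non-positive
// (i.e. dynamic) dimensions is left empty until the first Reshape.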
Tensor PPLNet::CreateInternalTensor(ppl::nn::Tensor* src, Device device) {
auto desc = src->GetShape();
auto name = src->GetName();
std::vector<int64_t> shape{desc.GetDims(), desc.GetDims() + desc.GetDimCount()};
if (std::any_of(begin(shape), end(shape), [](auto x) { return x <= 0; })) {
shape = {};
}
return TensorDesc{.device = device, .data_type = DataType::kFLOAT, .shape = shape, .name = name};
}
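// Load the ONNX model referenced by the pipeline config, create the engine
// matching the target device (CUDA or x86), build a PPLNN runtime from it,
// and mirror the runtime's input/output tensors as external mmdeploy tensors.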
Result<void> PPLNet::Init(const Value& args) {
auto& context = args["context"];
device_ = context["device"].get<Device>();
stream_ = context["stream"].get<Stream>();
auto name = args["name"].get<std::string>();
auto model = context["model"].get<Model>();
OUTCOME_TRY(auto config, model.GetModelConfig(name));
OUTCOME_TRY(auto onnx, model.ReadFile(config.net));
#if PPL_NN_HAS_CUDA
if (device_.is_device()) {
engines_.emplace_back(ppl::nn::CudaEngineFactory::Create({}));
// Use default algorithms until PPL can set algorithms from a memory buffer
// since the optimization process is really slow
engines_.back()->Configure(ppl::nn::CUDA_CONF_USE_DEFAULT_ALGORITHMS, true);
}
#endif
#if PPL_NN_HAS_X86
if (device_.is_host()) {
engines_.emplace_back(ppl::nn::X86EngineFactory::Create({}));
}
#endif
std::vector<ppl::nn::Engine*> engines;
for (const auto& engine : engines_) {
engines.push_back(engine.get());
}
OUTCOME_TRY(auto builder, ppl_try(ppl::nn::OnnxRuntimeBuilderFactory::Create(
onnx.data(), onnx.size(), engines.data(), engines.size())));
OUTCOME_TRY(auto runtime, ppl_try(builder->CreateRuntime()));
for (int i = 0; i < runtime->GetInputCount(); ++i) {
auto src = runtime->GetInputTensor(i);
inputs_internal_.push_back(src);
inputs_external_.push_back(CreateInternalTensor(src, device_));
/// debug only
auto& desc = inputs_internal_[i]->GetShape();
std::vector<long> shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount());
DEBUG("input {}: datatype = {}, dataformat = {}, shape = {}", i,
ppl::common::GetDataTypeStr(desc.GetDataType()),
ppl::common::GetDataFormatStr(desc.GetDataFormat()), shape_);
}
for (int i = 0; i < runtime->GetOutputCount(); ++i) {
auto src = runtime->GetOutputTensor(i);
outputs_internal_.push_back(src);
outputs_external_.push_back(CreateInternalTensor(src, device_));
auto& desc = outputs_internal_[i]->GetShape();
std::vector<long> shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount());
DEBUG("output {}: datatype = {}, dataformat = {}, shape = {}", i,
ppl::common::GetDataTypeStr(desc.GetDataType()),
ppl::common::GetDataFormatStr(desc.GetDataFormat()), shape_);
}
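// Output shapes can be cheaply inferred later only if all inputs and outputs
// share a common leading batch dimension; record that fact here.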
auto input_shapes = GetShapes(inputs_external_);
if (auto input_batch_size = GetBatchSize(input_shapes)) {
auto output_shapes = GetShapes(outputs_external_);
if (auto output_batch_size = GetBatchSize(output_shapes)) {
if (input_batch_size.value() == output_batch_size.value()) {
can_infer_output_shapes_ = true;
}
}
}
runtime_ = std::move(runtime);
return success();
}
Result<void> PPLNet::Deinit() {
try {
runtime_.reset();
return success();
} catch (...) {
return Status(eFail);
}
}
static TensorShape GetShape(const PPLTensor& tensor) {
auto& desc = tensor.GetShape();
return {desc.GetDims(), desc.GetDims() + desc.GetDimCount()};
}
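// Conversions between mmdeploy and ppl::common data type enums; unsupported
// types surface as Status(eNotSupported) instead of a silent fallback.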
Result<ppl::common::datatype_t> GetPPLDataType(DataType data_type) {
switch (data_type) {
case DataType::kFLOAT:
return ppl::common::DATATYPE_FLOAT32;
case DataType::kHALF:
return ppl::common::DATATYPE_FLOAT16;
case DataType::kINT8:
return ppl::common::DATATYPE_INT8;
case DataType::kINT32:
return ppl::common::DATATYPE_INT32;
case DataType::kINT64:
return ppl::common::DATATYPE_INT64;
default:
return Status(eNotSupported);
}
}
Result<DataType> GetMMDeployDataType(ppl::common::datatype_t data_type) {
switch (data_type) {
case ppl::common::DATATYPE_FLOAT32:
return DataType::kFLOAT;
case ppl::common::DATATYPE_FLOAT16:
return DataType::kHALF;
case ppl::common::DATATYPE_INT8:
return DataType::kINT8;
case ppl::common::DATATYPE_INT32:
return DataType::kINT32;
case ppl::common::DATATYPE_INT64:
return DataType::kINT64;
default:
return Status(eNotSupported);
}
}
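// Run inference: wait for pending work on the stream, execute the PPLNN
// runtime, then copy every internal output back into its external tensor.
// If the runtime changed an output's dtype, the external tensor is rebuilt;
// if only the shape changed, it is reshaped in place.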
Result<void> PPLNet::Forward() {
OUTCOME_TRY(stream_.Wait());
OUTCOME_TRY(ppl_try(runtime_->Run()));
OUTCOME_TRY(ppl_try(runtime_->Sync()));
for (int i = 0; i < outputs_external_.size(); ++i) {
auto& internal = *outputs_internal_[i];
auto format = internal.GetShape().GetDataFormat();
if (format != ppl::common::DATAFORMAT_NDARRAY) {
ERROR("output {}'s format is {}, only NDARRAY is currently supported", i,
ppl::common::GetDataFormatStr(format));
return Status(eNotSupported);
}
auto& external = outputs_external_[i];
auto dtype_int = internal.GetShape().GetDataType();
OUTCOME_TRY(auto dtype_ext, GetPPLDataType(external.data_type()));
auto shape_int = GetShape(internal);
auto shape_ext = external.shape();
auto data_int = internal.GetBufferPtr();
auto data_ext = external.data();
if (shape_int != shape_ext || dtype_int != dtype_ext || data_int != data_ext) {
if (dtype_int != dtype_ext) {
auto desc = external.desc();
desc.shape = shape_int;
OUTCOME_TRY(desc.data_type, GetMMDeployDataType(dtype_int));
external = Tensor(desc, external.allocator());
} else {
external.Reshape(shape_int);
}
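// View the runtime-owned buffer as a Tensor via a non-owning shared_ptr
// (the no-op deleter leaves ownership with PPLNN), then copy it into the
// external tensor on our stream.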
std::shared_ptr<void> data(data_int, [](void*) {});
if (external.size() > 0) {
OUTCOME_TRY(Tensor(external.desc(), data).CopyTo(external, stream_));
} else {
WARN("copy skipped due to zero sized tensor: {} {}", external.name(), external.shape());
}
}
}
OUTCOME_TRY(stream_.Wait());
return success();
}
Result<void> PPLNet::ForwardAsync(Event* event) { return Status(eNotSupported); }
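// Point a PPLNN tensor at an mmdeploy tensor's buffer, copying over its
// dtype and shape and forcing NDARRAY layout.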
Result<void> ReshapeLike(PPLTensor& dst, Tensor& src) {
auto& dst_desc = dst.GetShape();
auto& src_desc = src.desc();
OUTCOME_TRY(auto data_type, GetPPLDataType(src_desc.data_type));
dst_desc.SetDataType(data_type);
dst_desc.SetDataFormat(ppl::common::DATAFORMAT_NDARRAY);
dst_desc.Reshape({begin(src_desc.shape), end(src_desc.shape)});
dst.SetBufferPtr(src.data());
return success();
}
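// Propagate new input shapes to the runtime and, when possible, pre-compute
// the matching output shapes so the output buffers can be bound up front.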
Result<void> PPLNet::Reshape(Span<TensorShape> input_shapes) {
auto prev_in_shapes = GetShapes(inputs_external_);
auto prev_out_shapes = GetShapes(outputs_external_);
for (int i = 0; i < inputs_external_.size(); ++i) {
auto& input = inputs_external_[i];
input.Reshape(input_shapes[i]);
OUTCOME_TRY(ReshapeLike(*inputs_internal_[i], input));
}
if (can_infer_output_shapes_) {
OUTCOME_TRY(auto output_shapes,
InferOutputShapes(input_shapes, prev_in_shapes, prev_out_shapes));
// ERROR("inferred output shapes: {}", output_shapes);
for (int i = 0; i < outputs_external_.size(); ++i) {
auto& output = outputs_external_[i];
output.Reshape(output_shapes[i]);
OUTCOME_TRY(ReshapeLike(*outputs_internal_[i], output));
}
}
return success();
}
Result<Span<Tensor>> PPLNet::GetInputTensors() { return inputs_external_; }
Result<Span<Tensor>> PPLNet::GetOutputTensors() { return outputs_external_; }
std::vector<TensorShape> PPLNet::GetShapes(Span<Tensor> tensors) {
std::vector<TensorShape> shapes;
shapes.reserve(tensors.size());
for (const auto& t : tensors) {
shapes.push_back(t.shape());
}
return shapes;
}
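// Treat the leading dimension as the batch size; fails if any shape is empty
// or the leading dimensions disagree across tensors.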
Result<int64_t> PPLNet::GetBatchSize(Span<TensorShape> shapes) {
int64_t batch_size = -1;
for (const auto& s : shapes) {
if (s.empty()) {
return Status(eNotSupported);
}
if (batch_size < 0) {
batch_size = s.front();
} else if (batch_size != s.front()) {
return Status(eNotSupported);
}
}
return batch_size;
}
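// Shape-inference shortcut: if the new input shapes differ from the previous
// ones only in the batch dimension, the previous output shapes with the new
// batch size substituted are still valid.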
Result<std::vector<TensorShape>> PPLNet::InferOutputShapes(Span<TensorShape> input_shapes,
Span<TensorShape> prev_in_shapes,
Span<TensorShape> prev_out_shapes) {
OUTCOME_TRY(auto batch_size, GetBatchSize(input_shapes));
if (input_shapes.size() != prev_in_shapes.size()) {
return Status(eInvalidArgument);
}
for (int i = 0; i < input_shapes.size(); ++i) {
prev_in_shapes[i][0] = batch_size;
if (prev_in_shapes[i] != input_shapes[i]) {
return Status(eNotSupported);
}
}
std::vector<TensorShape> output_shapes(prev_out_shapes.begin(), prev_out_shapes.end());
for (auto& shape : output_shapes) {
shape[0] = batch_size;
}
return output_shapes;
}
PPLNet::~PPLNet() = default;
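// Factory registered under the name "pplnn" so the SDK can instantiate this
// backend from pipeline configs.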
class PPLNetCreator : public Creator<Net> {
public:
const char* GetName() const override { return "pplnn"; }
int GetVersion() const override { return 0; }
std::unique_ptr<Net> Create(const Value& args) override {
auto p = std::make_unique<PPLNet>();
if (auto r = p->Init(args)) {
return p;
} else {
ERROR("error creating PPLNet: {}", r.error().message().c_str());
return nullptr;
}
}
};
REGISTER_MODULE(Net, PPLNetCreator);
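// A minimal usage sketch of this Net implementation (the "args" Value is
// normally assembled by the SDK and must provide the "context" and "name"
// entries parsed in Init above; the names below are illustrative only):
//
//   auto net = std::make_unique<PPLNet>();
//   OUTCOME_TRY(net->Init(args));
//   OUTCOME_TRY(net->Reshape(input_shapes));           // bind input shapes
//   OUTCOME_TRY(auto inputs, net->GetInputTensors());  // ...fill inputs...
//   OUTCOME_TRY(net->Forward());
//   OUTCOME_TRY(auto outputs, net->GetOutputTensors());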
} // namespace mmdeploy