mmdeploy/csrc/net/trt/trt_net.cpp

// Copyright (c) OpenMMLab. All rights reserved.

#include "trt_net.h"

#include <sstream>

#include "core/logger.h"
#include "core/model.h"
#include "core/module.h"
#include "core/utils/formatter.h"

namespace mmdeploy {

namespace trt_detail {
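// Routes TensorRT's ILogger output to MMDeploy's logging macros; info-level
// messages are suppressed, warnings and errors are forwarded.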
class TRTLogger : public nvinfer1::ILogger {
 public:
  void log(Severity severity, const char* msg) noexcept override {
    switch (severity) {
      case Severity::kINFO:
        // MMDEPLOY_INFO("TRTNet: {}", msg);
        break;
      case Severity::kWARNING:
        MMDEPLOY_WARN("TRTNet: {}", msg);
        break;
      case Severity::kERROR:
      case Severity::kINTERNAL_ERROR:
        MMDEPLOY_ERROR("TRTNet: {}", msg);
        break;
      default:
        break;
    }
  }

  static TRTLogger& get() {
    static TRTLogger trt_logger{};
    return trt_logger;
  }
};
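// Conversion helpers between MMDeploy's TensorShape and TensorRT's nvinfer1::Dims.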
nvinfer1::Dims to_dims(const TensorShape& shape) {
  nvinfer1::Dims dims{};
  dims.nbDims = shape.size();
  for (size_t i = 0; i < shape.size(); ++i) {
    dims.d[i] = shape[i];
  }
  return dims;
}

TensorShape to_shape(const nvinfer1::Dims& dims) {
  TensorShape shape(dims.nbDims);
  for (int i = 0; i < shape.size(); ++i) {
    shape[i] = dims.d[i];
  }
  return shape;
}

}  // namespace trt_detail
std::string to_string(const nvinfer1::Dims& dims) {
  std::stringstream ss;
  ss << "(";
  for (int i = 0; i < dims.nbDims; ++i) {
    if (i) ss << ", ";
    ss << dims.d[i];
  }
  ss << ")";
  return ss.str();
}
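// Turns a boolean condition into a Result<void>, logging `msg` and returning
// status `e` on failure. TRT_TRY propagates the failure out of the enclosing
// function via OUTCOME_TRY.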
static inline Result<void> trt_try(bool code, const char* msg = nullptr, Status e = Status(eFail)) {
  if (code) {
    return success();
  }
  if (msg) {
    MMDEPLOY_ERROR("{}", msg);
  }
  return e;
}

#define TRT_TRY(...) OUTCOME_TRY(trt_try(__VA_ARGS__))

TRTNet::~TRTNet() = default;
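// Maps TensorRT element types to MMDeploy's DataType; any other type is rejected
// with eNotSupported.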
static Result<DataType> MapDataType(nvinfer1::DataType dtype) {
  switch (dtype) {
    case nvinfer1::DataType::kFLOAT:
      return DataType::kFLOAT;
    case nvinfer1::DataType::kHALF:
      return DataType::kHALF;
    case nvinfer1::DataType::kINT8:
      return DataType::kINT8;
    case nvinfer1::DataType::kINT32:
      return DataType::kINT32;
    default:
      return Status(eNotSupported);
  }
}
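// Reads the serialized engine ("plan") from the SDK model, deserializes it,
// builds input/output tensor descriptors from the engine bindings, and creates
// an execution context bound to optimization profile 0.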
Result<void> TRTNet::Init(const Value& args) {
  using namespace trt_detail;
  auto& context = args["context"];
  device_ = context["device"].get<Device>();
  if (device_.is_host()) {
    MMDEPLOY_ERROR("TRTNet: device must be a GPU!");
    return Status(eNotSupported);
  }
  stream_ = context["stream"].get<Stream>();
  event_ = Event(device_);

  auto name = args["name"].get<std::string>();
  auto model = context["model"].get<Model>();
  OUTCOME_TRY(auto config, model.GetModelConfig(name));
  OUTCOME_TRY(auto plan, model.ReadFile(config.net));

  TRTWrapper runtime = nvinfer1::createInferRuntime(TRTLogger::get());
  TRT_TRY(!!runtime, "failed to create TRT infer runtime");

  engine_ = runtime->deserializeCudaEngine(plan.data(), plan.size());
  TRT_TRY(!!engine_, "failed to deserialize TRT CUDA engine");
  TRT_TRY(engine_->getNbOptimizationProfiles() == 1, "only 1 optimization profile supported",
          Status(eNotSupported));

  auto n_bindings = engine_->getNbBindings();
  for (int i = 0; i < n_bindings; ++i) {
    auto binding_name = engine_->getBindingName(i);
    auto dims = engine_->getBindingDimensions(i);
    if (engine_->isShapeBinding(i)) {
      MMDEPLOY_ERROR("shape binding is not supported.");
      return Status(eNotSupported);
    }
    OUTCOME_TRY(auto dtype, MapDataType(engine_->getBindingDataType(i)));
    TensorDesc desc{device_, dtype, to_shape(dims), binding_name};
    if (engine_->bindingIsInput(i)) {
      MMDEPLOY_DEBUG("input binding {} {} {}", i, binding_name, to_string(dims));
      input_ids_.push_back(i);
      input_names_.emplace_back(binding_name);
      input_tensors_.emplace_back(desc, Buffer());
    } else {
      MMDEPLOY_DEBUG("output binding {} {} {}", i, binding_name, to_string(dims));
      output_ids_.push_back(i);
      output_names_.emplace_back(binding_name);
      output_tensors_.emplace_back(desc, Buffer());
    }
  }

  context_ = engine_->createExecutionContext();
  TRT_TRY(!!context_, "failed to create TRT execution context");
  context_->setOptimizationProfileAsync(0, static_cast<cudaStream_t>(stream_.GetNative()));
  OUTCOME_TRY(stream_.Wait());
  return success();
}
Result<void> TRTNet::Deinit() {
  context_.reset();
  engine_.reset();
  return success();
}
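// Propagates dynamic input shapes to the execution context's binding dimensions
// and resizes the output tensors to the dimensions TensorRT infers from them.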
Result<void> TRTNet::Reshape(Span<TensorShape> input_shapes) {
  using namespace trt_detail;
  if (input_shapes.size() != input_tensors_.size()) {
    return Status(eInvalidArgument);
  }
  for (int i = 0; i < input_tensors_.size(); ++i) {
    auto dims = to_dims(input_shapes[i]);
    // MMDEPLOY_ERROR("input shape: {}", to_string(dims));
    TRT_TRY(context_->setBindingDimensions(input_ids_[i], dims));
    input_tensors_[i].Reshape(input_shapes[i]);
  }
  if (!context_->allInputDimensionsSpecified()) {
    MMDEPLOY_ERROR("not all input dimensions specified");
    return Status(eFail);
  }
  for (int i = 0; i < output_tensors_.size(); ++i) {
    auto dims = context_->getBindingDimensions(output_ids_[i]);
    // MMDEPLOY_ERROR("output shape: {}", to_string(dims));
    output_tensors_[i].Reshape(to_shape(dims));
  }
  return success();
}

Result<Span<Tensor>> TRTNet::GetInputTensors() { return input_tensors_; }

Result<Span<Tensor>> TRTNet::GetOutputTensors() { return output_tensors_; }
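// Gathers the device pointer for every binding and enqueues inference on the
// stream. The event handed to enqueueV2 is signaled once the input buffers have
// been consumed, and Forward() waits on it before returning.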
Result<void> TRTNet::Forward() {
  using namespace trt_detail;
  std::vector<void*> bindings(engine_->getNbBindings());
  for (int i = 0; i < input_tensors_.size(); ++i) {
    bindings[input_ids_[i]] = input_tensors_[i].data();
  }
  for (int i = 0; i < output_tensors_.size(); ++i) {
    bindings[output_ids_[i]] = output_tensors_[i].data();
  }
  auto event = GetNative<cudaEvent_t>(event_);
  auto status = context_->enqueueV2(bindings.data(), GetNative<cudaStream_t>(stream_), &event);
  TRT_TRY(status, "TRT forward failed", Status(eFail));
  OUTCOME_TRY(event_.Wait());
  return success();
}
Result<void> TRTNet::ForwardAsync(Event* event) { return Status(eNotSupported); }
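// Factory registered under the backend name "tensorrt"; Create() returns nullptr
// when initialization fails so the registry can fall back or report an error.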
class TRTNetCreator : public Creator<Net> {
 public:
  const char* GetName() const override { return "tensorrt"; }
  int GetVersion() const override { return 0; }
  std::unique_ptr<Net> Create(const Value& args) override {
    auto p = std::make_unique<TRTNet>();
    if (p->Init(args)) {
      return p;
    }
    return nullptr;
  }
};

REGISTER_MODULE(Net, TRTNetCreator);

}  // namespace mmdeploy