/************************************************************************************
 * Handles memory pre-allocation on both host (pinned memory, enabling CUDA DMA) and device
 * Author: Darren Hsieh
 * Date: 2020/07/07
 *************************************************************************************/
#pragma once

// Standard library headers required by the declarations below
// (the original include targets were lost; reconstructed from usage).
#include <cstddef>
#include <functional>
#include <memory>
#include <thread>

#include "utils.h"
#include "struct.h"
#include "holder.h"
#include "logging.h"
#include "NvInfer.h"
#include "cuda_runtime_api.h"

static Logger gLogger;

namespace trt {

class InferenceEngine {
public:
    InferenceEngine(const EngineConfig &enginecfg);
    InferenceEngine(InferenceEngine &&other) noexcept;
    ~InferenceEngine();

    InferenceEngine(const InferenceEngine &) = delete;
    InferenceEngine& operator=(const InferenceEngine &) = delete;
    InferenceEngine& operator=(InferenceEngine &&other) = delete;

    // The preprocessing callback fills the pinned host input buffer before the
    // asynchronous copy to the device. (Callback signature assumed; the original
    // template argument was lost.)
    bool doInference(const int inference_batch_size, std::function<void(float*)> preprocessing);

    float* getOutput() { return _output; }
    std::thread::id getThreadID() { return std::this_thread::get_id(); }

private:
    EngineConfig _engineCfg;
    float* _input{nullptr};
    float* _output{nullptr};

    // Pointers to input and output device buffers to pass to the engine.
    // The engine requires exactly IEngine::getNbBindings() buffers.
    void* _buffers[2];

    // In order to bind the buffers, we need to know the names of the input and
    // output tensors. Note that indices are guaranteed to be less than
    // IEngine::getNbBindings().
    int _inputIndex;
    int _outputIndex;

    int _inputSize;
    int _outputSize;

    static constexpr std::size_t _depth{sizeof(float)};

    // RAII holders for the TensorRT objects. (Template arguments assumed; the
    // originals were lost.)
    TensorRTHolder<nvinfer1::IRuntime> _runtime{nullptr};
    TensorRTHolder<nvinfer1::ICudaEngine> _engine{nullptr};
    TensorRTHolder<nvinfer1::IExecutionContext> _context{nullptr};

    std::shared_ptr<cudaStream_t> _streamptr;
};

} // namespace trt
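
/*
 * Usage sketch (illustrative only): how a caller might drive this class.
 * The exact EngineConfig fields and the preprocessing callback signature are
 * assumptions, not guaranteed by this header.
 *
 *   trt::EngineConfig cfg;                       // populated by the caller (model path, sizes, ...)
 *   trt::InferenceEngine engine(cfg);            // allocates pinned host + device buffers
 *
 *   // The callback receives the pinned host input buffer and fills it; the
 *   // engine then copies it to the device and runs inference on its stream.
 *   bool ok = engine.doInference(1, [&](float* pinned_input) {
 *       // e.g. copy a preprocessed image into pinned_input here
 *   });
 *
 *   if (ok) {
 *       float* result = engine.getOutput();      // pinned host output buffer
 *       // ... consume result ...
 *   }
 */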