#pragma once

#include "utils.h"
#include "holder.h"
#include "layers.h"
#include "struct.h"
#include "InferenceEngine.h"
#include "NvInfer.h"
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <functional>
#include <opencv2/opencv.hpp>

extern Logger gLogger;
using namespace trt;
using namespace trtxapi;
using namespace nvinfer1;

namespace fastrt {

    class Model {
    public:
        Model(const trt::ModelConfig &modelcfg,
            const FastreidConfig &reidcfg,
            const std::string input_name = "input",
            const std::string output_name = "output");

        virtual ~Model() {}

        /* Build the network with the given backbone/head builders and write the serialized engine to engine_file. */
        bool serializeEngine(const std::string engine_file,
            std::function<ILayer*(INetworkDefinition*, std::map<std::string, Weights>&, ITensor&, const FastreidConfig&)> backbone,
            std::function<ILayer*(INetworkDefinition*, std::map<std::string, Weights>&, ITensor&, const FastreidConfig&)> head) {
            /* Create builder */
            auto builder = make_holder(createInferBuilder(gLogger));

            /* Create model to populate the network, then set the outputs and create an engine */
            auto engine = createEngine(builder.get(), backbone, head);
            TRTASSERT(engine.get());

            /* Serialize the engine */
            auto modelStream = make_holder(engine->serialize());
            TRTASSERT(modelStream.get());

            std::ofstream p(engine_file, std::ios::binary | std::ios::out);
            if (!p) {
                std::cerr << "could not open plan output file" << std::endl;
                return false;
            }
            p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
            std::cout << "[Save serialized engine]: " << engine_file << std::endl;
            return true;
        }

        bool deserializeEngine(const std::string engine_file);

        /* Supports batch inference */
        bool inference(std::vector<cv::Mat>& input);

        /*
         * Access the memory allocated by cudaMallocHost. (It's on the CPU side.)
         * Use this after each inference.
         */
        float* getOutput();

        /* Output buffer size */
        int getOutputSize();

        /*
         * CUDA device id.
         * You may need this in multi-thread/multi-engine inference.
         */
        int getDeviceID();

    private:
        TensorRTHolder<ICudaEngine> createEngine(IBuilder* builder,
            std::function<ILayer*(INetworkDefinition*, std::map<std::string, Weights>&, ITensor&, const FastreidConfig&)> backbone,
            std::function<ILayer*(INetworkDefinition*, std::map<std::string, Weights>&, ITensor&, const FastreidConfig&)> head) {
            auto network = make_holder(builder->createNetworkV2(0U));
            auto config = make_holder(builder->createBuilderConfig());
            auto data = network->addInput(_engineCfg.input_name.c_str(), _dt, Dims3{3, _engineCfg.input_h, _engineCfg.input_w});
            TRTASSERT(data);

            auto weightMap = loadWeights(_engineCfg.weights_path);

            /* Preprocessing: fall back to the raw input tensor if no GPU preprocessing is defined */
            auto pre_input = preprocessing_gpu(network.get(), weightMap, data);
            if (!pre_input) pre_input = data;

            /* Modeling */
            auto feat_map = backbone(network.get(), weightMap, *pre_input, _reidcfg);
            TRTASSERT(feat_map);
            auto embedding = head(network.get(), weightMap, *feat_map->getOutput(0), _reidcfg);
            TRTASSERT(embedding);

            /* Set output */
            embedding->getOutput(0)->setName(_engineCfg.output_name.c_str());
            network->markOutput(*embedding->getOutput(0));

            /* Build engine */
            builder->setMaxBatchSize(_engineCfg.max_batch_size);
            config->setMaxWorkspaceSize(1 << 20);
#ifdef BUILD_FP16
            std::cout << "[Build fp16]" << std::endl;
            config->setFlag(BuilderFlag::kFP16);
#endif
            auto engine = make_holder(builder->buildEngineWithConfig(*network, *config));
            std::cout << "[TRT engine build out]" << std::endl;

            /* Release the host-side weight buffers once the engine is built */
            for (auto& mem : weightMap) {
                free((void*)(mem.second.values));
            }
            return engine;
        }

        /* CPU-side image-to-tensor preprocessing; must be implemented by each concrete architecture. */
        virtual void preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) = 0;

        /* Optional in-network (GPU) preprocessing; the default no-op returns nullptr,
           in which case createEngine falls back to the raw input tensor. */
        virtual ITensor* preprocessing_gpu(INetworkDefinition* network,
            std::map<std::string, Weights>& weightMap,
            ITensor* input) {
            return nullptr;
        }

    private:
        DataType _dt{DataType::kFLOAT};
        trt::EngineConfig _engineCfg;
        FastreidConfig _reidcfg;
        std::unique_ptr<trt::InferenceEngine> _inferEngine{nullptr};
    };

} // namespace fastrt
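/*
 * Minimal subclass sketch (illustrative only; `MyModel` is a hypothetical name,
 * not part of this header). A concrete architecture only has to supply the
 * CPU-side preprocessing; overriding preprocessing_gpu is optional.
 *
 *   class MyModel : public fastrt::Model {
 *   public:
 *       using fastrt::Model::Model;  // inherit Model's constructor
 *   private:
 *       void preprocessing_cpu(const cv::Mat& img, float* const data,
 *                              const std::size_t stride) override {
 *           // Convert img to floats and write them into data; how stride is
 *           // interpreted (e.g. as a per-channel offset) is up to the caller.
 *       }
 *   };
 */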
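/*
 * Usage sketch (illustrative only; `buildBackbone` and `buildHead` are
 * hypothetical builder functions matching the std::function signature above):
 *
 *   MyModel model(modelcfg, reidcfg);
 *   model.serializeEngine("./fastreid.engine", buildBackbone, buildHead);
 *   model.deserializeEngine("./fastreid.engine");
 *   std::vector<cv::Mat> batch{img1, img2};
 *   if (model.inference(batch)) {
 *       float* feat = model.getOutput();   // pinned host memory
 *       int len = model.getOutputSize();   // output buffer size
 *   }
 */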