// Copyright (c) OpenMMLab. All rights reserved.

#include "service_impl.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include "scope_timer.h"
#include "text_table.h"

zdl::DlSystem::Runtime_t InferenceServiceImpl::CheckRuntime(zdl::DlSystem::Runtime_t runtime,
                                                            bool& staticQuantization) {
  static zdl::DlSystem::Version_t Version = zdl::SNPE::SNPEFactory::getLibraryVersion();

  fprintf(stdout, "SNPE Version: %s\n", Version.asString().c_str());

  if ((runtime != zdl::DlSystem::Runtime_t::DSP) && staticQuantization) {
    fprintf(stderr,
            "ERROR: Cannot use static quantization with CPU/GPU runtimes. "
            "It is only designed for DSP/AIP runtimes.\n"
            "ERROR: Proceeding without static quantization on selected runtime.\n");
    staticQuantization = false;
  }

  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    fprintf(stderr, "Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }

  return runtime;
}

void InferenceServiceImpl::Build(std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
                                 zdl::DlSystem::Runtime_t runtime,
                                 zdl::DlSystem::RuntimeList runtimeList,
                                 bool useUserSuppliedBuffers,
                                 zdl::DlSystem::PlatformConfig platformConfig) {
  zdl::SNPE::SNPEBuilder snpeBuilder(container.get());

  if (runtimeList.empty()) {
    runtimeList.add(runtime);
  }

  snpe = snpeBuilder.setOutputLayers({})
             .setRuntimeProcessorOrder(runtimeList)
             .setUseUserSuppliedBuffers(useUserSuppliedBuffers)
             .setPlatformConfig(platformConfig)
             .setExecutionPriorityHint(zdl::DlSystem::ExecutionPriorityHint_t::HIGH)
             .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::SUSTAINED_HIGH_PERFORMANCE)
             .build();
}

void InferenceServiceImpl::SaveDLC(const ::mmdeploy::Model* request, const std::string& filename) {
  auto model = request->weights();
  fprintf(stdout, "saving file to %s\n", filename.c_str());
  std::ofstream fout;
  fout.open(filename, std::ios::binary | std::ios::out);
  fout.write(model.data(), model.size());
  fout.flush();
  fout.close();
}

void InferenceServiceImpl::LoadFloatData(const std::string& data, std::vector<float>& vec) {
  size_t len = data.size();
  assert(len % sizeof(float) == 0);
  const char* ptr = data.data();
  for (size_t i = 0; i < len; i += sizeof(float)) {
    // memcpy avoids the unaligned, strict-aliasing-violating float cast.
    float value;
    std::memcpy(&value, ptr + i, sizeof(float));
    vec.push_back(value);
  }
}

::grpc::Status InferenceServiceImpl::Echo(::grpc::ServerContext* context,
                                          const ::mmdeploy::Empty* request,
                                          ::mmdeploy::Reply* response) {
  response->set_info("echo");
  return Status::OK;
}
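// LoadFloatData above assumes tensor payloads travel as raw, natively-ordered
// float32 bytes packed into a std::string. A minimal round-trip sketch of that
// encoding, standard library only (PackFloatData is a hypothetical helper, not
// part of this service):
//
//   std::string PackFloatData(const std::vector<float>& vec) {
//     std::string data(vec.size() * sizeof(float), '\0');
//     std::memcpy(&data[0], vec.data(), data.size());
//     return data;
//   }
//
//   // Decoding is the loop in LoadFloatData above:
//   // std::vector<float> out;
//   // service.LoadFloatData(PackFloatData({1.f, 2.f}), out);  // out == {1.f, 2.f}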
// Logic and data behind the server's behavior.
::grpc::Status InferenceServiceImpl::Init(::grpc::ServerContext* context,
                                          const ::mmdeploy::Model* request,
                                          ::mmdeploy::Reply* response) {
  zdl::SNPE::SNPEFactory::initializeLogging(zdl::DlSystem::LogLevel_t::LOG_ERROR);
  zdl::SNPE::SNPEFactory::setLogLevel(zdl::DlSystem::LogLevel_t::LOG_ERROR);

  // Release any model left over from a previous Init call.
  if (snpe != nullptr) {
    snpe.reset();
  }
  if (container != nullptr) {
    container.reset();
  }

  auto model = request->weights();
  container = zdl::DlContainer::IDlContainer::open(
      reinterpret_cast<const uint8_t*>(model.data()), model.size());
  if (container == nullptr) {
    fprintf(stdout, "Stage Init: load dlc failed.\n");
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
  fprintf(stdout, "Stage Init: load dlc success.\n");

  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::GPU;
  if (request->has_device()) {
    switch (request->device()) {
      case mmdeploy::Model_Device_GPU:
        runtime = zdl::DlSystem::Runtime_t::GPU;
        break;
      case mmdeploy::Model_Device_DSP:
        runtime = zdl::DlSystem::Runtime_t::DSP;
        break;
      default:
        break;
    }
  }
  if (runtime != zdl::DlSystem::Runtime_t::CPU) {
    bool static_quant = false;
    runtime = CheckRuntime(runtime, static_quant);
  }

  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(zdl::DlSystem::Runtime_t::CPU);
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;

  {
    ScopeTimer timer("build snpe");
    Build(container, runtime, runtimeList, false, platformConfig);
  }
  if (snpe == nullptr) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }

  // setup logger
  auto logger_opt = snpe->getDiagLogInterface();
  if (!logger_opt) throw std::runtime_error("SNPE failed to obtain logging interface");
  auto logger = *logger_opt;
  auto opts = logger->getOptions();
  static std::string OutputDir = "./output/";
  opts.LogFileDirectory = OutputDir;
  if (!logger->setOptions(opts)) {
    std::cerr << "Failed to set options" << std::endl;
    return Status::OK;
  }
  if (!logger->start()) {
    std::cerr << "Failed to start logger" << std::endl;
    return Status::OK;
  }

  // Create one reusable input tensor per network input and register it in the
  // input tensor map consumed by Inference.
  const auto& inputTensorNamesRef = snpe->getInputTensorNames();
  const auto& inputTensorNames = *inputTensorNamesRef;
  inputTensors.resize(inputTensorNames.size());
  for (size_t i = 0; i < inputTensorNames.size(); ++i) {
    const char* pname = inputTensorNames.at(i);
    const auto& shape_opt = snpe->getInputDimensions(pname);
    const auto& shape = *shape_opt;
    fprintf(stdout, "Stage Init: input tensor info:\n");
    switch (shape.rank()) {
      case 1:
        fprintf(stdout, "name: %s, shape: [%zu]\n", pname, shape[0]);
        break;
      case 2:
        fprintf(stdout, "name: %s, shape: [%zu,%zu]\n", pname, shape[0], shape[1]);
        break;
      case 3:
        fprintf(stdout, "name: %s, shape: [%zu,%zu,%zu]\n", pname, shape[0], shape[1], shape[2]);
        break;
      case 4:
        fprintf(stdout, "name: %s, shape: [%zu,%zu,%zu,%zu]\n", pname, shape[0], shape[1],
                shape[2], shape[3]);
        break;
    }
    inputTensors[i] = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(shape);
    inputTensorMap.add(pname, inputTensors[i].get());
  }

  response->set_status(0);
  response->set_info("Stage Init: success");
  return Status::OK;
}
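// A client-side sketch of driving Init. The proto setters mirror the getters
// used above (weights(), device()); the generated stub and the .dlc path are
// assumptions, not part of this file:
//
//   std::ifstream fin("end2end.dlc", std::ios::binary);
//   std::string weights((std::istreambuf_iterator<char>(fin)),
//                       std::istreambuf_iterator<char>());
//
//   mmdeploy::Model model;
//   model.set_weights(std::move(weights));         // raw .dlc bytes
//   model.set_device(mmdeploy::Model_Device_DSP);  // GPU is the default above
//
//   mmdeploy::Reply reply;
//   grpc::ClientContext ctx;
//   stub->Init(&ctx, model, &reply);               // expect reply.status() == 0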
std::string InferenceServiceImpl::ContentStr(zdl::DlSystem::ITensor* pTensor) {
  std::string str;
  const size_t N = std::min<size_t>(5, pTensor->getSize());
  auto it = pTensor->cbegin();
  for (size_t i = 0; i < N; ++i) {
    str += std::to_string(*(it + i));
    str += " ";
  }
  // Append the last element so the preview shows where the tensor ends.
  str += "..";
  str += std::to_string(*(it + pTensor->getSize() - 1));
  return str;
}

std::string InferenceServiceImpl::ShapeStr(zdl::DlSystem::ITensor* pTensor) {
  std::string str;
  str += "[";
  auto shape = pTensor->getShape();
  for (size_t i = 0; i < shape.rank(); ++i) {
    str += std::to_string(shape[i]);
    str += ",";
  }
  str += ']';
  return str;
}

::grpc::Status InferenceServiceImpl::OutputNames(::grpc::ServerContext* context,
                                                 const ::mmdeploy::Empty* request,
                                                 ::mmdeploy::Names* response) {
  const auto& outputTensorNamesRef = snpe->getOutputTensorNames();
  const auto& outputTensorNames = *outputTensorNamesRef;
  for (size_t i = 0; i < outputTensorNames.size(); ++i) {
    response->add_names(outputTensorNames.at(i));
  }
  return Status::OK;
}

::grpc::Status InferenceServiceImpl::Inference(::grpc::ServerContext* context,
                                               const ::mmdeploy::TensorList* request,
                                               ::mmdeploy::Reply* response) {
  // Get input names and number.
  const auto& inputTensorNamesRef = snpe->getInputTensorNames();
  if (!inputTensorNamesRef) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
  const auto& inputTensorNames = *inputTensorNamesRef;
  if (inputTensorNames.size() != static_cast<size_t>(request->data_size())) {
    response->set_status(-1);
    response->set_info("Stage Inference: input tensor count does not match!");
    return Status::OK;
  }

  helper::TextTable table("Inference");
  table.padding(1);
  table.add("type").add("name").add("shape").add("content").eor();

  // Load input buffers through the reusable input TensorMap.
  {
    // ScopeTimer timer("convert input");
    for (int i = 0; i < request->data_size(); ++i) {
      auto tensor = request->data(i);
      std::vector<float> float_input;
      LoadFloatData(tensor.data(), float_input);

      zdl::DlSystem::ITensor* ptensor = inputTensorMap.getTensor(tensor.name().c_str());
      if (ptensor == nullptr) {
        fprintf(stderr, "Stage Inference: name %s does not exist in input tensor map\n",
                tensor.name().c_str());
        response->set_status(-1);
        response->set_info("cannot find name in input tensor map.");
        return Status::OK;
      }
      if (float_input.size() != ptensor->getSize()) {
        fprintf(stderr, "Stage Inference: input size mismatch, got %zu, expected %zu.\n",
                float_input.size(), ptensor->getSize());
        response->set_status(-1);
        response->set_info(zdl::DlSystem::getLastErrorString());
        return Status::OK;
      }
      std::copy(float_input.begin(), float_input.end(), ptensor->begin());
      table.add("IN").add(tensor.name()).add(ShapeStr(ptensor)).add(ContentStr(ptensor)).eor();
    }
  }

  // A tensor map for SNPE execution outputs.
  zdl::DlSystem::TensorMap outputTensorMap;
  // Execute the input tensor map on the model with SNPE.
  bool success = false;
  {
    ScopeTimer timer("execute", false);
    success = snpe->execute(inputTensorMap, outputTensorMap);
    if (!success) {
      response->set_status(-1);
      response->set_info(zdl::DlSystem::getLastErrorString());
      return Status::OK;
    }
    table.add("EXECUTE").add(std::to_string(timer.cost()) + "ms").eor();
  }

  // Copy each output tensor back into the reply as raw float32 bytes.
  {
    // ScopeTimer timer("convert output");
    auto out_names = outputTensorMap.getTensorNames();
    for (size_t i = 0; i < out_names.size(); ++i) {
      const char* name = out_names.at(i);
      zdl::DlSystem::ITensor* ptensor = outputTensorMap.getTensor(name);
      table.add("OUT").add(std::string(name)).add(ShapeStr(ptensor)).add(ContentStr(ptensor)).eor();

      const size_t data_length = ptensor->getSize();
      std::string result;
      result.resize(sizeof(float) * data_length);
      int j = 0;
      for (auto it = ptensor->cbegin(); it != ptensor->cend(); ++it, j += sizeof(float)) {
        float f = *it;
        std::memcpy(&result[0] + j, &f, sizeof(float));
      }

      auto shape = ptensor->getShape();
      ::mmdeploy::Tensor* pData = response->add_data();
      pData->set_dtype("float32");
      pData->set_name(name);
      pData->set_data(result);
      for (size_t k = 0; k < shape.rank(); ++k) {
        pData->add_shape(shape[k]);
      }
    }
  }

  std::cout << table << std::endl << std::endl;

  // Build output status.
  response->set_status(0);
  response->set_info("Stage Inference: success");
  return Status::OK;
}
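// A client-side sketch of consuming the reply filled in above. Each output
// tensor carries dtype "float32", its shape, and raw float bytes in data(),
// so decoding mirrors the packing loop above (stub call elided; `reply` is
// assumed to be a ::mmdeploy::Reply returned by Inference):
//
//   for (const auto& tensor : reply.data()) {
//     std::vector<float> values(tensor.data().size() / sizeof(float));
//     std::memcpy(values.data(), tensor.data().data(), tensor.data().size());
//     // tensor.name() and tensor.shape(0) .. tensor.shape(rank - 1)
//     // describe the layout of `values`.
//   }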
::grpc::Status InferenceServiceImpl::Destroy(::grpc::ServerContext* context,
                                             const ::mmdeploy::Empty* request,
                                             ::mmdeploy::Reply* response) {
  snpe.reset();
  container.reset();
  inputTensors.clear();
  response->set_status(0);
  zdl::SNPE::SNPEFactory::terminateLogging();
  return Status::OK;
}
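// A minimal sketch of hosting this service with gRPC (the real entry point
// lives elsewhere in this target; the listening address is an assumption):
//
//   InferenceServiceImpl service;
//   grpc::ServerBuilder builder;
//   builder.AddListeningPort("0.0.0.0:60000", grpc::InsecureServerCredentials());
//   builder.RegisterService(&service);
//   std::unique_ptr<grpc::Server> server(builder.BuildAndStart());
//   server->Wait();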