mmdeploy/service/snpe/server/service_impl.cpp

// Copyright (c) OpenMMLab. All rights reserved.
#include "service_impl.h"
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>
#include "scope_timer.h"
#include "text_table.h"
zdl::DlSystem::Runtime_t InferenceServiceImpl::CheckRuntime(zdl::DlSystem::Runtime_t runtime,
                                                            bool& staticQuantization) {
  static zdl::DlSystem::Version_t Version = zdl::SNPE::SNPEFactory::getLibraryVersion();

  fprintf(stdout, "SNPE Version: %s\n", Version.asString().c_str());

  if ((runtime != zdl::DlSystem::Runtime_t::DSP) && staticQuantization) {
    fprintf(stderr,
            "ERROR: Cannot use static quantization with CPU/GPU runtimes. "
            "It is only designed for DSP/AIP runtimes.\n"
            "ERROR: Proceeding without static quantization on selected "
            "runtime.\n");
    staticQuantization = false;
  }

  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    fprintf(stderr, "Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }

  return runtime;
}
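
// Compile the network held in `container` into an executable SNPE instance,
// trying the processors in `runtimeList` in order (or `runtime` alone when
// the list is empty).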
void InferenceServiceImpl::Build(std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
                                 zdl::DlSystem::Runtime_t runtime,
                                 zdl::DlSystem::RuntimeList runtimeList,
                                 bool useUserSuppliedBuffers,
                                 zdl::DlSystem::PlatformConfig platformConfig) {
  zdl::SNPE::SNPEBuilder snpeBuilder(container.get());

  if (runtimeList.empty()) {
    runtimeList.add(runtime);
  }

  snpe = snpeBuilder.setOutputLayers({})
             .setRuntimeProcessorOrder(runtimeList)
             .setUseUserSuppliedBuffers(useUserSuppliedBuffers)
             .setPlatformConfig(platformConfig)
             .setExecutionPriorityHint(zdl::DlSystem::ExecutionPriorityHint_t::HIGH)
             .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::SUSTAINED_HIGH_PERFORMANCE)
             .build();
}
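
// Persist the DLC blob carried in the gRPC request to a local file.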
void InferenceServiceImpl::SaveDLC(const ::mmdeploy::Model* request, const std::string& filename) {
  auto model = request->weights();
  fprintf(stdout, "saving file to %s\n", filename.c_str());

  std::ofstream fout;
  fout.open(filename, std::ios::binary | std::ios::out);
  fout.write(model.data(), model.size());
  fout.flush();
  fout.close();
}
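
// Unpack a raw byte string (length must be a multiple of 4) into float32
// values appended to `vec`.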
void InferenceServiceImpl::LoadFloatData(const std::string& data, std::vector<float>& vec) {
  size_t len = data.size();
  assert(len % sizeof(float) == 0);

  const char* ptr = data.data();
  vec.reserve(vec.size() + len / sizeof(float));
  for (size_t i = 0; i < len; i += sizeof(float)) {
    // memcpy instead of a float* dereference: byte offsets inside the
    // protobuf string are not guaranteed to be float-aligned.
    float value;
    memcpy(&value, ptr + i, sizeof(float));
    vec.push_back(value);
  }
}
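
// Trivial liveness probe so clients can verify the server is reachable.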
::grpc::Status InferenceServiceImpl::Echo(::grpc::ServerContext* context,
                                          const ::mmdeploy::Empty* request,
                                          ::mmdeploy::Reply* response) {
  response->set_info("echo");
  return Status::OK;
}

// Stage Init: load the DLC model, choose a runtime, build the SNPE network
// and pre-allocate one ITensor per model input.
::grpc::Status InferenceServiceImpl::Init(::grpc::ServerContext* context,
                                          const ::mmdeploy::Model* request,
                                          ::mmdeploy::Reply* response) {
  zdl::SNPE::SNPEFactory::initializeLogging(zdl::DlSystem::LogLevel_t::LOG_ERROR);
  zdl::SNPE::SNPEFactory::setLogLevel(zdl::DlSystem::LogLevel_t::LOG_ERROR);

  // Re-initialization: drop any previously built network, container and
  // input tensor mappings before loading the new model.
  if (snpe != nullptr) {
    snpe.reset();
  }
  if (container != nullptr) {
    container.reset();
  }
  inputTensorMap.clear();

  auto model = request->weights();
  container =
      zdl::DlContainer::IDlContainer::open(reinterpret_cast<uint8_t*>(model.data()), model.size());
  if (container == nullptr) {
    fprintf(stdout, "Stage Init: load dlc failed.\n");
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
  fprintf(stdout, "Stage Init: load dlc success.\n");

  // Default to GPU; honor the device requested by the client if present.
  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::GPU;
  if (request->has_device()) {
    switch (request->device()) {
      case mmdeploy::Model_Device_GPU:
        runtime = zdl::DlSystem::Runtime_t::GPU;
        break;
      case mmdeploy::Model_Device_DSP:
        runtime = zdl::DlSystem::Runtime_t::DSP;
        break;
      default:
        break;
    }
  }

  if (runtime != zdl::DlSystem::Runtime_t::CPU) {
    bool static_quant = false;
    runtime = CheckRuntime(runtime, static_quant);
  }

  // Register both CPU and the selected runtime with the builder.
  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(zdl::DlSystem::Runtime_t::CPU);
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;

  {
    ScopeTimer timer("build snpe");
    Build(container, runtime, runtimeList, false, platformConfig);
  }

  if (snpe == nullptr) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    // Return early: the logger and tensor setup below would dereference a
    // null network handle.
    return Status::OK;
  }

  // Route SNPE diagnostic logs into ./output/.
  auto logger_opt = snpe->getDiagLogInterface();
  if (!logger_opt) throw std::runtime_error("SNPE failed to obtain logging interface");
  auto logger = *logger_opt;
  auto opts = logger->getOptions();
  static std::string OutputDir = "./output/";
  opts.LogFileDirectory = OutputDir;
  if (!logger->setOptions(opts)) {
    std::cerr << "Failed to set options" << std::endl;
    response->set_status(-1);
    response->set_info("Stage Init: failed to set logger options");
    return Status::OK;
  }
  if (!logger->start()) {
    std::cerr << "Failed to start logger" << std::endl;
    response->set_status(-1);
    response->set_info("Stage Init: failed to start logger");
    return Status::OK;
  }
const auto& inputTensorNamesRef = snpe->getInputTensorNames();
const auto& inputTensorNames = *inputTensorNamesRef;
inputTensors.resize(inputTensorNames.size());
for (int i = 0; i < inputTensorNames.size(); ++i) {
const char* pname = inputTensorNames.at(i);
const auto& shape_opt = snpe->getInputDimensions(pname);
const auto& shape = *shape_opt;
fprintf(stdout, "Stage Init: input tensor info:\n");
switch (shape.rank()) {
case 1:
fprintf(stdout, "name: %s, shape: [%ld]\n", pname, shape[0]);
break;
case 2:
fprintf(stdout, "name: %s, shape: [%ld,%ld]\n", pname, shape[0], shape[1]);
break;
case 3:
fprintf(stdout, "name: %s, shape: [%ld,%ld,%ld]\n", pname, shape[0], shape[1], shape[2]);
break;
case 4:
fprintf(stdout, "name: %s, shape: [%ld,%ld,%ld,%ld]\n", pname, shape[0], shape[1], shape[2],
shape[3]);
break;
}
inputTensors[i] = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(shape);
inputTensorMap.add(pname, inputTensors[i].get());
}

  response->set_status(0);
  response->set_info("Stage Init: success");
  return Status::OK;
}
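
// Format the first few elements and the last element of a tensor for the
// logging table.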
std::string InferenceServiceImpl::ContentStr(zdl::DlSystem::ITensor* pTensor) {
  std::string str;
  // std::min<size_t> avoids a type mismatch on platforms where size_t is
  // not unsigned long (the old `5UL` literal).
  const size_t N = std::min<size_t>(5, pTensor->getSize());

  auto it = pTensor->cbegin();
  for (size_t i = 0; i < N; ++i) {
    str += std::to_string(*(it + i));
    str += " ";
  }
  str += "..";
  str += std::to_string(*(it + pTensor->getSize() - 1));
  return str;
}
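
// Format a tensor shape as "[d0,d1,...]".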
std::string InferenceServiceImpl::ShapeStr(zdl::DlSystem::ITensor* pTensor) {
  std::string str("[");
  auto shape = pTensor->getShape();
  for (size_t i = 0; i < shape.rank(); ++i) {
    if (i > 0) {
      str += ",";
    }
    str += std::to_string(shape[i]);
  }
  str += "]";
  return str;
}
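
// Report the model's output tensor names; requires a successful Init first.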
::grpc::Status InferenceServiceImpl::OutputNames(::grpc::ServerContext* context,
                                                 const ::mmdeploy::Empty* request,
                                                 ::mmdeploy::Names* response) {
  const auto& outputTensorNamesRef = snpe->getOutputTensorNames();
  const auto& outputTensorNames = *outputTensorNamesRef;
  for (size_t i = 0; i < outputTensorNames.size(); ++i) {
    response->add_names(outputTensorNames.at(i));
  }
  return Status::OK;
}
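
// Stage Inference: copy the request tensors into the pre-built input map,
// run the network, and serialize every output tensor back into the reply.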
::grpc::Status InferenceServiceImpl::Inference(::grpc::ServerContext* context,
                                               const ::mmdeploy::TensorList* request,
                                               ::mmdeploy::Reply* response) {
  // Get input names and number
  const auto& inputTensorNamesRef = snpe->getInputTensorNames();
  if (!inputTensorNamesRef) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
  const auto& inputTensorNames = *inputTensorNamesRef;
  if (inputTensorNames.size() != static_cast<size_t>(request->data_size())) {
    response->set_status(-1);
    response->set_info("Stage Inference: input tensor count does not match the model!");
    return Status::OK;
  }

  helper::TextTable table("Inference");
  table.padding(1);
  table.add("type").add("name").add("shape").add("content").eor();

  // Load the input buffers through the prepared TensorMap
  {
    // ScopeTimer timer("convert input");
    for (int i = 0; i < request->data_size(); ++i) {
      const auto& tensor = request->data(i);  // reference, avoids copying the blob
      std::vector<float> float_input;
      LoadFloatData(tensor.data(), float_input);

      zdl::DlSystem::ITensor* ptensor = inputTensorMap.getTensor(tensor.name().c_str());
      if (ptensor == nullptr) {
        fprintf(stderr, "Stage Inference: name: %s does not exist in input tensor map\n",
                tensor.name().c_str());
        response->set_status(-1);
        response->set_info("cannot find name in input tensor map.");
        return Status::OK;
      }
      if (float_input.size() != ptensor->getSize()) {
        fprintf(stderr, "Stage Inference: input size does not match, got %zu, expected %zu.\n",
                float_input.size(), ptensor->getSize());
        response->set_status(-1);
        response->set_info(zdl::DlSystem::getLastErrorString());
        return Status::OK;
      }

      std::copy(float_input.begin(), float_input.end(), ptensor->begin());
      table.add("IN").add(tensor.name()).add(ShapeStr(ptensor)).add(ContentStr(ptensor)).eor();
    }
  }

  // A tensor map for SNPE execution outputs
  zdl::DlSystem::TensorMap outputTensorMap;

  // Execute the populated input tensor map on the model with SNPE
  bool success = false;
  {
    ScopeTimer timer("execute", false);
    success = snpe->execute(inputTensorMap, outputTensorMap);
    if (!success) {
      response->set_status(-1);
      response->set_info(zdl::DlSystem::getLastErrorString());
      return Status::OK;
    }
    table.add("EXECUTE").add(std::to_string(timer.cost()) + "ms").eor();
  }

  {
    // ScopeTimer timer("convert output");
    auto out_names = outputTensorMap.getTensorNames();
    for (size_t i = 0; i < out_names.size(); ++i) {
      const char* name = out_names.at(i);
      zdl::DlSystem::ITensor* ptensor = outputTensorMap.getTensor(name);
      table.add("OUT").add(std::string(name)).add(ShapeStr(ptensor)).add(ContentStr(ptensor)).eor();

      // Serialize the output elements into a raw float32 byte string.
      const size_t data_length = ptensor->getSize();
      std::string result;
      result.resize(sizeof(float) * data_length);
      size_t offset = 0;
      for (auto it = ptensor->cbegin(); it != ptensor->cend(); ++it, offset += sizeof(float)) {
        float f = *it;
        memcpy(&result[0] + offset, reinterpret_cast<char*>(&f), sizeof(float));
      }

      auto shape = ptensor->getShape();
      ::mmdeploy::Tensor* pData = response->add_data();
      pData->set_dtype("float32");
      pData->set_name(name);
      pData->set_data(result);
      for (size_t k = 0; k < shape.rank(); ++k) {
        pData->add_shape(shape[k]);
      }
    }
  }
  std::cout << table << std::endl << std::endl;

  // build output status
  response->set_status(0);
  response->set_info("Stage Inference: success");
  return Status::OK;
}
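
// Release the network, model container and cached tensors, and shut down
// SNPE logging.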
::grpc::Status InferenceServiceImpl::Destroy(::grpc::ServerContext* context,
                                             const ::mmdeploy::Empty* request,
                                             ::mmdeploy::Reply* response) {
  snpe.reset();
  container.reset();
  inputTensors.clear();
  inputTensorMap.clear();  // also drop the now-dangling ITensor pointers
  response->set_status(0);
  zdl::SNPE::SNPEFactory::terminateLogging();
  return Status::OK;
}