Mirror of https://github.com/open-mmlab/mmdeploy.git, synced 2025-01-14 08:09:43 +08:00
// Copyright (c) OpenMMLab. All rights reserved.

#include "service_impl.h"

#include <algorithm>
#include <cassert>  // assert() is used in LoadFloatData below
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>

#include "scope_timer.h"
#include "text_table.h"
// Print the SNPE library version, warn when static quantization is requested
// on a non-DSP runtime, and fall back to CPU if the requested runtime is not
// available on this device.
zdl::DlSystem::Runtime_t InferenceServiceImpl::CheckRuntime(zdl::DlSystem::Runtime_t runtime,
                                                            bool& staticQuantization) {
  static zdl::DlSystem::Version_t Version = zdl::SNPE::SNPEFactory::getLibraryVersion();

  fprintf(stdout, "SNPE Version: %s\n", Version.asString().c_str());

  if ((runtime != zdl::DlSystem::Runtime_t::DSP) && staticQuantization) {
    fprintf(stderr,
            "ERROR: Cannot use static quantization with CPU/GPU runtimes. "
            "It is only designed for DSP/AIP runtimes.\n"
            "ERROR: Proceeding without static quantization on selected "
            "runtime.\n");
    staticQuantization = false;
  }

  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    fprintf(stderr, "Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }

  return runtime;
}

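// A hedged usage sketch of the helper above, not part of the service itself:
// when DSP is requested with static quantization on a device that lacks a DSP
// runtime, the quantization flag survives while the runtime silently falls
// back to CPU.
//
//   bool staticQuant = true;
//   auto rt = CheckRuntime(zdl::DlSystem::Runtime_t::DSP, staticQuant);
//   // Without DSP support: rt == zdl::DlSystem::Runtime_t::CPU, and
//   // staticQuant is still true, because the flag is only cleared when a
//   // non-DSP runtime was requested in the first place.
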
// Build the SNPE network from an opened DLC container. The result is stored
// in the `snpe` member; on failure it stays null and the caller reports the
// error.
void InferenceServiceImpl::Build(std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
                                 zdl::DlSystem::Runtime_t runtime,
                                 zdl::DlSystem::RuntimeList runtimeList,
                                 bool useUserSuppliedBuffers,
                                 zdl::DlSystem::PlatformConfig platformConfig) {
  zdl::SNPE::SNPEBuilder snpeBuilder(container.get());

  if (runtimeList.empty()) {
    runtimeList.add(runtime);
  }

  snpe = snpeBuilder.setOutputLayers({})
             .setRuntimeProcessorOrder(runtimeList)
             .setUseUserSuppliedBuffers(useUserSuppliedBuffers)
             .setPlatformConfig(platformConfig)
             .setExecutionPriorityHint(zdl::DlSystem::ExecutionPriorityHint_t::HIGH)
             .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::SUSTAINED_HIGH_PERFORMANCE)
             .build();
}

// Dump the model weights carried in the request to a local .dlc file.
void InferenceServiceImpl::SaveDLC(const ::mmdeploy::Model* request, const std::string& filename) {
  auto model = request->weights();
  fprintf(stdout, "saving file to %s\n", filename.c_str());
  std::ofstream fout;
  fout.open(filename, std::ios::binary | std::ios::out);
  fout.write(model.data(), model.size());
  fout.flush();
  fout.close();
}

// Reinterpret a raw byte string as a sequence of float32 values and append
// them to `vec`.
void InferenceServiceImpl::LoadFloatData(const std::string& data, std::vector<float>& vec) {
  size_t len = data.size();
  assert(len % sizeof(float) == 0);
  vec.reserve(vec.size() + len / sizeof(float));
  const char* ptr = data.data();
  for (size_t i = 0; i < len; i += sizeof(float)) {
    float value;
    memcpy(&value, ptr + i, sizeof(float));  // avoids the unaligned-access UB of a raw pointer cast
    vec.push_back(value);
  }
}

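// On the wire, Tensor.data is assumed to carry the raw little-endian float32
// buffer that the helper above decodes. A minimal client-side packing sketch
// (the `tensor` message variable is hypothetical, not defined in this file):
//
//   std::vector<float> values = {0.1f, 0.2f, 0.3f, 0.4f};
//   tensor.set_data(std::string(reinterpret_cast<const char*>(values.data()),
//                               values.size() * sizeof(float)));
//   // LoadFloatData reverses this on the server side.
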
// Liveness probe: reply with a fixed string so clients can check the server.
::grpc::Status InferenceServiceImpl::Echo(::grpc::ServerContext* context,
                                          const ::mmdeploy::Empty* request,
                                          ::mmdeploy::Reply* response) {
  response->set_info("echo");
  return Status::OK;
}

// Logic and data behind the server's behavior.
::grpc::Status InferenceServiceImpl::Init(::grpc::ServerContext* context,
                                          const ::mmdeploy::Model* request,
                                          ::mmdeploy::Reply* response) {
  zdl::SNPE::SNPEFactory::initializeLogging(zdl::DlSystem::LogLevel_t::LOG_ERROR);
  zdl::SNPE::SNPEFactory::setLogLevel(zdl::DlSystem::LogLevel_t::LOG_ERROR);

  // Release any state left over from a previous Init call.
  if (snpe != nullptr) {
    snpe.reset();
  }
  if (container != nullptr) {
    container.reset();
  }

  auto model = request->weights();
  container =
      zdl::DlContainer::IDlContainer::open(reinterpret_cast<uint8_t*>(model.data()), model.size());
  if (container == nullptr) {
    fprintf(stdout, "Stage Init: load dlc failed.\n");

    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
  fprintf(stdout, "Stage Init: load dlc success.\n");

  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::GPU;
  if (request->has_device()) {
    switch (request->device()) {
      case mmdeploy::Model_Device_GPU:
        runtime = zdl::DlSystem::Runtime_t::GPU;
        break;
      case mmdeploy::Model_Device_DSP:
        runtime = zdl::DlSystem::Runtime_t::DSP;
        break;
      default:
        break;
    }
  }

  if (runtime != zdl::DlSystem::Runtime_t::CPU) {
    bool static_quant = false;
    runtime = CheckRuntime(runtime, static_quant);
  }

  // CPU is added first so SNPE can fall back per layer when the preferred
  // runtime does not support an op.
  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(zdl::DlSystem::Runtime_t::CPU);
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;

  {
    ScopeTimer timer("build snpe");
    Build(container, runtime, runtimeList, false, platformConfig);
  }

  if (snpe == nullptr) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;  // do not touch the null network below
  }

  // setup logger
  auto logger_opt = snpe->getDiagLogInterface();
  if (!logger_opt) throw std::runtime_error("SNPE failed to obtain logging interface");
  auto logger = *logger_opt;
  auto opts = logger->getOptions();
  static std::string OutputDir = "./output/";

  opts.LogFileDirectory = OutputDir;
  if (!logger->setOptions(opts)) {
    std::cerr << "Failed to set options" << std::endl;
    return Status::OK;
  }
  if (!logger->start()) {
    std::cerr << "Failed to start logger" << std::endl;
    return Status::OK;
  }

  // Create one input ITensor per model input and register it in the input
  // tensor map; Inference reuses these buffers for every request.
  const auto& inputTensorNamesRef = snpe->getInputTensorNames();
  const auto& inputTensorNames = *inputTensorNamesRef;

  inputTensors.resize(inputTensorNames.size());
  for (size_t i = 0; i < inputTensorNames.size(); ++i) {
    const char* pname = inputTensorNames.at(i);
    const auto& shape_opt = snpe->getInputDimensions(pname);
    const auto& shape = *shape_opt;

    fprintf(stdout, "Stage Init: input tensor info:\n");
    switch (shape.rank()) {
      case 1:
        fprintf(stdout, "name: %s, shape: [%zu]\n", pname, shape[0]);
        break;
      case 2:
        fprintf(stdout, "name: %s, shape: [%zu,%zu]\n", pname, shape[0], shape[1]);
        break;
      case 3:
        fprintf(stdout, "name: %s, shape: [%zu,%zu,%zu]\n", pname, shape[0], shape[1], shape[2]);
        break;
      case 4:
        fprintf(stdout, "name: %s, shape: [%zu,%zu,%zu,%zu]\n", pname, shape[0], shape[1],
                shape[2], shape[3]);
        break;
    }
    inputTensors[i] = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(shape);
    inputTensorMap.add(pname, inputTensors[i].get());
  }

  response->set_status(0);
  response->set_info("Stage Init: success");
  return Status::OK;
}

// Render the first few elements and the last element of a tensor, e.g.
// "0.1 0.2 0.3 0.4 0.5 ..0.9", for the log table.
std::string InferenceServiceImpl::ContentStr(zdl::DlSystem::ITensor* pTensor) {
  std::string str;

  const size_t N = std::min(static_cast<size_t>(5), pTensor->getSize());
  auto it = pTensor->cbegin();
  for (size_t i = 0; i < N; ++i) {
    str += std::to_string(*(it + i));
    str += " ";
  }
  str += "..";
  str += std::to_string(*(it + pTensor->getSize() - 1));
  return str;
}

// Render a tensor shape as "[d0,d1,...]" for the log table.
std::string InferenceServiceImpl::ShapeStr(zdl::DlSystem::ITensor* pTensor) {
  std::string str;

  str += "[";
  auto shape = pTensor->getShape();
  for (size_t i = 0; i < shape.rank(); ++i) {
    if (i > 0) {
      str += ",";
    }
    str += std::to_string(shape[i]);
  }
  str += ']';
  return str;
}

// Return the model's output tensor names; empty when no network is loaded.
::grpc::Status InferenceServiceImpl::OutputNames(::grpc::ServerContext* context,
                                                 const ::mmdeploy::Empty* request,
                                                 ::mmdeploy::Names* response) {
  if (snpe == nullptr) {
    // Guard: no network has been built yet (Init not called).
    return Status::OK;
  }

  const auto& outputTensorNamesRef = snpe->getOutputTensorNames();
  const auto& outputTensorNames = *outputTensorNamesRef;

  for (size_t i = 0; i < outputTensorNames.size(); ++i) {
    response->add_names(outputTensorNames.at(i));
  }

  return Status::OK;
}

::grpc::Status InferenceServiceImpl::Inference(::grpc::ServerContext* context,
                                               const ::mmdeploy::TensorList* request,
                                               ::mmdeploy::Reply* response) {
  // Reject requests that arrive before Init has built the network.
  if (snpe == nullptr) {
    response->set_status(-1);
    response->set_info("Stage Inference: network not initialized, call Init first.");
    return Status::OK;
  }

  // Get input names and number
  const auto& inputTensorNamesRef = snpe->getInputTensorNames();

  if (!inputTensorNamesRef) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }

  const auto& inputTensorNames = *inputTensorNamesRef;
  if (inputTensorNames.size() != static_cast<size_t>(request->data_size())) {
    response->set_status(-1);
    response->set_info("Stage Inference: input names count not match!");
    return Status::OK;
  }

  helper::TextTable table("Inference");
  table.padding(1);
  table.add("type").add("name").add("shape").add("content").eor();

  // Load input/output buffers with TensorMap
  {
    // ScopeTimer timer("convert input");

    for (int i = 0; i < request->data_size(); ++i) {
      const auto& tensor = request->data(i);
      std::vector<float> float_input;
      LoadFloatData(tensor.data(), float_input);

      zdl::DlSystem::ITensor* ptensor = inputTensorMap.getTensor(tensor.name().c_str());
      if (ptensor == nullptr) {
        fprintf(stderr, "Stage Inference: name: %s not existed in input tensor map\n",
                tensor.name().c_str());
        response->set_status(-1);
        response->set_info("cannot find name in input tensor map.");
        return Status::OK;
      }

      if (float_input.size() != ptensor->getSize()) {
        fprintf(stderr, "Stage Inference: input size not match, get %zu, expect %zu.\n",
                float_input.size(), ptensor->getSize());
        response->set_status(-1);
        response->set_info(zdl::DlSystem::getLastErrorString());
        return Status::OK;
      }

      std::copy(float_input.begin(), float_input.end(), ptensor->begin());

      table.add("IN").add(tensor.name()).add(ShapeStr(ptensor)).add(ContentStr(ptensor)).eor();
    }
  }

  // A tensor map for SNPE execution outputs
  zdl::DlSystem::TensorMap outputTensorMap;
  // Execute the multiple input tensorMap on the model with SNPE
  bool success = false;
  {
    ScopeTimer timer("execute", false);
    success = snpe->execute(inputTensorMap, outputTensorMap);

    if (!success) {
      response->set_status(-1);
      response->set_info(zdl::DlSystem::getLastErrorString());
      return Status::OK;
    }

    table.add("EXECUTE").add(std::to_string(timer.cost()) + "ms").eor();
  }

  {
    // ScopeTimer timer("convert output");
    auto out_names = outputTensorMap.getTensorNames();
    for (size_t i = 0; i < out_names.size(); ++i) {
      const char* name = out_names.at(i);
      zdl::DlSystem::ITensor* ptensor = outputTensorMap.getTensor(name);

      table.add("OUT").add(std::string(name)).add(ShapeStr(ptensor)).add(ContentStr(ptensor)).eor();

      const size_t data_length = ptensor->getSize();

      // Serialize the output elements into a byte string, one float at a time.
      std::string result;
      result.resize(sizeof(float) * data_length);
      int j = 0;
      for (auto it = ptensor->cbegin(); it != ptensor->cend(); ++it, j += sizeof(float)) {
        float f = *it;
        memcpy(&result[0] + j, reinterpret_cast<char*>(&f), sizeof(float));
      }

      auto shape = ptensor->getShape();

      ::mmdeploy::Tensor* pData = response->add_data();
      pData->set_dtype("float32");
      pData->set_name(name);
      pData->set_data(result);
      for (size_t k = 0; k < shape.rank(); ++k) {
        pData->add_shape(shape[k]);
      }
    }
  }

  std::cout << table << std::endl << std::endl;

  // build output status
  response->set_status(0);
  response->set_info("Stage Inference: success");
  return Status::OK;
}

// Tear down all SNPE state so a subsequent Init starts from scratch.
::grpc::Status InferenceServiceImpl::Destroy(::grpc::ServerContext* context,
                                             const ::mmdeploy::Empty* request,
                                             ::mmdeploy::Reply* response) {
  snpe.reset();
  container.reset();
  inputTensors.clear();
  response->set_status(0);
  zdl::SNPE::SNPEFactory::terminateLogging();
  return Status::OK;
}
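
// End-to-end call order for this service, as a hedged client-side sketch in
// comments. It assumes the generated stub is mmdeploy::Inference::NewStub and
// that the endpoint below is illustrative; only RPCs implemented above are
// used, and each call would need its own grpc::ClientContext:
//
//   auto stub = mmdeploy::Inference::NewStub(grpc::CreateChannel(
//       "localhost:60000", grpc::InsecureChannelCredentials()));
//   mmdeploy::Model model;   // model.set_weights(<raw .dlc bytes>)
//   mmdeploy::Reply reply;   // reply.status()/reply.info() carry the result
//   stub->Init(&init_ctx, model, &reply);              // build the network
//   stub->OutputNames(&names_ctx, empty, &names);      // discover outputs
//   stub->Inference(&infer_ctx, tensor_list, &reply);  // run one request
//   stub->Destroy(&destroy_ctx, empty, &reply);        // release SNPE state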