Compare commits

...

6 Commits

Author SHA1 Message Date
tpoisonooo ccd00c14ae fix 2022-07-11 13:12:19 +08:00
tpoisonooo a15bd81dbc Merge branch 'support-snpe-fix' into support-snpe 2022-07-11 13:11:18 +08:00
tpoisonooo 5666c88b9f fix(tools/deploy.py): support snpe 2022-07-11 13:09:16 +08:00
tpoisonooo 1ad3534814 improvement(snpe): fix error 2022-07-10 12:36:37 +08:00
tpoisonooo d1226cb167 feat(csrc/mmdeploy/backend_ops): support snpe 2022-07-08 21:09:47 +08:00
tpoisonooo 2c1f4b99cb feat(backend): add snpe support 2022-07-07 19:20:36 +08:00
31 changed files with 1808 additions and 124 deletions

View File

@ -0,0 +1 @@
backend_config = dict(type='snpe')

View File

@ -0,0 +1,3 @@
_base_ = ['./classification_static.py', '../_base_/backends/snpe.py']
onnx_config = dict(input_shape=None)

View File

@ -0,0 +1 @@
/home/PJLAB/konghuanjun/GitProjects/mmdeploy/resnet18/end2end.dlc

View File

@ -0,0 +1 @@
/home/PJLAB/konghuanjun/GitProjects/mmdeploy/resnet18/end2end.onnx

View File

@ -0,0 +1,82 @@
# Copyright 2015 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The Python implementation of the GRPC helloworld.Greeter client."""
from __future__ import print_function
import logging
import grpc
import inference_pb2
import inference_pb2_grpc
import os
import cv2
import numpy as np
def build_dummy_tensor():
img = cv2.imread('/home/PJLAB/konghuanjun/Downloads/snpe-1.55.0.2958/models/alexnet/data/chairs.jpg')
m = cv2.resize(img, (600, 400))
data = (m.astype(np.float32) - 127.5) / 127.5
print(data.shape)
tensor = inference_pb2.Tensor(data=data.tobytes(), shape=list(data.shape), name='data_0', dtype='float32')
return tensor
def run():
# NOTE(gRPC Python Team): .close() is possible on a channel and should be
# used in circumstances in which the with statement does not fit the needs
# of the code.
filename = 'end2end.dlc'
filesize = os.stat(filename).st_size
weights = bytes()
# with open(filename, 'rb') as f:
# weights = f.read(filesize)
# if len(weights) >= (2 << 29):
# print('model size too big')
# https://github.com/grpc/grpc/blob/v1.46.x/include/grpc/impl/codegen/grpc_types.h
# https://grpc.io/docs/guides/performance/
with grpc.insecure_channel('10.1.80.67:50051',
options=(
('grpc.keepalive_time_ms', 2000),
('grpc.max_send_message_length', 2<<29),
('grpc.keepalive_permit_without_calls', 1))) as channel:
print("channel type {}".format(type(channel)))
# with grpc.insecure_channel('[0:0:fe80::3455:bf2a]:50051') as channel:
stub = inference_pb2_grpc.InferenceStub(channel)
response = stub.Echo(inference_pb2.Empty())
print("Response echo {}".format(response))
model = inference_pb2.Model(name= filename, weights=weights, device=1)
print("Sending model to init, please wait...")
response = stub.Init(model)
print("Response init {}".format(response))
response = stub.OutputNames(inference_pb2.Empty())
print("Response outputnames {}".format(response))
tensor = build_dummy_tensor()
tensorList = inference_pb2.TensorList(datas = [tensor])
for x in range(1):
response = stub.Inference(tensorList)
if response.status == 0:
prob = np.frombuffer(response.datas[0].data, dtype=np.float32)
print("prob argmax: {} max: {}".format(prob.argmax(), prob.max()))
else:
print(response.info)
if __name__ == '__main__':
logging.basicConfig()
run()
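One way to exercise this test client, assuming the generated `inference_pb2*.py` stubs sit next to it and the inference server from the appendix document is already running on the device (the hard-coded `10.1.80.67:50051` address and the image path are the author's local test values and need to be replaced):

```bash
# grpcio, numpy and opencv-python are required on the host
$ python3 -m pip install grpcio numpy opencv-python
$ python3 client.py
```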

View File

@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: inference.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0finference.proto\x12\x08mmdeploy\"\x91\x01\n\x05Model\x12\x11\n\x04name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07weights\x18\x02 \x01(\x0c\x12+\n\x06\x64\x65vice\x18\x03 \x01(\x0e\x32\x16.mmdeploy.Model.DeviceH\x01\x88\x01\x01\"#\n\x06\x44\x65vice\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\x12\x07\n\x03\x44SP\x10\x02\x42\x07\n\x05_nameB\t\n\x07_device\"\x07\n\x05\x45mpty\"Q\n\x06Tensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\x05\x64type\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\r\n\x05shape\x18\x04 \x03(\x05\x42\x08\n\x06_dtype\"-\n\nTensorList\x12\x1f\n\x05\x64\x61tas\x18\x01 \x03(\x0b\x32\x10.mmdeploy.Tensor\"F\n\x05Reply\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0c\n\x04info\x18\x02 \x01(\t\x12\x1f\n\x05\x64\x61tas\x18\x03 \x03(\x0b\x32\x10.mmdeploy.Tensor\"\x16\n\x05Names\x12\r\n\x05names\x18\x01 \x03(\t2\xfb\x01\n\tInference\x12*\n\x04\x45\x63ho\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Reply\"\x00\x12*\n\x04Init\x12\x0f.mmdeploy.Model\x1a\x0f.mmdeploy.Reply\"\x00\x12\x31\n\x0bOutputNames\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Names\"\x00\x12\x34\n\tInference\x12\x14.mmdeploy.TensorList\x1a\x0f.mmdeploy.Reply\"\x00\x12-\n\x07\x44\x65stroy\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Reply\"\x00\x42%\n\rmmdeploy.snpeB\x0bSNPEWrapperP\x01\xa2\x02\x04SNPEb\x06proto3')
_MODEL = DESCRIPTOR.message_types_by_name['Model']
_EMPTY = DESCRIPTOR.message_types_by_name['Empty']
_TENSOR = DESCRIPTOR.message_types_by_name['Tensor']
_TENSORLIST = DESCRIPTOR.message_types_by_name['TensorList']
_REPLY = DESCRIPTOR.message_types_by_name['Reply']
_NAMES = DESCRIPTOR.message_types_by_name['Names']
_MODEL_DEVICE = _MODEL.enum_types_by_name['Device']
Model = _reflection.GeneratedProtocolMessageType('Model', (_message.Message,), {
'DESCRIPTOR' : _MODEL,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Model)
})
_sym_db.RegisterMessage(Model)
Empty = _reflection.GeneratedProtocolMessageType('Empty', (_message.Message,), {
'DESCRIPTOR' : _EMPTY,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Empty)
})
_sym_db.RegisterMessage(Empty)
Tensor = _reflection.GeneratedProtocolMessageType('Tensor', (_message.Message,), {
'DESCRIPTOR' : _TENSOR,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Tensor)
})
_sym_db.RegisterMessage(Tensor)
TensorList = _reflection.GeneratedProtocolMessageType('TensorList', (_message.Message,), {
'DESCRIPTOR' : _TENSORLIST,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.TensorList)
})
_sym_db.RegisterMessage(TensorList)
Reply = _reflection.GeneratedProtocolMessageType('Reply', (_message.Message,), {
'DESCRIPTOR' : _REPLY,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Reply)
})
_sym_db.RegisterMessage(Reply)
Names = _reflection.GeneratedProtocolMessageType('Names', (_message.Message,), {
'DESCRIPTOR' : _NAMES,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Names)
})
_sym_db.RegisterMessage(Names)
_INFERENCE = DESCRIPTOR.services_by_name['Inference']
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b'\n\rmmdeploy.snpeB\013SNPEWrapperP\001\242\002\004SNPE'
_MODEL._serialized_start=30
_MODEL._serialized_end=175
_MODEL_DEVICE._serialized_start=120
_MODEL_DEVICE._serialized_end=155
_EMPTY._serialized_start=177
_EMPTY._serialized_end=184
_TENSOR._serialized_start=186
_TENSOR._serialized_end=267
_TENSORLIST._serialized_start=269
_TENSORLIST._serialized_end=314
_REPLY._serialized_start=316
_REPLY._serialized_end=386
_NAMES._serialized_start=388
_NAMES._serialized_end=410
_INFERENCE._serialized_start=413
_INFERENCE._serialized_end=664
# @@protoc_insertion_point(module_scope)

View File

@ -0,0 +1,205 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import inference_pb2 as inference__pb2
class InferenceStub(object):
"""The inference service definition.
"""
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.Echo = channel.unary_unary(
'/mmdeploy.Inference/Echo',
request_serializer=inference__pb2.Empty.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
self.Init = channel.unary_unary(
'/mmdeploy.Inference/Init',
request_serializer=inference__pb2.Model.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
self.OutputNames = channel.unary_unary(
'/mmdeploy.Inference/OutputNames',
request_serializer=inference__pb2.Empty.SerializeToString,
response_deserializer=inference__pb2.Names.FromString,
)
self.Inference = channel.unary_unary(
'/mmdeploy.Inference/Inference',
request_serializer=inference__pb2.TensorList.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
self.Destroy = channel.unary_unary(
'/mmdeploy.Inference/Destroy',
request_serializer=inference__pb2.Empty.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
class InferenceServicer(object):
"""The inference service definition.
"""
def Echo(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Init(self, request, context):
"""Init Model with model file
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def OutputNames(self, request, context):
"""Get output names
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Inference(self, request, context):
"""Inference with inputs
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Destroy(self, request, context):
"""Destroy handle
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def add_InferenceServicer_to_server(servicer, server):
rpc_method_handlers = {
'Echo': grpc.unary_unary_rpc_method_handler(
servicer.Echo,
request_deserializer=inference__pb2.Empty.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
'Init': grpc.unary_unary_rpc_method_handler(
servicer.Init,
request_deserializer=inference__pb2.Model.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
'OutputNames': grpc.unary_unary_rpc_method_handler(
servicer.OutputNames,
request_deserializer=inference__pb2.Empty.FromString,
response_serializer=inference__pb2.Names.SerializeToString,
),
'Inference': grpc.unary_unary_rpc_method_handler(
servicer.Inference,
request_deserializer=inference__pb2.TensorList.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
'Destroy': grpc.unary_unary_rpc_method_handler(
servicer.Destroy,
request_deserializer=inference__pb2.Empty.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'mmdeploy.Inference', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class Inference(object):
"""The inference service definition.
"""
@staticmethod
def Echo(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Echo',
inference__pb2.Empty.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Init(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Init',
inference__pb2.Model.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def OutputNames(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/OutputNames',
inference__pb2.Empty.SerializeToString,
inference__pb2.Names.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Inference(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Inference',
inference__pb2.TensorList.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Destroy(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Destroy',
inference__pb2.Empty.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

View File

@ -0,0 +1,70 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "mmdeploy.snpe";
option java_outer_classname = "SNPEWrapper";
option objc_class_prefix = "SNPE";
package mmdeploy;
// The inference service definition.
service Inference {
rpc Echo(Empty) returns (Reply) {}
// Init Model with model file
rpc Init(Model) returns (Reply) {}
// Get output names
rpc OutputNames(Empty) returns (Names) {}
// Inference with inputs
rpc Inference(TensorList) returns (Reply) {}
// Destroy handle
rpc Destroy(Empty) returns (Reply) {}
}
message Model {
optional string name = 1;
// bin
bytes weights = 2;
// config
enum Device {
CPU = 0;
GPU = 1;
DSP = 2;
}
optional Device device = 3;
}
// https://stackoverflow.com/questions/31768665/can-i-define-a-grpc-call-with-a-null-request-or-response
message Empty {}
message Tensor {
// name
string name = 1;
// datatype
optional string dtype = 2;
// data
bytes data = 3;
// shape
repeated int32 shape = 4;
}
message TensorList {
repeated Tensor datas = 1;
}
message Reply {
int32 status = 1;
string info = 2;
repeated Tensor datas = 3;
}
message Names {
repeated string names = 1;
}
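If the `.proto` interface is changed (as the appendix document notes), the checked-in Python stubs (`inference_pb2.py`, `inference_pb2_grpc.py`) have to be regenerated; the C++ stubs are produced at build time by the `add_custom_command` in the service CMakeLists further below. A minimal sketch, assuming `grpcio-tools` is installed and the command is run from the directory containing `inference.proto`:

```bash
$ python3 -m pip install grpcio-tools
$ python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. inference.proto
```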

View File

@ -0,0 +1,81 @@
# Copyright 2018 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cmake build file for C++ helloworld example.
# Assumes protobuf and gRPC have been installed using cmake.
# See cmake_externalproject/CMakeLists.txt for all-in-one cmake build
# that automatically builds all the dependencies before building helloworld.
cmake_minimum_required(VERSION 3.5.1)
project(SNPEServer C CXX)
include(./common.cmake)
# Proto file
get_filename_component(hw_proto "../inference.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)
# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/inference.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/inference.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/inference.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/inference.grpc.pb.h")
add_custom_command(
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${hw_proto}"
DEPENDS "${hw_proto}")
# Include generated *.pb.h files
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
# hw_grpc_proto
add_library(hw_grpc_proto
${hw_grpc_srcs}
${hw_grpc_hdrs}
${hw_proto_srcs}
${hw_proto_hdrs})
target_link_libraries(hw_grpc_proto
${_REFLECTION}
${_GRPC_GRPCPP}
${_PROTOBUF_LIBPROTOBUF})
add_library(snpe SHARED IMPORTED)
if (NOT EXISTS $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/)
    message(FATAL_ERROR "snpe directory does not exist: $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/")
endif()
set_target_properties(snpe PROPERTIES
IMPORTED_LOCATION "$ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/libSNPE.so"
INTERFACE_INCLUDE_DIRECTORIES "$ENV{SNPE_ROOT}/include/zdl"
)
target_link_directories(
snpe
INTERFACE
)
add_executable(inference_server inference_server.cc service_impl.cpp)
target_link_libraries(inference_server
hw_grpc_proto
${_REFLECTION}
${_GRPC_GRPCPP}
${_PROTOBUF_LIBPROTOBUF}
snpe)

View File

@ -0,0 +1,10 @@
export ANDROID_NDK=/home/PJLAB/konghuanjun/Downloads/android-ndk-r17c
cmake .. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_shared \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc

View File

@ -0,0 +1,123 @@
# Copyright 2018 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cmake build file for C++ route_guide example.
# Assumes protobuf and gRPC have been installed using cmake.
# See cmake_externalproject/CMakeLists.txt for all-in-one cmake build
# that automatically builds all the dependencies before building route_guide.
cmake_minimum_required(VERSION 3.5.1)
set (CMAKE_CXX_STANDARD 14)
if(MSVC)
add_definitions(-D_WIN32_WINNT=0x600)
endif()
find_package(Threads REQUIRED)
if(GRPC_AS_SUBMODULE)
# One way to build a projects that uses gRPC is to just include the
# entire gRPC project tree via "add_subdirectory".
# This approach is very simple to use, but there are some potential
# disadvantages:
# * it includes gRPC's CMakeLists.txt directly into your build script,
#   and that can make gRPC's internal settings interfere with your
#   own build.
# * depending on what's installed on your system, the contents of submodules
# in gRPC's third_party/* might need to be available (and there might be
# additional prerequisites required to build them). Consider using
# the gRPC_*_PROVIDER options to fine-tune the expected behavior.
#
# A more robust approach to add dependency on gRPC is using
# cmake's ExternalProject_Add (see cmake_externalproject/CMakeLists.txt).
# Include the gRPC's cmake build (normally grpc source code would live
# in a git submodule called "third_party/grpc", but this example lives in
# the same repository as gRPC sources, so we just look a few directories up)
add_subdirectory(../../.. ${CMAKE_CURRENT_BINARY_DIR}/grpc EXCLUDE_FROM_ALL)
message(STATUS "Using gRPC via add_subdirectory.")
# After using add_subdirectory, we can now use the grpc targets directly from
# this build.
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
if(CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif()
set(_GRPC_GRPCPP grpc++)
if(CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
endif()
elseif(GRPC_FETCHCONTENT)
# Another way is to use CMake's FetchContent module to clone gRPC at
# configure time. This makes gRPC's source code available to your project,
# similar to a git submodule.
message(STATUS "Using gRPC via add_subdirectory (FetchContent).")
include(FetchContent)
FetchContent_Declare(
grpc
GIT_REPOSITORY https://github.com/grpc/grpc.git
# when using gRPC, you will actually set this to an existing tag, such as
# v1.25.0, v1.26.0 etc..
# For the purpose of testing, we override the tag used to the commit
# that's currently under test.
GIT_TAG vGRPC_TAG_VERSION_OF_YOUR_CHOICE)
FetchContent_MakeAvailable(grpc)
# Since FetchContent uses add_subdirectory under the hood, we can use
# the grpc targets directly from this build.
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
set(_PROTOBUF_PROTOC $<TARGET_FILE:protoc>)
set(_GRPC_GRPCPP grpc++)
if(CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
endif()
else()
# This branch assumes that gRPC and all its dependencies are already installed
# on this system, so they can be located by find_package().
# Find Protobuf installation
# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation.
set(protobuf_MODULE_COMPATIBLE TRUE)
find_package(Protobuf CONFIG REQUIRED)
message(STATUS "Using protobuf ${Protobuf_VERSION}")
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
set(_REFLECTION gRPC::grpc++_reflection)
if(CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif()
# Find gRPC installation
# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation.
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
set(_GRPC_GRPCPP gRPC::grpc++)
if(CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
endif()
endif()

View File

@ -0,0 +1,89 @@
/*
*
* Copyright 2015 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Copyright (c) OpenMMLab. All rights reserved.
#include <arpa/inet.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <iostream>
#include "service_impl.h"
void PrintIPv4() {
struct ifaddrs* ifAddrStruct = NULL;
void* tmpAddrPtr = NULL;
getifaddrs(&ifAddrStruct);
while (ifAddrStruct != NULL) {
    if (ifAddrStruct->ifa_addr == NULL) {
      // some interfaces may have no address attached
      ifAddrStruct = ifAddrStruct->ifa_next;
      continue;
    }
    if (ifAddrStruct->ifa_addr->sa_family == AF_INET) {
tmpAddrPtr = &((struct sockaddr_in*)ifAddrStruct->ifa_addr)->sin_addr;
char addressBuffer[INET_ADDRSTRLEN];
inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN);
fprintf(stdout, "%s IP Address %s\n", ifAddrStruct->ifa_name,
addressBuffer);
} else if (ifAddrStruct->ifa_addr->sa_family == AF_INET6) {
      tmpAddrPtr = &((struct sockaddr_in6*)ifAddrStruct->ifa_addr)->sin6_addr;
char addressBuffer[INET6_ADDRSTRLEN];
inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN);
fprintf(stdout, "%s IP Address %s\n", ifAddrStruct->ifa_name,
addressBuffer);
}
ifAddrStruct = ifAddrStruct->ifa_next;
}
}
void RunServer() {
// listen IPv4 and IPv6
std::string server_address("[::]:50051");
InferenceServiceImpl service;
grpc::EnableDefaultHealthCheckService(true);
grpc::reflection::InitProtoReflectionServerBuilderPlugin();
ServerBuilder builder;
// Listen on the given address without any authentication mechanism.
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
  // max gRPC message size: 2 << 29 bytes (1 GiB)
builder.SetMaxMessageSize(2 << 29);
builder.SetMaxSendMessageSize(2 << 29);
// Register "service" as the instance through which we'll communicate with
// clients. In this case it corresponds to an *synchronous* service.
builder.RegisterService(&service);
// Finally assemble the server.
std::unique_ptr<Server> server(builder.BuildAndStart());
std::cout << "Server listening on " << server_address << std::endl;
// Wait for the server to shutdown. Note that some other thread must be
// responsible for shutting down the server for this call to ever return.
server->Wait();
}
int main(int argc, char** argv) {
PrintIPv4();
RunServer();
return 0;
}

View File

@ -0,0 +1,265 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "service_impl.h"
#include <getopt.h>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>
zdl::DlSystem::Runtime_t InferenceServiceImpl::CheckRuntime(
zdl::DlSystem::Runtime_t runtime, bool& staticQuantization) {
static zdl::DlSystem::Version_t Version =
zdl::SNPE::SNPEFactory::getLibraryVersion();
fprintf(stdout, "SNPE Version: %s\n", Version.asString().c_str());
if ((runtime != zdl::DlSystem::Runtime_t::DSP) && staticQuantization) {
fprintf(stderr,
"ERROR: Cannot use static quantization with CPU/GPU runtimes. "
"It is only designed for DSP/AIP runtimes.\n"
"ERROR: Proceeding without static quantization on selected "
"runtime.\n");
staticQuantization = false;
}
if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
fprintf(stderr, "Selected runtime not present. Falling back to CPU.\n");
runtime = zdl::DlSystem::Runtime_t::CPU;
}
return runtime;
}
std::unique_ptr<zdl::SNPE::SNPE> InferenceServiceImpl::SetBuilderOptions(
std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
zdl::DlSystem::Runtime_t runtime, zdl::DlSystem::RuntimeList runtimeList,
bool useUserSuppliedBuffers, zdl::DlSystem::PlatformConfig platformConfig,
bool useCaching) {
std::unique_ptr<zdl::SNPE::SNPE> psnpe;
zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
if (runtimeList.empty()) {
runtimeList.add(runtime);
}
psnpe = snpeBuilder.setOutputLayers({})
.setRuntimeProcessorOrder(runtimeList)
.setUseUserSuppliedBuffers(useUserSuppliedBuffers)
.setPlatformConfig(platformConfig)
.setInitCacheMode(useCaching)
.build();
return psnpe;
}
std::string InferenceServiceImpl::SaveDLC(const ::mmdeploy::Model* request) {
std::string filename = "tmp.dlc";
if (request->has_name()) {
filename = request->name();
}
auto model = request->weights();
fprintf(stdout, "saving file to %s\n", filename.c_str());
std::ofstream fout;
fout.open(filename, std::ios::binary | std::ios::out);
fout.write(model.data(), model.size());
fout.flush();
fout.close();
return filename;
}
void InferenceServiceImpl::LoadFloatData(const std::string& data,
std::vector<float>& vec) {
size_t len = data.size();
assert(len % sizeof(float) == 0);
const char* ptr = data.data();
for (int i = 0; i < len; i += sizeof(float)) {
vec.push_back(*(float*)(ptr + i));
}
}
::grpc::Status InferenceServiceImpl::Echo(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) {
fprintf(stdout, "Stage Echo: recv command\n");
response->set_info("echo");
return Status::OK;
}
// Logic and data behind the server's behavior.
::grpc::Status InferenceServiceImpl::Init(::grpc::ServerContext* context,
const ::mmdeploy::Model* request,
::mmdeploy::Reply* response) {
fprintf(stdout, "Stage Init: recv command\n");
// std::string filename = SaveDLC(request);
std::string filename = "end2end.dlc";
if (snpe != nullptr) {
snpe.reset();
}
if (container != nullptr) {
container.reset();
}
container =
zdl::DlContainer::IDlContainer::open(zdl::DlSystem::String(filename));
if (container == nullptr) {
fprintf(stdout, "Stage Init: load dlc failed.\n");
response->set_status(-1);
response->set_info(zdl::DlSystem::getLastErrorString());
return Status::OK;
}
zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::CPU;
if (request->has_device()) {
switch (request->device()) {
case mmdeploy::Model_Device_GPU:
runtime = zdl::DlSystem::Runtime_t::GPU;
break;
case mmdeploy::Model_Device_DSP:
runtime = zdl::DlSystem::Runtime_t::DSP;
default:
break;
}
}
if (runtime != zdl::DlSystem::Runtime_t::CPU) {
bool static_quant = false;
runtime = CheckRuntime(runtime, static_quant);
}
zdl::DlSystem::RuntimeList runtimeList;
runtimeList.add(runtime);
zdl::DlSystem::PlatformConfig platformConfig;
snpe = SetBuilderOptions(container, runtime, runtimeList, false,
platformConfig, false);
  if (snpe == nullptr) {
    response->set_status(-2);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
response->set_status(0);
response->set_info("Stage Init: success");
return Status::OK;
}
::grpc::Status InferenceServiceImpl::OutputNames(
::grpc::ServerContext* context, const ::mmdeploy::Empty* request,
::mmdeploy::Names* response) {
const auto& outputTensorNamesRef = snpe->getOutputTensorNames();
const auto& outputTensorNames = *outputTensorNamesRef;
for (int i = 0; i < outputTensorNames.size(); ++i) {
response->add_names(outputTensorNames.at(i));
}
return Status::OK;
}
::grpc::Status InferenceServiceImpl::Inference(
::grpc::ServerContext* context, const ::mmdeploy::TensorList* request,
::mmdeploy::Reply* response) {
// Get input names and number
fprintf(stdout, "Stage Inference: command\n");
const auto& inputTensorNamesRef = snpe->getInputTensorNames();
if (!inputTensorNamesRef) {
response->set_status(-1);
response->set_info(zdl::DlSystem::getLastErrorString());
return Status::OK;
}
const auto& inputTensorNames = *inputTensorNamesRef;
if (inputTensorNames.size() != request->datas_size()) {
response->set_status(-2);
response->set_info("Stage Inference: input names count not match !");
return Status::OK;
}
std::vector<std::unique_ptr<zdl::DlSystem::ITensor>> inputTensors(
inputTensorNames.size());
zdl::DlSystem::TensorMap inputTensorMap;
// Load input/output buffers with TensorMap
for (int i = 0; i < request->datas_size(); ++i) {
auto tensor = request->datas(i);
std::vector<float> float_input;
LoadFloatData(tensor.data(), float_input);
const auto& inputShape_opt =
snpe->getInputDimensions(tensor.name().c_str());
const auto& inputShape = *inputShape_opt;
fprintf(stdout, "Stage Inference: tensor name: %s input data len %lu, [",
inputTensorNames.at(i), float_input.size());
for (int j = 0; j < inputShape.rank(); ++j) {
fprintf(stdout, " %ld,", inputShape[j]);
}
fprintf(stdout, "]\n");
inputTensors[i] =
zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
std::copy(float_input.begin(), float_input.end(), inputTensors[i]->begin());
inputTensorMap.add(tensor.name().c_str(), inputTensors[i].get());
}
// A tensor map for SNPE execution outputs
zdl::DlSystem::TensorMap outputTensorMap;
// Execute the multiple input tensorMap on the model with SNPE
bool success = snpe->execute(inputTensorMap, outputTensorMap);
if (!success) {
// build output status
response->set_status(-3);
response->set_info(zdl::DlSystem::getLastErrorString());
return Status::OK;
}
// build output tensor list
{
auto out_names = outputTensorMap.getTensorNames();
for (size_t i = 0; i < out_names.size(); ++i) {
const char* name = out_names.at(i);
zdl::DlSystem::ITensor* pTensor = outputTensorMap.getTensor(name);
size_t data_length = pTensor->getSize();
std::string result;
result.resize(sizeof(float) * data_length);
int j = 0;
for (auto it = pTensor->cbegin(); it != pTensor->cend();
++it, j += sizeof(float)) {
float f = *it;
memcpy(&result[0] + j, reinterpret_cast<char*>(&f), sizeof(float));
}
::mmdeploy::Tensor* pData = response->add_datas();
pData->set_dtype("float32");
pData->set_name(name);
pData->set_data(result);
}
}
// build output status
response->set_status(0);
response->set_info("Stage Inference: success");
return Status::OK;
}
::grpc::Status InferenceServiceImpl::Destroy(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) {
snpe.reset();
container.reset();
response->set_status(0);
return Status::OK;
}

View File

@ -0,0 +1,80 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef SERVICE_IMPL_H
#define SERVICE_IMPL_H
#include <iostream>
#include <memory>
#include <string>
#include "DiagLog/IDiagLog.hpp"
#include "DlContainer/IDlContainer.hpp"
#include "DlSystem/DlEnums.hpp"
#include "DlSystem/DlError.hpp"
#include "DlSystem/ITensorFactory.hpp"
#include "DlSystem/IUserBuffer.hpp"
#include "DlSystem/PlatformConfig.hpp"
#include "DlSystem/RuntimeList.hpp"
#include "DlSystem/UserBufferMap.hpp"
#include "SNPE/SNPE.hpp"
#include "SNPE/SNPEBuilder.hpp"
#include "SNPE/SNPEFactory.hpp"
#include "inference.grpc.pb.h"
#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <grpcpp/grpcpp.h>
#include <grpcpp/health_check_service_interface.h>
using grpc::Server;
using grpc::ServerBuilder;
using grpc::ServerContext;
using grpc::Status;
using mmdeploy::Empty;
using mmdeploy::Inference;
using mmdeploy::Model;
using mmdeploy::Reply;
using mmdeploy::Tensor;
using mmdeploy::TensorList;
// Logic and data behind the server's behavior.
class InferenceServiceImpl final : public Inference::Service {
::grpc::Status Echo(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) override;
// Init Model with model file
::grpc::Status Init(::grpc::ServerContext* context,
const ::mmdeploy::Model* request,
::mmdeploy::Reply* response) override;
// Get output names
::grpc::Status OutputNames(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Names* response) override;
// Inference with inputs
::grpc::Status Inference(::grpc::ServerContext* context,
const ::mmdeploy::TensorList* request,
::mmdeploy::Reply* response) override;
  // Destroy handle
::grpc::Status Destroy(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) override;
std::string SaveDLC(const ::mmdeploy::Model* request);
void LoadFloatData(const std::string& data, std::vector<float>& vec);
zdl::DlSystem::Runtime_t CheckRuntime(zdl::DlSystem::Runtime_t runtime,
bool& staticQuantization);
std::unique_ptr<zdl::SNPE::SNPE> SetBuilderOptions(
std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
zdl::DlSystem::Runtime_t runtime, zdl::DlSystem::RuntimeList runtimeList,
bool useUserSuppliedBuffers, zdl::DlSystem::PlatformConfig platformConfig,
bool useCaching);
std::unique_ptr<zdl::SNPE::SNPE> snpe;
std::unique_ptr<zdl::DlContainer::IDlContainer> container;
};
#endif

View File

@ -0,0 +1,17 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_snpe_net)
if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
    # import libSNPE.so the same way the standalone gRPC service CMakeLists does
    if (NOT EXISTS $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/)
        message(FATAL_ERROR "snpe directory does not exist: $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/")
    endif ()
    add_library(snpe SHARED IMPORTED)
    set_target_properties(snpe PROPERTIES
        IMPORTED_LOCATION "$ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/libSNPE.so"
        INTERFACE_INCLUDE_DIRECTORIES "$ENV{SNPE_ROOT}/include/zdl")
    mmdeploy_add_module(${PROJECT_NAME} snpe_net.cpp)
    target_link_libraries(${PROJECT_NAME} PRIVATE snpe)
    add_library(mmdeploy::snpe_net ALIAS ${PROJECT_NAME})
else ()
message(
ERROR
"'snpe_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}")
endif ()

View File

@ -0,0 +1,142 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "snpe_net.h"

#include "SNPE/SNPEBuilder.hpp"
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/formatter.h"
namespace mmdeploy {
SNPENet::~SNPENet() {}
Result<void> SNPENet::Init(const Value& args) {
  auto& context = args["context"];
  device_ = context["device"].get<Device>();
  stream_ = context["stream"].get<Stream>();
  if (!device_.is_host()) {
    return Status(eNotSupported);
  }
  auto name = args["name"].get<std::string>();
  auto model = context["model"].get<Model>();
  OUTCOME_TRY(auto config, model.GetModelConfig(name));
  container_ = zdl::DlContainer::IDlContainer::open(zdl::DlSystem::String(config.net));
  if (container_ == nullptr) {
    MMDEPLOY_ERROR("Load .dlc failed: {}", config.net);
    return Status(eInvalidArgument);
  }
  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::GPU;
  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    MMDEPLOY_WARN("Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }
  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;
  // build the SNPE instance, mirroring SetBuilderOptions in the gRPC service
  {
    zdl::SNPE::SNPEBuilder snpeBuilder(container_.get());
    snpe_ = snpeBuilder.setOutputLayers({})
                .setRuntimeProcessorOrder(runtimeList)
                .setUseUserSuppliedBuffers(false)
                .setPlatformConfig(platformConfig)
                .setInitCacheMode(false)
                .build();
  }
  // init internal input tensor list
  const auto& inputTensorNamesRef = snpe_->getInputTensorNames();
  const auto& inputTensorNames = *inputTensorNamesRef;
  inputs_internal_.resize(inputTensorNames.size());
  for (size_t i = 0; i < inputTensorNames.size(); ++i) {
    const auto& inputShape_opt = snpe_->getInputDimensions(inputTensorNames.at(i));
    const auto& inputShape = *inputShape_opt;
    inputs_internal_[i] = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
    input_tensor_map_.add(inputTensorNames.at(i), inputs_internal_[i].get());
  }
  return success();
}
Result<void> SNPENet::Deinit() {
  input_tensor_map_.clear();
  container_.reset();
  snpe_.reset();
  return success();
}
Result<void> SNPENet::Reshape(Span<TensorShape> input_shapes) {
  for (size_t i = 0; i < input_shapes.size(); ++i) {
    input_tensors_[i].Reshape(input_shapes[i]);
  }
  return success();
}
Result<Span<Tensor>> SNPENet::GetInputTensors() { return input_tensors_; }
Result<Span<Tensor>> SNPENet::GetOutputTensors() { return output_tensors_; }
Result<void> SNPENet::Forward() {
  OUTCOME_TRY(stream_.Wait());
  // copy the host input tensors into the internal SNPE input tensors
  const int LEN = inputs_internal_.size();
  for (int i = 0; i < LEN; ++i) {
    float* from = input_tensors_[i].data<float>();
    std::vector<float> vec = {from, from + input_tensors_[i].size()};
    std::copy(vec.begin(), vec.end(), inputs_internal_[i]->begin());
  }
  zdl::DlSystem::TensorMap output_tensor_map;
  bool ret = snpe_->execute(input_tensor_map_, output_tensor_map);
  if (!ret) {
    MMDEPLOY_ERROR("snpe Inference error: {}", std::string(zdl::DlSystem::getLastErrorString()));
    return Status(eFail);
  }
  // extract result
  auto out_names = output_tensor_map.getTensorNames();
  for (size_t i = 0; i < out_names.size(); ++i) {
    const char* name = out_names.at(i);
    zdl::DlSystem::ITensor* pTensor = output_tensor_map.getTensor(name);
    auto& tensor = output_tensors_[i];
    // convert the SNPE shape to an mmdeploy TensorShape, then copy the data out
    auto shape = pTensor->getShape();
    TensorShape tensor_shape(shape.rank());
    for (size_t j = 0; j < shape.rank(); ++j) {
      tensor_shape[j] = shape[j];
    }
    tensor.Reshape(tensor_shape);
    float* to = tensor.data<float>();
    int j = 0;
    for (auto it = pTensor->cbegin(); it != pTensor->cend(); ++it, ++j) {
      to[j] = *it;
    }
  }
  return success();
}
class SNPENetCreator : public Creator<Net> {
 public:
  const char* GetName() const override { return "snpe"; }
  int GetVersion() const override { return 0; }
  std::unique_ptr<Net> Create(const Value& args) override {
    auto p = std::make_unique<SNPENet>();
    if (auto r = p->Init(args)) {
      return p;
    } else {
      MMDEPLOY_ERROR("error creating SNPENet: {}", r.error().message().c_str());
      return nullptr;
    }
  }
};
REGISTER_MODULE(Net, SNPENetCreator);
}  // namespace mmdeploy

View File

@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_NET_SNPE_SNPE_NET_H_
#define MMDEPLOY_SRC_NET_SNPE_SNPE_NET_H_
#include "mmdeploy/core/net.h"

#include "DlContainer/IDlContainer.hpp"
#include "DlSystem/ITensor.hpp"
#include "DlSystem/TensorMap.hpp"
#include "SNPE/SNPE.hpp"
namespace mmdeploy {
class SNPENet : public Net {
public:
~SNPENet() override;
Result<void> Init(const Value& args) override;
Result<void> Deinit() override;
Result<Span<Tensor>> GetInputTensors() override;
Result<Span<Tensor>> GetOutputTensors() override;
Result<void> Reshape(Span<TensorShape> input_shapes) override;
Result<void> Forward() override;
Result<void> ForwardAsync(Event* event) override { return Status(eNotSupported); };
private:
Device device_;
Stream stream_;
std::vector<Tensor> input_tensors_;
std::vector<Tensor> output_tensors_;
std::unique_ptr<zdl::SNPE::SNPE> snpe_;
std::unique_ptr<zdl::DlContainer::IDlContainer> container_;
std::vector<std::unique_ptr<zdl::DlSystem::ITensor>> inputs_internal_;
zdl::DlSystem::TensorMap input_tensor_map_;
};
} // namespace mmdeploy
#endif  // MMDEPLOY_SRC_NET_SNPE_SNPE_NET_H_

View File

@ -1,112 +0,0 @@
# Cross-compiling android gRPC on Linux

mmdeploy already provides a prebuilt snpe inference server. If you want to build it yourself, or need to modify the .proto interface, you can refer to this document.

Note that the official gRPC documentation does not provide complete NDK support.

## Environment

| Item     | Version        | Remarks                                           |
| -------- | -------------- | ------------------------------------------------- |
| snpe     | 1.63.0.3523    | -                                                 |
| host OS  | ubuntu18.04    | version specified by the snpe1.63.0 documentation |
| NDK      | r17c           | version specified by the snpe1.63.0 documentation |
| gRPC     | commit 6f698b5 | -                                                 |
| Hardware | Redmi K40      | requires a qcom chip                              |

## Cross-compile gRPC with the NDK

1. Pull the gRPC repo and build `protoc` and `grpc_cpp_plugin` on the host
```bash
# install dependencies
$ apt-get update && apt-get install -y libssl-dev
# build
$ git clone https://github.com/grpc/grpc --recursive=1 --depth=1
$ mkdir -p cmake/build
$ pushd cmake/build
$ cmake \
-DCMAKE_BUILD_TYPE=Release \
-DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
-DgRPC_SSL_PROVIDER=package \
../..
# protoc and grpc_cpp_plugin need to be installed into the host environment
$ make -j
$ sudo make install
```
2. Download the NDK and cross-compile the static libraries needed for android aarch64
```bash
$ wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
$ unzip android-ndk-r17c-linux-x86_64.zip
# set the environment variable
$ export ANDROID_NDK=/path/to/android-ndk-r17c
# build
$ cd /path/to/grpc
$ mkdir -p cmake/build_aarch64 && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_static \
-DRUN_HAVE_STD_REGEX=0 \
-DRUN_HAVE_POSIX_REGEX=0 \
-DRUN_HAVE_STEADY_CLOCK=0 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/tmp/android_grpc_install
$ make -j
$ make install
```
3. After the install finishes, `/tmp/android_grpc_install` should contain the complete installation files
```bash
$ cd /tmp/android_grpc_install
$ tree -L 1
.
├── bin
├── include
├── lib
└── share
```
## Test gRPC

1. Build the helloworld example that ships with gRPC
```bash
$ cd /path/to/grpc/examples/cpp/helloworld/
$ mkdir cmake/build_aarch64 -p && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_static \
-DRUN_HAVE_STD_REGEX=0 \
-DRUN_HAVE_POSIX_REGEX=0 \
-DRUN_HAVE_STEADY_CLOCK=0 \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install/lib/cmake/grpc
$ make -j
$ ls greeter*
greeter_async_client greeter_async_server greeter_callback_server greeter_server
greeter_async_client2 greeter_callback_client greeter_client
```
2. Enable adb debugging on the phone and push the build artifacts to the `/data/local/tmp` directory
```bash
$ adb push greeter* /data/local/tmp
```
3. `adb shell` into the phone and run the client/server
```bash
/data/local/tmp $ ./greeter_client
Greeter received: Hello world
```
## References
* gRPC cross build script https://raw.githubusercontent.com/grpc/grpc/master/test/distrib/cpp/run_distrib_test_cmake_aarch64_cross.sh
* stackoverflow https://stackoverflow.com/questions/54052229/build-grpc-c-for-android-using-ndk-arm-linux-androideabi-clang-compiler

View File

@ -42,3 +42,4 @@ git clone -b master git@github.com:open-mmlab/mmdeploy.git --recursive
- [Windows](windows.md)
- [Android-aarch64](android.md)
- [NVIDIA Jetson](jetsons.md)
- [snpe](snpe.md)

View File

@ -1 +1,39 @@
##
# SNPE support

mmdeploy integrates snpe in a simple and effective way: a Client/Server mode.

This mode:

1. Decouples the `model conversion` and `inference` environments:
   - everything unrelated to inference runs on a device with more compute power;
   - inference itself returns real gpu/npu results instead of numbers from a CPU simulator.
2. Covers cost-sensitive devices. armv7/risc-v/mips chips meet product requirements but often have limited Python support;
3. Simplifies the mmdeploy installation. If you only want to convert a .dlc model and test its accuracy, you do not need to touch the snpe tutorial.

## 1. Deploy the inference service

Download the prebuilt snpe inference server package, `adb push` it to the phone, and run it (a launch sketch follows the commands below).
```bash
$ wget https://media.githubusercontent.com/media/tpoisonooo/mmdeploy-onnx2ncnn-testdata/main/snpe-inference-server-1.59.zip
$ unzip snpe-inference-server-1.59.zip
$ adb push snpe-inference-server-1.59 /data/local/tmp/
```
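A possible way to launch the service on the phone, assuming the unpacked package contains the `inference_server` binary built in the appendix document (the binary name and on-device path are assumptions):

```bash
$ adb shell
$ cd /data/local/tmp/snpe-inference-server-1.59
$ chmod +x inference_server && ./inference_server
```

On startup the server prints the device IP addresses and listens on port 50051; that `ip:port` pair is what the host side later passes as the gRPC uri.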
If you need to build it yourself, refer to [Cross-compiling the snpe inference server with the NDK](../06-appendix/cross-build-ndk-gRPC.md).

## 2. Install mmdeploy

1. Environment requirements

| Item    | Version            | Remarks                           |
| ------- | ------------------ | --------------------------------- |
| host OS | ubuntu18.04 x86_64 | version required by the snpe tool |
| Python  | 3.6.0              | version required by the snpe tool |
## 3. Test the model
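A minimal conversion-and-test sketch through the remote snpe service, using the `--uri` option added to `tools/deploy.py` in this change; the deploy config, model config and checkpoint paths below are illustrative placeholders, and `snpe-onnx-to-dlc` must be on `PATH` (that is what `mmdeploy.apis.snpe.is_available()` checks):

```bash
$ cd /path/to/mmdeploy
# hypothetical paths; replace the uri with the ip:port printed by inference_server
$ python3 tools/deploy.py \
    configs/mmcls/classification_snpe_static.py \
    /path/to/mmclassification/configs/resnet/resnet18_8xb32_in1k.py \
    /path/to/resnet18.pth \
    /path/to/test.jpg \
    --work-dir work_dir/snpe-resnet18 \
    --device cpu \
    --uri 10.1.80.67:50051
```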
## 4. Build the SDK

View File

@ -0,0 +1,158 @@
# Cross-compiling the NDK snpe service on Ubuntu 18.04

mmdeploy already provides a prebuilt snpe inference server. If you want to build it yourself, or need to modify the .proto interface, you can refer to this document.

Note that the official gRPC documentation does not provide complete NDK support.

## 1. Environment

| Item     | Version        | Remarks                                                   |
| -------- | -------------- | --------------------------------------------------------- |
| snpe     | 1.59           | 1.60 uses clang-8.0, which may cause compatibility issues |
| host OS  | ubuntu18.04    | version specified by snpe1.59                             |
| NDK      | r17c           | version specified by snpe1.59                             |
| gRPC     | commit 6f698b5 | -                                                         |
| Hardware | Redmi K40      | requires a qcom chip                                      |

## 2. Cross-compile gRPC with the NDK

1. Pull the gRPC repo and build `protoc` and `grpc_cpp_plugin` on the host
```bash
# install dependencies
$ apt-get update && apt-get install -y libssl-dev
# build
$ git clone https://github.com/grpc/grpc --recursive=1 --depth=1
$ mkdir -p cmake/build
$ pushd cmake/build
$ cmake \
-DCMAKE_BUILD_TYPE=Release \
-DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
-DgRPC_SSL_PROVIDER=package \
../..
# protoc and grpc_cpp_plugin need to be installed into the host environment
$ make -j
$ sudo make install
```
2. Download the NDK and cross-compile the static libraries needed for android aarch64
```bash
$ wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
$ unzip android-ndk-r17c-linux-x86_64.zip
# set the environment variable
$ export ANDROID_NDK=/path/to/android-ndk-r17c
# build
$ cd /path/to/grpc
$ mkdir -p cmake/build_aarch64 && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_TOOLCHAIN=clang \
-DANDROID_STL=c++_shared \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/tmp/android_grpc_install_shared
$ make -j
$ make install
```
3. At this point `/tmp/android_grpc_install_shared` should contain the complete installation files

```bash
$ cd /tmp/android_grpc_install_shared
$ tree -L 1
.
├── bin
├── include
├── lib
└── share
```
## 3. (Optional) Verify that the NDK gRPC build works

1. Build the helloworld example that ships with gRPC
```bash
$ cd /path/to/grpc/examples/cpp/helloworld/
$ mkdir cmake/build_aarch64 -p && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_shared \
-DANDROID_TOOLCHAIN=clang \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
$ make -j
$ ls greeter*
greeter_async_client greeter_async_server greeter_callback_server greeter_server
greeter_async_client2 greeter_callback_client greeter_client
```
2. Enable debugging mode on the phone and push the build artifacts to the `/data/local/tmp` directory

Tip: on many domestic-brand phones, go to Settings - Version number and tap it 7 times to enter developer mode; only then can USB debugging be turned on
```bash
$ adb push greeter* /data/local/tmp
```
3. `adb shell` into the phone and run the client/server
```bash
/data/local/tmp $ ./greeter_client
Greeter received: Hello world
```
## 4. Cross-compile the snpe service

1. Open the [snpe tools website](https://developer.qualcomm.com/software/qualcomm-neural-processing-sdk/tools), download version 1.59, then unpack it and set the environment variable

**Note that snpe 1.60 and later use `clang-8.0`, which may make older devices incompatible with `libc++_shared.so`.**
```bash
$ export SNPE_ROOT=/path/to/snpe-1.59.0.3230
```
2. Go to the mmdeploy snpe server directory and build with the same options used when cross-compiling gRPC
```bash
$ cd /path/to/mmdeploy
$ cd csrc/mmdeploy/backend_ops/snpe/service
$ mkdir -p build && cd build
$ export ANDROID_NDK=/path/to/android-ndk-r17c
$ cmake .. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_shared \
-DANDROID_TOOLCHAIN=clang \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
$ make -j
$ file inference_server
inference_server: ELF 64-bit LSB shared object, ARM aarch64, version 1 (SYSV), dynamically linked, interpreter /system/bin/linker64, BuildID[sha1]=252aa04e2b982681603dacb74b571be2851176d2, with debug_info, not stripped
```
This produces `inference_server`; `adb push` it to the device and run it.
## References
- snpe tutorial https://developer.qualcomm.com/sites/default/files/docs/snpe/cplus_plus_tutorial.html
- gRPC cross build script https://raw.githubusercontent.com/grpc/grpc/master/test/distrib/cpp/run_distrib_test_cmake_aarch64_cross.sh
- stackoverflow https://stackoverflow.com/questions/54052229/build-grpc-c-for-android-using-ndk-arm-linux-androideabi-clang-compiler

View File

@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdeploy.backend.snpe import from_onnx as _from_onnx
from mmdeploy.backend.snpe import is_available
from ..core import PIPELINE_MANAGER
from_onnx = PIPELINE_MANAGER.register_pipeline()(_from_onnx)
__all__ = ['is_available', 'from_onnx']
if is_available():
try:
from mmdeploy.backend.snpe.onnx2dlc import get_output_model_file
__all__ += ['get_output_model_file']
except Exception:
pass

View File

@ -0,0 +1,27 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from .init_plugins import get_onnx2dlc_path
from .onnx2dlc import from_onnx
def is_available():
"""Check whether ncnn and snpe-onnx-to-dlc tool are installed.
Returns:
bool: True if snpe-onnx-to-dlc tool are installed.
"""
onnx2dlc = get_onnx2dlc_path()
return osp.exists(onnx2dlc)
__all__ = ['from_onnx']
if is_available():
try:
from .wrapper import SNPEWrapper
__all__ += ['SNPEWrapper']
except Exception as e:
print(e)
pass

View File

@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
import shutil
def get_onnx2dlc_path() -> str:
"""Get snpe-onnx-to-dlc path.
Returns:
str: A path of snpe-onnx-to-dlc tool.
"""
return shutil.which('snpe-onnx-to-dlc')

View File

@ -0,0 +1,69 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
from subprocess import call
from typing import List, Optional, Union
import onnx
from .init_plugins import get_onnx2dlc_path
def mkdir_or_exist(dir_name, mode=0o777):
if dir_name == '':
return
dir_name = osp.expanduser(dir_name)
os.makedirs(dir_name, mode=mode, exist_ok=True)
def get_output_model_file(onnx_path: str,
                          work_dir: Optional[str] = None) -> str:
    """Returns the path to the .dlc file with export result.
    Args:
        onnx_path (str): The path to the onnx model.
        work_dir (str|None): The path to the directory for saving the results.
            Defaults to `None`, which means use the directory of onnx_path.
    Returns:
        str: The path to the file where the export result will be located.
"""
if work_dir is None:
work_dir = osp.dirname(onnx_path)
mkdir_or_exist(osp.abspath(work_dir))
file_name = osp.splitext(osp.split(onnx_path)[1])[0]
save_dlc = osp.join(work_dir, file_name + '.dlc')
return save_dlc
def from_onnx(onnx_model: Union[onnx.ModelProto, str],
output_file_prefix: str):
"""Convert ONNX to dlc.
    We need to use the `snpe-onnx-to-dlc` executable to convert the `.onnx` file to a `.dlc` file.
Example:
>>> from mmdeploy.apis.snpe import from_onnx
>>> onnx_path = 'work_dir/end2end.onnx'
>>> output_file_prefix = 'work_dir/end2end'
>>> from_onnx(onnx_path, output_file_prefix)
Args:
        onnx_model (ModelProto|str): The onnx model or the path to it.
output_file_prefix (str): The path to save the output .dlc file.
"""
if not isinstance(onnx_model, str):
onnx_path = tempfile.NamedTemporaryFile(suffix='.onnx').name
onnx.save(onnx_model, onnx_path)
else:
onnx_path = onnx_model
save_dlc = output_file_prefix + '.dlc'
onnx2dlc = get_onnx2dlc_path()
ret_code = call(
[onnx2dlc, '--input_network', onnx_path, '--output', save_dlc])
assert ret_code == 0, 'onnx2dlc failed'

View File

@ -0,0 +1,140 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import sys
from typing import Dict, Optional, Sequence
import grpc
# import mmdeploy.backend.snpe.inference_pb2
# import mmdeploy.backend.snpe.inference_pb2_grpc
import inference_pb2
import inference_pb2_grpc
import numpy as np
import torch
from mmdeploy.utils import Backend, get_root_logger
from mmdeploy.utils.timer import TimeCounter
from ..base import BACKEND_WRAPPER, BaseWrapper
@BACKEND_WRAPPER.register_module(Backend.SNPE.value)
class SNPEWrapper(BaseWrapper):
"""ncnn wrapper class for inference.
Args:
dlc_file (str): Path of a weight file.
output_names (Sequence[str] | None): Names of model outputs in order.
Defaults to `None` and the wrapper will load the output names from
snpe model.
Examples:
>>> from mmdeploy.backend.snpe import SNPEWrapper
>>> import torch
>>>
        >>> snpe_file = 'alexnet.dlc'
>>> model = SNPEWrapper(snpe_file)
>>> inputs = dict(input=torch.randn(1, 3, 224, 224))
>>> outputs = model(inputs)
>>> print(outputs)
"""
def __init__(self,
dlc_file: str,
output_names: Optional[Sequence[str]] = None,
**kwargs):
logger = get_root_logger()
        # maximum gRPC message size (limits the model file that can be sent): 2 << 29 bytes (1 GiB)
MAX_SIZE = 2 << 29
uri = os.environ['__MMDEPLOY_GRPC_URI']
logger.info(f'fetch uri: {uri}')
self.channel = grpc.insecure_channel(
uri,
            options=(('grpc.keepalive_time_ms', 2000),
                     ('grpc.max_send_message_length', MAX_SIZE),
('grpc.keepalive_permit_without_calls', 1)))
weights = bytes()
filesize = os.stat(dlc_file).st_size
logger.info(f'reading local model file {dlc_file}')
# with open(dlc_file, 'rb') as f:
# weights = f.read(filesize)
stub = inference_pb2_grpc.InferenceStub(self.channel)
logger.info(f'init remote SNPE engine with RPC, please wait...')
model = inference_pb2.Model(name=dlc_file, weights=weights, device=1)
resp = stub.Init(model)
if resp.status != 0:
logger.error(f'init SNPE model failed {resp.info}')
return
output = stub.OutputNames(inference_pb2.Empty())
output_names = output.names
super().__init__(output_names)
logger.info(f'init success, outputs {output_names}')
# def __del__(self):
# stub = inference_pb2_grpc.InferenceStub(self.channel)
# stub.Destroy()
def forward(self, inputs: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
"""Run forward inference.
Args:
inputs (Dict[str, torch.Tensor]): Key-value pairs of model inputs.
Returns:
Dict[str, torch.Tensor]: Key-value pairs of model outputs.
"""
input_list = list(inputs.values())
device_type = input_list[0].device.type
logger = get_root_logger()
# build `list` inputs for remote snpe engine
snpe_inputs = []
for name, input_tensor in inputs.items():
data = input_tensor.contiguous()
data = data.detach().cpu().numpy()
if data.dtype != np.float32:
                logger.error('SNPE currently only supports fp32 input')
data = data.astype(dtype=np.float32)
tensor = inference_pb2.Tensor(
data=data.tobytes(), name=name, dtype='float32')
snpe_inputs.append(tensor)
return self.__snpe_execute(
inference_pb2.TensorList(datas=snpe_inputs), device_type)
@TimeCounter.count_time()
def __snpe_execute(self, tensorList: inference_pb2.TensorList,
                       device: str) -> Dict[str, torch.Tensor]:
"""Run inference with snpe remote inference engine.
Args:
tensorList (inference_pb2.TensorList): snpe input tensor.
Returns:
dict[str, torch.tensor]: Inference results of snpe model.
"""
stub = inference_pb2_grpc.InferenceStub(self.channel)
resp = stub.Inference(tensorList)
result = dict()
if resp.status == 0:
for tensor in resp.datas:
ndarray = np.frombuffer(tensor.data, dtype=np.float32)
result[tensor.name] = torch.from_numpy(
ndarray.copy()).to(device)
else:
logger = get_root_logger()
logger.error(f'snpe inference failed {resp.info}')
return result
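`SNPEWrapper` resolves the gRPC endpoint from the `__MMDEPLOY_GRPC_URI` environment variable, which `tools/deploy.py` fills in from its `--uri` option; when the wrapper is used outside of `deploy.py`, the variable has to be exported beforehand, for example:

```bash
# ipv4:port (or ipv6:port) of the device running inference_server
$ export __MMDEPLOY_GRPC_URI=10.1.80.67:50051
```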

View File

@ -106,6 +106,10 @@ class BaseBackendModel(torch.nn.Module, metaclass=ABCMeta):
model=backend_files[0],
input_names=input_names,
output_names=output_names)
elif backend == Backend.SNPE:
from mmdeploy.backend.snpe import SNPEWrapper
return SNPEWrapper(
dlc_file=backend_files[0], output_names=output_names)
else:
raise NotImplementedError(f'Unknown backend type: {backend.value}')

View File

@ -55,6 +55,7 @@ class Backend(AdvancedEnum):
ONNXRUNTIME = 'onnxruntime'
PPLNN = 'pplnn'
NCNN = 'ncnn'
SNPE = 'snpe'
OPENVINO = 'openvino'
SDK = 'sdk'
TORCHSCRIPT = 'torchscript'

View File

@ -7,3 +7,4 @@ mmrazor>=0.3.0
mmsegmentation
onnxruntime>=1.8.0
openvino-dev
grpcio

View File

@ -41,6 +41,9 @@ def check_backend():
import mmdeploy.apis.openvino as openvino_apis
logger.info(f'openvino_is_avaliable: {openvino_apis.is_available()}')
import mmdeploy.apis.snpe as snpe_apis
logger.info(f'snpe_is_available: {snpe_apis.is_available()}')
def check_codebase():
codebase_versions = get_codebase_version()

View File

@ -54,6 +54,11 @@ def parse_args():
help='Image directory for quantize model.')
parser.add_argument(
'--quant', action='store_true', help='Quantize model to low bit.')
    parser.add_argument(
        '--uri',
        default='10.1.80.67:50051',
        help='Remote ipv4:port or ipv6:port for inference on edge device.')
args = parser.parse_args()
return args
@ -266,6 +271,28 @@ def main():
else:
backend_files += [model_param_path, model_bin_path]
elif backend == Backend.SNPE:
os.environ['__MMDEPLOY_GRPC_URI'] = args.uri
from mmdeploy.apis.snpe import is_available as is_available
if not is_available():
logger.error('snpe support is not available, please check \
            1) `snpe-onnx-to-dlc` exists in `PATH` 2) snpe only supports ubuntu18.04'
)
exit(1)
import mmdeploy.apis.snpe as snpe_api
from mmdeploy.apis.snpe import get_output_model_file
PIPELINE_MANAGER.set_log_level(log_level, [snpe_api.from_onnx])
backend_files = []
for onnx_path in ir_files:
dlc_path = get_output_model_file(onnx_path, args.work_dir)
onnx_name = osp.splitext(osp.split(onnx_path)[1])[0]
snpe_api.from_onnx(onnx_path, osp.join(args.work_dir, onnx_name))
backend_files = [dlc_path]
elif backend == Backend.OPENVINO:
from mmdeploy.apis.openvino import \
is_available as is_available_openvino
@ -332,17 +359,19 @@ def main():
# for headless installation.
if not headless:
# visualize model of the backend
create_process(
f'visualize {backend.value} model',
target=visualize_model,
args=(model_cfg_path, deploy_cfg_path, backend_files,
args.test_img, args.device),
kwargs=dict(
backend=backend,
output_file=osp.join(args.work_dir,
f'output_{backend.value}.jpg'),
show_result=args.show),
ret_value=ret_value)
visualize_model(model_cfg_path, deploy_cfg_path, backend_files,
args.test_img, args.device)
# create_process(
# f'visualize {backend.value} model',
# target=visualize_model,
# args=(model_cfg_path, deploy_cfg_path, backend_files,
# args.test_img, args.device),
# kwargs=dict(
# backend=backend,
# output_file=osp.join(args.work_dir,
# f'output_{backend.value}.jpg'),
# show_result=args.show),
# ret_value=ret_value)
# visualize pytorch model
create_process(