Compare commits

...

6 Commits

Author SHA1 Message Date
tpoisonooo ccd00c14ae fix 2022-07-11 13:12:19 +08:00
tpoisonooo a15bd81dbc Merge branch 'support-snpe-fix' into support-snpe 2022-07-11 13:11:18 +08:00
tpoisonooo 5666c88b9f fix(tools/deploy.py): support snpe 2022-07-11 13:09:16 +08:00
tpoisonooo 1ad3534814 improvement(snpe): fix error 2022-07-10 12:36:37 +08:00
tpoisonooo d1226cb167 feat(csrc/mmdeploy/backend_ops): support snpe 2022-07-08 21:09:47 +08:00
tpoisonooo 2c1f4b99cb feat(backend): add snpe support 2022-07-07 19:20:36 +08:00
31 changed files with 1808 additions and 124 deletions

View File

@ -0,0 +1 @@
backend_config = dict(type='snpe')

View File

@ -0,0 +1,3 @@
_base_ = ['./classification_static.py', '../_base_/backends/snpe.py']
onnx_config = dict(input_shape=None)

View File

@ -0,0 +1 @@
/home/PJLAB/konghuanjun/GitProjects/mmdeploy/resnet18/end2end.dlc

View File

@ -0,0 +1 @@
/home/PJLAB/konghuanjun/GitProjects/mmdeploy/resnet18/end2end.onnx

View File

@ -0,0 +1,82 @@
# Copyright 2015 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The Python implementation of the GRPC helloworld.Greeter client."""
from __future__ import print_function
import logging
import grpc
import inference_pb2
import inference_pb2_grpc
import os
import cv2
import numpy as np
def build_dummy_tensor():
img = cv2.imread('/home/PJLAB/konghuanjun/Downloads/snpe-1.55.0.2958/models/alexnet/data/chairs.jpg')
m = cv2.resize(img, (600, 400))
data = (m.astype(np.float32) - 127.5) / 127.5
print(data.shape)
tensor = inference_pb2.Tensor(data=data.tobytes(), shape=list(data.shape), name='data_0', dtype='float32')
return tensor
def run():
# NOTE(gRPC Python Team): .close() is possible on a channel and should be
# used in circumstances in which the with statement does not fit the needs
# of the code.
filename = 'end2end.dlc'
filesize = os.stat(filename).st_size
weights = bytes()
# with open(filename, 'rb') as f:
# weights = f.read(filesize)
# if len(weights) >= (2 << 29):
# print('model size too big')
# https://github.com/grpc/grpc/blob/v1.46.x/include/grpc/impl/codegen/grpc_types.h
# https://grpc.io/docs/guides/performance/
with grpc.insecure_channel('10.1.80.67:50051',
options=(
('grpc.keepalive_time_ms', 2000),
('grpc.max_send_message_length', 2<<29),
('grpc.keepalive_permit_without_calls', 1))) as channel:
print("channel type {}".format(type(channel)))
# with grpc.insecure_channel('[0:0:fe80::3455:bf2a]:50051') as channel:
stub = inference_pb2_grpc.InferenceStub(channel)
response = stub.Echo(inference_pb2.Empty())
print("Response echo {}".format(response))
model = inference_pb2.Model(name= filename, weights=weights, device=1)
print("Sending model to init, please wait...")
response = stub.Init(model)
print("Response init {}".format(response))
response = stub.OutputNames(inference_pb2.Empty())
print("Response outputnames {}".format(response))
tensor = build_dummy_tensor()
tensorList = inference_pb2.TensorList(datas = [tensor])
for x in range(1):
response = stub.Inference(tensorList)
if response.status == 0:
prob = np.frombuffer(response.datas[0].data, dtype=np.float32)
print("prob argmax: {} max: {}".format(prob.argmax(), prob.max()))
else:
print(response.info)
if __name__ == '__main__':
logging.basicConfig()
run()
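One way to exercise this test client, assuming the generated `inference_pb2*.py` stubs sit next to it and the inference server from the appendix document is already running on the device (the hard-coded `10.1.80.67:50051` address and the image path are the author's local test values and need to be replaced):

```bash
# grpcio, numpy and opencv-python are required on the host
$ python3 -m pip install grpcio numpy opencv-python
$ python3 client.py
```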

View File

@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: inference.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0finference.proto\x12\x08mmdeploy\"\x91\x01\n\x05Model\x12\x11\n\x04name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07weights\x18\x02 \x01(\x0c\x12+\n\x06\x64\x65vice\x18\x03 \x01(\x0e\x32\x16.mmdeploy.Model.DeviceH\x01\x88\x01\x01\"#\n\x06\x44\x65vice\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\x12\x07\n\x03\x44SP\x10\x02\x42\x07\n\x05_nameB\t\n\x07_device\"\x07\n\x05\x45mpty\"Q\n\x06Tensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\x05\x64type\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\r\n\x05shape\x18\x04 \x03(\x05\x42\x08\n\x06_dtype\"-\n\nTensorList\x12\x1f\n\x05\x64\x61tas\x18\x01 \x03(\x0b\x32\x10.mmdeploy.Tensor\"F\n\x05Reply\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0c\n\x04info\x18\x02 \x01(\t\x12\x1f\n\x05\x64\x61tas\x18\x03 \x03(\x0b\x32\x10.mmdeploy.Tensor\"\x16\n\x05Names\x12\r\n\x05names\x18\x01 \x03(\t2\xfb\x01\n\tInference\x12*\n\x04\x45\x63ho\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Reply\"\x00\x12*\n\x04Init\x12\x0f.mmdeploy.Model\x1a\x0f.mmdeploy.Reply\"\x00\x12\x31\n\x0bOutputNames\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Names\"\x00\x12\x34\n\tInference\x12\x14.mmdeploy.TensorList\x1a\x0f.mmdeploy.Reply\"\x00\x12-\n\x07\x44\x65stroy\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Reply\"\x00\x42%\n\rmmdeploy.snpeB\x0bSNPEWrapperP\x01\xa2\x02\x04SNPEb\x06proto3')
_MODEL = DESCRIPTOR.message_types_by_name['Model']
_EMPTY = DESCRIPTOR.message_types_by_name['Empty']
_TENSOR = DESCRIPTOR.message_types_by_name['Tensor']
_TENSORLIST = DESCRIPTOR.message_types_by_name['TensorList']
_REPLY = DESCRIPTOR.message_types_by_name['Reply']
_NAMES = DESCRIPTOR.message_types_by_name['Names']
_MODEL_DEVICE = _MODEL.enum_types_by_name['Device']
Model = _reflection.GeneratedProtocolMessageType('Model', (_message.Message,), {
'DESCRIPTOR' : _MODEL,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Model)
})
_sym_db.RegisterMessage(Model)
Empty = _reflection.GeneratedProtocolMessageType('Empty', (_message.Message,), {
'DESCRIPTOR' : _EMPTY,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Empty)
})
_sym_db.RegisterMessage(Empty)
Tensor = _reflection.GeneratedProtocolMessageType('Tensor', (_message.Message,), {
'DESCRIPTOR' : _TENSOR,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Tensor)
})
_sym_db.RegisterMessage(Tensor)
TensorList = _reflection.GeneratedProtocolMessageType('TensorList', (_message.Message,), {
'DESCRIPTOR' : _TENSORLIST,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.TensorList)
})
_sym_db.RegisterMessage(TensorList)
Reply = _reflection.GeneratedProtocolMessageType('Reply', (_message.Message,), {
'DESCRIPTOR' : _REPLY,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Reply)
})
_sym_db.RegisterMessage(Reply)
Names = _reflection.GeneratedProtocolMessageType('Names', (_message.Message,), {
'DESCRIPTOR' : _NAMES,
'__module__' : 'inference_pb2'
# @@protoc_insertion_point(class_scope:mmdeploy.Names)
})
_sym_db.RegisterMessage(Names)
_INFERENCE = DESCRIPTOR.services_by_name['Inference']
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b'\n\rmmdeploy.snpeB\013SNPEWrapperP\001\242\002\004SNPE'
_MODEL._serialized_start=30
_MODEL._serialized_end=175
_MODEL_DEVICE._serialized_start=120
_MODEL_DEVICE._serialized_end=155
_EMPTY._serialized_start=177
_EMPTY._serialized_end=184
_TENSOR._serialized_start=186
_TENSOR._serialized_end=267
_TENSORLIST._serialized_start=269
_TENSORLIST._serialized_end=314
_REPLY._serialized_start=316
_REPLY._serialized_end=386
_NAMES._serialized_start=388
_NAMES._serialized_end=410
_INFERENCE._serialized_start=413
_INFERENCE._serialized_end=664
# @@protoc_insertion_point(module_scope)

View File

@ -0,0 +1,205 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import inference_pb2 as inference__pb2
class InferenceStub(object):
"""The inference service definition.
"""
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.Echo = channel.unary_unary(
'/mmdeploy.Inference/Echo',
request_serializer=inference__pb2.Empty.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
self.Init = channel.unary_unary(
'/mmdeploy.Inference/Init',
request_serializer=inference__pb2.Model.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
self.OutputNames = channel.unary_unary(
'/mmdeploy.Inference/OutputNames',
request_serializer=inference__pb2.Empty.SerializeToString,
response_deserializer=inference__pb2.Names.FromString,
)
self.Inference = channel.unary_unary(
'/mmdeploy.Inference/Inference',
request_serializer=inference__pb2.TensorList.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
self.Destroy = channel.unary_unary(
'/mmdeploy.Inference/Destroy',
request_serializer=inference__pb2.Empty.SerializeToString,
response_deserializer=inference__pb2.Reply.FromString,
)
class InferenceServicer(object):
"""The inference service definition.
"""
def Echo(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Init(self, request, context):
"""Init Model with model file
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def OutputNames(self, request, context):
"""Get output names
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Inference(self, request, context):
"""Inference with inputs
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Destroy(self, request, context):
"""Destroy handle
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def add_InferenceServicer_to_server(servicer, server):
rpc_method_handlers = {
'Echo': grpc.unary_unary_rpc_method_handler(
servicer.Echo,
request_deserializer=inference__pb2.Empty.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
'Init': grpc.unary_unary_rpc_method_handler(
servicer.Init,
request_deserializer=inference__pb2.Model.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
'OutputNames': grpc.unary_unary_rpc_method_handler(
servicer.OutputNames,
request_deserializer=inference__pb2.Empty.FromString,
response_serializer=inference__pb2.Names.SerializeToString,
),
'Inference': grpc.unary_unary_rpc_method_handler(
servicer.Inference,
request_deserializer=inference__pb2.TensorList.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
'Destroy': grpc.unary_unary_rpc_method_handler(
servicer.Destroy,
request_deserializer=inference__pb2.Empty.FromString,
response_serializer=inference__pb2.Reply.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'mmdeploy.Inference', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class Inference(object):
"""The inference service definition.
"""
@staticmethod
def Echo(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Echo',
inference__pb2.Empty.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Init(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Init',
inference__pb2.Model.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def OutputNames(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/OutputNames',
inference__pb2.Empty.SerializeToString,
inference__pb2.Names.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Inference(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Inference',
inference__pb2.TensorList.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Destroy(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Destroy',
inference__pb2.Empty.SerializeToString,
inference__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

View File

@ -0,0 +1,70 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "mmdeploy.snpe";
option java_outer_classname = "SNPEWrapper";
option objc_class_prefix = "SNPE";
package mmdeploy;
// The inference service definition.
service Inference {
rpc Echo(Empty) returns (Reply) {}
// Init Model with model file
rpc Init(Model) returns (Reply) {}
// Get output names
rpc OutputNames(Empty) returns (Names) {}
// Inference with inputs
rpc Inference(TensorList) returns (Reply) {}
// Destroy handle
rpc Destroy(Empty) returns (Reply) {}
}
message Model {
optional string name = 1;
// bin
bytes weights = 2;
// config
enum Device {
CPU = 0;
GPU = 1;
DSP = 2;
}
optional Device device = 3;
}
// https://stackoverflow.com/questions/31768665/can-i-define-a-grpc-call-with-a-null-request-or-response
message Empty {}
message Tensor {
// name
string name = 1;
// datatype
optional string dtype = 2;
// data
bytes data = 3;
// shape
repeated int32 shape = 4;
}
message TensorList {
repeated Tensor datas = 1;
}
message Reply {
int32 status = 1;
string info = 2;
repeated Tensor datas = 3;
}
message Names {
repeated string names = 1;
}
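If the `.proto` interface is changed (as the appendix document notes), the checked-in Python stubs (`inference_pb2.py`, `inference_pb2_grpc.py`) have to be regenerated; the C++ stubs are produced at build time by the `add_custom_command` in the service CMakeLists further below. A minimal sketch, assuming `grpcio-tools` is installed and the command is run from the directory containing `inference.proto`:

```bash
$ python3 -m pip install grpcio-tools
$ python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. inference.proto
```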

View File

@ -0,0 +1,81 @@
# Copyright 2018 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cmake build file for C++ helloworld example.
# Assumes protobuf and gRPC have been installed using cmake.
# See cmake_externalproject/CMakeLists.txt for all-in-one cmake build
# that automatically builds all the dependencies before building helloworld.
cmake_minimum_required(VERSION 3.5.1)
project(SNPEServer C CXX)
include(./common.cmake)
# Proto file
get_filename_component(hw_proto "../inference.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)
# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/inference.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/inference.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/inference.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/inference.grpc.pb.h")
add_custom_command(
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${hw_proto}"
DEPENDS "${hw_proto}")
# Include generated *.pb.h files
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
# hw_grpc_proto
add_library(hw_grpc_proto
${hw_grpc_srcs}
${hw_grpc_hdrs}
${hw_proto_srcs}
${hw_proto_hdrs})
target_link_libraries(hw_grpc_proto
${_REFLECTION}
${_GRPC_GRPCPP}
${_PROTOBUF_LIBPROTOBUF})
add_library(snpe SHARED IMPORTED)
if (NOT EXISTS $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/)
    message(FATAL_ERROR "snpe directory does not exist: $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/")
endif()
set_target_properties(snpe PROPERTIES
IMPORTED_LOCATION "$ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/libSNPE.so"
INTERFACE_INCLUDE_DIRECTORIES "$ENV{SNPE_ROOT}/include/zdl"
)
target_link_directories(
snpe
INTERFACE
)
add_executable(inference_server inference_server.cc service_impl.cpp)
target_link_libraries(inference_server
hw_grpc_proto
${_REFLECTION}
${_GRPC_GRPCPP}
${_PROTOBUF_LIBPROTOBUF}
snpe)

View File

@ -0,0 +1,10 @@
export ANDROID_NDK=/home/PJLAB/konghuanjun/Downloads/android-ndk-r17c
cmake .. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_shared \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc

View File

@ -0,0 +1,123 @@
# Copyright 2018 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cmake build file for C++ route_guide example.
# Assumes protobuf and gRPC have been installed using cmake.
# See cmake_externalproject/CMakeLists.txt for all-in-one cmake build
# that automatically builds all the dependencies before building route_guide.
cmake_minimum_required(VERSION 3.5.1)
set (CMAKE_CXX_STANDARD 14)
if(MSVC)
add_definitions(-D_WIN32_WINNT=0x600)
endif()
find_package(Threads REQUIRED)
if(GRPC_AS_SUBMODULE)
# One way to build a projects that uses gRPC is to just include the
# entire gRPC project tree via "add_subdirectory".
# This approach is very simple to use, but there are some potential
# disadvantages:
# * it includes gRPC's CMakeLists.txt directly into your build script,
#   and that can make gRPC's internal settings interfere with your
#   own build.
# * depending on what's installed on your system, the contents of submodules
# in gRPC's third_party/* might need to be available (and there might be
# additional prerequisites required to build them). Consider using
# the gRPC_*_PROVIDER options to fine-tune the expected behavior.
#
# A more robust approach to add dependency on gRPC is using
# cmake's ExternalProject_Add (see cmake_externalproject/CMakeLists.txt).
# Include the gRPC's cmake build (normally grpc source code would live
# in a git submodule called "third_party/grpc", but this example lives in
# the same repository as gRPC sources, so we just look a few directories up)
add_subdirectory(../../.. ${CMAKE_CURRENT_BINARY_DIR}/grpc EXCLUDE_FROM_ALL)
message(STATUS "Using gRPC via add_subdirectory.")
# After using add_subdirectory, we can now use the grpc targets directly from
# this build.
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
if(CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif()
set(_GRPC_GRPCPP grpc++)
if(CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
endif()
elseif(GRPC_FETCHCONTENT)
# Another way is to use CMake's FetchContent module to clone gRPC at
# configure time. This makes gRPC's source code available to your project,
# similar to a git submodule.
message(STATUS "Using gRPC via add_subdirectory (FetchContent).")
include(FetchContent)
FetchContent_Declare(
grpc
GIT_REPOSITORY https://github.com/grpc/grpc.git
# when using gRPC, you will actually set this to an existing tag, such as
# v1.25.0, v1.26.0 etc..
# For the purpose of testing, we override the tag used to the commit
# that's currently under test.
GIT_TAG vGRPC_TAG_VERSION_OF_YOUR_CHOICE)
FetchContent_MakeAvailable(grpc)
# Since FetchContent uses add_subdirectory under the hood, we can use
# the grpc targets directly from this build.
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
set(_PROTOBUF_PROTOC $<TARGET_FILE:protoc>)
set(_GRPC_GRPCPP grpc++)
if(CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
endif()
else()
# This branch assumes that gRPC and all its dependencies are already installed
# on this system, so they can be located by find_package().
# Find Protobuf installation
# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation.
set(protobuf_MODULE_COMPATIBLE TRUE)
find_package(Protobuf CONFIG REQUIRED)
message(STATUS "Using protobuf ${Protobuf_VERSION}")
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
set(_REFLECTION gRPC::grpc++_reflection)
if(CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif()
# Find gRPC installation
# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation.
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
set(_GRPC_GRPCPP gRPC::grpc++)
if(CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
endif()
endif()

View File

@ -0,0 +1,89 @@
/*
*
* Copyright 2015 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Copyright (c) OpenMMLab. All rights reserved.
#include <arpa/inet.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <iostream>
#include "service_impl.h"
void PrintIPv4() {
struct ifaddrs* ifAddrStruct = NULL;
void* tmpAddrPtr = NULL;
getifaddrs(&ifAddrStruct);
while (ifAddrStruct != NULL) {
    if (ifAddrStruct->ifa_addr == NULL) {
      // some interfaces may have no address attached
      ifAddrStruct = ifAddrStruct->ifa_next;
      continue;
    }
    if (ifAddrStruct->ifa_addr->sa_family == AF_INET) {
tmpAddrPtr = &((struct sockaddr_in*)ifAddrStruct->ifa_addr)->sin_addr;
char addressBuffer[INET_ADDRSTRLEN];
inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN);
fprintf(stdout, "%s IP Address %s\n", ifAddrStruct->ifa_name,
addressBuffer);
} else if (ifAddrStruct->ifa_addr->sa_family == AF_INET6) {
      tmpAddrPtr = &((struct sockaddr_in6*)ifAddrStruct->ifa_addr)->sin6_addr;
char addressBuffer[INET6_ADDRSTRLEN];
inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN);
fprintf(stdout, "%s IP Address %s\n", ifAddrStruct->ifa_name,
addressBuffer);
}
ifAddrStruct = ifAddrStruct->ifa_next;
}
}
void RunServer() {
// listen IPv4 and IPv6
std::string server_address("[::]:50051");
InferenceServiceImpl service;
grpc::EnableDefaultHealthCheckService(true);
grpc::reflection::InitProtoReflectionServerBuilderPlugin();
ServerBuilder builder;
// Listen on the given address without any authentication mechanism.
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
  // max gRPC message size: 2 << 29 bytes (1 GiB)
builder.SetMaxMessageSize(2 << 29);
builder.SetMaxSendMessageSize(2 << 29);
// Register "service" as the instance through which we'll communicate with
// clients. In this case it corresponds to an *synchronous* service.
builder.RegisterService(&service);
// Finally assemble the server.
std::unique_ptr<Server> server(builder.BuildAndStart());
std::cout << "Server listening on " << server_address << std::endl;
// Wait for the server to shutdown. Note that some other thread must be
// responsible for shutting down the server for this call to ever return.
server->Wait();
}
int main(int argc, char** argv) {
PrintIPv4();
RunServer();
return 0;
}

View File

@ -0,0 +1,265 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "service_impl.h"
#include <getopt.h>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>
zdl::DlSystem::Runtime_t InferenceServiceImpl::CheckRuntime(
zdl::DlSystem::Runtime_t runtime, bool& staticQuantization) {
static zdl::DlSystem::Version_t Version =
zdl::SNPE::SNPEFactory::getLibraryVersion();
fprintf(stdout, "SNPE Version: %s\n", Version.asString().c_str());
if ((runtime != zdl::DlSystem::Runtime_t::DSP) && staticQuantization) {
fprintf(stderr,
"ERROR: Cannot use static quantization with CPU/GPU runtimes. "
"It is only designed for DSP/AIP runtimes.\n"
"ERROR: Proceeding without static quantization on selected "
"runtime.\n");
staticQuantization = false;
}
if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
fprintf(stderr, "Selected runtime not present. Falling back to CPU.\n");
runtime = zdl::DlSystem::Runtime_t::CPU;
}
return runtime;
}
std::unique_ptr<zdl::SNPE::SNPE> InferenceServiceImpl::SetBuilderOptions(
std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
zdl::DlSystem::Runtime_t runtime, zdl::DlSystem::RuntimeList runtimeList,
bool useUserSuppliedBuffers, zdl::DlSystem::PlatformConfig platformConfig,
bool useCaching) {
std::unique_ptr<zdl::SNPE::SNPE> psnpe;
zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
if (runtimeList.empty()) {
runtimeList.add(runtime);
}
psnpe = snpeBuilder.setOutputLayers({})
.setRuntimeProcessorOrder(runtimeList)
.setUseUserSuppliedBuffers(useUserSuppliedBuffers)
.setPlatformConfig(platformConfig)
.setInitCacheMode(useCaching)
.build();
return psnpe;
}
std::string InferenceServiceImpl::SaveDLC(const ::mmdeploy::Model* request) {
std::string filename = "tmp.dlc";
if (request->has_name()) {
filename = request->name();
}
auto model = request->weights();
fprintf(stdout, "saving file to %s\n", filename.c_str());
std::ofstream fout;
fout.open(filename, std::ios::binary | std::ios::out);
fout.write(model.data(), model.size());
fout.flush();
fout.close();
return filename;
}
void InferenceServiceImpl::LoadFloatData(const std::string& data,
std::vector<float>& vec) {
size_t len = data.size();
assert(len % sizeof(float) == 0);
const char* ptr = data.data();
for (int i = 0; i < len; i += sizeof(float)) {
vec.push_back(*(float*)(ptr + i));
}
}
::grpc::Status InferenceServiceImpl::Echo(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) {
fprintf(stdout, "Stage Echo: recv command\n");
response->set_info("echo");
return Status::OK;
}
// Logic and data behind the server's behavior.
::grpc::Status InferenceServiceImpl::Init(::grpc::ServerContext* context,
const ::mmdeploy::Model* request,
::mmdeploy::Reply* response) {
fprintf(stdout, "Stage Init: recv command\n");
// std::string filename = SaveDLC(request);
std::string filename = "end2end.dlc";
if (snpe != nullptr) {
snpe.reset();
}
if (container != nullptr) {
container.reset();
}
container =
zdl::DlContainer::IDlContainer::open(zdl::DlSystem::String(filename));
if (container == nullptr) {
fprintf(stdout, "Stage Init: load dlc failed.\n");
response->set_status(-1);
response->set_info(zdl::DlSystem::getLastErrorString());
return Status::OK;
}
zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::CPU;
if (request->has_device()) {
switch (request->device()) {
case mmdeploy::Model_Device_GPU:
runtime = zdl::DlSystem::Runtime_t::GPU;
break;
case mmdeploy::Model_Device_DSP:
runtime = zdl::DlSystem::Runtime_t::DSP;
default:
break;
}
}
if (runtime != zdl::DlSystem::Runtime_t::CPU) {
bool static_quant = false;
runtime = CheckRuntime(runtime, static_quant);
}
zdl::DlSystem::RuntimeList runtimeList;
runtimeList.add(runtime);
zdl::DlSystem::PlatformConfig platformConfig;
snpe = SetBuilderOptions(container, runtime, runtimeList, false,
platformConfig, false);
  if (snpe == nullptr) {
    response->set_status(-2);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }
response->set_status(0);
response->set_info("Stage Init: success");
return Status::OK;
}
::grpc::Status InferenceServiceImpl::OutputNames(
::grpc::ServerContext* context, const ::mmdeploy::Empty* request,
::mmdeploy::Names* response) {
const auto& outputTensorNamesRef = snpe->getOutputTensorNames();
const auto& outputTensorNames = *outputTensorNamesRef;
for (int i = 0; i < outputTensorNames.size(); ++i) {
response->add_names(outputTensorNames.at(i));
}
return Status::OK;
}
::grpc::Status InferenceServiceImpl::Inference(
::grpc::ServerContext* context, const ::mmdeploy::TensorList* request,
::mmdeploy::Reply* response) {
// Get input names and number
fprintf(stdout, "Stage Inference: command\n");
const auto& inputTensorNamesRef = snpe->getInputTensorNames();
if (!inputTensorNamesRef) {
response->set_status(-1);
response->set_info(zdl::DlSystem::getLastErrorString());
return Status::OK;
}
const auto& inputTensorNames = *inputTensorNamesRef;
if (inputTensorNames.size() != request->datas_size()) {
response->set_status(-2);
response->set_info("Stage Inference: input names count not match !");
return Status::OK;
}
std::vector<std::unique_ptr<zdl::DlSystem::ITensor>> inputTensors(
inputTensorNames.size());
zdl::DlSystem::TensorMap inputTensorMap;
// Load input/output buffers with TensorMap
for (int i = 0; i < request->datas_size(); ++i) {
auto tensor = request->datas(i);
std::vector<float> float_input;
LoadFloatData(tensor.data(), float_input);
const auto& inputShape_opt =
snpe->getInputDimensions(tensor.name().c_str());
const auto& inputShape = *inputShape_opt;
fprintf(stdout, "Stage Inference: tensor name: %s input data len %lu, [",
inputTensorNames.at(i), float_input.size());
for (int j = 0; j < inputShape.rank(); ++j) {
fprintf(stdout, " %ld,", inputShape[j]);
}
fprintf(stdout, "]\n");
inputTensors[i] =
zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
std::copy(float_input.begin(), float_input.end(), inputTensors[i]->begin());
inputTensorMap.add(tensor.name().c_str(), inputTensors[i].get());
}
// A tensor map for SNPE execution outputs
zdl::DlSystem::TensorMap outputTensorMap;
// Execute the multiple input tensorMap on the model with SNPE
bool success = snpe->execute(inputTensorMap, outputTensorMap);
if (!success) {
// build output status
response->set_status(-3);
response->set_info(zdl::DlSystem::getLastErrorString());
return Status::OK;
}
// build output tensor list
{
auto out_names = outputTensorMap.getTensorNames();
for (size_t i = 0; i < out_names.size(); ++i) {
const char* name = out_names.at(i);
zdl::DlSystem::ITensor* pTensor = outputTensorMap.getTensor(name);
size_t data_length = pTensor->getSize();
std::string result;
result.resize(sizeof(float) * data_length);
int j = 0;
for (auto it = pTensor->cbegin(); it != pTensor->cend();
++it, j += sizeof(float)) {
float f = *it;
memcpy(&result[0] + j, reinterpret_cast<char*>(&f), sizeof(float));
}
::mmdeploy::Tensor* pData = response->add_datas();
pData->set_dtype("float32");
pData->set_name(name);
pData->set_data(result);
}
}
// build output status
response->set_status(0);
response->set_info("Stage Inference: success");
return Status::OK;
}
::grpc::Status InferenceServiceImpl::Destroy(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) {
snpe.reset();
container.reset();
response->set_status(0);
return Status::OK;
}

View File

@ -0,0 +1,80 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef SERVICE_IMPL_H
#define SERVICE_IMPL_H
#include <iostream>
#include <memory>
#include <string>
#include "DiagLog/IDiagLog.hpp"
#include "DlContainer/IDlContainer.hpp"
#include "DlSystem/DlEnums.hpp"
#include "DlSystem/DlError.hpp"
#include "DlSystem/ITensorFactory.hpp"
#include "DlSystem/IUserBuffer.hpp"
#include "DlSystem/PlatformConfig.hpp"
#include "DlSystem/RuntimeList.hpp"
#include "DlSystem/UserBufferMap.hpp"
#include "SNPE/SNPE.hpp"
#include "SNPE/SNPEBuilder.hpp"
#include "SNPE/SNPEFactory.hpp"
#include "inference.grpc.pb.h"
#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <grpcpp/grpcpp.h>
#include <grpcpp/health_check_service_interface.h>
using grpc::Server;
using grpc::ServerBuilder;
using grpc::ServerContext;
using grpc::Status;
using mmdeploy::Empty;
using mmdeploy::Inference;
using mmdeploy::Model;
using mmdeploy::Reply;
using mmdeploy::Tensor;
using mmdeploy::TensorList;
// Logic and data behind the server's behavior.
class InferenceServiceImpl final : public Inference::Service {
::grpc::Status Echo(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) override;
// Init Model with model file
::grpc::Status Init(::grpc::ServerContext* context,
const ::mmdeploy::Model* request,
::mmdeploy::Reply* response) override;
// Get output names
::grpc::Status OutputNames(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Names* response) override;
// Inference with inputs
::grpc::Status Inference(::grpc::ServerContext* context,
const ::mmdeploy::TensorList* request,
::mmdeploy::Reply* response) override;
  // Destroy handle
::grpc::Status Destroy(::grpc::ServerContext* context,
const ::mmdeploy::Empty* request,
::mmdeploy::Reply* response) override;
std::string SaveDLC(const ::mmdeploy::Model* request);
void LoadFloatData(const std::string& data, std::vector<float>& vec);
zdl::DlSystem::Runtime_t CheckRuntime(zdl::DlSystem::Runtime_t runtime,
bool& staticQuantization);
std::unique_ptr<zdl::SNPE::SNPE> SetBuilderOptions(
std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
zdl::DlSystem::Runtime_t runtime, zdl::DlSystem::RuntimeList runtimeList,
bool useUserSuppliedBuffers, zdl::DlSystem::PlatformConfig platformConfig,
bool useCaching);
std::unique_ptr<zdl::SNPE::SNPE> snpe;
std::unique_ptr<zdl::DlContainer::IDlContainer> container;
};
#endif

View File

@ -0,0 +1,17 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_snpe_net)
if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
    # import libSNPE.so the same way the standalone gRPC service CMakeLists does
    if (NOT EXISTS $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/)
        message(FATAL_ERROR "snpe directory does not exist: $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/")
    endif ()
    add_library(snpe SHARED IMPORTED)
    set_target_properties(snpe PROPERTIES
        IMPORTED_LOCATION "$ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/libSNPE.so"
        INTERFACE_INCLUDE_DIRECTORIES "$ENV{SNPE_ROOT}/include/zdl")
    mmdeploy_add_module(${PROJECT_NAME} snpe_net.cpp)
    target_link_libraries(${PROJECT_NAME} PRIVATE snpe)
    add_library(mmdeploy::snpe_net ALIAS ${PROJECT_NAME})
else ()
message(
ERROR
"'snpe_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}")
endif ()

View File

@ -0,0 +1,142 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "snpe_net.h"

#include "SNPE/SNPEBuilder.hpp"
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/formatter.h"
namespace mmdeploy {
SNPENet::~SNPENet() {}
Result<void> SNPENet::Init(const Value& args) {
  auto& context = args["context"];
  device_ = context["device"].get<Device>();
  stream_ = context["stream"].get<Stream>();
  if (!device_.is_host()) {
    return Status(eNotSupported);
  }
  auto name = args["name"].get<std::string>();
  auto model = context["model"].get<Model>();
  OUTCOME_TRY(auto config, model.GetModelConfig(name));
  container_ = zdl::DlContainer::IDlContainer::open(zdl::DlSystem::String(config.net));
  if (container_ == nullptr) {
    MMDEPLOY_ERROR("Load .dlc failed: {}", config.net);
    return Status(eInvalidArgument);
  }
  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::GPU;
  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    MMDEPLOY_WARN("Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }
  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;
  // build the SNPE instance, mirroring SetBuilderOptions in the gRPC service
  {
    zdl::SNPE::SNPEBuilder snpeBuilder(container_.get());
    snpe_ = snpeBuilder.setOutputLayers({})
                .setRuntimeProcessorOrder(runtimeList)
                .setUseUserSuppliedBuffers(false)
                .setPlatformConfig(platformConfig)
                .setInitCacheMode(false)
                .build();
  }
  // init internal input tensor list
  const auto& inputTensorNamesRef = snpe_->getInputTensorNames();
  const auto& inputTensorNames = *inputTensorNamesRef;
  inputs_internal_.resize(inputTensorNames.size());
  for (size_t i = 0; i < inputTensorNames.size(); ++i) {
    const auto& inputShape_opt = snpe_->getInputDimensions(inputTensorNames.at(i));
    const auto& inputShape = *inputShape_opt;
    inputs_internal_[i] = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
    input_tensor_map_.add(inputTensorNames.at(i), inputs_internal_[i].get());
  }
  return success();
}
Result<void> SNPENet::Deinit() {
  input_tensor_map_.clear();
  container_.reset();
  snpe_.reset();
  return success();
}
Result<void> SNPENet::Reshape(Span<TensorShape> input_shapes) {
  for (size_t i = 0; i < input_shapes.size(); ++i) {
    input_tensors_[i].Reshape(input_shapes[i]);
  }
  return success();
}
Result<Span<Tensor>> SNPENet::GetInputTensors() { return input_tensors_; }
Result<Span<Tensor>> SNPENet::GetOutputTensors() { return output_tensors_; }
Result<void> SNPENet::Forward() {
  OUTCOME_TRY(stream_.Wait());
  // copy the host input tensors into the internal SNPE input tensors
  const int LEN = inputs_internal_.size();
  for (int i = 0; i < LEN; ++i) {
    float* from = input_tensors_[i].data<float>();
    std::vector<float> vec = {from, from + input_tensors_[i].size()};
    std::copy(vec.begin(), vec.end(), inputs_internal_[i]->begin());
  }
  zdl::DlSystem::TensorMap output_tensor_map;
  bool ret = snpe_->execute(input_tensor_map_, output_tensor_map);
  if (!ret) {
    MMDEPLOY_ERROR("snpe Inference error: {}", std::string(zdl::DlSystem::getLastErrorString()));
    return Status(eFail);
  }
  // extract result
  auto out_names = output_tensor_map.getTensorNames();
  for (size_t i = 0; i < out_names.size(); ++i) {
    const char* name = out_names.at(i);
    zdl::DlSystem::ITensor* pTensor = output_tensor_map.getTensor(name);
    auto& tensor = output_tensors_[i];
    // convert the SNPE shape to an mmdeploy TensorShape, then copy the data out
    auto shape = pTensor->getShape();
    TensorShape tensor_shape(shape.rank());
    for (size_t j = 0; j < shape.rank(); ++j) {
      tensor_shape[j] = shape[j];
    }
    tensor.Reshape(tensor_shape);
    float* to = tensor.data<float>();
    int j = 0;
    for (auto it = pTensor->cbegin(); it != pTensor->cend(); ++it, ++j) {
      to[j] = *it;
    }
  }
  return success();
}
class SNPENetCreator : public Creator<Net> {
 public:
  const char* GetName() const override { return "snpe"; }
  int GetVersion() const override { return 0; }
  std::unique_ptr<Net> Create(const Value& args) override {
    auto p = std::make_unique<SNPENet>();
    if (auto r = p->Init(args)) {
      return p;
    } else {
      MMDEPLOY_ERROR("error creating SNPENet: {}", r.error().message().c_str());
      return nullptr;
    }
  }
};
REGISTER_MODULE(Net, SNPENetCreator);
}  // namespace mmdeploy

View File

@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_NET_SNPE_SNPE_NET_H_
#define MMDEPLOY_SRC_NET_SNPE_SNPE_NET_H_
#include "mmdeploy/core/net.h"

#include "DlContainer/IDlContainer.hpp"
#include "DlSystem/ITensor.hpp"
#include "DlSystem/TensorMap.hpp"
#include "SNPE/SNPE.hpp"
namespace mmdeploy {
class SNPENet : public Net {
public:
~SNPENet() override;
Result<void> Init(const Value& args) override;
Result<void> Deinit() override;
Result<Span<Tensor>> GetInputTensors() override;
Result<Span<Tensor>> GetOutputTensors() override;
Result<void> Reshape(Span<TensorShape> input_shapes) override;
Result<void> Forward() override;
Result<void> ForwardAsync(Event* event) override { return Status(eNotSupported); };
private:
Device device_;
Stream stream_;
std::vector<Tensor> input_tensors_;
std::vector<Tensor> output_tensors_;
std::unique_ptr<zdl::SNPE::SNPE> snpe_;
std::unique_ptr<zdl::DlContainer::IDlContainer> container_;
std::vector<std::unique_ptr<zdl::DlSystem::ITensor>> inputs_internal_;
zdl::DlSystem::TensorMap input_tensor_map_;
};
} // namespace mmdeploy
#endif  // MMDEPLOY_SRC_NET_SNPE_SNPE_NET_H_

View File

@ -1,112 +0,0 @@
# Cross-compiling android gRPC on Linux

mmdeploy already provides a prebuilt snpe inference server. If you want to build it yourself, or need to modify the .proto interface, you can refer to this document.

Note that the official gRPC documentation does not provide complete NDK support.

## Environment

| Item     | Version        | Remarks                                           |
| -------- | -------------- | ------------------------------------------------- |
| snpe     | 1.63.0.3523    | -                                                 |
| host OS  | ubuntu18.04    | version specified by the snpe1.63.0 documentation |
| NDK      | r17c           | version specified by the snpe1.63.0 documentation |
| gRPC     | commit 6f698b5 | -                                                 |
| Hardware | Redmi K40      | requires a qcom chip                              |

## Cross-compile gRPC with the NDK

1. Pull the gRPC repo and build `protoc` and `grpc_cpp_plugin` on the host
```bash
# install dependencies
$ apt-get update && apt-get install -y libssl-dev
# build
$ git clone https://github.com/grpc/grpc --recursive=1 --depth=1
$ mkdir -p cmake/build
$ pushd cmake/build
$ cmake \
-DCMAKE_BUILD_TYPE=Release \
-DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
-DgRPC_SSL_PROVIDER=package \
../..
# protoc and grpc_cpp_plugin need to be installed into the host environment
$ make -j
$ sudo make install
```
2. Download the NDK and cross-compile the static libraries needed for android aarch64
```bash
$ wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
$ unzip android-ndk-r17c-linux-x86_64.zip
# set the environment variable
$ export ANDROID_NDK=/path/to/android-ndk-r17c
# build
$ cd /path/to/grpc
$ mkdir -p cmake/build_aarch64 && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_static \
-DRUN_HAVE_STD_REGEX=0 \
-DRUN_HAVE_POSIX_REGEX=0 \
-DRUN_HAVE_STEADY_CLOCK=0 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/tmp/android_grpc_install
$ make -j
$ make install
```
3. After the install finishes, `/tmp/android_grpc_install` should contain the complete installation files
```bash
$ cd /tmp/android_grpc_install
$ tree -L 1
.
├── bin
├── include
├── lib
└── share
```
## Test gRPC

1. Build the helloworld example that ships with gRPC
```bash
$ cd /path/to/grpc/examples/cpp/helloworld/
$ mkdir cmake/build_aarch64 -p && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_static \
-DRUN_HAVE_STD_REGEX=0 \
-DRUN_HAVE_POSIX_REGEX=0 \
-DRUN_HAVE_STEADY_CLOCK=0 \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install/lib/cmake/grpc
$ make -j
$ ls greeter*
greeter_async_client greeter_async_server greeter_callback_server greeter_server
greeter_async_client2 greeter_callback_client greeter_client
```
2. Enable adb debugging on the phone and push the build artifacts to the `/data/local/tmp` directory
```bash
$ adb push greeter* /data/local/tmp
```
3. `adb shell` into the phone and run the client/server
```bash
/data/local/tmp $ ./greeter_client
Greeter received: Hello world
```
## References
* gRPC cross build script https://raw.githubusercontent.com/grpc/grpc/master/test/distrib/cpp/run_distrib_test_cmake_aarch64_cross.sh
* stackoverflow https://stackoverflow.com/questions/54052229/build-grpc-c-for-android-using-ndk-arm-linux-androideabi-clang-compiler

View File

@ -42,3 +42,4 @@ git clone -b master git@github.com:open-mmlab/mmdeploy.git --recursive
- [Windows](windows.md)
- [Android-aarch64](android.md)
- [NVIDIA Jetson](jetsons.md)
- [snpe](snpe.md)

View File

@ -1 +1,39 @@
##
# SNPE support

mmdeploy integrates snpe in a simple and effective way: a Client/Server mode.

This mode:

1. Decouples the `model conversion` and `inference` environments:
   - everything unrelated to inference runs on a device with more compute power;
   - inference itself returns real gpu/npu results instead of numbers from a CPU simulator.
2. Covers cost-sensitive devices. armv7/risc-v/mips chips meet product requirements but often have limited Python support;
3. Simplifies the mmdeploy installation. If you only want to convert a .dlc model and test its accuracy, you do not need to touch the snpe tutorial.

## 1. Deploy the inference service

Download the prebuilt snpe inference server package, `adb push` it to the phone, and run it (a launch sketch follows the commands below).
```bash
$ wget https://media.githubusercontent.com/media/tpoisonooo/mmdeploy-onnx2ncnn-testdata/main/snpe-inference-server-1.59.zip
$ unzip snpe-inference-server-1.59.zip
$ adb push snpe-inference-server-1.59 /data/local/tmp/
```
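A possible way to launch the service on the phone, assuming the unpacked package contains the `inference_server` binary built in the appendix document (the binary name and on-device path are assumptions):

```bash
$ adb shell
$ cd /data/local/tmp/snpe-inference-server-1.59
$ chmod +x inference_server && ./inference_server
```

On startup the server prints the device IP addresses and listens on port 50051; that `ip:port` pair is what the host side later passes as the gRPC uri.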
If you need to build it yourself, refer to [Cross-compiling the snpe inference server with the NDK](../06-appendix/cross-build-ndk-gRPC.md).

## 2. Install mmdeploy

1. Environment requirements

| Item    | Version            | Remarks                           |
| ------- | ------------------ | --------------------------------- |
| host OS | ubuntu18.04 x86_64 | version required by the snpe tool |
| Python  | 3.6.0              | version required by the snpe tool |
## 3. Test the model
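A minimal conversion-and-test sketch through the remote snpe service, using the `--uri` option added to `tools/deploy.py` in this change; the deploy config, model config and checkpoint paths below are illustrative placeholders, and `snpe-onnx-to-dlc` must be on `PATH` (that is what `mmdeploy.apis.snpe.is_available()` checks):

```bash
$ cd /path/to/mmdeploy
# hypothetical paths; replace the uri with the ip:port printed by inference_server
$ python3 tools/deploy.py \
    configs/mmcls/classification_snpe_static.py \
    /path/to/mmclassification/configs/resnet/resnet18_8xb32_in1k.py \
    /path/to/resnet18.pth \
    /path/to/test.jpg \
    --work-dir work_dir/snpe-resnet18 \
    --device cpu \
    --uri 10.1.80.67:50051
```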
## 4. Build the SDK

View File

@ -0,0 +1,158 @@
# Cross-compiling the NDK snpe service on Ubuntu 18.04

mmdeploy already provides a prebuilt snpe inference server. If you want to build it yourself, or need to modify the .proto interface, you can refer to this document.

Note that the official gRPC documentation does not provide complete NDK support.

## 1. Environment

| Item     | Version        | Remarks                                                   |
| -------- | -------------- | --------------------------------------------------------- |
| snpe     | 1.59           | 1.60 uses clang-8.0, which may cause compatibility issues |
| host OS  | ubuntu18.04    | version specified by snpe1.59                             |
| NDK      | r17c           | version specified by snpe1.59                             |
| gRPC     | commit 6f698b5 | -                                                         |
| Hardware | Redmi K40      | requires a qcom chip                                      |

## 2. Cross-compile gRPC with the NDK

1. Pull the gRPC repo and build `protoc` and `grpc_cpp_plugin` on the host
```bash
# install dependencies
$ apt-get update && apt-get install -y libssl-dev
# build
$ git clone https://github.com/grpc/grpc --recursive=1 --depth=1
$ mkdir -p cmake/build
$ pushd cmake/build
$ cmake \
-DCMAKE_BUILD_TYPE=Release \
-DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
-DgRPC_SSL_PROVIDER=package \
../..
# protoc and grpc_cpp_plugin need to be installed into the host environment
$ make -j
$ sudo make install
```
2. Download the NDK and cross-compile the static libraries needed for android aarch64
```bash
$ wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
$ unzip android-ndk-r17c-linux-x86_64.zip
# set the environment variable
$ export ANDROID_NDK=/path/to/android-ndk-r17c
# build
$ cd /path/to/grpc
$ mkdir -p cmake/build_aarch64 && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_TOOLCHAIN=clang \
-DANDROID_STL=c++_shared \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/tmp/android_grpc_install_shared
$ make -j
$ make install
```
3. At this point `/tmp/android_grpc_install_shared` should contain the complete installation files

```bash
$ cd /tmp/android_grpc_install_shared
$ tree -L 1
.
├── bin
├── include
├── lib
└── share
```
## 3. (Optional) Verify that the NDK gRPC build works

1. Build the helloworld example that ships with gRPC
```bash
$ cd /path/to/grpc/examples/cpp/helloworld/
$ mkdir cmake/build_aarch64 -p && pushd cmake/build_aarch64
$ cmake ../.. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_shared \
-DANDROID_TOOLCHAIN=clang \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
$ make -j
$ ls greeter*
greeter_async_client greeter_async_server greeter_callback_server greeter_server
greeter_async_client2 greeter_callback_client greeter_client
```
2. Enable debugging mode on the phone and push the build artifacts to the `/data/local/tmp` directory

Tip: on many domestic-brand phones, go to Settings - Version number and tap it 7 times to enter developer mode; only then can USB debugging be turned on
```bash
$ adb push greeter* /data/local/tmp
```
3. `adb shell` into the phone and run the client/server
```bash
/data/local/tmp $ ./greeter_client
Greeter received: Hello world
```
## 4. Cross-compile the snpe service

1. Open the [snpe tools website](https://developer.qualcomm.com/software/qualcomm-neural-processing-sdk/tools), download version 1.59, then unpack it and set the environment variable

**Note that snpe 1.60 and later use `clang-8.0`, which may make older devices incompatible with `libc++_shared.so`.**
```bash
$ export SNPE_ROOT=/path/to/snpe-1.59.0.3230
```
2. Go to the mmdeploy snpe server directory and build with the same options used when cross-compiling gRPC
```bash
$ cd /path/to/mmdeploy
$ cd csrc/mmdeploy/backend_ops/snpe/service
$ mkdir -p build && cd build
$ export ANDROID_NDK=/path/to/android-ndk-r17c
$ cmake .. \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-26 \
-DANDROID_STL=c++_shared \
-DANDROID_TOOLCHAIN=clang \
-DCMAKE_BUILD_TYPE=Release \
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
$ make -j
$ file inference_server
inference_server: ELF 64-bit LSB shared object, ARM aarch64, version 1 (SYSV), dynamically linked, interpreter /system/bin/linker64, BuildID[sha1]=252aa04e2b982681603dacb74b571be2851176d2, with debug_info, not stripped
```
This produces `inference_server`; `adb push` it to the device and run it.
## References
- snpe tutorial https://developer.qualcomm.com/sites/default/files/docs/snpe/cplus_plus_tutorial.html
- gRPC cross build script https://raw.githubusercontent.com/grpc/grpc/master/test/distrib/cpp/run_distrib_test_cmake_aarch64_cross.sh
- stackoverflow https://stackoverflow.com/questions/54052229/build-grpc-c-for-android-using-ndk-arm-linux-androideabi-clang-compiler

View File

@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdeploy.backend.snpe import from_onnx as _from_onnx
from mmdeploy.backend.snpe import is_available
from ..core import PIPELINE_MANAGER
from_onnx = PIPELINE_MANAGER.register_pipeline()(_from_onnx)
__all__ = ['is_available', 'from_onnx']
if is_available():
try:
from mmdeploy.backend.snpe.onnx2dlc import get_output_model_file
__all__ += ['get_output_model_file']
except Exception:
pass

View File

@ -0,0 +1,27 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from .init_plugins import get_onnx2dlc_path
from .onnx2dlc import from_onnx
def is_available():
"""Check whether ncnn and snpe-onnx-to-dlc tool are installed.
Returns:
bool: True if snpe-onnx-to-dlc tool are installed.
"""
onnx2dlc = get_onnx2dlc_path()
return osp.exists(onnx2dlc)
__all__ = ['from_onnx']
if is_available():
try:
from .wrapper import SNPEWrapper
__all__ += ['SNPEWrapper']
except Exception as e:
print(e)
pass

View File

@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
import shutil
def get_onnx2dlc_path() -> str:
"""Get snpe-onnx-to-dlc path.
Returns:
str: A path of snpe-onnx-to-dlc tool.
"""
return shutil.which('snpe-onnx-to-dlc')

View File

@ -0,0 +1,69 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
from subprocess import call
from typing import List, Optional, Union
import onnx
from .init_plugins import get_onnx2dlc_path
def mkdir_or_exist(dir_name, mode=0o777):
if dir_name == '':
return
dir_name = osp.expanduser(dir_name)
os.makedirs(dir_name, mode=mode, exist_ok=True)
def get_output_model_file(onnx_path: str,
                          work_dir: Optional[str] = None) -> str:
    """Returns the path to the .dlc file with export result.
    Args:
        onnx_path (str): The path to the onnx model.
        work_dir (str|None): The path to the directory for saving the results.
            Defaults to `None`, which means use the directory of onnx_path.
    Returns:
        str: The path to the file where the export result will be located.
"""
if work_dir is None:
work_dir = osp.dirname(onnx_path)
mkdir_or_exist(osp.abspath(work_dir))
file_name = osp.splitext(osp.split(onnx_path)[1])[0]
save_dlc = osp.join(work_dir, file_name + '.dlc')
return save_dlc
def from_onnx(onnx_model: Union[onnx.ModelProto, str],
output_file_prefix: str):
"""Convert ONNX to dlc.
    We need to use the `snpe-onnx-to-dlc` executable to convert the `.onnx` file to a `.dlc` file.
Example:
>>> from mmdeploy.apis.snpe import from_onnx
>>> onnx_path = 'work_dir/end2end.onnx'
>>> output_file_prefix = 'work_dir/end2end'
>>> from_onnx(onnx_path, output_file_prefix)
Args:
        onnx_model (ModelProto|str): The onnx model or the path to it.
output_file_prefix (str): The path to save the output .dlc file.
"""
if not isinstance(onnx_model, str):
onnx_path = tempfile.NamedTemporaryFile(suffix='.onnx').name
onnx.save(onnx_model, onnx_path)
else:
onnx_path = onnx_model
save_dlc = output_file_prefix + '.dlc'
onnx2dlc = get_onnx2dlc_path()
ret_code = call(
[onnx2dlc, '--input_network', onnx_path, '--output', save_dlc])
assert ret_code == 0, 'onnx2dlc failed'

View File

@ -0,0 +1,140 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import sys
from typing import Dict, Optional, Sequence
import grpc
# import mmdeploy.backend.snpe.inference_pb2
# import mmdeploy.backend.snpe.inference_pb2_grpc
import inference_pb2
import inference_pb2_grpc
import numpy as np
import torch
from mmdeploy.utils import Backend, get_root_logger
from mmdeploy.utils.timer import TimeCounter
from ..base import BACKEND_WRAPPER, BaseWrapper
@BACKEND_WRAPPER.register_module(Backend.SNPE.value)
class SNPEWrapper(BaseWrapper):
"""ncnn wrapper class for inference.
Args:
dlc_file (str): Path of a weight file.
output_names (Sequence[str] | None): Names of model outputs in order.
Defaults to `None` and the wrapper will load the output names from
snpe model.
Examples:
>>> from mmdeploy.backend.snpe import SNPEWrapper
>>> import torch
>>>
        >>> snpe_file = 'alexnet.dlc'
>>> model = SNPEWrapper(snpe_file)
>>> inputs = dict(input=torch.randn(1, 3, 224, 224))
>>> outputs = model(inputs)
>>> print(outputs)
"""
def __init__(self,
dlc_file: str,
output_names: Optional[Sequence[str]] = None,
**kwargs):
logger = get_root_logger()
        # maximum gRPC message size (limits the model file that can be sent): 2 << 29 bytes (1 GiB)
MAX_SIZE = 2 << 29
uri = os.environ['__MMDEPLOY_GRPC_URI']
logger.info(f'fetch uri: {uri}')
self.channel = grpc.insecure_channel(
uri,
            options=(('grpc.keepalive_time_ms', 2000),
                     ('grpc.max_send_message_length', MAX_SIZE),
('grpc.keepalive_permit_without_calls', 1)))
weights = bytes()
filesize = os.stat(dlc_file).st_size
logger.info(f'reading local model file {dlc_file}')
# with open(dlc_file, 'rb') as f:
# weights = f.read(filesize)
stub = inference_pb2_grpc.InferenceStub(self.channel)
logger.info(f'init remote SNPE engine with RPC, please wait...')
model = inference_pb2.Model(name=dlc_file, weights=weights, device=1)
resp = stub.Init(model)
if resp.status != 0:
logger.error(f'init SNPE model failed {resp.info}')
return
output = stub.OutputNames(inference_pb2.Empty())
output_names = output.names
super().__init__(output_names)
logger.info(f'init success, outputs {output_names}')
# def __del__(self):
# stub = inference_pb2_grpc.InferenceStub(self.channel)
# stub.Destroy()
def forward(self, inputs: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
"""Run forward inference.
Args:
inputs (Dict[str, torch.Tensor]): Key-value pairs of model inputs.
Returns:
Dict[str, torch.Tensor]: Key-value pairs of model outputs.
"""
input_list = list(inputs.values())
device_type = input_list[0].device.type
logger = get_root_logger()
# build `list` inputs for remote snpe engine
snpe_inputs = []
for name, input_tensor in inputs.items():
data = input_tensor.contiguous()
data = data.detach().cpu().numpy()
if data.dtype != np.float32:
                logger.error('SNPE currently only supports fp32 input')
data = data.astype(dtype=np.float32)
tensor = inference_pb2.Tensor(
data=data.tobytes(), name=name, dtype='float32')
snpe_inputs.append(tensor)
return self.__snpe_execute(
inference_pb2.TensorList(datas=snpe_inputs), device_type)
@TimeCounter.count_time()
def __snpe_execute(self, tensorList: inference_pb2.TensorList,
                       device: str) -> Dict[str, torch.Tensor]:
"""Run inference with snpe remote inference engine.
Args:
tensorList (inference_pb2.TensorList): snpe input tensor.
Returns:
dict[str, torch.tensor]: Inference results of snpe model.
"""
stub = inference_pb2_grpc.InferenceStub(self.channel)
resp = stub.Inference(tensorList)
result = dict()
if resp.status == 0:
for tensor in resp.datas:
ndarray = np.frombuffer(tensor.data, dtype=np.float32)
result[tensor.name] = torch.from_numpy(
ndarray.copy()).to(device)
else:
logger = get_root_logger()
logger.error(f'snpe inference failed {resp.info}')
return result
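`SNPEWrapper` resolves the gRPC endpoint from the `__MMDEPLOY_GRPC_URI` environment variable, which `tools/deploy.py` fills in from its `--uri` option; when the wrapper is used outside of `deploy.py`, the variable has to be exported beforehand, for example:

```bash
# ipv4:port (or ipv6:port) of the device running inference_server
$ export __MMDEPLOY_GRPC_URI=10.1.80.67:50051
```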

View File

@ -106,6 +106,10 @@ class BaseBackendModel(torch.nn.Module, metaclass=ABCMeta):
model=backend_files[0],
input_names=input_names,
output_names=output_names)
elif backend == Backend.SNPE:
from mmdeploy.backend.snpe import SNPEWrapper
return SNPEWrapper(
dlc_file=backend_files[0], output_names=output_names)
else:
raise NotImplementedError(f'Unknown backend type: {backend.value}')

View File

@ -55,6 +55,7 @@ class Backend(AdvancedEnum):
ONNXRUNTIME = 'onnxruntime'
PPLNN = 'pplnn'
NCNN = 'ncnn'
SNPE = 'snpe'
OPENVINO = 'openvino'
SDK = 'sdk'
TORCHSCRIPT = 'torchscript'

View File

@ -7,3 +7,4 @@ mmrazor>=0.3.0
mmsegmentation
onnxruntime>=1.8.0
openvino-dev
grpcio

View File

@ -41,6 +41,9 @@ def check_backend():
import mmdeploy.apis.openvino as openvino_apis
logger.info(f'openvino_is_avaliable: {openvino_apis.is_available()}')
import mmdeploy.apis.snpe as snpe_apis
logger.info(f'snpe_is_available: {snpe_apis.is_available()}')
def check_codebase():
codebase_versions = get_codebase_version()

View File

@ -54,6 +54,11 @@ def parse_args():
help='Image directory for quantize model.')
parser.add_argument(
'--quant', action='store_true', help='Quantize model to low bit.')
    parser.add_argument(
        '--uri',
        default='10.1.80.67:50051',
        help='Remote ipv4:port or ipv6:port for inference on edge device.')
args = parser.parse_args()
return args
@ -266,6 +271,28 @@ def main():
else:
backend_files += [model_param_path, model_bin_path]
elif backend == Backend.SNPE:
os.environ['__MMDEPLOY_GRPC_URI'] = args.uri
from mmdeploy.apis.snpe import is_available as is_available
if not is_available():
logger.error('snpe support is not available, please check \
            1) `snpe-onnx-to-dlc` exists in `PATH` 2) snpe only supports ubuntu18.04'
)
exit(1)
import mmdeploy.apis.snpe as snpe_api
from mmdeploy.apis.snpe import get_output_model_file
PIPELINE_MANAGER.set_log_level(log_level, [snpe_api.from_onnx])
backend_files = []
for onnx_path in ir_files:
dlc_path = get_output_model_file(onnx_path, args.work_dir)
onnx_name = osp.splitext(osp.split(onnx_path)[1])[0]
snpe_api.from_onnx(onnx_path, osp.join(args.work_dir, onnx_name))
backend_files = [dlc_path]
elif backend == Backend.OPENVINO:
from mmdeploy.apis.openvino import \
is_available as is_available_openvino
@ -332,17 +359,19 @@ def main():
# for headless installation.
if not headless:
# visualize model of the backend
create_process(
f'visualize {backend.value} model',
target=visualize_model,
args=(model_cfg_path, deploy_cfg_path, backend_files,
args.test_img, args.device),
kwargs=dict(
backend=backend,
output_file=osp.join(args.work_dir,
f'output_{backend.value}.jpg'),
show_result=args.show),
ret_value=ret_value)
visualize_model(model_cfg_path, deploy_cfg_path, backend_files,
args.test_img, args.device)
# create_process(
# f'visualize {backend.value} model',
# target=visualize_model,
# args=(model_cfg_path, deploy_cfg_path, backend_files,
# args.test_img, args.device),
# kwargs=dict(
# backend=backend,
# output_file=osp.join(args.work_dir,
# f'output_{backend.value}.jpg'),
# show_result=args.show),
# ret_value=ret_value)
# visualize pytorch model
create_process(