Compare commits: 253d5bcaf7 ... ccd00c14ae

6 commits:

- ccd00c14ae
- a15bd81dbc
- 5666c88b9f
- 1ad3534814
- d1226cb167
- 2c1f4b99cb
@@ -0,0 +1 @@
backend_config = dict(type='snpe')
@@ -0,0 +1,3 @@
_base_ = ['./classification_static.py', '../_base_/backends/snpe.py']

onnx_config = dict(input_shape=None)
@@ -0,0 +1 @@
/home/PJLAB/konghuanjun/GitProjects/mmdeploy/resnet18/end2end.dlc
@@ -0,0 +1 @@
/home/PJLAB/konghuanjun/GitProjects/mmdeploy/resnet18/end2end.onnx
@@ -0,0 +1,82 @@
# Copyright 2015 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Python client for the mmdeploy.Inference gRPC service (adapted from the gRPC helloworld example)."""

from __future__ import print_function

import logging
import os

import cv2
import grpc
import numpy as np

import inference_pb2
import inference_pb2_grpc


def build_dummy_tensor():
    # read a test image and pack it into an mmdeploy.Tensor message
    img = cv2.imread('/home/PJLAB/konghuanjun/Downloads/snpe-1.55.0.2958/models/alexnet/data/chairs.jpg')
    m = cv2.resize(img, (600, 400))
    data = (m.astype(np.float32) - 127.5) / 127.5
    print(data.shape)
    tensor = inference_pb2.Tensor(
        data=data.tobytes(), shape=list(data.shape), name='data_0', dtype='float32')
    return tensor


def run():
    # NOTE(gRPC Python Team): .close() is possible on a channel and should be
    # used in circumstances in which the with statement does not fit the needs
    # of the code.
    filename = 'end2end.dlc'
    filesize = os.stat(filename).st_size

    weights = bytes()
    # with open(filename, 'rb') as f:
    #     weights = f.read(filesize)
    # if len(weights) >= (2 << 29):
    #     print('model size too big')

    # https://github.com/grpc/grpc/blob/v1.46.x/include/grpc/impl/codegen/grpc_types.h
    # https://grpc.io/docs/guides/performance/
    with grpc.insecure_channel(
            '10.1.80.67:50051',
            options=(('grpc.keepalive_time_ms', 2000),
                     ('grpc.max_send_message_length', 2 << 29),
                     ('grpc.keepalive_permit_without_calls', 1))) as channel:
        print("channel type {}".format(type(channel)))
        # with grpc.insecure_channel('[0:0:fe80::3455:bf2a]:50051') as channel:
        stub = inference_pb2_grpc.InferenceStub(channel)
        response = stub.Echo(inference_pb2.Empty())
        print("Response echo {}".format(response))

        model = inference_pb2.Model(name=filename, weights=weights, device=1)
        print("Sending model to init, please wait...")
        response = stub.Init(model)
        print("Response init {}".format(response))

        response = stub.OutputNames(inference_pb2.Empty())
        print("Response outputnames {}".format(response))

        tensor = build_dummy_tensor()
        tensor_list = inference_pb2.TensorList(datas=[tensor])

        for x in range(1):
            response = stub.Inference(tensor_list)
            if response.status == 0:
                prob = np.frombuffer(response.datas[0].data, dtype=np.float32)
                print("prob argmax: {} max: {}".format(prob.argmax(), prob.max()))
            else:
                print(response.info)


if __name__ == '__main__':
    logging.basicConfig()
    run()
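A minimal sketch of running the client above, assuming the generated stubs (`inference_pb2.py`, `inference_pb2_grpc.py`) sit next to it and the script is saved as `inference_client.py` (the file name, the hard-coded image path and the server IP are assumptions to adjust):

```bash
# install the client-side dependencies (versions not pinned here)
$ pip install grpcio grpcio-tools opencv-python numpy
# run the test client against a reachable inference_server
$ python inference_client.py
```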
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: inference.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0finference.proto\x12\x08mmdeploy\"\x91\x01\n\x05Model\x12\x11\n\x04name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07weights\x18\x02 \x01(\x0c\x12+\n\x06\x64\x65vice\x18\x03 \x01(\x0e\x32\x16.mmdeploy.Model.DeviceH\x01\x88\x01\x01\"#\n\x06\x44\x65vice\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\x12\x07\n\x03\x44SP\x10\x02\x42\x07\n\x05_nameB\t\n\x07_device\"\x07\n\x05\x45mpty\"Q\n\x06Tensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\x05\x64type\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\r\n\x05shape\x18\x04 \x03(\x05\x42\x08\n\x06_dtype\"-\n\nTensorList\x12\x1f\n\x05\x64\x61tas\x18\x01 \x03(\x0b\x32\x10.mmdeploy.Tensor\"F\n\x05Reply\x12\x0e\n\x06status\x18\x01 \x01(\x05\x12\x0c\n\x04info\x18\x02 \x01(\t\x12\x1f\n\x05\x64\x61tas\x18\x03 \x03(\x0b\x32\x10.mmdeploy.Tensor\"\x16\n\x05Names\x12\r\n\x05names\x18\x01 \x03(\t2\xfb\x01\n\tInference\x12*\n\x04\x45\x63ho\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Reply\"\x00\x12*\n\x04Init\x12\x0f.mmdeploy.Model\x1a\x0f.mmdeploy.Reply\"\x00\x12\x31\n\x0bOutputNames\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Names\"\x00\x12\x34\n\tInference\x12\x14.mmdeploy.TensorList\x1a\x0f.mmdeploy.Reply\"\x00\x12-\n\x07\x44\x65stroy\x12\x0f.mmdeploy.Empty\x1a\x0f.mmdeploy.Reply\"\x00\x42%\n\rmmdeploy.snpeB\x0bSNPEWrapperP\x01\xa2\x02\x04SNPEb\x06proto3')

_MODEL = DESCRIPTOR.message_types_by_name['Model']
_EMPTY = DESCRIPTOR.message_types_by_name['Empty']
_TENSOR = DESCRIPTOR.message_types_by_name['Tensor']
_TENSORLIST = DESCRIPTOR.message_types_by_name['TensorList']
_REPLY = DESCRIPTOR.message_types_by_name['Reply']
_NAMES = DESCRIPTOR.message_types_by_name['Names']
_MODEL_DEVICE = _MODEL.enum_types_by_name['Device']
Model = _reflection.GeneratedProtocolMessageType('Model', (_message.Message,), {
  'DESCRIPTOR' : _MODEL,
  '__module__' : 'inference_pb2'
  # @@protoc_insertion_point(class_scope:mmdeploy.Model)
  })
_sym_db.RegisterMessage(Model)

Empty = _reflection.GeneratedProtocolMessageType('Empty', (_message.Message,), {
  'DESCRIPTOR' : _EMPTY,
  '__module__' : 'inference_pb2'
  # @@protoc_insertion_point(class_scope:mmdeploy.Empty)
  })
_sym_db.RegisterMessage(Empty)

Tensor = _reflection.GeneratedProtocolMessageType('Tensor', (_message.Message,), {
  'DESCRIPTOR' : _TENSOR,
  '__module__' : 'inference_pb2'
  # @@protoc_insertion_point(class_scope:mmdeploy.Tensor)
  })
_sym_db.RegisterMessage(Tensor)

TensorList = _reflection.GeneratedProtocolMessageType('TensorList', (_message.Message,), {
  'DESCRIPTOR' : _TENSORLIST,
  '__module__' : 'inference_pb2'
  # @@protoc_insertion_point(class_scope:mmdeploy.TensorList)
  })
_sym_db.RegisterMessage(TensorList)

Reply = _reflection.GeneratedProtocolMessageType('Reply', (_message.Message,), {
  'DESCRIPTOR' : _REPLY,
  '__module__' : 'inference_pb2'
  # @@protoc_insertion_point(class_scope:mmdeploy.Reply)
  })
_sym_db.RegisterMessage(Reply)

Names = _reflection.GeneratedProtocolMessageType('Names', (_message.Message,), {
  'DESCRIPTOR' : _NAMES,
  '__module__' : 'inference_pb2'
  # @@protoc_insertion_point(class_scope:mmdeploy.Names)
  })
_sym_db.RegisterMessage(Names)

_INFERENCE = DESCRIPTOR.services_by_name['Inference']
if _descriptor._USE_C_DESCRIPTORS == False:

  DESCRIPTOR._options = None
  DESCRIPTOR._serialized_options = b'\n\rmmdeploy.snpeB\013SNPEWrapperP\001\242\002\004SNPE'
  _MODEL._serialized_start=30
  _MODEL._serialized_end=175
  _MODEL_DEVICE._serialized_start=120
  _MODEL_DEVICE._serialized_end=155
  _EMPTY._serialized_start=177
  _EMPTY._serialized_end=184
  _TENSOR._serialized_start=186
  _TENSOR._serialized_end=267
  _TENSORLIST._serialized_start=269
  _TENSORLIST._serialized_end=314
  _REPLY._serialized_start=316
  _REPLY._serialized_end=386
  _NAMES._serialized_start=388
  _NAMES._serialized_end=410
  _INFERENCE._serialized_start=413
  _INFERENCE._serialized_end=664
# @@protoc_insertion_point(module_scope)
@@ -0,0 +1,205 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

import inference_pb2 as inference__pb2


class InferenceStub(object):
    """The inference service definition.
    """

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.Echo = channel.unary_unary(
                '/mmdeploy.Inference/Echo',
                request_serializer=inference__pb2.Empty.SerializeToString,
                response_deserializer=inference__pb2.Reply.FromString,
                )
        self.Init = channel.unary_unary(
                '/mmdeploy.Inference/Init',
                request_serializer=inference__pb2.Model.SerializeToString,
                response_deserializer=inference__pb2.Reply.FromString,
                )
        self.OutputNames = channel.unary_unary(
                '/mmdeploy.Inference/OutputNames',
                request_serializer=inference__pb2.Empty.SerializeToString,
                response_deserializer=inference__pb2.Names.FromString,
                )
        self.Inference = channel.unary_unary(
                '/mmdeploy.Inference/Inference',
                request_serializer=inference__pb2.TensorList.SerializeToString,
                response_deserializer=inference__pb2.Reply.FromString,
                )
        self.Destroy = channel.unary_unary(
                '/mmdeploy.Inference/Destroy',
                request_serializer=inference__pb2.Empty.SerializeToString,
                response_deserializer=inference__pb2.Reply.FromString,
                )


class InferenceServicer(object):
    """The inference service definition.
    """

    def Echo(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Init(self, request, context):
        """Init Model with model file
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def OutputNames(self, request, context):
        """Get output names
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Inference(self, request, context):
        """Inference with inputs
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Destroy(self, request, context):
        """Destroy handle
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_InferenceServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'Echo': grpc.unary_unary_rpc_method_handler(
                    servicer.Echo,
                    request_deserializer=inference__pb2.Empty.FromString,
                    response_serializer=inference__pb2.Reply.SerializeToString,
            ),
            'Init': grpc.unary_unary_rpc_method_handler(
                    servicer.Init,
                    request_deserializer=inference__pb2.Model.FromString,
                    response_serializer=inference__pb2.Reply.SerializeToString,
            ),
            'OutputNames': grpc.unary_unary_rpc_method_handler(
                    servicer.OutputNames,
                    request_deserializer=inference__pb2.Empty.FromString,
                    response_serializer=inference__pb2.Names.SerializeToString,
            ),
            'Inference': grpc.unary_unary_rpc_method_handler(
                    servicer.Inference,
                    request_deserializer=inference__pb2.TensorList.FromString,
                    response_serializer=inference__pb2.Reply.SerializeToString,
            ),
            'Destroy': grpc.unary_unary_rpc_method_handler(
                    servicer.Destroy,
                    request_deserializer=inference__pb2.Empty.FromString,
                    response_serializer=inference__pb2.Reply.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'mmdeploy.Inference', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))


# This class is part of an EXPERIMENTAL API.
class Inference(object):
    """The inference service definition.
    """

    @staticmethod
    def Echo(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Echo',
            inference__pb2.Empty.SerializeToString,
            inference__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Init(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Init',
            inference__pb2.Model.SerializeToString,
            inference__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def OutputNames(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/OutputNames',
            inference__pb2.Empty.SerializeToString,
            inference__pb2.Names.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Inference(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Inference',
            inference__pb2.TensorList.SerializeToString,
            inference__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Destroy(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/mmdeploy.Inference/Destroy',
            inference__pb2.Empty.SerializeToString,
            inference__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@@ -0,0 +1,70 @@
syntax = "proto3";

option java_multiple_files = true;
option java_package = "mmdeploy.snpe";
option java_outer_classname = "SNPEWrapper";
option objc_class_prefix = "SNPE";

package mmdeploy;

// The inference service definition.
service Inference {

  rpc Echo(Empty) returns (Reply) {}

  // Init Model with model file
  rpc Init(Model) returns (Reply) {}

  // Get output names
  rpc OutputNames(Empty) returns (Names) {}

  // Inference with inputs
  rpc Inference(TensorList) returns (Reply) {}

  // Destroy handle
  rpc Destroy(Empty) returns (Reply) {}
}

message Model {
  optional string name = 1;
  // bin
  bytes weights = 2;
  // config
  enum Device {
    CPU = 0;
    GPU = 1;
    DSP = 2;
  }
  optional Device device = 3;
}

// https://stackoverflow.com/questions/31768665/can-i-define-a-grpc-call-with-a-null-request-or-response
message Empty {}

message Tensor {
  // name
  string name = 1;

  // datatype
  optional string dtype = 2;

  // data
  bytes data = 3;

  // shape
  repeated int32 shape = 4;
}

message TensorList {
  repeated Tensor datas = 1;
}

message Reply {
  int32 status = 1;
  string info = 2;
  repeated Tensor datas = 3;
}

message Names {
  repeated string names = 1;
}
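For reference, the stubs used elsewhere in this diff can be regenerated from this file; a minimal sketch, assuming `grpcio-tools` and the gRPC C++ plugin are installed and the commands are run next to `inference.proto`:

```bash
# Python stubs (inference_pb2.py / inference_pb2_grpc.py), used by the test client
$ python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. inference.proto

# C++ sources (inference.pb.cc / inference.grpc.pb.cc), mirroring the add_custom_command in the server CMakeLists.txt
$ protoc -I. --cpp_out=. --grpc_out=. --plugin=protoc-gen-grpc=$(which grpc_cpp_plugin) inference.proto
```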
@@ -0,0 +1,81 @@
# Copyright 2018 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cmake build file for the C++ SNPE inference server (adapted from the gRPC helloworld example).
# Assumes protobuf and gRPC have been installed using cmake.
# See cmake_externalproject/CMakeLists.txt for an all-in-one cmake build
# that automatically builds all the dependencies before building the server.

cmake_minimum_required(VERSION 3.5.1)
project(SNPEServer C CXX)
include(./common.cmake)

# Proto file
get_filename_component(hw_proto "../inference.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)

# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/inference.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/inference.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/inference.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/inference.grpc.pb.h")

add_custom_command(
      OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
      COMMAND ${_PROTOBUF_PROTOC}
      ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
        --cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
        -I "${hw_proto_path}"
        --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
        "${hw_proto}"
      DEPENDS "${hw_proto}")

# Include generated *.pb.h files
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

# hw_grpc_proto
add_library(hw_grpc_proto
  ${hw_grpc_srcs}
  ${hw_grpc_hdrs}
  ${hw_proto_srcs}
  ${hw_proto_hdrs})

target_link_libraries(hw_grpc_proto
  ${_REFLECTION}
  ${_GRPC_GRPCPP}
  ${_PROTOBUF_LIBPROTOBUF})

add_library(snpe SHARED IMPORTED)

if (NOT EXISTS $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/)
  message(FATAL_ERROR "snpe directory does not exist: $ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/")
endif()

set_target_properties(snpe PROPERTIES
  IMPORTED_LOCATION "$ENV{SNPE_ROOT}/lib/aarch64-android-clang6.0/libSNPE.so"
  INTERFACE_INCLUDE_DIRECTORIES "$ENV{SNPE_ROOT}/include/zdl"
)
target_link_directories(
  snpe
  INTERFACE
)

add_executable(inference_server inference_server.cc service_impl.cpp)

target_link_libraries(inference_server
  hw_grpc_proto
  ${_REFLECTION}
  ${_GRPC_GRPCPP}
  ${_PROTOBUF_LIBPROTOBUF}
  snpe)
@@ -0,0 +1,10 @@
export ANDROID_NDK=/home/PJLAB/konghuanjun/Downloads/android-ndk-r17c
cmake .. \
  -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
  -DANDROID_ABI=arm64-v8a \
  -DANDROID_PLATFORM=android-26 \
  -DANDROID_STL=c++_shared \
  -DCMAKE_BUILD_TYPE=Release \
  -Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
  -DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
  -DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
@@ -0,0 +1,123 @@
# Copyright 2018 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cmake build file for C++ route_guide example.
# Assumes protobuf and gRPC have been installed using cmake.
# See cmake_externalproject/CMakeLists.txt for all-in-one cmake build
# that automatically builds all the dependencies before building route_guide.

cmake_minimum_required(VERSION 3.5.1)

set (CMAKE_CXX_STANDARD 14)

if(MSVC)
  add_definitions(-D_WIN32_WINNT=0x600)
endif()

find_package(Threads REQUIRED)

if(GRPC_AS_SUBMODULE)
  # One way to build a projects that uses gRPC is to just include the
  # entire gRPC project tree via "add_subdirectory".
  # This approach is very simple to use, but the are some potential
  # disadvantages:
  # * it includes gRPC's CMakeLists.txt directly into your build script
  #   without and that can make gRPC's internal setting interfere with your
  #   own build.
  # * depending on what's installed on your system, the contents of submodules
  #   in gRPC's third_party/* might need to be available (and there might be
  #   additional prerequisites required to build them). Consider using
  #   the gRPC_*_PROVIDER options to fine-tune the expected behavior.
  #
  # A more robust approach to add dependency on gRPC is using
  # cmake's ExternalProject_Add (see cmake_externalproject/CMakeLists.txt).

  # Include the gRPC's cmake build (normally grpc source code would live
  # in a git submodule called "third_party/grpc", but this example lives in
  # the same repository as gRPC sources, so we just look a few directories up)
  add_subdirectory(../../.. ${CMAKE_CURRENT_BINARY_DIR}/grpc EXCLUDE_FROM_ALL)
  message(STATUS "Using gRPC via add_subdirectory.")

  # After using add_subdirectory, we can now use the grpc targets directly from
  # this build.
  set(_PROTOBUF_LIBPROTOBUF libprotobuf)
  set(_REFLECTION grpc++_reflection)
  if(CMAKE_CROSSCOMPILING)
    find_program(_PROTOBUF_PROTOC protoc)
  else()
    set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
  endif()
  set(_GRPC_GRPCPP grpc++)
  if(CMAKE_CROSSCOMPILING)
    find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
  else()
    set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
  endif()
elseif(GRPC_FETCHCONTENT)
  # Another way is to use CMake's FetchContent module to clone gRPC at
  # configure time. This makes gRPC's source code available to your project,
  # similar to a git submodule.
  message(STATUS "Using gRPC via add_subdirectory (FetchContent).")
  include(FetchContent)
  FetchContent_Declare(
    grpc
    GIT_REPOSITORY https://github.com/grpc/grpc.git
    # when using gRPC, you will actually set this to an existing tag, such as
    # v1.25.0, v1.26.0 etc..
    # For the purpose of testing, we override the tag used to the commit
    # that's currently under test.
    GIT_TAG        vGRPC_TAG_VERSION_OF_YOUR_CHOICE)
  FetchContent_MakeAvailable(grpc)

  # Since FetchContent uses add_subdirectory under the hood, we can use
  # the grpc targets directly from this build.
  set(_PROTOBUF_LIBPROTOBUF libprotobuf)
  set(_REFLECTION grpc++_reflection)
  set(_PROTOBUF_PROTOC $<TARGET_FILE:protoc>)
  set(_GRPC_GRPCPP grpc++)
  if(CMAKE_CROSSCOMPILING)
    find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
  else()
    set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
  endif()
else()
  # This branch assumes that gRPC and all its dependencies are already installed
  # on this system, so they can be located by find_package().

  # Find Protobuf installation
  # Looks for protobuf-config.cmake file installed by Protobuf's cmake installation.
  set(protobuf_MODULE_COMPATIBLE TRUE)
  find_package(Protobuf CONFIG REQUIRED)
  message(STATUS "Using protobuf ${Protobuf_VERSION}")

  set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
  set(_REFLECTION gRPC::grpc++_reflection)
  if(CMAKE_CROSSCOMPILING)
    find_program(_PROTOBUF_PROTOC protoc)
  else()
    set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
  endif()

  # Find gRPC installation
  # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation.
  find_package(gRPC CONFIG REQUIRED)
  message(STATUS "Using gRPC ${gRPC_VERSION}")

  set(_GRPC_GRPCPP gRPC::grpc++)
  if(CMAKE_CROSSCOMPILING)
    find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
  else()
    set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
  endif()
endif()
@@ -0,0 +1,89 @@
/*
 *
 * Copyright 2015 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Copyright (c) OpenMMLab. All rights reserved.

#include <arpa/inet.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

#include <iostream>

#include "service_impl.h"

void PrintIPv4() {
  struct ifaddrs* ifAddrStruct = NULL;
  void* tmpAddrPtr = NULL;

  getifaddrs(&ifAddrStruct);

  while (ifAddrStruct != NULL) {
    if (ifAddrStruct->ifa_addr->sa_family == AF_INET) {
      tmpAddrPtr = &((struct sockaddr_in*)ifAddrStruct->ifa_addr)->sin_addr;
      char addressBuffer[INET_ADDRSTRLEN];
      inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN);
      fprintf(stdout, "%s IP Address %s\n", ifAddrStruct->ifa_name,
              addressBuffer);
    } else if (ifAddrStruct->ifa_addr->sa_family == AF_INET6) {
      tmpAddrPtr = &((struct sockaddr_in6*)ifAddrStruct->ifa_addr)->sin6_addr;
      char addressBuffer[INET6_ADDRSTRLEN];
      inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN);
      fprintf(stdout, "%s IP Address %s\n", ifAddrStruct->ifa_name,
              addressBuffer);
    }
    ifAddrStruct = ifAddrStruct->ifa_next;
  }
}

void RunServer() {
  // listen on IPv4 and IPv6
  std::string server_address("[::]:50051");
  InferenceServiceImpl service;

  grpc::EnableDefaultHealthCheckService(true);
  grpc::reflection::InitProtoReflectionServerBuilderPlugin();
  ServerBuilder builder;
  // Listen on the given address without any authentication mechanism.
  builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());

  // Allow messages up to 2 << 29 bytes (1 GiB)
  builder.SetMaxMessageSize(2 << 29);
  builder.SetMaxSendMessageSize(2 << 29);

  // Register "service" as the instance through which we'll communicate with
  // clients. In this case it corresponds to a *synchronous* service.
  builder.RegisterService(&service);
  // Finally assemble the server.
  std::unique_ptr<Server> server(builder.BuildAndStart());
  std::cout << "Server listening on " << server_address << std::endl;

  // Wait for the server to shutdown. Note that some other thread must be
  // responsible for shutting down the server for this call to ever return.
  server->Wait();
}

int main(int argc, char** argv) {
  PrintIPv4();
  RunServer();

  return 0;
}
@@ -0,0 +1,265 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "service_impl.h"

#include <getopt.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>

zdl::DlSystem::Runtime_t InferenceServiceImpl::CheckRuntime(
    zdl::DlSystem::Runtime_t runtime, bool& staticQuantization) {
  static zdl::DlSystem::Version_t Version =
      zdl::SNPE::SNPEFactory::getLibraryVersion();

  fprintf(stdout, "SNPE Version: %s\n", Version.asString().c_str());

  if ((runtime != zdl::DlSystem::Runtime_t::DSP) && staticQuantization) {
    fprintf(stderr,
            "ERROR: Cannot use static quantization with CPU/GPU runtimes. "
            "It is only designed for DSP/AIP runtimes.\n"
            "ERROR: Proceeding without static quantization on selected "
            "runtime.\n");
    staticQuantization = false;
  }

  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    fprintf(stderr, "Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }

  return runtime;
}

std::unique_ptr<zdl::SNPE::SNPE> InferenceServiceImpl::SetBuilderOptions(
    std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
    zdl::DlSystem::Runtime_t runtime, zdl::DlSystem::RuntimeList runtimeList,
    bool useUserSuppliedBuffers, zdl::DlSystem::PlatformConfig platformConfig,
    bool useCaching) {
  std::unique_ptr<zdl::SNPE::SNPE> psnpe;

  zdl::SNPE::SNPEBuilder snpeBuilder(container.get());

  if (runtimeList.empty()) {
    runtimeList.add(runtime);
  }

  psnpe = snpeBuilder.setOutputLayers({})
              .setRuntimeProcessorOrder(runtimeList)
              .setUseUserSuppliedBuffers(useUserSuppliedBuffers)
              .setPlatformConfig(platformConfig)
              .setInitCacheMode(useCaching)
              .build();
  return psnpe;
}

std::string InferenceServiceImpl::SaveDLC(const ::mmdeploy::Model* request) {
  std::string filename = "tmp.dlc";
  if (request->has_name()) {
    filename = request->name();
  }
  auto model = request->weights();
  fprintf(stdout, "saving file to %s\n", filename.c_str());
  std::ofstream fout;
  fout.open(filename, std::ios::binary | std::ios::out);
  fout.write(model.data(), model.size());
  fout.flush();
  fout.close();
  return filename;
}

void InferenceServiceImpl::LoadFloatData(const std::string& data,
                                         std::vector<float>& vec) {
  size_t len = data.size();
  assert(len % sizeof(float) == 0);
  const char* ptr = data.data();
  for (int i = 0; i < len; i += sizeof(float)) {
    vec.push_back(*(float*)(ptr + i));
  }
}

::grpc::Status InferenceServiceImpl::Echo(::grpc::ServerContext* context,
                                          const ::mmdeploy::Empty* request,
                                          ::mmdeploy::Reply* response) {
  fprintf(stdout, "Stage Echo: recv command\n");
  response->set_info("echo");
  return Status::OK;
}

// Logic and data behind the server's behavior.
::grpc::Status InferenceServiceImpl::Init(::grpc::ServerContext* context,
                                          const ::mmdeploy::Model* request,
                                          ::mmdeploy::Reply* response) {
  fprintf(stdout, "Stage Init: recv command\n");
  // std::string filename = SaveDLC(request);
  std::string filename = "end2end.dlc";

  if (snpe != nullptr) {
    snpe.reset();
  }
  if (container != nullptr) {
    container.reset();
  }

  container =
      zdl::DlContainer::IDlContainer::open(zdl::DlSystem::String(filename));
  if (container == nullptr) {
    fprintf(stdout, "Stage Init: load dlc failed.\n");

    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }

  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::CPU;
  if (request->has_device()) {
    switch (request->device()) {
      case mmdeploy::Model_Device_GPU:
        runtime = zdl::DlSystem::Runtime_t::GPU;
        break;
      case mmdeploy::Model_Device_DSP:
        runtime = zdl::DlSystem::Runtime_t::DSP;
        break;
      default:
        break;
    }
  }

  if (runtime != zdl::DlSystem::Runtime_t::CPU) {
    bool static_quant = false;
    runtime = CheckRuntime(runtime, static_quant);
  }

  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;
  snpe = SetBuilderOptions(container, runtime, runtimeList, false,
                           platformConfig, false);

  if (snpe == nullptr) {
    response->set_status(-2);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }

  response->set_status(0);
  response->set_info("Stage Init: success");
  return Status::OK;
}

::grpc::Status InferenceServiceImpl::OutputNames(
    ::grpc::ServerContext* context, const ::mmdeploy::Empty* request,
    ::mmdeploy::Names* response) {
  const auto& outputTensorNamesRef = snpe->getOutputTensorNames();
  const auto& outputTensorNames = *outputTensorNamesRef;

  for (int i = 0; i < outputTensorNames.size(); ++i) {
    response->add_names(outputTensorNames.at(i));
  }

  return Status::OK;
}

::grpc::Status InferenceServiceImpl::Inference(
    ::grpc::ServerContext* context, const ::mmdeploy::TensorList* request,
    ::mmdeploy::Reply* response) {
  // Get input names and number
  fprintf(stdout, "Stage Inference: command\n");

  const auto& inputTensorNamesRef = snpe->getInputTensorNames();

  if (!inputTensorNamesRef) {
    response->set_status(-1);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }

  const auto& inputTensorNames = *inputTensorNamesRef;
  if (inputTensorNames.size() != request->datas_size()) {
    response->set_status(-2);
    response->set_info("Stage Inference: input names count not match !");
    return Status::OK;
  }

  std::vector<std::unique_ptr<zdl::DlSystem::ITensor>> inputTensors(
      inputTensorNames.size());
  zdl::DlSystem::TensorMap inputTensorMap;
  // Load input/output buffers with TensorMap
  for (int i = 0; i < request->datas_size(); ++i) {
    auto tensor = request->datas(i);
    std::vector<float> float_input;
    LoadFloatData(tensor.data(), float_input);

    const auto& inputShape_opt =
        snpe->getInputDimensions(tensor.name().c_str());
    const auto& inputShape = *inputShape_opt;

    fprintf(stdout, "Stage Inference: tensor name: %s input data len %lu, [",
            inputTensorNames.at(i), float_input.size());
    for (int j = 0; j < inputShape.rank(); ++j) {
      fprintf(stdout, " %ld,", inputShape[j]);
    }
    fprintf(stdout, "]\n");

    inputTensors[i] =
        zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
    std::copy(float_input.begin(), float_input.end(), inputTensors[i]->begin());

    inputTensorMap.add(tensor.name().c_str(), inputTensors[i].get());
  }

  // A tensor map for SNPE execution outputs
  zdl::DlSystem::TensorMap outputTensorMap;
  // Execute the multiple input tensorMap on the model with SNPE
  bool success = snpe->execute(inputTensorMap, outputTensorMap);
  if (!success) {
    // build output status
    response->set_status(-3);
    response->set_info(zdl::DlSystem::getLastErrorString());
    return Status::OK;
  }

  // build output tensor list
  {
    auto out_names = outputTensorMap.getTensorNames();
    for (size_t i = 0; i < out_names.size(); ++i) {
      const char* name = out_names.at(i);
      zdl::DlSystem::ITensor* pTensor = outputTensorMap.getTensor(name);

      size_t data_length = pTensor->getSize();

      std::string result;
      result.resize(sizeof(float) * data_length);
      int j = 0;
      for (auto it = pTensor->cbegin(); it != pTensor->cend();
           ++it, j += sizeof(float)) {
        float f = *it;
        memcpy(&result[0] + j, reinterpret_cast<char*>(&f), sizeof(float));
      }

      ::mmdeploy::Tensor* pData = response->add_datas();
      pData->set_dtype("float32");
      pData->set_name(name);
      pData->set_data(result);
    }
  }

  // build output status
  response->set_status(0);
  response->set_info("Stage Inference: success");
  return Status::OK;
}

::grpc::Status InferenceServiceImpl::Destroy(::grpc::ServerContext* context,
                                             const ::mmdeploy::Empty* request,
                                             ::mmdeploy::Reply* response) {
  snpe.reset();
  container.reset();
  response->set_status(0);
  return Status::OK;
}
@@ -0,0 +1,80 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef SERVICE_IMPL_H
#define SERVICE_IMPL_H

#include <iostream>
#include <memory>
#include <string>

#include "DiagLog/IDiagLog.hpp"
#include "DlContainer/IDlContainer.hpp"
#include "DlSystem/DlEnums.hpp"
#include "DlSystem/DlError.hpp"
#include "DlSystem/ITensorFactory.hpp"
#include "DlSystem/IUserBuffer.hpp"
#include "DlSystem/PlatformConfig.hpp"
#include "DlSystem/RuntimeList.hpp"
#include "DlSystem/UserBufferMap.hpp"
#include "SNPE/SNPE.hpp"
#include "SNPE/SNPEBuilder.hpp"
#include "SNPE/SNPEFactory.hpp"
#include "inference.grpc.pb.h"

#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <grpcpp/grpcpp.h>
#include <grpcpp/health_check_service_interface.h>

using grpc::Server;
using grpc::ServerBuilder;
using grpc::ServerContext;
using grpc::Status;

using mmdeploy::Empty;
using mmdeploy::Inference;
using mmdeploy::Model;
using mmdeploy::Reply;
using mmdeploy::Tensor;
using mmdeploy::TensorList;

// Logic and data behind the server's behavior.
class InferenceServiceImpl final : public Inference::Service {
  ::grpc::Status Echo(::grpc::ServerContext* context,
                      const ::mmdeploy::Empty* request,
                      ::mmdeploy::Reply* response) override;

  // Init Model with model file
  ::grpc::Status Init(::grpc::ServerContext* context,
                      const ::mmdeploy::Model* request,
                      ::mmdeploy::Reply* response) override;
  // Get output names
  ::grpc::Status OutputNames(::grpc::ServerContext* context,
                             const ::mmdeploy::Empty* request,
                             ::mmdeploy::Names* response) override;
  // Inference with inputs
  ::grpc::Status Inference(::grpc::ServerContext* context,
                           const ::mmdeploy::TensorList* request,
                           ::mmdeploy::Reply* response) override;
  // Destroy handle
  ::grpc::Status Destroy(::grpc::ServerContext* context,
                         const ::mmdeploy::Empty* request,
                         ::mmdeploy::Reply* response) override;

  std::string SaveDLC(const ::mmdeploy::Model* request);

  void LoadFloatData(const std::string& data, std::vector<float>& vec);

  zdl::DlSystem::Runtime_t CheckRuntime(zdl::DlSystem::Runtime_t runtime,
                                        bool& staticQuantization);

  std::unique_ptr<zdl::SNPE::SNPE> SetBuilderOptions(
      std::unique_ptr<zdl::DlContainer::IDlContainer>& container,
      zdl::DlSystem::Runtime_t runtime, zdl::DlSystem::RuntimeList runtimeList,
      bool useUserSuppliedBuffers, zdl::DlSystem::PlatformConfig platformConfig,
      bool useCaching);

  std::unique_ptr<zdl::SNPE::SNPE> snpe;
  std::unique_ptr<zdl::DlContainer::IDlContainer> container;
};

#endif  // SERVICE_IMPL_H
@@ -0,0 +1,17 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_ncnn_net)

if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)

  find_package(ncnn REQUIRED)

  mmdeploy_add_module(${PROJECT_NAME} snpe_net.cpp)
  target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_ncnn_ops_obj)
  target_link_libraries(${PROJECT_NAME} PRIVATE ncnn)
  add_library(mmdeploy::ncnn_net ALIAS ${PROJECT_NAME})
else ()
  message(
      ERROR
      "'snpe_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}")
endif ()
@@ -0,0 +1,142 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "ncnn_net.h"

#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/formatter.h"

namespace mmdeploy {

SNPENet::~SNPENet() {}

Result<void> SNPENet::Init(const Value& args) {
  auto& context = args["context"];
  device_ = context["device"].get<Device>();
  stream_ = context["stream"].get<Stream>();
  if (!device_.is_host()) {
    return Status(eNotSupported);
  }

  auto name = args["name"].get<std::string>();
  auto model = context["model"].get<Model>();
  OUTCOME_TRY(auto config, model.GetModelConfig(name));

  container_ = zdl::DlContainer::IDlContainer::open(zdl::DlSystem::String(config.net));
  if (container_ == nullptr) {
    MMDEPLOY_ERROR("Load .dlc failed: {}", config.net);
  }

  zdl::DlSystem::Runtime_t runtime = zdl::DlSystem::Runtime_t::GPU;
  if (!zdl::SNPE::SNPEFactory::isRuntimeAvailable(runtime)) {
    MMDEPLOY_WARN("Selected runtime not present. Falling back to CPU.\n");
    runtime = zdl::DlSystem::Runtime_t::CPU;
  }

  zdl::DlSystem::RuntimeList runtimeList;
  runtimeList.add(runtime);
  zdl::DlSystem::PlatformConfig platformConfig;

  // build the SNPE instance directly from the container
  zdl::SNPE::SNPEBuilder snpeBuilder(container_.get());
  snpe_ = snpeBuilder.setOutputLayers({})
              .setRuntimeProcessorOrder(runtimeList)
              .setUseUserSuppliedBuffers(false)
              .setPlatformConfig(platformConfig)
              .setInitCacheMode(false)
              .build();

  // init internal input tensor list
  const auto& inputTensorNamesRef = snpe_->getInputTensorNames();
  const auto& inputTensorNames = *inputTensorNamesRef;
  inputs_internal_.resize(inputTensorNames.size());

  for (int i = 0; i < inputTensorNames.size(); ++i) {
    const auto& inputShape_opt = snpe_->getInputDimensions(inputTensorNames.at(i));
    const auto& inputShape = *inputShape_opt;

    inputs_internal_[i] = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
    input_tensor_map_.add(inputTensorNames.at(i), inputs_internal_[i].get());
  }

  return success();
}

Result<void> SNPENet::Deinit() {
  input_tensor_map_.clear();
  container_.reset();
  snpe_.reset();
  return success();
}

Result<void> SNPENet::Reshape(Span<TensorShape> input_shapes) {
  for (size_t i = 0; i < input_shapes.size(); ++i) {
    input_tensors_[i].Reshape(input_shapes[i]);
  }
  return success();
}

Result<Span<Tensor>> SNPENet::GetInputTensors() { return input_tensors_; }

Result<Span<Tensor>> SNPENet::GetOutputTensors() { return output_tensors_; }

Result<void> SNPENet::Forward() {
  OUTCOME_TRY(stream_.Wait());

  // copy host tensors into the internal SNPE input tensors
  const int LEN = inputs_internal_.size();
  for (int i = 0; i < LEN; ++i) {
    float* from = input_tensors_[i].data<float>();
    std::vector<float> vec = {from, from + input_tensors_[i].size()};
    std::copy(vec.begin(), vec.end(), inputs_internal_[i]->begin());
  }

  zdl::DlSystem::TensorMap output_tensor_map;
  bool ret = snpe_->execute(input_tensor_map_, output_tensor_map);
  if (!ret) {
    MMDEPLOY_ERROR("snpe Inference error: {}", std::string(zdl::DlSystem::getLastErrorString()));
  }

  // extract result
  auto out_names = output_tensor_map.getTensorNames();
  for (size_t i = 0; i < out_names.size(); ++i) {
    const char* name = out_names.at(i);
    zdl::DlSystem::ITensor* pTensor = output_tensor_map.getTensor(name);

    size_t data_size = sizeof(float) * pTensor->getSize();

    auto& tensor = output_tensors_[i];
    auto shape = pTensor->getShape();
    switch (shape.rank()) {
      case 1:
        tensor.Reshape({shape[0]});
        break;
      case 2:
        tensor.Reshape({shape[0], shape[1]});
        break;
      case 3:
        tensor.Reshape({shape[0], shape[1], shape[2]});
        break;
      case 4:
        tensor.Reshape({shape[0], shape[1], shape[2], shape[3]});
        break;
      default:
        break;
    }

    float* to = tensor.data<float>();
    int j = 0;
    for (auto it = pTensor->cbegin(); it != pTensor->cend(); ++it, ++j) {
      to[j] = *it;
    }
  }

  return success();
}

class SNPENetCreator : public Creator<Net> {
 public:
  const char* GetName() const override { return "snpe"; }
  int GetVersion() const override { return 0; }
  std::unique_ptr<Net> Create(const Value& args) override {
    auto p = std::make_unique<SNPENet>();
    if (auto r = p->Init(args)) {
      return p;
    } else {
      MMDEPLOY_ERROR("error creating SNPENet: {}", r.error().message().c_str());
      return nullptr;
    }
  }
};

REGISTER_MODULE(Net, SNPENetCreator);

}  // namespace mmdeploy
@@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_NET_NCNN_NCNN_NET_H_
#define MMDEPLOY_SRC_NET_NCNN_NCNN_NET_H_

#include "mmdeploy/core/net.h"
#include "SNPE/SNPE.hpp"
#include "DlContainer/IDlContainer.hpp"
#include "DlSystem/TensorMap.hpp"

namespace mmdeploy {

class SNPENet : public Net {
 public:
  ~SNPENet() override;
  Result<void> Init(const Value& args) override;
  Result<void> Deinit() override;
  Result<Span<Tensor>> GetInputTensors() override;
  Result<Span<Tensor>> GetOutputTensors() override;
  Result<void> Reshape(Span<TensorShape> input_shapes) override;
  Result<void> Forward() override;
  Result<void> ForwardAsync(Event* event) override { return Status(eNotSupported); };

 private:
  Device device_;
  Stream stream_;
  std::vector<Tensor> input_tensors_;
  std::vector<Tensor> output_tensors_;

  std::unique_ptr<zdl::SNPE::SNPE> snpe_;
  std::unique_ptr<zdl::DlContainer::IDlContainer> container_;

  std::vector<std::unique_ptr<zdl::DlSystem::ITensor>> inputs_internal_;
  zdl::DlSystem::TensorMap input_tensor_map_;
};

}  // namespace mmdeploy

#endif  // MMDEPLOY_SRC_NET_NCNN_NCNN_NET_H_
@@ -1,112 +0,0 @@
# Cross-compiling android gRPC on Linux

mmdeploy provides a prebuilt snpe inference server. If you want to build it yourself, or need to modify the .proto interface, you can refer to this document.

Note that the official gRPC documentation does not fully support the NDK.

## Environment

| Item    | Version        | Remark                                 |
| ------- | -------------- | -------------------------------------- |
| snpe    | 1.63.0.3523    | -                                      |
| host OS | ubuntu18.04    | version required by the snpe 1.63.0 docs |
| NDK     | r17c           | version required by the snpe 1.63.0 docs |
| gRPC    | commit 6f698b5 | -                                      |
| device  | Redmi K40      | requires a qcom chip                   |

## Cross-compiling gRPC with the NDK

1. Clone the gRPC repo and build `protoc` and `grpc_cpp_plugin` on the host

```bash
# install dependencies
$ apt-get update && apt-get install -y libssl-dev
# build
$ git clone https://github.com/grpc/grpc --recursive=1 --depth=1
$ mkdir -p cmake/build
$ pushd cmake/build

$ cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DgRPC_INSTALL=ON \
  -DgRPC_BUILD_TESTS=OFF \
  -DgRPC_SSL_PROVIDER=package \
  ../..
# install into the host environment
$ make -j
$ sudo make install
```

2. Download the NDK and cross-compile the static libraries needed for android aarch64

```bash
$ wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
$ unzip android-ndk-r17c-linux-x86_64.zip

# set the environment variable
$ export ANDROID_NDK=/path/to/android-ndk-r17c

# build
$ cd /path/to/grpc
$ mkdir -p cmake/build_aarch64 && pushd cmake/build_aarch64

$ cmake ../.. \
 -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
 -DANDROID_ABI=arm64-v8a \
 -DANDROID_PLATFORM=android-26 \
 -DANDROID_STL=c++_static \
 -DRUN_HAVE_STD_REGEX=0 \
 -DRUN_HAVE_POSIX_REGEX=0 \
 -DRUN_HAVE_STEADY_CLOCK=0 \
 -DCMAKE_BUILD_TYPE=Release \
 -DCMAKE_INSTALL_PREFIX=/tmp/android_grpc_install
$ make -j
$ make install
```

3. After the install finishes, `/tmp/android_grpc_install` should contain the complete installation

```bash
$ cd /tmp/android_grpc_install
$ tree -L 1
.
├── bin
├── include
├── lib
└── share
```

## Testing gRPC

1. Build the helloworld example shipped with gRPC

```bash
$ cd /path/to/grpc/examples/cpp/helloworld/
$ mkdir cmake/build_aarch64 -p && pushd cmake/build_aarch64

$ cmake ../.. \
 -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
 -DANDROID_ABI=arm64-v8a \
 -DANDROID_PLATFORM=android-26 \
 -DANDROID_STL=c++_static \
 -DRUN_HAVE_STD_REGEX=0 \
 -DRUN_HAVE_POSIX_REGEX=0 \
 -DRUN_HAVE_STEADY_CLOCK=0 \
 -DCMAKE_BUILD_TYPE=Release \
 -Dabsl_DIR=/tmp/android_grpc_install/lib/cmake/absl \
 -DProtobuf_DIR=/tmp/android_grpc_install/lib/cmake/protobuf \
 -DgRPC_DIR=/tmp/android_grpc_install/lib/cmake/grpc
$ make -j
$ ls greeter*
greeter_async_client   greeter_async_server   greeter_callback_server  greeter_server
greeter_async_client2  greeter_callback_client  greeter_client
```

2. Enable adb debugging mode on the phone and push the binaries to the `/data/local/tmp` directory

```bash
$ adb push greeter* /data/local/tmp
```

3. `adb shell` into the phone and run the client/server

```bash
/data/local/tmp $ ./greeter_client
Greeter received: Hello world
```

## References

* gRPC cross build script: https://raw.githubusercontent.com/grpc/grpc/master/test/distrib/cpp/run_distrib_test_cmake_aarch64_cross.sh
* stackoverflow: https://stackoverflow.com/questions/54052229/build-grpc-c-for-android-using-ndk-arm-linux-androideabi-clang-compiler
@@ -42,3 +42,4 @@ git clone -b master git@github.com:open-mmlab/mmdeploy.git --recursive
- [Windows](windows.md)
- [Android-aarch64](android.md)
- [NVIDIA Jetson](jetsons.md)
- [snpe](snpe.md)
@@ -1 +1,39 @@
##
# SNPE support

mmdeploy integrates snpe in a simple and effective way: Client/Server mode.

This mode

1. decouples the `model conversion` and `inference` environments:

   - everything not related to inference runs on a device with more compute power;
   - for inference, real gpu/npu results are obtained instead of numbers from the CPU simulator.

2. covers cost-sensitive devices. armv7/risc-v/mips chips meet product requirements but often have limited Python support;

3. simplifies the mmdeploy installation. If you only want to convert a .dlc model and test its accuracy, you do not need to touch the snpe tutorial.

## 1. Deploy the inference service

Download the prebuilt snpe inference server package, `adb push` it to the phone and run it.

```bash
$ wget https://media.githubusercontent.com/media/tpoisonooo/mmdeploy-onnx2ncnn-testdata/main/snpe-inference-server-1.59.zip
$ unzip snpe-inference-server-1.59.zip
$ adb push snpe-inference-server-1.59 /data/local/tmp/
```
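After pushing, the server can be started over `adb shell`; a minimal sketch, assuming the directory layout inside the zip (the listening port comes from the server source in this diff):

```bash
$ adb shell
$ cd /data/local/tmp/snpe-inference-server-1.59
$ ./inference_server
Server listening on [::]:50051
```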
If you need to build it yourself, refer to [Cross-compiling the snpe inference server with the NDK](../06-appendix/cross-build-ndk-gRPC.md).

## 2. Install mmdeploy

1. Environment requirements

| Item    | Version            | Remark                             |
| ------- | ------------------ | ---------------------------------- |
| host OS | ubuntu18.04 x86_64 | version required by the snpe tools |
| Python  | 3.6.0              | version required by the snpe tools |
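The concrete install commands are not written out in this section yet; a minimal sketch of a source install, assuming the usual mmdeploy workflow (the `pip install -e .` step is an assumption, only the clone command appears in this diff):

```bash
$ git clone -b master git@github.com:open-mmlab/mmdeploy.git --recursive
$ cd mmdeploy
$ pip install -e .
```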
## 3. Test the model

## 4. Build the SDK
@ -0,0 +1,158 @@
|
|||
# Ubuntu18.04 交叉编译 NDK snpe service
|
||||
|
||||
mmdeploy 已提供 prebuilt snpe inference server,如果你想自己编译、或需要对 .proto 接口做修改,可参考此文档。
|
||||
|
||||
注意 gRPC 官方文档并没有对 NDK 的完整支持。
|
||||
|
||||
## 一、环境说明
|
||||
|
||||
| 项目 | 版本 | 备注 |
|
||||
| -------- | -------------- | ------------------------------------- |
|
||||
| snpe | 1.59 | 1.60 使用 clang-8.0,可能导致兼容问题 |
|
||||
| host OS | ubuntu18.04 | snpe1.59 指定版本 |
|
||||
| NDK | r17c | snpe1.59 指定版本 |
|
||||
| gRPC | commit 6f698b5 | - |
|
||||
| 硬件设备 | 红米 K40 | 需要 qcom 芯片 |
|
||||
|
||||
## 二、NDK 交叉编译 gRPC
|
||||
|
||||
1. 拉取 gRPC repo, 在 host 上编译出 `protoc` 和 `grpc_cpp_plugin`
|
||||
|
||||
```bash
|
||||
# 安装依赖
|
||||
$ apt-get update && apt-get install -y libssl-dev
|
||||
# 编译
|
||||
$ git clone https://github.com/grpc/grpc --recursive=1 --depth=1
|
||||
$ mkdir -p cmake/build
|
||||
$ pushd cmake/build
|
||||
|
||||
$ cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
-DgRPC_SSL_PROVIDER=package \
|
||||
../..
|
||||
# 需要安装到 host 环境
|
||||
$ make -j
|
||||
$ sudo make install
|
||||
```
|
||||
|
||||
2. 下载 NDK,交叉编译 android aarch64 所需静态库
|
||||
|
||||
```bash
|
||||
$ wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
|
||||
$ unzip android-ndk-r17c-linux-x86_64.zip
|
||||
|
||||
# 设置环境变量
|
||||
$ export ANDROID_NDK=/path/to/android-ndk-r17c
|
||||
|
||||
# 编译
|
||||
$ cd /path/to/grpc
|
||||
$ mkdir -p cmake/build_aarch64 && pushd cmake/build_aarch64
|
||||
|
||||
$ cmake ../.. \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
|
||||
-DANDROID_ABI=arm64-v8a \
|
||||
-DANDROID_PLATFORM=android-26 \
|
||||
-DANDROID_TOOLCHAIN=clang \
|
||||
-DANDROID_STL=c++_shared \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=/tmp/android_grpc_install_shared
|
||||
|
||||
$ make -j
|
||||
$ make install
|
||||
```
|
||||
|
||||
3. 此时 `/tmp/android_grpc_install` 应有完整的安装文件
|
||||
|
||||
```bash
|
||||
$ cd /tmp/android_grpc_install
|
||||
$ tree -L 1
|
||||
.
|
||||
├── bin
|
||||
├── include
|
||||
├── lib
|
||||
└── share
|
||||
```
|
||||
|
||||
## 3. (Optional) Verify that the NDK gRPC build works
|
||||
|
||||
1. Build the helloworld example shipped with gRPC
|
||||
|
||||
```bash
|
||||
$ cd /path/to/grpc/examples/cpp/helloworld/
|
||||
$ mkdir cmake/build_aarch64 -p && pushd cmake/build_aarch64
|
||||
|
||||
$ cmake ../.. \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
|
||||
-DANDROID_ABI=arm64-v8a \
|
||||
-DANDROID_PLATFORM=android-26 \
|
||||
-DANDROID_STL=c++_shared \
|
||||
-DANDROID_TOOLCHAIN=clang \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
|
||||
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
|
||||
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
|
||||
|
||||
$ make -j
|
||||
$ ls greeter*
|
||||
greeter_async_client greeter_async_server greeter_callback_server greeter_server
|
||||
greeter_async_client2 greeter_callback_client greeter_client
|
||||
```
|
||||
|
||||
2. Enable debugging on the phone and push the build results to `/data/local/tmp`
|
||||
|
||||
Tip: on many Chinese-brand phones, tap the build number 7 times in Settings to enter developer mode; only then can USB debugging be enabled.
|
||||
|
||||
```bash
|
||||
$ adb push greeter* /data/local/tmp
|
||||
```
|
||||
|
||||
3. `adb shell` into the phone and run the client/server (the server has to be started first; see the sketch after the output below)
|
||||
|
||||
```bash
|
||||
/data/local/tmp $ ./greeter_client
|
||||
Greeter received: Hello world
|
||||
```
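For the client above to print anything, `greeter_server` must already be running on the device, and since everything was linked with `-DANDROID_STL=c++_shared`, `libc++_shared.so` has to be reachable at runtime. A possible sequence (the NDK library path is the usual r17c layout and is an assumption):

```bash
# push the C++ runtime the binaries link against (path assumed for NDK r17c)
$ adb push ${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so /data/local/tmp

# shell 1: start the server
/data/local/tmp $ LD_LIBRARY_PATH=/data/local/tmp ./greeter_server

# shell 2: run the client
/data/local/tmp $ LD_LIBRARY_PATH=/data/local/tmp ./greeter_client
Greeter received: Hello world
```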
|
||||
|
||||
## 4. Cross-compile the snpe service
|
||||
|
||||
1. Open the [snpe tools website](https://developer.qualcomm.com/software/qualcomm-neural-processing-sdk/tools), download version 1.59, then unpack it and set the environment variable
|
||||
|
||||
**Note: snpe 1.60 and later are built with `clang-8.0`, which may make older devices incompatible with `libc++_shared.so`.**
|
||||
|
||||
```bash
|
||||
$ export SNPE_ROOT=/path/to/snpe-1.59.0.3230
|
||||
```
|
||||
|
||||
2. Enter the mmdeploy snpe server directory and build with the same options used when cross-compiling gRPC
|
||||
|
||||
```bash
|
||||
$ cd /path/to/mmdeploy
|
||||
$ cd csrc/mmdeploy/backend_ops/snpe/service
|
||||
|
||||
$ mkdir -p build && cd build
|
||||
$ export ANDROID_NDK=/path/to/android-ndk-r17c
|
||||
$ cmake .. \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
|
||||
-DANDROID_ABI=arm64-v8a \
|
||||
-DANDROID_PLATFORM=android-26 \
|
||||
-DANDROID_STL=c++_shared \
|
||||
-DANDROID_TOOLCHAIN=clang \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-Dabsl_DIR=/tmp/android_grpc_install_shared/lib/cmake/absl \
|
||||
-DProtobuf_DIR=/tmp/android_grpc_install_shared/lib/cmake/protobuf \
|
||||
-DgRPC_DIR=/tmp/android_grpc_install_shared/lib/cmake/grpc
|
||||
|
||||
$ make -j
|
||||
$ file inference_server
|
||||
inference_server: ELF 64-bit LSB shared object, ARM aarch64, version 1 (SYSV), dynamically linked, interpreter /system/bin/linker64, BuildID[sha1]=252aa04e2b982681603dacb74b571be2851176d2, with debug_info, not stripped
|
||||
```
|
||||
|
||||
The final artifact is `inference_server`; `adb push` it to the device and run it. A possible push-and-run sequence is sketched below.
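This is a hedged sketch: the binary needs `libc++_shared.so` and the SNPE runtime at run time, and the `aarch64-android-clang6.0` directory name matches the snpe 1.59 SDK layout but should be checked against your download:

```bash
# runtime dependencies (library paths are assumptions)
$ adb push ${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so /data/local/tmp
$ adb push ${SNPE_ROOT}/lib/aarch64-android-clang6.0/libSNPE.so /data/local/tmp
$ adb push inference_server /data/local/tmp

# launch on the device
$ adb shell "cd /data/local/tmp && LD_LIBRARY_PATH=/data/local/tmp ./inference_server"
```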
|
||||
|
||||
## References
|
||||
|
||||
- snpe tutorial https://developer.qualcomm.com/sites/default/files/docs/snpe/cplus_plus_tutorial.html
|
||||
- gRPC cross build script https://raw.githubusercontent.com/grpc/grpc/master/test/distrib/cpp/run_distrib_test_cmake_aarch64_cross.sh
|
||||
- stackoverflow https://stackoverflow.com/questions/54052229/build-grpc-c-for-android-using-ndk-arm-linux-androideabi-clang-compiler
|
|
@ -0,0 +1,15 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from mmdeploy.backend.snpe import from_onnx as _from_onnx
|
||||
from mmdeploy.backend.snpe import is_available
|
||||
from ..core import PIPELINE_MANAGER
|
||||
|
||||
from_onnx = PIPELINE_MANAGER.register_pipeline()(_from_onnx)
|
||||
|
||||
__all__ = ['is_available', 'from_onnx']
|
||||
|
||||
if is_available():
|
||||
try:
|
||||
from mmdeploy.backend.snpe.onnx2dlc import get_output_model_file
|
||||
__all__ += ['get_output_model_file']
|
||||
except Exception:
|
||||
pass
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
|
||||
from .init_plugins import get_onnx2dlc_path
|
||||
from .onnx2dlc import from_onnx
|
||||
|
||||
def is_available():
|
||||
"""Check whether ncnn and snpe-onnx-to-dlc tool are installed.
|
||||
|
||||
Returns:
|
||||
bool: True if the snpe-onnx-to-dlc tool is installed.
|
||||
"""
|
||||
|
||||
onnx2dlc = get_onnx2dlc_path()
|
||||
    return onnx2dlc is not None and osp.exists(onnx2dlc)
|
||||
|
||||
|
||||
__all__ = ['from_onnx']
|
||||
|
||||
if is_available():
|
||||
try:
|
||||
from .wrapper import SNPEWrapper
|
||||
|
||||
__all__ += ['SNPEWrapper']
|
||||
except Exception as e:
|
||||
print(e)
|
||||
pass
|
|
@ -0,0 +1,11 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import shutil
from typing import Optional
|
||||
|
||||
|
||||
def get_onnx2dlc_path() -> Optional[str]:
|
||||
"""Get snpe-onnx-to-dlc path.
|
||||
|
||||
Returns:
|
||||
Optional[str]: Path to the snpe-onnx-to-dlc tool, or None if it is not on PATH.
|
||||
"""
|
||||
return shutil.which('snpe-onnx-to-dlc')
|
|
@ -0,0 +1,69 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
from subprocess import call
|
||||
from typing import Optional, Union
|
||||
|
||||
import onnx
|
||||
|
||||
from .init_plugins import get_onnx2dlc_path
|
||||
|
||||
|
||||
def mkdir_or_exist(dir_name, mode=0o777):
|
||||
if dir_name == '':
|
||||
return
|
||||
dir_name = osp.expanduser(dir_name)
|
||||
os.makedirs(dir_name, mode=mode, exist_ok=True)
|
||||
|
||||
|
||||
def get_output_model_file(onnx_path: str,
|
||||
work_dir: Optional[str] = None) -> str:
|
||||
"""Returns the path to the .dlc file with export result.
|
||||
|
||||
Args:
|
||||
onnx_path (str): The path to the onnx model.
|
||||
work_dir (str|None): The path to the directory for saving the results.
|
||||
Defaults to `None`, which means use the directory of onnx_path.
|
||||
|
||||
Returns:
|
||||
str: The path to the file where the export result will be
|
||||
located.
|
||||
"""
|
||||
if work_dir is None:
|
||||
work_dir = osp.dirname(onnx_path)
|
||||
mkdir_or_exist(osp.abspath(work_dir))
|
||||
file_name = osp.splitext(osp.split(onnx_path)[1])[0]
|
||||
save_dlc = osp.join(work_dir, file_name + '.dlc')
|
||||
return save_dlc
|
||||
|
||||
|
||||
def from_onnx(onnx_model: Union[onnx.ModelProto, str],
|
||||
output_file_prefix: str):
|
||||
"""Convert ONNX to dlc.
|
||||
|
||||
We need to use an executable program (snpe-onnx-to-dlc) to convert the `.onnx` file to a `.dlc` file.
|
||||
|
||||
Example:
|
||||
>>> from mmdeploy.apis.snpe import from_onnx
|
||||
>>> onnx_path = 'work_dir/end2end.onnx'
|
||||
>>> output_file_prefix = 'work_dir/end2end'
|
||||
>>> from_onnx(onnx_path, output_file_prefix)
|
||||
|
||||
Args:
|
||||
onnx_model (ModelProto|str): The onnx model object or the path to it.
|
||||
output_file_prefix (str): The path to save the output .dlc file.
|
||||
"""
|
||||
|
||||
if not isinstance(onnx_model, str):
|
||||
onnx_path = tempfile.NamedTemporaryFile(suffix='.onnx').name
|
||||
onnx.save(onnx_model, onnx_path)
|
||||
else:
|
||||
onnx_path = onnx_model
|
||||
|
||||
save_dlc = output_file_prefix + '.dlc'
|
||||
|
||||
onnx2dlc = get_onnx2dlc_path()
|
||||
ret_code = call(
|
||||
[onnx2dlc, '--input_network', onnx_path, '--output', save_dlc])
|
||||
assert ret_code == 0, 'onnx2dlc failed'
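For reference, the subprocess call above is equivalent to invoking the converter from a shell with the same flags, which can help when a conversion fails:

```bash
# mirrors the flags passed by from_onnx above
$ snpe-onnx-to-dlc --input_network end2end.onnx --output end2end.dlc
```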
|
|
@ -0,0 +1,140 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional, Sequence
|
||||
|
||||
import grpc
|
||||
# import mmdeploy.backend.snpe.inference_pb2
|
||||
# import mmdeploy.backend.snpe.inference_pb2_grpc
|
||||
import inference_pb2
|
||||
import inference_pb2_grpc
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from mmdeploy.utils import Backend, get_root_logger
|
||||
from mmdeploy.utils.timer import TimeCounter
|
||||
from ..base import BACKEND_WRAPPER, BaseWrapper
|
||||
|
||||
|
||||
@BACKEND_WRAPPER.register_module(Backend.SNPE.value)
|
||||
class SNPEWrapper(BaseWrapper):
|
||||
"""ncnn wrapper class for inference.
|
||||
|
||||
Args:
|
||||
dlc_file (str): Path of a weight file.
|
||||
output_names (Sequence[str] | None): Names of model outputs in order.
|
||||
Defaults to `None` and the wrapper will load the output names from
|
||||
snpe model.
|
||||
|
||||
Examples:
|
||||
>>> from mmdeploy.backend.snpe import SNPEWrapper
|
||||
>>> import torch
|
||||
>>>
|
||||
>>> snpe_file = 'alexnet.dlc'
|
||||
>>> model = SNPEWrapper(snpe_file)
|
||||
>>> inputs = dict(input=torch.randn(1, 3, 224, 224))
|
||||
>>> outputs = model(inputs)
|
||||
>>> print(outputs)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
dlc_file: str,
|
||||
output_names: Optional[Sequence[str]] = None,
|
||||
**kwargs):
|
||||
|
||||
logger = get_root_logger()
|
||||
|
||||
# The maximum gRPC message size (2 << 29 bytes, i.e. 1 GiB)
|
||||
MAX_SIZE = 2 << 29
|
||||
uri = os.environ['__MMDEPLOY_GRPC_URI']
|
||||
logger.info(f'fetch uri: {uri}')
|
||||
self.channel = grpc.insecure_channel(
|
||||
uri,
|
||||
options=(('grpc.GRPC_ARG_KEEPALIVE_TIME_MS',
|
||||
2000), ('grpc.max_send_message_length', MAX_SIZE),
|
||||
('grpc.keepalive_permit_without_calls', 1)))
|
||||
|
||||
weights = bytes()
|
||||
filesize = os.stat(dlc_file).st_size
|
||||
|
||||
logger.info(f'reading local model file {dlc_file}')
|
||||
# with open(dlc_file, 'rb') as f:
|
||||
# weights = f.read(filesize)
|
||||
|
||||
stub = inference_pb2_grpc.InferenceStub(self.channel)
|
||||
logger.info('init remote SNPE engine with RPC, please wait...')
|
||||
model = inference_pb2.Model(name=dlc_file, weights=weights, device=1)
|
||||
resp = stub.Init(model)
|
||||
|
||||
if resp.status != 0:
|
||||
logger.error(f'init SNPE model failed {resp.info}')
|
||||
return
|
||||
|
||||
output = stub.OutputNames(inference_pb2.Empty())
|
||||
output_names = output.names
|
||||
|
||||
super().__init__(output_names)
|
||||
logger.info(f'init success, outputs {output_names}')
|
||||
|
||||
# def __del__(self):
|
||||
# stub = inference_pb2_grpc.InferenceStub(self.channel)
|
||||
# stub.Destroy()
|
||||
|
||||
def forward(self, inputs: Dict[str,
|
||||
torch.Tensor]) -> Dict[str, torch.Tensor]:
|
||||
"""Run forward inference.
|
||||
|
||||
Args:
|
||||
inputs (Dict[str, torch.Tensor]): Key-value pairs of model inputs.
|
||||
|
||||
Returns:
|
||||
Dict[str, torch.Tensor]: Key-value pairs of model outputs.
|
||||
"""
|
||||
input_list = list(inputs.values())
|
||||
device_type = input_list[0].device.type
|
||||
|
||||
logger = get_root_logger()
|
||||
|
||||
# build `list` inputs for remote snpe engine
|
||||
snpe_inputs = []
|
||||
for name, input_tensor in inputs.items():
|
||||
data = input_tensor.contiguous()
|
||||
data = data.detach().cpu().numpy()
|
||||
if data.dtype != np.float32:
|
||||
logger.error('SNPE currently only supports fp32 input')
|
||||
data = data.astype(dtype=np.float32)
|
||||
tensor = inference_pb2.Tensor(
|
||||
data=data.tobytes(), name=name, dtype='float32')
|
||||
|
||||
snpe_inputs.append(tensor)
|
||||
|
||||
return self.__snpe_execute(
|
||||
inference_pb2.TensorList(datas=snpe_inputs), device_type)
|
||||
|
||||
@TimeCounter.count_time()
|
||||
def __snpe_execute(self, tensorList: inference_pb2.TensorList,
|
||||
device: str) -> Dict[str, torch.tensor]:
|
||||
"""Run inference with snpe remote inference engine.
|
||||
|
||||
Args:
|
||||
tensorList (inference_pb2.TensorList): snpe input tensor.
|
||||
|
||||
Returns:
|
||||
dict[str, torch.tensor]: Inference results of snpe model.
|
||||
"""
|
||||
stub = inference_pb2_grpc.InferenceStub(self.channel)
|
||||
resp = stub.Inference(tensorList)
|
||||
|
||||
result = dict()
|
||||
if resp.status == 0:
|
||||
for tensor in resp.datas:
|
||||
ndarray = np.frombuffer(tensor.data, dtype=np.float32)
|
||||
|
||||
result[tensor.name] = torch.from_numpy(
|
||||
ndarray.copy()).to(device)
|
||||
else:
|
||||
logger = get_root_logger()
|
||||
logger.error(f'snpe inference failed {resp.info}')
|
||||
|
||||
return result
|
|
@ -106,6 +106,10 @@ class BaseBackendModel(torch.nn.Module, metaclass=ABCMeta):
|
|||
model=backend_files[0],
|
||||
input_names=input_names,
|
||||
output_names=output_names)
|
||||
elif backend == Backend.SNPE:
|
||||
from mmdeploy.backend.snpe import SNPEWrapper
|
||||
return SNPEWrapper(
|
||||
dlc_file=backend_files[0], output_names=output_names)
|
||||
else:
|
||||
raise NotImplementedError(f'Unknown backend type: {backend.value}')
|
||||
|
||||
|
|
|
@ -55,6 +55,7 @@ class Backend(AdvancedEnum):
|
|||
ONNXRUNTIME = 'onnxruntime'
|
||||
PPLNN = 'pplnn'
|
||||
NCNN = 'ncnn'
|
||||
SNPE = 'snpe'
|
||||
OPENVINO = 'openvino'
|
||||
SDK = 'sdk'
|
||||
TORCHSCRIPT = 'torchscript'
|
||||
|
|
|
@ -7,3 +7,4 @@ mmrazor>=0.3.0
|
|||
mmsegmentation
|
||||
onnxruntime>=1.8.0
|
||||
openvino-dev
|
||||
grpcio
|
||||
|
|
|
@ -41,6 +41,9 @@ def check_backend():
|
|||
import mmdeploy.apis.openvino as openvino_apis
|
||||
logger.info(f'openvino_is_avaliable: {openvino_apis.is_available()}')
|
||||
|
||||
import mmdeploy.apis.snpe as snpe_apis
|
||||
logger.info(f'snpe_is_available: {snpe_apis.is_available()}')
|
||||
|
||||
|
||||
def check_codebase():
|
||||
codebase_versions = get_codebase_version()
|
||||
|
|
|
@ -54,6 +54,11 @@ def parse_args():
|
|||
help='Image directory for quantize model.')
|
||||
parser.add_argument(
|
||||
'--quant', action='store_true', help='Quantize model to low bit.')
|
||||
parser.add_argument(
|
||||
'--uri',
|
||||
type=str,
|
||||
default='10.1.80.67:50051',
|
||||
help='Remote ipv4:port or ipv6:port for inference on edge device.')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
@ -266,6 +271,28 @@ def main():
|
|||
else:
|
||||
backend_files += [model_param_path, model_bin_path]
|
||||
|
||||
elif backend == Backend.SNPE:
|
||||
os.environ['__MMDEPLOY_GRPC_URI'] = args.uri
|
||||
from mmdeploy.apis.snpe import is_available as is_available
|
||||
|
||||
if not is_available():
|
||||
logger.error('snpe support is not available, please check that \
|
||||
1) `snpe-onnx-to-dlc` exists in `PATH` and 2) snpe only supports ubuntu18.04'
|
||||
)
|
||||
exit(1)
|
||||
|
||||
import mmdeploy.apis.snpe as snpe_api
|
||||
from mmdeploy.apis.snpe import get_output_model_file
|
||||
|
||||
PIPELINE_MANAGER.set_log_level(log_level, [snpe_api.from_onnx])
|
||||
|
||||
backend_files = []
|
||||
for onnx_path in ir_files:
|
||||
dlc_path = get_output_model_file(onnx_path, args.work_dir)
|
||||
onnx_name = osp.splitext(osp.split(onnx_path)[1])[0]
|
||||
snpe_api.from_onnx(onnx_path, osp.join(args.work_dir, onnx_name))
|
||||
backend_files += [dlc_path]
|
||||
|
||||
elif backend == Backend.OPENVINO:
|
||||
from mmdeploy.apis.openvino import \
|
||||
is_available as is_available_openvino
|
||||
|
@ -332,17 +359,19 @@ def main():
|
|||
# for headless installation.
|
||||
if not headless:
|
||||
# visualize model of the backend
|
||||
create_process(
|
||||
f'visualize {backend.value} model',
|
||||
target=visualize_model,
|
||||
args=(model_cfg_path, deploy_cfg_path, backend_files,
|
||||
args.test_img, args.device),
|
||||
kwargs=dict(
|
||||
backend=backend,
|
||||
output_file=osp.join(args.work_dir,
|
||||
f'output_{backend.value}.jpg'),
|
||||
show_result=args.show),
|
||||
ret_value=ret_value)
|
||||
visualize_model(model_cfg_path, deploy_cfg_path, backend_files,
|
||||
args.test_img, args.device)
|
||||
|
||||
|
||||
# visualize pytorch model
|
||||
create_process(
|
||||
|
|