mmdeploy/csrc/device/cpu/cpu_device.h
lvhan028 36124f6205
Merge sdk (#251)
* check in cmake

* move backend_ops to csrc/backend_ops

* check in preprocess, model, some codebase and their c-apis

* check in CMakeLists.txt

* check in parts of test_csrc

* commit everything else

* add readme

* update core's BUILD_INTERFACE directory

* skip codespell on third_party

* update trt_net and ort_net's CMakeLists

* ignore clion's build directory

* check in pybind11

* add onnx.proto. Remove MMDeploy's dependency on ncnn's source code

* export MMDeployTargets only when MMDEPLOY_BUILD_SDK is ON

* remove useless message

* target include directory is wrong

* change target name from mmdeploy_ppl_net to mmdeploy_pplnn_net

* skip install directory

* update project's cmake

* remove useless code

* set CMAKE_BUILD_TYPE to Release by force if it isn't set by user

* update custom ops CMakeLists

* pass object target's source lists

* fix lint end-of-file

* fix lint: trailing whitespace

* fix codespell hook

* move bicubic_interpolate to csrc/backend_ops/

* set MMDEPLOY_BUILD_SDK OFF

* change custom ops build command

* add spdlog installation command

* update docs on how to checkout pybind11

* move bicubic_interpolate to backend_ops/tensorrt directory

* remove useless code

* correct cmake

* fix typo

* fix typo

* fix install directory

* correct sdk's readme

* set cub dir when cuda version < 11.0

* change directory where clang-format will apply to

* fix build command

* add .clang-format

* change clang-format style from google to file

* reformat csrc/backend_ops

* format sdk's code

* turn off clang-format for some files

* add -Xcompiler=-fno-gnu-unique

* fix trt topk initialize

* check in config for sdk demo

* update cmake script and csrc's readme

* correct config's path

* add cuda include directory, otherwise compile failed in case of tensorrt8.2

* clang-format onnx2ncnn.cpp

Co-authored-by: zhangli <lzhang329@gmail.com>
Co-authored-by: grimoire <yaoqian@sensetime.com>
2021-12-07 10:57:55 +08:00

150 lines
3.7 KiB
C++

// Copyright (c) OpenMMLab. All rights reserved.
#pragma once

#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>

#include "core/device_impl.h"
#include "core/types.h"
namespace mmdeploy {
// CPU realization of PlatformImpl.  It manufactures the CPU-specific
// buffer/stream/event implementations declared below and routes all copy
// requests through the static CopyImpl helper (definitions live in the .cpp).
class CpuPlatformImpl : public PlatformImpl {
 public:
  // Platform identity; concrete values are supplied by the .cpp.
  int GetPlatformId() const noexcept override;
  const char* GetPlatformName() const noexcept override;

  // Factories for the CPU-backed primitives declared later in this header.
  shared_ptr<BufferImpl> CreateBuffer(Device device) override;
  shared_ptr<StreamImpl> CreateStream(Device device) override;
  shared_ptr<EventImpl> CreateEvent(Device device) override;

  // Copy overloads: host memory -> buffer, buffer -> host memory, and
  // buffer -> buffer.  Sizes/offsets are presumably in bytes (TODO confirm
  // against the .cpp); `stream` orders the copy.
  Result<void> Copy(const void* host_ptr, Buffer dst, size_t size, size_t dst_offset,
                    Stream stream) override;
  Result<void> Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
                    Stream stream) override;
  Result<void> Copy(Buffer src, Buffer dst, size_t size, size_t src_offset, size_t dst_offset,
                    Stream stream) override;

  // Default stream for `device_id`; default_stream_/init_flag_ below suggest
  // it is created once on first use — verify in the .cpp.
  Result<Stream> GetDefaultStream(int32_t device_id) override;

  // Convenience: build a Device handle for `device_id` on this platform.
  Device GetDevice(int device_id) const { return Device(GetPlatformId(), device_id); }

 private:
  // Parameter validation and the shared memcpy-style implementation used by
  // the Copy overloads above (defined in the .cpp).
  static bool CheckCopyParam(size_t src_size, size_t dst_size, size_t src_offset, size_t dst_offset,
                             size_t copy_size);
  static Result<void> CopyImpl(const void* src, void* dst, size_t src_size, size_t dst_size,
                               size_t src_offset, size_t dst_offset, size_t size, Stream st);

  Stream default_stream_;     // cached default stream
  std::once_flag init_flag_;  // one-time initialization, presumably of default_stream_
};
// Accessor for the process-wide CpuPlatformImpl instance (defined in the .cpp).
CpuPlatformImpl& gCpuPlatform();

// Opaque holder of the actual host allocation; defined in the .cpp and shared
// between buffers via shared_ptr (see CpuBufferImpl::memory_ below).
class CpuHostMemory;
// CPU buffer: a (offset_, size_) view into a reference-counted host
// allocation (memory_), which lets SubBuffer return views that alias the
// same storage.
class CpuBufferImpl : public BufferImpl {
 public:
  explicit CpuBufferImpl(Device device);

  // Allocate `size` bytes through `allocator` with the requested alignment.
  Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) override;
  // Adopt an externally owned (`native`) allocation instead of allocating.
  Result<void> Init(size_t size, std::shared_ptr<void> native, uint64_t flags) override;

  // Create a view of [offset, offset + size) sharing this buffer's memory.
  Result<BufferImplPtr> SubBuffer(size_t offset, size_t size, uint64_t flags) override;

  void* GetNative(ErrorCode* ec) override;
  Allocator GetAllocator() const override;
  size_t GetSize(ErrorCode* ec) override;

 private:
  std::shared_ptr<CpuHostMemory> memory_;  // underlying host allocation
  size_t offset_{0};                       // offset of this view into memory_
  size_t size_{0};                         // size of this view
};
// CPU stream: the member layout (task_queue_, thread_, cv_, abort_) indicates
// Tasks are serialized on a dedicated worker thread running
// InternalThreadEntry — exact semantics live in the .cpp.
class CpuStreamImpl : public StreamImpl {
 public:
  using Task = std::function<void()>;

  explicit CpuStreamImpl(Device device);
  ~CpuStreamImpl() override;

  Result<void> Init(uint64_t flags) override;
  Result<void> Init(std::shared_ptr<void> native, uint64_t flags) override;

  // Queue `task` for execution on this stream.
  Result<void> Enqueue(Task task);

  // Ordering / completion primitives (implemented in the .cpp).
  Result<void> DependsOn(Event& event) override;
  Result<void> Query() override;
  Result<void> Wait() override;
  Result<void> Submit(Kernel& kernel) override;

  void* GetNative(ErrorCode* ec) override;

 private:
  // Worker-thread entry point.
  void InternalThreadEntry();

  std::mutex mutex_;             // presumably guards task_queue_/abort_ — confirm in .cpp
  std::condition_variable cv_;   // wakes the worker
  std::queue<Task> task_queue_;  // pending tasks (FIFO)
  std::thread thread_;           // worker thread
  Device device_;
  bool abort_{false};            // shutdown request flag
};
// CPU event built on a promise_/future_ pair; Record/Wait/Query are defined
// in the .cpp (presumably the promise is fulfilled when the recorded stream
// work completes — confirm there).
class CpuEventImpl : public EventImpl {
 public:
  explicit CpuEventImpl(Device device);
  ~CpuEventImpl() override = default;

  Result<void> Init(uint64_t flags) override;
  Result<void> Init(std::shared_ptr<void> native, uint64_t flags) override;

  // Completion check without blocking.
  Result<void> Query() override;
  // Associate this event with `stream`.
  Result<void> Record(Stream& stream) override;
  // Block until the event is signaled.
  Result<void> Wait() override;

  void* GetNative(ErrorCode* ec) override;

 private:
  // Re-arms promise_/future_ (defined in the .cpp).
  void Reset();

  std::shared_future<void> future_;  // observed side of the event
  std::promise<void> promise_;       // signaling side of the event
};
// CPU kernel: nothing more than a stored callable.  The native handle is a
// pointer to that Task, so a stream can retrieve and run it.
class CpuKernelImpl : public KernelImpl {
 public:
  using Task = CpuStreamImpl::Task;

  // Takes ownership of the callable that constitutes this kernel's work.
  explicit CpuKernelImpl(Device device, Task task) : KernelImpl(device), task_(std::move(task)) {}

  // Returns the address of the stored Task; reports success through `ec`
  // when the caller supplies one.
  void* GetNative(ErrorCode* ec) override {
    if (ec != nullptr) {
      *ec = ErrorCode::eSuccess;
    }
    return &task_;
  }

 private:
  Task task_;
};
} // namespace mmdeploy