// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_DEVICE_CUDA_DEFAULT_ALLOCATOR_H_
#define MMDEPLOY_SRC_DEVICE_CUDA_DEFAULT_ALLOCATOR_H_

#include <cuda_runtime.h>

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>

#include "core/logger.h"

namespace mmdeploy::cuda {

// Thin wrapper around cudaMalloc/cudaFree that keeps allocation statistics
// (counts, total bytes, accumulated time) and logs them on destruction.
class DefaultAllocator {
 public:
  DefaultAllocator() = default;
  ~DefaultAllocator() {
    MMDEPLOY_ERROR("=== CUDA Default Allocator ===");
    MMDEPLOY_ERROR("  Allocation: count={}, size={}MB, time={}ms", alloc_count_.load(),
                   alloc_size_ / (1024 * 1024.f), alloc_time_ / 1000000.f);
    MMDEPLOY_ERROR("Deallocation: count={}, size={}MB, time={}ms", dealloc_count_.load(),
                   dealloc_size_ / (1024 * 1024.f), dealloc_time_ / 1000000.f);
  }
  [[nodiscard]] void* Allocate(std::size_t n) {
    void* p{};
    auto t0 = std::chrono::high_resolution_clock::now();
    auto ret = cudaMalloc(&p, n);
    auto t1 = std::chrono::high_resolution_clock::now();
    alloc_time_ += (int64_t)std::chrono::duration<double, std::nano>(t1 - t0).count();
    if (ret != cudaSuccess) {
      MMDEPLOY_ERROR("error allocating cuda memory: {}", cudaGetErrorString(ret));
      return nullptr;
    }
    alloc_count_ += 1;
    alloc_size_ += n;
    return p;
  }
  void Deallocate(void* p, std::size_t n) {
    (void)n;
    auto t0 = std::chrono::high_resolution_clock::now();
    auto ret = cudaFree(p);
    auto t1 = std::chrono::high_resolution_clock::now();
    dealloc_time_ += (int64_t)std::chrono::duration<double, std::nano>(t1 - t0).count();
    if (ret != cudaSuccess) {
      MMDEPLOY_ERROR("error deallocating cuda memory: {}", cudaGetErrorString(ret));
      return;
    }
    dealloc_count_ += 1;
    dealloc_size_ += n;
  }

 private:
  std::atomic<std::size_t> alloc_count_{};
  std::atomic<std::size_t> alloc_size_{};
  std::atomic<int64_t> alloc_time_{};
  std::atomic<std::size_t> dealloc_count_{};
  std::atomic<std::size_t> dealloc_size_{};
  std::atomic<int64_t> dealloc_time_{};
};

// Process-wide singleton instance of the allocator.
inline DefaultAllocator& gDefaultAllocator() {
  static DefaultAllocator v;
  return v;
}

}  // namespace mmdeploy::cuda

#endif  // MMDEPLOY_SRC_DEVICE_CUDA_DEFAULT_ALLOCATOR_H_
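
// Usage sketch (illustrative, based only on the public API above): the global
// instance can be called directly to allocate and release device memory.
//
//   auto& alloc = mmdeploy::cuda::gDefaultAllocator();
//   if (void* buf = alloc.Allocate(1 << 20)) {  // 1 MiB of device memory
//     // ... use `buf` in kernels or cudaMemcpy calls ...
//     alloc.Deallocate(buf, 1 << 20);
//   }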