docs/html/StandardGpuResources_8cpp_source.html

 /**

  * Copyright (c) 2015-present, Facebook, Inc.

  * All rights reserved.

  *

  * This source code is licensed under the CC-by-NC license found in the

  * LICENSE file in the root directory of this source tree.

  */


 // Copyright 2004-present Facebook. All Rights Reserved.


 #include "StandardGpuResources.h"

 #include "../FaissAssert.h"


 namespace faiss { namespace gpu {


 namespace {


 constexpr int kNumStreams = 2;


 /// Use 18% of GPU memory for temporary space by default

 constexpr float kDefaultTempMemFraction = 0.18f;


 /// Default pinned memory allocation size

 constexpr size_t kDefaultPinnedMemoryAllocation = (size_t) 256 * 1024 * 1024;


 }


 StandardGpuResources::StandardGpuResources() :

     pinnedMemAlloc_(nullptr),

     pinnedMemAllocSize_(0),

     tempMemFraction_(kDefaultTempMemFraction),

     tempMemSize_(0),

     useFraction_(true),

     pinnedMemSize_(kDefaultPinnedMemoryAllocation) {

 }


 StandardGpuResources::~StandardGpuResources() {

   for (auto& entry : defaultStreams_) {

     DeviceScope scope(entry.first);


     CUDA_VERIFY(cudaStreamDestroy(entry.second));

   }


   for (auto& entry : alternateStreams_) {

     DeviceScope scope(entry.first);


     for (auto stream : entry.second) {

       CUDA_VERIFY(cudaStreamDestroy(stream));

     }

   }


   for (auto& entry : asyncCopyStreams_) {

     DeviceScope scope(entry.first);


     CUDA_VERIFY(cudaStreamDestroy(entry.second));

   }


   for (auto& entry : blasHandles_) {

     DeviceScope scope(entry.first);


     auto blasStatus = cublasDestroy(entry.second);

     FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);

   }


   if (pinnedMemAlloc_) {

     CUDA_VERIFY(cudaFreeHost(pinnedMemAlloc_));

   }

 }


 void

 StandardGpuResources::noTempMemory() {

   setTempMemory(0);

 }


 void

 StandardGpuResources::setTempMemory(size_t size) {

   useFraction_ = false;

   tempMemSize_ = size;

 }


 void

 StandardGpuResources::setTempMemoryFraction(float fraction) {

   FAISS_ASSERT(fraction >= 0.0f && fraction <= 0.5f);

   useFraction_ = true;

   tempMemFraction_ = fraction;

 }


 void

 StandardGpuResources::setPinnedMemory(size_t size) {

   // Should not call this after devices have been initialized

   FAISS_ASSERT(defaultStreams_.size() == 0);

   FAISS_ASSERT(!pinnedMemAlloc_);


   pinnedMemSize_ = size;

 }


 void

 StandardGpuResources::initializeForDevice(int device) {

   // Use default streams as a marker for whether or not a certain

   // device has been initialized

   if (defaultStreams_.count(device) != 0) {

     return;

   }


   // If this is the first device that we're initializing, create our

   // pinned memory allocation

   if (defaultStreams_.empty() && pinnedMemSize_ > 0) {

     CUDA_VERIFY(cudaHostAlloc(&pinnedMemAlloc_,

                               pinnedMemSize_,

                               cudaHostAllocDefault));

     pinnedMemAllocSize_ = pinnedMemSize_;

   }


   FAISS_ASSERT(device < getNumDevices());

   DeviceScope scope(device);


   // Make sure that device properties for all devices are cached

   auto& prop = getDeviceProperties(device);


   // Also check to make sure we meet our minimum compute capability (3.5)

   FAISS_ASSERT(prop.major > 3 || (prop.major == 3 && prop.minor >= 5) ||

                !"Device not supported, need 3.5+ compute capability");


   // Create streams

   cudaStream_t defaultStream = 0;

   CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,

                                         cudaStreamNonBlocking));


   defaultStreams_[device] = defaultStream;


   cudaStream_t asyncCopyStream = 0;

   CUDA_VERIFY(cudaStreamCreateWithFlags(&asyncCopyStream,

                                         cudaStreamNonBlocking));


   asyncCopyStreams_[device] = asyncCopyStream;


   std::vector<cudaStream_t> deviceStreams;

   for (int j = 0; j < kNumStreams; ++j) {

     cudaStream_t stream = 0;

     CUDA_VERIFY(cudaStreamCreateWithFlags(&stream,

                                           cudaStreamNonBlocking));


     deviceStreams.push_back(stream);

   }


   alternateStreams_[device] = std::move(deviceStreams);


   // Create cuBLAS handle

   cublasHandle_t blasHandle = 0;

   auto blasStatus = cublasCreate(&blasHandle);

   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);

   blasHandles_[device] = blasHandle;


   size_t toAlloc = 0;

   if (useFraction_) {

     size_t devFree = 0;

     size_t devTotal = 0;


     CUDA_VERIFY(cudaMemGetInfo(&devFree, &devTotal));


     toAlloc = (size_t) (tempMemFraction_ * devTotal);

   } else {

     toAlloc = tempMemSize_;

   }


   FAISS_ASSERT(memory_.count(device) == 0);

   memory_.emplace(device,

                   std::unique_ptr<StackDeviceMemory>(

                     new StackDeviceMemory(device, toAlloc)));

 }


 cublasHandle_t

 StandardGpuResources::getBlasHandle(int device) {

   initializeForDevice(device);

   return blasHandles_[device];

 }


 cudaStream_t

 StandardGpuResources::getDefaultStream(int device) {

   initializeForDevice(device);

   return defaultStreams_[device];

 }


 std::vector<cudaStream_t>

 StandardGpuResources::getAlternateStreams(int device) {

   initializeForDevice(device);

   return alternateStreams_[device];

 }


 DeviceMemory& StandardGpuResources::getMemoryManager(int device) {

   initializeForDevice(device);

   return *memory_[device];

 }


 std::pair<void*, size_t>

 StandardGpuResources::getPinnedMemory() {

   return std::make_pair(pinnedMemAlloc_, pinnedMemAllocSize_);

 }


 cudaStream_t

 StandardGpuResources::getAsyncCopyStream(int device) {

   initializeForDevice(device);

   return asyncCopyStreams_[device];

 }


 } } // namespace

faiss::gpu::StandardGpuResources::setTempMemoryFraction
void setTempMemoryFraction(float fraction)
Definition: StandardGpuResources.cpp:83

faiss::gpu::StandardGpuResources::noTempMemory
void noTempMemory()
Definition: StandardGpuResources.cpp:72

faiss::gpu::StandardGpuResources::initializeForDevice
void initializeForDevice(int device) override
Internal system calls.
Definition: StandardGpuResources.cpp:99

faiss::gpu::StandardGpuResources::setTempMemory
void setTempMemory(size_t size)
Definition: StandardGpuResources.cpp:77

faiss::gpu::StandardGpuResources::setPinnedMemory
void setPinnedMemory(size_t size)
Definition: StandardGpuResources.cpp:90

faiss::gpu::DeviceScope
Definition: DeviceUtils.h:55

faiss::gpu::StackDeviceMemory
Definition: StackDeviceMemory.h:23