docs/html/StackDeviceMemory_8cpp_source.html

 /**

  * Copyright (c) 2015-present, Facebook, Inc.

  * All rights reserved.

  *

  * This source code is licensed under the CC-by-NC license found in the

  * LICENSE file in the root directory of this source tree.

  */


 // Copyright 2004-present Facebook. All Rights Reserved.


 #include "StackDeviceMemory.h"

 #include "DeviceUtils.h"

 #include "StaticUtils.h"

 #include "../../FaissAssert.h"

 #include <stdio.h>

 #include <sstream>


 namespace faiss { namespace gpu {


 /// Builds a stack that owns its backing store (isOwner_ is set to true).
 ///
 /// `sz` bytes are requested from cudaMalloc while a DeviceScope for device
 /// `d` is alive; the allocation is asserted to succeed. All usage counters
 /// start at zero.
 StackDeviceMemory::Stack::Stack(int d, size_t sz)
     : device_(d),
       isOwner_(true),
       start_(nullptr),
       end_(nullptr),
       size_(sz),
       head_(nullptr),
       mallocCurrent_(0),
       highWaterMemoryUsed_(0),
       highWaterMalloc_(0) {
   DeviceScope deviceGuard(device_);

   // start_ receives the base address of the freshly allocated region
   const cudaError_t err = cudaMalloc(&start_, size_);
   FAISS_ASSERT(err == cudaSuccess);

   // Nothing is handed out yet: the region is [start_, end_) and the head
   // sits at its very beginning
   end_ = start_ + size_;
   head_ = start_;
 }


 /// Builds a stack on top of an already existing region [p, p + sz).
 ///
 /// No allocation is performed here. If `isOwner` is true the destructor
 /// will pass `p` to cudaFree; otherwise the region is left alone.
 ///
 /// The usage counters are explicitly zeroed, matching the allocating
 /// constructor. They are read by getAlloc (std::max / +=), returnAlloc
 /// (>= / -=) and toString, so leaving them uninitialized would make those
 /// reads indeterminate.
 StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
     : device_(d),
       isOwner_(isOwner),
       start_((char*) p),
       end_(((char*) p) + sz),
       size_(sz),
       head_((char*) p),
       mallocCurrent_(0),
       highWaterMemoryUsed_(0),
       highWaterMalloc_(0) {
 }


 /// Releases the backing region with cudaFree, but only when this stack owns
 /// it; the free is asserted to succeed.
 StackDeviceMemory::Stack::~Stack() {
   // A region we do not own is not ours to free
   if (!isOwner_) {
     return;
   }

   DeviceScope deviceGuard(device_);

   const cudaError_t err = cudaFree(start_);
   FAISS_ASSERT(err == cudaSuccess);
 }


 /// Returns the number of bytes between the current stack head and the end
 /// of the region, i.e. `end_ - head_`.
 size_t
 StackDeviceMemory::Stack::getSizeAvailable() const {
   const auto bytesLeft = end_ - head_;
   return bytesLeft;
 }


 /// Hands out `size` bytes, from the stack if they fit and from cudaMalloc
 /// otherwise.
 ///
 /// - If `size` exceeds the space left between head_ and end_, a separate
 ///   cudaMalloc allocation is made (asserted to succeed), the outstanding
 ///   and high-water cudaMalloc counters are updated, and a warning is
 ///   printed to stderr.
 /// - Otherwise the allocation is [head_, head_ + size): head_ is advanced,
 ///   highWaterMemoryUsed_ is updated, and the old head is returned. On the
 ///   way, entries at the back of lastUsers_ covered by the new allocation
 ///   are trimmed or removed.
 ///
 /// @param size   number of bytes requested
 /// @param stream stream the caller will use the memory on; it is compared
 ///               against the stream recorded for each previous user
 /// @return pointer to the start of the allocation
 char*

 StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) {

   if (size > (end_ - head_)) {

     // Too large for our stack

     DeviceScope s(device_);


     char* p = nullptr;

     auto err = cudaMalloc(&p, size);

     FAISS_ASSERT(err == cudaSuccess);


     // Track how much cudaMalloc memory is outstanding and its peak
     mallocCurrent_ += size;

     highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);


     fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "

             "or decrease query/add size (alloc %zu B, highwater %zu B)\n",

             size, highWaterMalloc_);


     return p;

   } else {

     // We can make the allocation out of our stack

     // Find all the ranges that we overlap that may have been

     // previously allocated; our allocation will be [head, endAlloc)

     char* startAlloc = head_;

     char* endAlloc = head_ + size;


     // Walk the recorded previous users from the most recently returned one
     while (lastUsers_.size() > 0) {

       auto& prevUser = lastUsers_.back();


       // Because there is a previous user, we must overlap it

       FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);


       if (stream != prevUser.stream_) {

         // Synchronization required

         // FIXME

         // Reuse by a different stream is not handled yet: this path
         // unconditionally trips the assertion below
         FAISS_ASSERT(false);

       }


       if (endAlloc < prevUser.end_) {

         // Update the previous user info

         // Only the front part of the previous range is reused; keep the
         // entry but shrink it to the part beyond our allocation
         prevUser.start_ = endAlloc;


         break;

       }


       // If we're the exact size of the previous request, then we

       // don't need to continue

       // (must be evaluated before pop_back, which invalidates prevUser)
       bool done = (prevUser.end_ == endAlloc);


       // The previous range ends at or before endAlloc, so drop its entry
       lastUsers_.pop_back();


       if (done) {

         break;

       }

     }


     head_ = endAlloc;

     FAISS_ASSERT(head_ <= end_);


     // Record the peak number of bytes in use on the stack
     highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,

                                     (size_t) (head_ - start_));

     return startAlloc;

   }

 }


 /// Takes back an allocation previously produced by getAlloc.
 ///
 /// A pointer inside [start_, end_) is treated as a stack allocation: it must
 /// be the topmost one (`p + size == head_`), the head is moved back to `p`,
 /// and the range is recorded in lastUsers_ together with `stream`.
 /// Any other pointer is treated as a one-off cudaMalloc allocation: it is
 /// released with cudaFree and `size` is subtracted from mallocCurrent_.
 ///
 /// @param p      start of the allocation being returned
 /// @param size   size in bytes of the allocation being returned
 /// @param stream stream recorded as the last user of the range
 void
 StackDeviceMemory::Stack::returnAlloc(char* p,
                                       size_t size,
                                       cudaStream_t stream) {
   const bool onStack = (p >= start_) && (p < end_);

   if (onStack) {
     // Allocations should be freed in the reverse order they are made
     FAISS_ASSERT(p + size == head_);

     head_ = p;
     lastUsers_.push_back(Range(p, p + size, stream));
     return;
   }

   // Not on our stack, so this was a one-off cudaMalloc allocation
   DeviceScope deviceGuard(device_);

   auto err = cudaFree(p);
   FAISS_ASSERT(err == cudaSuccess);

   // Keep the outstanding-cudaMalloc counter in sync
   FAISS_ASSERT(mallocCurrent_ >= size);
   mallocCurrent_ -= size;
 }


 /// Produces a multi-line, human-readable dump of the stack state: the total
 /// region, the currently available region, both high-water marks, and one
 /// line per entry of lastUsers_ (most recently added entry first, numbered
 /// downwards from the entry count).
 std::string
 StackDeviceMemory::Stack::toString() const {
   std::stringstream out;

   // Region summary
   out << "SDM device " << device_ << ": Total memory " << size_ << " ["
       << static_cast<void*>(start_) << ", " << static_cast<void*>(end_)
       << ")\n";
   out << "     Available memory " << static_cast<size_t>(end_ - head_)
       << " [" << static_cast<void*>(head_) << ", "
       << static_cast<void*>(end_) << ")\n";

   // Peak usage counters
   out << "     High water temp alloc " << highWaterMemoryUsed_ << "\n";
   out << "     High water cudaMalloc " << highWaterMalloc_ << "\n";

   // Previous users, walking from the back of the list to the front
   int index = lastUsers_.size();
   for (auto user = lastUsers_.rbegin();
        user != lastUsers_.rend();
        ++user, --index) {
     out << index << ": size "
         << static_cast<size_t>(user->end_ - user->start_)
         << " stream " << user->stream_
         << " [" << static_cast<void*>(user->start_) << ", "
         << static_cast<void*>(user->end_) << ")\n";
   }

   return out.str();
 }


 /// Returns highWaterMalloc_, the largest value mallocCurrent_ has reached
 /// as tracked by getAlloc.
 size_t
 StackDeviceMemory::Stack::getHighWaterCudaMalloc() const {
   const size_t peak = highWaterMalloc_;
   return peak;
 }


 /// Creates a memory manager for `device` whose stack allocates
 /// `allocPerDevice` bytes itself (see the allocating Stack constructor).
 StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
     : device_(device), stack_(device, allocPerDevice) {}


 /// Creates a memory manager for `device` whose stack is laid over the
 /// caller-provided region [p, p + size). `isOwner` is forwarded to the
 /// stack and decides whether the region is freed on destruction.
 StackDeviceMemory::StackDeviceMemory(int device,
                                      void* p,
                                      size_t size,
                                      bool isOwner)
     : device_(device), stack_(device, p, size, isOwner) {}


 /// Nothing to do explicitly; the members (including stack_) clean up after
 /// themselves.
 StackDeviceMemory::~StackDeviceMemory() = default;


 /// Returns the device id this manager was constructed with.
 int
 StackDeviceMemory::getDevice() const {
   const int dev = device_;
   return dev;
 }


 /// Reserves temporary memory for use on `stream`.
 ///
 /// The requested size is rounded up to a multiple of 16 before it is passed
 /// to the stack, and the rounded size is what the returned reservation
 /// carries.
 ///
 /// @param stream stream the memory will be used on
 /// @param size   number of bytes requested
 /// @return reservation wrapping the allocation, this manager, the device,
 ///         the rounded size and the stream
 DeviceMemoryReservation
 StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
   // We guarantee 16 byte alignment for allocations, so every size is bumped
   // to the next multiple of 16
   const size_t paddedSize = utils::roundUp(size, (size_t) 16);

   char* const mem = stack_.getAlloc(paddedSize, stream);

   return DeviceMemoryReservation(this, device_, mem, paddedSize, stream);
 }


 /// Returns the number of bytes the underlying stack can still hand out
 /// (delegates to Stack::getSizeAvailable).
 size_t
 StackDeviceMemory::getSizeAvailable() const {
   const size_t bytesLeft = stack_.getSizeAvailable();
   return bytesLeft;
 }


 /// Returns the textual state dump of the underlying stack
 /// (delegates to Stack::toString).
 std::string
 StackDeviceMemory::toString() const {
   std::string description = stack_.toString();
   return description;
 }


 /// Returns the cudaMalloc high-water mark of the underlying stack
 /// (delegates to Stack::getHighWaterCudaMalloc).
 size_t
 StackDeviceMemory::getHighWaterCudaMalloc() const {
   const size_t peak = stack_.getHighWaterCudaMalloc();
   return peak;
 }


 /// Gives a reservation's memory back to the stack.
 ///
 /// The reservation must hold a non-null pointer and must belong to the same
 /// device as this manager; both conditions are asserted.
 ///
 /// @param m reservation whose pointer, size and stream are handed to
 ///          Stack::returnAlloc
 void
 StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
   FAISS_ASSERT(m.get());
   FAISS_ASSERT(device_ == m.device());

   char* const mem = (char*) m.get();
   stack_.returnAlloc(mem, m.size(), m.stream());
 }


 } } // namespace

faiss::gpu::StackDeviceMemory::getMemory
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
Definition: StackDeviceMemory.cpp:196

faiss::gpu::StackDeviceMemory::Stack::getHighWaterCudaMalloc
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
Definition: StackDeviceMemory.cpp:172

faiss::gpu::StackDeviceMemory::Stack::size_
size_t size_
Total size end_ - start_.
Definition: StackDeviceMemory.h:97

faiss::gpu::StackDeviceMemory::Stack::Stack
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
Definition: StackDeviceMemory.cpp:21

faiss::gpu::StackDeviceMemory::stack_
Stack stack_
Memory stack.
Definition: StackDeviceMemory.h:122

faiss::gpu::StackDeviceMemory::Stack::returnAlloc
void returnAlloc(char *p, size_t size, cudaStream_t stream)
Returns an allocation.
Definition: StackDeviceMemory.cpp:128

faiss::gpu::StackDeviceMemory::Stack::head_
char * head_
Stack head within [start, end)
Definition: StackDeviceMemory.h:100

faiss::gpu::StackDeviceMemory::getSizeAvailable
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
Definition: StackDeviceMemory.cpp:209

faiss::gpu::StackDeviceMemory::Stack::getSizeAvailable
size_t getSizeAvailable() const
Definition: StackDeviceMemory.cpp:59

faiss::gpu::StackDeviceMemory::Stack::device_
int device_
Device this allocation is on.
Definition: StackDeviceMemory.h:86

faiss::gpu::DeviceScope
Definition: DeviceUtils.h:55

faiss::gpu::StackDeviceMemory::toString
std::string toString() const override
Returns a string containing our current memory manager state.
Definition: StackDeviceMemory.cpp:214

faiss::gpu::StackDeviceMemory::Range
Definition: StackDeviceMemory.h:49

faiss::gpu::StackDeviceMemory::Stack::toString
std::string toString() const
Returns the stack state.
Definition: StackDeviceMemory.cpp:151

faiss::gpu::StackDeviceMemory::getHighWaterCudaMalloc
size_t getHighWaterCudaMalloc() const override
Definition: StackDeviceMemory.cpp:219

faiss::gpu::StackDeviceMemory::Stack::getAlloc
char * getAlloc(size_t size, cudaStream_t stream)
Definition: StackDeviceMemory.cpp:64

faiss::gpu::StackDeviceMemory::Stack::start_
char * start_
Definition: StackDeviceMemory.h:93

faiss::gpu::StackDeviceMemory::device_
int device_
Our device.
Definition: StackDeviceMemory.h:119

faiss::gpu::DeviceMemoryReservation
Definition: DeviceMemory.h:21

faiss::gpu::StackDeviceMemory::getDevice
int getDevice() const override
Returns the device we are managing memory for.
Definition: StackDeviceMemory.cpp:191

faiss::gpu::StackDeviceMemory::StackDeviceMemory
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.
Definition: StackDeviceMemory.cpp:176