/** * Copyright (c) 2015-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the CC-by-NC license found in the * LICENSE file in the root directory of this source tree. */ // Copyright 2004-present Facebook. All Rights Reserved. #include "IVFBase.cuh" #include "../GpuResources.h" #include "FlatIndex.cuh" #include "InvertedListAppend.cuh" #include "RemapIndices.h" #include "../utils/DeviceDefs.cuh" #include "../utils/DeviceUtils.h" #include "../utils/HostTensor.cuh" #include #include #include namespace faiss { namespace gpu { IVFBase::IVFBase(GpuResources* resources, FlatIndex* quantizer, int bytesPerVector, IndicesOptions indicesOptions) : resources_(resources), quantizer_(quantizer), bytesPerVector_(bytesPerVector), indicesOptions_(indicesOptions), dim_(quantizer->getDim()), numLists_(quantizer->getSize()), maxListLength_(0) { reset(); } IVFBase::~IVFBase() { } void IVFBase::reserveMemory(size_t numVecs) { size_t vecsPerList = numVecs / deviceListData_.size(); if (vecsPerList < 1) { return; } auto stream = resources_->getDefaultStreamCurrentDevice(); size_t bytesPerDataList = vecsPerList * bytesPerVector_; for (auto& list : deviceListData_) { list->reserve(bytesPerDataList, stream); } if ((indicesOptions_ == INDICES_32_BIT) || (indicesOptions_ == INDICES_64_BIT)) { // Reserve for index lists as well size_t bytesPerIndexList = vecsPerList * (indicesOptions_ == INDICES_32_BIT ? sizeof(int) : sizeof(long)); for (auto& list : deviceListIndices_) { list->reserve(bytesPerIndexList, stream); } } // Update device info for all lists, since the base pointers may // have changed updateDeviceListInfo_(stream); } void IVFBase::reset() { deviceListData_.clear(); deviceListIndices_.clear(); deviceListDataPointers_.clear(); deviceListIndexPointers_.clear(); deviceListLengths_.clear(); for (size_t i = 0; i < numLists_; ++i) { deviceListData_.emplace_back( std::unique_ptr>( new DeviceVector())); deviceListIndices_.emplace_back( std::unique_ptr>( new DeviceVector())); listOffsetToUserIndex_.emplace_back(std::vector()); } deviceListDataPointers_.resize(numLists_, nullptr); deviceListIndexPointers_.resize(numLists_, nullptr); deviceListLengths_.resize(numLists_, 0); maxListLength_ = 0; } int IVFBase::getDim() const { return dim_; } size_t IVFBase::reclaimMemory() { // Reclaim all unused memory exactly return reclaimMemory_(true); } size_t IVFBase::reclaimMemory_(bool exact) { auto stream = resources_->getDefaultStreamCurrentDevice(); size_t totalReclaimed = 0; for (int i = 0; i < deviceListData_.size(); ++i) { auto& data = deviceListData_[i]; totalReclaimed += data->reclaim(exact, stream); deviceListDataPointers_[i] = data->data(); } for (int i = 0; i < deviceListIndices_.size(); ++i) { auto& indices = deviceListIndices_[i]; totalReclaimed += indices->reclaim(exact, stream); deviceListIndexPointers_[i] = indices->data(); } // Update device info for all lists, since the base pointers may // have changed updateDeviceListInfo_(stream); return totalReclaimed; } void IVFBase::updateDeviceListInfo_(cudaStream_t stream) { std::vector listIds(deviceListData_.size()); for (int i = 0; i < deviceListData_.size(); ++i) { listIds[i] = i; } updateDeviceListInfo_(listIds, stream); } void IVFBase::updateDeviceListInfo_(const std::vector& listIds, cudaStream_t stream) { auto& mem = resources_->getMemoryManagerCurrentDevice(); HostTensor hostListsToUpdate({(int) listIds.size()}); HostTensor hostNewListLength({(int) listIds.size()}); HostTensor hostNewDataPointers({(int) listIds.size()}); HostTensor hostNewIndexPointers({(int) listIds.size()}); for (int i = 0; i < listIds.size(); ++i) { auto listId = listIds[i]; auto& data = deviceListData_[listId]; auto& indices = deviceListIndices_[listId]; hostListsToUpdate[i] = listId; hostNewListLength[i] = data->size() / bytesPerVector_; hostNewDataPointers[i] = data->data(); hostNewIndexPointers[i] = indices->data(); } // Copy the above update sets to the GPU DeviceTensor listsToUpdate( mem, hostListsToUpdate, stream); DeviceTensor newListLength( mem, hostNewListLength, stream); DeviceTensor newDataPointers( mem, hostNewDataPointers, stream); DeviceTensor newIndexPointers( mem, hostNewIndexPointers, stream); // Update all pointers to the lists on the device that may have // changed runUpdateListPointers(listsToUpdate, newListLength, newDataPointers, newIndexPointers, deviceListLengths_, deviceListDataPointers_, deviceListIndexPointers_, stream); } size_t IVFBase::getNumLists() const { return numLists_; } int IVFBase::getListLength(int listId) const { FAISS_ASSERT(listId < deviceListLengths_.size()); return deviceListLengths_[listId]; } std::vector IVFBase::getListIndices(int listId) const { FAISS_ASSERT(listId < numLists_); if (indicesOptions_ == INDICES_32_BIT) { FAISS_ASSERT(listId < deviceListIndices_.size()); auto intInd = deviceListIndices_[listId]->copyToHost( resources_->getDefaultStreamCurrentDevice()); std::vector out(intInd.size()); for (size_t i = 0; i < intInd.size(); ++i) { out[i] = (long) intInd[i]; } return out; } else if (indicesOptions_ == INDICES_64_BIT) { FAISS_ASSERT(listId < deviceListIndices_.size()); return deviceListIndices_[listId]->copyToHost( resources_->getDefaultStreamCurrentDevice()); } else if (indicesOptions_ == INDICES_CPU) { FAISS_ASSERT(listId < deviceListData_.size()); FAISS_ASSERT(listId < listOffsetToUserIndex_.size()); auto& userIds = listOffsetToUserIndex_[listId]; FAISS_ASSERT(userIds.size() == deviceListData_[listId]->size() / bytesPerVector_); // this will return a copy return userIds; } else { // unhandled indices type (includes INDICES_IVF) FAISS_ASSERT(false); return std::vector(); } } void IVFBase::addIndicesFromCpu_(int listId, const long* indices, size_t numVecs) { auto stream = resources_->getDefaultStreamCurrentDevice(); auto& listIndices = deviceListIndices_[listId]; auto prevIndicesData = listIndices->data(); if (indicesOptions_ == INDICES_32_BIT) { // Make sure that all indices are in bounds std::vector indices32(numVecs); for (size_t i = 0; i < numVecs; ++i) { auto ind = indices[i]; FAISS_ASSERT(ind <= (long) std::numeric_limits::max()); indices32[i] = (int) ind; } listIndices->append((unsigned char*) indices32.data(), numVecs * sizeof(int), stream, true /* exact reserved size */); } else if (indicesOptions_ == INDICES_64_BIT) { listIndices->append((unsigned char*) indices, numVecs * sizeof(long), stream, true /* exact reserved size */); } else if (indicesOptions_ == INDICES_CPU) { // indices are stored on the CPU FAISS_ASSERT(listId < listOffsetToUserIndex_.size()); auto& userIndices = listOffsetToUserIndex_[listId]; userIndices.insert(userIndices.begin(), indices, indices + numVecs); } else { // indices are not stored FAISS_ASSERT(indicesOptions_ == INDICES_IVF); } if (prevIndicesData != listIndices->data()) { deviceListIndexPointers_[listId] = listIndices->data(); } } } } // namespace