/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "../../FaissAssert.h"
#include "DeviceUtils.h"
#include <limits>

namespace faiss { namespace gpu {

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>&
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  data_ = t.data_;
  t.data_ = nullptr;

  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i];
    t.stride_[i] = 0;
    size_[i] = t.size_[i];
    t.size_[i] = 0;
  }

  return *this;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  assert(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isSame(
  const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (size_[i] != rhs.size_[i]) {
      return false;
    }

    if (!Contig) {
      if (stride_[i] != rhs.stride_[i]) {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  assert(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1; // this is the same as the old stride
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, Contig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // Check all outer strides
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // Check inner size
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    assert(this->canCastIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find maximum offset that can be calculated
  // FIXME: maybe also consider offset in bytes? multiply by sizeof(T)?
  size_t maxOffset = 0;

  if (Contig) {
    maxOffset = (size_t) size_[0] * (size_t) stride_[0];
  } else {
    for (int i = 0; i < Dim; ++i) {
      size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
      if (curMaxOffset > maxOffset) {
        maxOffset = curMaxOffset;
      }
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT
Tensor<T, Dim, Contig, IndexT, PtrTraits>::numElements() const {
  long size = getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= getSize(i);
  }

  return size;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) || // just in case
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::transpose(int dim1,
                                                     int dim2) const {
  assert(dim1 >= 0 && dim1 < Dim);
  assert(dim2 >= 0 && dim2 < Dim);
  static_assert(!Contig, "cannot transpose contiguous arrays");

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions get copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    assert(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = NewDim; i < Dim; ++i) {
    assert(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the first non-collapsed dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, Contig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                           IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[0];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == 0) {
      assert(start + size <= size_[0]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(newData, newSize, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrow(int dim,
                                                  IndexT start,
                                                  IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      assert(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // The narrowed tensor is not necessarily contiguous
  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(newData, newSize, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  static_assert(Contig, "on contiguous tensors only");

  assert(sizes.size() == NewDim);

  // The total size of the new view must be the same as the total size
  // of the old view
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  assert(curSize == newSize);
  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace
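
// Usage sketch (illustrative only, kept in a comment so it is not compiled as
// part of this header). The names `devPtr`, `hostPtr`, and `stream` are
// assumptions for the example, not part of this file; it shows the contiguous
// constructor, copyFrom, and downcastOuter defined above.
//
//   float* devPtr = ...;        // 128 * 64 floats already allocated on the GPU
//   float* hostPtr = ...;       // 128 * 64 floats on the host
//   cudaStream_t stream = ...;  // stream on which the async copy is ordered
//
//   Tensor<float, 2, true> dst(devPtr, {128, 64});
//   Tensor<float, 2, true> src(hostPtr, {128, 64});
//   dst.copyFrom(src, stream);           // host -> device cudaMemcpyAsync
//   auto flat = dst.downcastOuter<1>();  // contiguous 1-D view of 8192 elements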