#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <limits>
namespace faiss { namespace gpu {
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  this->operator=(t);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  this->operator=(std::move(t));
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  data_ = t.data_;
  for (int i = 0; i < Dim; ++i) {
    size_[i] = t.size_[i];
    stride_[i] = t.stride_[i];
  }

  return *this;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  // Compute dense strides: the innermost dimension has stride 1
  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
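// --- Usage sketch (not part of the original source) ---
// Minimal illustration of the (data, sizes[Dim]) constructor above, assuming
// the default IndexT/PtrTraits template arguments declared in Tensor.cuh.
// `devPtr`, `numVecs` and `dim` are hypothetical caller-provided values
// describing a dense, row-major numVecs x dim float buffer on the GPU.
inline Tensor<float, 2, true> exampleWrapDenseMatrix(float* devPtr,
                                                     int numVecs,
                                                     int dim) {
  int sizes[2] = {numVecs, dim};
  // The derived strides are {dim, 1}, i.e. a dense row-major matrix view
  return Tensor<float, 2, true>(devPtr, sizes);
}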
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  // Compute dense strides: the innermost dimension has stride 1
  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since the dimensionality already matches, checking the total number of
  // elements is sufficient
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Source is host memory
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // Source is device memory
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
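// --- Usage sketch (not part of the original source) ---
// Illustrates copyFrom: fill a contiguous device tensor from host memory.
// `hostPtr` is a hypothetical buffer holding deviceVecs.numElements() floats;
// getDeviceForAddress() reports -1 for it, so a HostToDevice copy is issued
// on `stream`.
inline void exampleCopyHostToDevice(Tensor<float, 2, true>& deviceVecs,
                                    float* hostPtr,
                                    cudaStream_t stream) {
  Tensor<float, 2, true> hostVecs(
      hostPtr, {deviceVecs.getSize(0), deviceVecs.getSize(1)});
  deviceVecs.copyFrom(hostVecs, stream);
}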
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since the dimensionality already matches, checking the total number of
  // elements is sufficient
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Destination is host memory
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // Destination is device memory
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
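// --- Usage sketch (not part of the original source) ---
// Mirror of the copyFrom example: read a contiguous device tensor back into a
// hypothetical host buffer `hostPtr` of at least numElements() floats. The
// copy is asynchronous; synchronize `stream` before reading the host buffer.
inline void exampleCopyDeviceToHost(Tensor<float, 2, true>& deviceVecs,
                                    float* hostPtr,
                                    cudaStream_t stream) {
  Tensor<float, 2, true> hostVecs(
      hostPtr, {deviceVecs.getSize(0), deviceVecs.getSize(1)});
  deviceVecs.copyTo(hostVecs, stream);
}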
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}
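// --- Usage sketch (not part of the original source) ---
// cast() reinterprets the element type without touching sizes or strides; it
// is only legal between types of identical size (enforced by static_assert),
// e.g. viewing float storage as int bit patterns.
inline Tensor<int, 2, true> exampleCastFloatBitsToInt(
    Tensor<float, 2, true>& t) {
  return t.cast<int>();  // sizeof(int) == sizeof(float)
}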
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), newSize, newStride);
}
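// --- Usage sketch (not part of the original source) ---
// castResize() widens the element type and shrinks the innermost dimension by
// sizeof(U) / sizeof(T). Guarding with canCastResize() checks base-pointer
// alignment, outer-stride divisibility and the innermost size.
inline Tensor<float4, 2, true> exampleVectorizeInnerDim(
    Tensor<float, 2, true>& t) {
  GPU_FAISS_ASSERT(t.canCastResize<float4>());
  // An [n x d] float tensor becomes an [n x d/4] float4 tensor over the same
  // storage; the outer stride is divided by 4 as well
  return t.castResize<float4>();
}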
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
      castResize<U>();
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // The base pointer must be sizeof(U) aligned
  if (((uintptr_t) data_) % sizeof(U) != 0) {
    return false;
  }

  // All outer strides must be a multiple of the size ratio
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // The innermost dimension must be evenly divisible and contiguous
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    // A smaller index type must still be able to address every offset we use
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
      data_, newSize, newStride);
}
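// --- Usage sketch (not part of the original source) ---
// castIndexType() converts the type used for sizes and strides. Widening is
// always safe; narrowing asserts via canUseIndexType() that no size * stride
// offset would overflow the new index type.
inline Tensor<float, 2, true, long> exampleWidenIndexType(
    Tensor<float, 2, true, int>& t) {
  return t.castIndexType<long>();
}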
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum offset that we might need to address
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) ||
         ((i < Dim - 1) &&
          ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1,
                                                          int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // For an innermost-contiguous tensor, the innermost dimension (with its
  // guaranteed stride of 1) cannot take part in the transposition
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize, newStride);
}
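// --- Usage sketch (not part of the original source) ---
// transpose() swaps two dimensions by exchanging their sizes and strides; no
// data moves. For an innermost-contiguous tensor the last dimension cannot be
// swapped, so this example uses a 3-d tensor and exchanges the two outer
// dimensions.
inline Tensor<float, 3, true> exampleSwapOuterDims(
    const Tensor<float, 3, true>& t) {
  // An [a x b x c] tensor viewed as [b x a x c]; the result is generally not
  // contiguous in its outer dimensions
  return t.transpose(0, 1);
}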
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the new, prepended dimensions of size 1
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
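// --- Usage sketch (not part of the original source) ---
// upcastOuter() prepends size-1 dimensions, e.g. treating an [n x d] matrix
// as a [1 x n x d] batch of one without copying anything.
inline Tensor<float, 3, true> exampleAddBatchDim(Tensor<float, 2, true>& t) {
  return t.upcastOuter<3>();
}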
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // The new, appended dimensions have size and stride 1
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must be contiguous (no padding), otherwise
  // the collapsed view would include garbage data
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
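// --- Usage sketch (not part of the original source) ---
// downcastOuter() folds the outermost dimensions into one, e.g. flattening a
// [batch x n x d] tensor into [(batch * n) x d]. The collapsed dimensions must
// be contiguous, as asserted above.
inline Tensor<float, 2, true> exampleFlattenBatch(Tensor<float, 3, true>& t) {
  return t.downcastOuter<2>();
}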
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must be contiguous (no padding), otherwise
  // the collapsed view would include garbage data
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the innermost non-collapsed dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Other non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
      at, viewSizes, viewStrides);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}
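// --- Usage sketch (not part of the original source) ---
// view<SubDim>() keeps the innermost SubDim sizes/strides and the current base
// pointer, i.e. it views the leading slice of the dropped outer dimensions.
inline Tensor<float, 1, true> exampleFirstRowView(Tensor<float, 2, true>& t) {
  // A 1-d view of length t.getSize(1) over the first row of t
  return t.view<1>();
}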
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // Strides are unchanged; if we were innermost contiguous before, we remain so
  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize, stride_);
}
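// --- Usage sketch (not part of the original source) ---
// narrow() selects the [start, start + size) range along one dimension by
// offsetting the data pointer and shrinking that dimension; strides are kept,
// so the result may no longer be fully contiguous.
inline Tensor<float, 2, true> exampleSelectRows(Tensor<float, 2, true>& t,
                                                int firstRow, int numRows) {
  // Equivalent to t.narrowOutermost(firstRow, numRows)
  return t.narrow(0, firstRow, numRows);
}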
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
    std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());

  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total number of elements must be preserved by the reshape
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace gpu, namespace faiss