#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <limits>

namespace faiss { namespace gpu {
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  this->operator=(t);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  this->operator=(std::move(t));
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  data_ = t.data_;
  for (int i = 0; i < Dim; ++i) {
    size_[i] = t.size_[i];
    stride_[i] = t.stride_[i];
  }

  return *this;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The destination tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since both tensors are contiguous, comparing the total number of
  // elements is sufficient
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // The source is on the host
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // The source is on a device
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The source tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since both tensors are contiguous, comparing the total number of
  // elements is sufficient
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // The destination is on the host
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // The destination is on a device
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
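// Usage sketch (illustrative only, not part of the original file; the names
// `devData`, `hostData`, and `stream` are hypothetical placeholders):
//
//   // Wrap existing device and host allocations of 128 x 64 floats, then
//   // issue an asynchronous device-to-host copy on `stream`.
//   Tensor<float, 2, true> devT(devData, {128, 64});
//   Tensor<float, 2, true> hostT(hostData, {128, 64});
//   devT.copyTo(hostT, stream);   // same effect as hostT.copyFrom(devT, stream)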
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
      castResize<U>();
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // The base pointer must be sizeof(U) aligned
  if (((uintptr_t) data_) % sizeof(U) != 0) {
    return false;
  }

  // All outer strides must be a multiple of the size ratio
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // The innermost size must be a multiple of the size ratio
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  // The innermost dimension must be contiguous
  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
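// Usage sketch (illustrative only, not part of the original file; `devData`
// is a hypothetical device pointer assumed to be 16-byte aligned):
//
//   // Reinterpret a {128, 64} float tensor as {128, 16} float4. This is
//   // valid when the base pointer is sizeof(float4)-aligned, the outer
//   // strides divide by 4, and the innermost dimension is contiguous;
//   // canCastResize() checks exactly these conditions.
//   Tensor<float, 2, true> t(devData, {128, 64});
//   if (t.canCastResize<float4>()) {
//     Tensor<float4, 2, true> t4 = t.castResize<float4>();
//   }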
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the largest element offset that this tensor can address
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) ||
         ((i < Dim - 1) &&
          ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1, int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // If the tensor is innermost contiguous, the innermost dimension cannot be
  // transposed with anything else
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize, newStride);
}
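// Usage sketch (illustrative only, not part of the original file; `devData`
// is a hypothetical device pointer):
//
//   // A contiguous {2, 3, 4} tensor has strides {12, 4, 1}; transpose(0, 1)
//   // swaps the two outer dimensions without moving any data, yielding
//   // sizes {3, 2, 4} and strides {4, 12, 1}. With InnerContig set, the
//   // innermost dimension may not take part in the transpose.
//   Tensor<float, 3, true> t(devData, {2, 3, 4});
//   auto tt = t.transpose(0, 1);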
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the newly prepended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions over
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // The newly appended dimensions have size 1 and stride 1
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must not be padded, otherwise garbage
  // data would be folded into the collapsed dimension
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these outer dimensions
      collapsedSize *= getSize(i);
    } else {
      if (i == ignoredDims) {
        // The first retained dimension absorbs the collapsed sizes
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Remaining dimensions are unchanged
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must not be padded
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these inner dimensions
      collapsedSize *= getSize(i);
    } else {
      if (i == NewDim - 1) {
        // The innermost retained dimension absorbs the collapsed sizes
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
      at, viewSizes, viewStrides);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize, stride_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
    std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());

  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total number of elements must be preserved by the new view
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace gpu, namespace faiss
Member summary (signatures and brief descriptions recovered from the documentation page):

Data members:
  DataPtrType data_
      Raw pointer to where the tensor data begins.
  IndexT size_[Dim]
      Size per each dimension.
  IndexT stride_[Dim]
      Array of strides (in sizeof(T) terms) per each dimension.

Construction and assignment:
  __host__ __device__ Tensor()
      Default constructor.
  __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& operator=(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t)
      Assignment.

Accessors:
  __host__ __device__ DataPtrType data()
      Returns a raw pointer to the start of our data.
  __host__ __device__ IndexT getSize(int i) const
  __host__ __device__ IndexT getStride(int i) const
  __host__ __device__ const IndexT* sizes() const
      Returns the size array.
  __host__ __device__ const IndexT* strides() const
      Returns the stride array.
  __host__ __device__ size_t numElements() const

Copies:
  __host__ void copyFrom(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t, cudaStream_t stream)
      Copies a tensor into ourselves; sizes must match.
  __host__ void copyTo(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t, cudaStream_t stream)
      Copies ourselves into a tensor; sizes must match.

Comparisons and layout queries:
  __host__ __device__ bool isSame(const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const
  __host__ __device__ bool isSameSize(const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const
      Returns true if the two tensors are of the same dimensionality and size.
  __host__ __device__ bool isContiguous() const
  __host__ __device__ bool isContiguousDim(int i) const
      Returns true if the given dimension index has no padding.
  __host__ __device__ bool canCastResize() const
      Returns true if we can castResize() this tensor to the new type.
  __host__ bool canUseIndexType() const

Casts and shape manipulation:
  __host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits> cast()
  __host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits> castResize()
  __host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits> castIndexType() const
  __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits> transpose(int dim1, int dim2) const
  __host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits> upcastOuter()
  __host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits> upcastInner()
  __host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits> downcastOuter()
  __host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits> downcastInner()
  __host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits> view()
  __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits> narrow(int dim, IndexT start, IndexT size)
  __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits> narrowOutermost(IndexT start, IndexT size)
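The shape-manipulation members compose without copying data: they only rewrite the size/stride arrays over the same pointer. A minimal sketch, assuming a contiguous device allocation `devData` of 2 * 3 * 4 floats (the pointer name and the surrounding setup are hypothetical):

  using faiss::gpu::Tensor;

  Tensor<float, 3, true> t(devData, {2, 3, 4});

  // Collapse the two outer dimensions: shape {2, 3, 4} -> {6, 4}.
  Tensor<float, 2, true> flat = t.downcastOuter<2>();

  // Keep only the first outer slice: shape {1, 3, 4}, same strides.
  Tensor<float, 3, true> head = t.narrowOutermost(0, 1);

  // View the innermost two dimensions at the start of the data: shape {3, 4}.
  Tensor<float, 2, true> inner = t.view<2>();

Each of these operations checks (via GPU_FAISS_ASSERT or static_assert) that the requested reinterpretation is consistent with the tensor's contiguity, so misuse fails loudly rather than silently reading padding.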