#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <limits>
namespace faiss { namespace gpu {
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  this->operator=(t);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  this->operator=(std::move(t));
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  data_ = t.data_;
  for (int i = 0; i < Dim; ++i) {
    size_[i] = t.size_[i];
    stride_[i] = t.stride_[i];
  }

  return *this;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  // Compute packed (contiguous) strides from the innermost dimension out
  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  // Compute packed (contiguous) strides from the innermost dimension out
  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  // The copy is performed as a single contiguous memcpy
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since both tensors are contiguous, checking the total number of
  // elements is sufficient
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Source is on the host
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // Source is on a device
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  // The copy is performed as a single contiguous memcpy
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since both tensors are contiguous, checking the total number of
  // elements is sufficient
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Destination is on the host
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // Destination is on a device
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
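// Illustrative sketch (not part of the original source): copying a row-major
// host buffer into a device buffer of the same shape via copyFrom(). The
// function, pointer, and size names below are hypothetical, and the default
// IndexT/PtrTraits template arguments declared in Tensor.cuh are assumed.
inline void exampleCopyHostToDevice(float* hostPtr,
                                    float* devicePtr,
                                    int numVecs,
                                    int dim,
                                    cudaStream_t stream) {
  Tensor<float, 2, true> src(hostPtr, {numVecs, dim});
  Tensor<float, 2, true> dst(devicePtr, {numVecs, dim});

  // Both tensors are contiguous and hold the same number of elements, so
  // this resolves to a single async host-to-device memcpy on `stream`.
  dst.copyFrom(src, stream);
}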
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
  const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
  const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  // The innermost dimension stays contiguous; its extent shrinks by kMultiple
  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // The base pointer must be sizeof(U) aligned
  if (((uintptr_t) data_) % sizeof(U) != 0) {
    return false;
  }

  // All outer strides must be a multiple of the size ratio
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // The innermost dimension must be a multiple of the size ratio and
  // must be contiguous
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
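// Illustrative sketch (not part of the original source): reinterpreting a
// contiguous float tensor as float4 for vectorized access. The function name
// is hypothetical; default IndexT/PtrTraits template arguments are assumed.
inline Tensor<float4, 2, true> exampleVectorize(Tensor<float, 2, true>& t) {
  // canCastResize() reports whether the base pointer alignment, the outer
  // strides, and the innermost size all permit the reinterpretation;
  // castResize() asserts the same condition before reinterpreting.
  GPU_FAISS_ASSERT(t.canCastResize<float4>());
  return t.castResize<float4>();
}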
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    // Narrowing the index type can overflow; verify that all offsets fit
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum element offset that indexing can produce
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) ||
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1,
                                                          int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // If this tensor is required to be innermost contiguous, the innermost
  // dimension cannot take part in the transposition
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize, newStride);
}
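// Illustrative sketch (not part of the original source): swapping the two
// outermost dimensions of a 3-d tensor. Because InnerContig is true here,
// the innermost dimension (Dim - 1) must be left in place. The function name
// is hypothetical; default IndexT/PtrTraits template arguments are assumed.
inline Tensor<float, 3, true> exampleSwapOuterDims(
    const Tensor<float, 3, true>& t) {
  // Only sizes and strides are swapped; no data movement occurs.
  return t.transpose(0, 1);
}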
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the new, prepended dimensions of extent 1
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // The remaining dimensions are shifted over
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // New, appended dimensions of extent 1
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must be contiguous (no padding),
  // otherwise the collapsed view would cover garbage data
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these outermost dimensions
      collapsedSize *= getSize(i);
    } else {
      if (i == ignoredDims) {
        // The first remaining dimension absorbs the collapsed extent
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent dimensions are unchanged
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must be contiguous (no padding),
  // otherwise the collapsed view would cover garbage data
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these innermost dimensions
      collapsedSize *= getSize(i);
    } else {
      if (i == NewDim - 1) {
        // The last remaining dimension absorbs the collapsed extent
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // The other dimensions are unchanged
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // Strides are unchanged, so an innermost-contiguous tensor stays
  // innermost contiguous
  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize, stride_);
}
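// Illustrative sketch (not part of the original source): selecting a block of
// rows from a 2-d tensor without copying. The function and parameter names
// are hypothetical; default IndexT/PtrTraits template arguments are assumed.
inline Tensor<float, 2, true> exampleRowRange(Tensor<float, 2, true>& vecs,
                                              int start, int num) {
  // Equivalent to vecs.narrowOutermost(start, num): the data pointer is
  // advanced by start * stride(0) and only the outermost size changes.
  return vecs.narrow(0, start, num);
}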
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());
  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total number of elements must be unchanged by the reshape
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace faiss::gpu