#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <limits>

namespace faiss { namespace gpu {
// Default constructor: a zero-sized tensor over a null pointer
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}
// Move assignment: take over the other tensor's data and clear it
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}
// Constructor for a contiguous, row-major tensor over the given sizes
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  // Compute packed row-major strides: the innermost stride is 1
  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
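// Illustrative note (hypothetical values, not from the original source):
// for a 3-d tensor constructed with sizes {2, 3, 4}, the loop above
// produces row-major strides {12, 4, 1}; element (i, j, k) then lives at
// offset i * 12 + j * 4 + k from data_.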
// Constructor taking the sizes as an initializer list; also produces a
// contiguous, row-major layout
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
// Constructor taking explicit sizes and strides
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}
// Copies a tensor into ourselves; sizes must match
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since the dimensions and contiguity are checked, only the total
  // number of elements needs to match
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Source is on the host
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // Source is on a device
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
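// Illustrative usage (hypothetical names, not from the original source):
//
//   Tensor<float, 2, true> src(hostVec.data(), {num, dim}); // host buffer
//   Tensor<float, 2, true> dst(devPtr, {num, dim});         // device buffer
//   dst.copyFrom(src, stream); // host -> device copy
//
// getDeviceForAddress() returns -1 for host memory, so the two branches
// above reduce to the four possible cudaMemcpyKind combinations.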
// Copies ourselves into a tensor; sizes must match
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since the dimensions and contiguity are checked, only the total
  // number of elements needs to match
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Destination is on the host
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // Destination is on a device
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
// Returns true if the two tensors have the same sizes and strides
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}
// Returns true if the two tensors are of the same dimensionality and size
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}
// Reinterprets the data as another type of the same size
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}
// Const version of cast()
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}
// Reinterprets the innermost dimension as a larger type; requires
// canCastResize<U>() to hold
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), newSize, newStride);
}
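// Worked example (hypothetical values, not from the original source): a
// contiguous Tensor<float, 2> with sizes {8, 12} and strides {12, 1}
// satisfies canCastResize<float4>() (kMultiple == 4 and 12 % 4 == 0), and
// castResize<float4>() yields sizes {8, 3} and strides {3, 1} over the
// same storage.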
// Const version of castResize()
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
      castResize<U>();
}
// Returns true if we can castResize() this tensor to the new type
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // Check all outer strides
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // Check the innermost dimension: its size must be a multiple of the new
  // element size, and it must be unpadded
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
// Converts to a tensor with a different index type; when narrowing the
// index type, the new type must be able to address every offset
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
      data_, newSize, newStride);
}
// Returns true if the maximum element offset fits within NewIndexT
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum offset that can be calculated within this tensor
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
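// Illustrative check (hypothetical values, not from the original source): a
// contiguous Tensor<float, 2, false, long> of sizes {3000000, 1024} has a
// maximum offset of 3000000 * 1024 ~ 3.07e9, which exceeds
// std::numeric_limits<int>::max() (~2.15e9), so canUseIndexType<int>()
// returns false and the tensor must keep a 64-bit index type.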
// Returns the total number of elements contained within our data
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}
// Returns true if the tensor is fully contiguous in memory
// (no padding in any dimension)
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}
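// Illustrative note (hypothetical values, not from the original source): a
// tensor with sizes {4, 8} and strides {8, 1} is contiguous, while a padded
// view with sizes {4, 8} and strides {16, 1} is not, since by the time the
// loop reaches dimension 0 it sees getStride(0) == 16 but prevSize == 8.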
// Returns true if the stride of dimension i is consistent with the sizes
// and strides of the surrounding dimensions
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}
// Returns true if all dimensions are consistently sized
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}
// Returns true if the given dimension index has no padding
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) ||
      ((i < Dim - 1) &&
       ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}
// Returns a view with dimensions dim1 and dim2 exchanged; no data is moved
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1,
                                                          int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // If this tensor is marked innermost contiguous, the innermost dimension
  // cannot be transposed
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize, newStride);
}
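// Worked example (hypothetical values, not from the original source): for a
// row-major Tensor<float, 3> with sizes {2, 3, 4} and strides {12, 4, 1},
// transpose(0, 1) returns a view with sizes {3, 2, 4} and strides
// {4, 12, 1}; only the size/stride metadata is swapped.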
// Upcast to a higher dimensionality by prepending size-1 dimensions
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
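// Worked example (hypothetical values, not from the original source):
// upcasting a Tensor<float, 2> with sizes {5, 7} and strides {7, 1} via
// upcastOuter<3>() yields sizes {1, 5, 7} and strides {35, 7, 1}; the new
// outer stride is size_[0] * stride_[0] == 5 * 7.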
// Upcast to a higher dimensionality by appending size-1 dimensions
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
// Downcast by collapsing the outermost dimensions into one; the collapsed
// dimensions must be contiguous (no padding)
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since that would leave
  // garbage data in the collapsed dimensions
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
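// Worked example (hypothetical values, not from the original source): a
// contiguous Tensor<float, 3> with sizes {2, 3, 4} and strides {12, 4, 1}
// downcast via downcastOuter<2>() collapses the two outer dimensions into
// one, giving sizes {6, 4} and strides {4, 1}.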
// Downcast by collapsing the innermost dimensions into one; the collapsed
// dimensions must be contiguous (no padding)
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since that would leave
  // garbage data in the collapsed dimensions
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the innermost remaining dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}
// Returns a view of the innermost SubDim dimensions, starting at the given
// data pointer
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
      at, viewSizes, viewStrides);
}
// Returns a view of the innermost SubDim dimensions at our data pointer
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}
// Narrows the outermost (first) dimension to [start, start + size)
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}
// Narrows dimension dim to the range [start, start + size)
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // A narrowed view keeps the same strides; if we were innermost contiguous
  // before, we still are
  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize, stride_);
}
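// Illustrative usage (hypothetical values, not from the original source):
// for a Tensor<float, 2> with sizes {10, 4} and strides {4, 1},
// narrow(0, 2, 5) returns a view with sizes {5, 4}, the same strides, and
// data_ advanced by 2 * 4 elements; narrowOutermost(start, size) above is
// simply narrow(0, start, size).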
// Returns a NewDim-dimensional view of a contiguous tensor with the given
// sizes; the total number of elements must be unchanged
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
    std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());
  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total size of the new view must equal the total size of the old view
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);
  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(data(), sizes);
}
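// Illustrative usage (hypothetical values, not from the original source): a
// contiguous Tensor<float, 3> of sizes {2, 3, 4} (24 elements) can be
// reshaped with view<2>({6, 4}) or view<1>({24}); the element count must
// match, which is what the assertion above enforces.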
} } // namespace gpu, namespace faiss

// Brief descriptions (from the class documentation) of members that are
// referenced above but declared and defined elsewhere (Tensor.cuh):
//   data()                     -- returns a raw pointer to the start of our data
//   sizes() / strides()        -- return the size / stride arrays
//   getSize(i) / getStride(i)  -- size / stride of a single dimension
//   size_[Dim]                 -- size per each dimension
//   stride_[Dim]               -- array of strides (in sizeof(T) terms) per dimension
//   operator=(Tensor&)         -- copy assignment (defaulted)