#include "../../FaissAssert.h"
#include "DeviceUtils.h"

#include <cassert>
#include <initializer_list>
#include <limits>

namespace faiss { namespace gpu {
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>&
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  // Strides are computed for a fully packed, row-major layout
  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  assert(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = (IndexT) s;
  }

  // Strides are computed for a fully packed, row-major layout
  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
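// Usage sketch (hypothetical names, not part of this file): wrapping an
// existing device allocation in a contiguous 2-D tensor, assuming the
// header's default IndexT/PtrTraits template arguments:
//
//   float* devPtr = ...;  // numVecs * dim floats already on the GPU
//   Tensor<float, 2, true> vecs(devPtr, {numVecs, dim});
//   // vecs.getSize(0) == numVecs, vecs.getSize(1) == dim,
//   // vecs.getStride(0) == dim, vecs.getStride(1) == 1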
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyFrom(
    Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Sizes must match; for contiguous tensors it suffices to compare the
  // total number of elements
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Source is on the host
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // Source is on a device
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
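// Usage sketch (hypothetical names): filling a device tensor from host data.
// hostPtr, devPtr, n, d and stream are assumed to exist; the sizes of the
// two tensors must match.
//
//   Tensor<float, 2, true> hostVecs(hostPtr, {n, d});  // wraps host memory
//   Tensor<float, 2, true> devVecs(devPtr, {n, d});    // wraps device memory
//   devVecs.copyFrom(hostVecs, stream);                // async H2D copy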
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyTo(
    Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Sizes must match; for contiguous tensors it suffices to compare the
  // total number of elements
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Destination is on the host
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // Destination is on a device
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
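// Usage sketch (hypothetical names): retrieving results back to the host,
// the mirror image of the copyFrom() sketch above.
//
//   devVecs.copyTo(hostVecs, stream);   // async D2H copy
//   cudaStreamSynchronize(stream);      // hostVecs is valid after the sync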
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isSame(
    const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (size_[i] != rhs.size_[i]) {
      return false;
    }

    if (stride_[i] != rhs.stride_[i]) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}
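// Usage sketch (hypothetical tensor `vecs`): reinterpreting element bits at
// the same width, e.g. viewing float data as unsigned 32-bit words.
//
//   auto bits = vecs.cast<unsigned int>();
//   // same sizes and strides; only the element type is reinterpreted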
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  assert(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}
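// Usage sketch (hypothetical tensor `vecs`): widening float elements to
// float4 for vectorized loads, assuming the innermost dimension is a
// multiple of 4 and has stride 1.
//
//   if (vecs.canCastResize<float4>()) {
//     auto vecs4 = vecs.castResize<float4>();
//     // vecs4.getSize(1) == vecs.getSize(1) / 4
//   }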
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, Contig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // All outer strides must be divisible by the size multiple
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // The innermost dimension size must be divisible as well
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  // The innermost dimension must be packed (stride 1)
  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    assert(this->canCastIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum offset reachable with our sizes and strides
  size_t maxOffset = 0;

  if (Contig) {
    maxOffset = (size_t) size_[0] * (size_t) stride_[0];
  } else {
    for (int i = 0; i < Dim; ++i) {
      size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
      if (curMaxOffset > maxOffset) {
        maxOffset = curMaxOffset;
      }
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
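// Usage sketch (hypothetical tensor `vecs` indexed with int): promoting the
// index type, e.g. before offsets could exceed the int range.
//
//   if (vecs.canCastIndexType<long>()) {   // always true when widening
//     auto vecsLong = vecs.castIndexType<long>();
//   }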
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT
Tensor<T, Dim, Contig, IndexT, PtrTraits>::numElements() const {
  long size = getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (long) getSize(i);
  }

  return (IndexT) size;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) ||
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::transpose(int dim1,
                                                     int dim2) const {
  assert(dim1 >= 0 && dim1 < Dim);
  assert(dim2 >= 0 && dim2 < Dim);
  static_assert(!Contig, "cannot transpose contiguous arrays");

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  // Swap the sizes and strides of the two dimensions
  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(data_, newSize, newStride);
}
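// Usage sketch (hypothetical names): swapping the two dimensions of a 2-D
// tensor. Only non-Contig tensors may be transposed.
//
//   Tensor<float, 2, false> mat(devPtr, {rows, cols});
//   auto matT = mat.transpose(0, 1);
//   // matT.getSize(0) == cols, matT.getStride(0) == 1 -- no data is moved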
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastOuter() {
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // Newly-prepended (outer) dimensions of size 1
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // The original dimensions are shifted towards the inside
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
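// Usage sketch (hypothetical 2-D tensor `vecs` of shape (n, d)): adding a
// leading size-1 dimension, e.g. to treat (n, d) data as (1, n, d).
//
//   auto vecs3 = vecs.upcastOuter<3>();
//   // vecs3.getSize(0) == 1, vecs3.getSize(1) == n, vecs3.getSize(2) == d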
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastInner() {
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // The original dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Newly-appended (inner) dimensions of size 1 and stride 1
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastOuter() {
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must be contiguous (no padding),
  // otherwise the collapsed view would cover garbage data
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    assert(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these outer dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // The first non-collapsed dimension absorbs the collapsed size
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
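// Usage sketch (hypothetical 3-D tensor `t` of shape (a, b, c)): collapsing
// the two outer dimensions into one, giving shape (a * b, c).
//
//   auto t2 = t.downcastOuter<2>();
//   // t2.getSize(0) == a * b, t2.getSize(1) == c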
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastInner() {
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must be contiguous (no padding),
  // otherwise the collapsed view would cover garbage data
  for (int i = NewDim; i < Dim; ++i) {
    assert(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these inner dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // The innermost remaining dimension absorbs the collapsed size
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  // The view keeps the innermost SubDim sizes and strides
  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, Contig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}
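// Usage sketch (hypothetical 3-D tensor `t` of shape (a, b, c)): a view of
// the innermost dimensions, starting at the tensor's own data pointer.
//
//   auto inner = t.view<2>();   // shape (b, c), same underlying data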
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                           IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[0];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == 0) {
      assert(start + size <= size_[0]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(newData, newSize, stride_);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, false, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrow(int dim,
                                                  IndexT start,
                                                  IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      assert(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // Narrowing an inner dimension may leave gaps, so the result is
  // returned as a non-contiguous tensor
  return Tensor<T, Dim, false, IndexT, PtrTraits>(newData, newSize, stride_);
}
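// Usage sketch (hypothetical 2-D tensor `vecs` of shape (n, d)): selecting a
// contiguous block of rows without copying.
//
//   auto someRows = vecs.narrowOutermost(rowStart, numRows);
//   // someRows.getSize(0) == numRows; its data pointer is offset by
//   // rowStart * vecs.getStride(0) elements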
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(
    std::initializer_list<IndexT> sizes) {
  static_assert(Contig, "on contiguous tensors only");

  assert(sizes.size() == NewDim);

  // The total number of elements must be unchanged by the reshape
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  assert(curSize == newSize);
  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace gpu, namespace faiss