#include "../../FaissAssert.h"
#include "DeviceUtils.h"
#include <assert.h>
#include <limits>
namespace faiss { namespace gpu {
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>&
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;

  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  // Compute innermost-contiguous strides from the sizes
  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  assert(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  // Compute innermost-contiguous strides from the sizes
  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
  DataPtrType data,
  const IndexT sizes[Dim],
  const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Sizes must match; since both tensors are contiguous, checking the
  // total number of elements suffices
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Source is on the host
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // Source is on a device
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Sizes must match; since both tensors are contiguous, checking the
  // total number of elements suffices
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // Destination is on the host
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // Destination is on a device
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
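// Illustrative usage sketch, not part of the original source: wraps a
// hypothetical host buffer and a hypothetical device allocation (devPtr is
// assumed to point at 16 * 32 floats of device memory) and issues an
// asynchronous host-to-device copy on `stream`.
inline void exampleCopyFromHost(float* devPtr,
                                float* hostBuf,
                                cudaStream_t stream) {
  Tensor<float, 2, true> hostT(hostBuf, {16, 32});  // non-owning host view
  Tensor<float, 2, true> devT(devPtr, {16, 32});    // non-owning device view

  devT.copyFrom(hostT, stream);  // async H2D copy; element counts must match
}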
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isSame(
  const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (size_[i] != rhs.size_[i]) {
      return false;
    }

    if (stride_[i] != rhs.stride_[i]) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  assert(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  // The innermost dimension shrinks by the size ratio of U to T
  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}
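// Illustrative usage sketch, not part of the original source: reinterprets a
// hypothetical [8][1024] float tensor as float4 elements, shrinking the
// innermost dimension by sizeof(float4) / sizeof(float) = 4. Assumes `data`
// is suitably aligned for float4 access.
inline void exampleCastResize(float* data) {
  Tensor<float, 2, true> t(data, {8, 1024});

  if (t.canCastResize<float4>()) {
    Tensor<float4, 2, true> t4 = t.castResize<float4>();  // size [8][256]
    (void) t4;
  }
}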
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, Contig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // All outer strides must be divisible by the size ratio
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // The innermost dimension must be divisible by the size ratio
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  // The innermost dimension must be dense
  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    assert(this->canCastIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum offset that can be reached within this tensor
  size_t maxOffset = 0;

  if (Contig) {
    maxOffset = (size_t) size_[0] * (size_t) stride_[0];
  } else {
    for (int i = 0; i < Dim; ++i) {
      size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
      if (curMaxOffset > maxOffset) {
        maxOffset = curMaxOffset;
      }
    }
  }

  // The new index type must be able to represent this offset
  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}
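// Illustrative usage sketch, not part of the original source: narrowing the
// index type from long to int is only legal when every reachable offset fits
// in an int; widening is always safe.
inline void exampleCastIndexType(float* data) {
  Tensor<float, 2, true, long> big(data, {1024, 1024});

  if (big.canCastIndexType<int>()) {
    Tensor<float, 2, true, int> small = big.castIndexType<int>();
    (void) small;
  }
}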
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT
Tensor<T, Dim, Contig, IndexT, PtrTraits>::numElements() const {
  long size = getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (long) getSize(i);
  }

  return (IndexT) size;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  // Walk from the innermost dimension outwards; each stride must equal the
  // product of all inner sizes (dimensions of size 1 are ignored)
  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  // The innermost dimension is always considered contiguous; an outer
  // dimension is contiguous if its stride covers the next dimension exactly
  return (i == Dim - 1) ||
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::transpose(int dim1,
                                                     int dim2) const {
  assert(dim1 >= 0 && dim1 < Dim);
  assert(dim2 >= 0 && dim2 < Dim);
  static_assert(!Contig, "cannot transpose contiguous arrays");

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  // Swap the sizes and strides of the two dimensions
  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(data_, newSize, newStride);
}
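// Illustrative usage sketch, not part of the original source: transpose swaps
// sizes and strides without moving any data, and is only available on tensors
// instantiated with Contig == false.
inline void exampleTranspose(float* data) {
  Tensor<float, 2, false> m(data, {16, 32});
  Tensor<float, 2, false> mT = m.transpose(0, 1);  // size [32][16], same storage
  (void) mT;
}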
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // New dimensions prepended on the outside: size 1, with a stride
      // covering the entire old tensor
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // The remaining dimensions are shifted over
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // The new innermost dimensions have size 1 and stride 1
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
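// Illustrative usage sketch, not part of the original source: upcasting adds
// size-1 dimensions on the outside or the inside without touching the data.
inline void exampleUpcast(float* data) {
  Tensor<float, 2, true> t(data, {16, 32});

  Tensor<float, 3, true> outer = t.upcastOuter<3>();  // size [1][16][32]
  Tensor<float, 3, true> inner = t.upcastInner<3>();  // size [16][32][1]
  (void) outer;
  (void) inner;
}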
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must not be padded; otherwise the
  // collapsed dimension would cover garbage data
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    assert(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // These outer dimensions are collapsed
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // The first non-collapsed dimension absorbs the collapsed sizes
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
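// Illustrative usage sketch, not part of the original source: collapsing the
// two outermost dimensions of an unpadded [4][8][32] tensor yields [32][32].
inline void exampleDowncastOuter(float* data) {
  Tensor<float, 3, true> t(data, {4, 8, 32});

  Tensor<float, 2, true> t2 = t.downcastOuter<2>();  // size [4 * 8][32]
  (void) t2;
}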
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must not be padded; otherwise the
  // collapsed dimension would cover garbage data
  for (int i = NewDim; i < Dim; ++i) {
    assert(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // These inner dimensions are collapsed
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // The innermost remaining dimension absorbs the collapsed sizes
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  // The view keeps the innermost SubDim sizes and strides, starting at `at`
  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, Contig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}
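// Illustrative usage sketch, not part of the original source: view<SubDim>()
// keeps only the innermost SubDim dimensions, here viewing a [4][16][32]
// tensor as its first [16][32] slice starting at data().
inline void exampleView(float* data) {
  Tensor<float, 3, true> t(data, {4, 16, 32});

  Tensor<float, 2, true> slice = t.view<2>();  // innermost [16][32] at data()
  (void) slice;
}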
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                           IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[0];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == 0) {
      assert(start + size <= size_[0]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(newData, newSize, stride_);
}
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, false, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrow(int dim,
                                                  IndexT start,
                                                  IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      assert(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // Narrowing an inner dimension may leave gaps, so the result is always
  // typed as non-contiguous
  return Tensor<T, Dim, false, IndexT, PtrTraits>(newData, newSize, stride_);
}
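// Illustrative usage sketch, not part of the original source: slicing rows
// and columns out of a [16][32] matrix without copying data.
inline void exampleNarrow(float* data) {
  Tensor<float, 2, true> t(data, {16, 32});

  Tensor<float, 2, true> rows = t.narrowOutermost(4, 8);  // rows [4, 12)
  Tensor<float, 2, false> cols = t.narrow(1, 0, 16);       // cols [0, 16)
  (void) rows;
  (void) cols;
}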
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  static_assert(Contig, "on contiguous tensors only");

  assert(sizes.size() == NewDim);

  // The total size of the new view must match the total size of the old view
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= (size_t) s;
  }

  assert(curSize == newSize);
  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace