Tensor-inl.cuh
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "../../FaissAssert.h"
#include "DeviceUtils.h"
#include <limits>

namespace faiss { namespace gpu {

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>&
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}

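// Stride layout example (illustrative): constructing with sizes {2, 3, 4}
// gives the innermost dimension stride 1 and each outer dimension the
// product of the sizes inside it, i.e. strides {12, 4, 1} (row-major packing).
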
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  assert(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

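// Usage sketch (hypothetical names: `devT` is a device-resident tensor and
// `hostT` a host-resident tensor of the same total size):
//
//   devT.copyFrom(hostT, stream);               // async host -> device copy
//   CUDA_VERIFY(cudaStreamSynchronize(stream)); // wait before reading devT
//
// The copy direction is chosen per pointer via getDeviceForAddress(), so the
// same call also handles host/host and device/device sources.
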
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isSame(
  const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (size_[i] != rhs.size_[i]) {
      return false;
    }

    if (!Contig) {
      if (stride_[i] != rhs.stride_[i]) {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  assert(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1; // this is the same as the old stride
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}

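// Example (illustrative): a contiguous Tensor<float, 2> of size {8, 16}
// (strides {16, 1}) can be castResize<float4>()'d, since every outer stride
// and the innermost size are divisible by sizeof(float4) / sizeof(float) = 4;
// the result is a Tensor<float4, 2> of size {8, 4} with strides {4, 1}.
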
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, Contig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // Check all outer strides
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // Check inner size
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    assert(this->canCastIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find maximum offset that can be calculated
  // FIXME: maybe also consider offset in bytes? multiply by sizeof(T)?
  size_t maxOffset = 0;

  if (Contig) {
    maxOffset = (size_t) size_[0] * (size_t) stride_[0];
  } else {
    for (int i = 0; i < Dim; ++i) {
      size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
      if (curMaxOffset > maxOffset) {
        maxOffset = curMaxOffset;
      }
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}

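// Example (illustrative): a Tensor<float, 2, true, long> whose largest linear
// offset (size_[0] * stride_[0] in the contiguous case) fits within
// std::numeric_limits<int>::max() can be narrowed with castIndexType<int>(),
// which allows kernels to use cheaper 32-bit index arithmetic.
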
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT
Tensor<T, Dim, Contig, IndexT, PtrTraits>::numElements() const {
  long size = getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= getSize(i);
  }

  return size;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}

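// Example (illustrative): a {2, 3} tensor with strides {3, 1} is contiguous;
// after transpose(0, 1) it has sizes {3, 2} and strides {1, 3}, so the
// innermost stride is no longer 1 and isContiguous() returns false.
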
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) || // just in case
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::transpose(int dim1,
                                                     int dim2) const {
  assert(dim1 >= 0 && dim1 < Dim);
  assert(dim2 >= 0 && dim2 < Dim);
  static_assert(!Contig, "cannot transpose contiguous arrays");

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(data_, newSize, newStride);
}

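// Usage sketch (hypothetical `t` is a Tensor<float, 2, false>):
//   auto tT = t.transpose(0, 1);
// swaps the two dimensions by exchanging their sizes and strides; no data
// is moved, so the result generally views the same memory non-contiguously.
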
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

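// Example (illustrative): upcastOuter<3>() on a {3, 4} tensor with strides
// {4, 1} prepends a size-1 dimension, giving sizes {1, 3, 4} and strides
// {12, 4, 1}; the new outer stride spans the whole original tensor.
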
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions get copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    assert(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

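// Example (illustrative): downcastOuter<2>() on a contiguous {2, 3, 4} tensor
// (strides {12, 4, 1}) folds the two outermost dimensions together, yielding
// sizes {6, 4} and strides {4, 1}; the collapsed dimensions must be free of
// padding, which the isContiguousDim() asserts above enforce.
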
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = NewDim; i < Dim; ++i) {
    assert(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the first non-collapsed dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

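// Example (illustrative): downcastInner<1>() on the same contiguous {2, 3, 4}
// tensor collapses the inner dimensions into the outermost one, yielding a
// single dimension of size 24 with stride 1.
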
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, Contig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                           IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[0];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == 0) {
      assert(start + size <= size_[0]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(newData, newSize, stride_);
}

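// Usage sketch (hypothetical `t` is an {8, 128} tensor):
//   auto slice = t.narrowOutermost(2, 4);
// keeps rows [2, 6) of the outermost dimension: the data pointer advances by
// 2 * stride_[0] and the result has sizes {4, 128} with unchanged strides.
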
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, false, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrow(int dim,
                                                  IndexT start,
                                                  IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      assert(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // The narrowed tensor is not necessarily contiguous
  return Tensor<T, Dim, false, IndexT, PtrTraits>(newData, newSize, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, true, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  static_assert(Contig, "on contiguous tensors only");

  assert(sizes.size() == NewDim);

  // The total size of the new view must be the same as the total size
  // of the old view
  size_t curSize = numElements();

  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  assert(curSize == newSize);
  return Tensor<T, NewDim, true, IndexT, PtrTraits>(data(), sizes);
}

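// Example (illustrative): a contiguous {6, 4} tensor (24 elements) can be
// reshaped to three dimensions with `t.view<3>({2, 3, 4})`; the element
// counts must match, which the assert above enforces.
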
} } // namespace