Tensor-inl.cuh
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "../../FaissAssert.h"
#include "DeviceUtils.h"
#include <limits>

namespace faiss { namespace gpu {

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}

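// Move assignment: take over the other tensor's data pointer, sizes and
// strides, and leave the moved-from tensor empty.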
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>&
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  assert(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, Contig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}

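// copyFrom()/copyTo() perform an asynchronous memcpy between this tensor and
// `t` on `stream`; both tensors must be contiguous with equal element counts,
// and the host/device copy direction is inferred from where each pointer
// resides.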
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, Contig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  static_assert(Contig, "only contiguous tensors handled");

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    FAISS_ASSERT(this->data_);
    FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

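// Two tensors are considered the same if their sizes match in every
// dimension (and, for non-contiguous tensors, their strides as well).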
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isSame(
  const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (size_[i] != rhs.size_[i]) {
      return false;
    }

    if (!Contig) {
      if (stride_[i] != rhs.stride_[i]) {
        return false;
      }
    }
  }

  return true;
}

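// cast() reinterprets the element type without changing sizes or strides;
// castResize() additionally folds the innermost dimension when casting to a
// larger element type (e.g. float to float4), which canCastResize() checks.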
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  assert(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1; // this is the same as the old stride
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, Contig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, Contig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // Check all outer strides
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // Check inner size
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}

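// Convert the index type used for sizes and strides; a narrowing conversion
// is only legal when every reachable offset fits in the new type, which
// canCastIndexType() verifies.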
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    assert(this->canCastIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum offset that can be calculated
  // FIXME: maybe also consider the offset in bytes? multiply by sizeof(T)?
  size_t maxOffset = 0;

  if (Contig) {
    maxOffset = (size_t) size_[0] * (size_t) stride_[0];
  } else {
    for (int i = 0; i < Dim; ++i) {
      size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
      if (curMaxOffset > maxOffset) {
        maxOffset = curMaxOffset;
      }
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}

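// Element count and layout queries: numElements() is the product of all
// sizes; isContiguous() checks that the strides describe a dense,
// innermost-major layout.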
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT
Tensor<T, Dim, Contig, IndexT, PtrTraits>::numElements() const {
  long size = getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= getSize(i);
  }

  return size;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) || // just in case
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}

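// Returns a view with dimensions dim1 and dim2 exchanged; only the size and
// stride arrays are swapped, no data is moved.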
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::transpose(int dim1,
                                                     int dim2) const {
  assert(dim1 >= 0 && dim1 < Dim);
  assert(dim2 >= 0 && dim2 < Dim);
  static_assert(!Contig, "cannot transpose contiguous arrays");

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(data_, newSize, newStride);
}

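// upcastOuter()/upcastInner() add leading or trailing size-1 dimensions;
// downcastOuter()/downcastInner() collapse leading or trailing dimensions
// into one, which requires the collapsed dimensions to be contiguous.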
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions get copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    assert(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = NewDim; i < Dim; ++i) {
    assert(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the first non-collapsed dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, Contig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

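// view() exposes the innermost SubDim dimensions starting at a given
// address; narrowOutermost()/narrow() restrict a single dimension to a
// [start, start + size) range without copying.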
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, Contig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                           IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[0];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == 0) {
      assert(start + size <= size_[0]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  return Tensor<T, Dim, Contig, IndexT, PtrTraits>(newData, newSize, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, false, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::narrow(int dim,
                                                  IndexT start,
                                                  IndexT size) {
  DataPtrType newData = data_;

  if (start > 0) {
    newData += start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      assert(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // The narrowed tensor is not necessarily contiguous
  return Tensor<T, Dim, false, IndexT, PtrTraits>(newData, newSize, stride_);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, true, IndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  static_assert(Contig, "on contiguous tensors only");

  assert(sizes.size() == NewDim);

  // The total size of the new view must be the same as the total size
  // of the old view
  size_t curSize = numElements();

  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  assert(curSize == newSize);
  return Tensor<T, NewDim, true, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace
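// Illustrative usage sketch (not part of the Tensor implementation above).
// It assumes the default template arguments declared in Tensor.cuh
// (IndexT = int, default pointer traits) and a pre-existing device
// allocation `devPtr` holding at least numRows * numCols floats:
//
//   faiss::gpu::Tensor<float, 2, true> t(devPtr, {numRows, numCols});
//
//   auto rows  = t.narrowOutermost(4, 8);   // rows [4, 12) as a 2-d view
//   auto flat  = t.downcastOuter<1>();      // same data as one flat dimension
//   auto asInt = t.cast<int>();             // reinterpret the element type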