Tensor-inl.cuh
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */


#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <limits>

namespace faiss { namespace gpu {

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  this->operator=(t);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  this->operator=(std::move(t));
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  data_ = t.data_;
  for (int i = 0; i < Dim; ++i) {
    size_[i] = t.size_[i];
    stride_[i] = t.stride_[i];
  }

  return *this;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}
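
// Illustrative note (not part of the original Faiss source): the constructors
// above assume a dense row-major layout. For example, a 3-D tensor built with
// sizes {2, 3, 4} gets strides {12, 4, 1}, so element (i, j, k) is stored at
// data_[i * 12 + j * 4 + k].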

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
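
// Illustrative sketch (not part of the original Faiss source): wrapping
// existing host and device buffers in Tensor views and copying between them.
// Assumes the default IndexT/PtrTraits template arguments declared in
// Tensor.cuh, and that both buffers hold numVecs * dim floats.
inline void exampleCopyHostToDevice(float* devPtr,
                                    float* hostPtr,
                                    int numVecs,
                                    int dim,
                                    cudaStream_t stream) {
  Tensor<float, 2, true> devVecs(devPtr, {numVecs, dim});
  Tensor<float, 2, true> hostVecs(hostPtr, {numVecs, dim});

  // copyFrom inspects where each pointer lives (host or device) and issues
  // the appropriately-directed cudaMemcpyAsync on `stream`.
  devVecs.copyFrom(hostVecs, stream);
}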

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
  const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
  const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1; // this is the same as the old stride
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // Ensure that the base pointer is sizeof(U) aligned
  if (((uintptr_t) data_) % sizeof(U) != 0) {
    return false;
  }

  // Check all outer strides
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // Check inner size
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}
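
// Illustrative sketch (not part of the original Faiss source): viewing the
// innermost dimension of a float tensor as float4 for vectorized access. This
// is only legal when canCastResize<float4>() holds: the base pointer is
// 16-byte aligned, the innermost size and all outer strides are divisible by
// 4, and the innermost stride is 1.
inline Tensor<float4, 2, true> exampleCastToFloat4(Tensor<float, 2, true>& t) {
  GPU_FAISS_ASSERT(t.canCastResize<float4>());

  // The result keeps the outer size and has the innermost size divided by 4.
  return t.castResize<float4>();
}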

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find maximum offset that can be calculated
  // FIXME: maybe also consider offset in bytes? multiply by sizeof(T)?
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) || // just in case
         ((i < Dim - 1) &&
          ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1,
                                                          int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // If a tensor is innermost contiguous, one cannot transpose the innermost
  // dimension
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize, newStride);
}
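
// Illustrative sketch (not part of the original Faiss source): swapping the
// two outer dimensions of a 3-D inner-contiguous tensor. Note that when
// InnerContig is true, the innermost dimension (Dim - 1) may not take part in
// the transpose, per the assertion above.
inline Tensor<float, 3, true> exampleTransposeOuter(Tensor<float, 3, true>& t) {
  // No data is moved; only the size/stride metadata is swapped.
  return t.transpose(0, 1);
}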

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions get copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the first non-collapsed dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
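
// Illustrative sketch (not part of the original Faiss source): collapsing the
// two outermost dimensions of a contiguous (a x b x c) tensor into a single
// (a * b) x c tensor via downcastOuter. The collapsed dimensions must have no
// padding, which the assertions above enforce.
inline Tensor<float, 2, true> exampleFlattenOuter(Tensor<float, 3, true>& t) {
  return t.downcastOuter<2>();
}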

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // If we were innermost contiguous before, we are still innermost contiguous
  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize, stride_);
}
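
// Illustrative sketch (not part of the original Faiss source): selecting rows
// [start, start + num) of a 2-D tensor without copying. The result aliases the
// original data_ and keeps the original strides.
inline Tensor<float, 2, true> exampleRows(Tensor<float, 2, true>& t,
                                          int start,
                                          int num) {
  return t.narrowOutermost(start, num);
}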

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, true, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());

  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total size of the new view must be the same as the total size
  // of the old view
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);
  return Tensor<T, NewDim, true, IndexT, PtrTraits>(data(), sizes);
}
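
// Illustrative sketch (not part of the original Faiss source): reshaping a
// contiguous 2-D tensor into a 1-D view over the same data. The element count
// must match, which the assertions above enforce.
inline Tensor<float, 1, true> exampleFlattenView(Tensor<float, 2, true>& t) {
  return t.view<1>({t.getSize(0) * t.getSize(1)});
}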

} } // namespace