16 #include <cuda_runtime.h>
17 #include <initializer_list>
23 namespace faiss {
namespace gpu {
30 template <
typename U>
class PtrTraits>
35 template <
typename TensorType,
37 template <
typename U>
class PtrTraits>
45 typedef T* __restrict__ PtrType;
76 typename IndexT = int,
80 enum { NumDim = Dim };
82 typedef IndexT IndexType;
83 enum { IsContig = Contig };
84 typedef typename PtrTraits<T>::PtrType DataPtrType;
85 typedef Tensor<T, Dim, Contig, IndexT, PtrTraits> TensorType;
/// Default constructor; declared for both host and device code.
88 __host__ __device__
Tensor();
91 __host__ __device__
Tensor(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t)
95 __host__ __device__
Tensor(Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t)
/// Assignment (compiler-generated via `= default`).
/// Note that the right-hand side is taken by non-const lvalue reference.
99 __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>&
100 operator=(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t) =
default;
/// Assignment from an rvalue tensor (`&&`); only declared here.
/// NOTE(review): the definition is not part of this listing (presumably in
/// Tensor-inl.cuh) -- confirm what state the moved-from tensor is left in.
103 __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>&
104 operator=(Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t);
108 const IndexT
sizes[Dim]);
110 std::initializer_list<IndexT>
sizes);
116 const IndexT
sizes[Dim],
/// Copies a tensor into ourselves; sizes must match.
/// Host-only.
/// \param t       tensor to copy from (same element type, rank, contiguity
///                flag, index type and pointer traits as this tensor)
/// \param stream  CUDA stream handed to the copy
120 __host__
void copyFrom(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
121 cudaStream_t stream);
/// Copies ourselves into a tensor; sizes must match.
/// Host-only.
/// \param t       tensor to copy into (same element type, rank, contiguity
///                flag, index type and pointer traits as this tensor)
/// \param stream  CUDA stream handed to the copy
124 __host__
void copyTo(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
125 cudaStream_t stream);
/// Compares this tensor with `rhs`, which may have a different rank
/// (`OtherDim`) but shares the element type, contiguity flag, index type and
/// pointer traits.
/// NOTE(review): the exact criterion for "same" is defined out of line and is
/// not visible here -- confirm before relying on it.
129 template <
int OtherDim>
130 __host__ __device__
bool
131 isSame(
const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs)
const;
/// Returns a tensor whose element type is `U`, with the same rank, contiguity
/// flag, index type and pointer traits as this one.
/// NOTE(review): presumably requires sizeof(U) == sizeof(T); the definition is
/// not visible here -- confirm.
135 template <
typename U>
136 __host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
cast();
139 template <
typename U>
141 const Tensor<U, Dim, Contig, IndexT, PtrTraits>
cast()
const;
/// Returns a tensor whose element type is `U`, with the same rank, contiguity
/// flag, index type and pointer traits as this one.
/// The companion `canCastResize()` reports whether this conversion is
/// possible for the current tensor.
/// NOTE(review): presumably the innermost size is rescaled when
/// sizeof(U) != sizeof(T); the definition is not visible here -- confirm.
149 template <
typename U>
150 __host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits>
castResize();
153 template <
typename U>
154 __host__ __device__
const Tensor<U, Dim, Contig, IndexT, PtrTraits>
158 template <
typename U>
164 template <
typename NewIndexT>
165 __host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
170 template <
typename NewIndexT>
/// Returns a raw pointer to the start of our data.
174 __host__ __device__
inline DataPtrType
data() {
180 __host__ __device__
inline DataPtrType
end() {
/// Returns a raw pointer to the start of our data (const).
185 __host__ __device__
inline
186 const DataPtrType
data()
const {
192 __host__ __device__
inline DataPtrType
end()
const {
/// Cast to a different datatype.
/// Reinterprets the raw data pointer `data_` as `PtrTraits<U>::PtrType`; only
/// the pointer type changes, sizes and strides are not consulted here.
197 template <
typename U>
198 __host__ __device__
inline
199 typename PtrTraits<U>::PtrType
dataAs() {
200 return reinterpret_cast<typename PtrTraits<U>::PtrType
>(
data_);
/// Cast to a different datatype (const overload).
/// Reinterprets the raw data pointer `data_` as
/// `PtrTraits<const U>::PtrType`; sizes and strides are not consulted here.
204 template <
typename U>
205 __host__ __device__
inline
206 const typename PtrTraits<const U>::PtrType
dataAs()
const {
207 return reinterpret_cast<typename PtrTraits<const U>::PtrType
>(
data_);
211 __host__ __device__
inline
216 __host__ __device__
inline
/// Returns an `IndexT` for dimension index `i`.
/// NOTE(review): the body is missing from this listing; presumably this is
/// `size_[i]` (the per-dimension size array) with no bounds check -- confirm.
222 __host__ __device__
inline IndexT
getSize(
int i)
const {
/// Returns an `IndexT` for dimension index `i`.
/// NOTE(review): the body is missing from this listing; presumably this is
/// `stride_[i]`, which the member summary describes as being in sizeof(T)
/// terms (elements, not bytes) -- confirm.
228 __host__ __device__
inline IndexT
getStride(
int i)
const {
/// Returns the size array (read-only pointer to `IndexT`).
243 __host__ __device__
inline const IndexT*
sizes()
const {
/// Returns the stride array (read-only pointer to `IndexT`).
248 __host__ __device__
inline const IndexT*
strides()
const {
263 __host__ __device__
bool isConsistentlySized(
int i)
const;
268 __host__ __device__
bool isConsistentlySized()
const;
283 template <
int NewDim>
290 template <
int NewDim>
297 template <
int NewDim>
304 template <
int NewDim>
310 template <
int SubDim>
312 view(DataPtrType at);
316 template <
int SubDim>
332 narrow(
int dim, IndexT start, IndexT size);
337 template <
int NewDim>
339 view(std::initializer_list<IndexT>
sizes);
355 template <
typename IndexType>
356 bool canCastIndexType() {
/// Recursive step of the variadic index-type check.
///
/// Returns true only if `arg` and every object in `args` report that they can
/// be re-indexed with `IndexType`. The `&&` short-circuits, so later objects
/// are not queried once one fails. The recursion ends at the zero-argument
/// overload of `canCastIndexType`.
///
/// \tparam IndexType candidate index type. It cannot be deduced from the
///         function arguments, so it is forwarded explicitly on every call.
/// \param arg   first object to query; must provide a member template
///              `canCastIndexType<IndexType>()` callable on a const object.
/// \param args  remaining objects, checked by recursion.
template <typename IndexType, typename T, typename... U>
bool canCastIndexType(const T& arg, const U&... args) {
  // `arg` has a dependent type, so the `template` keyword is required for
  // `canCastIndexType<IndexType>` to be parsed as a template-id instead of a
  // pair of relational operators.
  return arg.template canCastIndexType<IndexType>() &&
      canCastIndexType<IndexType>(args...);
}
368 template <
typename IndexType,
typename... T>
369 bool canCastIndexType(
const T&... args) {
370 return detail::canCastIndexType(args...);
376 template <
typename TensorType,
template <
typename U>
class PtrTraits>
380 operator=(
typename TensorType::DataType val) {
386 __host__ __device__
operator typename TensorType::DataType&() {
391 __host__ __device__
operator const typename TensorType::DataType&()
const {
396 __host__ __device__
typename TensorType::DataType* operator&() {
401 __host__ __device__
const typename TensorType::DataType* operator&()
const {
406 __host__ __device__
inline typename TensorType::DataPtrType
data() {
411 __host__ __device__
inline
412 const typename TensorType::DataPtrType
data()
const {
417 template <
typename T>
418 __host__ __device__ T&
as() {
423 template <
typename T>
424 __host__ __device__
const T&
as()
const {
/// Cast to a different datatype.
/// Reinterprets this view's pointer `data_` as `PtrTraits<T>::PtrType`.
/// Note: the template parameter `T` here names the target type of the cast.
429 template <
typename T>
430 __host__ __device__
inline
431 typename PtrTraits<T>::PtrType
dataAs() {
432 return reinterpret_cast<typename PtrTraits<T>::PtrType
>(
data_);
/// Cast to a different datatype (const).
/// Reinterprets this view's pointer `data_` as
/// `PtrTraits<const T>::PtrType`.
436 template <
typename T>
437 __host__ __device__
inline
438 typename PtrTraits<const T>::PtrType
dataAs()
const {
439 return reinterpret_cast<typename PtrTraits<const T>::PtrType
>(
data_);
/// Use the texture cache for reads. Device-only (`__device__`).
/// The code following the guard below is compiled only when
/// __CUDA_ARCH__ >= 350; the rest of the body is missing from this listing.
443 __device__
inline typename TensorType::DataType
ldg()
const {
444 #if __CUDA_ARCH__ >= 350
452 template <
typename T>
454 #if __CUDA_ARCH__ >= 350
455 return __ldg(dataAs<T>());
466 friend class Tensor<typename TensorType::DataType,
468 TensorType::IsContig,
469 typename TensorType::IndexType,
474 typename TensorType::DataPtrType data)
483 typename TensorType::DataPtrType
const data_;
487 template <
typename TensorType,
489 template <
typename U>
class PtrTraits>
494 __host__ __device__
inline
495 SubTensor<TensorType, SubDim - 1, PtrTraits>
497 if (TensorType::IsContig && SubDim == 1) {
499 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
502 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
504 data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
510 __host__ __device__
inline
511 const SubTensor<TensorType, SubDim - 1, PtrTraits>
513 if (TensorType::IsContig && SubDim == 1) {
515 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
518 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
520 data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
525 __host__ __device__
typename TensorType::DataType* operator&() {
530 __host__ __device__
const typename TensorType::DataType* operator&()
const {
535 __host__ __device__
inline typename TensorType::DataPtrType
data() {
540 __host__ __device__
inline
541 const typename TensorType::DataPtrType
data()
const {
546 template <
typename T>
547 __host__ __device__ T&
as() {
552 template <
typename T>
553 __host__ __device__
const T&
as()
const {
/// Cast to a different datatype.
/// Reinterprets the start of this sub-region, `data_`, as
/// `PtrTraits<T>::PtrType`.
/// Note: the template parameter `T` here names the target type of the cast.
558 template <
typename T>
559 __host__ __device__
inline
560 typename PtrTraits<T>::PtrType
dataAs() {
561 return reinterpret_cast<typename PtrTraits<T>::PtrType
>(
data_);
/// Cast to a different datatype (const).
/// Reinterprets the start of this sub-region, `data_`, as
/// `PtrTraits<const T>::PtrType`.
565 template <
typename T>
566 __host__ __device__
inline
567 typename PtrTraits<const T>::PtrType
dataAs()
const {
568 return reinterpret_cast<typename PtrTraits<const T>::PtrType
>(
data_);
/// Use the texture cache for reads. Device-only (`__device__`).
/// The code following the guard below is compiled only when
/// __CUDA_ARCH__ >= 350; the rest of the body is missing from this listing.
572 __device__
inline typename TensorType::DataType
ldg()
const {
573 #if __CUDA_ARCH__ >= 350
581 template <
typename T>
583 #if __CUDA_ARCH__ >= 350
584 return __ldg(dataAs<T>());
592 Tensor<
typename TensorType::DataType,
594 TensorType::IsContig,
595 typename TensorType::IndexType,
602 friend class SubTensor<TensorType, SubDim + 1, PtrTraits>;
606 Tensor<
typename TensorType::DataType,
608 TensorType::IsContig,
609 typename TensorType::IndexType,
614 typename TensorType::DataPtrType
data)
623 typename TensorType::DataPtrType
const data_;
628 template <
typename T,
int Dim,
bool Contig,
629 typename IndexT,
template <
typename U>
class PtrTraits>
630 __host__ __device__
inline
636 *
this, data_)[index]);
639 template <
typename T,
int Dim,
bool Contig,
640 typename IndexT,
template <
typename U>
class PtrTraits>
641 __host__ __device__
inline
647 const_cast<TensorType&
>(*this), data_)[index]);
652 #include "Tensor-inl.cuh"
__host__ __device__ Tensor()
Default constructor.
__host__ __device__ const PtrTraits< const U >::PtrType dataAs() const
Cast to a different datatype.
__host__ __device__ Tensor< T, NewDim, Contig, IndexT, PtrTraits > downcastInner()
__host__ __device__ PtrTraits< const T >::PtrType dataAs() const
Cast to a different datatype (const)
__host__ __device__ bool isContiguousDim(int i) const
Returns true if the given dimension index has no padding.
__device__ T ldgAs() const
Use the texture cache for reads; cast as a particular type.
__host__ __device__ Tensor< T, Dim, Contig, IndexT, PtrTraits > transpose(int dim1, int dim2) const
__host__ __device__ DataPtrType data()
Returns a raw pointer to the start of our data.
__host__ __device__ PtrTraits< T >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ Tensor< T, Dim, Contig, IndexT, PtrTraits > narrowOutermost(IndexT start, IndexT size)
__host__ __device__ PtrTraits< const T >::PtrType dataAs() const
Cast to a different datatype (const)
DataPtrType data_
Raw pointer to where the tensor data begins.
__host__ __device__ Tensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t)=default
Assignment.
__host__ __device__ const TensorType::DataPtrType data() const
Returns a raw accessor to our slice (const).
__device__ TensorType::DataType ldg() const
Use the texture cache for reads.
__host__ __device__ Tensor< T, NewDim, Contig, IndexT, PtrTraits > upcastOuter()
__host__ bool canCastIndexType() const
__host__ __device__ T & as()
Cast to a different datatype.
__host__ __device__ T & as()
Cast to a different datatype.
TensorType & tensor_
The tensor we're referencing.
__host__ __device__ Tensor< T, Dim, false, IndexT, PtrTraits > narrow(int dim, IndexT start, IndexT size)
__device__ TensorType::DataType ldg() const
Use the texture cache for reads.
__host__ __device__ Tensor< U, Dim, Contig, IndexT, PtrTraits > cast()
__host__ void copyTo(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
TensorType::DataPtrType const data_
The start of our sub-region.
__host__ __device__ bool isSame(const Tensor< T, OtherDim, Contig, IndexT, PtrTraits > &rhs) const
__host__ Tensor< T, Dim, Contig, NewIndexT, PtrTraits > castIndexType() const
__host__ __device__ IndexT numElements() const
__device__ T ldgAs() const
Use the texture cache for reads; cast as a particular type.
__host__ __device__ const SubTensor< TensorType, SubDim-1, PtrTraits > operator[](typename TensorType::IndexType index) const
Tensor< typename TensorType::DataType, SubDim, TensorType::IsContig, typename TensorType::IndexType, PtrTraits > view()
TensorType::DataPtrType const data_
Where our value is located.
__host__ __device__ const IndexT * strides() const
Returns the stride array.
__host__ __device__ IndexT getStride(int i) const
__host__ __device__ const IndexT * sizes() const
Returns the size array.
__host__ __device__ PtrTraits< U >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ size_t getSizeInBytes() const
__host__ __device__ DataPtrType end()
Specialization for a view of a single value (0-dimensional)
__host__ __device__ Tensor< T, NewDim, Contig, IndexT, PtrTraits > upcastInner()
__host__ __device__ Tensor< T, SubDim, Contig, IndexT, PtrTraits > view()
TensorType & tensor_
The tensor we're referencing.
__host__ __device__ DataPtrType end() const
__host__ __device__ const TensorType::DataPtrType data() const
Returns a raw accessor to our slice (const).
__host__ __device__ SubTensor< TensorType, SubDim-1, PtrTraits > operator[](typename TensorType::IndexType index)
__host__ __device__ IndexT getSize(int i) const
__host__ void copyFrom(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
__host__ __device__ const T & as() const
Cast to a different datatype (const).
A SubDim-rank slice of a parent Tensor.
__host__ __device__ PtrTraits< T >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ TensorType::DataPtrType data()
Returns a raw accessor to our slice.
IndexT stride_[Dim]
Array of strides (in sizeof(T) terms) per each dimension.
__host__ __device__ bool isContiguous() const
__host__ __device__ Tensor< T, NewDim, Contig, IndexT, PtrTraits > downcastOuter()
IndexT size_[Dim]
Size per each dimension.
__host__ __device__ TensorType::DataPtrType data()
Returns a raw accessor to our slice.
__host__ __device__ detail::SubTensor< TensorType, Dim-1, PtrTraits > operator[](IndexT)
Returns a read/write view of a portion of our tensor.
__host__ __device__ bool canCastResize() const
Returns true if we can castResize() this tensor to the new type.
__host__ __device__ const T & as() const
Cast to a different datatype (const).
__host__ __device__ const DataPtrType data() const
Returns a raw pointer to the start of our data (const).
__host__ __device__ Tensor< U, Dim, Contig, IndexT, PtrTraits > castResize()