15 #include <cuda_runtime.h>
16 #include <initializer_list>
22 namespace faiss {
namespace gpu {
29 template <
typename U>
class PtrTraits>
34 template <
typename TensorType,
36 template <
typename U>
class PtrTraits>
44 typedef T* __restrict__ PtrType;
74 bool InnerContig =
false,
75 typename IndexT = int,
79 enum { NumDim = Dim };
81 typedef IndexT IndexType;
82 enum { IsInnerContig = InnerContig };
83 typedef typename PtrTraits<T>::PtrType DataPtrType;
84 typedef Tensor<T, Dim, InnerContig, IndexT, PtrTraits> TensorType;
/// Default constructor. Declared for both host and device code.
87 __host__ __device__
Tensor();
90 __host__ __device__
Tensor(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t)
94 __host__ __device__
Tensor(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t)
/// Copy assignment; compiler-generated (`= default`).
/// The right-hand side is taken by non-const lvalue reference and the
/// assigned-to tensor is returned by reference.
98 __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
99 operator=(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) =
default;
/// Move assignment from an rvalue Tensor of the identical type; returns a
/// reference to a Tensor of that type. Only declared here.
/// NOTE(review): the definition is presumably in Tensor-inl.cuh, which this
/// header includes; what state the moved-from tensor is left in must be
/// confirmed there.
102 __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
103 operator=(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t);
107 const IndexT
sizes[Dim]);
109 std::initializer_list<IndexT>
sizes);
115 const IndexT
sizes[Dim],
/// Copies a tensor into ourselves; sizes must match. Host-only.
/// @param t      source tensor, of the identical Tensor type
/// @param stream CUDA stream supplied for the copy
/// NOTE(review): whether the copy is asynchronous with respect to the host,
/// and how a size mismatch is reported, is decided by the definition, which
/// is not visible here -- confirm.
119 __host__
void copyFrom(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
120 cudaStream_t stream);
/// Copies ourselves into a tensor; sizes must match. Host-only.
/// @param t      destination tensor, of the identical Tensor type
/// @param stream CUDA stream supplied for the copy
/// NOTE(review): whether the copy is asynchronous with respect to the host,
/// and how a size mismatch is reported, is decided by the definition, which
/// is not visible here -- confirm.
123 __host__
void copyTo(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
124 cudaStream_t stream);
/// Compares this tensor with `rhs`, which may have a different element type
/// (OtherT) and dimensionality (OtherDim) but must share InnerContig, IndexT
/// and PtrTraits with this tensor. Does not modify this tensor (const).
/// NOTE(review): the exact notion of "same" (presumably identical data
/// pointer, sizes and strides) is set by the definition -- confirm.
128 template <
typename OtherT,
int OtherDim>
129 __host__ __device__
bool
130 isSame(
const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs)
const;
/// Returns true if the two tensors are of the same dimensionality and size.
/// `rhs` may have a different element type (OtherT) and dimensionality
/// (OtherDim) but must share InnerContig, IndexT and PtrTraits with this
/// tensor.
133 template <
typename OtherT,
int OtherDim>
134 __host__ __device__
bool
135 isSameSize(
const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs)
const;
/// Returns a Tensor whose element type is U and whose Dim, InnerContig,
/// IndexT and PtrTraits are the same as this tensor's.
/// NOTE(review): presumably a reinterpretation of the same underlying data
/// that requires sizeof(U) == sizeof(T) -- confirm against the definition.
139 template <
typename U>
140 __host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
cast();
/// Const overload of cast<U>(): callable on a const tensor and returns a
/// const Tensor with element type U and otherwise identical template
/// parameters.
143 template <
typename U>
145 const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
cast()
const;
/// Returns a Tensor whose element type is U and whose Dim, InnerContig,
/// IndexT and PtrTraits are the same as this tensor's. canCastResize<U>()
/// reports whether this tensor can be castResize()'d to the new type.
/// NOTE(review): presumably the innermost size is rescaled by
/// sizeof(T) / sizeof(U) -- confirm against the definition.
153 template <
typename U>
154 __host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
castResize();
157 template <
typename U>
158 __host__ __device__
const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
162 template <
typename U>
168 template <
typename NewIndexT>
169 __host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
174 template <
typename NewIndexT>
178 __host__ __device__
inline DataPtrType
data() {
184 __host__ __device__
inline DataPtrType
end() {
189 __host__ __device__
inline
190 const DataPtrType
data()
const {
196 __host__ __device__
inline DataPtrType
end()
const {
201 template <
typename U>
202 __host__ __device__
inline
203 typename PtrTraits<U>::PtrType
dataAs() {
204 return reinterpret_cast<typename PtrTraits<U>::PtrType
>(
data_);
208 template <
typename U>
209 __host__ __device__
inline
210 const typename PtrTraits<const U>::PtrType
dataAs()
const {
211 return reinterpret_cast<typename PtrTraits<const U>::PtrType
>(
data_);
215 __host__ __device__
inline
220 __host__ __device__
inline
226 __host__ __device__
inline IndexT
getSize(
int i)
const {
232 __host__ __device__
inline IndexT
getStride(
int i)
const {
247 __host__ __device__
inline const IndexT*
sizes()
const {
252 __host__ __device__
inline const IndexT*
strides()
const {
/// Per-dimension consistency check for the dimension with index `i`.
/// Does not modify the tensor (const).
/// NOTE(review): the precise size/stride relationship being tested is set
/// by the definition, which is not visible here -- confirm.
267 __host__ __device__
bool isConsistentlySized(
int i)
const;
/// Whole-tensor overload of the consistency check; takes no dimension index.
/// Does not modify the tensor (const).
/// NOTE(review): presumably true only when every dimension passes
/// isConsistentlySized(i) -- confirm against the definition.
272 __host__ __device__
bool isConsistentlySized()
const;
287 template <
int NewDim>
294 template <
int NewDim>
301 template <
int NewDim>
308 template <
int NewDim>
314 template <
int SubDim>
316 view(DataPtrType at);
320 template <
int SubDim>
335 narrow(
int dim, IndexT start, IndexT size);
340 template <
int NewDim>
342 view(std::initializer_list<IndexT>
sizes);
358 template <
typename IndexType>
359 bool canUseIndexType() {
363 template <
typename IndexType,
typename T,
typename... U>
364 bool canUseIndexType(
const T& arg,
const U&... args) {
365 return arg.canUseIndexType<IndexType>() &&
366 canUseIndexType(args...);
371 template <
typename IndexType,
typename... T>
372 bool canUseIndexType(
const T&... args) {
373 return detail::canUseIndexType(args...);
379 template <
typename TensorType,
template <
typename U>
class PtrTraits>
383 operator=(
typename TensorType::DataType val) {
389 __host__ __device__
operator typename TensorType::DataType&() {
394 __host__ __device__
operator const typename TensorType::DataType&()
const {
399 __host__ __device__
typename TensorType::DataType* operator&() {
404 __host__ __device__
const typename TensorType::DataType* operator&()
const {
409 __host__ __device__
inline typename TensorType::DataPtrType
data() {
414 __host__ __device__
inline
415 const typename TensorType::DataPtrType
data()
const {
420 template <
typename T>
421 __host__ __device__ T&
as() {
426 template <
typename T>
427 __host__ __device__
const T&
as()
const {
432 template <
typename T>
433 __host__ __device__
inline
434 typename PtrTraits<T>::PtrType
dataAs() {
435 return reinterpret_cast<typename PtrTraits<T>::PtrType
>(
data_);
439 template <
typename T>
440 __host__ __device__
inline
441 typename PtrTraits<const T>::PtrType
dataAs()
const {
442 return reinterpret_cast<typename PtrTraits<const T>::PtrType
>(
data_);
446 __device__
inline typename TensorType::DataType
ldg()
const {
447 #if __CUDA_ARCH__ >= 350
455 template <
typename T>
457 #if __CUDA_ARCH__ >= 350
458 return __ldg(dataAs<T>());
469 friend class Tensor<typename TensorType::DataType,
471 TensorType::IsInnerContig,
472 typename TensorType::IndexType,
477 typename TensorType::DataPtrType data)
486 typename TensorType::DataPtrType
const data_;
490 template <
typename TensorType,
492 template <
typename U>
class PtrTraits>
497 __host__ __device__
inline
498 SubTensor<TensorType, SubDim - 1, PtrTraits>
500 if (TensorType::IsInnerContig && SubDim == 1) {
502 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
505 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
507 data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
513 __host__ __device__
inline
514 const SubTensor<TensorType, SubDim - 1, PtrTraits>
516 if (TensorType::IsInnerContig && SubDim == 1) {
518 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
521 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
523 data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
528 __host__ __device__
typename TensorType::DataType* operator&() {
533 __host__ __device__
const typename TensorType::DataType* operator&()
const {
538 __host__ __device__
inline typename TensorType::DataPtrType
data() {
543 __host__ __device__
inline
544 const typename TensorType::DataPtrType
data()
const {
549 template <
typename T>
550 __host__ __device__ T&
as() {
555 template <
typename T>
556 __host__ __device__
const T&
as()
const {
561 template <
typename T>
562 __host__ __device__
inline
563 typename PtrTraits<T>::PtrType
dataAs() {
564 return reinterpret_cast<typename PtrTraits<T>::PtrType
>(
data_);
568 template <
typename T>
569 __host__ __device__
inline
570 typename PtrTraits<const T>::PtrType
dataAs()
const {
571 return reinterpret_cast<typename PtrTraits<const T>::PtrType
>(
data_);
575 __device__
inline typename TensorType::DataType
ldg()
const {
576 #if __CUDA_ARCH__ >= 350
584 template <
typename T>
586 #if __CUDA_ARCH__ >= 350
587 return __ldg(dataAs<T>());
595 Tensor<
typename TensorType::DataType,
597 TensorType::IsInnerContig,
598 typename TensorType::IndexType,
605 friend class SubTensor<TensorType, SubDim + 1, PtrTraits>;
609 Tensor<
typename TensorType::DataType,
611 TensorType::IsInnerContig,
612 typename TensorType::IndexType,
617 typename TensorType::DataPtrType
data)
626 typename TensorType::DataPtrType
const data_;
631 template <
typename T,
int Dim,
bool InnerContig,
632 typename IndexT,
template <
typename U>
class PtrTraits>
633 __host__ __device__
inline
639 *
this, data_)[index]);
642 template <
typename T,
int Dim,
bool InnerContig,
643 typename IndexT,
template <
typename U>
class PtrTraits>
644 __host__ __device__
inline
650 const_cast<TensorType&
>(*this), data_)[index]);
655 #include "Tensor-inl.cuh"
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > upcastOuter()
__host__ __device__ detail::SubTensor< TensorType, Dim-1, PtrTraits > operator[](IndexT)
Returns a read/write view of a portion of our tensor.
__host__ Tensor< T, Dim, InnerContig, NewIndexT, PtrTraits > castIndexType() const
Tensor< typename TensorType::DataType, SubDim, TensorType::IsInnerContig, typename TensorType::IndexType, PtrTraits > view()
__host__ __device__ bool isContiguousDim(int i) const
Returns true if the given dimension index has no padding.
__host__ __device__ Tensor< U, Dim, InnerContig, IndexT, PtrTraits > cast()
__host__ __device__ size_t numElements() const
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > downcastOuter()
__host__ __device__ PtrTraits< const T >::PtrType dataAs() const
Cast to a different datatype (const)
__host__ __device__ const PtrTraits< const U >::PtrType dataAs() const
Cast to a different datatype.
__host__ __device__ PtrTraits< U >::PtrType dataAs()
Cast to a different datatype.
__device__ T ldgAs() const
Use the texture cache for reads; cast as a particular type.
__host__ __device__ bool canCastResize() const
Returns true if we can castResize() this tensor to the new type.
DataPtrType data_
Raw pointer to where the tensor data begins.
__host__ __device__ PtrTraits< T >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ Tensor()
Default constructor.
__host__ __device__ DataPtrType end() const
__host__ __device__ PtrTraits< const T >::PtrType dataAs() const
Cast to a different datatype (const)
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > upcastInner()
__host__ __device__ const TensorType::DataPtrType data() const
Returns a raw accessor to our slice (const).
__device__ TensorType::DataType ldg() const
Use the texture cache for reads.
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > narrowOutermost(IndexT start, IndexT size)
IndexT stride_[Dim]
Array of strides (in units of sizeof(T)), one per dimension.
__host__ __device__ T & as()
Cast to a different datatype.
__host__ __device__ T & as()
Cast to a different datatype.
TensorType & tensor_
The tensor we're referencing.
__host__ __device__ bool isContiguous() const
__host__ __device__ const DataPtrType data() const
Returns a raw pointer to the start of our data (const).
__device__ TensorType::DataType ldg() const
Use the texture cache for reads.
__host__ __device__ const IndexT * sizes() const
Returns the size array.
TensorType::DataPtrType const data_
The start of our sub-region.
__host__ void copyFrom(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
IndexT size_[Dim]
Size of each dimension.
__device__ T ldgAs() const
Use the texture cache for reads; cast as a particular type.
__host__ __device__ const SubTensor< TensorType, SubDim-1, PtrTraits > operator[](typename TensorType::IndexType index) const
__host__ __device__ const IndexT * strides() const
Returns the stride array.
__host__ __device__ IndexT getSize(int i) const
__host__ __device__ bool isSameSize(const Tensor< OtherT, OtherDim, InnerContig, IndexT, PtrTraits > &rhs) const
Returns true if the two tensors are of the same dimensionality and size.
TensorType::DataPtrType const data_
Where our value is located.
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > downcastInner()
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > narrow(int dim, IndexT start, IndexT size)
__host__ __device__ DataPtrType data()
Returns a raw pointer to the start of our data.
__host__ void copyTo(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
__host__ bool canUseIndexType() const
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > transpose(int dim1, int dim2) const
__host__ __device__ IndexT getStride(int i) const
Specialization for a view of a single value (0-dimensional)
__host__ __device__ DataPtrType end()
TensorType & tensor_
The tensor we're referencing.
__host__ __device__ const TensorType::DataPtrType data() const
Returns a raw accessor to our slice (const).
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > & operator=(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t)=default
Assignment.
__host__ __device__ SubTensor< TensorType, SubDim-1, PtrTraits > operator[](typename TensorType::IndexType index)
__host__ __device__ const T & as() const
Cast to a different datatype (const).
A SubDim-rank slice of a parent Tensor.
__host__ __device__ PtrTraits< T >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ TensorType::DataPtrType data()
Returns a raw accessor to our slice.
__host__ __device__ Tensor< U, Dim, InnerContig, IndexT, PtrTraits > castResize()
__host__ __device__ TensorType::DataPtrType data()
Returns a raw accessor to our slice.
__host__ __device__ const T & as() const
Cast to a different datatype (const).
__host__ __device__ size_t getSizeInBytes() const
__host__ __device__ Tensor< T, SubDim, InnerContig, IndexT, PtrTraits > view()
__host__ __device__ bool isSame(const Tensor< OtherT, OtherDim, InnerContig, IndexT, PtrTraits > &rhs) const