15 #include <cuda_runtime.h>
16 #include <initializer_list>
22 namespace faiss {
namespace gpu {
29 template <
typename U>
class PtrTraits>
34 template <
typename TensorType,
36 template <
typename U>
class PtrTraits>
44 typedef T* __restrict__ PtrType;
74 bool InnerContig =
false,
75 typename IndexT = int,
79 enum { NumDim = Dim };
81 typedef IndexT IndexType;
82 enum { IsInnerContig = InnerContig };
83 typedef typename PtrTraits<T>::PtrType DataPtrType;
84 typedef Tensor<T, Dim, InnerContig, IndexT, PtrTraits> TensorType;
87 __host__ __device__
Tensor();
90 __host__ __device__
Tensor(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t);
93 __host__ __device__
Tensor(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t);
96 __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
97 operator=(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t);
100 __host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
101 operator=(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t);
105 const IndexT
sizes[Dim]);
107 std::initializer_list<IndexT>
sizes);
113 const IndexT
sizes[Dim],
117 __host__
void copyFrom(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
118 cudaStream_t stream);
121 __host__
void copyTo(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
122 cudaStream_t stream);
126 template <
typename OtherT,
int OtherDim>
127 __host__ __device__
bool
128 isSame(
const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs)
const;
131 template <
typename OtherT,
int OtherDim>
132 __host__ __device__
bool
133 isSameSize(
const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs)
const;
137 template <
typename U>
138 __host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
cast();
141 template <
typename U>
143 const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
cast()
const;
151 template <
typename U>
152 __host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
castResize();
155 template <
typename U>
156 __host__ __device__
const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
160 template <
typename U>
166 template <
typename NewIndexT>
167 __host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
172 template <
typename NewIndexT>
176 __host__ __device__
inline DataPtrType
data() {
182 __host__ __device__
inline DataPtrType
end() {
187 __host__ __device__
inline
188 const DataPtrType
data()
const {
194 __host__ __device__
inline DataPtrType
end()
const {
199 template <
typename U>
200 __host__ __device__
inline
201 typename PtrTraits<U>::PtrType
dataAs() {
202 return reinterpret_cast<typename PtrTraits<U>::PtrType
>(
data_);
206 template <
typename U>
207 __host__ __device__
inline
208 const typename PtrTraits<const U>::PtrType
dataAs()
const {
209 return reinterpret_cast<typename PtrTraits<const U>::PtrType
>(
data_);
213 __host__ __device__
inline
218 __host__ __device__
inline
224 __host__ __device__
inline IndexT
getSize(
int i)
const {
230 __host__ __device__
inline IndexT
getStride(
int i)
const {
245 __host__ __device__
inline const IndexT*
sizes()
const {
250 __host__ __device__
inline const IndexT*
strides()
const {
265 __host__ __device__
bool isConsistentlySized(
int i)
const;
270 __host__ __device__
bool isConsistentlySized()
const;
285 template <
int NewDim>
292 template <
int NewDim>
299 template <
int NewDim>
306 template <
int NewDim>
312 template <
int SubDim>
314 view(DataPtrType at);
318 template <
int SubDim>
333 narrow(
int dim, IndexT start, IndexT size);
338 template <
int NewDim>
340 view(std::initializer_list<IndexT>
sizes);
356 template <
typename IndexType>
357 bool canUseIndexType() {
361 template <
typename IndexType,
typename T,
typename... U>
362 bool canUseIndexType(
const T& arg,
const U&... args) {
363 return arg.canUseIndexType<IndexType>() &&
364 canUseIndexType(args...);
369 template <
typename IndexType,
typename... T>
370 bool canUseIndexType(
const T&... args) {
371 return detail::canUseIndexType(args...);
377 template <
typename TensorType,
template <
typename U>
class PtrTraits>
381 operator=(
typename TensorType::DataType val) {
387 __host__ __device__
operator typename TensorType::DataType&() {
392 __host__ __device__
operator const typename TensorType::DataType&()
const {
397 __host__ __device__
typename TensorType::DataType* operator&() {
402 __host__ __device__
const typename TensorType::DataType* operator&()
const {
407 __host__ __device__
inline typename TensorType::DataPtrType
data() {
412 __host__ __device__
inline
413 const typename TensorType::DataPtrType
data()
const {
418 template <
typename T>
419 __host__ __device__ T&
as() {
424 template <
typename T>
425 __host__ __device__
const T&
as()
const {
430 template <
typename T>
431 __host__ __device__
inline
432 typename PtrTraits<T>::PtrType
dataAs() {
433 return reinterpret_cast<typename PtrTraits<T>::PtrType
>(
data_);
437 template <
typename T>
438 __host__ __device__
inline
439 typename PtrTraits<const T>::PtrType
dataAs()
const {
440 return reinterpret_cast<typename PtrTraits<const T>::PtrType
>(
data_);
444 __device__
inline typename TensorType::DataType
ldg()
const {
445 #if __CUDA_ARCH__ >= 350
453 template <
typename T>
455 #if __CUDA_ARCH__ >= 350
456 return __ldg(dataAs<T>());
467 friend class Tensor<typename TensorType::DataType,
469 TensorType::IsInnerContig,
470 typename TensorType::IndexType,
475 typename TensorType::DataPtrType data)
484 typename TensorType::DataPtrType
const data_;
488 template <
typename TensorType,
490 template <
typename U>
class PtrTraits>
495 __host__ __device__
inline
496 SubTensor<TensorType, SubDim - 1, PtrTraits>
498 if (TensorType::IsInnerContig && SubDim == 1) {
500 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
503 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
505 data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
511 __host__ __device__
inline
512 const SubTensor<TensorType, SubDim - 1, PtrTraits>
514 if (TensorType::IsInnerContig && SubDim == 1) {
516 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
519 return SubTensor<TensorType, SubDim - 1, PtrTraits>(
521 data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
526 __host__ __device__
typename TensorType::DataType* operator&() {
531 __host__ __device__
const typename TensorType::DataType* operator&()
const {
536 __host__ __device__
inline typename TensorType::DataPtrType
data() {
541 __host__ __device__
inline
542 const typename TensorType::DataPtrType
data()
const {
547 template <
typename T>
548 __host__ __device__ T&
as() {
553 template <
typename T>
554 __host__ __device__
const T&
as()
const {
559 template <
typename T>
560 __host__ __device__
inline
561 typename PtrTraits<T>::PtrType
dataAs() {
562 return reinterpret_cast<typename PtrTraits<T>::PtrType
>(
data_);
566 template <
typename T>
567 __host__ __device__
inline
568 typename PtrTraits<const T>::PtrType
dataAs()
const {
569 return reinterpret_cast<typename PtrTraits<const T>::PtrType
>(
data_);
573 __device__
inline typename TensorType::DataType
ldg()
const {
574 #if __CUDA_ARCH__ >= 350
582 template <
typename T>
584 #if __CUDA_ARCH__ >= 350
585 return __ldg(dataAs<T>());
593 Tensor<
typename TensorType::DataType,
595 TensorType::IsInnerContig,
596 typename TensorType::IndexType,
603 friend class SubTensor<TensorType, SubDim + 1, PtrTraits>;
607 Tensor<
typename TensorType::DataType,
609 TensorType::IsInnerContig,
610 typename TensorType::IndexType,
615 typename TensorType::DataPtrType
data)
624 typename TensorType::DataPtrType
const data_;
629 template <
typename T,
int Dim,
bool InnerContig,
630 typename IndexT,
template <
typename U>
class PtrTraits>
631 __host__ __device__
inline
637 *
this, data_)[index]);
640 template <
typename T,
int Dim,
bool InnerContig,
641 typename IndexT,
template <
typename U>
class PtrTraits>
642 __host__ __device__
inline
648 const_cast<TensorType&
>(*this), data_)[index]);
653 #include "Tensor-inl.cuh"
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > upcastOuter()
__host__ __device__ detail::SubTensor< TensorType, Dim-1, PtrTraits > operator[](IndexT)
Returns a read/write view of a portion of our tensor.
__host__ Tensor< T, Dim, InnerContig, NewIndexT, PtrTraits > castIndexType() const
Tensor< typename TensorType::DataType, SubDim, TensorType::IsInnerContig, typename TensorType::IndexType, PtrTraits > view()
__host__ __device__ bool isContiguousDim(int i) const
Returns true if the given dimension index has no padding.
__host__ __device__ Tensor< U, Dim, InnerContig, IndexT, PtrTraits > cast()
__host__ __device__ size_t numElements() const
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > downcastOuter()
__host__ __device__ PtrTraits< const T >::PtrType dataAs() const
Cast to a different datatype (const).
__host__ __device__ const PtrTraits< const U >::PtrType dataAs() const
Cast to a different datatype.
__host__ __device__ PtrTraits< U >::PtrType dataAs()
Cast to a different datatype.
__device__ T ldgAs() const
Use the texture cache for reads; cast as a particular type.
__host__ __device__ bool canCastResize() const
Returns true if we can castResize() this tensor to the new type.
DataPtrType data_
Raw pointer to where the tensor data begins.
__host__ __device__ PtrTraits< T >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ Tensor()
Default constructor.
__host__ __device__ DataPtrType end() const
__host__ __device__ PtrTraits< const T >::PtrType dataAs() const
Cast to a different datatype (const).
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > upcastInner()
__host__ __device__ const TensorType::DataPtrType data() const
Returns a raw accessor to our slice (const).
__device__ TensorType::DataType ldg() const
Use the texture cache for reads.
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > narrowOutermost(IndexT start, IndexT size)
IndexT stride_[Dim]
Array of strides (in sizeof(T) terms) per each dimension.
__host__ __device__ T & as()
Cast to a different datatype.
__host__ __device__ T & as()
Cast to a different datatype.
TensorType & tensor_
The tensor we're referencing.
__host__ __device__ bool isContiguous() const
__host__ __device__ const DataPtrType data() const
Returns a raw pointer to the start of our data (const).
__device__ TensorType::DataType ldg() const
Use the texture cache for reads.
__host__ __device__ const IndexT * sizes() const
Returns the size array.
TensorType::DataPtrType const data_
The start of our sub-region.
__host__ void copyFrom(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
IndexT size_[Dim]
Size per each dimension.
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > & operator=(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t)
Assignment.
__device__ T ldgAs() const
Use the texture cache for reads; cast as a particular type.
__host__ __device__ const SubTensor< TensorType, SubDim-1, PtrTraits > operator[](typename TensorType::IndexType index) const
__host__ __device__ const IndexT * strides() const
Returns the stride array.
__host__ __device__ IndexT getSize(int i) const
__host__ __device__ bool isSameSize(const Tensor< OtherT, OtherDim, InnerContig, IndexT, PtrTraits > &rhs) const
Returns true if the two tensors are of the same dimensionality and size.
TensorType::DataPtrType const data_
Where our value is located.
__host__ __device__ Tensor< T, NewDim, InnerContig, IndexT, PtrTraits > downcastInner()
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > narrow(int dim, IndexT start, IndexT size)
__host__ __device__ DataPtrType data()
Returns a raw pointer to the start of our data.
__host__ void copyTo(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
__host__ bool canUseIndexType() const
__host__ __device__ Tensor< T, Dim, InnerContig, IndexT, PtrTraits > transpose(int dim1, int dim2) const
__host__ __device__ IndexT getStride(int i) const
Specialization for a view of a single value (0-dimensional).
__host__ __device__ DataPtrType end()
TensorType & tensor_
The tensor we're referencing.
__host__ __device__ const TensorType::DataPtrType data() const
Returns a raw accessor to our slice (const).
__host__ __device__ SubTensor< TensorType, SubDim-1, PtrTraits > operator[](typename TensorType::IndexType index)
__host__ __device__ const T & as() const
Cast to a different datatype (const).
A SubDim-rank slice of a parent Tensor.
__host__ __device__ PtrTraits< T >::PtrType dataAs()
Cast to a different datatype.
__host__ __device__ TensorType::DataPtrType data()
Returns a raw accessor to our slice.
__host__ __device__ Tensor< U, Dim, InnerContig, IndexT, PtrTraits > castResize()
__host__ __device__ TensorType::DataPtrType data()
Returns a raw accessor to our slice.
__host__ __device__ const T & as() const
Cast to a different datatype (const).
__host__ __device__ size_t getSizeInBytes() const
__host__ __device__ Tensor< T, SubDim, InnerContig, IndexT, PtrTraits > view()
__host__ __device__ bool isSame(const Tensor< OtherT, OtherDim, InnerContig, IndexT, PtrTraits > &rhs) const