#include <cuda_runtime.h>
#include <initializer_list>

namespace faiss { namespace gpu {
/// Our tensor type
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
class Tensor;

/// Type of a subspace of a tensor
namespace detail {
template <typename TensorType,
          int SubDim,
          template <typename U> class PtrTraits>
class SubTensor;
}

namespace traits {

template <typename T>
struct RestrictPtrTraits {
  typedef T* __restrict__ PtrType;
};

template <typename T>
struct DefaultPtrTraits {
  typedef T* PtrType;
};

}
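// Illustrative note (not part of the original header): RestrictPtrTraits
// marks the underlying pointer as T* __restrict__, letting the compiler
// assume the tensor data does not alias other pointers within a kernel.
// The alias below is hypothetical.
//
//   using RestrictedVec =
//       Tensor<float, 1, true, int, traits::RestrictPtrTraits>;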
/**
   Templated multi-dimensional array that supports strided access of
   elements. Main access is through `operator[]`; e.g.,
   `tensor[x][y][z]`.

   - `T` is the contained type (e.g., `float`)
   - `Dim` is the tensor rank
   - If `Contig` is true, the tensor is assumed to be contiguous, and
     only operations that make sense on contiguous arrays are allowed
   - `IndexT` is the integer type used for size/stride arrays
   - `PtrTraits` are traits applied to our data pointer (`T*`); by
     default this is just `T*`, but `RestrictPtrTraits` yields
     `T* __restrict__`
*/
template <typename T,
          int Dim,
          bool Contig = false,
          typename IndexT = int,
          template <typename U> class PtrTraits = traits::DefaultPtrTraits>
class Tensor {
 public:
  enum { NumDim = Dim };
  typedef T DataType;
  typedef IndexT IndexType;
  enum { IsContig = Contig };
  typedef typename PtrTraits<T>::PtrType DataPtrType;
  typedef Tensor<T, Dim, Contig, IndexT, PtrTraits> TensorType;
  /// Default constructor
  __host__ __device__ Tensor();

  /// Copy constructor
  __host__ __device__ Tensor(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t)
    = default;

  /// Move constructor
  __host__ __device__ Tensor(Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t)
    = default;

  /// Assignment
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>&
  operator=(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t) = default;

  /// Move assignment
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>&
  operator=(Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t);
  /// Constructor that calculates strides with no padding
  __host__ __device__ Tensor(DataPtrType data,
                             const IndexT sizes[Dim]);
  __host__ __device__ Tensor(DataPtrType data,
                             std::initializer_list<IndexT> sizes);

  /// Constructor that takes arbitrary size/stride arrays
  __host__ __device__ Tensor(DataPtrType data,
                             const IndexT sizes[Dim],
                             const IndexT strides[Dim]);
  /// Copies a tensor into ourselves; sizes must match
  __host__ void copyFrom(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
                         cudaStream_t stream);

  /// Copies ourselves into a tensor; sizes must match
  __host__ void copyTo(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
                       cudaStream_t stream);
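  // Illustrative sketch (not part of the original header): copyFrom/copyTo
  // run on the given stream; both tensors must hold the same number of
  // elements. The pointers and sizes below are hypothetical.
  //
  //   Tensor<float, 2, true> devT(devPtr, {128, 64});
  //   Tensor<float, 2, true> hostT(hostPtr, {128, 64});
  //   devT.copyFrom(hostT, stream);  // host -> device
  //   devT.copyTo(hostT, stream);    // device -> host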
  /// Returns true if the two tensors are of the same dimensionality,
  /// size and stride
  template <int OtherDim>
  __host__ __device__ bool
  isSame(const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const;
  /// Cast to a tensor of a different type of the same size and stride
  template <typename U>
  __host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits> cast();

  /// Const version of `cast`
  template <typename U>
  __host__ __device__
  const Tensor<U, Dim, Contig, IndexT, PtrTraits> cast() const;

  /// Cast to a tensor of a different type which is potentially a
  /// different size than our type `T`
  template <typename U>
  __host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits> castResize();

  /// Const version of `castResize`
  template <typename U>
  __host__ __device__
  const Tensor<U, Dim, Contig, IndexT, PtrTraits> castResize() const;

  /// Returns true if we can castResize() this tensor to the new type
  template <typename U>
  __host__ __device__ bool canCastResize() const;
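  // Illustrative sketch (not part of the original header): cast<U>()
  // requires sizeof(U) == sizeof(T), while castResize<U>() permits a
  // larger U and shrinks the innermost dimension accordingly, subject
  // to the alignment/divisibility checks of canCastResize<U>(). The
  // pointer and sizes below are hypothetical.
  //
  //   Tensor<float, 2, true> t(devPtr, {128, 64});
  //   if (t.canCastResize<float4>()) {
  //     auto t4 = t.castResize<float4>();  // viewed as {128, 16}
  //   }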
  /// Returns a raw pointer to the start of our data
  __host__ __device__ inline DataPtrType data() {
    return data_;
  }

  /// Returns a raw pointer to the end of our data, assuming continuity
  __host__ __device__ inline DataPtrType end() {
    return data() + numElements();
  }

  /// Returns a raw pointer to the start of our data (const)
  __host__ __device__ inline
  const DataPtrType data() const {
    return data_;
  }

  /// Returns a raw pointer to the end of our data, assuming continuity
  /// (const)
  __host__ __device__ inline DataPtrType end() const {
    return data() + numElements();
  }
  /// Cast to a different datatype
  template <typename U>
  __host__ __device__ inline
  typename PtrTraits<U>::PtrType dataAs() {
    return reinterpret_cast<typename PtrTraits<U>::PtrType>(data_);
  }

  /// Cast to a different datatype (const)
  template <typename U>
  __host__ __device__ inline
  const typename PtrTraits<const U>::PtrType dataAs() const {
    return reinterpret_cast<typename PtrTraits<const U>::PtrType>(data_);
  }
  /// Returns a read/write view of a portion of our tensor
  __host__ __device__ inline
  detail::SubTensor<TensorType, Dim - 1, PtrTraits>
  operator[](IndexT);

  /// Returns a read/write view of a portion of our tensor (const)
  __host__ __device__ inline
  const detail::SubTensor<TensorType, Dim - 1, PtrTraits>
  operator[](IndexT) const;
  /// Returns the size of a given dimension, `[0, Dim - 1]`; no bounds
  /// checking
  __host__ __device__ inline IndexT getSize(int i) const {
    return size_[i];
  }

  /// Returns the stride of a given dimension, `[0, Dim - 1]`; no bounds
  /// checking
  __host__ __device__ inline IndexT getStride(int i) const {
    return stride_[i];
  }

  /// Returns the total number of elements contained within our data
  /// (product of `getSize(i)`)
  __host__ __device__ IndexT numElements() const;

  /// If we are contiguous, returns the total size in bytes of our data
  __host__ __device__ size_t getSizeInBytes() const;
  /// Returns the size array
  __host__ __device__ inline const IndexT* sizes() const {
    return size_;
  }

  /// Returns the stride array
  __host__ __device__ inline const IndexT* strides() const {
    return stride_;
  }
  /// Returns true if there is no padding within the tensor and no
  /// re-ordering of the dimensions
  __host__ __device__ bool isContiguous() const;

  /// Returns whether a given dimension has only increasing stride
  /// from the previous dimension
  __host__ __device__ bool isConsistentlySized(int i) const;

  /// Returns whether at each dimension `stride <= size`
  __host__ __device__ bool isConsistentlySized() const;

  /// Returns true if the given dimension index has no padding
  __host__ __device__ bool isContiguousDim(int i) const;

  /// Returns a tensor of the same dimension after transposing the two
  /// dimensions given; does not actually move elements
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
  transpose(int dim1, int dim2) const;
  /// Upcast a tensor of dimension `D` to some tensor of dimension
  /// `D' > D` by padding the leading dimensions by 1
  /// (e.g., upcasting a 2-d tensor `[2][3]` to a 4-d tensor `[1][1][2][3]`)
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  upcastOuter();

  /// Upcast a tensor of dimension `D` to some tensor of dimension
  /// `D' > D` by padding the innermost dimensions by 1
  /// (e.g., upcasting a 2-d tensor `[2][3]` to a 4-d tensor `[2][3][1][1]`)
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  upcastInner();

  /// Downcast a tensor of dimension `D` to some tensor of dimension
  /// `D' < D` by collapsing the leading dimensions; asserts if there is
  /// padding on them
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  downcastOuter();

  /// Downcast a tensor of dimension `D` to some tensor of dimension
  /// `D' < D` by collapsing the innermost dimensions; asserts if there is
  /// padding on them
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  downcastInner();
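  // Illustrative sketch (not part of the original header): the
  // up/downcasts only rewrite the size/stride arrays; no elements move.
  // The pointer below is hypothetical.
  //
  //   Tensor<float, 3, true> t(devPtr, {2, 3, 4});
  //   auto a = t.downcastOuter<2>();  // viewed as {6, 4}
  //   auto b = t.downcastInner<2>();  // viewed as {2, 12}
  //   auto c = t.upcastOuter<4>();    // viewed as {1, 2, 3, 4}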
  /// Returns a tensor that is a view of the `SubDim`-dimensional slice
  /// of this tensor, starting at `at`
  template <int SubDim>
  __host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
  view(DataPtrType at);

  /// Returns a tensor that is a view of the `SubDim`-dimensional slice
  /// of this tensor, starting where our data begins
  template <int SubDim>
  __host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
  view();

  /// Returns a view of the tensor with the outermost dimension
  /// restricted to the elements in the range `[start, start + size)`
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
  narrowOutermost(IndexT start, IndexT size);

  /// Returns a view with the given dimension restricted to the range
  /// `[start, start + size)`; the result is possibly non-contiguous
  __host__ __device__ Tensor<T, Dim, false, IndexT, PtrTraits>
  narrow(int dim, IndexT start, IndexT size);

  /// Returns a view of the given tensor expressed as a tensor of a
  /// different number of dimensions; only works if we are contiguous
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  view(std::initializer_list<IndexT> sizes);

 protected:
  /// Raw pointer to where the tensor data begins
  DataPtrType data_;

  /// Array of strides (in sizeof(T) terms) per each dimension
  IndexT stride_[Dim];

  /// Size per each dimension
  IndexT size_[Dim];
};
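// Illustrative usage sketch (not part of the original header): wrapping an
// existing device allocation and indexing it with operator[]. The kernel
// and sizes below are hypothetical.
//
//   __global__ void scaleKernel(Tensor<float, 2, true> t, float v) {
//     for (int i = threadIdx.x; i < t.getSize(1); i += blockDim.x) {
//       t[blockIdx.x][i] = t[blockIdx.x][i] * v;
//     }
//   }
//
//   void scale(float* devData, int rows, int cols, cudaStream_t stream) {
//     Tensor<float, 2, true> t(devData, {rows, cols});
//     scaleKernel<<<rows, 256, 0, stream>>>(t, 2.0f);
//   }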
namespace detail {

/// Specialization for a view of a single value (0-dimensional)
template <typename TensorType,
          template <typename U> class PtrTraits>
class SubTensor<TensorType, 0, PtrTraits> {
 public:
  __host__ __device__ SubTensor<TensorType, 0, PtrTraits>
  operator=(typename TensorType::DataType val) {
    *data_ = val;
    return *this;
  }
  // operator T&
  __host__ __device__ operator typename TensorType::DataType&() {
    return *data_;
  }

  // const operator T&, returning const T&
  __host__ __device__ operator const typename TensorType::DataType&() const {
    return *data_;
  }

  // operator&, returning T*
  __host__ __device__ typename TensorType::DataType* operator&() {
    return data_;
  }

  // const operator&, returning const T*
  __host__ __device__ const typename TensorType::DataType* operator&() const {
    return data_;
  }
  /// Returns a raw accessor to our slice
  __host__ __device__ inline typename TensorType::DataPtrType data() {
    return data_;
  }

  /// Returns a raw accessor to our slice (const)
  __host__ __device__ inline
  const typename TensorType::DataPtrType data() const {
    return data_;
  }
  /// Cast to a different datatype
  template <typename T>
  __host__ __device__ T& as() {
    return *dataAs<T>();
  }

  /// Cast to a different datatype (const)
  template <typename T>
  __host__ __device__ const T& as() const {
    return *dataAs<T>();
  }

  /// Cast to a different datatype
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<T>::PtrType dataAs() {
    return reinterpret_cast<typename PtrTraits<T>::PtrType>(data_);
  }

  /// Cast to a different datatype (const)
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<const T>::PtrType dataAs() const {
    return reinterpret_cast<typename PtrTraits<const T>::PtrType>(data_);
  }
  /// Use the texture cache for reads
  __device__ inline typename TensorType::DataType ldg() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(data_);
#else
    return *data_;
#endif
  }

  /// Use the texture cache for reads; cast as a particular type
  template <typename T>
  __device__ inline T ldgAs() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(dataAs<T>());
#else
    return as<T>();
#endif
  }
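  // Illustrative sketch (not part of the original header): ldg() routes
  // reads through the read-only (texture) cache on compute capability
  // 3.5+ devices and falls back to a plain load elsewhere. The kernel
  // below is hypothetical.
  //
  //   __global__ void sumRows(Tensor<float, 2, true> in,
  //                           Tensor<float, 1, true> out) {
  //     float sum = 0.0f;
  //     for (int i = 0; i < in.getSize(1); ++i) {
  //       sum += in[blockIdx.x][i].ldg();
  //     }
  //     out[blockIdx.x] = sum;
  //   }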
 protected:
  /// One dimension greater can create us
  friend class SubTensor<TensorType, 1, PtrTraits>;

  /// Our parent tensor can create us
  friend class Tensor<typename TensorType::DataType,
                      1,
                      TensorType::IsContig,
                      typename TensorType::IndexType,
                      PtrTraits>;

  __host__ __device__ inline SubTensor(
    TensorType& t,
    typename TensorType::DataPtrType data)
      : tensor_(t),
        data_(data) {
  }

  /// The tensor we're referencing
  TensorType& tensor_;

  /// Where our value is located
  typename TensorType::DataPtrType const data_;
};
/// A `SubDim`-rank slice of a parent Tensor
template <typename TensorType,
          int SubDim,
          template <typename U> class PtrTraits>
class SubTensor {
 public:
  /// Returns a view of the data located at our offset (the dimension
  /// `SubDim - 1` tensor)
  __host__ __device__ inline
  SubTensor<TensorType, SubDim - 1, PtrTraits>
  operator[](typename TensorType::IndexType index) {
    if (TensorType::IsContig && SubDim == 1) {
      // Innermost dimension is stride 1 for contiguous arrays
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_, data_ + index);
    } else {
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_,
        data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
    }
  }

  /// Returns a view of the data located at our offset (the dimension
  /// `SubDim - 1` tensor) (const)
  __host__ __device__ inline
  const SubTensor<TensorType, SubDim - 1, PtrTraits>
  operator[](typename TensorType::IndexType index) const {
    if (TensorType::IsContig && SubDim == 1) {
      // Innermost dimension is stride 1 for contiguous arrays
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_, data_ + index);
    } else {
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_,
        data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
    }
  }
  // operator&, returning T*
  __host__ __device__ typename TensorType::DataType* operator&() {
    return data_;
  }

  // const operator&, returning const T*
  __host__ __device__ const typename TensorType::DataType* operator&() const {
    return data_;
  }

  /// Returns a raw accessor to our slice
  __host__ __device__ inline typename TensorType::DataPtrType data() {
    return data_;
  }

  /// Returns a raw accessor to our slice (const)
  __host__ __device__ inline
  const typename TensorType::DataPtrType data() const {
    return data_;
  }
  /// Cast to a different datatype
  template <typename T>
  __host__ __device__ T& as() {
    return *dataAs<T>();
  }

  /// Cast to a different datatype (const)
  template <typename T>
  __host__ __device__ const T& as() const {
    return *dataAs<T>();
  }

  /// Cast to a different datatype
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<T>::PtrType dataAs() {
    return reinterpret_cast<typename PtrTraits<T>::PtrType>(data_);
  }

  /// Cast to a different datatype (const)
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<const T>::PtrType dataAs() const {
    return reinterpret_cast<typename PtrTraits<const T>::PtrType>(data_);
  }
  /// Use the texture cache for reads
  __device__ inline typename TensorType::DataType ldg() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(data_);
#else
    return *data_;
#endif
  }

  /// Use the texture cache for reads; cast as a particular type
  template <typename T>
  __device__ inline T ldgAs() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(dataAs<T>());
#else
    return as<T>();
#endif
  }
  /// Returns a tensor view of the data located at our offset
  Tensor<typename TensorType::DataType,
         SubDim,
         TensorType::IsContig,
         typename TensorType::IndexType,
         PtrTraits> view() {
    return tensor_.template view<SubDim>(data_);
  }
 protected:
  /// One dimension greater can create us
  friend class SubTensor<TensorType, SubDim + 1, PtrTraits>;

  /// Our parent tensor can create us
  friend class Tensor<typename TensorType::DataType,
                      TensorType::NumDim,
                      TensorType::IsContig,
                      typename TensorType::IndexType,
                      PtrTraits>;

  __host__ __device__ inline SubTensor(
    TensorType& t,
    typename TensorType::DataPtrType data)
      : tensor_(t),
        data_(data) {
  }

  /// The tensor we're referencing
  TensorType& tensor_;

  /// The start of our sub-region
  typename TensorType::DataPtrType const data_;
};

} // namespace detail
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ inline
detail::SubTensor<Tensor<T, Dim, Contig, IndexT, PtrTraits>,
                  Dim - 1, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator[](IndexT index) {
  return detail::SubTensor<TensorType, Dim - 1, PtrTraits>(
    detail::SubTensor<TensorType, Dim, PtrTraits>(*this, data_)[index]);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ inline
const detail::SubTensor<Tensor<T, Dim, Contig, IndexT, PtrTraits>,
                        Dim - 1, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator[](IndexT index) const {
  return detail::SubTensor<TensorType, Dim - 1, PtrTraits>(
    detail::SubTensor<TensorType, Dim, PtrTraits>(
      const_cast<TensorType&>(*this), data_)[index]);
}
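// Illustrative note (not part of the original header): for a 3-d tensor,
// t[x] yields a rank-2 SubTensor, t[x][y] a rank-1 SubTensor, and
// t[x][y][z] a rank-0 SubTensor that converts to T&. Each step advances
// the data pointer by index * stride of the corresponding dimension.
//
//   Tensor<float, 3, true> t(devPtr, {4, 5, 6});
//   float v = t[1][2][3];  // reads devPtr[1 * 30 + 2 * 6 + 3]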
} } // namespace faiss::gpu

#include "Tensor-inl.cuh"