/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <initializer_list>

/// Multi-dimensional array class for CUDA device and host usage.
/// Originally from Facebook's fbcunn, since added to the Torch GPU
/// library cutorch as well.

namespace faiss { namespace gpu {

/// Our tensor type
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
class Tensor;

/// Type of a subspace of a tensor
namespace detail {
template <typename TensorType,
          int SubDim,
          template <typename U> class PtrTraits>
class SubTensor;
}

namespace traits {

template <typename T>
struct RestrictPtrTraits {
  typedef T* __restrict__ PtrType;
};

template <typename T>
struct DefaultPtrTraits {
  typedef T* PtrType;
};

}

/**
   Templated multi-dimensional array that supports strided access of
   elements. Main access is through `operator[]`; e.g.,
   `tensor[x][y][z]`.

   - `T` is the contained type (e.g., `float`)
   - `Dim` is the tensor rank
   - If `Contig` is true, then the tensor is assumed to be
     contiguous, and only operations that make sense on contiguous
     arrays are allowed (e.g., no transpose). Strides are still
     calculated, but the innermost stride is assumed to be 1.
   - `IndexT` is the integer type used for size/stride arrays, and for
     all indexing math. Default is `int`, but for large tensors, `long`
     can be used instead.
   - `PtrTraits` are traits applied to our data pointer (T*). By default,
     this is just T*, but RestrictPtrTraits can be used to apply T*
     __restrict__ for alias-free analysis.
*/
template <typename T,
          int Dim,
          bool Contig = false,
          typename IndexT = int,
          template <typename U> class PtrTraits = traits::DefaultPtrTraits>
class Tensor {
 public:
  enum { NumDim = Dim };
  typedef T DataType;
  typedef IndexT IndexType;
  enum { IsContig = Contig };
  typedef typename PtrTraits<T>::PtrType DataPtrType;
  typedef Tensor<T, Dim, Contig, IndexT, PtrTraits> TensorType;

  /// Default constructor
  __host__ __device__ Tensor();

  /// Copy constructor
  __host__ __device__ Tensor(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t)
    = default;

  /// Move constructor
  __host__ __device__ Tensor(Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t)
    = default;

  /// Assignment
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>&
  operator=(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t) = default;

  /// Move assignment
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>&
  operator=(Tensor<T, Dim, Contig, IndexT, PtrTraits>&& t);

  /// Constructor that calculates strides with no padding
  __host__ __device__ Tensor(DataPtrType data,
                             const IndexT sizes[Dim]);
  __host__ __device__ Tensor(DataPtrType data,
                             std::initializer_list<IndexT> sizes);

  /// Constructor that takes arbitrary size/stride arrays.
  /// Errors if you attempt to pass non-contiguous strides to a
  /// contiguous tensor.
  __host__ __device__ Tensor(DataPtrType data,
                             const IndexT sizes[Dim],
                             const IndexT strides[Dim]);

  /// Copies a tensor into ourselves; sizes must match
  __host__ void copyFrom(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
                         cudaStream_t stream);

  /// Copies ourselves into a tensor; sizes must match
  __host__ void copyTo(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
                       cudaStream_t stream);
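  // Illustrative usage (a sketch; the pointers, sizes and stream below are
  // hypothetical, not part of this interface): wrapping existing device
  // allocations and copying between two equally-sized tensors on a stream.
  //
  //   float* srcPtr = ...;   // device pointer holding 2 * 3 floats
  //   float* dstPtr = ...;   // device pointer holding 2 * 3 floats
  //   Tensor<float, 2, true> src(srcPtr, {2, 3});  // strides become {3, 1}
  //   Tensor<float, 2, true> dst(dstPtr, {2, 3});
  //   dst.copyFrom(src, stream);                   // sizes must match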
  /// Returns true if the two tensors are of the same dimensionality,
  /// size and stride.
  template <int OtherDim>
  __host__ __device__ bool
  isSame(const Tensor<T, OtherDim, Contig, IndexT, PtrTraits>& rhs) const;

  /// Cast to a tensor of a different type of the same size and
  /// stride. U and our type T must be of the same size
  template <typename U>
  __host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits> cast();

  /// Const version of `cast`
  template <typename U>
  __host__ __device__
  const Tensor<U, Dim, Contig, IndexT, PtrTraits> cast() const;

  /// Cast to a tensor of a different type which is potentially a
  /// different size than our type T. Tensor must be aligned and the
  /// innermost dimension must be a size that is a multiple of
  /// sizeof(U) / sizeof(T), and the stride of the innermost dimension
  /// must be contiguous. The stride of all outer dimensions must be a
  /// multiple of sizeof(U) / sizeof(T) as well.
  template <typename U>
  __host__ __device__ Tensor<U, Dim, Contig, IndexT, PtrTraits> castResize();

  /// Const version of `castResize`
  template <typename U>
  __host__ __device__ const Tensor<U, Dim, Contig, IndexT, PtrTraits>
  castResize() const;

  /// Returns true if we can castResize() this tensor to the new type
  template <typename U>
  __host__ __device__ bool canCastResize() const;

  /// Attempts to cast this tensor to a tensor of a different IndexT.
  /// Fails if size or stride entries are not representable in the new
  /// IndexT.
  template <typename NewIndexT>
  __host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
  castIndexType() const;

  /// Returns true if we can castIndexType() this tensor to the new
  /// index type
  template <typename NewIndexT>
  __host__ bool canCastIndexType() const;

  /// Returns a raw pointer to the start of our data.
  __host__ __device__ inline DataPtrType data() {
    return data_;
  }

  /// Returns a raw pointer to the end of our data, assuming
  /// continuity
  __host__ __device__ inline DataPtrType end() {
    return data() + numElements();
  }

  /// Returns a raw pointer to the start of our data (const).
  __host__ __device__ inline
  const DataPtrType data() const {
    return data_;
  }

  /// Returns a raw pointer to the end of our data, assuming
  /// continuity (const)
  __host__ __device__ inline DataPtrType end() const {
    return data() + numElements();
  }

  /// Cast to a different datatype
  template <typename U>
  __host__ __device__ inline
  typename PtrTraits<U>::PtrType dataAs() {
    return reinterpret_cast<typename PtrTraits<U>::PtrType>(data_);
  }

  /// Cast to a different datatype (const)
  template <typename U>
  __host__ __device__ inline
  const typename PtrTraits<const U>::PtrType dataAs() const {
    return reinterpret_cast<typename PtrTraits<const U>::PtrType>(data_);
  }

  /// Returns a read/write view of a portion of our tensor.
  __host__ __device__ inline
  detail::SubTensor<TensorType, Dim - 1, PtrTraits>
    operator[](IndexT);

  /// Returns a read/write view of a portion of our tensor (const).
  __host__ __device__ inline
  const detail::SubTensor<TensorType, Dim - 1, PtrTraits>
    operator[](IndexT) const;

  /// Returns the size of a given dimension, `[0, Dim - 1]`. No bounds
  /// checking.
  __host__ __device__ inline IndexT getSize(int i) const {
    return size_[i];
  }

  /// Returns the stride of a given dimension, `[0, Dim - 1]`. No bounds
  /// checking.
  __host__ __device__ inline IndexT getStride(int i) const {
    return stride_[i];
  }

  /// Returns the total number of elements contained within our data
  /// (product of `getSize(i)`)
  __host__ __device__ IndexT numElements() const;

  /// If we are contiguous, returns the total size in bytes of our
  /// data
  __host__ __device__ size_t getSizeInBytes() const {
    return (size_t) numElements() * sizeof(T);
  }

  /// Returns the size array.
  __host__ __device__ inline const IndexT* sizes() const {
    return size_;
  }

  /// Returns the stride array.
  __host__ __device__ inline const IndexT* strides() const {
    return stride_;
  }

  /// Returns true if there is no padding within the tensor and no
  /// re-ordering of the dimensions.
  /// ~~~
  /// (stride(i) == size(i + 1) * stride(i + 1)) && stride(dim - 1) == 1
  /// ~~~
  __host__ __device__ bool isContiguous() const;
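  // Worked example (illustrative only; `p` is a hypothetical pointer to at
  // least 24 floats): with the no-padding constructor, sizes of [2][3][4]
  // yield strides of [12][4][1], which satisfies the contiguity check above.
  //
  //   Tensor<float, 3, true> t(p, {2, 3, 4});
  //   // t.getStride(0) == 12, t.getStride(1) == 4, t.getStride(2) == 1
  //   // t.isContiguous() == true, t.numElements() == 24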
  /// Returns whether a given dimension has only increasing stride
  /// from the previous dimension. A tensor that was permuted by
  /// exchanging size and stride only will fail this check.
  /// If `i == 0` just check `size > 0`. Returns `false` if `stride` is `<= 0`.
  __host__ __device__ bool isConsistentlySized(int i) const;

  // Returns whether at each dimension `stride <= size`.
  // If this is not the case then iterating once over the size space will
  // touch the same memory locations multiple times.
  __host__ __device__ bool isConsistentlySized() const;

  /// Returns true if the given dimension index has no padding
  __host__ __device__ bool isContiguousDim(int i) const;

  /// Returns a tensor of the same dimension after transposing the two
  /// dimensions given. Does not actually move elements; transposition
  /// is made by permuting the size/stride arrays.
  /// If the dimensions are not valid, asserts.
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
  transpose(int dim1, int dim2) const;

  /// Upcast a tensor of dimension `D` to some tensor of dimension
  /// D' > D by padding the leading dimensions by 1
  /// e.g., upcasting a 2-d tensor `[2][3]` to a 4-d tensor `[1][1][2][3]`
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  upcastOuter();

  /// Upcast a tensor of dimension `D` to some tensor of dimension
  /// D' > D by padding the lowest/most varying dimensions by 1
  /// e.g., upcasting a 2-d tensor `[2][3]` to a 4-d tensor `[2][3][1][1]`
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  upcastInner();

  /// Downcast a tensor of dimension `D` to some tensor of dimension
  /// D' < D by collapsing the leading dimensions. asserts if there is
  /// padding on the leading dimensions.
  template <int NewDim>
  __host__ __device__
  Tensor<T, NewDim, Contig, IndexT, PtrTraits> downcastOuter();

  /// Downcast a tensor of dimension `D` to some tensor of dimension
  /// D' < D by collapsing the innermost/most varying dimensions. asserts
  /// if there is padding on the innermost dimensions.
  template <int NewDim>
  __host__ __device__
  Tensor<T, NewDim, Contig, IndexT, PtrTraits> downcastInner();

  /// Returns a tensor that is a view of the `SubDim`-dimensional slice
  /// of this tensor, starting at `at`.
  template <int SubDim>
  __host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
  view(DataPtrType at);

  /// Returns a tensor that is a view of the `SubDim`-dimensional slice
  /// of this tensor, starting where our data begins
  template <int SubDim>
  __host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits>
  view();

  /// Returns a tensor of the same dimension that is a view of the
  /// original tensor with the specified dimension restricted to the
  /// elements in the range [start, start + size)
  __host__ __device__ Tensor<T, Dim, Contig, IndexT, PtrTraits>
  narrowOutermost(IndexT start, IndexT size);

  /// Returns a tensor of the same dimension that is a view of the
  /// original tensor with the specified dimension restricted to the
  /// elements in the range [start, start + size).
  /// Can occur in an arbitrary dimension, and is possibly
  /// non-contiguous
  __host__ __device__ Tensor<T, Dim, false, IndexT, PtrTraits>
  narrow(int dim, IndexT start, IndexT size);

  /// Returns a view of the given tensor expressed as a tensor of a
  /// different number of dimensions.
  /// Only works if we are contiguous.
  template <int NewDim>
  __host__ __device__ Tensor<T, NewDim, Contig, IndexT, PtrTraits>
  view(std::initializer_list<IndexT> sizes);

 protected:
  /// Raw pointer to where the tensor data begins
  DataPtrType data_;

  /// Array of strides (in sizeof(T) terms) per each dimension
  IndexT stride_[Dim];

  /// Size per each dimension
  IndexT size_[Dim];
};

// Utilities for checking a collection of tensors
namespace detail {

template <typename IndexType>
bool canCastIndexType() {
  return true;
}

template <typename IndexType, typename T, typename... U>
bool canCastIndexType(const T& arg, const U&... args) {
  return arg.template canCastIndexType<IndexType>() &&
    canCastIndexType<IndexType>(args...);
}

} // namespace detail

template <typename IndexType, typename... T>
bool canCastIndexType(const T&... args) {
  return detail::canCastIndexType<IndexType>(args...);
}
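// Illustrative sketch (the tensors `a` and `b` below are hypothetical):
// checking whether a group of long-indexed tensors can all be re-indexed
// with int before converting each one with castIndexType<int>().
//
//   Tensor<float, 2, true, long> a = ...;
//   Tensor<float, 2, true, long> b = ...;
//   if (canCastIndexType<int>(a, b)) {
//     auto a32 = a.castIndexType<int>();
//     auto b32 = b.castIndexType<int>();
//   }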
namespace detail {

/// Specialization for a view of a single value (0-dimensional)
template <typename TensorType, template <typename U> class PtrTraits>
class SubTensor<TensorType, 0, PtrTraits> {
 public:
  __host__ __device__ SubTensor<TensorType, 0, PtrTraits>
  operator=(typename TensorType::DataType val) {
    *data_ = val;
    return *this;
  }

  // operator T&
  __host__ __device__ operator typename TensorType::DataType&() {
    return *data_;
  }

  // const operator T& returning const T&
  __host__ __device__
  operator const typename TensorType::DataType&() const {
    return *data_;
  }

  // operator& returning T*
  __host__ __device__ typename TensorType::DataType* operator&() {
    return data_;
  }

  // const operator& returning const T*
  __host__ __device__ const typename TensorType::DataType* operator&() const {
    return data_;
  }

  /// Returns a raw accessor to our slice.
  __host__ __device__ inline typename TensorType::DataPtrType data() {
    return data_;
  }

  /// Returns a raw accessor to our slice (const).
  __host__ __device__ inline
  const typename TensorType::DataPtrType data() const {
    return data_;
  }

  /// Cast to a different datatype.
  template <typename T>
  __host__ __device__ T& as() {
    return *dataAs<T>();
  }

  /// Cast to a different datatype (const).
  template <typename T>
  __host__ __device__ const T& as() const {
    return *dataAs<T>();
  }

  /// Cast to a different datatype
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<T>::PtrType dataAs() {
    return reinterpret_cast<typename PtrTraits<T>::PtrType>(data_);
  }

  /// Cast to a different datatype (const)
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<const T>::PtrType dataAs() const {
    return reinterpret_cast<typename PtrTraits<const T>::PtrType>(data_);
  }

  /// Use the texture cache for reads
  __device__ inline typename TensorType::DataType ldg() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(data_);
#else
    return *data_;
#endif
  }

  /// Use the texture cache for reads; cast as a particular type
  template <typename T>
  __device__ inline T ldgAs() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(dataAs<T>());
#else
    return as<T>();
#endif
  }

 protected:
  /// One dimension greater can create us
  friend class SubTensor<TensorType, 1, PtrTraits>;

  /// Our parent tensor can create us
  friend class Tensor<typename TensorType::DataType,
                      1,
                      TensorType::IsContig,
                      typename TensorType::IndexType,
                      PtrTraits>;

  __host__ __device__ inline SubTensor(
    TensorType& t,
    typename TensorType::DataPtrType data)
      : tensor_(t),
        data_(data) {
  }

  /// The tensor we're referencing
  TensorType& tensor_;

  /// Where our value is located
  typename TensorType::DataPtrType const data_;
};
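// Illustrative note (a sketch, not additional interface): chained indexing
// like t[i][j] on a 2-d Tensor bottoms out at the 0-dimensional
// specialization above, whose conversion and assignment operators make both
// reads and writes work:
//
//   float x = t[i][j];    // operator DataType&()
//   t[i][j] = 2.0f;       // operator=(DataType)
//   float* p = &t[i][j];  // operator&()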
/// A `SubDim`-rank slice of a parent Tensor
template <typename TensorType,
          int SubDim,
          template <typename U> class PtrTraits>
class SubTensor {
 public:
  /// Returns a view of the data located at our offset (the dimension
  /// `SubDim` - 1 tensor).
  __host__ __device__ inline
  SubTensor<TensorType, SubDim - 1, PtrTraits>
    operator[](typename TensorType::IndexType index) {
    if (TensorType::IsContig && SubDim == 1) {
      // Innermost dimension is stride 1 for contiguous arrays
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_, data_ + index);
    } else {
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_,
        data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
    }
  }

  /// Returns a view of the data located at our offset (the dimension
  /// `SubDim` - 1 tensor) (const).
  __host__ __device__ inline
  const SubTensor<TensorType, SubDim - 1, PtrTraits>
    operator[](typename TensorType::IndexType index) const {
    if (TensorType::IsContig && SubDim == 1) {
      // Innermost dimension is stride 1 for contiguous arrays
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_, data_ + index);
    } else {
      return SubTensor<TensorType, SubDim - 1, PtrTraits>(
        tensor_,
        data_ + index * tensor_.getStride(TensorType::NumDim - SubDim));
    }
  }

  // operator& returning T*
  __host__ __device__ typename TensorType::DataType* operator&() {
    return data_;
  }

  // const operator& returning const T*
  __host__ __device__ const typename TensorType::DataType* operator&() const {
    return data_;
  }

  /// Returns a raw accessor to our slice.
  __host__ __device__ inline typename TensorType::DataPtrType data() {
    return data_;
  }

  /// Returns a raw accessor to our slice (const).
  __host__ __device__ inline
  const typename TensorType::DataPtrType data() const {
    return data_;
  }

  /// Cast to a different datatype.
  template <typename T>
  __host__ __device__ T& as() {
    return *dataAs<T>();
  }

  /// Cast to a different datatype (const).
  template <typename T>
  __host__ __device__ const T& as() const {
    return *dataAs<T>();
  }

  /// Cast to a different datatype
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<T>::PtrType dataAs() {
    return reinterpret_cast<typename PtrTraits<T>::PtrType>(data_);
  }

  /// Cast to a different datatype (const)
  template <typename T>
  __host__ __device__ inline
  typename PtrTraits<const T>::PtrType dataAs() const {
    return reinterpret_cast<typename PtrTraits<const T>::PtrType>(data_);
  }

  /// Use the texture cache for reads
  __device__ inline typename TensorType::DataType ldg() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(data_);
#else
    return *data_;
#endif
  }

  /// Use the texture cache for reads; cast as a particular type
  template <typename T>
  __device__ inline T ldgAs() const {
#if __CUDA_ARCH__ >= 350
    return __ldg(dataAs<T>());
#else
    return as<T>();
#endif
  }

  /// Returns a tensor that is a view of the SubDim-dimensional slice
  /// of this tensor, starting where our data begins
  Tensor<typename TensorType::DataType,
         SubDim,
         TensorType::IsContig,
         typename TensorType::IndexType,
         PtrTraits> view() {
    return tensor_.template view<SubDim>(data_);
  }

 protected:
  /// One dimension greater can create us
  friend class SubTensor<TensorType, SubDim + 1, PtrTraits>;

  /// Our parent tensor can create us
  friend class Tensor<typename TensorType::DataType,
                      TensorType::NumDim,
                      TensorType::IsContig,
                      typename TensorType::IndexType,
                      PtrTraits>;

  __host__ __device__ inline SubTensor(
    TensorType& t,
    typename TensorType::DataPtrType data)
      : tensor_(t),
        data_(data) {
  }

  /// The tensor we're referencing
  TensorType& tensor_;

  /// The start of our sub-region
  typename TensorType::DataPtrType const data_;
};

} // namespace detail

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ inline
detail::SubTensor<Tensor<T, Dim, Contig, IndexT, PtrTraits>,
                  Dim - 1, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator[](IndexT index) {
  return detail::SubTensor<TensorType, Dim - 1, PtrTraits>(
    detail::SubTensor<TensorType, Dim, PtrTraits>(
      *this, data_)[index]);
}

template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ inline
const detail::SubTensor<Tensor<T, Dim, Contig, IndexT, PtrTraits>,
                        Dim - 1, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator[](IndexT index) const {
  return detail::SubTensor<TensorType, Dim - 1, PtrTraits>(
    detail::SubTensor<TensorType, Dim, PtrTraits>(
      const_cast<TensorType&>(*this), data_)[index]);
}

} } // namespace

#include "Tensor-inl.cuh"
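// Usage sketch (illustrative; the kernel below is hypothetical and not part
// of this header): tensors are typically passed to kernels by value, with
// traits::RestrictPtrTraits when alias-free access is desired.
//
//   using faiss::gpu::Tensor;
//   using faiss::gpu::traits::RestrictPtrTraits;
//
//   __global__ void fillRows(Tensor<float, 2, true, int,
//                                   RestrictPtrTraits> t, float val) {
//     int row = blockIdx.x;
//     for (int col = threadIdx.x; col < t.getSize(1); col += blockDim.x) {
//       t[row][col] = val;
//     }
//   }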