/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include "Tensor.cuh"
#include "DeviceMemory.h"
#include "MemorySpace.h"

namespace faiss { namespace gpu {

template <typename T,
          int Dim,
          bool InnerContig = false,
          typename IndexT = int,
          template <typename U> class PtrTraits = traits::DefaultPtrTraits>
class DeviceTensor : public Tensor<T, Dim, InnerContig, IndexT, PtrTraits> {
 public:
  typedef IndexT IndexType;
  typedef typename PtrTraits<T>::PtrType DataPtrType;
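
  // With the default PtrTraits, DataPtrType is expected to resolve to a plain
  // T* device pointer; a different traits class (for example one using
  // __restrict__-qualified pointers) can be supplied via PtrTraits.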

  /// Default constructor
  __host__ DeviceTensor();

  /// Destructor
  __host__ ~DeviceTensor();

  /// Move constructor
  __host__ DeviceTensor(DeviceTensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t);

  /// Move assignment
  __host__ DeviceTensor<T, Dim, InnerContig, IndexT, PtrTraits>&
  operator=(DeviceTensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t);
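
  // Note: since the move constructor and move assignment are declared, the
  // implicitly generated copy operations are suppressed; ownership of the
  // underlying allocation is transferred by move rather than duplicated.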

  /// Constructs a tensor of the given size, allocating memory for it
  /// locally
  __host__ DeviceTensor(const IndexT sizes[Dim],
                        MemorySpace space = MemorySpace::Device);
  __host__ DeviceTensor(std::initializer_list<IndexT> sizes,
                        MemorySpace space = MemorySpace::Device);
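
  // Usage sketch (illustrative; `numVecs`, `dim` and `stream` are assumed to
  // come from the calling code): an owning, inner-contiguous 2-d float tensor
  // allocated on the current device and then cleared on `stream`.
  //
  //   DeviceTensor<float, 2, true> vecs({numVecs, dim});
  //   vecs.zero(stream);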

  /// Constructs a tensor of the given size, backed by a temporary
  /// memory reservation obtained via a memory manager.
  /// The reservation should be ordered with respect to the given stream.
  __host__ DeviceTensor(DeviceMemory& m,
                        const IndexT sizes[Dim],
                        cudaStream_t stream,
                        MemorySpace space = MemorySpace::Device);
  __host__ DeviceTensor(DeviceMemory& m,
                        std::initializer_list<IndexT> sizes,
                        cudaStream_t stream,
                        MemorySpace space = MemorySpace::Device);
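
  // Usage sketch (illustrative; `mem` is assumed to be some DeviceMemory
  // implementation, e.g. the manager owned by the GPU resources object, and
  // `numQueries`, `k`, `stream` come from the calling code): a short-lived
  // tensor backed by a temporary reservation ordered on `stream`.
  //
  //   DeviceTensor<float, 2, true> distances(mem, {numQueries, k}, stream);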

  /// Constructs a tensor of the given size, referencing a
  /// memory region we do not own
  __host__ DeviceTensor(DataPtrType data,
                        const IndexT sizes[Dim],
                        MemorySpace space = MemorySpace::Device);
  __host__ DeviceTensor(DataPtrType data,
                        std::initializer_list<IndexT> sizes,
                        MemorySpace space = MemorySpace::Device);

  /// Constructs a tensor of the given size and stride, referencing a
  /// memory region we do not own
  __host__ DeviceTensor(DataPtrType data,
                        const IndexT sizes[Dim],
                        const IndexT strides[Dim],
                        MemorySpace space = MemorySpace::Device);
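
  // Usage sketch (illustrative; `devPtr` is assumed to be a device pointer to
  // at least numVecs * dim floats owned elsewhere): a non-owning view with the
  // packed strides implied by the sizes.
  //
  //   DeviceTensor<float, 2, true> view(devPtr, {numVecs, dim});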

  /// Copies a tensor into ourselves, allocating memory for it locally
  __host__ DeviceTensor(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
                        cudaStream_t stream,
                        MemorySpace space = MemorySpace::Device);

  /// Copies a tensor into ourselves, backed by a temporary memory
  /// reservation obtained via a memory manager.
  __host__ DeviceTensor(DeviceMemory& m,
                        Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
                        cudaStream_t stream,
                        MemorySpace space = MemorySpace::Device);
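
  // Usage sketch (illustrative; `t` is assumed to be another float tensor of
  // the same dimensionality and `stream` comes from the calling code): an
  // owning, device-resident copy of `t`, ordered on `stream`.
  //
  //   DeviceTensor<float, 2, true> copy(t, stream);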

  /// Call to zero out memory
  __host__ DeviceTensor<T, Dim, InnerContig, IndexT, PtrTraits>&
  zero(cudaStream_t stream);

 private:
  enum AllocState {
    /// This tensor itself owns the memory, which must be freed via
    /// cudaFree
    Owner,

    /// This tensor itself is not an owner of the memory; there is
    /// nothing to free
    NotOwner,

    /// This tensor has the memory via a temporary memory reservation
    Reservation
  };

  AllocState state_;
  MemorySpace space_;
  DeviceMemoryReservation reservation_;
};

} } // namespace

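// The declarations above are templates; their definitions live in the
// accompanying -inl header included below, so that each translation unit
// using DeviceTensor can instantiate them.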
#include "DeviceTensor-inl.cuh"