faiss/gpu/impl/VectorResidual.cu

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/gpu/impl/VectorResidual.cuh>
#include <faiss/impl/FaissAssert.h>
#include <faiss/gpu/utils/ConversionOperators.cuh>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/utils/Tensor.cuh>
#include <faiss/gpu/utils/StaticUtils.h>
#include <math_constants.h> // in CUDA SDK, for CUDART_NAN_F
namespace faiss { namespace gpu {
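
// Computes residuals[i][j] = vecs[i][j] - centroids[vecToCentroid[i]][j],
// one block per input vector. The LargeDim variant strides over the
// dimension when it exceeds the block size; a centroid id of -1 marks an
// invalid (NaN) vector and yields a NaN residual.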
template <typename CentroidT, bool LargeDim>
__global__ void calcResidual(Tensor<float, 2, true> vecs,
                             Tensor<CentroidT, 2, true> centroids,
                             Tensor<int, 1, true> vecToCentroid,
                             Tensor<float, 2, true> residuals) {
  auto vec = vecs[blockIdx.x];
  auto residual = residuals[blockIdx.x];

  int centroidId = vecToCentroid[blockIdx.x];
  // Vector could be invalid (containing NaNs), so -1 was the
  // classified centroid
  if (centroidId == -1) {
    if (LargeDim) {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        residual[i] = CUDART_NAN_F;
      }
    } else {
      residual[threadIdx.x] = CUDART_NAN_F;
    }

    return;
  }

  auto centroid = centroids[centroidId];

  if (LargeDim) {
    for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
      residual[i] = vec[i] - ConvertTo<float>::to(centroid[i]);
    }
  } else {
    residual[threadIdx.x] = vec[threadIdx.x] -
      ConvertTo<float>::to(centroid[threadIdx.x]);
  }
}
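
// Gathers decoded vectors by id into out, one block per requested id;
// an id of -1 (no vector) produces a zero vector.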
template <typename T>
__global__ void gatherReconstruct(Tensor<int, 1, true> listIds,
                                  Tensor<T, 2, true> vecs,
                                  Tensor<float, 2, true> out) {
  auto id = listIds[blockIdx.x];
  auto vec = vecs[id];
  auto outVec = out[blockIdx.x];

  Convert<T, float> conv;

  for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
    outVec[i] = id == -1 ? 0.0f : conv(vec[i]);
  }
}
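
// Host-side launcher: checks shape agreement, then runs one thread per
// dimension, switching to the strided LargeDim kernel when the dimension
// exceeds the device's per-block thread limit.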
template <typename CentroidT>
void calcResidual(Tensor<float, 2, true>& vecs,
                  Tensor<CentroidT, 2, true>& centroids,
                  Tensor<int, 1, true>& vecToCentroid,
                  Tensor<float, 2, true>& residuals,
                  cudaStream_t stream) {
  FAISS_ASSERT(vecs.getSize(1) == centroids.getSize(1));
  FAISS_ASSERT(vecs.getSize(1) == residuals.getSize(1));
  FAISS_ASSERT(vecs.getSize(0) == vecToCentroid.getSize(0));
  FAISS_ASSERT(vecs.getSize(0) == residuals.getSize(0));

  dim3 grid(vecs.getSize(0));

  int maxThreads = getMaxThreadsCurrentDevice();
  bool largeDim = vecs.getSize(1) > maxThreads;
  dim3 block(std::min(vecs.getSize(1), maxThreads));

  if (largeDim) {
    calcResidual<CentroidT, true><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  } else {
    calcResidual<CentroidT, false><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  }

  CUDA_TEST_ERROR();
}
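
// Host-side launcher for gatherReconstruct: one block per id in listIds.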
template <typename T>
void gatherReconstruct(Tensor<int, 1, true>& listIds,
                       Tensor<T, 2, true>& vecs,
                       Tensor<float, 2, true>& out,
                       cudaStream_t stream) {
  FAISS_ASSERT(listIds.getSize(0) == out.getSize(0));
  FAISS_ASSERT(vecs.getSize(1) == out.getSize(1));

  dim3 grid(listIds.getSize(0));

  int maxThreads = getMaxThreadsCurrentDevice();
  dim3 block(std::min(vecs.getSize(1), maxThreads));

  gatherReconstruct<T><<<grid, block, 0, stream>>>(listIds, vecs, out);

  CUDA_TEST_ERROR();
}
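
// Exported entry points, specialized for float32 and float16 data.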
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<float, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<float>(vecs, centroids, vecToCentroid, residuals, stream);
}

void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<half, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<half>(vecs, centroids, vecToCentroid, residuals, stream);
}

void runReconstruct(Tensor<int, 1, true>& listIds,
                    Tensor<float, 2, true>& vecs,
                    Tensor<float, 2, true>& out,
                    cudaStream_t stream) {
  gatherReconstruct<float>(listIds, vecs, out, stream);
}

void runReconstruct(Tensor<int, 1, true>& listIds,
                    Tensor<half, 2, true>& vecs,
                    Tensor<float, 2, true>& out,
                    cudaStream_t stream) {
  gatherReconstruct<half>(listIds, vecs, out, stream);
}

} } // namespace
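
// Illustrative usage sketch (not part of this file): a caller holding raw
// device buffers could wrap them in non-owning Tensor views and call the
// float32 entry point. The buffer pointers and sizes below are hypothetical.
//
//   int numVecs = ..., dim = ..., numCentroids = ...;
//   Tensor<float, 2, true> vecs(vecsDevPtr, {numVecs, dim});
//   Tensor<float, 2, true> centroids(centroidsDevPtr, {numCentroids, dim});
//   Tensor<int, 1, true> vecToCentroid(assignDevPtr, {numVecs});
//   Tensor<float, 2, true> residuals(residualsDevPtr, {numVecs, dim});
//
//   runCalcResidual(vecs, centroids, vecToCentroid, residuals, stream);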