Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
VectorResidual.cu
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 #include "VectorResidual.cuh"
12 #include "../../FaissAssert.h"
13 #include "../utils/ConversionOperators.cuh"
14 #include "../utils/DeviceUtils.h"
15 #include "../utils/Tensor.cuh"
16 #include "../utils/StaticUtils.h"
17 #include <math_constants.h> // in CUDA SDK, for CUDART_NAN_F
18 
19 namespace faiss { namespace gpu {
20 
// Computes residuals[v] = vecs[v] - centroids[vecToCentroid[v]] for one
// vector v per thread block (v == blockIdx.x).
//
// If the assigned centroid id is -1 (the vector could not be classified,
// e.g. because it contains NaNs), the whole residual row is filled with NaN
// instead.
//
// LargeDim == true:  the threads of the block stride over the dimensions in
//                    steps of blockDim.x, so any block size works.
// LargeDim == false: each thread handles exactly the dimension equal to its
//                    threadIdx.x with no bounds check; the launch is expected
//                    to use one thread per dimension.
template <typename CentroidT, bool LargeDim>
__global__ void calcResidual(Tensor<float, 2, true> vecs,
                             Tensor<CentroidT, 2, true> centroids,
                             Tensor<int, 1, true> vecToCentroid,
                             Tensor<float, 2, true> residuals) {
  int assignedId = vecToCentroid[blockIdx.x];

  auto inRow = vecs[blockIdx.x];
  auto outRow = residuals[blockIdx.x];

  if (assignedId != -1) {
    // Valid assignment: subtract the centroid, converting it to float first
    auto centroidRow = centroids[assignedId];

    if (!LargeDim) {
      outRow[threadIdx.x] = inRow[threadIdx.x] -
        ConvertTo<float>::to(centroidRow[threadIdx.x]);
    } else {
      for (int d = threadIdx.x; d < vecs.getSize(1); d += blockDim.x) {
        outRow[d] = inRow[d] - ConvertTo<float>::to(centroidRow[d]);
      }
    }
  } else {
    // Vector could be invalid (containing NaNs), so -1 was the
    // classified centroid; propagate NaN to the output row
    if (!LargeDim) {
      outRow[threadIdx.x] = CUDART_NAN_F;
    } else {
      for (int d = threadIdx.x; d < vecs.getSize(1); d += blockDim.x) {
        outRow[d] = CUDART_NAN_F;
      }
    }
  }
}
55 
// Host-side launcher for the residual kernel.
//
// Launches one thread block per row of `vecs` on `stream`. The block has one
// thread per dimension, capped at the device's maximum threads per block; if
// the dimension exceeds that cap, the strided (LargeDim) kernel variant is
// used instead.
//
// vecs:          (numVecs x dim) input vectors
// centroids:     (numCentroids x dim) centroids, float or half
// vecToCentroid: (numVecs) centroid id per vector, -1 meaning "invalid"
// residuals:     (numVecs x dim) output
//
// Shape mismatches and kernel launch failures trip FAISS_ASSERT. Empty input
// (no vectors, or zero dimensions) is a no-op.
template <typename CentroidT>
void calcResidual(Tensor<float, 2, true>& vecs,
                  Tensor<CentroidT, 2, true>& centroids,
                  Tensor<int, 1, true>& vecToCentroid,
                  Tensor<float, 2, true>& residuals,
                  cudaStream_t stream) {
  FAISS_ASSERT(vecs.getSize(1) == centroids.getSize(1));
  FAISS_ASSERT(vecs.getSize(1) == residuals.getSize(1));
  FAISS_ASSERT(vecs.getSize(0) == vecToCentroid.getSize(0));
  FAISS_ASSERT(vecs.getSize(0) == residuals.getSize(0));

  int numVecs = vecs.getSize(0);
  int dim = vecs.getSize(1);

  // A zero-sized grid or block is an invalid launch configuration, and there
  // is nothing to compute anyway
  if (numVecs == 0 || dim == 0) {
    return;
  }

  dim3 grid(numVecs);

  int maxThreads = getMaxThreadsCurrentDevice();
  bool largeDim = dim > maxThreads;
  dim3 block(largeDim ? maxThreads : dim);

  if (largeDim) {
    calcResidual<CentroidT, true><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  } else {
    calcResidual<CentroidT, false><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  }

  // The <<<>>> syntax does not report launch failures itself; fetch the
  // launch status explicitly so a bad configuration is not silently ignored
  cudaError_t launchStatus = cudaGetLastError();
  FAISS_ASSERT(launchStatus == cudaSuccess);
}
81 
// Public entry point for float32 centroids: writes
// vecs[i] - centroids[vecToCentroid[i]] into residuals[i], with the work
// enqueued on `stream`. Forwards to the templated launcher.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<float, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<float>(
    vecs, centroids, vecToCentroid, residuals, stream);
}
89 
90 #ifdef FAISS_USE_FLOAT16
// Public entry point for float16 centroids (only built when
// FAISS_USE_FLOAT16 is defined): same as the float32 overload, except that
// the centroids are stored as half; residuals are still produced as float.
// Forwards to the templated launcher.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<half, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<half>(
    vecs, centroids, vecToCentroid, residuals, stream);
}
98 #endif
99 
100 } } // namespace