Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
IVFUtils.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "IVFUtils.cuh"
11 #include "../utils/DeviceUtils.h"
12 #include "../utils/StaticUtils.h"
13 #include "../utils/Tensor.cuh"
14 #include "../utils/ThrustAllocator.cuh"
15 #include <thrust/scan.h>
16 #include <thrust/execution_policy.h>
17 
18 namespace faiss { namespace gpu {
19 
// Kernel that records, for every (query, probe) pair, the length of the
// inverted list selected for that pair. These per-pair lengths are the
// number of intermediate distances that will be produced for the pair.
//
// Each thread handles one element, identified by its flattened global
// thread index; the index is split into (query, probe) using the size of
// dimension 1 of topQueryToCentroid. Threads whose index is not below
// totalSize do nothing.
//
// topQueryToCentroid: per (query, probe) list id; -1 means "no list"
// listLengths:        array indexed by list id giving that list's length
// totalSize:          number of (query, probe) elements to process
// length:             output, written at the same [query][probe] position
__global__ void
getResultLengths(Tensor<int, 2, true> topQueryToCentroid,
                 int* listLengths,
                 int totalSize,
                 Tensor<int, 2, true> length) {
  const int flatIdx = blockIdx.x * blockDim.x + threadIdx.x;

  if (flatIdx < totalSize) {
    const int probesPerQuery = topQueryToCentroid.getSize(1);
    const int query = flatIdx / probesPerQuery;
    const int probe = flatIdx % probesPerQuery;

    const int list = topQueryToCentroid[query][probe];

    // A list id of -1 can occur when NaNs in the input prevented a list
    // from being selected; such entries contribute a length of zero
    // instead of indexing listLengths out of bounds.
    int len = 0;
    if (list != -1) {
      len = listLengths[list];
    }

    length[query][probe] = len;
  }
}
41 
// Fills prefixSumOffsets with the inclusive prefix sum, taken over the
// flattened (query, probe) order, of the lengths of the inverted lists
// named in topQueryToCentroid. The result tells callers where each
// (query, probe) pair's intermediate results end in a packed buffer.
//
// topQueryToCentroid: per (query, probe) list id (-1 entries count as 0)
// listLengths:        device vector of list lengths, indexed by list id
// prefixSumOffsets:   output; must have the same sizes as
//                     topQueryToCentroid in both dimensions (asserted)
// thrustMem:          scratch space handed to Thrust for its temporaries
// stream:             stream on which the kernel and the scan are issued
//
// If topQueryToCentroid has no elements the function returns without
// launching any work and prefixSumOffsets is left untouched.
void runCalcListOffsets(Tensor<int, 2, true>& topQueryToCentroid,
                        thrust::device_vector<int>& listLengths,
                        Tensor<int, 2, true>& prefixSumOffsets,
                        Tensor<char, 1, true>& thrustMem,
                        cudaStream_t stream) {
  FAISS_ASSERT(topQueryToCentroid.getSize(0) == prefixSumOffsets.getSize(0));
  FAISS_ASSERT(topQueryToCentroid.getSize(1) == prefixSumOffsets.getSize(1));

  int totalSize = topQueryToCentroid.numElements();

  // An empty input would make numThreads zero below, which in turn means
  // a divUp by zero and a kernel launch with an empty grid/block. There
  // is nothing to compute in that case, so bail out early.
  if (totalSize == 0) {
    return;
  }

  // One thread per (query, probe) element; use a single block when the
  // whole problem fits, otherwise full-size blocks.
  int numThreads = std::min(totalSize, getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(totalSize, numThreads);

  auto grid = dim3(numBlocks);
  auto block = dim3(numThreads);

  // First pass: write each pair's list length into prefixSumOffsets
  getResultLengths<<<grid, block, 0, stream>>>(
    topQueryToCentroid,
    listLengths.data().get(),
    totalSize,
    prefixSumOffsets);
  CUDA_TEST_ERROR();

  // Prefix sum of the indices, so we know where the intermediate
  // results should be maintained
  // Thrust wants a place for its temporary allocations, so provide
  // one, so it won't call cudaMalloc/Free
  GpuResourcesThrustAllocator alloc(thrustMem.data(),
                                    thrustMem.getSizeInBytes());

  // Second pass: in-place inclusive scan over the lengths written above,
  // issued on the same stream so it is ordered after the kernel.
  thrust::inclusive_scan(thrust::cuda::par(alloc).on(stream),
                         prefixSumOffsets.data(),
                         prefixSumOffsets.data() + totalSize,
                         prefixSumOffsets.data());
  CUDA_TEST_ERROR();
}
78 
79 } } // namespace