Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
BlockSelectKernel.cuh
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 #pragma once
12 
13 #include "Float16.cuh"
14 #include "Select.cuh"
15 
16 namespace faiss { namespace gpu {
17 
/// Per-row selection kernel built on top of BlockSelect.
///
/// Each thread block handles exactly one row of `in`, namely row
/// `blockIdx.x`. Every element of that row is offered to a BlockSelect
/// instance together with its column index, the instance is reduced, and the
/// first `k` entries of the shared-memory scratch arrays are then written to
/// row `blockIdx.x` of `outK` (keys) and `outV` (column indices).
///
/// Template parameters are forwarded unchanged to BlockSelect; their exact
/// meaning is defined there and is not visible in this header.
///
/// Launch / size expectations visible from this code:
///  - statically allocates (ThreadsPerBlock / kWarpSize) * NumWarpQ entries
///    of both K and IndexType in shared memory;
///  - `k` must not exceed that entry count, because the copy-out loop reads
///    indices [0, k) from those arrays;
///  - NOTE(review): presumably launched with gridDim.x == number of rows and
///    blockDim.x == ThreadsPerBlock -- confirm against the host launcher.
///
/// @param in    input keys, indexed as in[row][column]
/// @param outK  output keys, indexed as outK[row][0..k)
/// @param outV  output indices (column positions in `in`), same layout as outK
/// @param initK initial key value handed to the BlockSelect constructor
/// @param initV initial index value handed to the BlockSelect constructor
/// @param k     number of entries copied out per row
template <typename K,
          typename IndexType,
          bool Dir,
          int NumWarpQ,
          int NumThreadQ,
          int ThreadsPerBlock>
__global__ void blockSelect(Tensor<K, 2, true> in,
                            Tensor<K, 2, true> outK,
                            Tensor<IndexType, 2, true> outV,
                            K initK,
                            IndexType initV,
                            int k) {
  constexpr int kWarpsPerBlock = ThreadsPerBlock / kWarpSize;
  constexpr int kSharedEntries = kWarpsPerBlock * NumWarpQ;

  // Scratch storage handed to BlockSelect; the results are read back from
  // here after reduce().
  __shared__ K sharedKeys[kSharedEntries];
  __shared__ IndexType sharedIndices[kSharedEntries];

  BlockSelect<K, IndexType, Dir, Comparator<K>,
              NumWarpQ, NumThreadQ, ThreadsPerBlock>
    selector(initK, initV, sharedKeys, sharedIndices, k);

  // One block per row.
  const int rowIdx = blockIdx.x;

  const auto numCols = in.getSize(1);

  // add() is only used below the largest multiple of kWarpSize that fits in
  // the row: whole warps must participate in the selection.
  const int warpAlignedCols = utils::roundDown(numCols, kWarpSize);

  int col = threadIdx.x;
  while (col < warpAlignedCols) {
    selector.add(in[rowIdx][col], (IndexType) col);
    col += blockDim.x;
  }

  // Leftover columns (less than one warp's worth) are inserted through the
  // per-thread path, which only the threads that still have a valid column
  // take.
  if (col < numCols) {
    selector.addThreadQ(in[rowIdx][col], (IndexType) col);
  }

  selector.reduce();

  // Copy the first k entries of the shared arrays to this row of the output.
  // NOTE(review): relies on reduce() leaving the final results in
  // sharedKeys / sharedIndices and visible to every thread -- confirm in
  // Select.cuh.
  for (int outCol = threadIdx.x; outCol < k; outCol += blockDim.x) {
    outK[rowIdx][outCol] = sharedKeys[outCol];
    outV[rowIdx][outCol] = sharedIndices[outCol];
  }
}
61 
/// Host-callable float overload; declared here, defined elsewhere.
///
/// NOTE(review): presumably this picks the blockSelect template
/// instantiation and launches it on `stream` -- confirm against the
/// implementation file, which is not part of this header.
///
/// @param in         2-D float input tensor
/// @param outKeys    2-D float tensor receiving the selected keys
/// @param outIndices 2-D int tensor receiving the selected indices
/// @param dir        selection direction (presumably mapped to the kernel's
///                   `Dir` template argument -- TODO confirm)
/// @param k          number of entries requested (presumably per row --
///                   TODO confirm)
/// @param stream     CUDA stream argument
void runBlockSelect(
    Tensor<float, 2, true>& in,
    Tensor<float, 2, true>& outKeys,
    Tensor<int, 2, true>& outIndices,
    bool dir,
    int k,
    cudaStream_t stream);
66 
#ifdef FAISS_USE_FLOAT16
/// Host-callable half-precision overload; only declared when
/// FAISS_USE_FLOAT16 is defined. Declared here, defined elsewhere.
///
/// NOTE(review): presumably mirrors the float overload with `half` keys --
/// confirm against the implementation file, which is not part of this header.
///
/// @param in         2-D half input tensor
/// @param outKeys    2-D half tensor receiving the selected keys
/// @param outIndices 2-D int tensor receiving the selected indices
/// @param dir        selection direction (presumably mapped to the kernel's
///                   `Dir` template argument -- TODO confirm)
/// @param k          number of entries requested (presumably per row --
///                   TODO confirm)
/// @param stream     CUDA stream argument
void runBlockSelect(
    Tensor<half, 2, true>& in,
    Tensor<half, 2, true>& outKeys,
    Tensor<int, 2, true>& outIndices,
    bool dir,
    int k,
    cudaStream_t stream);
#endif
73 
74 } } // namespace