Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
WarpSelectKernel.cuh
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 #pragma once
11 
12 #include "Float16.cuh"
13 #include "Select.cuh"
14 
namespace faiss { namespace gpu {

// Per-warp selection kernel.
//
// Each warp of a block is assigned a single row of `in`
// (row = blockIdx.x * warps-per-block + warp index within the block).
// The lanes of that warp walk the row's columns with a stride of kWarpSize,
// feed every (value, column index) pair into a WarpSelect instance, then
// reduce it and write the results for that row to `outK` / `outV`.
//
// Template parameters:
//   K               - element type of `in` and `outK`
//   IndexType       - element type of `outV`; column indices are cast to it
//   Dir, NumWarpQ,
//   NumThreadQ      - forwarded unchanged to WarpSelect
//   ThreadsPerBlock - forwarded to WarpSelect and used to derive the number
//                     of warps (and therefore rows) handled per block
//
// Parameters:
//   in           - 2-d input; dimension 0 indexes rows, dimension 1 columns
//   outK, outV   - per-row destinations handed to WarpSelect::writeOut
//   initK, initV - initial key / index passed to the WarpSelect constructor
//   k            - passed to the WarpSelect constructor and to writeOut
//
// NOTE(review): the indexing assumes a 1-d launch with ThreadsPerBlock
// threads per block -- confirm against the host-side launcher.
template <typename K,
          typename IndexType,
          bool Dir,
          int NumWarpQ,
          int NumThreadQ,
          int ThreadsPerBlock>
__global__ void warpSelect(Tensor<K, 2, true> in,
                           Tensor<K, 2, true> outK,
                           Tensor<IndexType, 2, true> outV,
                           K initK,
                           IndexType initV,
                           int k) {
  constexpr int kWarpsPerBlock = ThreadsPerBlock / kWarpSize;

  // The selection state is constructed by every thread, before the row
  // bounds check below.
  WarpSelect<K, IndexType, Dir, Comparator<K>,
             NumWarpQ, NumThreadQ, ThreadsPerBlock>
      heap(initK, initV, k);

  // One row per warp.
  const int warpInBlock = threadIdx.x / kWarpSize;
  const int row = blockIdx.x * kWarpsPerBlock + warpInBlock;

  // Warps whose row lies past the end of the input have nothing to do.
  if (row < in.getSize(0)) {
    const auto numCols = in.getSize(1);

    // Whole warps must participate in add(), so only the leading
    // warp-multiple portion of the row goes through the main loop.
    const int fullWarpCols = utils::roundDown(numCols, kWarpSize);

    // Each lane starts at the column equal to its lane id.
    int col = getLaneId();
    K* src = in[row][col].data();

    while (col < fullWarpCols) {
      heap.add(*src, (IndexType) col);
      src += kWarpSize;
      col += kWarpSize;
    }

    // Remainder that does not fill a whole warp: only lanes that still have
    // a valid column contribute, via the per-thread queue.
    if (col < numCols) {
      heap.addThreadQ(*src, (IndexType) col);
    }

    heap.reduce();
    heap.writeOut(outK[row].data(), outV[row].data(), k);
  }
}

// Host-side entry point for float input. Only the declaration is provided
// here; the definition is not visible in this header.
// NOTE(review): presumably launches warpSelect on `stream`, with `dir`
// selecting the Dir template argument -- confirm against the definition.
void runWarpSelect(Tensor<float, 2, true>& in,
                   Tensor<float, 2, true>& outKeys,
                   Tensor<int, 2, true>& outIndices,
                   bool dir, int k, cudaStream_t stream);

#ifdef FAISS_USE_FLOAT16
// Overload taking half input and output keys; declared only when
// FAISS_USE_FLOAT16 is defined.
void runWarpSelect(Tensor<half, 2, true>& in,
                   Tensor<half, 2, true>& outKeys,
                   Tensor<int, 2, true>& outIndices,
                   bool dir, int k, cudaStream_t stream);
#endif

} } // namespace