faiss/gpu/utils/WarpSelectKernel.cuh


/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once

#include "Float16.cuh"
#include "Select.cuh"

namespace faiss { namespace gpu {

template <typename K,
          typename IndexType,
          bool Dir,
          int NumWarpQ,
          int NumThreadQ,
          int ThreadsPerBlock>
__global__ void warpSelect(Tensor<K, 2, true> in,
                           Tensor<K, 2, true> outK,
                           Tensor<IndexType, 2, true> outV,
                           K initK,
                           IndexType initV,
                           int k) {
  constexpr int kNumWarps = ThreadsPerBlock / kWarpSize;

  WarpSelect<K, IndexType, Dir, Comparator<K>,
                NumWarpQ, NumThreadQ, ThreadsPerBlock>
    heap(initK, initV, k);

  int warpId = threadIdx.x / kWarpSize;
  int row = blockIdx.x * kNumWarps + warpId;

  if (row >= in.getSize(0)) {
    return;
  }

  K* inStart = in[row].data();
  int i = getLaneId();

  // Whole warps must participate in the selection
  int limit = utils::roundDown(in.getSize(1), kWarpSize);

  for (; i < limit; i += kWarpSize) {
    heap.add(inStart[i], (IndexType) i);
  }

  // Handle non-warp multiple remainder
  if (i < in.getSize(1)) {
    heap.addThreadQ(inStart[i], (IndexType) i);
  }

  heap.reduce();
  heap.writeOut(outK[row].data(),
                outV[row].data(), k);
}

void runWarpSelect(Tensor<float, 2, true>& in,
                      Tensor<float, 2, true>& outKeys,
                      Tensor<int, 2, true>& outIndices,
                      bool dir, int k, cudaStream_t stream);

#ifdef FAISS_USE_FLOAT16
void runWarpSelect(Tensor<half, 2, true>& in,
                      Tensor<half, 2, true>& outKeys,
                      Tensor<int, 2, true>& outIndices,
                      bool dir, int k, cudaStream_t stream);
#endif

} } // namespace
Initial commit 2017-02-23 06:26:44 +08:00
			`/**`
			`* Copyright (c) 2015-present, Facebook, Inc.`
			`* All rights reserved.`
			`*`
			`* This source code is licensed under the CC-by-NC license found in the`
			`* LICENSE file in the root directory of this source tree.`
			`*/`

			`// Copyright 2004-present Facebook. All Rights Reserved.`
			`#pragma once`

			`#include "Float16.cuh"`
			`#include "Select.cuh"`

			`namespace faiss { namespace gpu {`

			`template <typename K,`
			`typename IndexType,`
			`bool Dir,`
			`int NumWarpQ,`
			`int NumThreadQ,`
			`int ThreadsPerBlock>`
			`__global__ void warpSelect(Tensor<K, 2, true> in,`
			`Tensor<K, 2, true> outK,`
			`Tensor<IndexType, 2, true> outV,`
			`K initK,`
			`IndexType initV,`
			`int k) {`
			`constexpr int kNumWarps = ThreadsPerBlock / kWarpSize;`

			`WarpSelect<K, IndexType, Dir, Comparator<K>,`
			`NumWarpQ, NumThreadQ, ThreadsPerBlock>`
			`heap(initK, initV, k);`

			`int warpId = threadIdx.x / kWarpSize;`
			`int row = blockIdx.x * kNumWarps + warpId;`

			`if (row >= in.getSize(0)) {`
			`return;`
			`}`

			`K* inStart = in[row].data();`
			`int i = getLaneId();`

			`// Whole warps must participate in the selection`
			`int limit = utils::roundDown(in.getSize(1), kWarpSize);`

			`for (; i < limit; i += kWarpSize) {`
			`heap.add(inStart[i], (IndexType) i);`
			`}`

			`// Handle non-warp multiple remainder`
			`if (i < in.getSize(1)) {`
			`heap.addThreadQ(inStart[i], (IndexType) i);`
			`}`

			`heap.reduce();`
			`heap.writeOut(outK[row].data(),`
			`outV[row].data(), k);`
			`}`

			`void runWarpSelect(Tensor<float, 2, true>& in,`
			`Tensor<float, 2, true>& outKeys,`
			`Tensor<int, 2, true>& outIndices,`
			`bool dir, int k, cudaStream_t stream);`

			`#ifdef FAISS_USE_FLOAT16`
			`void runWarpSelect(Tensor<half, 2, true>& in,`
			`Tensor<half, 2, true>& outKeys,`
			`Tensor<int, 2, true>& outIndices,`
			`bool dir, int k, cudaStream_t stream);`
			`#endif`

			`} } // namespace`