/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <cublas_v2.h>
#include <faiss/gpu/utils/Tensor.cuh>
#include <faiss/gpu/utils/DeviceTensor.cuh>
#include <faiss/gpu/utils/HostTensor.cuh>
#include <faiss/gpu/utils/Float16.cuh>

namespace faiss { namespace gpu {

// Forward declaration; defined elsewhere in the faiss GPU utilities.
class DeviceMemory;

/// C = alpha * A * B + beta * C
/// Expects row major layout, not fortran/blas column major!
///
/// The trans flags request a transposed view of the corresponding
/// row-major tensor before the multiply (presumably translated to
/// cuBLAS's column-major op arguments by the implementation in
/// MatrixMult-inl.cuh — confirm there).
/// AT/BT allow mixed input precisions (e.g. half via Float16.cuh)
/// while accumulating into a float output.
/// The gemm is issued on `handle`, ordered on `stream`.
template <typename AT, typename BT>
void runMatrixMult(Tensor<float, 2, true>& c, bool transC,
                   Tensor<AT, 2, true>& a, bool transA,
                   Tensor<BT, 2, true>& b, bool transB,
                   float alpha,
                   float beta,
                   cublasHandle_t handle,
                   cudaStream_t stream);

/// C_i = alpha * A_i * B_i + beta * C_i
/// where `i` is the outermost dimension, via iterated gemm
/// Expects row major layout, not fortran/blas column major!
///
/// Each of the i sub-matrix products is issued as a separate gemm on
/// `handle`/`stream`; a, b and c must share the same outermost size.
template <typename AT, typename BT>
void runIteratedMatrixMult(Tensor<float, 3, true>& c, bool transC,
                           Tensor<AT, 3, true>& a, bool transA,
                           Tensor<BT, 3, true>& b, bool transB,
                           float alpha,
                           float beta,
                           cublasHandle_t handle,
                           cudaStream_t stream);

/// C_i = alpha * A_i * B_i + beta * C_i
/// where `i` is the outermost dimension, via batched gemm
/// Expects row major layout, not fortran/blas column major!
///
/// Float-only (no template parameters): uses the cuBLAS batched gemm
/// path. `mem` supplies temporary device scratch space (presumably for
/// the device-side pointer arrays batched gemm requires — confirm in
/// MatrixMult-inl.cuh).
void runBatchMatrixMult(Tensor<float, 3, true>& c, bool transC,
                        Tensor<float, 3, true>& a, bool transA,
                        Tensor<float, 3, true>& b, bool transB,
                        float alpha,
                        float beta,
                        DeviceMemory& mem,
                        cublasHandle_t handle,
                        cudaStream_t stream);

} } // namespace

#include <faiss/gpu/utils/MatrixMult-inl.cuh>