/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <cublas_v2.h>
#include <faiss/gpu/utils/Tensor.cuh>
#include <faiss/gpu/utils/DeviceTensor.cuh>
#include <faiss/gpu/utils/HostTensor.cuh>
#include <faiss/gpu/utils/Float16.cuh>

namespace faiss { namespace gpu {

// Forward declaration; defined elsewhere in the faiss GPU utilities.
class DeviceMemory;

/// C = alpha * A * B + beta * C
/// Expects row major layout, not fortran/blas column major!
///
/// The trans flags request a transposed view of the corresponding
/// row-major tensor before the multiply (presumably translated to
/// cuBLAS's column-major op arguments by the implementation in
/// MatrixMult-inl.cuh — confirm there).
/// AT/BT allow mixed input precisions (e.g. half via Float16.cuh)
/// while accumulating into a float output.
/// The gemm is issued on `handle`, ordered on `stream`.
template <typename AT, typename BT>
void runMatrixMult(Tensor<float, 2, true>& c, bool transC,
                   Tensor<AT, 2, true>& a, bool transA,
                   Tensor<BT, 2, true>& b, bool transB,
                   float alpha,
                   float beta,
                   cublasHandle_t handle,
                   cudaStream_t stream);

/// C_i = alpha * A_i * B_i + beta * C_i
/// where `i` is the outermost dimension, via iterated gemm
/// Expects row major layout, not fortran/blas column major!
///
/// Each of the i sub-matrix products is issued as a separate gemm on
/// `handle`/`stream`; a, b and c must share the same outermost size.
template <typename AT, typename BT>
void runIteratedMatrixMult(Tensor<float, 3, true>& c, bool transC,
                           Tensor<AT, 3, true>& a, bool transA,
                           Tensor<BT, 3, true>& b, bool transB,
                           float alpha,
                           float beta,
                           cublasHandle_t handle,
                           cudaStream_t stream);

/// C_i = alpha * A_i * B_i + beta * C_i
/// where `i` is the outermost dimension, via batched gemm
/// Expects row major layout, not fortran/blas column major!
///
/// Float-only (no template parameters): uses the cuBLAS batched gemm
/// path. `mem` supplies temporary device scratch space (presumably for
/// the device-side pointer arrays batched gemm requires — confirm in
/// MatrixMult-inl.cuh).
void runBatchMatrixMult(Tensor<float, 3, true>& c, bool transC,
                        Tensor<float, 3, true>& a, bool transA,
                        Tensor<float, 3, true>& b, bool transB,
                        float alpha,
                        float beta,
                        DeviceMemory& mem,
                        cublasHandle_t handle,
                        cudaStream_t stream);

} } // namespace

#include <faiss/gpu/utils/MatrixMult-inl.cuh>