/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
#include <cublas_v2.h>
#include "Float16.cuh"
#include "Tensor.cuh"
namespace faiss { namespace gpu {
class DeviceMemory;
/// Computes C = alpha * A * B + beta * C for single-precision matrices,
/// enqueued on `stream` using the given cuBLAS `handle`.
/// Expects row major layout, not fortran/blas column major!
/// transC/transA/transB request a transposed view of the respective
/// operand. `useHgemm` exists for signature parity with the half-precision
/// overload and is ignored for float32.
void runMatrixMult(Tensor<float, 2, true>& c, bool transC,
                   Tensor<float, 2, true>& a, bool transA,
                   Tensor<float, 2, true>& b, bool transB,
                   float alpha,
                   float beta,
                   bool useHgemm, // ignored for float32
                   cublasHandle_t handle,
                   cudaStream_t stream);
#ifdef FAISS_USE_FLOAT16
/// Computes C = alpha * A * B + beta * C for half-precision matrices,
/// enqueued on `stream` using the given cuBLAS `handle`.
/// Expects row major layout, not fortran/blas column major!
/// transC/transA/transB request a transposed view of the respective
/// operand. When `useHgemm` is true the multiply presumably runs as a
/// native half gemm rather than with float accumulation — confirm in the
/// implementation (.cu) before relying on precision behavior.
void runMatrixMult(Tensor<half, 2, true>& c, bool transC,
                   Tensor<half, 2, true>& a, bool transA,
                   Tensor<half, 2, true>& b, bool transB,
                   float alpha,
                   float beta,
                   bool useHgemm,
                   cublasHandle_t handle,
                   cudaStream_t stream);
#endif
/// For each slice `i` along the outermost dimension, computes
/// C_i = alpha * A_i * B_i + beta * C_i by issuing one gemm per slice
/// (iterated, not batched) on `stream` via the given cuBLAS `handle`.
/// Expects row major layout, not fortran/blas column major!
void runIteratedMatrixMult(
    Tensor<float, 3, true>& c, bool transC,
    Tensor<float, 3, true>& a, bool transA,
    Tensor<float, 3, true>& b, bool transB,
    float alpha,
    float beta,
    cublasHandle_t handle,
    cudaStream_t stream);
/// For each slice `i` along the outermost dimension, computes
/// C_i = alpha * A_i * B_i + beta * C_i using a batched gemm on `stream`
/// via the given cuBLAS `handle`. Temporary device storage (presumably
/// for the per-slice pointer arrays a batched gemm needs — confirm in
/// the implementation) is drawn from `mem`.
/// Expects row major layout, not fortran/blas column major!
void runBatchMatrixMult(
    Tensor<float, 3, true>& c, bool transC,
    Tensor<float, 3, true>& a, bool transA,
    Tensor<float, 3, true>& b, bool transB,
    float alpha,
    float beta,
    DeviceMemory& mem,
    cublasHandle_t handle,
    cudaStream_t stream);
} } // namespace