13 #include "../GpuResources.h"
14 #include "DeviceTensor.cuh"
18 #if CUDA_VERSION >= 7050
19 #define FAISS_USE_FLOAT16 1
22 #if __CUDA_ARCH__ >= 530
23 #define FAISS_USE_FULL_FLOAT16 1
24 #endif // __CUDA_ARCH__ types
26 #endif // CUDA_VERSION
28 #ifdef FAISS_USE_FLOAT16
29 #include <cuda_fp16.h>
32 namespace faiss {
namespace gpu {
34 #ifdef FAISS_USE_FLOAT16
// Converts a Half4 (a pair of __half2 values) into a float4.
// Lane layout: out.{x,y} come from v.a, out.{z,w} come from v.b.
inline __device__ float4 half4ToFloat4(Half4 v) {
  float2 a = __half22float2(v.a);
  float2 b = __half22float2(v.b);

  float4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = b.x;
  out.w = b.y;

  return out;
}
// Converts a float4 into a Half4 (a pair of __half2 values), using
// round-to-nearest-even (__float22half2_rn).
// Lane layout: v.{x,y} -> out.a, v.{z,w} -> out.b (inverse of
// half4ToFloat4's layout).
inline __device__ Half4 float4ToHalf4(float4 v) {
  float2 a;
  a.x = v.x;
  a.y = v.y;

  float2 b;
  b.x = v.z;
  b.y = v.w;

  Half4 out;
  out.a = __float22half2_rn(a);
  out.b = __float22half2_rn(b);

  return out;
}
// Returns true if the given GPU supports native float16 arithmetic
// (presumably compute capability >= 5.3, matching the
// FAISS_USE_FULL_FLOAT16 guard above — confirm against the .cu
// implementation).
bool getDeviceSupportsFloat16Math(int device);
81 void runConvertToFloat16(half* out,
// Asynchronously converts `num` float16 values in `in` to float32 in
// `out` on the given stream. (Signature reconstructed from the call
// sites below: out pointer, in pointer, element count, stream.)
void runConvertToFloat32(float* out,
                         const half* in,
                         size_t num,
                         cudaStream_t stream);
// Converts a float tensor into a pre-allocated half tensor of the
// same total size, asynchronously on `stream`.
// Precondition: in.numElements() == out.numElements() (asserted).
template <int Dim>
void toHalf(cudaStream_t stream,
            Tensor<float, Dim, true>& in,
            Tensor<half, Dim, true>& out) {
  FAISS_ASSERT(in.numElements() == out.numElements());

  // The conversion kernel operates on the flat element count; the
  // dimensionality of the tensors is irrelevant here.
  runConvertToFloat16(out.data(), in.data(), in.numElements(), stream);
}
// Allocates a new half tensor of the same sizes as `in` and fills it
// with the float16 conversion of `in`, asynchronously on `stream`.
// If `resources` is non-null, the tensor is allocated from the
// current device's memory manager (temporary memory); otherwise it is
// allocated directly.
template <int Dim>
DeviceTensor<half, Dim, true> toHalf(GpuResources* resources,
                                     cudaStream_t stream,
                                     Tensor<float, Dim, true>& in) {
  DeviceTensor<half, Dim, true> out;
  if (resources) {
    // Stream-ordered temporary allocation from the memory manager
    out = std::move(DeviceTensor<half, Dim, true>(
                      resources->getMemoryManagerCurrentDevice(),
                      in.sizes(),
                      stream));
  } else {
    out = std::move(DeviceTensor<half, Dim, true>(in.sizes()));
  }

  toHalf<Dim>(stream, in, out);
  return out;
}
// Converts a half tensor into a pre-allocated float tensor of the
// same total size, asynchronously on `stream`.
// Precondition: in.numElements() == out.numElements() (asserted).
template <int Dim>
void fromHalf(cudaStream_t stream,
              Tensor<half, Dim, true>& in,
              Tensor<float, Dim, true>& out) {
  FAISS_ASSERT(in.numElements() == out.numElements());

  // The conversion kernel operates on the flat element count; the
  // dimensionality of the tensors is irrelevant here.
  runConvertToFloat32(out.data(), in.data(), in.numElements(), stream);
}
// Allocates a new float tensor of the same sizes as `in` and fills it
// with the float32 conversion of `in`, asynchronously on `stream`.
// If `resources` is non-null, the tensor is allocated from the
// current device's memory manager (temporary memory); otherwise it is
// allocated directly.
template <int Dim>
DeviceTensor<float, Dim, true> fromHalf(GpuResources* resources,
                                        cudaStream_t stream,
                                        Tensor<half, Dim, true>& in) {
  DeviceTensor<float, Dim, true> out;
  if (resources) {
    // Stream-ordered temporary allocation from the memory manager
    out = std::move(DeviceTensor<float, Dim, true>(
                      resources->getMemoryManagerCurrentDevice(),
                      in.sizes(),
                      stream));
  } else {
    out = std::move(DeviceTensor<float, Dim, true>(in.sizes()));
  }

  fromHalf<Dim>(stream, in, out);
  return out;
}
// Converts a float32 value to __half on the host (host-side
// float -> half conversion, usable where device intrinsics are not).
__half hostFloat2Half(float v);
153 #endif // FAISS_USE_FLOAT16