12 #include "Float16.cuh"
13 #include <thrust/execution_policy.h>
14 #include <thrust/transform.h>
16 #ifdef FAISS_USE_FLOAT16
18 namespace faiss {
namespace gpu {
// Returns whether `device` is treated as supporting float16 math, decided
// from the `major` / `minor` fields of its device properties: true when
// major >= 6, or when major == 5 and minor >= 3.
// NOTE(review): major/minor are presumably the CUDA compute capability (so
// the cutoff would be 5.3) -- confirm against getDeviceProperties().
20 bool getDeviceSupportsFloat16Math(
int device) {
// Bound by const reference, so the properties object is not copied here.
21 const auto& prop = getDeviceProperties(device);
// Every major version from 6 up passes; within major 5 only minor >= 3 does.
23 return (prop.major >= 6 ||
24 (prop.major == 5 && prop.minor >= 3));
// Device-side call operator: converts a single float `v` to half by
// forwarding it to the __float2half intrinsic and returning the result.
// Marked const, so invoking it does not modify the functor object.
28 __device__ half operator()(
float v)
const {
return __float2half(v); }
// Device-side call operator: converts a single half `v` to float by
// forwarding it to the __half2float intrinsic and returning the result.
// Marked const, so invoking it does not modify the functor object.
32 __device__
float operator()(half v)
const {
return __half2float(v); }
// Converts the range [in, in + num) to half precision, writing the results
// starting at `out`. The per-element conversion is the FloatToHalf functor,
// applied through thrust::transform with an execution policy bound to
// `stream`.
// NOTE(review): the declarations of `in` and `num` are not visible in this
// view; presumably a device pointer to float input and an element count --
// confirm.
// NOTE(review): whether this call returns before the work on `stream` has
// finished depends on the Thrust version in use -- confirm before reading
// `out` from the host.
35 void runConvertToFloat16(half* out,
38 cudaStream_t stream) {
39 thrust::transform(thrust::cuda::par.on(stream),
40 in, in + num, out, FloatToHalf());
// Converts the range [in, in + num) to single precision, writing the results
// starting at `out`. The per-element conversion is the HalfToFloat functor,
// applied through thrust::transform with an execution policy bound to
// `stream`.
// NOTE(review): the declarations of `in` and `num` are not visible in this
// view; presumably a device pointer to half input and an element count --
// confirm.
// NOTE(review): whether this call returns before the work on `stream` has
// finished depends on the Thrust version in use -- confirm before reading
// `out` from the host.
43 void runConvertToFloat32(
float* out,
46 cudaStream_t stream) {
47 thrust::transform(thrust::cuda::par.on(stream),
48 in, in + num, out, HalfToFloat());
// Host-side float -> half conversion done with integer bit manipulation
// rather than a device intrinsic. `ia` receives the raw bits of the input
// float, `ir` accumulates the 16-bit result pattern, and the finished
// pattern is copied byte-wise into `ret`.
// NOTE(review): the declarations of `ia`, `ir` and `ret` are not visible in
// this view; the shifts and masks below only make sense if `ia` is an
// unsigned 32-bit integer -- confirm.
76 half hostFloat2Half(
float a) {
// Reinterpret the float's bytes as an integer through memcpy.
79 memcpy(&ia, &a,
sizeof(
float));
// Move the sign bit from bit 31 of the float to bit 15 of the result.
81 ir = (ia >> 16) & 0x8000;
// Exponent field all ones: the input is either infinity or NaN.
82 if ((ia & 0x7f800000) == 0x7f800000) {
// With the sign masked off, exactly 0x7f800000 means the mantissa is zero,
// i.e. +/-infinity; any other value reaching the outer branch is a NaN.
// NOTE(review): the statements that encode each case are not visible here.
83 if ((ia & 0x7fffffff) == 0x7f800000) {
88 }
// Exponent field at or above 0x33000000: biased exponent >= 102, i.e.
// unbiased exponent >= -25. Inputs with a smaller exponent skip this branch.
else if ((ia & 0x7f800000) >= 0x33000000) {
// Unbiased exponent of the input (float exponent bias is 127).
89 int shift = (int)((ia >> 23) & 0xff) - 127;
// Keep the 23 mantissa bits and set bit 23, restoring the implicit leading 1
// to give a 24-bit significand.
93 ia = (ia & 0x007fffff) | 0x00800000;
// NOTE(review): the two statement pairs below use different shift amounts
// and look like alternative paths (presumably subnormal vs. normal half
// results); the condition selecting between them is not visible here --
// confirm.
// Variable right shift by (-1 - shift); the discarded low bits are then
// left-aligned in `ia` for the rounding test further down.
95 ir |= ia >> (-1 - shift);
96 ia = ia << (32 - (-1 - shift));
// Fixed right shift by 13 keeps the top 11 of the 24 significand bits; the
// 13 discarded bits are then left-aligned in `ia`.
98 ir |= ia >> (24 - 11);
99 ia = ia << (32 - (24 - 11));
// Adds (14 + shift) into the exponent field (bits 10 and up). The leading
// significand bit already sits at bit 10 and carries into that field, so the
// stored exponent becomes shift + 15.
100 ir = ir + ((14 + shift) << 10);
// Round-to-nearest-even test on the discarded bits held in `ia`: true when
// they exceed one half, or equal exactly one half while the kept result is
// odd.
// NOTE(review): the statement executed when this test passes is not visible
// here.
103 if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) {
// Copy sizeof(half) bytes of the assembled bit pattern into `ret`.
110 memcpy(&ret, &ir,
sizeof(half));
116 #endif // FAISS_USE_FLOAT16